From: Kevin Rosenberg Date: Wed, 26 Nov 2003 02:11:09 +0000 (+0000) Subject: 0.8.6.1: X-Git-Url: http://repo.macrolet.net/gitweb/?a=commitdiff_plain;h=3c5c2c265637357e88cb5bb36bcfdc5569bdea4c;p=sbcl.git 0.8.6.1: * clean.sh: Allow recursive includes in src/runtime/Config * doc/*: Convert from SGML format and processing to XML Currently, catalogs are in place for Debian and RedHat which allow automated use of local files instead of the tools downloading DTDs and XSLs from their canonical URLs. I'm glad to help setup catalogs for other operating systems. --- diff --git a/clean.sh b/clean.sh index 007bf3f..4b84302 100755 --- a/clean.sh +++ b/clean.sh @@ -22,6 +22,10 @@ rm -rf obj/* output/* doc/user-manual \ # distribution, we automatically clean up after it here in the # standard clean.sh file.) +# Ensure we know GNUMAKE +. find-gnumake.sh +find_gnumake + # Ask some other directories to clean themselves up. original_pwd=`pwd` for d in tools-for-build; do @@ -31,7 +35,7 @@ for d in tools-for-build; do # this script is just the operations done by these make's, which # is misleading when this script does lotso other operations too. 
# -- WHN - make -s clean + $GNUMAKE -I ../src/runtime -s clean cd $original_pwd > /dev/null done diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000..b98923c --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,113 @@ +DOCFILE_BASE_DEFAULT:=user-manual +DOCFILE_EXT_DEFAULT:=xml + + +# Standard docfile processing + +DEBIAN=$(shell expr "`cat /etc/issue`" : '.*Debian.*') +SUSE=$(shell expr "`cat /etc/issue`" : '.*SuSE.*') +REDHAT=$(shell expr "`cat /etc/issue`" : '.*RedHat.*') + + +ifneq (${DEBIAN},0) +OS:=debian +else + ifneq (${SUSE},0) + OS=suse + else + ifneq (${REDHAT},0) + OS=redhat + endif + endif +endif + + +ifndef DOCFILE_BASE +DOCFILE_BASE=${DOCFILE_BASE_DEFAULT} +endif + +ifndef DOCFILE_EXT +DOCFILE_EXT=${DOCFILE_EXT_DEFAULT} +endif + +DOCFILE:=${DOCFILE_BASE}.${DOCFILE_EXT} +FOFILE:=${DOCFILE_BASE}.fo +PDFFILE:=${DOCFILE_BASE}.pdf +PSFILE:=${DOCFILE_BASE}.ps +DVIFILE:=${DOCFILE_BASE}.dvi +TXTFILE:=${DOCFILE_BASE}.txt +HTMLFILE:=${DOCFILE_BASE}.html +TMPFILES:=${DOCFILE_BASE}.aux ${DOCFILE_BASE}.out ${DOCFILE_BASE}.log +DOCFILES:=$(shell echo *.xml *.xsl) + +ifeq ($(XSLTPROC),) + XSLTPROC:=xsltproc +endif + +CATALOG:=`pwd`/catalog-${OS}.xml +CHECK:=XML_CATALOG_FILES="$(CATALOG)" xmllint --noout --xinclude --postvalid $(DOCFILE) || exit 1 + +.PHONY: all +all: html pdf + +.PHONY: dist +dist: html pdf + +.PHONY: doc +doc: html pdf + +.PHONY: check +check: + @echo "Operating system detected: ${OS}" + @$(CHECK) + +.PHONY: html +html: html-stamp + +html-stamp: $(DOCFILES) Makefile + @rm -rf html + @mkdir html + @XML_CATALOG_FILES="$(CATALOG)" $(XSLTPROC) --stringparam chunker.output.encoding ISO-8859-1 \ + --xinclude --output html/ html_chunk.xsl $(DOCFILE) + touch html-stamp + +.PHONY: fo +fo: ${FOFILE} + +${FOFILE}: $(DOCFILES) Makefile + @XML_CATALOG_FILES="$(CATALOG)" $(XSLTPROC) --xinclude --output $(FOFILE) fo.xsl $(DOCFILE) + +.PHONY: pdf +pdf: ${PDFFILE} + +${PDFFILE}: ${DOCFILES} Makefile + @$(MAKE) fo + @fop $(FOFILE) -pdf $(PDFFILE) > 
/dev/null + +.PHONY: dvi +dvi: ${DVIFILE} + +.PHONY: ps +ps: ${PSFILE} + +${PSFILE}: ${DOCFILES} Makefile + @$(MAKE) fo + @fop $(FOFILE) -ps $(PSFILE) > /dev/null + + +.PHONY: txt +txt: ${TXTFILE} + +${TXTFILE}: ${FOFILE} + @XML_CATALOG_FILES="$(CATALOG)" $(XSLTPROC) --xinclude --output ${HTMLFILE} html.xsl $(DOCFILE) + lynx -dump ${HTMLFILE} > ${TXTFILE} + +.PHONY: clean +clean: + @rm -f *~ *.bak *.orig \#*\# .\#* texput.log + @rm -rf html ${PSFILE} ${HTMLFILE} html-stamp + @rm -f ${TMPFILES} ${FOFILE} + @rm -f ${DVIFILE} ${TXTFILE} + +.PHONY: distclean +distclean: clean diff --git a/doc/beyond-ansi.sgml b/doc/beyond-ansi.sgml deleted file mode 100644 index c29fd52..0000000 --- a/doc/beyond-ansi.sgml +++ /dev/null @@ -1,386 +0,0 @@ - -Beyond The &ANSI; Standard</> - -<para>&SBCL; is mostly an implementation of the &ANSI; standard for -Common Lisp. However, there's some important behavior which extends -or clarifies the standard, and various behavior which outright -violates the standard. -</para> - -<sect1 id="non-conformance"><title>Non-Conformance With The &ANSI; Standard</> - -<para> -Essentially every type of non-conformance is considered a bug. -(The exceptions involve internal inconsistencies in the standard.) -In &SBCL; 0.7.6, the master record of known bugs is in -the <filename>BUGS</> file in the distribution. -Some highlight information about bugs may also be found in the -manual page. The recommended way to report bugs is through the sbcl-help or -sbcl-devel mailings lists. -</para> - -</sect1> - -<sect1 id="idiosyncrasies"><title>Idiosyncrasies</> - -<para>The information in this section describes some of the ways -that &SBCL; deals with choices that the &ANSI; standard -leaves to the implementation.</para> - -<para>Declarations are generally treated as assertions. 
This general -principle, and its implications, and the bugs which still keep the -compiler from quite satisfying this principle, are discussed in the -<link linkend="compiler">chapter on the compiler</link>.</para> - -<para>&SBCL; is essentially a compiler-only implementation of -&CommonLisp;. That is, for all but a few special cases, -<function>eval</> creates a -lambda expression, calls <function>compile</> on the lambda -expression to create a compiled function, and then calls -<function>funcall</> on the resulting function object. This -is explicitly allowed by the &ANSI; standard, but leads to some -oddities, e.g. collapsing <function>functionp</> and -<function>compiled-function-p</> into the same predicate.</para> - -<para>&SBCL; is quite strict about ANSI's definition of -<function>defconstant</>. ANSI says that doing <function>defconstant</> -of the same symbol more than once is undefined unless the new value -is <function>eql</> to the old value. Conforming to this specification -is a nuisance when the "constant" value is only constant under some -weaker test like <function>string=</> or <function>equal</>. It's -especially annoying because, in &SBCL;, <function>defconstant</> takes effect -not only at load time but also at compile time, so that just -compiling and loading reasonable code like -<programlisting>(defconstant +foobyte+ '(1 4))</> -runs into this undefined behavior. Many -implementations of Common Lisp try to help the programmer around -this annoyance by silently accepting the undefined code and -trying to do what the programmer probably meant. &SBCL; instead -treats the undefined behavior as an error. Often -such code can be rewritten -in portable &ANSI; Common Lisp which has the desired behavior. 
-E.g., the code above can be given an exactly defined meaning by replacing -<function>defconstant</> either with <function>defparameter</> or -with a customized macro which does the right thing, possibly along the -lines of the <function>defconstant-eqx</> macro used internally in the -implementation of &SBCL; itself. In circumstances where this is not -appropriate, the programmer can handle the condition type -<errortype>sb-ext:defconstant-uneql</errortype>, and choose either the -<action>continue</action> or <action>abort</action> restart as -appropriate.</para> - -<para>&SBCL; gives style warnings about various kinds of perfectly -legal code, e.g. -<itemizedlist> - <listitem><para><function>defmethod</> without - <function>defgeneric</></para></listitem> - <listitem><para>multiple <function>defun</>s of the same - symbol</para></listitem> - <listitem><para>special variables not named in the conventional - <varname>*foo*</> style, and lexical variables unconventionally named - in the <varname>*foo*</> style</para></listitem> -</itemizedlist> -This causes friction with people -who point out that other ways of organizing code (especially -avoiding the use of <function>defgeneric</>) -are just as aesthetically stylish. -However, these warnings should be read not -as "warning, bad aesthetics detected, you have no style" but -"warning, this style keeps the compiler from understanding -the code as well as you might like." That is, -unless the compiler warns about such conditions, there's no -way for the compiler to warn -about some programming errors which would otherwise be -easy to overlook. 
(related bug: The warning about -multiple <function>defun</>s is pointlessly annoying when you compile -and then load a function containing <function>defun</> wrapped -in <function>eval-when</>, and ideally should be suppressed in -that case, but still isn't as of &SBCL; 0.7.6.)</para> - -</sect1> - -<sect1 id="extensions"><title>Extensions</> - -<para>&SBCL; is derived from &CMUCL;, which implements many extensions -to the &ANSI; standard. &SBCL; doesn't support as many extensions as -&CMUCL;, but it still has quite a few.</para> - -<sect2><title>Things Which Might Be In The Next &ANSI; Standard</> - -<para>&SBCL; provides extensive support for -calling external C code, described -<link linkend="ffi">in its own chapter</link>.</para> - -<para>&SBCL; provides additional garbage collection functionality not -specified by &ANSI;. Weak pointers allow references to objects to be -maintained without keeping them from being GCed. And "finalization" -hooks are available to cause code to be executed when an object has been -GCed.</para> <!-- FIXME: Actually documenting these would be good.:-| --> - -<para>&SBCL; supports Gray streams, user-overloadable CLOS classes -whose instances can be used as Lisp streams (e.g. passed as the -first argument to <function>format</>). 
Additionally, the -bundled contrib module <interface>sb-simple-streams</interface> -implements a subset of the Franz Allegro simple-streams proposal.</para> - -<para>&SBCL; supports a MetaObject Protocol which is intended to be -compatible with &AMOP;; present exceptions to this (as distinct from -current bugs) are:</para> -<itemizedlist> - <listitem><para>the abstract <classname>metaobject</> class is not - present in the class hierarchy;</para></listitem> - <listitem><para>the <classname>standard-object</> and - <classname>funcallable-standard-object</> classes are - disjoint;</para></listitem> - <listitem><para><function>compute-effective-method</> only returns - one value, not two;</para></listitem> - <listitem><para>the system-supplied <property>:around</> method for - <function>compute-slots</> specialized on - <classname>funcallable-standard-class</> does not respect the - requested order from a user-supplied primary method.</para> -</listitem> -</itemizedlist> - -</sect2> - -<sect2><title>Threading (a.k.a Multiprocessing)</> - -<para>&SBCL; (as of version 0.8.3, on Linux x86 only) supports a -fairly low-level threading interface that maps onto the host operating -system's concept of threads or lightweight processes. </para> - -<sect3><title>Lisp-level view - -A rudimentary interface to creating and managing multiple threads -can be found in the sb-thread package. This is -intended for public consumption, so look at the exported symbols and -their documentation strings. - - -Dynamic bindings to symbols are per-thread. Signal handlers -are per-thread. - - -sb-ext:quit exits the current thread, not -necessarily the whole environment. The environment will be shut down -when the last thread exits. - - -Threads arbitrate between themselves for the user's attention. -A thread may be in one of three notional states: foreground, -background, or stopped. 
When a background process attempts to print a -repl prompt or to enter the debugger, it will stop and print a message -saying that it has stopped. The user at his leisure may switch to -that thread to find out what it needs. If a background thread enters -the debugger, selecting any restart will put it back into the -background before it resumes. - - -If the user has multiple views onto the same Lisp image (for -example, using multiple terminals, or a windowing system, or network -access) they are typically set up as multiple `sessions' such that each -view has its own collection of foreground/background/stopped threads. -sb-thread:make-listener-thread can be used to -start a new thread in its own `session'. - - -Mutexes and condition variables are available for -managing access to shared data: see - - -(apropos "mutex" :sb-thread) - - -(apropos "condition" :sb-thread) - - and the waitqueue structure - - - -and poke around in their documentation strings. - - -Implementation (Linux x86) - -On Linux x86, this is implemented using -clone() and does not involve pthreads. This is -not because there is anything wrong with pthreads per -se, but there is plenty wrong (from our perspective) with -LinuxThreads. &SBCL; threads are mapped 1:1 onto Linux tasks which -share a VM but nothing else - each has its own process id and can be -seen in e.g. ps output. - - -Per-thread local bindings for special variables is achieved -using the %fs segment register to point to a per-thread storage area. -This may cause interesting results if you link to foreign code that -expects threading or creates new threads, and the thread library in -question uses %fs in an incompatible way. - - -Threads waiting on queues (e.g. for locks or condition -variables) are put to sleep using sigtimedwait() -and woken with SIGCONT. 
- - -&SBCL; at present will alway have at least two tasks running as -seen from Linux: when the first process has done startup -initialization (mapping files in place, installing signal handlers -etc) it creates a new thread to run the Lisp startup and initial -listener. The original thread stays around to reap dead subthreads -and deallocate their resources (e.g. stacks) when they exit. - - -Garbage collection is done with the existing Conservative -Generational GC. Allocation is done in small (typically 8k) regions : -each thread has its own region so this involves no stopping. However, -when a region fills, a lock must be obtained while another is -allocated, and when a collection is required, all processes are -stopped. This is achieved by sending them signals, which may make for -interesting behaviour if they are interrupted in system calls. The -streams interface is believed to handle the required system call -restarting correctly, but this may be a consideration when making -other blocking calls e.g. from foreign library code. - - -Large amounts of the &SBCL; library have not been inspected for -thread-safety. Some of the obviously unsafe areas have large locks -around them, so compilation and fasl loading, for example, cannot be -parallelized. Work is ongoing in this area. - - -A new thread by default is created in the same POSIX process -group and session as the thread it was created by. This has an impact -on keyboard interrupt handling: pressing your terminal's intr key -(typically Control-C) will interrupt all processes in the foreground -process group, including Lisp threads that &SBCL; considers to be -notionally `background'. This is undesirable, so background threads -are set to ignore the SIGINT signal. Arbitration for the input stream -is managed by locking on sb-thread::*session-lock* - - -A thread can be created in a new Lisp 'session' (new terminal or -window) using sb-thread:make-listener-thread. 
-These sessions map directly onto POSIX sessions, so that pressing -Control-C in the wrong window will not interrupt them - this has been -found to be embarrassing. - - - - - - -Support For Unix</> - -<para>The UNIX command line can be read from the variable -<varname>sb-ext:*posix-argv*</>. The UNIX environment can be queried with the -<function>sb-ext:posix-getenv</> function.</para> - -<para>The &SBCL; system can be terminated with <function>sb-ext:quit</>, -optionally returning a specified numeric value to the calling Unix -process. The normal Unix idiom of terminating on end of file on input -is also supported.</para> - -</sect2> - -<sect2><title>Customization Hooks for Users - -The behaviour of require when called with only -one argument is implementation-defined. In &SBCL; it calls functions -on the user-settable list sb-ext:*module-provider-functions* -- see the require documentation string for details. - - -The toplevel repl prompt may be customized, and the function -that reads user input may be replaced completely. - - - - -Tools To Help Developers - -&SBCL; provides a profiler and other extensions to the &ANSI; -trace facility. See the online function documentation for -trace for more information. - -The debugger supports a number of options. Its documentation is -accessed by typing help at the debugger prompt. - - -Documentation for inspect is accessed by typing -help at the inspect prompt. - - - -Interface To Low-Level &SBCL; Implementation - -&SBCL; has the ability to save its state as a file for later -execution. This functionality is important for its bootstrapping -process, and is also provided as an extension to the user See the -documentation for sb-ext:save-lisp-and-die for more -information. - -&SBCL; has inherited from &CMUCL; various hooks to allow -the user to tweak and monitor the garbage collection process. These -are somewhat stale code, and their interface might need to be cleaned -up. 
If you have urgent need of them, look at the code in -src/code/gc.lisp and bring it up on the -developers' mailing list. - -&SBCL; has various hooks inherited from &CMUCL;, like -sb-ext:float-denormalized-p, to allow a program to take -advantage of &IEEE; floating point arithmetic properties which aren't -conveniently or efficiently expressible using the &ANSI; standard. These -look good, and their interface looks good, but &IEEE; support is -slightly broken due to a stupid decision to remove some support for -infinities (because it wasn't in the &ANSI; spec and it didn't occur to -me that it was in the &IEEE; spec). If you need this stuff, take a look -at the code and bring it up on the developers' mailing -list. - - - -Efficiency Hacks - -The sb-ext:purify function causes &SBCL; -first to collect all garbage, then to mark all uncollected objects as -permanent, never again attempting to collect them as garbage. This can -cause a large increase in efficiency when using a primitive garbage -collector, or a more moderate increase in efficiency when using a more -sophisticated garbage collector which is well suited to the program's -memory usage pattern. It also allows permanent code to be frozen at -fixed addresses, a precondition for using copy-on-write to share code -between multiple Lisp processes. is less important with modern -generational garbage collectors. - -The sb-ext:truly-the declares the type of the -result of the operations, producing its argument; the declaration is -not checked. In short: don't use it. - -The sb-ext:freeze-type declaration declares that a -type will never change, which can make type testing -(typep, etc.) more efficient for structure types. - -The sb-ext:constant-function declaration specifies -that a function will always return the same value for the same -arguments, which may allow the compiler to optimize calls -to it. 
This is appropriate for functions like sqrt, but -is not appropriate for functions like aref, -which can change their return values when the underlying data are -changed. - - - - - - - diff --git a/doc/beyond-ansi.xml b/doc/beyond-ansi.xml new file mode 100644 index 0000000..cf6098a --- /dev/null +++ b/doc/beyond-ansi.xml @@ -0,0 +1,392 @@ + + +%myents; +]> + +Beyond The &ANSI; Standard + +&SBCL; is mostly an implementation of the &ANSI; standard for +Common Lisp. However, there's some important behavior which extends +or clarifies the standard, and various behavior which outright +violates the standard. + + +Non-Conformance With The &ANSI; Standard + + + Essentially every type of non-conformance is considered a bug. + (The exceptions involve internal inconsistencies in the standard.) + In &SBCL; 0.7.6, the master record of known bugs is in + the BUGS file in the distribution. + Some highlight information about bugs may also be found in the + manual page. The recommended way to report bugs is through the sbcl-help or + sbcl-devel mailing lists. + + + + +Idiosyncrasies + +The information in this section describes some of the ways +that &SBCL; deals with choices that the &ANSI; standard +leaves to the implementation. + +Declarations are generally treated as assertions. This general +principle, and its implications, and the bugs which still keep the +compiler from quite satisfying this principle, are discussed in the +chapter on the compiler. + +&SBCL; is essentially a compiler-only implementation of +&CommonLisp;. That is, for all but a few special cases, +eval creates a +lambda expression, calls compile on the lambda +expression to create a compiled function, and then calls +funcall on the resulting function object. This +is explicitly allowed by the &ANSI; standard, but leads to some +oddities, e.g. collapsing functionp and +compiled-function-p into the same predicate. + +&SBCL; is quite strict about ANSI's definition of +defconstant.
ANSI says that doing defconstant +of the same symbol more than once is undefined unless the new value +is eql to the old value. Conforming to this specification +is a nuisance when the "constant" value is only constant under some +weaker test like string= or equal. It's +especially annoying because, in &SBCL;, defconstant takes effect +not only at load time but also at compile time, so that just +compiling and loading reasonable code like +(defconstant +foobyte+ '(1 4)) +runs into this undefined behavior. Many +implementations of Common Lisp try to help the programmer around +this annoyance by silently accepting the undefined code and +trying to do what the programmer probably meant. &SBCL; instead +treats the undefined behavior as an error. Often +such code can be rewritten +in portable &ANSI; Common Lisp which has the desired behavior. +E.g., the code above can be given an exactly defined meaning by replacing +defconstant either with defparameter or +with a customized macro which does the right thing, possibly along the +lines of the defconstant-eqx macro used internally in the +implementation of &SBCL; itself. In circumstances where this is not +appropriate, the programmer can handle the condition type +sb-ext:defconstant-uneql, and choose either the +continue or abort restart as +appropriate. + +&SBCL; gives style warnings about various kinds of perfectly +legal code, e.g. + + defmethod without + defgeneric + multiple defuns of the same + symbol + special variables not named in the conventional + *foo* style, and lexical variables unconventionally named + in the *foo* style + +This causes friction with people +who point out that other ways of organizing code (especially +avoiding the use of defgeneric) +are just as aesthetically stylish. +However, these warnings should be read not +as "warning, bad aesthetics detected, you have no style" but +"warning, this style keeps the compiler from understanding +the code as well as you might like." 
That is, +unless the compiler warns about such conditions, there's no +way for the compiler to warn +about some programming errors which would otherwise be +easy to overlook. (related bug: The warning about +multiple defuns is pointlessly annoying when you compile +and then load a function containing defun wrapped +in eval-when, and ideally should be suppressed in +that case, but still isn't as of &SBCL; 0.7.6.) + + + +Extensions + +&SBCL; is derived from &CMUCL;, which implements many extensions +to the &ANSI; standard. &SBCL; doesn't support as many extensions as +&CMUCL;, but it still has quite a few. + +Things Which Might Be In The Next &ANSI; Standard + +&SBCL; provides extensive support for +calling external C code, described +in its own chapter. + +&SBCL; provides additional garbage collection functionality not +specified by &ANSI;. Weak pointers allow references to objects to be +maintained without keeping them from being GCed. And "finalization" +hooks are available to cause code to be executed when an object has been +GCed. + +&SBCL; supports Gray streams, user-overloadable CLOS classes +whose instances can be used as Lisp streams (e.g. passed as the +first argument to format). Additionally, the +bundled contrib module sb-simple-streams +implements a subset of the Franz Allegro simple-streams proposal. + +&SBCL; supports a MetaObject Protocol which is intended to be +compatible with &AMOP;; present exceptions to this (as distinct from +current bugs) are: + + the abstract metaobject class is not + present in the class hierarchy; + the standard-object and + funcallable-standard-object classes are + disjoint; + compute-effective-method only returns + one value, not two; + the system-supplied :around method for + compute-slots specialized on + funcallable-standard-class does not respect the + requested order from a user-supplied primary method. 
+ + + + + +Threading (a.k.a Multiprocessing) + +&SBCL; (as of version 0.8.3, on Linux x86 only) supports a +fairly low-level threading interface that maps onto the host operating +system's concept of threads or lightweight processes. + +Lisp-level view + +A rudimentary interface to creating and managing multiple threads +can be found in the sb-thread package. This is +intended for public consumption, so look at the exported symbols and +their documentation strings. + + +Dynamic bindings to symbols are per-thread. Signal handlers +are per-thread. + + +sb-ext:quit exits the current thread, not +necessarily the whole environment. The environment will be shut down +when the last thread exits. + + +Threads arbitrate between themselves for the user's attention. +A thread may be in one of three notional states: foreground, +background, or stopped. When a background process attempts to print a +repl prompt or to enter the debugger, it will stop and print a message +saying that it has stopped. The user at his leisure may switch to +that thread to find out what it needs. If a background thread enters +the debugger, selecting any restart will put it back into the +background before it resumes. + + +If the user has multiple views onto the same Lisp image (for +example, using multiple terminals, or a windowing system, or network +access) they are typically set up as multiple `sessions' such that each +view has its own collection of foreground/background/stopped threads. +sb-thread:make-listener-thread can be used to +start a new thread in its own `session'. + + +Mutexes and condition variables are available for +managing access to shared data: see + + + (apropos "mutex" :sb-thread) + + + (apropos "condition" :sb-thread) + + and the waitqueue structure + + + +and poke around in their documentation strings. + + +Implementation (Linux x86) + +On Linux x86, this is implemented using +clone() and does not involve pthreads. 
This is +not because there is anything wrong with pthreads per +se, but there is plenty wrong (from our perspective) with +LinuxThreads. &SBCL; threads are mapped 1:1 onto Linux tasks which +share a VM but nothing else - each has its own process id and can be +seen in e.g. ps output. + + +Per-thread local bindings for special variables are achieved +using the %fs segment register to point to a per-thread storage area. +This may cause interesting results if you link to foreign code that +expects threading or creates new threads, and the thread library in +question uses %fs in an incompatible way. + + +Threads waiting on queues (e.g. for locks or condition +variables) are put to sleep using sigtimedwait() +and woken with SIGCONT. + + +&SBCL; at present will always have at least two tasks running as +seen from Linux: when the first process has done startup +initialization (mapping files in place, installing signal handlers +etc) it creates a new thread to run the Lisp startup and initial +listener. The original thread stays around to reap dead subthreads +and deallocate their resources (e.g. stacks) when they exit. + + +Garbage collection is done with the existing Conservative +Generational GC. Allocation is done in small (typically 8k) regions: +each thread has its own region so this involves no stopping. However, +when a region fills, a lock must be obtained while another is +allocated, and when a collection is required, all processes are +stopped. This is achieved by sending them signals, which may make for +interesting behaviour if they are interrupted in system calls. The +streams interface is believed to handle the required system call +restarting correctly, but this may be a consideration when making +other blocking calls e.g. from foreign library code. + + +Large amounts of the &SBCL; library have not been inspected for +thread-safety.
Some of the obviously unsafe areas have large locks +around them, so compilation and fasl loading, for example, cannot be +parallelized. Work is ongoing in this area. + + +A new thread by default is created in the same POSIX process +group and session as the thread it was created by. This has an impact +on keyboard interrupt handling: pressing your terminal's intr key +(typically Control-C) will interrupt all processes in the foreground +process group, including Lisp threads that &SBCL; considers to be +notionally `background'. This is undesirable, so background threads +are set to ignore the SIGINT signal. Arbitration for the input stream +is managed by locking on sb-thread::*session-lock* + + +A thread can be created in a new Lisp 'session' (new terminal or +window) using sb-thread:make-listener-thread. +These sessions map directly onto POSIX sessions, so that pressing +Control-C in the wrong window will not interrupt them - this has been +found to be embarrassing. + + + + + + +Support For Unix + +The UNIX command line can be read from the variable +sb-ext:*posix-argv*. The UNIX environment can be queried with the +sb-ext:posix-getenv function. + +The &SBCL; system can be terminated with sb-ext:quit, +optionally returning a specified numeric value to the calling Unix +process. The normal Unix idiom of terminating on end of file on input +is also supported. + + + +Customization Hooks for Users + +The behaviour of require when called with only +one argument is implementation-defined. In &SBCL; it calls functions +on the user-settable list sb-ext:*module-provider-functions* +- see the require documentation string for details. + + +The toplevel repl prompt may be customized, and the function +that reads user input may be replaced completely. + + + + +Tools To Help Developers + +&SBCL; provides a profiler and other extensions to the &ANSI; +trace facility. See the online function documentation for +trace for more information. 
+ +The debugger supports a number of options. Its documentation is +accessed by typing help at the debugger prompt. + + +Documentation for inspect is accessed by typing +help at the inspect prompt. + + + +Interface To Low-Level &SBCL; Implementation + +&SBCL; has the ability to save its state as a file for later +execution. This functionality is important for its bootstrapping +process, and is also provided as an extension to the user. See the +documentation for sb-ext:save-lisp-and-die for more +information. + +&SBCL; has inherited from &CMUCL; various hooks to allow +the user to tweak and monitor the garbage collection process. These +are somewhat stale code, and their interface might need to be cleaned +up. If you have urgent need of them, look at the code in +src/code/gc.lisp and bring it up on the +developers' mailing list. + +&SBCL; has various hooks inherited from &CMUCL;, like +sb-ext:float-denormalized-p, to allow a program to take +advantage of &IEEE; floating point arithmetic properties which aren't +conveniently or efficiently expressible using the &ANSI; standard. These +look good, and their interface looks good, but &IEEE; support is +slightly broken due to a stupid decision to remove some support for +infinities (because it wasn't in the &ANSI; spec and it didn't occur to +me that it was in the &IEEE; spec). If you need this stuff, take a look +at the code and bring it up on the developers' mailing +list. + + + +Efficiency Hacks + +The sb-ext:purify function causes &SBCL; +first to collect all garbage, then to mark all uncollected objects as +permanent, never again attempting to collect them as garbage. This can +cause a large increase in efficiency when using a primitive garbage +collector, or a more moderate increase in efficiency when using a more +sophisticated garbage collector which is well suited to the program's +memory usage pattern.
It also allows permanent code to be frozen at +fixed addresses, a precondition for using copy-on-write to share code +between multiple Lisp processes. sb-ext:purify is less important with modern +generational garbage collectors. + +The sb-ext:truly-the operator declares the type of the +result of the operations, producing its argument; the declaration is +not checked. In short: don't use it. + +The sb-ext:freeze-type declaration declares that a +type will never change, which can make type testing +(typep, etc.) more efficient for structure types. + +The sb-ext:constant-function declaration specifies +that a function will always return the same value for the same +arguments, which may allow the compiler to optimize calls +to it. This is appropriate for functions like sqrt, but +is not appropriate for functions like aref, +which can change their return values when the underlying data are +changed. + + + + + + + diff --git a/doc/catalog-debian.xml b/doc/catalog-debian.xml new file mode 100644 index 0000000..86e7032 --- /dev/null +++ b/doc/catalog-debian.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/catalog-suse.xml b/doc/catalog-suse.xml new file mode 100644 index 0000000..7520e88 --- /dev/null +++ b/doc/catalog-suse.xml @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/compiler.sgml b/doc/compiler.sgml deleted file mode 100644 index 7ebc23f..0000000 --- a/doc/compiler.sgml +++ /dev/null @@ -1,933 +0,0 @@ -The Compiler</> - -<para>This chapter will discuss most compiler issues other than -efficiency, including compiler error messages, the &SBCL; compiler's -unusual approach to type safety in the presence of type declarations, -the effects of various compiler optimization policies, and the way -that inlining and open coding may cause optimized code to differ from -a naive translation.
Efficiency issues are sufficiently varied and -separate that they have <link linkend="efficiency">their own -chapter</link>.</para> - -<sect1><title>Error Messages</> -<!--INDEX {error messages}{compiler}--> -<!--INDEX {compiler error messages}--> - -<para>The compiler supplies a large amount of source location -information in error messages. The error messages contain a lot of -detail in a terse format, so they may be confusing at first. Error -messages will be illustrated using this example program: -<programlisting>(defmacro zoq (x) - `(roq (ploq (+ ,x 3)))) - -(defun foo (y) - (declare (symbol y)) - (zoq y))</programlisting> -The main problem with this program is that it is trying to add -<literal>3</> to a symbol. Note also that the functions -<function>roq</> and <function>ploq</> aren't defined anywhere. -</para> - -<sect2><title>The Parts of the Error Message</> - -<para>When processing this program, the compiler will produce this warning: -<screen>file: /tmp/foo.lisp - -in: DEFUN FOO - (ZOQ Y) ---> ROQ PLOQ + -==> - Y -caught WARNING: - Result is a SYMBOL, not a NUMBER.</screen> -In this example we see each of the six possible parts of a compiler error -message: -<orderedlist> - <listitem><para><computeroutput>File: /tmp/foo.lisp</> - This is the name of the file that the compiler read the - relevant code from. The file name is displayed because it - may not be immediately obvious when there is an - error during compilation of a large system, especially when - <function>with-compilation-unit</> is used to delay undefined - warnings.</para></listitem> - <listitem><para><computeroutput>in: DEFUN FOO</> This is the - definition top level form responsible for the error. It is - obtained by taking the first two elements of the enclosing form - whose first element is a symbol beginning with <quote><literal>def</></>. - If there is no such enclosing <quote><literal>def</></> form, then the - outermost form is used. 
If there are multiple <literal>def</> - forms, then they are all printed from the outside in, separated by - <literal>=></>'s. In this example, the problem was in the - <function>defun</> for <function>foo</>.</para></listitem> - <listitem><para><computeroutput>(ZOQ Y)</> This is the - <emphasis>original source</> form responsible for the error. - Original source means that the form directly appeared in the - original input to the compiler, i.e. in the lambda passed to - <function>compile</> or in the top level form read from the - source file. In this example, the expansion of the <function>zoq</> - macro was responsible for the error.</para></listitem> - <listitem><para><computeroutput>--> ROQ PLOQ +</> This is the - <emphasis>processing path</> that the compiler used to produce - the errorful code. The processing path is a representation of - the evaluated forms enclosing the actual source that the - compiler encountered when processing the original source. - The path is the first element of each form, or the form itself - if the form is not a list. These forms result from the - expansion of macros or source-to-source transformation done - by the compiler. In this example, the enclosing evaluated forms - are the calls to <function>roq</>, <function>ploq</> and - <function>+</>. These calls resulted from the expansion of - the <function>zoq</> macro.</para></listitem> - <listitem><para><computeroutput>==> Y</> This is the - <emphasis>actual source</> responsible for the error. If - the actual source appears in the explanation, then - we print the next enclosing evaluated form, instead of - printing the actual source twice. (This is the form - that would otherwise have been the last form of the processing - path.) 
In this example, the problem is with the evaluation of - the reference to the variable <varname>y</>.</para></listitem> - <listitem><para> - <computeroutput>caught WARNING: Result is a SYMBOL, not a NUMBER.</> - This is the <emphasis>explanation</> of the problem. In this - example, the problem is that <varname>y</> evaluates to a symbol, - but is in a context where a number is required (the argument - to <function>+</>).</para></listitem> -</orderedlist> - -Note that each part of the error message is distinctively marked: - -<itemizedlist> - <listitem><para> <computeroutput>file:</> and <computeroutput>in:</> - mark the file and definition, respectively.</para></listitem> - <listitem><para> The original source is an indented form with no - prefix.</para></listitem> - <listitem><para> Each line of the processing path is prefixed with - <computeroutput>--></computeroutput></para></listitem> - <listitem><para> The actual source form is indented like the original - source, but is marked by a preceding <computeroutput>==></> line. - </para></listitem> - <listitem><para> The explanation is prefixed with the error - severity, which can be <computeroutput>caught ERROR:</>, - <computeroutput>caught WARNING:</>, - <computeroutput>caught STYLE-WARNING:</>, or - <computeroutput>note:</>. </para></listitem> -</itemizedlist> -</para> - -<para>Each part of the error message is more specific than the preceding -one. If consecutive error messages are for nearby locations, then the -front part of the error messages would be the same. In this case, the -compiler omits as much of the second message as is in common with the -first.
For example: -<screen>file: /tmp/foo.lisp - -in: DEFUN FOO - (ZOQ Y) ---> ROQ -==> - (PLOQ (+ Y 3)) -caught STYLE-WARNING: - undefined function: PLOQ - -==> - (ROQ (PLOQ (+ Y 3))) -caught STYLE-WARNING: - undefined function: ROQ</screen> -In this example, the file, definition and original source are -identical for the two messages, so the compiler omits them in the -second message. If consecutive messages are entirely identical, then -the compiler prints only the first message, followed by: -<computeroutput>[Last message occurs <replaceable>repeats</> times]</> -where <replaceable>repeats</> is the number of times the message -was given.</para> - -<para>If the source was not from a file, then no file line is printed. -If the actual source is the same as the original source, then the -processing path and actual source will be omitted. If no forms -intervene between the original source and the actual source, then the -processing path will also be omitted.</para> - -</sect2> - -<sect2><title>The Original and Actual Source</> - -<para>The <emphasis>original source</> displayed will almost always be -a list. If the actual source for an error message is a symbol, the -original source will be the immediately enclosing evaluated list form. -So even if the offending symbol does appear in the original source, -the compiler will print the enclosing list and then print the symbol -as the actual source (as though the symbol were introduced by a -macro.)</para> - -<para>When the <emphasis>actual source</> is displayed -(and is not a symbol), it will always -be code that resulted from the expansion of a macro or a source-to-source -compiler optimization. This is code that did not appear in the original -source program; it was introduced by the compiler.</para> - -<para>Keep in mind that when the compiler displays a source form -in an error message, it always displays the most specific (innermost) -responsible form. 
For example, compiling this function -<programlisting>(defun bar (x) - (let (a) - (declare (fixnum a)) - (setq a (foo x)) - a))</programlisting> -gives this error message -<screen>in: DEFUN BAR - (LET (A) (DECLARE (FIXNUM A)) (SETQ A (FOO X)) A) -caught WARNING: The binding of A is not a FIXNUM: - NIL</screen> -This error message is not saying <quote>there is a problem somewhere in -this <function>let</></quote> — it is saying that there is a -problem with the <function>let</> itself. In this example, the problem -is that <varname>a</>'s <literal>nil</> initial value is not a -<type>fixnum</>.</para> - -</sect2> - -<sect2><title>The Processing Path</> -<!--INDEX processing path--> -<!--INDEX macroexpansion--> -<!--INDEX source-to-source transformation--> - -<para>The processing path is mainly useful for debugging macros, so if -you don't write macros, you can probably ignore it. Consider this -example: - -<programlisting>(defun foo (n) - (dotimes (i n *undefined*))) -</programlisting> - -Compiling results in this error message: - -<screen>in: DEFUN FOO - (DOTIMES (I N *UNDEFINED*)) ---> DO BLOCK LET TAGBODY RETURN-FROM -==> - (PROGN *UNDEFINED*) -caught STYLE-WARNING: - undefined variable: *UNDEFINED*</screen> - -Note that <function>do</> appears in the processing path. This is because -<function>dotimes</> expands into: - -<programlisting>(do ((i 0 (1+ i)) (#:g1 n)) - ((>= i #:g1) *undefined*) - (declare (type unsigned-byte i)))</programlisting> - -The rest of the processing path results from the expansion -of <function>do</>: - -<programlisting> -(block nil - (let ((i 0) (#:g1 n)) - (declare (type unsigned-byte i)) - (tagbody (go #:g3) - #:g2 (psetq i (1+ i)) - #:g3 (unless (>= i #:g1) (go #:g2)) - (return-from nil (progn *undefined*))))) -</programlisting> - -In this example, the compiler descended into the <function>block</>, -<function>let</>, <function>tagbody</> and <function>return-from</> to -reach the <function>progn</> printed as the actual source. 
This is a -place where the <quote>actual source appears in explanation</> rule -was applied. The innermost actual source form was the symbol -<varname>*undefined*</> itself, but that also appeared in the -explanation, so the compiler backed out one level.</para> - -</sect2> - -<sect2><title>Error Severity</> -<!--INDEX severity of compiler errors --> -<!--INDEX compiler error severity --> - -<para>There are four levels of compiler error severity: -<wordasword>error</>, <wordasword>warning</>, <wordasword>style -warning</>, and <wordasword>note</>. The first three levels correspond -to condition classes which are defined in the &ANSI; standard for -&CommonLisp; and which have special significance to the -<function>compile</> and <function>compile-file</> functions. These -levels of compiler error severity occur when the compiler handles -conditions of these classes. The fourth level of compiler error -severity, <wordasword>note</>, is used for problems which are too mild -for the standard condition classes, typically hints about how -efficiency might be improved.</para> - -</sect2> - -<sect2><title>Errors During Macroexpansion</> -<!--INDEX {macroexpansion}{errors during}--> - -<para>The compiler handles errors that happen during macroexpansion, -turning them into compiler errors. If you want to debug the error (to -debug a macro), you can set <varname>*break-on-signals*</> to -<literal>error</>. For example, this definition: - -<programlisting>(defun foo (e l) - (do ((current l (cdr current)) - ((atom current) nil)) - (when (eq (car current) e) (return current))))</programlisting> - -gives this error: - -<screen>in: DEFUN FOO - (DO ((CURRENT L #) (# NIL)) (WHEN (EQ # E) (RETURN CURRENT)) ) -caught ERROR: - (in macroexpansion of (DO # #)) - (hint: For more precise location, try *BREAK-ON-SIGNALS*.) 
- DO step variable is not a symbol: (ATOM CURRENT)</screen> -</para> - -</sect2> - -<sect2><title>Read Errors</> -<!--INDEX {read errors}{compiler}--> - -<para>&SBCL;'s compiler (unlike &CMUCL;'s) does not attempt to recover -from read errors when reading a source file, but instead just reports -the offending character position and gives up on the entire source -file.</para> - -</sect2> - -<!-- FIXME: How much control over error messages is in SBCL? -_ How much should be? How much of this documentation should -_ we save or adapt? -_ -_ %%\node Error Message Parameterization, , Read Errors, Interpreting Error Messages -_ \subsection{Error Message Parameterization} -_ \cpsubindex{error messages}{verbosity} -_ \cpsubindex{verbosity}{of error messages} -_ -_ There is some control over the verbosity of error messages. See also -_ \varref{undefined-warning-limit}, \code{*efficiency-note-limit*} and -_ \varref{efficiency-note-cost-threshold}. -_ -_ \begin{defvar}{}{enclosing-source-cutoff} -_ -_ This variable specifies the number of enclosing actual source forms -_ that are printed in full, rather than in the abbreviated processing -_ path format. Increasing the value from its default of \code{1} -_ allows you to see more of the guts of the macroexpanded source, -_ which is useful when debugging macros. -_ \end{defvar} -_ -_ \begin{defvar}{}{error-print-length} -_ \defvarx{error-print-level} -_ -_ These variables are the print level and print length used in -_ printing error messages. The default values are \code{5} and -_ \code{3}. If null, the global values of \code{*print-level*} and -_ \code{*print-length*} are used. -_ \end{defvar} -_ -_ \begin{defmac}{extensions:}{define-source-context}{% -_ \args{\var{name} \var{lambda-list} \mstar{form}}} -_ -_ This macro defines how to extract an abbreviated source context from -_ the \var{name}d form when it appears in the compiler input. -_ \var{lambda-list} is a \code{defmacro} style lambda-list used to -_ parse the arguments. 
The \var{body} should return a list of -_ subforms that can be printed on about one line. There are -_ predefined methods for \code{defstruct}, \code{defmethod}, etc. If -_ no method is defined, then the first two subforms are returned. -_ Note that this facility implicitly determines the string name -_ associated with anonymous functions. -_ \end{defmac} -_ -_ --> - -</sect1> - -<sect1><title>The Compiler's Handling of Types</> - -<para>The most unusual features of the &SBCL; compiler (which is -very similar to the original &CMUCL; compiler, also known as -&Python;) are its unusually sophisticated understanding of the -&CommonLisp; type system and its unusually conservative approach to -the implementation of type declarations. These two features reward the -use of type declarations throughout development, even when high -performance is not a concern. (Also, as discussed <link -linkend="efficiency">in the chapter on performance</>, the use of -appropriate type declarations can be very important for performance as -well.)</para> - -<para>The &SBCL; compiler, like the related compiler in &CMUCL;, -treats type declarations much differently than other Lisp compilers. -By default (<emphasis>i.e.</>, at ordinary levels of the -<parameter>safety</> compiler optimization parameter), the compiler -doesn't blindly believe most type declarations; it considers them -assertions about the program that should be checked.</para> - -<para>The &SBCL; compiler also has a greater knowledge of the -&CommonLisp; type system than other compilers. Support is incomplete -only for the <type>not</>, <type>and</> and <type>satisfies</> -types. -<!-- FIXME: See also sections \ref{advanced-type-stuff} - and \ref{type-inference}, once we snarf them from the - CMU CL manual.
--> -</para> - -<sect2 id=compiler-impl-limitations><title>Implementation Limitations</> - -<para> -Ideally, the compiler would consider <emphasis>all</> type declarations to -be assertions, so that adding type declarations to a program, no -matter how incorrect they might be, would <emphasis>never</> cause -undefined behavior. As of &SBCL; version 0.8.1, the compiler is known to -fall short of this goal in three areas: -<itemizedlist> - <listitem><para><function>Proclaim</>ed constraints on argument and - result types of a function are supposed to be checked by the - function. If the function type is proclaimed before function - definition, type checks are inserted by the compiler, but the - standard allows the reversed order, in which case the compiler - will trust the declaration.</para></listitem> - <listitem><para>The compiler cannot check types of an unknown number - of values; if the number of generated values is unknown, but the - number of consumed is known, only consumed values are - checked.</para></listitem> - <listitem><para>There are a few poorly characterized but apparently - very uncommon situations where a type declaration in an unexpected - location will be trusted and never checked by the - compiler.</para></listitem> -</itemizedlist></para> - -<para>These are important bugs, but are not necessarily easy to fix, -so they may, alas, remain in the system for a while.</para> - -</sect2> - -<sect2><title>Type Errors at Compile Time</> -<!--INDEX compile time type errors--> -<!--INDEX type checking}{at compile time}--> - -<para>If the compiler can prove at compile time that some portion of -the program cannot be executed without a type error, then it will give -a warning at compile time. It is possible that the offending code -would never actually be executed at run-time due to some higher level -consistency constraint unknown to the compiler, so a type warning -doesn't always indicate an incorrect program.
For example, consider -this code fragment: - -<programlisting>(defun raz (foo) - (let ((x (case foo - (:this 13) - (:that 9) - (:the-other 42)))) - (declare (fixnum x)) - (foo x))) -</programlisting> - -Compilation produces this warning: - -<screen>in: DEFUN RAZ - (CASE FOO (:THIS 13) (:THAT 9) (:THE-OTHER 42)) ---> LET COND IF COND IF COND IF -==> - (COND) -caught WARNING: This is not a FIXNUM: - NIL</screen> - -In this case, the warning means that if <varname>foo</> isn't any of -<literal>:this</>, <literal>:that</> or <literal>:the-other</>, then -<varname>x</> will be initialized to <literal>nil</>, which the -<type>fixnum</> declaration makes illegal. The warning will go away if -<function>ecase</> is used instead of <function>case</>, or if -<literal>:the-other</> is changed to <literal>t</>.</para> - -<para>This sort of spurious type warning happens moderately often in -the expansion of complex macros and in inline functions. In such -cases, there may be dead code that is impossible to correctly execute. -The compiler can't always prove this code is dead (could never be -executed), so it compiles the erroneous code (which will always signal -an error if it is executed) and gives a warning.</para> - -<para> -Type warnings are inhibited when the -<parameter>sb-ext:inhibit-warnings</> optimization quality is -<literal>3</>. (See <link linkend="compiler-policy">the section -on compiler policy</>.) This can be used in a local declaration -to inhibit type warnings in a code fragment that has spurious -warnings.</para> - -</sect2> - -<sect2 id="precisetypechecking"><title>Precise Type Checking</> -<!--INDEX precise type checking--> -<!--INDEX {type checking}{precise}--> - -<para>With the default compilation policy, all type declarations are -precisely checked, except in a few situations where they are simply -ignored instead. 
Precise checking means that the check is done as -though <function>typep</> had been called with the exact type -specifier that appeared in the declaration. In &SBCL;, adding type -declarations makes code safer. (Except that as noted <link -linkend="compiler-impl-limitations">elsewhere</link>, remaining bugs -in the compiler's handling of types unfortunately provide some -exceptions to this rule.)</para> - -<para>If a variable is declared to be -<type>(integer 3 17)</> then its value must always be an integer -between <literal>3</> and <literal>17</>. If multiple type -declarations apply to a single variable, then all the declarations -must be correct; it is as though all the types were intersected -producing a single <type>and</> type specifier.</para> - -<para>Argument and result type declarations are automatically -enforced. If you declare the type of a function argument, a type check -will be done when that function is called. In a function call, the -called function does the argument type checking.</para> - -<para>The types of structure slots are also checked. The value of a -structure slot must always be of the type indicated in any -<literal>:type</> slot option. </para> - -<para>In traditional &CommonLisp; compilers, not all type assertions -are checked, and type checks are not precise. Traditional compilers -blindly trust explicit type declarations, but may check the argument -type assertions for built-in functions. Type checking is not precise, -since the argument type checks will be for the most general type legal -for that argument. In many systems, type declarations suppress what -little type checking is being done, so adding type declarations makes -code unsafe. This is a problem since it discourages writing type -declarations during initial coding. 
In addition to being more error -prone, adding type declarations during tuning also loses all the -benefits of debugging with checked type assertions.</para> - -<para>To gain maximum benefit from the compiler's type checking, you -should always declare the types of function arguments and structure -slots as precisely as possible. This often involves the use of -<type>or</>, <type>member</>, and other list-style type specifiers.</para> - -</sect2> - -<sect2 id="weakened-type-checking"><title>Weakened Type Checking</> -<!--INDEX weakened type checking--> -<!--INDEX {type checking}{weakened}--> - -<para>At one time, &CMUCL; supported another level of type checking, -<quote>weakened type checking</>, when the value for the -<parameter>speed</> optimization quality is greater than -<parameter>safety</>, and <parameter>safety</> is not <literal>0</>. -The &CMUCL; manual still has a description of it, but even the CMU CL -code no longer corresponds to the manual. Some of this partial safety -checking lingers on in SBCL, but it's not a supported feature, and -should not be relied on. If you ask the compiler to optimize -<parameter>speed</> to a higher level than <parameter>safety</>, -your program is performing without a safety net, because &SBCL; may -at its option believe any or all type declarations with either partial -or nonexistent runtime checking.</para> - -</sect2> - -<sect2><title>Getting Existing Programs to Run</> -<!--INDEX {existing programs}{to run}--> -<!--INDEX {types}{portability}--> -<!--INDEX {compatibility with other Lisps} - (should also have an entry in the non-&ANSI;-isms section)--> - -<para>Since &SBCL;'s compiler, like &CMUCL;'s compiler, does much more -comprehensive type checking than most Lisp compilers, &SBCL; may -detect type errors in programs that have been debugged using other -compilers. 
These errors are mostly incorrect declarations, although -compile-time type errors can find actual bugs if parts of the program -have never been tested.</para> - -<para>Some incorrect declarations can only be detected by run-time -type checking. It is very important to initially compile a program -with full type checks (high <parameter>safety</> optimization) and -then test this safe version. After the checking version has been -tested, then you can consider weakening or eliminating type checks. -<emphasis>This applies even to previously debugged -programs,</emphasis> because the &SBCL; compiler does much more type -inference than other &CommonLisp; compilers, so an incorrect -declaration can do more damage.</para> - -<para>The most common problem is with variables whose constant initial -value doesn't match the type declaration. Incorrect constant initial -values will always be flagged by a compile-time type error, and they -are simple to fix once located. Consider this code fragment: - -<programlisting>(prog (foo) - (declare (fixnum foo)) - (setq foo ...) - ...)</programlisting> - -Here <varname>foo</> is given an initial value of <literal>nil</>, but -is declared to be a <type>fixnum</>. Even if it is never read, the -initial value of a variable must match the declared type. There are -two ways to fix this problem. Change the declaration - -<programlisting>(prog (foo) - (declare (type (or fixnum null) foo)) - (setq foo ...) - ...)</programlisting> - -or change the initial value - -<programlisting>(prog ((foo 0)) - (declare (fixnum foo)) - (setq foo ...) - ...)</programlisting> - -It is generally preferable to change to a legal initial value rather -than to weaken the declaration, but sometimes it is simpler to weaken -the declaration than to try to make an initial value of the -appropriate type.</para> - -<para>Another declaration problem occasionally encountered is -incorrect declarations on <function>defmacro</> arguments. 
This can happen -when a function is converted into a macro. Consider this macro: - -<programlisting>(defmacro my-1+ (x) - (declare (fixnum x)) - `(the fixnum (1+ ,x)))</programlisting> - -Although legal and well-defined &CommonLisp; code, the meaning of -this definition is almost certainly not what the writer intended. For -example, this call is illegal: - -<programlisting>(my-1+ (+ 4 5))</> - -This call is illegal because the argument to the macro is -<literal>(+ 4 5)</>, which is a <type>list</>, not a -<type>fixnum</>. Because of -macro semantics, it is hardly ever useful to declare the types of -macro arguments. If you really want to assert something about the -type of the result of evaluating a macro argument, then put a -<function>the</> in the expansion: - -<programlisting>(defmacro my-1+ (x) - `(the fixnum (1+ (the fixnum ,x))))</programlisting> - -In this case, it would be stylistically preferable to change this -macro back to a function and declare it inline. -<!--FIXME: <xref>inline-expansion</>, once we crib the - relevant text from the CMU CL manual.--> -</para> - -<para> -Some more subtle problems are caused by incorrect declarations that -can't be detected at compile time. Consider this code: - -<programlisting>(do ((pos 0 (position #\a string :start (1+ pos)))) - ((null pos)) - (declare (fixnum pos)) - ...)</programlisting> - -Although <varname>pos</> is almost always a <varname>fixnum</>, it is -<literal>nil</> at the end of the loop. If this example is compiled -with full type checks (the default), then running it will signal a -type error at the end of the loop. If compiled without type checks, -the program will go into an infinite loop (or perhaps -<function>position</> will complain because <literal>(1+ nil)</> isn't -a sensible start.) Why? Because if you compile without type checks, -the compiler just quietly believes the type declaration.
Since the -compiler believes that <varname>pos</> is always a <type>fixnum</>, it -believes that <varname>pos</> is never <literal>nil</>, so -<literal>(null pos)</> is never true, and the loop exit test is -optimized away. Such errors are sometimes flagged by unreachable code -notes, but it is still important to initially compile and test any -system with full type checks, even if the system works fine when -compiled using other compilers.</para> - -<para>In this case, the fix is to weaken the type declaration to -<type>(or fixnum null)</>. -<footnote><para>Actually, this declaration is - unnecessary in &SBCL;, since it already knows that <function>position</> - returns a non-negative <type>fixnum</> or <literal>nil</>. - </para></footnote> - -Note that there is usually little performance penalty for weakening a -declaration in this way. Any numeric operations in the body can still -assume that the variable is a <type>fixnum</>, since <literal>nil</> -is not a legal numeric argument. Another possible fix would be to say: - -<programlisting>(do ((pos 0 (position #\a string :start (1+ pos)))) - ((null pos)) - (let ((pos pos)) - (declare (fixnum pos)) - ...))</programlisting> - -This would be preferable in some circumstances, since it would allow a -non-standard representation to be used for the local <varname>pos</> -variable in the loop body. -<!-- FIXME: <xref>ND-variables</>, once we crib the text from the - CMU CL manual. --> -</para> - -</sect2> - -</sect1> - -<sect1 id="compiler-policy"><title>Compiler Policy</> - -<para>As of version 0.6.4, &SBCL; still uses most of the &CMUCL; code -for compiler policy. The &CMUCL; code has many features and high-quality -documentation, but the two unfortunately do not match. So this area of -the compiler and its interface needs to be cleaned up. Meanwhile, here -is some rudimentary documentation on the current behavior of the -system.</para> - -<para>Compiler policy is controlled by the <parameter>optimize</> -declaration. 
The compiler supports the &ANSI; optimization qualities, -and also an extension <parameter>sb-ext:inhibit-warnings</>.</para> - -<para>Ordinarily, when the <parameter>speed</> quality is high, the -compiler emits notes to notify the programmer about its inability to -apply various optimizations. Setting -<parameter>sb-ext:inhibit-warnings</> to a value at least as large as -the <parameter>speed</> quality inhibits this notification. This can -be useful to suppress notes about code which is known to be -unavoidably inefficient. (For example, the compiler issues notes about -having to use generic arithmetic instead of fixnum arithmetic, which -is not helpful for code which by design supports arbitrary-sized -integers instead of being limited to fixnums.)</para> - -<note><para>The basic functionality of the <parameter>optimize -inhibit-warnings</> extension will probably be supported in all future -versions of the system, but it will probably be renamed when the -compiler and its interface are cleaned up. The current name is -misleading, because it mostly inhibits optimization notes, not -warnings. And making it an optimization quality is misleading, because -it shouldn't affect the resulting code at all. It may become a -declaration identifier with a name like -<parameter>sb-ext:inhibit-notes</>, so that what's currently written - -<programlisting>(declaim (optimize (sb-ext:inhibit-warnings 2)))</> - -would become something like - -<programlisting>(declaim (sb-ext:inhibit-notes 2))</> - -</para></note> - -<para> (In early versions of SBCL, a <parameter>speed</> value of zero -was used to enable byte compilation, but since version 0.7.0, SBCL -only supports native compilation.)</para> - -<para>When <parameter>safety</> is zero, almost all runtime checking -of types, array bounds, and so forth is suppressed.</para> - -<para>When <parameter>safety</> is less than <parameter>speed</>, any -and all type checks may be suppressed. 
At some point in the past, -&CMUCL; had <link linkend="weakened-type-checking">a more nuanced -interpretation of this</link>. However, &SBCL; doesn't support that -interpretation, and setting <parameter>safety</> less than -<parameter>speed</> may have roughly the same effect as setting -<parameter>safety</> to zero.</para> - -<para>The value of <parameter>space</> mostly influences the -compiler's decision whether to inline operations, which tend to -increase the size of programs. Use the value <literal>0</> with -caution, since it can cause the compiler to inline operations so -indiscriminately that the net effect is to slow the program by causing -cache misses or swapping.</para> - -<!-- FIXME: old CMU CL compiler policy, should perhaps be adapted -_ for SBCL. (Unfortunately, the CMU CL docs are out of sync with the -_ CMU CL code, so adapting this requires not only reformatting -_ the documentation, but rooting out code rot.) -_ -_<sect2 id="compiler-policy"><title>Compiler Policy</> -_ INDEX {policy}{compiler} -_ INDEX compiler policy -_ -_<para>The policy is what tells the compiler <emphasis>how</> to -_compile a program. This is logically (and often textually) distinct -_from the program itself. Broad control of policy is provided by the -_<parameter>optimize</> declaration; other declarations and variables -_control more specific aspects of compilation.</para> -_ -_\begin{comment} -_* The Optimize Declaration:: -_* The Optimize-Interface Declaration:: -_\end{comment} -_ -_%%\node The Optimize Declaration, The Optimize-Interface Declaration, Compiler Policy, Compiler Policy -_\subsection{The Optimize Declaration} -_\label{optimize-declaration} -_\cindex{optimize declaration} -_\cpsubindex{declarations}{\code{optimize}} -_ -_The \code{optimize} declaration recognizes six different -_\var{qualities}. The qualities are conceptually independent aspects -_of program performance. 
In reality, increasing one quality tends to -_have adverse effects on other qualities. The compiler compares the -_relative values of qualities when it needs to make a trade-off; i.e., -_if \code{speed} is greater than \code{safety}, then improve speed at -_the cost of safety. -_ -_The default for all qualities (except \code{debug}) is \code{1}. -_Whenever qualities are equal, ties are broken according to a broad -_idea of what a good default environment is supposed to be. Generally -_this downplays \code{speed}, \code{compile-speed} and \code{space} in -_favor of \code{safety} and \code{debug}. Novice and casual users -_should stick to the default policy. Advanced users often want to -_improve speed and memory usage at the cost of safety and -_debuggability. -_ -_If the value for a quality is \code{0} or \code{3}, then it may have a -_special interpretation. A value of \code{0} means ``totally -_unimportant'', and a \code{3} means ``ultimately important.'' These -_extreme optimization values enable ``heroic'' compilation strategies -_that are not always desirable and sometimes self-defeating. -_Specifying more than one quality as \code{3} is not desirable, since -_it doesn't tell the compiler which quality is most important. -_ -_ -_These are the optimization qualities: -_\begin{Lentry} -_ -_\item[\code{speed}] \cindex{speed optimization quality}How fast the -_ program should is run. \code{speed 3} enables some optimizations -_ that hurt debuggability. -_ -_\item[\code{compilation-speed}] \cindex{compilation-speed optimization -_ quality}How fast the compiler should run. Note that increasing -_ this above \code{safety} weakens type checking. -_ -_\item[\code{space}] \cindex{space optimization quality}How much space -_ the compiled code should take up. Inline expansion is mostly -_ inhibited when \code{space} is greater than \code{speed}. A value -_ of \code{0} enables indiscriminate inline expansion. 
Wide use of a -_ \code{0} value is not recommended, as it may waste so much space -_ that run time is slowed. \xlref{inline-expansion} for a discussion -_ of inline expansion. -_ -_\item[\code{debug}] \cindex{debug optimization quality}How debuggable -_ the program should be. The quality is treated differently from the -_ other qualities: each value indicates a particular level of debugger -_ information; it is not compared with the other qualities. -_ \xlref{debugger-policy} for more details. -_ -_\item[\code{safety}] \cindex{safety optimization quality}How much -_ error checking should be done. If \code{speed}, \code{space} or -_ \code{compilation-speed} is more important than \code{safety}, then -_ type checking is weakened (\pxlref{weakened-type-checks}). If -_ \code{safety} if \code{0}, then no run time error checking is done. -_ In addition to suppressing type checks, \code{0} also suppresses -_ argument count checking, unbound-symbol checking and array bounds -_ checks. -_ -_\item[\code{extensions:inhibit-warnings}] \cindex{inhibit-warnings -_ optimization quality}This is a CMU extension that determines how -_ little (or how much) diagnostic output should be printed during -_ compilation. This quality is compared to other qualities to -_ determine whether to print style notes and warnings concerning those -_ qualities. If \code{speed} is greater than \code{inhibit-warnings}, -_ then notes about how to improve speed will be printed, etc. The -_ default value is \code{1}, so raising the value for any standard -_ quality above its default enables notes for that quality. If -_ \code{inhibit-warnings} is \code{3}, then all notes and most -_ non-serious warnings are inhibited. This is useful with -_ \code{declare} to suppress warnings about unavoidable problems. 
-_\end{Lentry} -_ -_%%\node The Optimize-Interface Declaration, , The Optimize Declaration, Compiler Policy -_\subsection{The Optimize-Interface Declaration} -_\label{optimize-interface-declaration} -_\cindex{optimize-interface declaration} -_\cpsubindex{declarations}{\code{optimize-interface}} -_ -_The \code{extensions:optimize-interface} declaration is identical in -_syntax to the \code{optimize} declaration, but it specifies the policy -_used during compilation of code the compiler automatically generates -_to check the number and type of arguments supplied to a function. It -_is useful to specify this policy separately, since even thoroughly -_debugged functions are vulnerable to being passed the wrong arguments. -_The \code{optimize-interface} declaration can specify that arguments -_should be checked even when the general \code{optimize} policy is -_unsafe. -_ -_Note that this argument checking is the checking of user-supplied -_arguments to any functions defined within the scope of the -_declaration, \code{not} the checking of arguments to \llisp{} -_primitives that appear in those definitions. -_ -_The idea behind this declaration is that it allows the definition of -_functions that appear fully safe to other callers, but that do no -_internal error checking. Of course, it is possible that arguments may -_be invalid in ways other than having incorrect type. Functions -_compiled unsafely must still protect themselves against things like -_user-supplied array indices that are out of bounds and improper lists. -_See also the \kwd{context-declarations} option to -_\macref{with-compilation-unit}. -_ -_(end of section on compiler policy) -_--> - -</sect1> - -<sect1><title>Open Coding and Inline Expansion</> -<!--INDEX open-coding--> -<!--INDEX inline expansion--> -<!--INDEX static functions--> - -<para>Since &CommonLisp; forbids the redefinition of standard -functions, the compiler can have special knowledge of these standard -functions embedded in it. 
This special knowledge is used in various -ways (open coding, inline expansion, source transformation), but the -implications to the user are basically the same: -<itemizedlist> - <listitem><para> Attempts to redefine standard functions may - be frustrated, since the function may never be called. Although - it is technically illegal to redefine standard functions, users - sometimes want to implicitly redefine these functions when they - are debugging using the <function>trace</> macro. Special-casing - of standard functions can be inhibited using the - <parameter>notinline</> declaration.</para></listitem> - <listitem><para> The compiler can have multiple alternate - implementations of standard functions that implement different - trade-offs of speed, space and safety. This selection is - based on the <link linkend="compiler-policy">compiler policy</link>. - </para></listitem> -</itemizedlist> -</para> - -<para>When a function call is <emphasis>open coded</>, inline code whose -effect is equivalent to the function call is substituted for that -function call. When a function call is <emphasis>closed coded</>, it -is usually left as is, although it might be turned into a call to a -different function with different arguments. As an example, if -<function>nthcdr</> were to be open coded, then - -<programlisting>(nthcdr 4 foobar)</programlisting> - -might turn into - -<programlisting>(cdr (cdr (cdr (cdr foobar))))</> - -or even - -<programlisting>(do ((i 0 (1+ i)) - (list foobar (cdr foobar))) - ((= i 4) list))</programlisting> - -If <function>nth</> is closed coded, then - -<programlisting> -(nth x l) -</programlisting> - -might stay the same, or turn into something like - -<programlisting> -(car (nthcdr x l)) -</programlisting> -</para> - -<para>In general, open coding sacrifices space for speed, but some -functions (such as <function>car</>) are so simple that they are always -open-coded. 
Even when not open-coded, a call to a standard function -may be transformed into a different function call (as in the last -example) or compiled as <emphasis>static call</>. Static function call -uses a more efficient calling convention that forbids -redefinition.</para> - -</sect1> - -</chapter> diff --git a/doc/compiler.xml b/doc/compiler.xml new file mode 100644 index 0000000..e0fde82 --- /dev/null +++ b/doc/compiler.xml @@ -0,0 +1,942 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" + "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [ +<!ENTITY % myents SYSTEM "entities.inc"> +%myents; +]> + +<chapter id="compiler"><title>The Compiler + +This chapter will discuss most compiler issues other than +efficiency, including compiler error messages, the &SBCL; compiler's +unusual approach to type safety in the presence of type declarations, +the effects of various compiler optimization policies, and the way +that inlining and open coding may cause optimized code to differ from +a naive translation. Efficiency issues are sufficiently varied and +separate that they have their own +chapter. + +Error Messages + + + +The compiler supplies a large amount of source location +information in error messages. The error messages contain a lot of +detail in a terse format, so they may be confusing at first. Error +messages will be illustrated using this example program: +(defmacro zoq (x) +`(roq (ploq (+ ,x 3)))) + +(defun foo (y) +(declare (symbol y)) +(zoq y)) +The main problem with this program is that it is trying to add +3 to a symbol. Note also that the functions +roq and ploq aren't defined anywhere. + + +The Parts of the Error Message + +When processing this program, the compiler will produce this +warning: + +file: /tmp/foo.lisp + +in: DEFUN FOO +(ZOQ Y) +--> ROQ PLOQ + +==> +Y +caught WARNING: +Result is a SYMBOL, not a NUMBER. 
+In this example we see each of the six possible parts of a compiler error +message: + + File: /tmp/foo.lisp + This is the name of the file that the compiler read the relevant + code from. The file name is displayed because it may not be + immediately obvious when there is an error during compilation of a + large system, especially when + with-compilation-unit is used to delay + undefined warnings. + in: DEFUN FOO This is the + definition top level form responsible for the error. It is + obtained by taking the first two elements of the enclosing form + whose first element is a symbol beginning with def. + If there is no such enclosing def form, then the + outermost form is used. If there are multiple def + forms, then they are all printed from the outside in, separated by + =>'s. In this example, the problem was in the + defun for foo. + (ZOQ Y) This is the + original source form responsible for the error. + Original source means that the form directly appeared in the + original input to the compiler, i.e. in the lambda passed to + compile or in the top level form read from the + source file. In this example, the expansion of the zoq + macro was responsible for the error. + --> ROQ PLOQ + This is the + processing path that the compiler used to produce + the errorful code. The processing path is a representation of + the evaluated forms enclosing the actual source that the + compiler encountered when processing the original source. + The path is the first element of each form, or the form itself + if the form is not a list. These forms result from the + expansion of macros or source-to-source transformation done + by the compiler. In this example, the enclosing evaluated forms + are the calls to roq, ploq and + +. These calls resulted from the expansion of + the zoq macro. + ==> Y This is the + actual source responsible for the error. 
If + the actual source appears in the explanation, then + we print the next enclosing evaluated form, instead of + printing the actual source twice. (This is the form + that would otherwise have been the last form of the processing + path.) In this example, the problem is with the evaluation of + the reference to the variable y. + + caught WARNING: Result is a SYMBOL, not a NUMBER. + This is the explanation of the problem. In this + example, the problem is that y evaluates to a symbol, + but is in a context where a number is required (the argument + to +). + + +Note that each part of the error message is distinctively marked: + + + file: and in: + mark the file and definition, respectively. + The original source is an indented form with no + prefix. + Each line of the processing path is prefixed with + --> + The actual source form is indented like the original + source, but is marked by a preceding ==> line. + + The explanation is prefixed with the error + severity, which can be caught ERROR:, + caught WARNING:, + caught STYLE-WARNING:, or + note:. + + + +Each part of the error message is more specific than the preceding +one. If consecutive error messages are for nearby locations, then the +front part of the error messages would be the same. In this case, the +compiler omits as much of the second message as is in common with the +first. For example: +file: /tmp/foo.lisp + +in: DEFUN FOO +(ZOQ Y) +--> ROQ +==> +(PLOQ (+ Y 3)) +caught STYLE-WARNING: +undefined function: PLOQ + +==> +(ROQ (PLOQ (+ Y 3))) +caught STYLE-WARNING: +undefined function: ROQ +In this example, the file, definition and original source are +identical for the two messages, so the compiler omits them in the +second message. If consecutive messages are entirely identical, then +the compiler prints only the first message, followed by: +[Last message occurs repeats times] +where repeats is the number of times the message +was given. + +If the source was not from a file, then no file line is printed. 
+If the actual source is the same as the original source, then the +processing path and actual source will be omitted. If no forms +intervene between the original source and the actual source, then the +processing path will also be omitted. + + + +The Original and Actual Source + +The original source displayed will almost always be +a list. If the actual source for an error message is a symbol, the +original source will be the immediately enclosing evaluated list form. +So even if the offending symbol does appear in the original source, +the compiler will print the enclosing list and then print the symbol +as the actual source (as though the symbol were introduced by a +macro.) + +When the actual source is displayed +(and is not a symbol), it will always +be code that resulted from the expansion of a macro or a source-to-source +compiler optimization. This is code that did not appear in the original +source program; it was introduced by the compiler. + +Keep in mind that when the compiler displays a source form +in an error message, it always displays the most specific (innermost) +responsible form. For example, compiling this function +(defun bar (x) +(let (a) +(declare (fixnum a)) +(setq a (foo x)) +a)) +gives this error message +in: DEFUN BAR +(LET (A) (DECLARE (FIXNUM A)) (SETQ A (FOO X)) A) +caught WARNING: The binding of A is not a FIXNUM: +NIL +This error message is not saying there is a problem somewhere in +this let — it is saying that there is a +problem with the let itself. In this example, the problem +is that a's nil initial value is not a +fixnum. + + + +The Processing Path + + + + +The processing path is mainly useful for debugging macros, so if +you don't write macros, you can probably ignore it. 
Consider this +example: + +(defun foo (n) +(dotimes (i n *undefined*))) + + +Compiling results in this error message: + +in: DEFUN FOO +(DOTIMES (I N *UNDEFINED*)) +--> DO BLOCK LET TAGBODY RETURN-FROM +==> +(PROGN *UNDEFINED*) +caught STYLE-WARNING: +undefined variable: *UNDEFINED* + +Note that do appears in the processing path. This is because +dotimes expands into: + +(do ((i 0 (1+ i)) (#:g1 n)) +((>= i #:g1) *undefined*) +(declare (type unsigned-byte i))) + +The rest of the processing path results from the expansion +of do: + + + (block nil + (let ((i 0) (#:g1 n)) + (declare (type unsigned-byte i)) + (tagbody (go #:g3) + #:g2 (psetq i (1+ i)) + #:g3 (unless (>= i #:g1) (go #:g2)) + (return-from nil (progn *undefined*))))) + + +In this example, the compiler descended into the block, +let, tagbody and return-from to +reach the progn printed as the actual source. This is a +place where the actual source appears in explanation rule +was applied. The innermost actual source form was the symbol +*undefined* itself, but that also appeared in the +explanation, so the compiler backed out one level. + + + +Error Severity + + + +There are four levels of compiler error severity: +error, warning, style +warning, and note. The first three levels correspond +to condition classes which are defined in the &ANSI; standard for +&CommonLisp; and which have special significance to the +compile and compile-file functions. These +levels of compiler error severity occur when the compiler handles +conditions of these classes. The fourth level of compiler error +severity, note, is used for problems which are too mild +for the standard condition classes, typically hints about how +efficiency might be improved. + + + +Errors During Macroexpansion + + +The compiler handles errors that happen during macroexpansion, +turning them into compiler errors. If you want to debug the error (to +debug a macro), you can set *break-on-signals* to +error. 
For example, this definition: + +(defun foo (e l) +(do ((current l (cdr current)) +((atom current) nil)) +(when (eq (car current) e) (return current)))) + +gives this error: + +in: DEFUN FOO +(DO ((CURRENT L #) (# NIL)) (WHEN (EQ # E) (RETURN CURRENT)) ) +caught ERROR: +(in macroexpansion of (DO # #)) +(hint: For more precise location, try *BREAK-ON-SIGNALS*.) +DO step variable is not a symbol: (ATOM CURRENT) + + + + +Read Errors + + +&SBCL;'s compiler (unlike &CMUCL;'s) does not attempt to recover +from read errors when reading a source file, but instead just reports +the offending character position and gives up on the entire source +file. + + + + + + + +The Compiler's Handling of Types + +The most unusual features of the &SBCL; compiler (which is +very similar to the original &CMUCL; compiler, also known as +&Python;) are its unusually sophisticated understanding of the +&CommonLisp; type system and its unusually conservative approach to +the implementation of type declarations. These two features reward the +use of type declarations throughout development, even when high +performance is not a concern. (Also, as discussed in the chapter on performance, the use of +appropriate type declarations can be very important for performance as +well.) + +The &SBCL; compiler, like the related compiler in &CMUCL;, +treats type declarations much differently than other Lisp compilers. +By default (i.e., at ordinary levels of the +safety compiler optimization parameter), the compiler +doesn't blindly believe most type declarations; it considers them +assertions about the program that should be checked. + +The &SBCL; compiler also has a greater knowledge of the +&CommonLisp; type system than other compilers. Support is incomplete +only for the not, and and satisfies +types. 
+ + + +Implementation Limitations + + + Ideally, the compiler would consider all type declarations to + be assertions, so that adding type declarations to a program, no + matter how incorrect they might be, would never cause + undefined behavior. As of &SBCL; version 0.8.1, the compiler is known to + fall short of this goal in three areas: + + Proclaimed constraints on argument and + result types of a function are supposed to be checked by the + function. If the function type is proclaimed before function + definition, type checks are inserted by the compiler, but the + standard allows the reversed order, in which case the compiler + will trust the declaration. + The compiler cannot check types of an unknown number + of values; if the number of generated values is unknown, but the + number of consumed values is known, only consumed values are + checked. + There are a few poorly characterized but apparently + very uncommon situations where a type declaration in an unexpected + location will be trusted and never checked by the + compiler. + + +These are important bugs, but are not necessarily easy to fix, +so they may, alas, remain in the system for a while. + + + +Type Errors at Compile Time + + + +If the compiler can prove at compile time that some portion of +the program cannot be executed without a type error, then it will give +a warning at compile time. It is possible that the offending code +would never actually be executed at run-time due to some higher level +consistency constraint unknown to the compiler, so a type warning +doesn't always indicate an incorrect program. 
For example, consider +this code fragment: + +(defun raz (foo) +(let ((x (case foo +(:this 13) +(:that 9) +(:the-other 42)))) +(declare (fixnum x)) +(foo x))) + + +Compilation produces this warning: + +in: DEFUN RAZ +(CASE FOO (:THIS 13) (:THAT 9) (:THE-OTHER 42)) +--> LET COND IF COND IF COND IF +==> +(COND) +caught WARNING: This is not a FIXNUM: +NIL + +In this case, the warning means that if foo isn't any of +:this, :that or :the-other, then +x will be initialized to nil, which the +fixnum declaration makes illegal. The warning will go away if +ecase is used instead of case, or if +:the-other is changed to t. + +This sort of spurious type warning happens moderately often in +the expansion of complex macros and in inline functions. In such +cases, there may be dead code that is impossible to correctly execute. +The compiler can't always prove this code is dead (could never be +executed), so it compiles the erroneous code (which will always signal +an error if it is executed) and gives a warning. + + + Type warnings are inhibited when the + sb-ext:inhibit-warnings optimization quality is + 3. (See the section + on compiler policy.) This can be used in a local declaration + to inhibit type warnings in a code fragment that has spurious +warnings. + + + +Precise Type Checking + + + +With the default compilation policy, all type declarations are +precisely checked, except in a few situations where they are simply +ignored instead. Precise checking means that the check is done as +though typep had been called with the exact type +specifier that appeared in the declaration. In &SBCL;, adding type +declarations makes code safer. (Except that as noted elsewhere, remaining bugs +in the compiler's handling of types unfortunately provide some +exceptions to this rule.) + +If a variable is declared to be +(integer 3 17) then its value must always be an integer +between 3 and 17. 
If multiple type +declarations apply to a single variable, then all the declarations +must be correct; it is as though all the types were intersected +producing a single and type specifier. + +Argument and result type declarations are automatically +enforced. If you declare the type of a function argument, a type check +will be done when that function is called. In a function call, the +called function does the argument type checking. + +The types of structure slots are also checked. The value of a +structure slot must always be of the type indicated in any +:type slot option. + +In traditional &CommonLisp; compilers, not all type assertions +are checked, and type checks are not precise. Traditional compilers +blindly trust explicit type declarations, but may check the argument +type assertions for built-in functions. Type checking is not precise, +since the argument type checks will be for the most general type legal +for that argument. In many systems, type declarations suppress what +little type checking is being done, so adding type declarations makes +code unsafe. This is a problem since it discourages writing type +declarations during initial coding. In addition to being more error +prone, adding type declarations during tuning also loses all the +benefits of debugging with checked type assertions. + +To gain maximum benefit from the compiler's type checking, you +should always declare the types of function arguments and structure +slots as precisely as possible. This often involves the use of +or, member, and other list-style type specifiers. + + + +Weakened Type Checking + + + +At one time, &CMUCL; supported another level of type checking, +weakened type checking, when the value for the +speed optimization quality is greater than +safety, and safety is not 0. +The &CMUCL; manual still has a description of it, but even the CMU CL +code no longer corresponds to the manual. 
Some of this partial safety +checking lingers on in SBCL, but it's not a supported feature, and +should not be relied on. If you ask the compiler to optimize +speed to a higher level than safety, +your program is performing without a safety net, because &SBCL; may +at its option believe any or all type declarations with either partial +or nonexistent runtime checking. + + + +Getting Existing Programs to Run + + + + +Since &SBCL;'s compiler, like &CMUCL;'s compiler, does much more +comprehensive type checking than most Lisp compilers, &SBCL; may +detect type errors in programs that have been debugged using other +compilers. These errors are mostly incorrect declarations, although +compile-time type errors can find actual bugs if parts of the program +have never been tested. + +Some incorrect declarations can only be detected by run-time +type checking. It is very important to initially compile a program +with full type checks (high safety optimization) and +then test this safe version. After the checking version has been +tested, then you can consider weakening or eliminating type checks. +This applies even to previously debugged +programs, because the &SBCL; compiler does much more type +inference than other &CommonLisp; compilers, so an incorrect +declaration can do more damage. + +The most common problem is with variables whose constant initial +value doesn't match the type declaration. Incorrect constant initial +values will always be flagged by a compile-time type error, and they +are simple to fix once located. Consider this code fragment: + +(prog (foo) +(declare (fixnum foo)) +(setq foo ...) +...) + +Here foo is given an initial value of nil, but +is declared to be a fixnum. Even if it is never read, the +initial value of a variable must match the declared type. There are +two ways to fix this problem. Change the declaration + +(prog (foo) +(declare (type (or fixnum null) foo)) +(setq foo ...) +...) 
+ +or change the initial value + +(prog ((foo 0)) +(declare (fixnum foo)) +(setq foo ...) +...) + +It is generally preferable to change to a legal initial value rather +than to weaken the declaration, but sometimes it is simpler to weaken +the declaration than to try to make an initial value of the +appropriate type. + +Another declaration problem occasionally encountered is +incorrect declarations on defmacro arguments. This can happen +when a function is converted into a macro. Consider this macro: + +(defmacro my-1+ (x) +(declare (fixnum x)) +`(the fixnum (1+ ,x))) + +Although legal and well-defined &CommonLisp; code, the meaning of +this definition is almost certainly not what the writer intended. For +example, this call is illegal: + +(my-1+ (+ 4 5)) + +This call is illegal because the argument to the macro is +(+ 4 5), which is a list, not a +fixnum. Because of +macro semantics, it is hardly ever useful to declare the types of +macro arguments. If you really want to assert something about the +type of the result of evaluating a macro argument, then put a +the in the expansion: + +(defmacro my-1+ (x) +`(the fixnum (1+ (the fixnum ,x)))) + +In this case, it would be stylistically preferable to change this +macro back to a function and declare it inline. + + + + + Some more subtle problems are caused by incorrect declarations that + can't be detected at compile time. Consider this code: + + (do ((pos 0 (position #\a string :start (1+ pos)))) + ((null pos)) + (declare (fixnum pos)) + ...) + + Although pos is almost always a fixnum, it is + nil at the end of the loop. If this example is compiled + with full type checks (the default), then running it will signal a + type error at the end of the loop. If compiled without type checks, + the program will go into an infinite loop (or perhaps + position will complain because (1+ nil) isn't + a sensible start.) Why? Because if you compile without type checks, + the compiler just quietly believes the type declaration. 
Since the + compiler believes that pos is always a fixnum, it + believes that pos is never nil, so + (null pos) is never true, and the loop exit test is + optimized away. Such errors are sometimes flagged by unreachable code + notes, but it is still important to initially compile and test any + system with full type checks, even if the system works fine when +compiled using other compilers. + +In this case, the fix is to weaken the type declaration to +(or fixnum null). +Actually, this declaration is +unnecessary in &SBCL;, since it already knows that position +returns a non-negative fixnum or nil. + + +Note that there is usually little performance penalty for weakening a +declaration in this way. Any numeric operations in the body can still +assume that the variable is a fixnum, since nil +is not a legal numeric argument. Another possible fix would be to say: + +(do ((pos 0 (position #\a string :start (1+ pos)))) +((null pos)) +(let ((pos pos)) +(declare (fixnum pos)) +...)) + +This would be preferable in some circumstances, since it would allow a +non-standard representation to be used for the local pos +variable in the loop body. + + + + + + + +Compiler Policy + +As of version 0.6.4, &SBCL; still uses most of the &CMUCL; code +for compiler policy. The &CMUCL; code has many features and high-quality +documentation, but the two unfortunately do not match. So this area of +the compiler and its interface needs to be cleaned up. Meanwhile, here +is some rudimentary documentation on the current behavior of the +system. + +Compiler policy is controlled by the optimize +declaration. The compiler supports the &ANSI; optimization qualities, +and also an extension sb-ext:inhibit-warnings. + +Ordinarily, when the speed quality is high, the +compiler emits notes to notify the programmer about its inability to +apply various optimizations. Setting +sb-ext:inhibit-warnings to a value at least as large as +the speed quality inhibits this notification. 
This can +be useful to suppress notes about code which is known to be +unavoidably inefficient. (For example, the compiler issues notes about +having to use generic arithmetic instead of fixnum arithmetic, which +is not helpful for code which by design supports arbitrary-sized +integers instead of being limited to fixnums.) + +The basic functionality of the optimize +inhibit-warnings extension will probably be supported in all future +versions of the system, but it will probably be renamed when the +compiler and its interface are cleaned up. The current name is +misleading, because it mostly inhibits optimization notes, not +warnings. And making it an optimization quality is misleading, because +it shouldn't affect the resulting code at all. It may become a +declaration identifier with a name like +sb-ext:inhibit-notes, so that what's currently written + +(declaim (optimize (sb-ext:inhibit-warnings 2))) + +would become something like + +(declaim (sb-ext:inhibit-notes 2)) + + + + (In early versions of SBCL, a speed value of zero +was used to enable byte compilation, but since version 0.7.0, SBCL +only supports native compilation.) + +When safety is zero, almost all runtime checking +of types, array bounds, and so forth is suppressed. + +When safety is less than speed, any +and all type checks may be suppressed. At some point in the past, +&CMUCL; had a more nuanced +interpretation of this. However, &SBCL; doesn't support that +interpretation, and setting safety less than +speed may have roughly the same effect as setting +safety to zero. + +The value of space mostly influences the +compiler's decision whether to inline operations, which tend to +increase the size of programs. Use the value 0 with +caution, since it can cause the compiler to inline operations so +indiscriminately that the net effect is to slow the program by causing +cache misses or swapping. 
+ + + + + +Open Coding and Inline Expansion + + + + +Since &CommonLisp; forbids the redefinition of standard +functions, the compiler can have special knowledge of these standard +functions embedded in it. This special knowledge is used in various +ways (open coding, inline expansion, source transformation), but the +implications to the user are basically the same: + + Attempts to redefine standard functions may + be frustrated, since the function may never be called. Although + it is technically illegal to redefine standard functions, users + sometimes want to implicitly redefine these functions when they + are debugging using the trace macro. Special-casing + of standard functions can be inhibited using the + notinline declaration. + The compiler can have multiple alternate + implementations of standard functions that implement different + trade-offs of speed, space and safety. This selection is + based on the compiler policy. + + + + +When a function call is open coded, inline code whose +effect is equivalent to the function call is substituted for that +function call. When a function call is closed coded, it +is usually left as is, although it might be turned into a call to a +different function with different arguments. As an example, if +nthcdr were to be open coded, then + +(nthcdr 4 foobar) + +might turn into + +(cdr (cdr (cdr (cdr foobar)))) + +or even + +(do ((i 0 (1+ i)) +(list foobar (cdr foobar))) +((= i 4) list)) + +If nth is closed coded, then + + + (nth x l) + + +might stay the same, or turn into something like + + + (car (nthcdr x l)) + + + +In general, open coding sacrifices space for speed, but some +functions (such as car) are so simple that they are always +open-coded. Even when not open-coded, a call to a standard function +may be transformed into a different function call (as in the last +example) or compiled as static call. Static function call +uses a more efficient calling convention that forbids +redefinition. 
+ + + + diff --git a/doc/efficiency.sgml b/doc/efficiency.sgml deleted file mode 100644 index 10d5761..0000000 --- a/doc/efficiency.sgml +++ /dev/null @@ -1,132 +0,0 @@ -Efficiency</> - -<para>FIXME: The material in the &CMUCL; manual about getting good -performance from the compiler should be reviewed, reformatted in -DocBook, lightly edited for &SBCL;, and substituted into this -manual. In the meantime, the original &CMUCL; manual is still 95+% -correct for the &SBCL; version of the &Python; compiler. See the -sections -<itemizedlist> - <listitem><para>Advanced Compiler Use and Efficiency Hints</></> - <listitem><para>Advanced Compiler Introduction</></> - <listitem><para>More About Types in Python</></> - <listitem><para>Type Inference</></> - <listitem><para>Source Optimization</></> - <listitem><para>Tail Recursion</></> - <listitem><para>Local Call</></> - <listitem><para>Block Compilation</></> - <listitem><para>Inline Expansion</></> - <listitem><para>Object Representation</></> - <listitem><para>Numbers</></> - <listitem><para>General Efficiency Hints</></> - <listitem><para>Efficiency Notes</></> -</itemizedlist> -</para> - -<para>Besides this information from the &CMUCL; manual, there are a -few other points to keep in mind. -<itemizedlist> - <listitem><para>The &CMUCL; manual doesn't seem to state it explicitly, - but &Python; has a mental block about type inference when - assignment is involved. &Python; is very aggressive and clever - about inferring the types of values bound with <function>let</>, - <function>let*</>, inline function call, and so forth. However, - it's much more passive and dumb about inferring the types of - values assigned with <function>setq</>, <function>setf</>, and - friends. It would be nice to fix this, but in the meantime don't - expect that just because it's very smart about types in most - respects it will be smart about types involved in assignments. 
- (This doesn't affect its ability to benefit from explicit type - declarations involving the assigned variables, only its ability to - get by without explicit type declarations.)</para></listitem> -<!-- FIXME: Python dislikes assignments, but not in type - inference. The real problems are loop induction, closed over - variables and aliases. --> - <listitem><para>Since the time the &CMUCL; manual was written, - &CMUCL; (and thus &SBCL;) has gotten a generational garbage - collector. This means that there are some efficiency implications - of various patterns of memory usage which aren't discussed in the - &CMUCL; manual. (Some new material should be written about - this.)</para></listitem> - <listitem><para>&SBCL; has some important known efficiency problems. - Perhaps the most important are - <itemizedlist> - <listitem><para>There is no support for the &ANSI; - <parameter>dynamic-extent</> declaration, not even for - closures or <parameter>&rest</> lists.</para></listitem> - <listitem><para>The garbage collector is not particularly - efficient.</para></listitem> - <listitem><para>Various aspects of the PCL implementation - of CLOS are more inefficient than necessary.</para></listitem> - </itemizedlist> - </para></listitem> -</itemizedlist> -</para> - -<para>Finally, note that &CommonLisp; defines many constructs which, in -the infamous phrase, <quote>could be compiled efficiently by a -sufficiently smart compiler</quote>. The phrase is infamous because -making a compiler which actually is sufficiently smart to find all -these optimizations systematically is well beyond the state of the art -of current compiler technology. Instead, they're optimized on a -case-by-case basis by hand-written code, or not optimized at all if -the appropriate case hasn't been hand-coded. 
Some cases where no such -hand-coding has been done as of &SBCL; version 0.6.3 include -<itemizedlist> - <listitem><para><literal>(reduce #'f x)</> - where the type of <varname>x</> is known at compile - time</para></listitem> - <listitem><para>various bit vector operations, e.g. - <literal>(position 0 some-bit-vector)</></para></listitem> -</itemizedlist> -If your system's performance is suffering because of some construct -which could in principle be compiled efficiently, but which the &SBCL; -compiler can't in practice compile efficiently, consider writing a -patch to the compiler and submitting it for inclusion in the main -sources. Such code is often reasonably straightforward to write; -search the sources for the string <quote><function>deftransform</></> -to find many examples (some straightforward, some less so).</para> - -<sect1 id="modular-arithmetic"><title>Modular arithmetic</> -<para> -Some numeric functions have a property: <varname>N</> lower bits of -the result depend only on <varname>N</> lower bits of (all or some) -arguments. If the compiler sees an expression of form <literal>(logand -exp mask)</>, where <varname>exp</> is a tree of such "good" functions -and <varname>mask</> is known to be of type <type>(unsigned-byte -w)</>, where <varname>w</> is a "good" width, all intermediate results -will be cut to <varname>w</> bits (but it is not done for variables -and constants!). This often results in an ability to use simple -machine instructions for the functions. -</para> - -<para> -Consider an example. -<programlisting> -(defun i (x y) - (declare (type (unsigned-byte 32) x y)) - (ldb (byte 32 0) (logxor x (lognot y)))) -</programlisting> -The result of <literal>(lognot y)</> will be negative and of -type <type>(signed-byte 33)</>, so a naive implementation on a 32-bit -platform is unable to use 32-bit arithmetic here. 
But modular -arithmetic optimizer is able to do it: because the result is cut down -to 32 bits, the compiler will replace <function>logxor</> -and <function>lognot</> with versions cutting results to 32 bits, and -because terminals (here---expressions <literal>x</> and <literal>y</>) -are also of type <type>(unsigned-byte 32)</>, 32-bit machine -arithmetic can be used. -</para> - -<note><para> As of &SBCL; 0.8.5 "good" functions -are <function>+</>, <function>-</>; <function>logand</>, <function>logior</>, -<function>logxor</>, <function>lognot</> and their combinations; -and <function>ash</> with the positive second argument. "Good" widths -are 32 on HPPA, MIPS, PPC, Sparc and X86 and 64 on Alpha. While it is -possible to support smaller widths as well, currently it is not -implemented. -</para></note> - -</sect1> - -</chapter> diff --git a/doc/efficiency.xml b/doc/efficiency.xml new file mode 100644 index 0000000..d495489 --- /dev/null +++ b/doc/efficiency.xml @@ -0,0 +1,139 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" + "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [ +<!ENTITY % myents SYSTEM "entities.inc"> +%myents; +]> + +<chapter id="efficiency"><title>Efficiency + +FIXME: The material in the &CMUCL; manual about getting good +performance from the compiler should be reviewed, reformatted in +DocBook, lightly edited for &SBCL;, and substituted into this +manual. In the meantime, the original &CMUCL; manual is still 95+% +correct for the &SBCL; version of the &Python; compiler. 
See the +sections + + Advanced Compiler Use and Efficiency Hints + Advanced Compiler Introduction + More About Types in Python + Type Inference + Source Optimization + Tail Recursion + Local Call + Block Compilation + Inline Expansion + Object Representation + Numbers + General Efficiency Hints + Efficiency Notes + + + +Besides this information from the &CMUCL; manual, there are a +few other points to keep in mind. + + The &CMUCL; manual doesn't seem to state it explicitly, + but &Python; has a mental block about type inference when + assignment is involved. &Python; is very aggressive and clever + about inferring the types of values bound with let, + let*, inline function call, and so forth. However, + it's much more passive and dumb about inferring the types of + values assigned with setq, setf, and + friends. It would be nice to fix this, but in the meantime don't + expect that just because it's very smart about types in most + respects it will be smart about types involved in assignments. + (This doesn't affect its ability to benefit from explicit type + declarations involving the assigned variables, only its ability to + get by without explicit type declarations.) + + Since the time the &CMUCL; manual was written, + &CMUCL; (and thus &SBCL;) has gotten a generational garbage + collector. This means that there are some efficiency implications + of various patterns of memory usage which aren't discussed in the + &CMUCL; manual. (Some new material should be written about + this.) + &SBCL; has some important known efficiency problems. + Perhaps the most important are + + There is no support for the &ANSI; + dynamic-extent declaration, not even for + closures or &rest lists. + The garbage collector is not particularly + efficient. + Various aspects of the PCL implementation + of CLOS are more inefficient than necessary. 
+ + + + + +Finally, note that &CommonLisp; defines many constructs which, in +the infamous phrase, could be compiled efficiently by a +sufficiently smart compiler. The phrase is infamous because +making a compiler which actually is sufficiently smart to find all +these optimizations systematically is well beyond the state of the art +of current compiler technology. Instead, they're optimized on a +case-by-case basis by hand-written code, or not optimized at all if +the appropriate case hasn't been hand-coded. Some cases where no such +hand-coding has been done as of &SBCL; version 0.6.3 include + + (reduce #'f x) + where the type of x is known at compile + time + various bit vector operations, e.g. + (position 0 some-bit-vector) + +If your system's performance is suffering because of some construct +which could in principle be compiled efficiently, but which the &SBCL; +compiler can't in practice compile efficiently, consider writing a +patch to the compiler and submitting it for inclusion in the main +sources. Such code is often reasonably straightforward to write; +search the sources for the string deftransform +to find many examples (some straightforward, some less so). + +Modular arithmetic + +Some numeric functions have a property: N lower bits of +the result depend only on N lower bits of (all or some) +arguments. If the compiler sees an expression of form (logand +exp mask), where exp is a tree of such "good" functions +and mask is known to be of type (unsigned-byte +w), where w is a "good" width, all intermediate results +will be cut to w bits (but it is not done for variables +and constants!). This often results in an ability to use simple +machine instructions for the functions. + + + +Consider an example. 
+ +(defun i (x y) + (declare (type (unsigned-byte 32) x y)) + (ldb (byte 32 0) (logxor x (lognot y)))) + +The result of (lognot y) will be negative and of +type (signed-byte 33), so a naive implementation on a 32-bit +platform is unable to use 32-bit arithmetic here. But modular +arithmetic optimizer is able to do it: because the result is cut down +to 32 bits, the compiler will replace logxor +and lognot with versions cutting results to 32 bits, and +because terminals (here---expressions x and y) +are also of type (unsigned-byte 32), 32-bit machine +arithmetic can be used. + + + As of &SBCL; 0.8.5 "good" functions +are +, -; logand, logior, +logxor, lognot and their combinations; +and ash with the positive second argument. "Good" widths +are 32 on HPPA, MIPS, PPC, Sparc and X86 and 64 on Alpha. While it is +possible to support smaller widths as well, currently it is not +implemented. + + + + + diff --git a/doc/entities.inc b/doc/entities.inc new file mode 100644 index 0000000..3e9e444 --- /dev/null +++ b/doc/entities.inc @@ -0,0 +1,16 @@ + + AMOP"> + ANSI"> + CMU CL"> + IEEE"> + Python"> + SBCL"> + + + + + + diff --git a/doc/ffi.sgml b/doc/ffi.sgml deleted file mode 100644 index b360261..0000000 --- a/doc/ffi.sgml +++ /dev/null @@ -1,1248 +0,0 @@ -The Foreign Function Interface</> - -<para>This chapter describes &SBCL;'s interface to C programs and -libraries (and, since C interfaces are a sort of <foreignphrase>lingua -franca</> of the Unix world, to other programs and libraries in -general.)</para> - -<note><para>In the modern Lisp world, the usual term for this -functionality is Foreign Function Interface, or <acronym>FFI</>, where -despite the mention of <quote>function</> in this term, <acronym>FFI</> also -refers to direct manipulation of C data structures as well as -functions. 
The traditional &CMUCL; terminology is Alien Interface, and -while that older terminology is no longer used much in the system -documentation, it still reflected in names in the -implementation, notably in the name of the <literal>SB-ALIEN</> -package.</para></note> - -<sect1><title>Introduction to the Foreign Function Interface</> -<!-- AKA "Introduction to Aliens" in the CMU CL manual --> - -<para> -Because of Lisp's emphasis on dynamic memory allocation and garbage -collection, Lisp implementations use non-C-like memory representations -for objects. This representation mismatch creates friction when a Lisp -program must share objects with programs which expect C data. There -are three common approaches to establishing communication: -<itemizedlist> - <listitem><para>The burden can be placed on the foreign program - (and programmer) by requiring the knowledge and use of the - representations used internally by the Lisp implementation. - This can require a considerable amount of <quote>glue</> code on the - C side, and that code tends to be sensitively dependent on the - internal implementation details of the Lisp system.</para></listitem> - <listitem><para>The Lisp system can automatically convert objects - back and forth between the Lisp and foreign representations. - This is convenient, but translation becomes prohibitively slow - when large or complex data structures must be shared. This approach - is supported by the &SBCL; <acronym>FFI</>, and used automatically - by the when passing integers and strings.</para></listitem> - <listitem><para>The Lisp program can directly manipulate foreign - objects through the use of extensions to the Lisp language. - </para></listitem> -</itemizedlist> -</para> - -<para>&SBCL;, like &CMUCL; before it, relies primarily on the -automatic conversion and direct manipulation approaches. 
The SB-ALIEN -package provides a facility wherein foreign values of simple scalar -types are automatically converted and complex types are directly -manipulated in their foreign representation.
An -<type>alien</> type specifier provides a way to use any foreign type as a -Lisp type specifier. For example, -<programlisting>(typep foo '(alien (* int)))</programlisting> -can be used to determine whether <varname>foo</> is a pointer to a foreign -<type>int</>. <type>alien</> type specifiers can be used in the same ways -as ordinary Lisp type specifiers (like <type>string</>.) Alien type -declarations are subject to the same -precise type checking <!-- FIXME: should be linked to id="precisetypechecking" --> -as any other declaration. -</para> - -<para> -Note that the type identifiers used in the -foreign type system overlap with native Lisp type -specifiers in some cases. For example, the type specifier -<type>(alien single-float)</type> is identical to <type>single-float</>, since -foreign floats are automatically converted to Lisp floats. When -<function>type-of</> is called on an alien value that is not automatically -converted to a Lisp value, then it will return an <type>alien</> type -specifier. -</para> - -</sect2> - -<sect2><title>Foreign Type Specifiers</> - -<note><para> -All foreign type names are exported from the <literal>sb-alien</> -package. Some foreign type names are also symbols in -the <literal>common-lisp</> package, in which case they are -reexported from the <literal>sb-alien</> package, so that -e.g. it is legal to refer to <type>sb-alien:single-float</>. -</para></note> - -<para> -These are the basic foreign type specifiers: -<!-- FIXME: There must be some better way of formatting definitions - in DocBook than this. I haven't found it yet, but suggestions - or patches would be welcome. --> -<itemizedlist> - <listitem> - <para> - The foreign type specifier <type>(* foo)</> describes a - pointer to an object of type <type>foo</>. A pointed-to type - <type>foo</> of <type>t</> indicates a pointer to anything, - similar to <type>void *</> in ANSI C. A null alien pointer can - be detected with the <function>sb-alien:null-alien</> - function. 
- </para> - </listitem> - <listitem> - <para> - The foreign type specifier <type>(array foo &optional dimensions)</> - describes array of the specified <literal>dimensions</>, holding - elements of type <type>foo</>. Note that (unlike in C) <type>(* foo)</> - <type>(array foo)}</> are considered to be different types when - type checking is done. If equivalence of pointer and array types - is desired, it may be explicitly coerced using - <function>sb-alien:cast</>. - </para> - <para> - Arrays are accessed using <function>sb-alien:deref</>, passing - the indices as additional arguments. Elements are stored in - column-major order (as in C), so the first dimension determines - only the size of the memory block, and not the layout of the - higher dimensions. An array whose first dimension is variable - may be specified by using <literal>nil</> as the first dimension. - Fixed-size arrays can be allocated as array elements, structure - slots or <function>sb-alien:with-alien</> variables. Dynamic - arrays can only be allocated using <function>sb-alien:make-alien</>. - </para> - </listitem> - <listitem> - <para> - The foreign type specifier - <type>(sb-alien:struct name &rest fields)</> - describes a structure type with the specified <varname>name</> and - <varname>fields</>. Fields are allocated at the same offsets - used by the implementation's C compiler. If <varname>name</> - is <literal>nil</> then the structure is anonymous. - </para> - <para> - If a named foreign <type>struct</> specifier is passed to - <function>define-alien-type</> or <function>with-alien</>, - then this defines, respectively, a new global or local foreign - structure type. If no <varname>fields</> are specified, then - the fields are taken from the current (local or global) alien - structure type definition of <varname>name</>. 
- </para> - </listitem> - <listitem> - <para> - The foreign type specifier - <type>(sb-alien:union name &rest fields)</> - is similar to <type>sb-alien:struct</>, but describes a union type. - All fields are allocated at the same offset, and the size of the - union is the size of the largest field. The programmer must - determine which field is active from context. - </para> - </listitem> - <listitem> - <para> - The foreign type specifier <type>(sb-alien:enum name &rest specs)</> - describes an enumeration type that maps between integer values - and keywords. If <varname>name</> is <literal>nil</>, then the - type is anonymous. Each element of the <varname>specs</> - list is either a Lisp keyword, or a list <literal>(keyword value)</>. - <varname>value</> is an integer. If <varname>value</> is not - supplied, then it defaults to one greater than the value for - the preceding spec (or to zero if it is the first spec.) - </para> - </listitem> - <listitem> - <para> - The foreign type specifier <type>(sb-alien:signed &optional bits)</> - specifies a signed integer with the specified number of - <varname>bits</> precision. The upper limit on integer - precision is determined by the machine's word - size. If <varname>bits</> is not specified, the maximum - size will be used. - </para> - </listitem> - <listitem> - <para> - The foreign type specifier <type>(integer &optional bits)</> is - equivalent to the corresponding type specifier using - <type>sb-alien:signed</> instead of <type>integer</>. - </para> - </listitem> - <listitem> - <para> - The foreign type specifier - <type>(sb-alien:unsigned &optional bits)</> - is like corresponding type specifier using <type>sb-alien:signed</> - except that the variable is treated as an unsigned integer. 
- </para> - </listitem> - <listitem> - <para> - The foreign type specifier <type>(boolean &optional bits)</> is - similar to an enumeration type, but maps from Lisp <literal>nil</> - and <literal>t</> to C <literal>0</> and <literal>1</> - respectively. <varname>bits</> determines the amount of - storage allocated to hold the truth value. - </para> - </listitem> - <listitem> - <para> - The foreign type specifier <type>single-float</> describes a - floating-point number in IEEE single-precision format. - </para> - </listitem> - <listitem> - <para> - The foreign type specifier <type>double-float</> describes a - floating-point number in IEEE double-precision format. - </para> - </listitem> - <listitem> - <para> - The foreign type specifier - <type>(function result-type &rest arg-types)</> - describes a foreign function that takes arguments of the specified - <varname>arg-types</> and returns a result of type <type>result-type</>. - Note that the only context where a foreign <type>function</> type - is directly specified is in the argument to - <function>sb-alien:alien-funcall</>. - In all other contexts, foreign functions are represented by - foreign function pointer types: <type>(* (function ...))</>. - </para> - </listitem> - <listitem> - <para> - The foreign type specifier <type>sb-alien:system-area-pointer</> - describes a pointer which is represented in Lisp as a - <type>system-area-pointer</> object. &SBCL; exports this type from - <literal>sb-alien</> because &CMUCL; did, but tentatively (as of - the first draft of this section of the manual, &SBCL; 0.7.6) it is - deprecated, since it doesn't seem to be required by user code. - </para> - </listitem> - <listitem> - <para> - The foreign type specifier <type>sb-alien:void</> is - used in function types to declare that no useful value - is returned. Using <function>alien-funcall</> - to call a <type>void</> foreign function will return - zero values. 
- </para> - </listitem> - <listitem> - <para> - The foreign type specifier <type>sb-alien:c-string</> - is similar to <type>(* char)</>, but is interpreted as a - null-terminated string, and is automatically converted into a - Lisp string when accessed; or if the pointer is C <literal>NULL</> - or <literal>0</>, then accessing it gives Lisp <literal>nil</>. - Lisp strings are stored with a trailing NUL termination, so no - copying (either by the user or the implementation) is necessary - when passing them to foreign code. - </para> - <para> - Assigning a Lisp string to a <type>c-string</> structure field or - variable stores the contents of the string to the memory already - pointed to by that variable. When a foreign object of type - <type>(* char)</> is assigned to a <type>c-string</>, then the - <type>c-string</> pointer is assigned to. This allows - <type>c-string</> pointers to be initialized. For example: - <programlisting>(cl:in-package "CL-USER") ; which USEs package "SB-ALIEN" - (define-alien-type nil (struct foo (str c-string))) - (defun make-foo (str) (let ((my-foo (make-alien (struct foo)))) - (setf (slot my-foo 'str) (make-alien char (length str)) - (slot my-foo 'str) str) my-foo))</programlisting> - Storing Lisp <literal>NIL</> in a <type>c-string</> writes C - <literal>NULL</> to the variable. - </para> - </listitem> - <listitem> - <para> - <literal>sb-alien</> also exports translations of these C type - specifiers as foreign type specifiers: - <type>sb-alien:char</>, - <type>sb-alien:short</>, - <type>sb-alien:int</>, - <type>sb-alien:long</>, - <type>sb-alien:unsigned-char</>, - <type>sb-alien:unsigned-short</>, - <type>sb-alien:unsigned-int</>, - <type>sb-alien:unsigned-long</>, - <type>sb-alien:float</>, and - <type>sb-alien:double</>. 
- </para> - </listitem> - -</itemizedlist> - -</para> - -</sect2> - -</sect1> - -<sect1><title>Operations On Foreign Values</> -<!-- AKA "Alien Operations" in the CMU CL manual --> - -<para>This section describes how to read foreign values as Lisp -values, how to coerce foreign values to different kinds of foreign values, and -how to dynamically allocate and free foreign variables.</para> - -<sect2><title>Accessing Foreign Values</> - -<synopsis>(sb-alien:deref pointer-or-array &rest indices)</> - -<para>The <function>sb-alien:deref</> function returns the value pointed to by -a foreign pointer, or the value of a foreign array element. When -dereferencing a pointer, an optional single index can be specified to -give the equivalent of C pointer arithmetic; this index is scaled by -the size of the type pointed to. When dereferencing an array, the -number of indices must be the same as the number of dimensions in the -array type. <function>deref</> can be set with <function>setf</> to -assign a new value.</para> - -<synopsis>(sb-alien:slot struct-or-union &rest slot-names)</> - -<para>The <function>sb-alien:slot</> function extracts the value of -the slot named <varname>slot-name</> from a foreign <type>struct</> or -<type>union</>. If <varname>struct-or-union</> is a pointer to a -structure or union, then it is automatically dereferenced. -<function>sb-alien:slot</> can be set with <function>setf</> to assign -a new value. Note that <varname>slot-name</> is evaluated, and need -not be a compile-time constant (but only constant slot accesses are -efficiently compiled.)</para> - -<sect3><title>Untyped memory</> - -<para>As noted at the beginning of the chapter, the System Area -Pointer facilities allow untyped access to foreign memory. SAPs can -be converted to and from the usual typed foreign values using -<function>sap-alien</function> and <function>alien-sap</function> -(described elsewhere), and also to and from integers - raw machine -addresses. 
They should thus be used with caution; corrupting the Lisp -heap or other memory with SAPs is trivial.</para> - -<synopsis>(sb-sys:int-sap machine-address)</> - -<para>Creates a SAP pointing at the virtual address -<varname>machine-address</varname>. </para> - -<synopsis>(sb-sys:sap-ref-32 sap offset)</> - -<para>Access the value of the memory location at -<varname>offset</varname> bytes from <varname>sap</varname>. This form -may also be used with <function>setf</function> to alter the memory at -that location.</para> - -<synopsis>(sb-sys:sap= sap1 sap2)</> - -<para>Compare <varname>sap1</varname> and <varname>sap2</varname> for -equality.</para> - -<para>Similarly named functions exist for accessing other sizes of -word, other comparisons, and other conversions. The reader is invited -to use <function>apropos</function> and <function>describe</function> -for more details</para> -<programlisting> -(apropos "sap" :sb-sys) -</programlisting> -</sect3></sect2> - -<sect2><title>Coercing Foreign Values</> - -<synopsis>(sb-alien:addr alien-expr)</> - -<para> -The <function>sb-alien:addr</> macro -returns a pointer to the location specified by -<varname>alien-expr</>, which must be either a foreign variable, a use of -<function>sb-alien:deref</>, a use of <function>sb-alien:slot</>, or a use of -<function>sb-alien:extern-alien</>. -</para> - -<synopsis>(sb-alien:cast foreign-value new-type)</> - -<para>The <function>sb-alien:cast</> -converts <varname>foreign-value</> to a new foreign value with the specified -<varname>new-type</>. Both types, old and new, must be foreign pointer, -array or function types. Note that the resulting Lisp -foreign variable object -is not <function>eq</> to the -argument, but it does refer to the same foreign data bits.</para> - -<synopsis>(sb-alien:sap-alien sap type)</> - -<para>The <function>sb-alien:sap-alien</> function converts <varname>sap</> -(a system area pointer) to a foreign value with the specified -<varname>type</>. 
<varname>type</> is not evaluated. -</para> - -<para>The <varname>type</> must be some foreign pointer, array, or -record type.</para> - -<synopsis>(sb-alien:alien-sap foreign-value type)</> - -<para>The <function>sb-alien:alien-sap</> function -returns the SAP which points to <varname>alien-value</>'s data. -</para> - -<para>The <varname>foreign-value</> must be of some foreign pointer, -array, or record type.</para> - -</sect2> - -<sect2><title>Foreign Dynamic Allocation</> - -<para>Lisp code can call the C standard library functions -<function>malloc</> and <function>free</> to dynamically allocate and -deallocate foreign variables. The Lisp code shares the same allocator -with foreign C code, so it's OK for foreign code to call -<function>free</> on the result of Lisp -<function>sb-alien:make-alien</>, or for Lisp code to call -<function>sb-alien:free-alien</> on foreign objects allocated by C -code.</para> - -<synopsis>(sb-alien:make-alien type size)</> - -<para>The <function>sb-alien:make-alien</> macro -returns a dynamically allocated foreign value of the specified -<varname>type</> (which is not evaluated.) The allocated memory is not -initialized, and may contain arbitrary junk. If supplied, -<varname>size</> is an expression to evaluate to compute the size of the -allocated object. There are two major cases: -<itemizedlist> - <listitem> - <para>When <varname>type</> is a foreign array type, an array of - that type is allocated and a pointer to it is returned. 
Note that you - must use <function>deref</> to change the result to an array before you - can use <function>deref</> to read or write elements: - <programlisting> - (cl:in-package "CL-USER") ; which USEs package "SB-ALIEN" - (defvar *foo* (make-alien (array char 10))) - (type-of *foo*) => (alien (* (array (signed 8) 10))) - (setf (deref (deref foo) 0) 10) => 10</programlisting> - If supplied, <varname>size</> is used as the first dimension for the - array.</para> - </listitem> - <listitem> - <para>When <varname>type</> is any other foreign type, then an - object for that type is allocated, and a pointer to it is - returned. So <function>(make-alien int)</> returns a <type>(* int)</>. - If <varname>size</> is specified, then a block of that many - objects is allocated, with the result pointing to the first one.</para> - </listitem> -</itemizedlist> -</para> - -<synopsis>(sb-alien:free-alien foreign-value)</> - -<para>The <function>sb-alien:free-alien</> function -frees the storage for <varname>foreign-value</>, -which must have been allocated with Lisp <function>make-alien</> -or C <function>malloc</>.</para> - -<para>See also the <function>sb-alien:with-alien</> macro, which -allocates foreign values on the stack.</para> - -</sect2> - -</sect1> - -<sect1><title>Foreign Variables</> -<!-- AKA "Alien Variables" in the CMU CL manual --> - -<para> -Both local (stack allocated) and external (C global) foreign variables are -supported. -</para> - -<sect2><title>Local Foreign Variables</> - -<synopsis>(sb-alien:with-alien var-definitions &body body)</> - -<para>The <function>with-alien</> -macro establishes local -foreign variables -with the specified -alien types and names. -This form is analogous to defining a local variable in C: additional -storage is allocated, and the initial value is copied. 
-This form is less -analogous to LET-allocated Lisp variables, since the variables -can't be captured in closures: they live only for the dynamic extent -of the body, and referring to them outside is a gruesome error. -</para> - -<para>The <varname>var-definitions</> argument is a list of -variable definitions, each of the form -<programlisting>(name type &optional initial-value)</programlisting> -The names of the variables are established as symbol-macros; the bindings have -lexical scope, and may be assigned with <function>setq</> -or <function>setf</>. -</para> - -<para>The <function>with-alien</> macro also establishes -a new scope for named structures -and unions. Any <varname>type</> specified for a variable may contain -named structure or union types with the slots specified. Within the -lexical scope of the binding specifiers and body, a locally defined -foreign structure type <type>foo</> can be referenced by its name using -<type>(struct foo)</>. -</para> - -</sect2> - -<sect2><title>External Foreign Variables</> - -<para> -External foreign names are strings, and Lisp names are symbols. When -an external foreign value is represented using a Lisp variable, there -must be a way to convert from one name syntax into the other. 
The -macros <function>extern-alien</>, <function>define-alien-variable</> and -<function>define-alien-routine</> use this conversion heuristic: -<itemizedlist> - <listitem><para>Alien names are converted to Lisp names by uppercasing and - replacing underscores with hyphens.</para></listitem> - <listitem><para>Conversely, Lisp names are converted to alien names by - lowercasing and replacing hyphens with underscores.</para></listitem> - <listitem><para>Both the Lisp symbol and alien string names may be - separately specified by using a list of the form - <programlisting>(alien-string lisp-symbol)</></para></listitem> -</itemizedlist> -</para> - -<synopsis>(sb-alien:define-alien-variable name type)</> - -<para> -The <function>define-alien-variable</> macro -defines <varname>name</> as an external foreign variable of the -specified foreign <type>type</>. <varname>name</> and <type>type</> are not -evaluated. The Lisp name of the variable (see above) becomes a -global alien variable. Global alien variables -are effectively ``global symbol macros''; a reference to the -variable fetches the contents of the external variable. Similarly, -setting the variable stores new contents---the new contents must be -of the declared <type>type</>. Someday, they may well be implemented -using the &ANSI; <function>define-symbol-macro</> mechanism, but -as of &SBCL; 0.7.5, they are still implemented using an older -more-or-less parallel mechanism inherited from &CMUCL;. 
-</para> - -<para> -For example, to access a C-level counter <varname>foo</>, one could -write -<programlisting> -(define-alien-variable "foo" int) -;; Now it is possible to get the value of the C variable foo simply by -;; referencing that Lisp variable: -(print foo) -(setf foo 14) -(incf foo)</programlisting> -</para> - -<synopsis>(sb-alien:get-errno)</> - -<para> -Since in modern C libraries, the <varname>errno</> "variable" is typically -no longer a variable, but some bizarre artificial construct -which behaves superficially like a variable within a given thread, -it can no longer reliably be accessed through the ordinary -<varname>define-alien-variable</> mechanism. Instead, &SBCL; provides -the operator <function>sb-alien:get-errno</> to allow Lisp code to read it. -</para> - -<synopsis>(sb-alien:extern-alien name type)</> - -<para> -The <function>extern-alien</> macro -returns an alien with the specified <type>type</> which -points to an externally defined value. <varname>name</> is not evaluated, -and may be either a string or a symbol. <type>type</> is -an unevaluated alien type specifier. -</para> - -</sect2> - -</sect1> - -<sect1><title>Foreign Data Structure Examples</> -<!-- AKA "Alien Data Structure Example" in the CMU CL manual --> - -<para> -Now that we have alien types, operations and variables, we can manipulate -foreign data structures. This C declaration -<programlisting> -struct foo { - int a; - struct foo *b[100]; -};</programlisting> -can be translated into the following alien type: -<programlisting>(define-alien-type nil - (struct foo - (a int) - (b (array (* (struct foo)) 100))))</programlisting> -</para> - -<para> -Once the <type>foo</> alien type has been defined as above, -the C expression -<programlisting> -struct foo f; -f.b[7].a</programlisting> -can be translated in this way: -<programlisting> -(with-alien ((f (struct foo))) - (slot (deref (slot f 'b) 7) 'a) - ;; - ;; Do something with f... 
- )</programlisting> -</para> - -<para> -Or consider this example of an external C variable and some accesses: -<programlisting> -struct c_struct { - short x, y; - char a, b; - int z; - c_struct *n; -}; -extern struct c_struct *my_struct; -my_struct->x++; -my_struct->a = 5; -my_struct = my_struct->n;</programlisting> -which can be manipulated in Lisp like this: -<programlisting> -(define-alien-type nil - (struct c-struct - (x short) - (y short) - (a char) - (b char) - (z int) - (n (* c-struct)))) -(define-alien-variable "my_struct" (* c-struct)) -(incf (slot my-struct 'x)) -(setf (slot my-struct 'a) 5) -(setq my-struct (slot my-struct 'n))</programlisting> -</para> - -</sect1> - -<sect1><title>Loading Unix Object Files</> - -<para> -Foreign object files can be loaded into the running Lisp process by -calling the functions <function>load-foreign</> or -<function>load-1-foreign</>. -</para> - -<para> The <function>sb-alien:load-1-foreign</> function is the more -primitive of the two operations. It loads a single object file. into -the currently running Lisp. The external symbols defining routines and -variables are made available for future external references (e.g. by -<function>extern-alien</>). Forward references to foreign symbols -aren't supported: <function>load-1-foreign</> must be run before any -of the defined symbols are referenced. -</para> - -<para><function>sb-alien:load-foreign</> is built in terms of -<function>load-1-foreign</> and some other machinery -like <function>sb-ext:run-program</>. -It accepts a list of files and libraries, -and runs the linker on the files and -libraries, creating an absolute Unix object file which is then -processed by <function>load-1-foreign</>.</para> - -<note><para>As of &SBCL; 0.7.5, all foreign code (code loaded -with <function>load-1-function</> or <function>load-function</>) is -lost when a Lisp core is saved with -<function>sb-ext:save-lisp-and-die</>, and no attempt is made to -restore it when the core is loaded. 
Historically this has been an -annoyance both for &SBCL; users and for &CMUCL; users. -It's hard to solve this problem completely cleanly, but some -generally-reliable partial solution might be useful. Once someone in -either camp gets sufficiently annoyed to create it, &SBCL; is -likely to adopt some mechanism for automatically restoring foreign -code when a saved core is loaded.</para></note> - -</sect1> - -<sect1><title>Foreign Function Calls</> - -<para> -The foreign function call interface allows a Lisp program to call -many functions written in languages that use the C calling convention. -</para> - -<para> -Lisp sets up various signal handling routines and other environment -information when it first starts up, and expects these to be in place -at all times. The C functions called by Lisp should not change the -environment, especially the signal handlers: the signal handlers -installed by Lisp typically have interesting flags set (e.g to request -machine context information, or for signal delivery on an alternate -stack) which the Lisp runtime relies on for correct operation. -Precise details of how this works may change without notice between -versions; the source, or the brain of a friendly &SBCL; developer, -is the only documentation. Users of a Lisp built with the :sb-thread -feature should also read the Threading section -<!-- FIXME I'm sure docbook has some syntax for internal links --> -of this manual</para> - -<sect2><title>The <function>alien-funcall</> Primitive - -(sb-alien:alien-funcall alien-function &rest arguments) - - -The alien-funcall function is the foreign function call -primitive: alien-function is called with the supplied -arguments and its C return value is returned as a Lisp value. -The alien-function is an arbitrary -run-time expression; to refer to a constant function, use -extern-alien or a value defined by -define-alien-routine. - - - -The type of alien-function -must be (alien (function ...)) -or (alien (* (function ...))). 
-The function type is used to -determine how to call the function (as though it was declared with -a prototype.) The type need not be known at compile time, but only -known-type calls are efficiently compiled. Limitations: - - Structure type return values are not implemented. - Passing of structures by value is not implemented. - - - - -Here is an example which allocates a (struct foo), calls a foreign -function to initialize it, then returns a Lisp vector of all the -(* (struct foo)) objects filled in by the foreign call: - -;; Allocate a foo on the stack. -(with-alien ((f (struct foo))) - ;; Call some C function to fill in foo fields. - (alien-funcall (extern-alien "mangle_foo" (function void (* foo))) - (addr f)) - ;; Find how many foos to use by getting the A field. - (let* ((num (slot f 'a)) - (result (make-array num))) - ;; Get a pointer to the array so that we don't have to keep extracting it: - (with-alien ((a (* (array (* (struct foo)) 100)) (addr (slot f 'b)))) - ;; Loop over the first N elements and stash them in the result vector. - (dotimes (i num) - (setf (svref result i) (deref (deref a) i))) - ;; Voila. - result))) - - - - -The <function>define-alien-routine</> Macro</> - -<synopsis>(sb-alien:define-alien-routine} name result-type &rest arg-specifiers)</> - -<para> -The <function>define-alien-routine</> macro is a convenience -for automatically generating Lisp -interfaces to simple foreign functions. The primary feature is the -parameter style specification, which translates the C -pass-by-reference idiom into additional return values. -</para> - -<para> -<varname>name</> is usually a string external symbol, but may also be a -symbol Lisp name or a list of the foreign name and the Lisp name. -If only one name is specified, the other is automatically derived -as for <function>extern-alien</>. -<varname>result-type</> is the alien type of the return value. 
-</para> - -<para> -Each element of the <varname>arg-specifiers</> list -specifies an argument to the foreign function, and is -of the form -<programlisting>(aname atype &optional style)</programlisting> -<varname>aname</> is the symbol name of the argument to the constructed -function (for documentation). <varname>atype</> is the alien type of -corresponding foreign argument. The semantics of the actual call -are the same as for <function>alien-funcall</>. <varname>style</> -specifies how this argument should be handled at call and return time, -and should be one of the following -<itemizedlist> - <listitem><para><varname>:in</>specifies that the argument is - passed by value. This is the default. <varname>:in</> arguments - have no corresponding return value from the Lisp function. - </para></listitem> - <listitem><para><varname>:copy</> is similar to <varname>:in</>, - but the argument is copied - to a pre-allocated object and a pointer to this object is passed - to the foreign routine.</para></listitem> - <listitem><para><varname>:out</> specifies a pass-by-reference - output value. The type of the argument must be a pointer to - a fixed-sized object (such as an integer or pointer). - <varname>:out</> and <varname>:in-out</> style cannot - be used with pointers to arrays, records or functions. An - object of the correct size is allocated on the stack, and - its address is passed to the foreign function. When the - function returns, the contents - of this location are returned as one of the values of the Lisp - function (and the location is automatically deallocated). - </para></listitem> - <listitem><para><varname>:in-out</> is a combination of - <varname>:copy</> and <varname>:out</>. - The argument is copied to a pre-allocated object and a pointer to - this object is passed to the foreign routine. On return, the - contents of this location is returned as an additional value. 
- </para></listitem> -</itemizedlist> -</para> - -<note> -<para> -Any efficiency-critical foreign interface function should be inline -expanded, which can be done by preceding the -<function>define-alien-routine</> call with: -<programlisting>(declaim (inline lisp-name))</programlisting> -In addition to avoiding the Lisp call overhead, this allows -pointers, word-integers and floats to be passed using non-descriptor -representations, avoiding consing.) -</para> -</note> - -</sect2> - -<sect2><title><function>define-alien-routine</> Example - - -Consider the C function cfoo -with the following calling convention: - -void -cfoo (str, a, i) - char *str; - char *a; /* update */ - int *i; /* out */ -{ - /* body of cfoo(...) */ -} -This can be described by the following call to -define-alien-routine: - -(define-alien-routine "cfoo" void - (str c-string) - (a char :in-out) - (i int :out)) -The Lisp function cfoo will have -two arguments (str and a) -and two return values (a and i). - - - - -Calling Lisp From C</> - -<para> -Calling Lisp functions from C is sometimes possible, but is extremely -hackish and poorly supported as of &SBCL; 0.7.5. -See <function>funcall0</> ... <function>funcall3</> in -the runtime system. The -arguments must be valid &SBCL; object descriptors (so that -e.g. fixnums must be -left-shifted by 2.) As of &SBCL; 0.7.5, the format -of object descriptors is documented only by the source code and, in parts, -by the old &CMUCL; "INTERNALS" documentation.</para> - -<para> Note that the garbage collector moves objects, and won't be -able to fix up any references in C variables. 
There are three -mechanisms for coping with this: -<orderedlist> - -<listitem><para>The <function>sb-ext:purify</> moves all live Lisp -data into static or read-only areas such that it will never be moved -(or freed) again in the life of the Lisp session</para></listitem> - -<listitem><para><function>sb-sys:with-pinned-objects</function> is a -macro which arranges for some set of objects to be pinned in memory -for the dynamic extent of its body forms. On ports which use the -generational garbage collector (as of &SBCL; 0.8.3, only the x86) this -has a page granularity - i.e. the entire 4k page or pages containing -the objects will be locked down. On other ports it is implemented by -turning off GC for the duration (so could be said to have a -whole-world granularity). </para></listitem> - -<listitem><para>Disable GC, using the <function>without-gcing</function> -macro or <function>gc-off</function> call.</para></listitem> -</orderedlist> - -</para> - -<!-- FIXME: This is a "changebar" section from the CMU CL manual. - I (WHN 2002-07-14) am not very familiar with this content, so - I'm not immediately prepared to try to update it for SBCL, and - I'm not feeling masochistic enough to work to encourage this - kind of low-level hack anyway. However, I acknowledge that callbacks - are sometimes really really necessary, so I include the original - text in case someone is hard-core enough to benefit from it. If - anyone brings the information up to date for SBCL, it belong - either in the main manual or on a CLiki SBCL Internals page. -LaTeX \subsection{Accessing Lisp Arrays} -LaTeX -LaTeX Due to the way \cmucl{} manages memory, the amount of memory that can -LaTeX be dynamically allocated by \code{malloc} or \funref{make-alien} is -LaTeX limited\footnote{\cmucl{} mmaps a large piece of memory for it's own -LaTeX use and this memory is typically about 8 MB above the start of the C -LaTeX heap. Thus, only about 8 MB of memory can be dynamically -LaTeX allocated.}. 
- -Empirically determined to be considerably >8Mb on this x86 linux -machine, but I don't know what the actual values are - dan 2003.09.01 - -Note that this technique is used in SB-GROVEL in the SBCL contrib - -LaTeX -LaTeX To overcome this limitation, it is possible to access the content of -LaTeX Lisp arrays which are limited only by the amount of physical memory -LaTeX and swap space available. However, this technique is only useful if -LaTeX the foreign function takes pointers to memory instead of allocating -LaTeX memory for itself. In latter case, you will have to modify the -LaTeX foreign functions. -LaTeX -LaTeX This technique takes advantage of the fact that \cmucl{} has -LaTeX specialized array types (\pxlref{specialized-array-types}) that match -LaTeX a typical C array. For example, a \code{(simple-array double-float -LaTeX (100))} is stored in memory in essentially the same way as the C -LaTeX array \code{double x[100]} would be. The following function allows us -LaTeX to get the physical address of such a Lisp array: -LaTeX \begin{example} -LaTeX (defun array-data-address (array) -LaTeX "Return the physical address of where the actual data of an array is -LaTeX stored. -LaTeX -LaTeX ARRAY must be a specialized array type in CMU Lisp. 
This means ARRAY -LaTeX must be an array of one of the following types: -LaTeX -LaTeX double-float -LaTeX single-float -LaTeX (unsigned-byte 32) -LaTeX (unsigned-byte 16) -LaTeX (unsigned-byte 8) -LaTeX (signed-byte 32) -LaTeX (signed-byte 16) -LaTeX (signed-byte 8) -LaTeX " -LaTeX (declare (type (or #+signed-array (array (signed-byte 8)) -LaTeX #+signed-array (array (signed-byte 16)) -LaTeX #+signed-array (array (signed-byte 32)) -LaTeX (array (unsigned-byte 8)) -LaTeX (array (unsigned-byte 16)) -LaTeX (array (unsigned-byte 32)) -LaTeX (array single-float) -LaTeX (array double-float)) -LaTeX array) -LaTeX (optimize (speed 3) (safety 0)) -LaTeX (ext:optimize-interface (safety 3))) -LaTeX ;; with-array-data will get us to the actual data. However, because -LaTeX ;; the array could have been displaced, we need to know where the -LaTeX ;; data starts. -LaTeX (lisp::with-array-data ((data array) -LaTeX (start) -LaTeX (end)) -LaTeX (declare (ignore end)) -LaTeX ;; DATA is a specialized simple-array. Memory is laid out like this: -LaTeX ;; -LaTeX ;; byte offset Value -LaTeX ;; 0 type code (should be 70 for double-float vector) -LaTeX ;; 4 4 * number of elements in vector -LaTeX ;; 8 1st element of vector -LaTeX ;; ... ... 
-LaTeX ;; -LaTeX (let ((addr (+ 8 (logandc1 7 (kernel:get-lisp-obj-address data)))) -LaTeX (type-size (let ((type (array-element-type data))) -LaTeX (cond ((or (equal type '(signed-byte 8)) -LaTeX (equal type '(unsigned-byte 8))) -LaTeX 1) -LaTeX ((or (equal type '(signed-byte 16)) -LaTeX (equal type '(unsigned-byte 16))) -LaTeX 2) -LaTeX ((or (equal type '(signed-byte 32)) -LaTeX (equal type '(unsigned-byte 32))) -LaTeX 4) -LaTeX ((equal type 'single-float) -LaTeX 4) -LaTeX ((equal type 'double-float) -LaTeX 8) -LaTeX (t -LaTeX (error "Unknown specialized array element type")))))) -LaTeX (declare (type (unsigned-byte 32) addr) -LaTeX (optimize (speed 3) (safety 0) (ext:inhibit-warnings 3))) -LaTeX (system:int-sap (the (unsigned-byte 32) -LaTeX (+ addr (* type-size start))))))) -LaTeX \end{example} -LaTeX -LaTeX Assume we have the C function below that we wish to use: -LaTeX \begin{example} -LaTeX double dotprod(double* x, double* y, int n) -LaTeX \{ -LaTeX int k; -LaTeX double sum = 0; -LaTeX -LaTeX for (k = 0; k < n; ++k) \{ -LaTeX sum += x[k] * y[k]; -LaTeX \} -LaTeX \} -LaTeX \end{example} -LaTeX The following example generates two large arrays in Lisp, and calls the C -LaTeX function to do the desired computation. This would not have been -LaTeX possible using \code{malloc} or \code{make-alien} since we need about -LaTeX 16 MB of memory to hold the two arrays. 
-LaTeX \begin{example} -LaTeX (define-alien-routine "dotprod" double -LaTeX (x (* double-float) :in) -LaTeX (y (* double-float) :in) -LaTeX (n int :in)) -LaTeX -LaTeX (let ((x (make-array 1000000 :element-type 'double-float)) -LaTeX (y (make-array 1000000 :element-type 'double-float))) -LaTeX ;; Initialize X and Y somehow -LaTeX (let ((x-addr (system:int-sap (array-data-address x))) -LaTeX (y-addr (system:int-sap (array-data-address y)))) -LaTeX (dotprod x-addr y-addr 1000000))) -LaTeX \end{example} -LaTeX In this example, it may be useful to wrap the inner \code{let} -LaTeX expression in an \code{unwind-protect} that first turns off garbage -LaTeX collection and then turns garbage collection on afterwards. This will -LaTeX prevent garbage collection from moving \code{x} and \code{y} after we -LaTeX have obtained the (now erroneous) addresses but before the call to -LaTeX \code{dotprod} is made. -LaTeX ---> - -</sect2> - -</sect1> - -<sect1><title>Step-By-Step Example of the Foreign Function Interface</> - -<para> -This section presents a complete example of an interface to a somewhat -complicated C function. 
-</para> - -<para> -Suppose you have the following C function which you want to be able to -call from Lisp in the file <filename>test.c</> -<programlisting> -struct c_struct -{ - int x; - char *s; -}; - -struct c_struct *c_function (i, s, r, a) - int i; - char *s; - struct c_struct *r; - int a[10]; -{ - int j; - struct c_struct *r2; - - printf("i = %d\n", i); - printf("s = %s\n", s); - printf("r->x = %d\n", r->x); - printf("r->s = %s\n", r->s); - for (j = 0; j < 10; j++) printf("a[%d] = %d.\n", j, a[j]); - r2 = (struct c_struct *) malloc (sizeof(struct c_struct)); - r2->x = i + 5; - r2->s = "a C string"; - return(r2); -};</programlisting> -</para> - -<para> -It is possible to call this C function from Lisp using the file -<filename>test.lisp</> containing -<programlisting> -(cl:defpackage "TEST-C-CALL" (:use "CL" "SB-ALIEN" "SB-C-CALL")) -(cl:in-package "TEST-C-CALL") - -;;; Define the record C-STRUCT in Lisp. -(define-alien-type nil - (struct c-struct - (x int) - (s c-string))) - -;;; Define the Lisp function interface to the C routine. It returns a -;;; pointer to a record of type C-STRUCT. It accepts four parameters: -;;; I, an int; S, a pointer to a string; R, a pointer to a C-STRUCT -;;; record; and A, a pointer to the array of 10 ints. -;;; -;;; The INLINE declaration eliminates some efficiency notes about heap -;;; allocation of alien values. -(declaim (inline c-function)) -(define-alien-routine c-function - (* (struct c-struct)) - (i int) - (s c-string) - (r (* (struct c-struct))) - (a (array int 10))) - -;;; a function which sets up the parameters to the C function and -;;; actually calls it -(defun call-cfun () - (with-alien ((ar (array int 10)) - (c-struct (struct c-struct))) - (dotimes (i 10) ; Fill array. 
- (setf (deref ar i) i)) - (setf (slot c-struct 'x) 20) - (setf (slot c-struct 's) "a Lisp string") - - (with-alien ((res (* (struct c-struct)) - (c-function 5 "another Lisp string" (addr c-struct) ar))) - (format t "~&back from C function~%") - (multiple-value-prog1 - (values (slot res 'x) - (slot res 's)) - - ;; Deallocate result. (after we are done referring to it: - ;; "Pillage, *then* burn.") - (free-alien res)))))</programlisting> -</para> - -<para> -To execute the above example, it is necessary to compile the C routine, -e.g.: -<userinput>cc -c test.c</> -(In order to enable incremental loading with some linkers, you may need -to say -<userinput>cc -G 0 -c test.c</>) -</para> - -<para> -Once the C code has been compiled, you can start up Lisp and load it in: -<userinput>sbcl</>. -Lisp should start up with its normal prompt.</para> - -<para> -Within Lisp, -compile the Lisp file. (This step can be done separately. You don't -have to recompile every time.) -<userinput>(compile-file "test.lisp")</> -</para> - -<para> -Within Lisp, load the foreign object file to define the necessary -symbols: -<userinput>(load-foreign "test.o")</>. -This must be done before loading any code that refers -to these symbols. -</para> - -<para> -Now you can load the compiled Lisp ("fasl") file into Lisp: -<userinput>(load "test.fasl")</> -And once the Lisp file is loaded, you can call the -Lisp routine that sets up the parameters and calls the C -function: -<userinput>(test-c-call::call-cfun)</> -</para> - -<para> -The C routine should print the following information to standard output: -<!-- FIXME: What should be here is a verbatim environment for computer - output, but since I don't know one in DocBook, I made do with - PROGRAMLISTING for now... --> -<programlisting>i = 5 -s = another Lisp string -r->x = 20 -r->s = a Lisp string -a[0] = 0. -a[1] = 1. -a[2] = 2. -a[3] = 3. -a[4] = 4. -a[5] = 5. -a[6] = 6. -a[7] = 7. -a[8] = 8. 
-a[9] = 9.</programlisting> -After return from the C function, -the Lisp wrapper function should print the following output: -<programlisting>back from C function</programlisting> -And upon return from the Lisp wrapper function, -before the next prompt is printed, the -Lisp read-eval-print loop should print the following return values: -<!-- FIXME: As above, it's not a program listing, but computer output... --> -<programlisting> -10 -"a C string" -</programlisting> -</para> - -</sect1> - -</chapter> diff --git a/doc/ffi.xml b/doc/ffi.xml new file mode 100644 index 0000000..2efc11c --- /dev/null +++ b/doc/ffi.xml @@ -0,0 +1,1255 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" +"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [ +<!ENTITY % myents SYSTEM "entities.inc"> +%myents; +]> + +<chapter id="ffi"><title>The Foreign Function Interface + +This chapter describes &SBCL;'s interface to C programs and +libraries (and, since C interfaces are a sort of lingua +franca of the Unix world, to other programs and libraries in +general.) + +In the modern Lisp world, the usual term for this +functionality is Foreign Function Interface, or FFI, where +despite the mention of function in this term, FFI also +refers to direct manipulation of C data structures as well as +functions. The traditional &CMUCL; terminology is Alien Interface, and +while that older terminology is no longer used much in the system +documentation, it is still reflected in names in the +implementation, notably in the name of the SB-ALIEN +package. + +Introduction to the Foreign Function Interface + + + +Because of Lisp's emphasis on dynamic memory allocation and garbage +collection, Lisp implementations use non-C-like memory representations +for objects. This representation mismatch creates friction when a Lisp +program must share objects with programs which expect C data. 
There +are three common approaches to establishing communication: + + The burden can be placed on the foreign program + (and programmer) by requiring the knowledge and use of the + representations used internally by the Lisp implementation. + This can require a considerable amount of glue code on the + C side, and that code tends to be sensitively dependent on the + internal implementation details of the Lisp system. + The Lisp system can automatically convert objects + back and forth between the Lisp and foreign representations. + This is convenient, but translation becomes prohibitively slow + when large or complex data structures must be shared. This approach + is supported by the &SBCL; FFI, and used automatically + when passing integers and strings. + The Lisp program can directly manipulate foreign + objects through the use of extensions to the Lisp language. + + + + +&SBCL;, like &CMUCL; before it, relies primarily on the +automatic conversion and direct manipulation approaches. The SB-ALIEN +package provides a facility wherein foreign values of simple scalar +types are automatically converted and complex types are directly +manipulated in their foreign representation. Additionally the +lower-level System Area Pointers (or SAPs) can be used where +necessary to provide untyped access to foreign memory. + +Any foreign objects that can't automatically be converted into +Lisp values are represented by objects of type alien-value. +Since Lisp is a dynamically typed language, even foreign objects must +have a run-time type; this type information is provided by +encapsulating the raw pointer to the foreign data within an +alien-value object. + +The type language and operations on foreign types are +intentionally similar to those of the C language. + + + +Foreign Types + + +Alien types have a description language based on nested list +structure. 
For example the C type +struct foo { + int a; + struct foo *b[100]; +}; +has the corresponding &SBCL; FFI type +(struct foo + (a int) + (b (array (* (struct foo)) 100))) + + +Defining Foreign Types + + +Types may be either named or anonymous. With structure and union +types, the name is part of the type specifier, allowing recursively +defined types such as: +(struct foo (a (* (struct foo)))) +An anonymous structure or union type is specified by using the name +nil. The with-alien macro defines a local +scope which captures any named type definitions. Other types +are not inherently named, but can be given named abbreviations using +the define-alien-type macro. + + + + +Foreign Types and Lisp Types + + +The foreign types form a subsystem of the &SBCL; type system. An +alien type specifier provides a way to use any foreign type as a +Lisp type specifier. For example, +(typep foo '(alien (* int))) +can be used to determine whether foo is a pointer to a foreign +int. alien type specifiers can be used in the same ways +as ordinary Lisp type specifiers (like string.) Alien type +declarations are subject to the same +precise type checking +as any other declaration. + + + +Note that the type identifiers used in the +foreign type system overlap with native Lisp type +specifiers in some cases. For example, the type specifier +(alien single-float) is identical to single-float, since +foreign floats are automatically converted to Lisp floats. When +type-of is called on an alien value that is not automatically +converted to a Lisp value, then it will return an alien type +specifier. + + + + +Foreign Type Specifiers + + +All foreign type names are exported from the sb-alien +package. Some foreign type names are also symbols in +the common-lisp package, in which case they are +reexported from the sb-alien package, so that +e.g. it is legal to refer to sb-alien:single-float. 
+ + + +These are the basic foreign type specifiers: + + + + + The foreign type specifier (* foo) describes a + pointer to an object of type foo. A pointed-to type + foo of t indicates a pointer to anything, + similar to void * in ANSI C. A null alien pointer can + be detected with the sb-alien:null-alien + function. + + + + + The foreign type specifier (array foo &optional dimensions) + describes an array of the specified dimensions, holding + elements of type foo. Note that (unlike in C) (* foo) and + (array foo) are considered to be different types when + type checking is done. If equivalence of pointer and array types + is desired, it may be explicitly coerced using + sb-alien:cast. + + + Arrays are accessed using sb-alien:deref, passing + the indices as additional arguments. Elements are stored in + row-major order (as in C), so the first dimension determines + only the size of the memory block, and not the layout of the + higher dimensions. An array whose first dimension is variable + may be specified by using nil as the first dimension. + Fixed-size arrays can be allocated as array elements, structure + slots or sb-alien:with-alien variables. Dynamic + arrays can only be allocated using sb-alien:make-alien. + + + + + The foreign type specifier + (sb-alien:struct name &rest fields) + describes a structure type with the specified name and + fields. Fields are allocated at the same offsets + used by the implementation's C compiler. If name + is nil then the structure is anonymous. + + + If a named foreign struct specifier is passed to + define-alien-type or with-alien, + then this defines, respectively, a new global or local foreign + structure type. If no fields are specified, then + the fields are taken from the current (local or global) alien + structure type definition of name. + + + + + The foreign type specifier + (sb-alien:union name &rest fields) + is similar to sb-alien:struct, but describes a union type.
+ All fields are allocated at the same offset, and the size of the + union is the size of the largest field. The programmer must + determine which field is active from context. + + + + + The foreign type specifier (sb-alien:enum name &rest specs) + describes an enumeration type that maps between integer values + and keywords. If name is nil, then the + type is anonymous. Each element of the specs + list is either a Lisp keyword, or a list (keyword value). + value is an integer. If value is not + supplied, then it defaults to one greater than the value for + the preceding spec (or to zero if it is the first spec.) + + + + + The foreign type specifier (sb-alien:signed &optional bits) + specifies a signed integer with the specified number of + bits precision. The upper limit on integer + precision is determined by the machine's word + size. If bits is not specified, the maximum + size will be used. + + + + + The foreign type specifier (integer &optional bits) is + equivalent to the corresponding type specifier using + sb-alien:signed instead of integer. + + + + + The foreign type specifier + (sb-alien:unsigned &optional bits) + is like corresponding type specifier using sb-alien:signed + except that the variable is treated as an unsigned integer. + + + + + The foreign type specifier (boolean &optional bits) is + similar to an enumeration type, but maps from Lisp nil + and t to C 0 and 1 + respectively. bits determines the amount of + storage allocated to hold the truth value. + + + + + The foreign type specifier single-float describes a + floating-point number in IEEE single-precision format. + + + + + The foreign type specifier double-float describes a + floating-point number in IEEE double-precision format. + + + + + The foreign type specifier + (function result-type &rest arg-types) + describes a foreign function that takes arguments of the specified + arg-types and returns a result of type result-type. 
+ Note that the only context where a foreign function type + is directly specified is in the argument to + sb-alien:alien-funcall. + In all other contexts, foreign functions are represented by + foreign function pointer types: (* (function ...)). + + + + + The foreign type specifier sb-alien:system-area-pointer + describes a pointer which is represented in Lisp as a + system-area-pointer object. &SBCL; exports this type from + sb-alien because &CMUCL; did, but tentatively (as of + the first draft of this section of the manual, &SBCL; 0.7.6) it is + deprecated, since it doesn't seem to be required by user code. + + + + + The foreign type specifier sb-alien:void is + used in function types to declare that no useful value + is returned. Using alien-funcall + to call a void foreign function will return + zero values. + + + + + The foreign type specifier sb-alien:c-string + is similar to (* char), but is interpreted as a + null-terminated string, and is automatically converted into a + Lisp string when accessed; or if the pointer is C NULL + or 0, then accessing it gives Lisp nil. + Lisp strings are stored with a trailing NUL termination, so no + copying (either by the user or the implementation) is necessary + when passing them to foreign code. + + + Assigning a Lisp string to a c-string structure field or + variable stores the contents of the string to the memory already + pointed to by that variable. When a foreign object of type + (* char) is assigned to a c-string, then the + c-string pointer is assigned to. This allows + c-string pointers to be initialized. For example: + (cl:in-package "CL-USER") ; which USEs package "SB-ALIEN" + (define-alien-type nil (struct foo (str c-string))) + (defun make-foo (str) (let ((my-foo (make-alien (struct foo)))) + (setf (slot my-foo 'str) (make-alien char (length str)) + (slot my-foo 'str) str) my-foo)) + Storing Lisp NIL in a c-string writes C + NULL to the variable. 
+ + + + + sb-alien also exports translations of these C type + specifiers as foreign type specifiers: + sb-alien:char, + sb-alien:short, + sb-alien:int, + sb-alien:long, + sb-alien:unsigned-char, + sb-alien:unsigned-short, + sb-alien:unsigned-int, + sb-alien:unsigned-long, + sb-alien:float, and + sb-alien:double. + + + + + + + + + + + +Operations On Foreign Values + + +This section describes how to read foreign values as Lisp +values, how to coerce foreign values to different kinds of foreign values, and +how to dynamically allocate and free foreign variables. + +Accessing Foreign Values + +(sb-alien:deref pointer-or-array &rest indices) + +The sb-alien:deref function returns the value pointed to by +a foreign pointer, or the value of a foreign array element. When +dereferencing a pointer, an optional single index can be specified to +give the equivalent of C pointer arithmetic; this index is scaled by +the size of the type pointed to. When dereferencing an array, the +number of indices must be the same as the number of dimensions in the +array type. deref can be set with setf to +assign a new value. + +(sb-alien:slot struct-or-union &rest slot-names) + +The sb-alien:slot function extracts the value of +the slot named slot-name from a foreign struct or +union. If struct-or-union is a pointer to a +structure or union, then it is automatically dereferenced. +sb-alien:slot can be set with setf to assign +a new value. Note that slot-name is evaluated, and need +not be a compile-time constant (but only constant slot accesses are +efficiently compiled.) + +Untyped memory + +As noted at the beginning of the chapter, the System Area +Pointer facilities allow untyped access to foreign memory. SAPs can +be converted to and from the usual typed foreign values using +sap-alien and alien-sap +(described elsewhere), and also to and from integers - raw machine +addresses. They should thus be used with caution; corrupting the Lisp +heap or other memory with SAPs is trivial. 
+ +(sb-sys:int-sap machine-address) + +Creates a SAP pointing at the virtual address +machine-address. + +(sb-sys:sap-ref-32 sap offset) + +Access the value of the memory location at +offset bytes from sap. This form +may also be used with setf to alter the memory at +that location. + +(sb-sys:sap= sap1 sap2) + +Compare sap1 and sap2 for +equality. + +Similarly named functions exist for accessing other sizes of +word, other comparisons, and other conversions. The reader is invited +to use apropos and describe +for more details: + +(apropos "sap" :sb-sys) + + + +Coercing Foreign Values + +(sb-alien:addr alien-expr) + + +The sb-alien:addr macro +returns a pointer to the location specified by +alien-expr, which must be either a foreign variable, a use of +sb-alien:deref, a use of sb-alien:slot, or a use of +sb-alien:extern-alien. + + +(sb-alien:cast foreign-value new-type) + +The sb-alien:cast macro +converts foreign-value to a new foreign value with the specified +new-type. Both types, old and new, must be foreign pointer, +array or function types. Note that the resulting Lisp +foreign variable object +is not eq to the +argument, but it does refer to the same foreign data bits. + +(sb-alien:sap-alien sap type) + +The sb-alien:sap-alien function converts sap +(a system area pointer) to a foreign value with the specified +type. type is not evaluated. + + +The type must be some foreign pointer, array, or +record type. + +(sb-alien:alien-sap foreign-value type) + +The sb-alien:alien-sap function +returns the SAP which points to foreign-value's data. + + +The foreign-value must be of some foreign pointer, +array, or record type. + + + +Foreign Dynamic Allocation + +Lisp code can call the C standard library functions +malloc and free to dynamically allocate and +deallocate foreign variables.
The Lisp code shares the same allocator +with foreign C code, so it's OK for foreign code to call +free on the result of Lisp +sb-alien:make-alien, or for Lisp code to call +sb-alien:free-alien on foreign objects allocated by C +code. + +(sb-alien:make-alien type size) + +The sb-alien:make-alien macro +returns a dynamically allocated foreign value of the specified +type (which is not evaluated.) The allocated memory is not +initialized, and may contain arbitrary junk. If supplied, +size is an expression to evaluate to compute the size of the +allocated object. There are two major cases: + + + When type is a foreign array type, an array of + that type is allocated and a pointer to it is returned. Note that you + must use deref to change the result to an array before you + can use deref to read or write elements: + + (cl:in-package "CL-USER") ; which USEs package "SB-ALIEN" + (defvar *foo* (make-alien (array char 10))) + (type-of *foo*) => (alien (* (array (signed 8) 10))) + (setf (deref (deref foo) 0) 10) => 10 + If supplied, size is used as the first dimension for the + array. + + + When type is any other foreign type, then an + object for that type is allocated, and a pointer to it is + returned. So (make-alien int) returns a (* int). + If size is specified, then a block of that many + objects is allocated, with the result pointing to the first one. + + + + +(sb-alien:free-alien foreign-value) + +The sb-alien:free-alien function +frees the storage for foreign-value, +which must have been allocated with Lisp make-alien +or C malloc. + +See also the sb-alien:with-alien macro, which +allocates foreign values on the stack. + + + + + +Foreign Variables + + + +Both local (stack allocated) and external (C global) foreign variables are +supported. + + +Local Foreign Variables + +(sb-alien:with-alien var-definitions &body body) + +The with-alien +macro establishes local +foreign variables +with the specified +alien types and names. 
+This form is analogous to defining a local variable in C: additional +storage is allocated, and the initial value is copied. +This form is less +analogous to LET-allocated Lisp variables, since the variables +can't be captured in closures: they live only for the dynamic extent +of the body, and referring to them outside is a gruesome error. + + +The var-definitions argument is a list of +variable definitions, each of the form +(name type &optional initial-value) +The names of the variables are established as symbol-macros; the bindings have +lexical scope, and may be assigned with setq +or setf. + + +The with-alien macro also establishes +a new scope for named structures +and unions. Any type specified for a variable may contain +named structure or union types with the slots specified. Within the +lexical scope of the binding specifiers and body, a locally defined +foreign structure type foo can be referenced by its name using +(struct foo). + + + + +External Foreign Variables + + +External foreign names are strings, and Lisp names are symbols. When +an external foreign value is represented using a Lisp variable, there +must be a way to convert from one name syntax into the other. The +macros extern-alien, define-alien-variable and +define-alien-routine use this conversion heuristic: + + Alien names are converted to Lisp names by uppercasing and + replacing underscores with hyphens. + Conversely, Lisp names are converted to alien names by + lowercasing and replacing hyphens with underscores. + Both the Lisp symbol and alien string names may be + separately specified by using a list of the form + (alien-string lisp-symbol) + + + +(sb-alien:define-alien-variable name type) + + +The define-alien-variable macro +defines name as an external foreign variable of the +specified foreign type. name and type are not +evaluated. The Lisp name of the variable (see above) becomes a +global alien variable. 
Global alien variables +are effectively ``global symbol macros''; a reference to the +variable fetches the contents of the external variable. Similarly, +setting the variable stores new contents---the new contents must be +of the declared type. Someday, they may well be implemented +using the &ANSI; define-symbol-macro mechanism, but +as of &SBCL; 0.7.5, they are still implemented using an older +more-or-less parallel mechanism inherited from &CMUCL;. + + + +For example, to access a C-level counter foo, one could +write + +(define-alien-variable "foo" int) +;; Now it is possible to get the value of the C variable foo simply by +;; referencing that Lisp variable: +(print foo) +(setf foo 14) +(incf foo) + + +(sb-alien:get-errno) + + +Since in modern C libraries, the errno "variable" is typically +no longer a variable, but some bizarre artificial construct +which behaves superficially like a variable within a given thread, +it can no longer reliably be accessed through the ordinary +define-alien-variable mechanism. Instead, &SBCL; provides +the operator sb-alien:get-errno to allow Lisp code to read it. + + +(sb-alien:extern-alien name type) + + +The extern-alien macro +returns an alien with the specified type which +points to an externally defined value. name is not evaluated, +and may be either a string or a symbol. type is +an unevaluated alien type specifier. + + + + + + +Foreign Data Structure Examples + + + +Now that we have alien types, operations and variables, we can manipulate +foreign data structures. This C declaration + +struct foo { + int a; + struct foo *b[100]; +}; +can be translated into the following alien type: +(define-alien-type nil + (struct foo + (a int) + (b (array (* (struct foo)) 100)))) + + + +Once the foo alien type has been defined as above, +the C expression + +struct foo f; +f.b[7].a +can be translated in this way: + +(with-alien ((f (struct foo))) + (slot (deref (slot f 'b) 7) 'a) + ;; + ;; Do something with f... 
+ ) + + + +Or consider this example of an external C variable and some accesses: + +struct c_struct { + short x, y; + char a, b; + int z; + struct c_struct *n; +}; +extern struct c_struct *my_struct; +my_struct->x++; +my_struct->a = 5; +my_struct = my_struct->n; +which can be manipulated in Lisp like this: + +(define-alien-type nil + (struct c-struct + (x short) + (y short) + (a char) + (b char) + (z int) + (n (* c-struct)))) +(define-alien-variable "my_struct" (* c-struct)) +(incf (slot my-struct 'x)) +(setf (slot my-struct 'a) 5) +(setq my-struct (slot my-struct 'n)) + + + + +Loading Unix Object Files + + +Foreign object files can be loaded into the running Lisp process by +calling the functions load-foreign or +load-1-foreign. + + + The sb-alien:load-1-foreign function is the more +primitive of the two operations. It loads a single object file into +the currently running Lisp. The external symbols defining routines and +variables are made available for future external references (e.g. by +extern-alien). Forward references to foreign symbols +aren't supported: load-1-foreign must be run before any +of the defined symbols are referenced. + + +sb-alien:load-foreign is built in terms of +load-1-foreign and some other machinery +like sb-ext:run-program. +It accepts a list of files and libraries, +and runs the linker on the files and +libraries, creating an absolute Unix object file which is then +processed by load-1-foreign. + +As of &SBCL; 0.7.5, all foreign code (code loaded +with load-1-foreign or load-foreign) is +lost when a Lisp core is saved with +sb-ext:save-lisp-and-die, and no attempt is made to +restore it when the core is loaded. Historically this has been an +annoyance both for &SBCL; users and for &CMUCL; users. +It's hard to solve this problem completely cleanly, but some +generally-reliable partial solution might be useful.
Once someone in +either camp gets sufficiently annoyed to create it, &SBCL; is +likely to adopt some mechanism for automatically restoring foreign +code when a saved core is loaded. + + + +Foreign Function Calls + + +The foreign function call interface allows a Lisp program to call +many functions written in languages that use the C calling convention. + + + +Lisp sets up various signal handling routines and other environment +information when it first starts up, and expects these to be in place +at all times. The C functions called by Lisp should not change the +environment, especially the signal handlers: the signal handlers +installed by Lisp typically have interesting flags set (e.g. to request +machine context information, or for signal delivery on an alternate +stack) which the Lisp runtime relies on for correct operation. +Precise details of how this works may change without notice between +versions; the source, or the brain of a friendly &SBCL; developer, +is the only documentation. Users of a Lisp built with the :sb-thread +feature should also read the Threading section + +of this manual. + +The <function>alien-funcall</function> Primitive + +(sb-alien:alien-funcall alien-function &rest arguments) + + +The alien-funcall function is the foreign function call +primitive: alien-function is called with the supplied +arguments and its C return value is returned as a Lisp value. +The alien-function is an arbitrary +run-time expression; to refer to a constant function, use +extern-alien or a value defined by +define-alien-routine. + + + +The type of alien-function +must be (alien (function ...)) +or (alien (* (function ...))). +The function type is used to +determine how to call the function (as though it was declared with +a prototype.) The type need not be known at compile time, but only +known-type calls are efficiently compiled. Limitations: + + Structure type return values are not implemented. + Passing of structures by value is not implemented.
+ + + + +Here is an example which allocates a (struct foo), calls a foreign +function to initialize it, then returns a Lisp vector of all the +(* (struct foo)) objects filled in by the foreign call: + +;; Allocate a foo on the stack. +(with-alien ((f (struct foo))) + ;; Call some C function to fill in foo fields. + (alien-funcall (extern-alien "mangle_foo" (function void (* foo))) + (addr f)) + ;; Find how many foos to use by getting the A field. + (let* ((num (slot f 'a)) + (result (make-array num))) + ;; Get a pointer to the array so that we don't have to keep extracting it: + (with-alien ((a (* (array (* (struct foo)) 100)) (addr (slot f 'b)))) + ;; Loop over the first N elements and stash them in the result vector. + (dotimes (i num) + (setf (svref result i) (deref (deref a) i))) + ;; Voila. + result))) + + + + +The <function>define-alien-routine</function> Macro + +(sb-alien:define-alien-routine name result-type &rest arg-specifiers) + + +The define-alien-routine macro is a convenience +for automatically generating Lisp +interfaces to simple foreign functions. The primary feature is the +parameter style specification, which translates the C +pass-by-reference idiom into additional return values. + + + +name is usually a string external symbol, but may also be a +symbol Lisp name or a list of the foreign name and the Lisp name. +If only one name is specified, the other is automatically derived +as for extern-alien. +result-type is the alien type of the return value. + + + +Each element of the arg-specifiers list +specifies an argument to the foreign function, and is +of the form +(aname atype &optional style) +aname is the symbol name of the argument to the constructed +function (for documentation). atype is the alien type of +the corresponding foreign argument. The semantics of the actual call +are the same as for alien-funcall.
style +specifies how this argument should be handled at call and return time, +and should be one of the following: + + :in specifies that the argument is + passed by value. This is the default. :in arguments + have no corresponding return value from the Lisp function. + + :copy is similar to :in, + but the argument is copied + to a pre-allocated object and a pointer to this object is passed + to the foreign routine. + :out specifies a pass-by-reference + output value. The type of the argument must be a pointer to + a fixed-sized object (such as an integer or pointer). + :out and :in-out styles cannot + be used with pointers to arrays, records or functions. An + object of the correct size is allocated on the stack, and + its address is passed to the foreign function. When the + function returns, the contents + of this location are returned as one of the values of the Lisp + function (and the location is automatically deallocated). + + :in-out is a combination of + :copy and :out. + The argument is copied to a pre-allocated object and a pointer to + this object is passed to the foreign routine. On return, the + contents of this location are returned as an additional value. + + + + + + +Any efficiency-critical foreign interface function should be inline +expanded, which can be done by preceding the +define-alien-routine call with: +(declaim (inline lisp-name)) +In addition to avoiding the Lisp call overhead, this allows +pointers, word-integers and floats to be passed using non-descriptor +representations, avoiding consing. + + + + + +<function>define-alien-routine</function> Example + + +Consider the C function cfoo +with the following calling convention: + +void +cfoo (str, a, i) + char *str; + char *a; /* update */ + int *i; /* out */ +{ + /* body of cfoo(...)
*/ +} +This can be described by the following call to +define-alien-routine: + +(define-alien-routine "cfoo" void + (str c-string) + (a char :in-out) + (i int :out)) +The Lisp function cfoo will have +two arguments (str and a) +and two return values (a and i). + + + + +Calling Lisp From C + + +Calling Lisp functions from C is sometimes possible, but is extremely +hackish and poorly supported as of &SBCL; 0.7.5. +See funcall0 ... funcall3 in +the runtime system. The +arguments must be valid &SBCL; object descriptors (so that +e.g. fixnums must be +left-shifted by 2.) As of &SBCL; 0.7.5, the format +of object descriptors is documented only by the source code and, in parts, +by the old &CMUCL; "INTERNALS" documentation. + + Note that the garbage collector moves objects, and won't be +able to fix up any references in C variables. There are three +mechanisms for coping with this: + + +The sb-ext:purify function moves all live Lisp +data into static or read-only areas such that it will never be moved +(or freed) again in the life of the Lisp session. + +sb-sys:with-pinned-objects is a +macro which arranges for some set of objects to be pinned in memory +for the dynamic extent of its body forms. On ports which use the +generational garbage collector (as of &SBCL; 0.8.3, only the x86) this +has a page granularity - i.e. the entire 4k page or pages containing +the objects will be locked down. On other ports it is implemented by +turning off GC for the duration (so could be said to have a +whole-world granularity). + +Disable GC, using the without-gcing +macro or gc-off call. + + + + + + + + + + +Step-By-Step Example of the Foreign Function Interface + + +This section presents a complete example of an interface to a somewhat +complicated C function.
+ + + +Suppose you have the following C function which you want to be able to +call from Lisp in the file test.c + +struct c_struct +{ + int x; + char *s; +}; + +struct c_struct *c_function (i, s, r, a) + int i; + char *s; + struct c_struct *r; + int a[10]; +{ + int j; + struct c_struct *r2; + + printf("i = %d\n", i); + printf("s = %s\n", s); + printf("r->x = %d\n", r->x); + printf("r->s = %s\n", r->s); + for (j = 0; j < 10; j++) printf("a[%d] = %d.\n", j, a[j]); + r2 = (struct c_struct *) malloc (sizeof(struct c_struct)); + r2->x = i + 5; + r2->s = "a C string"; + return(r2); +}; + + + +It is possible to call this C function from Lisp using the file +test.lisp containing + +(cl:defpackage "TEST-C-CALL" (:use "CL" "SB-ALIEN" "SB-C-CALL")) +(cl:in-package "TEST-C-CALL") + +;;; Define the record C-STRUCT in Lisp. +(define-alien-type nil + (struct c-struct + (x int) + (s c-string))) + +;;; Define the Lisp function interface to the C routine. It returns a +;;; pointer to a record of type C-STRUCT. It accepts four parameters: +;;; I, an int; S, a pointer to a string; R, a pointer to a C-STRUCT +;;; record; and A, a pointer to the array of 10 ints. +;;; +;;; The INLINE declaration eliminates some efficiency notes about heap +;;; allocation of alien values. +(declaim (inline c-function)) +(define-alien-routine c-function + (* (struct c-struct)) + (i int) + (s c-string) + (r (* (struct c-struct))) + (a (array int 10))) + +;;; a function which sets up the parameters to the C function and +;;; actually calls it +(defun call-cfun () + (with-alien ((ar (array int 10)) + (c-struct (struct c-struct))) + (dotimes (i 10) ; Fill array. + (setf (deref ar i) i)) + (setf (slot c-struct 'x) 20) + (setf (slot c-struct 's) "a Lisp string") + + (with-alien ((res (* (struct c-struct)) + (c-function 5 "another Lisp string" (addr c-struct) ar))) + (format t "~&back from C function~%") + (multiple-value-prog1 + (values (slot res 'x) + (slot res 's)) + + ;; Deallocate result. 
(after we are done referring to it: + ;; "Pillage, *then* burn.") + (free-alien res))))) + + + +To execute the above example, it is necessary to compile the C routine, +e.g.: +cc -c test.c +(In order to enable incremental loading with some linkers, you may need +to say +cc -G 0 -c test.c) + + + +Once the C code has been compiled, you can start up Lisp and load it in: +sbcl. +Lisp should start up with its normal prompt. + + +Within Lisp, +compile the Lisp file. (This step can be done separately. You don't +have to recompile every time.) +(compile-file "test.lisp") + + + +Within Lisp, load the foreign object file to define the necessary +symbols: +(load-foreign "test.o"). +This must be done before loading any code that refers +to these symbols. + + + +Now you can load the compiled Lisp ("fasl") file into Lisp: +(load "test.fasl") +And once the Lisp file is loaded, you can call the +Lisp routine that sets up the parameters and calls the C +function: +(test-c-call::call-cfun) + + + +The C routine should print the following information to standard output: + +i = 5 +s = another Lisp string +r->x = 20 +r->s = a Lisp string +a[0] = 0. +a[1] = 1. +a[2] = 2. +a[3] = 3. +a[4] = 4. +a[5] = 5. +a[6] = 6. +a[7] = 7. +a[8] = 8. +a[9] = 9. 
+After return from the C function, +the Lisp wrapper function should print the following output: +back from C function +And upon return from the Lisp wrapper function, +before the next prompt is printed, the +Lisp read-eval-print loop should print the following return values: + + +10 +"a C string" + + + + + + diff --git a/doc/fo.xsl b/doc/fo.xsl new file mode 100644 index 0000000..d04b2de --- /dev/null +++ b/doc/fo.xsl @@ -0,0 +1,8 @@ + + + + + + + diff --git a/doc/html.xsl b/doc/html.xsl new file mode 100644 index 0000000..34c89fd --- /dev/null +++ b/doc/html.xsl @@ -0,0 +1,10 @@ + + + + + + + + + diff --git a/doc/html_chunk.xsl b/doc/html_chunk.xsl new file mode 100644 index 0000000..a2bb88f --- /dev/null +++ b/doc/html_chunk.xsl @@ -0,0 +1,9 @@ + + + + + + + + diff --git a/doc/intro.sgml b/doc/intro.sgml deleted file mode 100644 index 0c4c1ee..0000000 --- a/doc/intro.sgml +++ /dev/null @@ -1,209 +0,0 @@ -Introduction</> - -<para>&SBCL; is a mostly-conforming implementation of the &ANSI; -&CommonLisp; standard. This manual focuses on behavior which is -specific to &SBCL;, not on behavior which is common to all -implementations of &ANSI; &CommonLisp;.</para> - -<sect1 id="more-cl-info"> -<title>Where To Go For More Information about &CommonLisp; in General - -Regardless of your ability level, two very useful resources -for working with any implementation of -&CommonLisp; are the -ILISP -package for Emacs and -the &CommonLisp; -HyperSpec. - -If you're not a programmer and you're trying to learn, -many introductory Lisp books are available. However, we don't have any -standout favorites. If you can't decide, try checking the Usenet -comp.lang.lisp FAQ for recent recommendations. - -If you are an experienced programmer in other languages -but need to learn about Lisp, three books stand out. - - ANSI Common Lisp, by Paul Graham, - will teach you about most of the language. (And later it might - also be worth checking out On Lisp, by the same - author.) 
- Paradigms Of Artificial Intelligence - Programming, by Peter Norvig, also has some good information - on general &CommonLisp; programming, and many nontrivial examples. - Whether or not your work is AI, it's a very good book to look - at. - - Neither of the books above emphasizes CLOS, but - Object-Oriented Programming In Common Lisp by Sonya Keene - does. Even if you're very knowledgeable about object oriented - programming in the abstract, it's worth looking at this book - if you want to do any OO in &CommonLisp;. Some abstractions - in CLOS (especially multiple dispatch) go beyond anything - you'll see in most OO systems, and there are a number of lesser - differences as well. This book tends to help with the culture shock. - - - - - - - -Where To Go For More Information About &SBCL; - -Before you read this user manual, you should probably read -two other things. - - You should know how to program in &CommonLisp;. - If you don't already know how, you should probably read a - book on it. - The Unix man page for &SBCL; will tell you - how to start the &SBCL; environment, so you can get to the - classic hello, world level of knowledge. It's the file - called sbcl.1 in the &SBCL; distribution. If &SBCL; is - installed on your system, you can read a formatted copy by - executing the command man sbcl. - - - -Besides this user manual and the Unix man page, some -other &SBCL;-specific information is available: - - The - &SBCL; home page has some general - information, plus links to mailing lists devoted to &SBCL;, - and to archives of these mailing lists. - Documentation for non-&ANSI; extensions for - various commands is available online from the &SBCL; executable - itself. The extensions for functions which have their own - command prompts (e.g. the debugger, and inspect) - are documented in text available by typing help - at their command prompts. 
The extensions for functions which - don't have their own command prompt (like trace - does) are described in their documentation strings, - unless your &SBCL; was compiled with an option not - to include documentation strings, in which case the doc strings - are only readable in the source code. - Some low-level information describing the - programming details of the conversion from &CMUCL; to &SBCL; - is available in the doc/FOR-CMUCL-DEVELOPERS - file in the &SBCL; distribution. - - - - - - -Overview Of SBCL, How It Works And Where It Came From</> - -<para>You can work productively with SBCL without knowing anything -understanding anything about where it came from, how it is implemented, -or how it extends the &ANSI; &CommonLisp; standard. However, -a little knowledge can be helpful in order to understand error -messages, to troubleshoot problems, to understand why some parts of -the system are better debugged than others, and to anticipate which -known bugs, known performance problems, and missing extensions are -likely to be fixed, tuned, or added. </para> - -<para>&SBCL; is descended from &CMUCL;, which is itself descended from -Spice Lisp, including early implementations for the Mach operating -system on the IBM RT, back in the 1980s. Design decisions from that -time are still reflected in the current implementation: -<itemizedlist> - <listitem><para>The system expects to be loaded into a - fixed-at-compile-time location in virtual memory, and also expects - the location of all of its heap storage to be specified - at compile time.</para></listitem> - <listitem><para>The system overcommits memory, allocating large - amounts of address space from the system (often more than - the amount of virtual memory available) and then failing - if ends up using too much of the allocated storage.</para></listitem> - <listitem><para>A word is a 32-bit quantity. The system has been - ported to many processor architectures without altering this - basic principle. 
Some hacks allow the system to run on the Alpha - chip (a 64-bit architecture) but even there 32-bit words are - used. The assumption that a word is - 32 bits wide is implicit in hundreds of places in the - system.</para></listitem> - <listitem><para>The system is implemented as a C program which is - responsible for supplying low-level services and loading a - Lisp <quote>.core</quote> file. - </para></listitem> -</itemizedlist> -</para> - -<para>&SBCL; also inherited some newer architectural features from -&CMUCL;. The most important is that it has a generational garbage -collector (<quote>GC</>), which has various implications (mostly good) -for performance. These are discussed in <link linkend="efficiency"> -another chapter</link>.</para> - -<para>&SBCL; has diverged from &CMUCL; in that &SBCL; is now -essentially a <quote>compiler-only implementation</quote> of -&CommonLisp;. A &CommonLisp; implementation is permitted to implement -both a compiler and an interpreter, and there's some special support -in the standard (e.g. the distinction between <function>functionp</> -and <function>compiled-function-p</>) to help support that. But &SBCL; -has only a vestigial, rudimentary true interpreter. In &SBCL;, the -<function>eval</> function only truly <quote>interprets</quote> a few -special classes of forms, such as symbols which are -<function>boundp</>. More complicated forms are evaluated by calling -<function>compile</> and then calling <function>funcall</> on the -returned result. -</para> - -<para>The direct ancestor of &SBCL; is the X86 port of &CMUCL;. This -port was in some ways the most cobbled-together of all the &CMUCL; -ports, since a number of strange changes had to be made to support the -register-poor X86 architecture. Some things (like tracing and -debugging) do not work particularly well there. 
&SBCL; should be able -to improve in these areas (and has already improved in some other -areas), but it takes a while.</para> - -<para>On the x86, &SBCL; like the X86 port of &CMUCL;, uses a -<emphasis>conservative</> GC. This means that it doesn't maintain a -strict separation between tagged and untagged data, instead treating -some untagged data (e.g. raw floating point numbers) as -possibly-tagged data and so not collecting any Lisp objects that they -point to. This has some negative consequences for average time -efficiency (though possibly no worse than the negative consequences of -trying to implement an exact GC on a processor architecture as -register-poor as the X86) and also has potentially unlimited -consequences for worst-case memory efficiency. In practice, -conservative garbage collectors work reasonably well, not getting -anywhere near the worst case. But they can occasionally cause -odd patterns of memory usage.</para> - -<para>The fork from &CMUCL; was based on a major rewrite of the system -bootstrap process. &CMUCL; has for many years tolerated a very unusual -<quote>build</> procedure which doesn't actually build the complete -system from scratch, but instead progressively overwrites parts of a -running system with new versions. This quasi-build procedure can cause -various bizarre bootstrapping hangups, especially when a major change -is made to the system. It also makes the connection between the -current source code and the current executable more tenuous than in -other software systems -- it's easy to accidentally -<quote>build</> a &CMUCL; system containing characteristics not -reflected in the current version of the source code.</para> - -<para>Other major changes since the fork from &CMUCL; include -<itemizedlist> - <listitem><para>&SBCL; has dropped support for many &CMUCL; extensions, - (e.g. IP networking, remote procedure call, Unix system interface, and X11 - interface). 
Some of these are now available as contributed or - third-party modules.</para></listitem> - <listitem><para>&SBCL; has deleted or deprecated - some nonstandard features and code complexity which helped - efficiency at the price of maintainability. For example, the - &SBCL; compiler no longer implements memory pooling internally - (and so is simpler and more maintainable, but generates more - garbage and runs more slowly), and various block-compilation - efficiency-increasing extensions to the language have been - deleted or are no longer used in the implementation of &SBCL; - itself.</para></listitem> -</itemizedlist> -</para> - -</sect1> - -</chapter> diff --git a/doc/intro.xml b/doc/intro.xml new file mode 100644 index 0000000..79daa99 --- /dev/null +++ b/doc/intro.xml @@ -0,0 +1,216 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" +"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [ +<!ENTITY % myents SYSTEM "entities.inc"> +%myents; +]> + +<chapter id="intro"><title>Introduction + +&SBCL; is a mostly-conforming implementation of the &ANSI; +&CommonLisp; standard. This manual focuses on behavior which is +specific to &SBCL;, not on behavior which is common to all +implementations of &ANSI; &CommonLisp;. + + + Where To Go For More Information about &CommonLisp; in General + + Regardless of your ability level, two very useful resources + for working with any implementation of + &CommonLisp; are the + ILISP + package for Emacs and + the &CommonLisp; + HyperSpec. + + If you're not a programmer and you're trying to learn, + many introductory Lisp books are available. However, we don't have any + standout favorites. If you can't decide, try checking the Usenet + comp.lang.lisp FAQ for recent recommendations. + + If you are an experienced programmer in other languages + but need to learn about Lisp, three books stand out. + + ANSI Common Lisp, by Paul Graham, + will teach you about most of the language. 
(And later it might + also be worth checking out On Lisp, by the same + author.) + Paradigms Of Artificial Intelligence + Programming, by Peter Norvig, also has some good information + on general &CommonLisp; programming, and many nontrivial examples. + Whether or not your work is AI, it's a very good book to look + at. + + Neither of the books above emphasizes CLOS, but + Object-Oriented Programming In Common Lisp by Sonya Keene + does. Even if you're very knowledgeable about object oriented + programming in the abstract, it's worth looking at this book + if you want to do any OO in &CommonLisp;. Some abstractions + in CLOS (especially multiple dispatch) go beyond anything + you'll see in most OO systems, and there are a number of lesser + differences as well. This book tends to help with the culture shock. + + + + + + + + Where To Go For More Information About &SBCL; + + Before you read this user manual, you should probably read + two other things. + + You should know how to program in &CommonLisp;. + If you don't already know how, you should probably read a + book on it. + The Unix man page for &SBCL; will tell you + how to start the &SBCL; environment, so you can get to the + classic hello, world level of knowledge. It's the file + called sbcl.1 in the &SBCL; distribution. If &SBCL; is + installed on your system, you can read a formatted copy by + executing the command man sbcl. + + + + Besides this user manual and the Unix man page, some + other &SBCL;-specific information is available: + + The + &SBCL; home page has some general + information, plus links to mailing lists devoted to &SBCL;, + and to archives of these mailing lists. + Documentation for non-&ANSI; extensions for + various commands is available online from the &SBCL; executable + itself. The extensions for functions which have their own + command prompts (e.g. the debugger, and inspect) + are documented in text available by typing help + at their command prompts. 
The extensions for functions which + don't have their own command prompt (like trace + does) are described in their documentation strings, + unless your &SBCL; was compiled with an option not + to include documentation strings, in which case the doc strings + are only readable in the source code. + Some low-level information describing the + programming details of the conversion from &CMUCL; to &SBCL; + is available in the doc/FOR-CMUCL-DEVELOPERS + file in the &SBCL; distribution. + + + + + + + Overview Of SBCL, How It Works And Where It Came From + + You can work productively with SBCL without knowing or + understanding anything about where it came from, how it is implemented, + or how it extends the &ANSI; &CommonLisp; standard. However, + a little knowledge can be helpful in order to understand error + messages, to troubleshoot problems, to understand why some parts of + the system are better debugged than others, and to anticipate which + known bugs, known performance problems, and missing extensions are + likely to be fixed, tuned, or added. + + &SBCL; is descended from &CMUCL;, which is itself descended from + Spice Lisp, including early implementations for the Mach operating + system on the IBM RT, back in the 1980s. Design decisions from that + time are still reflected in the current implementation: + + The system expects to be loaded into a + fixed-at-compile-time location in virtual memory, and also expects + the location of all of its heap storage to be specified + at compile time. + The system overcommits memory, allocating large + amounts of address space from the system (often more than + the amount of virtual memory available) and then failing + if it ends up using too much of the allocated storage. + A word is a 32-bit quantity. The system has been + ported to many processor architectures without altering this + basic principle. Some hacks allow the system to run on the Alpha + chip (a 64-bit architecture) but even there 32-bit words are + used.
The assumption that a word is + 32 bits wide is implicit in hundreds of places in the + system. + The system is implemented as a C program which is + responsible for supplying low-level services and loading a + Lisp .core file. + + + + + &SBCL; also inherited some newer architectural features from + &CMUCL;. The most important is that it has a generational garbage + collector (GC), which has various implications (mostly good) + for performance. These are discussed in + another chapter. + + &SBCL; has diverged from &CMUCL; in that &SBCL; is now + essentially a compiler-only implementation of + &CommonLisp;. A &CommonLisp; implementation is permitted to implement + both a compiler and an interpreter, and there's some special support + in the standard (e.g. the distinction between functionp + and compiled-function-p) to help support that. But &SBCL; + has only a vestigial, rudimentary true interpreter. In &SBCL;, the + eval function only truly interprets a few + special classes of forms, such as symbols which are + boundp. More complicated forms are evaluated by calling + compile and then calling funcall on the + returned result. + + + The direct ancestor of &SBCL; is the X86 port of &CMUCL;. This + port was in some ways the most cobbled-together of all the &CMUCL; + ports, since a number of strange changes had to be made to support the + register-poor X86 architecture. Some things (like tracing and + debugging) do not work particularly well there. &SBCL; should be able + to improve in these areas (and has already improved in some other + areas), but it takes a while. + + On the x86, &SBCL;, like the X86 port of &CMUCL;, uses a + conservative GC. This means that it doesn't maintain a + strict separation between tagged and untagged data, instead treating + some untagged data (e.g. raw floating point numbers) as + possibly-tagged data and so not collecting any Lisp objects that they + point to.
This has some negative consequences for average time + efficiency (though possibly no worse than the negative consequences of + trying to implement an exact GC on a processor architecture as + register-poor as the X86) and also has potentially unlimited + consequences for worst-case memory efficiency. In practice, + conservative garbage collectors work reasonably well, not getting + anywhere near the worst case. But they can occasionally cause + odd patterns of memory usage. + + The fork from &CMUCL; was based on a major rewrite of the system + bootstrap process. &CMUCL; has for many years tolerated a very unusual + build procedure which doesn't actually build the complete + system from scratch, but instead progressively overwrites parts of a + running system with new versions. This quasi-build procedure can cause + various bizarre bootstrapping hangups, especially when a major change + is made to the system. It also makes the connection between the + current source code and the current executable more tenuous than in + other software systems -- it's easy to accidentally + build a &CMUCL; system containing characteristics not + reflected in the current version of the source code. + + Other major changes since the fork from &CMUCL; include + + &SBCL; has dropped support for many &CMUCL; extensions, + (e.g. IP networking, remote procedure call, Unix system interface, and X11 + interface). Some of these are now available as contributed or + third-party modules. + &SBCL; has deleted or deprecated + some nonstandard features and code complexity which helped + efficiency at the price of maintainability. For example, the + &SBCL; compiler no longer implements memory pooling internally + (and so is simpler and more maintainable, but generates more + garbage and runs more slowly), and various block-compilation + efficiency-increasing extensions to the language have been + deleted or are no longer used in the implementation of &SBCL; + itself. 
+ + + + + + diff --git a/doc/make-doc.sh b/doc/make-doc.sh index 01f1ea5..53e27bc 100644 --- a/doc/make-doc.sh +++ b/doc/make-doc.sh @@ -1,25 +1,20 @@ #!/bin/sh -# Where is Jade? (i.e. James Clark's implementation of DSSSL, or -# something offsprung) -if [ "" != "$JADE" ]; then +. ../find-gnumake.sh +find_gnumake + +# Where is xsltproc? +if [ "" != "$XSLTPROC" ]; then # The user has told us where to find jade, good. - echo using JADE=$JADE -elif which openjade > /dev/null; then - # OpenJade is the version of Jade which comes with OpenBSD 2.9, + echo using XSLTPROC=$XSLTPROC +elif which xsltproc > /dev/null; then + # Openxsltproc is the version of xsltproc which comes with OpenBSD 2.9, # and I started using it in sbcl-0.pre7.x. -- WHN - JADE=openjade -elif which jade > /dev/null; then - # I used the original Jade until sbcl-0.pre7.x. It might still - # work. -- WHN - JADE=jade + XSLTPROC=xsltproc else - echo "can't find Jade, sorry" + echo "can't find xsltproc, sorry" exit 1 fi -# Our hacked sbcl-html.dsl directs HTML output to html/. Make a clean slate. -rm -rf html -mkdir html - -$JADE -t sgml -ihtml -d sbcl-html.dsl\#html user-manual.sgml +export XSLTPROC +$GNUMAKE html diff --git a/doc/sbcl-asdf-install.1 b/doc/sbcl-asdf-install.1 new file mode 100644 index 0000000..a5f38a1 --- /dev/null +++ b/doc/sbcl-asdf-install.1 @@ -0,0 +1,133 @@ +.TH SBCL-ASDF-INSTALL 1 "$Date$" +.AT 3 +.SH NAME +.BI SBCL-ASDF-INSTALL + -- Download and Install ASDF Systems + +.SH SYNOPSIS +.BI sbcl-asdf-install +[packages ...] + +Each argument may be +.TP 3 +\-- +The name of a cliki page. asdf-install visits that page and finds +the download location from the `:(package)' tag - usually rendered +as "Download ASDF package from ..." 
+.TP 3 +\-- +A URL, which is downloaded directly +.TP 3 +\-- +A local tar.gz file, which is installed + + +.SH DESCRIPTION +Downloads and installs an ASDF system or anything else that looks +convincingly like one, including updating the ASDF:*CENTRAL-REGISTRY* +symlinks for all the toplevel .asd files it contains. Please read +this file before use; in particular, this is an automatic tool that +downloads and compiles stuff it finds on the 'net. Please look at the +SECURITY section and be sure you understand the implications. + + +.SH SECURITY CONCERNS: READ THIS CAREFULLY +When you invoke asdf-install, you are asking SBCL to download, +compile, and install software from some random site on the web. Given +that it's indirected through a page on CLiki, any malicious third party +doesn't even need to hack the distribution server to replace the +package with something else: he can just edit the link. + +For this reason, we encourage package providers to crypto-sign their +packages (see details at the URL in the PACKAGE CREATION section) and +users to check the signatures. asdf-install has three levels of +automatic signature checking: "on", "off" and "unknown sites", which +can be set using the configuration variables described in +CUSTOMIZATION below. The default is "unknown sites", which will +expect a GPG signature on all downloads except those from +presumed-good sites. The current default presumed-good sites are +CCLAN nodes, and two web sites run by SBCL maintainers; again, see +below for customization details. + + +.SH CUSTOMIZATION + +If the file $HOME/.asdf-install exists, it is loaded. This can be +used to override the default values of exported special variables. +Presently these are +.TP 3 +\-- +*PROXY* + +defaults to $http_proxy environment variable +.TP 3 +\-- +*CCLAN-MIRROR* + +preferred/nearest CCLAN node. See the list at +http://ww.telent.net/cclan-choose-mirror +.TP 3 +\-- +*SBCL-HOME* + +Set from $SBCL_HOME environment variable.
This should already be +correct for whatever SBCL is running, if it's been installed correctly +.TP 3 +\-- +*VERIFY-GPG-SIGNATURES* + +Verify GPG signatures for the downloaded packages? + +NIL - no, T - yes, :UNKNOWN-LOCATIONS - only for URLs which aren't in CCLAN and don't begin with one of the prefixes in *SAFE-URL-PREFIXES* +.TP 3 +\-- +*LOCATIONS* + +Possible places in the filesystem to install packages into. See default +value for format +.TP 3 +\-- +*SAFE-URL-PREFIXES* + +List of locations for which GPG signature checking won't be done when +*verify-gpg-signatures* is :unknown-locations + + +.SH PACKAGE CREATION + +If you want to create your own packages that can be installed using this +loader, see the "Making your package downloadable..." section at +http://www.cliki.net/asdf-install + + +.SH HACKERS NOTE + +Listen very carefully: I will say this only as often as it appears to +be necessary to say it. asdf-install is not a good example of how to +write a URL parser, HTTP client, or anything else, really. +Well-written extensible and robust URL parsers, HTTP clients, FTP +clients, etc would definitely be nice things to have, but it would be +nicer to have them in CCLAN where anyone can use them - after having +downloaded them with asdf-install - than in SBCL contrib where they're +restricted to SBCL users and can only be updated once a month via SBCL +developers. This is a bootstrap tool, and as such, will tend to +resist changes that make it longer or dependent on more other +packages, unless they also add to its usefulness for bootstrapping. + + +.SH TODO + +.TP 3 +a) +gpg signature checking would be better if it actually checked against +a list of "trusted to write Lisp" keys, instead of just "trusted to be +who they say they are" +.TP 3 +b) +nice to have: resume half-done downloads instead of starting from scratch +every time.
but right now we're dealing in fairly small packages, this is not +an immediate concern + + +.SH SEE ALSO +.BR sbcl (1) diff --git a/doc/user-manual.sgml b/doc/user-manual.sgml deleted file mode 100644 index c4eb954..0000000 --- a/doc/user-manual.sgml +++ /dev/null @@ -1,66 +0,0 @@ - - AMOP"> - ANSI"> - CMU CL"> - IEEE"> - Python"> - SBCL"> - - - - - - - - - - - - - - ]> - - - - - &SBCL; User Manual - - - This manual is part of the &SBCL; software system. See the - README file for more information. - - This manual is largely derived from the manual for the &CMUCL; - system, which was produced at Carnegie Mellon University and - later released into the public domain. This manual is in the - public domain and is provided with absolutely no warranty. See the - COPYING and CREDITS files for more - information. - - - - -&ch-intro; -&ch-compiler; -&ch-efficiency; -&ch-beyond-ansi; -&ch-ffi; - - -This manual is maintained in SGML/DocBook, and automatically -translated into other forms (e.g. HTML or TeX). If you're -reading this manual in one of these non-DocBook -translated forms, that's fine, but if you want to modify -this manual, you are strongly advised to seek out a DocBook version -and modify that instead of modifying a translated version. Even -better might be to seek out the DocBook version -(maintained at the time of this writing as part of -the &SBCL; project) -and submit a patch. - - - diff --git a/doc/user-manual.xml b/doc/user-manual.xml new file mode 100644 index 0000000..59d2ee2 --- /dev/null +++ b/doc/user-manual.xml @@ -0,0 +1,45 @@ + + +%myents; +]> + + + + &SBCL; User Manual + + + This manual is part of the &SBCL; software system. See the + README file for more information. + + This manual is largely derived from the manual for the + &CMUCL; system, which was produced at Carnegie Mellon University + and later released into the public domain. This manual is in the + public domain and is provided with absolutely no warranty. 
See the + COPYING and CREDITS + files for more information. + + + + + + + + + + + + This manual is maintained in XML/DocBook, and automatically + translated into other forms (e.g. HTML or TeX). If you're + reading this manual in one of these non-DocBook + translated forms, that's fine, but if you want to + modify this manual, you are strongly advised to + seek out a DocBook version and modify that instead of modifying a + translated version. Even better might be to seek out + the DocBook version (maintained at the time of + this writing as part of the + &SBCL; project) and submit a patch. + + + diff --git a/version.lisp-expr b/version.lisp-expr index d35a3b9..604ee45 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"0.8.6" +"0.8.6.1"