1 %% CMU Common Lisp User's Manual.
4 %% This is a modified version of the original CMUCL User's Manual.
5 %% The key changes are modification of this file to use standard
6 %% LaTeX2e. This means latexinfo isn't going to work anymore.
7 %% However, Latex2html support has been added.
9 %% Jan 1998 Paul Werkowski
10 %% A few of the packages below are not part of the standard LaTeX2e
11 %% distribution, and must be obtained from a repository. At this time
12 %% I was able to fetch from
13 %% ftp.cdrom.com:pub/tex/ctan/macros/latex/contrib/supported/
18 %% changebar/changebar.ins
19 %% changebar/changebar.dtx
20 %% One runs latex on the .ins file to produce .tex and/or .sty
21 %% files that must be put in a path searched by latex.
23 \documentclass{report}
24 \usepackage{changebar}
32 \usepackage{html,color}
35 %% Define the indices. We need one for Types, Variables, Functions,
36 %% and a general concept index.
38 \newindex{types}{tdx}{tnd}{Type Index}
39 \newindex{vars}{vdx}{vnd}{Variable Index}
40 \newindex{funs}{fdx}{fnd}{Function Index}
41 \newindex{concept}{cdx}{cnd}{Concept Index}
43 \newcommand{\tindexed}[1]{\index[types]{#1}\textsf{#1}}
44 \newcommand{\findexed}[1]{\index[funs]{#1}\textsf{#1}}
45 \newcommand{\vindexed}[1]{\index[vars]{#1}\textsf{*#1*}}
46 \newcommand{\cindex}[1]{\index[concept]{#1}}
47 \newcommand{\cpsubindex}[2]{\index[concept]{#1!#2}}
49 %% This code taken from the LaTeX companion. It's meant as a
50 %% replacement for the description environment. We want one that
51 %% prints description items in a fixed size box and puts the
52 %% description itself on the same line or the next depending on the
54 \newcommand{\entrylabel}[1]{\mbox{#1}\hfil}
55 \newenvironment{entry}{%
57 {\renewcommand{\makelabel}{\entrylabel}%
58 \setlength{\labelwidth}{45pt}%
59 \setlength{\leftmargin}{\labelwidth+\labelsep}}}%
63 \newcommand{\Lentrylabel}[1]{%
64 \settowidth{\Mylen}{#1}%
65 \ifthenelse{\lengthtest{\Mylen > \labelwidth}}%
66 {\parbox[b]{\labelwidth}% term > labelwidth
67 {\makebox[0pt][l]{#1}\\}}%
70 \newenvironment{Lentry}{%
71 \renewcommand{\entrylabel}{\Lentrylabel}
75 \newcommand{\fcntype}[1]{\textit{#1}}
76 \newcommand{\argtype}[1]{\textit{#1}}
77 \newcommand{\fcnname}[1]{\textsf{#1}}
79 \newlength{\formnamelen} % length of a name of a form
80 \newlength{\pboxargslen} % length of parbox for arguments
81 \newlength{\typelen} % length of the type label for the form
83 \newcommand{\args}[1]{#1}
84 \newcommand{\keys}[1]{\textsf{\&key} \= #1}
85 \newcommand{\morekeys}[1]{\\ \> #1}
86 \newcommand{\yetmorekeys}[1]{\\ \> #1}
88 \newcommand{\defunvspace}{\ifhmode\unskip \par\fi\addvspace{18pt plus 12pt minus 6pt}}
91 %% \layout[pkg]{name}{param list}{type}
93 %% This lays out an entry like so:
95 %% pkg:name arg1 arg2 [Function]
97 %% where [Function] is flush right.
99 \newcommand{\layout}[4][\mbox{}]{%
101 \fcnname{#1#2\hspace{1em}}%
102 \settowidth{\formnamelen}{\fcnname{#1#2\hspace{1em}}}%
103 \settowidth{\typelen}{[\argtype{#4}]}%
104 \setlength{\pboxargslen}{\linewidth}%
105 \addtolength{\pboxargslen}{-1\formnamelen}%
106 \addtolength{\pboxargslen}{-1\typelen}%
107 \begin{minipage}[t]{\pboxargslen}
112 \hfill[\fcntype{#4}]%
113 \par\addvspace{2pt plus 2pt minus 2pt}}
115 \newcommand{\vrindexbold}[1]{\index[vars]{#1|textbf}}
116 \newcommand{\fnindexbold}[1]{\index[funs]{#1|textbf}}
120 %% \begin{deftp}{typeclass}{typename}{args}
123 \newenvironment{deftp}[3]{%
124 \par\bigskip\index[types]{#2|textbf}%
125 \layout{#2}{\var{#3}}{#1}
130 %% \begin{defun}{pkg}{name}{params}
131 %% \defunx[pkg]{name}{params}
132 %% description of function
134 \newenvironment{defun}[3]{%
135 \par\defunvspace\fnindexbold{#2}\label{FN:#2}%
136 \layout[#1]{#2}{#3}{Function}
138 \newcommand{\defunx}[3][\mbox{}]{%
139 \par\fnindexbold{#2}\label{FN:#2}%
140 \layout[#1]{#2}{#3}{Function}}
144 %% \begin{defmac}{pkg}{name}{params}
145 %% \defmacx[pkg]{name}{params}
146 %% description of macro
148 \newenvironment{defmac}[3]{%
149 \par\defunvspace\fnindexbold{#2}\label{FN:#2}%
150 \layout[#1]{#2}{#3}{Macro}}{}
151 \newcommand{\defmacx}[3][\mbox{}]{% \defmacx[pkg]{name}{params}: additional signature line for a macro entry
152 \par\fnindexbold{#2}\label{FN:#2}% bold entry in the function index plus an FN:<name> label
153 \layout[#1]{#2}{#3}{Macro}}% tag the line [Macro], matching the defmac environment (was [Function])
157 %% \begin{defvar}{pkg}{name}
158 %% \defvarx[pkg]{name}
159 %% description of defvar
161 \newenvironment{defvar}[2]{%
162 \par\defunvspace\vrindexbold{#2}\label{VR:#2}
163 \layout[#1]{*#2*}{}{Variable}}{}
164 \newcommand{\defvarx}[2][\mbox{}]{%
165 \par\vrindexbold{#2}\label{VR:#2}
166 \layout[#1]{*#2*}{}{Variable}}
170 %% \begin{defconst}{pkg}{name}
171 %% \defconstx[pkg]{name}
172 %% description of defconst
174 \newcommand{\defconstx}[2][\mbox{}]{%
175 \layout[#1]{#2}{}{Constant}}
176 \newenvironment{defconst}[2]{%
177 \defunvspace\defconstx[#1]{#2}}
179 \newenvironment{example}{\begin{quote}\begin{alltt}}{\end{alltt}\end{quote}}
180 \newenvironment{lisp}{\begin{example}}{\end{example}}
181 \newenvironment{display}{\begin{quote}\begin{alltt}}{\end{alltt}\end{quote}}
183 \newcommand{\hide}[1]{}
184 \newcommand{\trnumber}[1]{#1}
185 \newcommand{\citationinfo}[1]{#1}
186 \newcommand{\var}[1]{{\textsf{\textsl{#1}}\xspace}}
187 \newcommand{\code}[1]{\textnormal{{\sffamily #1}}}
188 \newcommand{\file}[1]{`\texttt{#1}'}
189 \newcommand{\samp}[1]{`\texttt{#1}'}
190 \newcommand{\kwd}[1]{\code{:#1}}
191 \newcommand{\F}[1]{\code{#1}}
192 \newcommand{\w}[1]{\hbox{#1}}
193 \renewcommand{\b}[1]{\textrm{\textbf{#1}}}
194 \renewcommand{\i}[1]{\textit{#1}}
195 \newcommand{\ctrl}[1]{$\uparrow$\textsf{#1}}
196 \newcommand{\result}{$\Rightarrow$}
197 \newcommand{\myequiv}{$\equiv$}
198 \newcommand{\back}[1]{\(\backslash\)#1}
199 \newcommand{\pxlref}[1]{see section~\ref{#1}, page~\pageref{#1}}
200 \newcommand{\xlref}[1]{See section~\ref{#1}, page~\pageref{#1}}
202 \newcommand{\false}{\textsf{nil}}
203 \newcommand{\true}{\textsf{t}}
204 \newcommand{\nil}{\textsf{nil}}
205 \newcommand{\FALSE}{\textsf{nil}}
206 \newcommand{\TRUE}{\textsf{t}}
207 \newcommand{\NIL}{\textsf{nil}}
209 \newcommand{\ampoptional}{\textsf{\&optional}}
210 \newcommand{\amprest}{\textsf{\&rest}}
211 \newcommand{\ampbody}{\textsf{\&body}}
212 \newcommand{\mopt}[1]{{$\,\{$}\textnormal{\textsf{\textsl{#1\/}}}{$\}\,$}}
213 \newcommand{\mstar}[1]{{$\,\{$}\textnormal{\textsf{\textsl{#1\/}}}{$\}^*\,$}}
214 \newcommand{\mplus}[1]{{$\,\{$}\textnormal{\textsf{\textsl{#1\/}}}{$\}^+\,$}}
215 \newcommand{\mgroup}[1]{{$\,\{$}\textnormal{\textsf{\textsl{#1\/}}}{$\}\,$}}
216 \newcommand{\mor}{$|$}
218 \newcommand{\funref}[1]{\findexed{#1} (page~\pageref{FN:#1})}
219 \newcommand{\specref}[1]{\findexed{#1} (page~\pageref{FN:#1})}
220 \newcommand{\macref}[1]{\findexed{#1} (page~\pageref{FN:#1})}
221 \newcommand{\varref}[1]{\vindexed{#1} (page~\pageref{VR:#1})}
222 \newcommand{\conref}[1]{\conindexed{#1} (page~\pageref{VR:#1})}
224 %% Some common abbreviations
225 \newcommand{\clisp}{Common Lisp}
226 \newcommand{\dash}{---}
227 \newcommand{\alien}{Alien}
228 \newcommand{\aliens}{Aliens}
229 \newcommand{\Aliens}{Aliens}
230 \newcommand{\Alien}{Alien}
231 \newcommand{\Hemlock}{Hemlock}
232 \newcommand{\hemlock}{Hemlock}
233 \newcommand{\python}{Python}
234 \newcommand{\Python}{Python}
235 \newcommand{\cmucl}{CMU Common Lisp}
236 \newcommand{\llisp}{Common Lisp}
237 \newcommand{\Llisp}{Common Lisp}
238 \newcommand{\cltl}{\emph{Common Lisp: The Language}}
239 \newcommand{\cltltwo}{\emph{Common Lisp: The Language 2}}
241 %% Replacement commands when we run latex2html. This should be last
242 %% so that latex2html uses these commands instead of the LaTeX
247 \newcommand{\var}[1]{\textnormal{\textit{#1}}}
248 \newcommand{\code}[1]{\textnormal{\texttt{#1}}}
249 %%\newcommand{\printindex}[1][\mbox{}]{}
251 %% We need the quote environment because the alltt is broken. The
252 %% quote environment helps us in postprocessing the result to get
254 \newenvironment{example}{\begin{quote}\begin{alltt}}{\end{alltt}\end{quote}}
255 \newenvironment{display}{\begin{quote}\begin{alltt}}{\end{alltt}\end{quote}}
257 \newcommand{\textnormal}[1]{\rm #1}
258 \newcommand{\hbox}[1]{\mbox{#1}}
259 \newcommand{\xspace}{}
260 \newcommand{\newindex}[4]{}% no-op stand-in for \newindex: takes its four arguments and discards them
262 \newcommand{\pxlref}[1]{see section~\ref{#1}}
263 \newcommand{\xlref}[1]{See section~\ref{#1}}
265 \newcommand{\tindexed}[1]{\index{#1}\texttt{#1}}
266 \newcommand{\findexed}[1]{\index{#1}\texttt{#1}}
267 \newcommand{\vindexed}[1]{\index{#1}\texttt{*#1*}}
268 \newcommand{\cindex}[1]{\index{#1}}
269 \newcommand{\cpsubindex}[2]{\index{#1!#2}}
271 \newcommand{\keys}[1]{\texttt{\&key} #1}
272 \newcommand{\morekeys}[1]{#1}
273 \newcommand{\yetmorekeys}[1]{#1}
275 \newenvironment{defun}[3]{%
276 \textbf{[Function]}\\
277 \texttt{#1#2} \emph{#3}\\}{}
278 \newcommand{\defunx}[3][\mbox{}]{%
279 \texttt{#1#2} {\em #3}\\}
280 \newenvironment{defmac}[3]{%
282 \texttt{#1#2} \emph{#3}\\}{}
283 \newcommand{\defmacx}[3][\mbox{}]{%
284 \texttt{#1#2} {\em #3}\\}
285 \newenvironment{defvar}[2]{%
286 \textbf{[Variable]}\\
287 \texttt{#1*#2*}\\ \\}{}
288 \newcommand{\defvarx}[2][\mbox{}]{%
290 \newenvironment{defconst}[2]{%
291 \textbf{[Constant]}\\
293 \newcommand{\defconstx}[2][\mbox{}]{\texttt{#1#2}\\}
294 \newenvironment{deftp}[3]{%
296 \texttt{#2} \textit{#3}\\}{}
297 \newenvironment{Lentry}{\begin{description}}{\end{description}}
301 \setlength{\oddsidemargin}{-10pt}
302 \setlength{\evensidemargin}{-10pt}
303 \setlength{\topmargin}{-40pt}
304 \setlength{\headheight}{12pt}
305 \setlength{\headsep}{25pt}
306 \setlength{\footskip}{30pt}
307 \setlength{\textheight}{9.25in}
308 \setlength{\textwidth}{6.75in}
309 \setlength{\columnsep}{0.375in}
310 \setlength{\columnseprule}{0pt}
313 \setcounter{tocdepth}{2}
314 \setcounter{secnumdepth}{3}
315 \def\textfraction{.1}
316 \def\bottomfraction{.9} % was .3
327 \newcommand{\theabstract}{%
329 CMU Common Lisp is an implementation of Common Lisp that runs on
330 various Unix workstations. See the README file in the distribution
331 for current platforms. The largest single part of this document
332 describes the Python compiler and the programming styles and
333 techniques that the compiler encourages. The rest of the document
334 describes extensions and the implementation dependent choices made
335 in developing this implementation of Common Lisp. We have added
336 several extensions, including a source level debugger, an interface
337 to Unix system calls, a foreign function call interface, support for
338 interprocess communication and remote procedure call, and other
339 features that provide a good environment for developing Lisp code.
342 \newcommand{\researchcredit}{%
343 This research was sponsored by the Defense Advanced Research
344 Projects Agency, Information Science and Technology Office, under
345 the title \emph{Research on Parallel Computing} issued by DARPA/CMO
346 under Contract MDA972-90-C-0035 ARPA Order No. 7330.
348 The views and conclusions contained in this document are those of
349 the authors and should not be interpreted as representing the
350 official policies, either expressed or implied, of the Defense
351 Advanced Research Projects Agency or the U.S. government.}
354 \title{CMU Common Lisp User's Manual}
356 %%\author{Robert A. MacLachlan, \var{Editor}}
358 %%\trnumber{CMU-CS-92-161}
361 %%Supersedes Technical Reports CMU-CS-87-156 and CMU-CS-91-108.
364 %%%%\arpasupport{strategic}
365 %%\abstract{\theabstract}
366 %%%%\keywords{lisp, Common Lisp, manual, compiler,
367 %%%% programming language implementation, programming environment}
372 %% \title{CMU Common Lisp User's Manual}
374 \author{Robert A. MacLachlan,
376 \thanks{\small This research was sponsored by the Defense Advanced
377 Research Projects Agency, Information Science and Technology
378 Office, under the title \emph{Research on Parallel Computing}
379 issued by DARPA/CMO under Contract MDA972-90-C-0035 ARPA Order No.
380 7330. The views and conclusions contained in this document are
381 those of the authors and should not be interpreted as representing
382 the official policies, either expressed or implied, of the Defense
383 Advanced Research Projects Agency or the U.S. government.}}
388 July 1992 \\ CMU-CS-92-161 \\
392 \vspace{0.75in} {\small
393 School of Computer Science \\
394 Carnegie Mellon University \\
395 Pittsburgh, PA 15213} \\
396 \vspace{0.5in} \small Supersedes Technical Reports CMU-CS-87-156 and
398 \vspace{0.5in} \textbf{Abstract} \medskip
407 %% Nice HTML version of the title page
410 <h1 align=center>CMU Common Lisp User's Manual</h1>
412 <p align=center>Robert A. MacLachlan, <EM>Editor</EM>
425 School of Computer Science <BR>
426 Carnegie Mellon University <BR>
427 Pittsburgh, PA 15213 <BR>
431 Supersedes Technical Reports CMU-CS-87-156 and
438 CMU Common Lisp is an implementation of Common Lisp that runs on
439 various Unix workstations. See the README file in the
440 distribution for current platforms. The largest single part of
441 this document describes the Python compiler and the programming
442 styles and techniques that the compiler encourages. The rest of
443 the document describes extensions and the implementation
444 dependent choices made in developing this implementation of
445 Common Lisp. We have added several extensions, including a
446 source level debugger, an interface to Unix system calls, a
447 foreign function call interface, support for interprocess
448 communication and remote procedure call, and other features that
449 provide a good environment for developing Lisp code.
452 <blockquote><font size=-1>
453 This research was sponsored by the Defense Advanced Research
454 Projects Agency, Information Science and Technology Office, under
455 the title <em>Research on Parallel Computing</em> issued by DARPA/CMO
456 under Contract MDA972-90-C-0035 ARPA Order No. 7330.
458 The views and conclusions contained in this document are those of
459 the authors and should not be interpreted as representing the
460 official policies, either expressed or implied, of the Defense
461 Advanced Research Projects Agency or the U.S. government.
468 \textbf{Keywords:} lisp, Common Lisp, manual, compiler,
469 programming language implementation, programming environment
472 \pagenumbering{roman}
476 \pagenumbering{arabic}
479 %%\setfilename{cmu-user.info}
480 %%\node Top, Introduction, (dir), (dir)
483 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/intro.ms}
487 \hide{ -*- Dictionary: cmu-user -*- }
490 * Design Choices and Extensions::
493 * Advanced Compiler Use and Efficiency Hints::
495 * Event Dispatching with SERVE-EVENT::
497 * Interprocess Communication under LISP::
498 * Debugger Programmer's Interface::
504 --- The Detailed Node Listing ---
509 * Local Distribution of CMU Common Lisp::
510 * Net Distribution of CMU Common Lisp::
511 * Source Availability::
512 * Command Line Options::
515 Design Choices and Extensions
518 * Default Interrupts for Lisp::
521 * Garbage Collection::
526 * Running Programs from Lisp::
527 * Saving a Core Image::
529 * Filesystem Operations::
530 * Time Parsing and Formatting::
539 * Array Initialization::
543 * IEEE Special Values::
545 * Denormalized Floats::
546 * Floating Point Exceptions::
547 * Floating Point Rounding Mode::
548 * Accessing the Floating Point Modes::
552 * The Graphical Interface::
553 * The TTY Inspector::
555 Running Programs from Lisp
557 * Process Accessors::
562 * Wildcard Pathnames::
563 * Logical Pathnames::
565 * Predefined Search-Lists::
566 * Search-List Operations::
567 * Search List Example::
572 * Search List Example::
574 Search-List Operations
576 * Search List Example::
578 Filesystem Operations
580 * Wildcard Matching::
581 * File Name Completion::
582 * Miscellaneous Filesystem Operations::
586 * Debugger Introduction::
590 * Source Location Printing::
591 * Compiler Policy Control::
593 * Information Commands::
594 * Breakpoint Commands::
601 * How Arguments are Printed::
604 * Debug Tail Recursion::
605 * Unknown Locations and Interrupts::
609 * Variable Value Availability::
610 * Note On Lexical Variable Access::
612 Source Location Printing
614 * How the Source is Found::
615 * Source Location Availability::
619 * Breakpoint Example::
623 * Encapsulation Functions::
627 * Compiler Introduction::
628 * Calling the Compiler::
629 * Compilation Units::
630 * Interpreting Error Messages::
632 * Getting Existing Programs to Run::
634 * Open Coding and Inline Expansion::
638 * Undefined Warnings::
640 Interpreting Error Messages
642 * The Parts of the Error Message::
643 * The Original and Actual Source::
644 * The Processing Path::
646 * Errors During Macroexpansion::
648 * Error Message Parameterization::
652 * Compile Time Type Errors::
653 * Precise Type Checking::
654 * Weakened Type Checking::
658 * The Optimize Declaration::
659 * The Optimize-Interface Declaration::
661 Advanced Compiler Use and Efficiency Hints
663 * Advanced Compiler Introduction::
664 * More About Types in Python::
666 * Source Optimization::
669 * Block Compilation::
671 * Byte Coded Compilation::
672 * Object Representation::
674 * General Efficiency Hints::
678 Advanced Compiler Introduction
683 * Representation of Objects::
684 * Writing Efficient Code::
686 More About Types in Python
688 * More Types Meaningful::
694 * The Values Declaration::
696 * The Freeze-Type Declaration::
697 * Type Restrictions::
698 * Type Style Recommendations::
702 * Variable Type Inference::
703 * Local Function Type Inference::
704 * Global Function Type Inference::
705 * Operation Specific Type Inference::
706 * Dynamic Type Inference::
707 * Type Check Optimization::
713 * Unused Expression Elimination::
714 * Control Optimization::
715 * Unreachable Code Deletion::
716 * Multiple Values Optimization::
717 * Source to Source Transformation::
718 * Style Recommendations::
722 * Tail Recursion Exceptions::
726 * Self-Recursive Calls::
729 * Local Tail Recursion::
734 * Block Compilation Semantics::
735 * Block Compilation Declarations::
736 * Compiler Arguments::
737 * Practical Difficulties::
738 * Context Declarations::
739 * Context Declaration Example::
743 * Inline Expansion Recording::
744 * Semi-Inline Expansion::
745 * The Maybe-Inline Declaration::
747 Object Representation
749 * Think Before You Use a List::
750 * Structure Representation::
759 * Non-Descriptor Representations::
761 * Generic Arithmetic::
764 * Floating Point Efficiency::
765 * Specialized Arrays::
766 * Specialized Structure Slots::
767 * Interactions With Local Call::
768 * Representation of Characters::
770 General Efficiency Hints
772 * Compile Your Code::
773 * Avoid Unnecessary Consing::
774 * Complex Argument Syntax::
775 * Mapping and Iteration::
776 * Trace Files and Disassembly::
781 * Efficiency Notes and Type Checking::
782 * Representation Efficiency Notes::
783 * Verbosity Control::
787 * Profile Interface::
788 * Profiling Techniques::
789 * Nested or Recursive Calls::
791 * Profiling overhead::
792 * Additional Timing Utilities::
794 * Benchmarking Techniques::
798 * Reading the Command Line::
799 * Lisp Equivalents for C Routines::
800 * Type Translations::
801 * System Area Pointers::
802 * Unix System Calls::
803 * File Descriptor Streams::
804 * Making Sense of Mach Return Codes::
809 * Changing Interrupt Handlers::
810 * Examples of Signal Handlers::
812 Event Dispatching with SERVE-EVENT
815 * The SERVE-EVENT Function::
816 * Using SERVE-EVENT with Unix File Descriptors::
817 * Using SERVE-EVENT with the CLX Interface to X::
818 * A SERVE-EVENT Example::
820 Using SERVE-EVENT with the CLX Interface to X
822 * Without Object Sets::
825 A SERVE-EVENT Example
827 * Without Object Sets Example::
828 * With Object Sets Example::
832 * Introduction to Aliens::
836 * Alien Data Structure Example::
837 * Loading Unix Object Files::
838 * Alien Function Calls::
839 * Step-by-Step Alien Example::
843 * Defining Alien Types::
844 * Alien Types and Lisp Types::
845 * Alien Type Specifiers::
846 * The C-Call Package::
850 * Alien Access Operations::
851 * Alien Coercion Operations::
852 * Alien Dynamic Allocation::
856 * Local Alien Variables::
857 * External Alien Variables::
861 * alien-funcall:: The alien-funcall Primitive
862 * def-alien-routine:: The def-alien-routine Macro
863 * def-alien-routine Example::
864 * Calling Lisp from C::
866 Interprocess Communication under LISP
868 * The REMOTE Package::
874 * Connecting Servers and Clients::
875 * Remote Evaluations::
883 * Making Your Own Wires::
885 Debugger Programmer's Interface
887 * DI Exceptional Conditions::
895 * Source Translation Utilities::
897 DI Exceptional Conditions
903 %%\node Introduction, Design Choices and Extensions, Top, Top
904 \chapter{Introduction}
906 CMU Common Lisp is a public-domain implementation of Common Lisp developed in
907 the Computer Science Department of Carnegie Mellon University. \cmucl{} runs
908 on various Unix workstations---see the README file in the distribution for
909 current platforms. This document describes the implementation based on the
910 Python compiler. Previous versions of CMU Common Lisp ran on the IBM RT PC
911 and (when known as Spice Lisp) on the Perq workstation. See \code{man cmucl}
912 (\file{man/man1/cmucl.1}) for other general information.
914 \cmucl{} sources and executables are freely available via anonymous FTP; this
915 software is ``as is'', and has no warranty of any kind. CMU and the
916 authors assume no responsibility for the consequences of any use of this
917 software. See \file{doc/release-notes.txt} for a description of the
918 state of the release you have.
922 * Local Distribution of CMU Common Lisp::
923 * Net Distribution of CMU Common Lisp::
924 * Source Availability::
925 * Command Line Options::
929 %%\node Support, Local Distribution of CMU Common Lisp, Introduction, Introduction
932 The CMU Common Lisp project is no longer funded, so only minimal support is
933 being done at CMU. There is a net community of \cmucl{} users and maintainers
934 who communicate via comp.lang.lisp and the cmucl-bugs@cs.cmu.edu
940 This manual contains only implementation-specific information about
941 \cmucl. Users will also need a separate manual describing the
942 \clisp{} standard. \clisp{} was initially defined in \i{Common Lisp:
943 The Language}, by Guy L. Steele Jr. \clisp{} is now undergoing
944 standardization by the X3J13 committee of ANSI. The X3J13 spec is not
945 yet completed, but a number of clarifications and modifications have
946 been approved. We intend that \cmucl{} will eventually adhere to the
947 X3J13 spec, and we have already implemented many of the changes
950 Until the X3J13 standard is completed, the second edition of
951 \cltltwo{} is probably the best available manual for the language and
952 for our implementation of it. This book has no official role in the
953 standardization process, but it does include many of the changes
954 adopted since the first edition was completed.
956 In addition to the language itself, this document describes a number
957 of useful library modules that run in \cmucl. \hemlock, an Emacs-like
958 text editor, is included as an integral part of the \cmucl{}
959 environment. Two documents describe \hemlock{}: the \i{Hemlock User's
960 Manual}, and the \i{Hemlock Command Implementor's Manual}.
962 %%\node Local Distribution of CMU Common Lisp, Net Distribution of CMU Common Lisp, Support, Introduction
963 \section{Local Distribution of CMU Common Lisp}
965 In CMU CS, \cmucl{} should be runnable as \file{/usr/local/bin/cmucl}.
966 The full binary distribution should appear under
967 \file{/usr/local/lib/cmucl/}. Note that the first time you run Lisp,
968 it will take AFS several minutes to copy the image into its local
969 cache. Subsequent starts will be much faster.
971 Or, you can run directly out of the AFS release area (which may be
972 necessary on SunOS machines). Put this in your \file{.login} shell
975 setenv CMUCLLIB "/afs/cs/misc/cmucl/@sys/beta/lib"
976 setenv PATH \${PATH}:/afs/cs/misc/cmucl/@sys/beta/bin
979 If you also set \code{MANPATH} or \code{MPATH} (depending on the Unix)
980 to point to \file{/usr/local/lib/cmucl/man/}, then `\code{man cmucl}'
981 will give an introduction to CMU CL and \samp{man lisp} will describe
982 command line options. For installation notes, see the \file{README}
983 file in the release area.
985 See \file{/usr/local/lib/cmucl/doc} for release notes and
986 documentation. Hardcopy documentation is available in the document
987 room. Documentation supplements may be available for recent
988 additions: see the \file{README} file.
990 Send bug reports and questions to \samp{cmucl-bugs@cs.cmu.edu}. If
991 you send a bug report to \samp{gripe} or \samp{help}, they will just
992 forward it to this mailing list.
994 %%\node Net Distribution of CMU Common Lisp, Source Availability, Local Distribution of CMU Common Lisp, Introduction
995 \section{Net Distribution of CMU Common Lisp}
997 \subsection{CMU Distribution}
998 Externally, CMU Common Lisp is only available via anonymous FTP. We
999 don't have the manpower to make tapes. These are our distribution
1002 lisp-rt1.slisp.cs.cmu.edu (128.2.217.9)
1003 lisp-rt2.slisp.cs.cmu.edu (128.2.217.10)
1006 Log in with the user \samp{anonymous} and \samp{username@host} as
1007 password (i.e. your EMAIL address.) When you log in, the current
1008 directory should be set to the \cmucl{} release area. If you have any
1009 trouble with FTP access, please send mail to \samp{slisp@cs.cmu.edu}.
1011 The release area holds compressed tar files with names of the form:
1013 \var{version}-\var{machine}_\var{os}.tar.Z
1015 FTP compressed tar archives in binary mode. To extract, \samp{cd} to
1016 the directory that is to be the root of the tree, then type:
1018 uncompress <file.tar.Z | tar xf - .
1020 The resulting tree is about 23 megabytes. For installation
1021 directions, see the section ``site initialization'' in the README file at
1022 the root of the tree.
1024 If poor network connections make it difficult to transfer a 10 meg
1025 file, the release is also available split into five parts, with the
1026 suffix \file{.0} to \file{.4}. To extract from multiple files, use:
1028 cat file.tar.Z.* | uncompress | tar xf - .
1031 The release area also contains source distributions and other binary
1032 distributions. A listing of the current contents of the release area
1033 is in \file{FILES}. Major release announcements will be made to
1034 \code{comp.lang.lisp} until there is enough volume to warrant a
1035 \code{comp.lang.lisp.cmu}.
1038 \subsection{Net Distribution}
1039 Although the CMU Common Lisp project is no longer actively developed
1040 by CMU, development has continued. You can obtain this version from
1043 ftp://ftp2.cons.org/pub/languages/lisp/cmucl
1044 http://www2.cons.org:8000/ftp-area/cmucl/
1046 Further information can be found via the World Wide Web at
1048 http://www.cons.org/cmucl
1051 %%\node Source Availability, Command Line Options, Net Distribution of CMU Common Lisp, Introduction
1052 \section{Source Availability}
1054 Lisp and documentation sources are available via anonymous FTP to
1055 any CMU CS machine. All CMU written code is public domain, but CMU CL
1056 also makes use of two imported packages: PCL and CLX. Although these
1057 packages are copyrighted, they may be freely distributed without any
1058 licensing agreement or fee. See the \file{README} file in the binary
1059 distribution for up-to-date source pointers.
1061 The release area contains a source distribution, which is an image of
1062 all the \file{.lisp} source files used to build a particular system
1065 \var{version}-source.tar.Z (3.6 meg)
1068 All of our files (including the release area) are actually in the AFS
1069 file system. On the release machines, the FTP server's home is the
1070 release directory: \file{/afs/cs.cmu.edu/project/clisp/release}. The
1071 actual working source areas are in other subdirectories of
1072 \file{clisp}, and you can directly ``cd'' to those directories if you
1073 know the name. Due to the way anonymous FTP access control is done,
1074 it is important to ``cd'' to the source directory with a single
1075 command, and then do a ``get'' operation.
1078 Alternatively, you can obtain the current sources via WWW at
1080 http://www.cons.org/cmucl
1082 which contains pointers on how to get a \code{tar} file of the
1083 current sources or how to get an individual file from the sources.
1084 Binary versions for selected platforms are also available.
1087 %%\node Command Line Options, Credits, Source Availability, Introduction
1088 \section{Command Line Options}
1090 The command line syntax and environment is described in the lisp(1)
1091 man page in the man/man1 directory of the distribution. See also
1092 cmucl(1). Currently Lisp accepts the following switches:
1095 \item[\code{-batch}] specifies batch mode, where all input is
1096 directed from standard-input. An error code of 0 is returned upon
1097 encountering an EOF and 1 otherwise.
1099 \item[\code{-core}] requires an argument that should be the name of a
1100 core file. Rather than using the default core file
1101 (\file{lib/lisp.core}), the specified core file is loaded.
1103 \item[\code{-edit}] specifies to enter Hemlock. A file to edit may be
1104 specified by placing the name of the file between the program name
1105 (usually \file{lisp}) and the first switch.
1107 \item[\code{-eval}] accepts one argument which should be a Lisp form
1108 to evaluate during the start up sequence. The value of the form
1109 will not be printed unless it is wrapped in a form that does output.
1111 \item[\code{-hinit}] accepts an argument that should be the name of
1112 the hemlock init file to load the first time the function
1113 \findexed{ed} is invoked. The default is to load
1114 \file{hemlock-init.\var{object-type}}, or if that does not exist,
1115 \file{hemlock-init.lisp} from the user's home directory. If the
1116 file is not in the user's home directory, the full path must be
1119 \item[\code{-init}] accepts an argument that should be the name of an
1120 init file to load during the normal start up sequence. The default
1121 is to load \file{init.\var{object-type}} or, if that does not exist,
1122 \file{init.lisp} from the user's home directory. If the file is not
1123 in the user's home directory, the full path must be specified.
1125 \item[\code{-noinit}] accepts no arguments and specifies that an init
1126 file should not be loaded during the normal start up sequence.
1127 Also, this switch suppresses the loading of a hemlock init file when
1128 Hemlock is started up with the \code{-edit} switch.
1130 \item[\code{-load}] accepts an argument which should be the name of a
1131 file to load into Lisp before entering Lisp's read-eval-print loop.
1133 \item[\code{-slave}] specifies that Lisp should start up as a
1134 \i{slave} Lisp and try to connect to an editor Lisp. The name of
1135 the editor to connect to must be specified\dash{}to find the
1136 editor's name, use the \hemlock{} ``\code{Accept Slave
1137 Connections}'' command. The name for the editor Lisp is of the
1140 \var{machine-name}\code{:}\var{socket}
1142 where \var{machine-name} is the internet host name for the machine
1143 and \var{socket} is the decimal number of the socket to connect to.
1145 For more details on the use of the \code{-edit} and \code{-slave}
1146 switches, see the \i{Hemlock User's Manual}.
1148 Arguments to the above switches can be specified in one of two ways:
1149 \w{\var{switch}\code{=}\var{value}} or
1150 \w{\var{switch}<\var{space}>\var{value}}. For example, to start up
1151 the saved core file mylisp.core use either of the following two
1154 \code{lisp -core=mylisp.core
1155 lisp -core mylisp.core}
1158 %%\node Credits, , Command Line Options, Introduction
1161 Since 1981 many people have contributed to the development of CMU
1162 Common Lisp. The currently active members are:
1168 Scott Fahlman * (fearless leader)
1176 Robert A. MacLachlan *
1179 Many people are voluntarily working on improving CMU Common Lisp. ``*''
1180 means a full-time CMU employee, and ``+'' means a part-time student
1181 employee. A partial listing of significant past contributors follows:
1200 From 1995, development of CMU Common Lisp has been continued by a
1201 group of volunteers. A partial list of volunteers includes the
1206 Paul Werkowski & pw@snoopy.mv.com \\
1207 Peter VanEynde & s950045@uia.ua.ac.be \\
1208 Marco Antoniotti & marcoxa@PATH.Berkeley.EDU\\
1209 Martin Cracauer & cracauer@cons.org\\
1210 Douglas Thomas Crosher & dtc@scrooge.ee.swin.oz.au\\
1211 Simon Leinen & simon@switch.ch\\
1212 Rob MacLachlan & ram+@CS.cmu.edu\\
1213 Raymond Toy & toy@rtp.ericsson.se
1218 In particular Paul Werkowski completed the port for the x86
1219 architecture for FreeBSD. Peter VanEynde took the FreeBSD port and
1220 created a Linux version.
1224 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/design.ms}
1226 \hide{ -*- Dictionary: cmu-user -*- }
1227 %%\node Design Choices and Extensions, The Debugger, Introduction, Top
1228 \chapter{Design Choices and Extensions}
1230 Several design choices in Common Lisp are left to the individual
1231 implementation, and some essential parts of the programming environment
1232 are left undefined. This chapter discusses the most important design
1233 choices and extensions.
1237 * Default Interrupts for Lisp::
1240 * Garbage Collection::
1245 * Running Programs from Lisp::
1246 * Saving a Core Image::
1248 * Filesystem Operations::
1249 * Time Parsing and Formatting::
1253 %%\node Data Types, Default Interrupts for Lisp, Design Choices and Extensions, Design Choices and Extensions
1254 \section{Data Types}
1261 * Array Initialization::
1264 %%\node Symbols, Integers, Data Types, Data Types
1265 \subsection{Symbols}
1267 As in \cltl, all symbols and package names are printed in lower case, as
1268 a user is likely to type them. Internally, they are normally stored
1271 %%\node Integers, Floats, Symbols, Data Types
1272 \subsection{Integers}
1274 The \tindexed{fixnum} type is equivalent to \code{(signed-byte 30)}.
1275 Integers outside this range are represented as a \tindexed{bignum} or
1276 a word integer (\pxlref{word-integers}.) Almost all integers that
1277 appear in programs can be represented as a \code{fixnum}, so integer
1278 number consing is rare.
1280 %%\node Floats, Characters, Integers, Data Types
1284 \cmucl{} supports two floating point formats: \tindexed{single-float}
1285 and \tindexed{double-float}. These are implemented with IEEE single
1286 and double float arithmetic, respectively. \code{short-float} is a
1287 synonym for \code{single-float}, and \code{long-float} is a synonym
1288 for \code{double-float}. The initial value of
1289 \vindexed{read-default-float-format} is \code{single-float}.
1291 Both \code{single-float} and \code{double-float} are represented with
1292 a pointer descriptor, so float operations can cause number consing.
1293 Number consing is greatly reduced if programs are written to allow the
1294 use of non-descriptor representations (\pxlref{numeric-types}.)
1298 * IEEE Special Values::
1300 * Denormalized Floats::
1301 * Floating Point Exceptions::
1302 * Floating Point Rounding Mode::
1303 * Accessing the Floating Point Modes::
1306 %%\node IEEE Special Values, Negative Zero, Floats, Floats
1307 \subsubsection{IEEE Special Values}
1309 \cmucl{} supports the IEEE infinity and NaN special values. These
1310 non-numeric values will only be generated when trapping is disabled
1311 for some floating point exception (\pxlref{float-traps}), so users of
1312 the default configuration need not concern themselves with special
1315 \begin{defconst}{extensions:}{short-float-positive-infinity}
1316 \defconstx[extensions:]{short-float-negative-infinity}
1317 \defconstx[extensions:]{single-float-positive-infinity}
1318 \defconstx[extensions:]{single-float-negative-infinity}
1319 \defconstx[extensions:]{double-float-positive-infinity}
1320 \defconstx[extensions:]{double-float-negative-infinity}
1321 \defconstx[extensions:]{long-float-positive-infinity}
1322 \defconstx[extensions:]{long-float-negative-infinity}
1324 The values of these constants are the IEEE positive and negative
1325 infinity objects for each float format.
1328 \begin{defun}{extensions:}{float-infinity-p}{\args{\var{x}}}
1330 This function returns true if \var{x} is an IEEE float infinity (of
1331 either sign.) \var{x} must be a float.
1334 \begin{defun}{extensions:}{float-nan-p}{\args{\var{x}}}
1335 \defunx[extensions:]{float-trapping-nan-p}{\args{\var{x}}}
1337 \code{float-nan-p} returns true if \var{x} is an IEEE NaN (Not A
1338 Number) object. \code{float-trapping-nan-p} returns true only if
1339 \var{x} is a trapping NaN. With either function, \var{x} must be a
1343 %%\node Negative Zero, Denormalized Floats, IEEE Special Values, Floats
1344 \subsubsection{Negative Zero}
1346 The IEEE float format provides for distinct positive and negative
1347 zeros. To test the sign on zero (or any other float), use the
1348 \clisp{} \findexed{float-sign} function. Negative zero prints as
1349 \code{-0.0f0} or \code{-0.0d0}.
1351 %%\node Denormalized Floats, Floating Point Exceptions, Negative Zero, Floats
1352 \subsubsection{Denormalized Floats}
1354 \cmucl{} supports IEEE denormalized floats. Denormalized floats
1355 provide a mechanism for gradual underflow. The \clisp{}
1356 \findexed{float-precision} function returns the actual precision of a
1357 denormalized float, which will be less than \findexed{float-digits}.
1358 Note that in order to generate (or even print) denormalized floats,
1359 trapping must be disabled for the underflow exception
1360 (\pxlref{float-traps}.) The \clisp{}
1361 \w{\code{least-positive-}\var{format}-\code{float}} constants are
1364 \begin{defun}{extensions:}{float-denormalized-p}{\args{\var{x}}}
1366 This function returns true if \var{x} is a denormalized float.
1367 \var{x} must be a float.
1370 %%\node Floating Point Exceptions, Floating Point Rounding Mode, Denormalized Floats, Floats
1371 \subsubsection{Floating Point Exceptions}
1374 The IEEE floating point standard defines several exceptions that occur
1375 when the result of a floating point operation is unclear or
1376 undesirable. Exceptions can be ignored, in which case some default
1377 action is taken, such as returning a special value. When trapping is
1378 enabled for an exception, an error is signalled whenever that exception
1379 occurs. These are the possible floating point exceptions:
1382 \item[\kwd{underflow}] This exception occurs when the result of an
1383 operation is too small to be represented as a normalized float in
1384 its format. If trapping is enabled, the
1385 \tindexed{floating-point-underflow} condition is signalled.
1386 Otherwise, the operation results in a denormalized float or zero.
1388 \item[\kwd{overflow}] This exception occurs when the result of an
1389 operation is too large to be represented as a float in its format.
1390 If trapping is enabled, the \tindexed{floating-point-overflow}
1391 condition is signalled. Otherwise, the operation results in the
1392 appropriate infinity.
1394 \item[\kwd{inexact}] This exception occurs when the result of a
1395 floating point operation is not exact, i.e. the result was rounded.
1396 If trapping is enabled, the \code{extensions:floating-point-inexact}
1397 condition is signalled. Otherwise, the rounded result is returned.
1399 \item[\kwd{invalid}] This exception occurs when the result of an
1400 operation is ill-defined, such as \code{\w{(/ 0.0 0.0)}}. If
1401 trapping is enabled, the \code{extensions:floating-point-invalid}
1402 condition is signalled. Otherwise, a quiet NaN is returned.
1404 \item[\kwd{divide-by-zero}] This exception occurs when a float is
1405 divided by zero. If trapping is enabled, the
1406 \tindexed{divide-by-zero} condition is signalled. Otherwise, the
1407 appropriate infinity is returned.
1410 %%\node Floating Point Rounding Mode, Accessing the Floating Point Modes, Floating Point Exceptions, Floats
1411 \subsubsection{Floating Point Rounding Mode}
1412 \label{float-rounding-modes}
1414 IEEE floating point specifies four possible rounding modes:
1417 \item[\kwd{nearest}] In this mode, the inexact results are rounded to
1418 the nearer of the two possible result values. If neither
1419 possibility is nearer, then the even alternative is chosen. This
1420 form of rounding is also called ``round to even'', and is the form
1421 of rounding specified for the \clisp{} \findexed{round} function.
1423 \item[\kwd{positive-infinity}] This mode rounds inexact results to the
1424 possible value closer to positive infinity. This is analogous to
1425 the \clisp{} \findexed{ceiling} function.
1427 \item[\kwd{negative-infinity}] This mode rounds inexact results to the
1428 possible value closer to negative infinity. This is analogous to
1429 the \clisp{} \findexed{floor} function.
1431 \item[\kwd{zero}] This mode rounds inexact results to the possible
1432 value closer to zero. This is analogous to the \clisp{}
1433 \findexed{truncate} function.
1436 \paragraph{Warning:}
1438 Although the rounding mode can be changed with
1439 \code{set-floating-point-modes}, use of any value other than the
1440 default (\kwd{nearest}) can cause unusual behavior, since it will
1441 affect rounding done by \llisp{} system code as well as rounding in
1442 user code. In particular, the unary \code{round} function will stop
1443 doing round-to-nearest on floats, and instead do the selected form of
1446 %%\node Accessing the Floating Point Modes, , Floating Point Rounding Mode, Floats
1447 \subsubsection{Accessing the Floating Point Modes}
1449 These functions can be used to modify or read the floating point modes:
1451 \begin{defun}{extensions:}{set-floating-point-modes}{%
1452 \keys{\kwd{traps} \kwd{rounding-mode}}
1453 \morekeys{\kwd{fast-mode} \kwd{accrued-exceptions}}
1454 \yetmorekeys{\kwd{current-exceptions}}}
1455 \defunx[extensions:]{get-floating-point-modes}{}
1457 The keyword arguments to \code{set-floating-point-modes} set various
1458 modes controlling how floating point arithmetic is done:
1461 \item[\kwd{traps}] A list of the exception conditions that should
1462 cause traps. Possible exceptions are \kwd{underflow},
1463 \kwd{overflow}, \kwd{inexact}, \kwd{invalid} and
1464 \kwd{divide-by-zero}. Initially all traps except \kwd{inexact}
1465 are enabled. \xlref{float-traps}.
1467 \item[\kwd{rounding-mode}] The rounding mode to use when the result
1468 is not exact. Possible values are \kwd{nearest},
1469 \latex{\kwd{positive\-infinity}}\html{\kwd{positive-infinity}},
1470 \kwd{negative-infinity} and \kwd{zero}. Initially, the rounding
1471 mode is \kwd{nearest}. See the warning in
1472 section~\ref{float-rounding-modes} about use of other rounding modes.
1474 \item[\kwd{current-exceptions}, \kwd{accrued-exceptions}] Lists of
1475 exception keywords used to set the exception flags. The
1476 \var{current-exceptions} are the exceptions for the previous
1477 operation, so setting it is not very useful. The
1478 \var{accrued-exceptions} are a cumulative record of the exceptions
1479 that occurred since the last time these flags were cleared.
1480 Specifying \code{()} will clear any accrued exceptions.
1482 \item[\kwd{fast-mode}] Set the hardware's ``fast mode'' flag, if
1483 any. When set, IEEE conformance or debuggability may be impaired.
1484 Some machines may not have this feature, in which case the value
1485 is always \false. No currently supported machines have a fast
1488 If a keyword argument is not supplied, then the associated state is
1491 \code{get-floating-point-modes} returns a list representing the
1492 state of the floating point modes. The list is in the same format
1493 as the keyword arguments to \code{set-floating-point-modes}, so
1494 \code{apply} could be used with \code{set-floating-point-modes} to
1495 restore the modes in effect at the time of the call to
1496 \code{get-floating-point-modes}.
1500 To simplify control of floating-point exceptions, the following
1503 \begin{defmac}{ext:}{with-float-traps-masked}{traps \ampbody\ body}
1504 \code{body} is executed with the selected floating-point exceptions
1505 given by \code{traps} masked out (disabled). \code{traps} should be
1506 a list of possible floating-point exceptions that should be ignored.
1507 Possible values are \kwd{underflow}, \kwd{overflow}, \kwd{inexact},
1508 \kwd{invalid} and \kwd{divide-by-zero}.
1510 This is equivalent to saving the current traps from
1511 \code{get-floating-point-modes}, setting the floating-point modes to
1512 the desired exceptions, running the \code{body}, and restoring the
1513 saved floating-point modes. The advantage of this macro is that it
1514 causes less consing to occur.
1516 Some points about \code{with-float-traps-masked}:
1519 \item Two approaches are available for detecting FP exceptions:
1521 \item enabling the traps and handling the exceptions
1522 \item disabling the traps and either handling the return values or
1523 checking the accrued exceptions.
1525 Of these the latter is the more portable because on the alpha port
1526 it is not possible to enable some traps at run-time.
1528 \item To assist the checking of the exceptions within the body any
1529 accrued exceptions matching the given traps are cleared at the
1530 start of the body when the traps are masked.
1532 \item To allow the macros to be nested these accrued exceptions are
1533 restored at the end of the body to their values at the start of
1534 the body. Thus any exceptions that occurred within the body will
1535 not affect the accrued exceptions outside the macro.
1537 \item Note that only the given exceptions are restored at the end of
1538 the body so other exceptions will be visible in the accrued
1539 exceptions outside the body.
1541 \item On the x86, setting the accrued exceptions of an unmasked
1542 exception would cause a FP trap. The macro behaviour of restoring
1543 the accrued exceptions ensures that if an accrued exception is
1544 initially not flagged and occurs within the body it will be
1545 restored/cleared at the exit of the body and thus not cause a
1548 \item On the x86, and, perhaps, the hppa, the FP exceptions may be
1549 delivered at the next FP instruction which requires a FP
1550 \code{wait} instruction (\code{\%vm::float-wait}) if using the lisp
1551 conditions to catch trap within a \code{handler-bind}. The
1552 \code{handler-bind} macro does the right thing and inserts a
1553 float-wait (at the end of its body on the x86). The masking and
1554 noting of exceptions is also safe here.
1556 \item The setting of the FP flags uses the
1557 \code{(floating-point-modes)} and the \code{(set
1558 (floating-point-modes)\ldots)} VOPs. These VOPs blindly update
1559 the flags which may include other state. We assume this state
1560 hasn't changed in between getting and setting the state. For
1561 example, if you used the FP unit between the above calls, the
1562 state may be incorrectly restored! The
1563 \code{with-float-traps-masked} macro keeps the intervening code to
1564 a minimum and uses only integer operations.
1565 %% Safe byte-compiled?
1566 %% Perhaps the VOPs (x86) should be smarter and only update some of
1567 %% the flags, the trap masks and exceptions?
1573 %%\node Characters, Array Initialization, Floats, Data Types
1574 \subsection{Characters}
1576 \cmucl{} implements characters according to \i{Common Lisp: the
1577 Language II}. The main difference from the first version is that
1578 character bits and font have been eliminated, and the names of the
1579 types have been changed. \tindexed{base-character} is the new
1580 equivalent of the old \tindexed{string-char}. In this implementation,
1581 all characters are base characters (there are no extended characters.)
1582 Character codes range between \code{0} and \code{255}, using the ASCII
1585 Table~\ref{tbl:chars}~\vpageref{tbl:chars} shows characters
1586 recognized by \cmucl.
1592 \begin{tabular}{|c|c|l|l|l|l|}
1594 \multicolumn{2}{|c|}{ASCII} & \multicolumn{1}{|c}{Lisp} &
1595 \multicolumn{3}{|c|}{} \\
1597 Name & Code & \multicolumn{1}{|c|}{Name} & \multicolumn{3}{|c|}{\raisebox{1.5ex}{Alternatives}}\\
1600 \code{nul} & 0 & \code{\#\back{NULL}} & \code{\#\back{NUL}} & &\\
1601 \code{bel} & 7 & \code{\#\back{BELL}} & & &\\
1602 \code{bs} & 8 & \code{\#\back{BACKSPACE}} & \code{\#\back{BS}} & &\\
1603 \code{tab} & 9 & \code{\#\back{TAB}} & & &\\
1604 \code{lf} & 10 & \code{\#\back{NEWLINE}} & \code{\#\back{NL}} & \code{\#\back{LINEFEED}} & \code{\#\back{LF}}\\
1605 \code{vt} & 11 & \code{\#\back{VT}} & & &\\ \code{ff} & 12 & \code{\#\back{PAGE}} & \code{\#\back{FORM}} & &\\
1606 \code{cr} & 13 & \code{\#\back{RETURN}} & \code{\#\back{CR}} & &\\
1607 \code{esc} & 27 & \code{\#\back{ESCAPE}} & \code{\#\back{ESC}} & \code{\#\back{ALTMODE}} & \code{\#\back{ALT}}\\
1608 \code{sp} & 32 & \code{\#\back{SPACE}} & \code{\#\back{SP}} & &\\
1609 \code{del} & 127 & \code{\#\back{DELETE}} & \code{\#\back{RUBOUT}} & &\\
1612 \caption{Characters recognized by \cmucl}
1618 %%\node Array Initialization, , Characters, Data Types
1619 \subsection{Array Initialization}
1621 If no \kwd{initial-element} is specified, arrays are initialized to zero.
1624 %%\node Default Interrupts for Lisp, Packages, Data Types, Design Choices and Extensions
1625 \section{Default Interrupts for Lisp}
1627 CMU Common Lisp has several interrupt handlers defined when it starts up,
1631 \item[\code{SIGINT} (\ctrl{c})] causes Lisp to enter a break loop.
1632 This puts you into the debugger which allows you to look at the
1633 current state of the computation. If you proceed from the break
1634 loop, the computation will proceed from where it was interrupted.
1636 \item[\code{SIGQUIT} (\ctrl{L})] causes Lisp to do a throw to the
1637 top-level. This causes the current computation to be aborted, and
1638 control returned to the top-level read-eval-print loop.
1640 \item[\code{SIGTSTP} (\ctrl{z})] causes Lisp to suspend execution and
1641 return to the Unix shell. If control is returned to Lisp, the
1642 computation will proceed from where it was interrupted.
1644 \item[\code{SIGILL}, \code{SIGBUS}, \code{SIGSEGV}, and \code{SIGFPE}]
1645 cause Lisp to signal an error.
1647 For keyboard interrupt signals, the standard interrupt character is in
1648 parentheses. Your \file{.login} may set up different interrupt
1649 characters. When a signal is generated, there may be some delay before
1650 it is processed since Lisp cannot be interrupted safely in an arbitrary
1651 place. The computation will continue until a safe point is reached and
1652 then the interrupt will be processed. \xlref{signal-handlers} to define
1653 your own signal handlers.
1655 %%\node Packages, The Editor, Default Interrupts for Lisp, Design Choices and Extensions
1658 When CMU Common Lisp is first started up, the default package is the
1659 \code{user} package. The \code{user} package uses the
1660 \code{common-lisp}, \code{extensions}, and \code{pcl} packages. The
1661 symbols exported from these three packages can be referenced without
1662 package qualifiers. This section describes packages which have
1663 exported interfaces that may concern users. The numerous internal
1664 packages which implement parts of the system are not described here.
1665 Package nicknames are in parentheses after the full name.
1667 \item[\code{alien}, \code{c-call}] Export the features of the Alien
1668 foreign data structure facility (\pxlref{aliens}.)
1670 \item[\code{pcl}] This package contains PCL (Portable CommonLoops),
1671 which is a portable implementation of CLOS (the Common Lisp Object
1672 System.) This implements most (but not all) of the features in the
1673 CLOS chapter of \cltltwo.
1675 \item[\code{debug}] The \code{debug} package contains the command-line
1676 oriented debugger. It exports various utility functions and
1679 \item[\code{debug-internals}] The \code{debug-internals} package
1680 exports the primitives used to write debuggers.
1681 \xlref{debug-internals}.
1683 \item[\code{extensions (ext)}] The \code{extensions} package exports
1684 local extensions to Common Lisp that are documented in this manual.
1685 Examples include the \code{save-lisp} function and time parsing.
1687 \item[\code{hemlock (ed)}] The \code{hemlock} package contains all the
1688 code to implement Hemlock commands. The \code{hemlock} package
1689 currently exports no symbols.
1691 \item[\code{hemlock-internals (hi)}] The \code{hemlock-internals}
1692 package contains code that implements low level primitives and
1693 exports those symbols used to write Hemlock commands.
1695 \item[\code{keyword}] The \code{keyword} package contains keywords
1696 (e.g., \kwd{start}). All symbols in the \code{keyword} package are
1697 exported and evaluate to themselves (i.e., the value of the symbol
1698 is the symbol itself).
1700 \item[\code{profile}] The \code{profile} package exports a simple
1701 run-time profiling facility (\pxlref{profiling}).
1703 \item[\code{common-lisp (cl lisp)}] The \code{common-lisp} package
1704 exports all the symbols defined by \i{Common Lisp: the Language} and
1705 only those symbols. Strictly portable Lisp code will depend only on
1706 the symbols exported from the \code{lisp} package.
1708 \item[\code{unix}, \code{mach}] These packages export system call
1709 interfaces to generic BSD Unix and Mach (\pxlref{unix-interface}).
1711 \item[\code{system (sys)}] The \code{system} package contains
1712 functions and information necessary for system interfacing. This
1713 package is used by the \code{lisp} package and exports several
1714 symbols that are necessary to interface to system code.
1716 \item[\code{common-lisp-user (user cl-user)}] The
1717 \code{common-lisp-user} package is the default package and is where
1718 a user's code and data are placed unless otherwise specified. This
1719 package exports no symbols.
1721 \item[\code{xlib}] The \code{xlib} package contains the Common Lisp X
1722 interface (CLX) to the X11 protocol. This is mostly Lisp code with
1723 a couple of functions that are defined in C to connect to the
1726 \item[\code{wire}] The \code{wire} package exports a remote procedure
1727 call facility (\pxlref{remote}).
1731 %%\node The Editor, Garbage Collection, Packages, Design Choices and Extensions
1732 \section{The Editor}
1734 The \code{ed} function invokes the Hemlock editor which is described
1735 in \i{Hemlock User's Manual} and \i{Hemlock Command Implementor's
1736 Manual}. Most users at CMU prefer to use Hemlock's slave \Llisp{}
1737 mechanism which provides an interactive buffer for the
1738 \code{read-eval-print} loop and editor commands for evaluating and
1739 compiling text from a buffer into the slave \Llisp. Since the editor
1740 runs in the \Llisp, using slaves keeps users from trashing their
1741 editor by developing in the same \Llisp{} with \Hemlock.
1744 %%\node Garbage Collection, Describe, The Editor, Design Choices and Extensions
1745 \section{Garbage Collection}
1747 CMU Common Lisp uses a stop-and-copy garbage collector that compacts
1748 the items in dynamic space every time it runs. Most users cause the
1749 system to garbage collect (GC) frequently, long before space is
1750 exhausted. With 16 or 24 megabytes of memory, causing GC's more
1751 frequently on less garbage allows the system to GC without much (if
1755 With the default value for the following variable, you can expect a GC to take
1756 about one minute of elapsed time on a 6 megabyte machine running X as well as
1757 Lisp. On machines with 8 megabytes or more of memory a GC should run without
1758 much (if any) paging. GC's run more frequently but tend to take only about 5
1762 The following functions invoke the garbage collector or control whether
1763 automatic garbage collection is in effect:
1765 \begin{defun}{extensions:}{gc}{}
1767 This function runs the garbage collector. If
1768 \code{ext:*gc-verbose*} is non-\nil, then it invokes
1769 \code{ext:*gc-notify-before*} before GC'ing and
1770 \code{ext:*gc-notify-after*} afterwards.
1773 \begin{defun}{extensions:}{gc-off}{}
1775 This function inhibits automatic garbage collection. After calling
1776 it, the system will not GC unless you call \code{ext:gc} or
1780 \begin{defun}{extensions:}{gc-on}{}
1782 This function reinstates automatic garbage collection. If the
1783 system would have GC'ed while automatic GC was inhibited, then this
1784 will call \code{ext:gc}.
1788 \subsection{GC Parameters}
1789 The following variables control the behavior of the garbage collector:
1791 \begin{defvar}{extensions:}{bytes-consed-between-gcs}
1793 CMU Common Lisp automatically GC's whenever the amount of memory
1794 allocated to dynamic objects exceeds the value of an internal
1795 variable. After each GC, the system sets this internal variable to
1796 the amount of dynamic space in use at that point plus the value of
1797 the variable \code{ext:*bytes-consed-between-gcs*}. The default
1801 \begin{defvar}{extensions:}{gc-verbose}
1803 This variable controls whether \code{ext:gc} invokes the functions
1804 in \code{ext:*gc-notify-before*} and
1805 \code{ext:*gc-notify-after*}. If \code{*gc-verbose*} is \nil,
1806 \code{ext:gc} foregoes printing any messages. The default value is
1810 \begin{defvar}{extensions:}{gc-notify-before}
1812 This variable's value is a function that should notify the user that
1813 the system is about to GC. It takes one argument, the amount of
1814 dynamic space in use before the GC measured in bytes. The default
1815 value of this variable is a function that prints a message similar
1818 \b{[GC threshold exceeded with 2,107,124 bytes in use. Commencing GC.]}
1822 \begin{defvar}{extensions:}{gc-notify-after}
1824 This variable's value is a function that should notify the user when
1825 a GC finishes. The function must take three arguments, the amount
1826 of dynamic space retained by the GC, the amount of dynamic space
1827 freed, and the new threshold which is the minimum amount of space in
1828 use before the next GC will occur. All values are byte quantities.
1829 The default value of this variable is a function that prints a
1830 message similar to the following:
1832 \b{[GC completed with 25,680 bytes retained and 2,096,808 bytes freed.]}
1833 \b{[GC will next occur when at least 2,025,680 bytes are in use.]}
1837 Note that a garbage collection will not happen at exactly the new
1838 threshold printed by the default \code{ext:*gc-notify-after*}
1839 function. The system periodically checks whether this threshold has
1840 been exceeded, and only then does a garbage collection.
1842 \begin{defvar}{extensions:}{gc-inhibit-hook}
1844 This variable's value is either a function of one argument or \nil.
1845 When the system has triggered an automatic GC, if this variable is a
1846 function, then the system calls the function with the amount of
1847 dynamic space currently in use (measured in bytes). If the function
1848 returns \nil, then the GC occurs; otherwise, the system inhibits
1849 automatic GC as if you had called \code{ext:gc-off}. The writer of
1850 this hook is responsible for knowing when automatic GC has been
1851 turned off and for calling or providing a way to call
1852 \code{ext:gc-on}. The default value of this variable is \nil.
1855 \begin{defvar}{extensions:}{before-gc-hooks}
1856 \defvarx[extensions:]{after-gc-hooks}
1858 These variables' values are lists of functions to call before or
1859 after any GC occurs. The system provides these purely for
1860 side-effect, and the functions take no arguments.
1864 \subsection{Weak Pointers}
1866 A weak pointer provides a way to maintain a reference to an object
1867 without preventing an object from being garbage collected. If the
1868 garbage collector discovers that the only pointers to an object are
1869 weak pointers, then it breaks the weak pointers and deallocates the
1872 \begin{defun}{extensions:}{make-weak-pointer}{\args{\var{object}}}
1873 \defunx[extensions:]{weak-pointer-value}{\args{\var{weak-pointer}}}
1875 \code{make-weak-pointer} returns a weak pointer to an object.
1876 \code{weak-pointer-value} follows a weak pointer, returning the two
1877 values: the object pointed to (or \false{} if broken) and a boolean
1878 value which is true if the pointer has been broken.
1882 \subsection{Finalization}
1884 Finalization provides a ``hook'' that is triggered when the garbage
1885 collector reclaims an object. It is usually used to recover non-Lisp
1886 resources that were allocated to implement the finalized Lisp object.
1887 For example, when a unix file-descriptor stream is collected,
1888 finalization is used to close the underlying file descriptor.
1890 \begin{defun}{extensions:}{finalize}{\args{\var{object} \var{function}}}
1892 This function registers \var{object} for finalization.
1893 \var{function} is called with no arguments when \var{object} is
1894 reclaimed. Normally \var{function} will be a closure over the
1895 underlying state that needs to be freed, e.g. the unix file
1896 descriptor in the fd-stream case. Note that \var{function} must not
1897 close over \var{object} itself, as this prevents the object from
1898 ever becoming garbage.
1901 \begin{defun}{extensions:}{cancel-finalization}{\args{\var{object}}}
1903 This function cancels any finalization request for \var{object}.
1906 %%\node Describe, The Inspector, Garbage Collection, Design Choices and Extensions
1909 In addition to the basic function described below, there are a number of
1910 switches and other things that can be used to control \code{describe}'s
1913 \begin{defun}{}{describe}{ \args{\var{object} \ampoptional{} \var{stream}}}
1915 The \code{describe} function prints useful information about
1916 \var{object} on \var{stream}, which defaults to
1917 \code{*standard-output*}. For any object, \code{describe} will
1918 print out the type. Then it prints other information based on the
1919 type of \var{object}. The types which are presently handled are:
1923 \item[\tindexed{hash-table}] \code{describe} prints the number of
1924 entries currently in the hash table and the number of buckets
1925 currently allocated.
1927 \item[\tindexed{function}] \code{describe} prints a list of the
1928 function's name (if any) and its formal parameters. If the name
1929 has function documentation, then it will be printed. If the
1930 function is compiled, then the file where it is defined will be
1933 \item[\tindexed{fixnum}] \code{describe} prints whether the integer
1936 \item[\tindexed{symbol}] The symbol's value, properties, and
1937 documentation are printed. If the symbol has a function
1938 definition, then the function is described.
1940 If there is anything interesting to be said about some component of
1941 the object, \code{describe} will invoke itself recursively to describe that
1942 object. The level of recursion is indicated by indenting output.
1945 \begin{defvar}{extensions:}{describe-level}
1947 The maximum level of recursive description allowed. Initially two.
1950 \begin{defvar}{extensions:}{describe-indentation}
1952 The number of spaces to indent for each level of recursive
1953 description, initially three.
1956 \begin{defvar}{extensions:}{describe-print-level}
1957 \defvarx[extensions:]{describe-print-length}
1959 The values of \code{*print-level*} and \code{*print-length*} during
1960 description. Initially two and five.
1963 %%\node The Inspector, Load, Describe, Design Choices and Extensions
1964 \section{The Inspector}
1966 \cmucl{} has both a graphical inspector that uses X windows and a simple
1967 terminal-based inspector.
1969 \begin{defun}{}{inspect}{ \args{\ampoptional{} \var{object}}}
1971 \code{inspect} calls the inspector on the optional argument
1972 \var{object}. If \var{object} is unsupplied, \code{inspect}
1973 immediately returns \false. Otherwise, the behavior of inspect
1974 depends on whether Lisp is running under X. When \code{inspect} is
1975 eventually exited, it returns some selected Lisp object.
1979 * The Graphical Interface::
1980 * The TTY Inspector::
1983 %%\node The Graphical Interface, The TTY Inspector, The Inspector, The Inspector
1984 \subsection{The Graphical Interface}
1985 \label{motif-interface}
1987 CMU Common Lisp has an interface to Motif which is functionally similar to
1988 CLM, but works better in CMU CL. See:
1990 \file{doc/motif-toolkit.doc}
1991 \file{doc/motif-internals.doc}
1994 This Motif interface has been used to write the inspector and graphical
1995 debugger. There is also a Lisp control panel with a simple file management
1996 facility, apropos and inspector dialogs, and controls for setting global
1997 options. See the \code{interface} and \code{toolkit} packages.
1999 \begin{defun}{interface:}{lisp-control-panel}{}
2001 This function creates a control panel for the Lisp process.
2004 \begin{defvar}{interface:}{interface-style}
2006 When the graphical interface is loaded, this variable controls
2007 whether it is used by \code{inspect} and the error system. If the
2008 value is \kwd{graphics} (the default) and the \code{DISPLAY}
2009 environment variable is defined, the graphical inspector and
2010 debugger will be invoked by \findexed{inspect} or when an error is
2011 signalled. Possible values are \kwd{graphics} and \kwd{tty}. If the
2012 value is \kwd{graphics}, but there is no X display, then we quietly
2013 use the TTY interface.
2016 %%\node The TTY Inspector, , The Graphical Interface, The Inspector
2017 \subsection{The TTY Inspector}
2019 If X is unavailable, a terminal inspector is invoked. The TTY inspector
2020 is a crude interface to \code{describe} which allows objects to be
2021 traversed and maintains a history. This inspector prints information
2022 about an object and a numbered list of the components of the object.
2023 The command-line based interface is a normal
2024 \code{read}--\code{eval}--\code{print} loop, but an integer \var{n}
2025 descends into the \var{n}'th component of the current object, and
2026 symbols with these special names are interpreted as commands:
2028 \item[U] Move back to the enclosing object. As you descend into the
2029 components of an object, a stack of all the objects previously seen is
2030 kept. This command pops you up one level of this stack.
2032 \item[Q, E] Return the current object from \code{inspect}.
2034 \item[R] Recompute object display, and print again. Useful if the
2035 object may have changed.
2037 \item[D] Display again without recomputing.
2039 \item[H, ?] Show help message.
2042 %%\node Load, The Reader, The Inspector, Design Choices and Extensions
2045 \begin{defun}{}{load}{%
2046 \args{\var{filename}
2047 \keys{\kwd{verbose} \kwd{print} \kwd{if-does-not-exist}}
2048 \morekeys{\kwd{if-source-newer} \kwd{contents}}}}
2050 As in standard Common Lisp, this function loads a file containing
2051 source or object code into the running Lisp. Several CMU extensions
2052 have been made to \code{load} to conveniently support a variety of
2053 program file organizations. \var{filename} may be a wildcard
2054 pathname such as \file{*.lisp}, in which case all matching files are
2057 If \var{filename} has a \code{pathname-type} (or extension), then
2058 that exact file is loaded. If the file has no extension, then this
2059 tells \code{load} to use a heuristic to load the ``right'' file.
2060 The \code{*load-source-types*} and \code{*load-object-types*}
2061 variables below are used to determine the default source and object
2062 file types. If only the source or the object file exists (but not
2063 both), then that file is quietly loaded. Similarly, if both the
2064 source and object file exist, and the object file is newer than the
2065 source file, then the object file is loaded. The value of the
2066 \var{if-source-newer} argument is used to determine what action to
2067 take when both the source and object files exist, but the object
2068 file is out of date:
2070 \item[\kwd{load-object}] The object file is loaded even though the
2071 source file is newer.
2073 \item[\kwd{load-source}] The source file is loaded instead of the
2076 \item[\kwd{compile}] The source file is compiled and then the new
2077 object file is loaded.
2079 \item[\kwd{query}] The user is asked a yes or no question to
2080 determine whether the source or object file is loaded.
2082 This argument defaults to the value of
2083 \code{ext:*load-if-source-newer*} (initially \kwd{load-object}.)
2085 The \var{contents} argument can be used to override the heuristic
2086 (based on the file extension) that normally determines whether to
2087 load the file as a source file or an object file. If non-null, this
2088 argument must be either \kwd{source} or \kwd{binary}, which forces
2089 loading in source and binary mode, respectively. You really
2090 shouldn't ever need to use this argument.
2093 \begin{defvar}{extensions:}{load-source-types}
2094 \defvarx[extensions:]{load-object-types}
2096 These variables are lists of possible \code{pathname-type} values
2097 for source and object files to be passed to \code{load}. These
2098 variables are only used when the file passed to \code{load} has no
2099 type; in this case, the possible source and object types are used to
2100 default the type in order to determine the names of the source and
2104 \begin{defvar}{extensions:}{load-if-source-newer}
2106 This variable determines the default value of the
2107 \var{if-source-newer} argument to \code{load}. Its initial value is
2111 %%\node The Reader, Stream Extensions, Load, Design Choices and Extensions
2112 \section{The Reader}
2114 \begin{defvar}{extensions:}{ignore-extra-close-parentheses}
2116 If this variable is \true{} (the default), then the reader merely
2117 prints a warning when an extra close parenthesis is detected
2118 (instead of signalling an error.)
2121 %%\node Stream Extensions, Running Programs from Lisp, The Reader, Design Choices and Extensions
2122 \section{Stream Extensions}
2123 \begin{defun}{extensions:}{read-n-bytes}{%
2124 \args{\var{stream buffer start numbytes}
2125 \ampoptional{} \var{eof-error-p}}}
2127 On streams that support it, this function reads multiple bytes of
2128 data into a buffer. The buffer must be a \code{simple-string} or
2129 \code{(simple-array (unsigned-byte 8) (*))}. The argument
2130 \var{numbytes} specifies the desired number of bytes, and the return
2131 value is the number of bytes actually read.
2133 \item If \var{eof-error-p} is true, an \tindexed{end-of-file}
2134 condition is signalled if end-of-file is encountered before
2135 \var{numbytes} bytes have been read.
2137 \item If \var{eof-error-p} is false, \code{read-n-bytes} reads as
2138 much data as is currently available (up to \var{numbytes} bytes.) On pipes or
2139 similar devices, this function returns as soon as any data is
2140 available, even if the amount read is less than \var{numbytes} and
2141 eof has not been hit. See also \funref{make-fd-stream}.
2144 %%\node Running Programs from Lisp, Saving a Core Image, The Reader, Design Choices and Extensions
2145 \section{Running Programs from Lisp}
2147 It is possible to run programs from Lisp by using the following function.
2149 \begin{defun}{extensions:}{run-program}{%
2150 \args{\var{program} \var{args}
2151 \keys{\kwd{env} \kwd{wait} \kwd{pty} \kwd{input}}
2152 \morekeys{\kwd{if-input-does-not-exist}}
2153 \yetmorekeys{\kwd{output} \kwd{if-output-exists}}
2154 \yetmorekeys{\kwd{error} \kwd{if-error-exists}}
2155 \yetmorekeys{\kwd{status-hook} \kwd{before-execve}}}}
2157 \code{run-program} runs \var{program} in a child process.
2158 \var{Program} should be a pathname or string naming the program.
2159 \var{Args} should be a list of strings which this passes to
2160 \var{program} as normal Unix parameters. For no arguments, specify
2161 \var{args} as \nil. The value returned is either a process
2162 structure or \nil. The process interface follows the description of
2163 \code{run-program}. If \code{run-program} fails to fork the child
2164 process, it returns \nil.
2166 Except for sharing file descriptors as explained in keyword argument
2167 descriptions, \code{run-program} closes all file descriptors in the
2168 child process before running the program. When you are done using a
2169 process, call \code{process-close} to reclaim system resources. You
2170 only need to do this when you supply \kwd{stream} for one of
2171 \kwd{input}, \kwd{output}, or \kwd{error}, or you supply \kwd{pty}
2172 non-\nil. If you feel safer, you can call \code{process-close} without
2173 penalty even when it is not needed to reclaim resources.
2175 \code{run-program} accepts the following keyword arguments:
2178 \item[\kwd{env}] This is an a-list mapping keywords to
2179 simple-strings. The default is \code{ext:*environment-list*}. If
2180 \kwd{env} is specified, \code{run-program} uses the value given
2181 and does not combine the environment passed to Lisp with the one
2184 \item[\kwd{wait}] If non-\nil{} (the default), wait until the child
2185 process terminates. If \nil, continue running Lisp while the
2188 \item[\kwd{pty}] This should be one of \true, \nil, or a stream. If
2189 specified non-\nil, the subprocess executes under a Unix \i{PTY}.
2190 If specified as a stream, the system collects all output to this
2191 pty and writes it to this stream. If specified as \true, the
2192 \code{process-pty} slot contains a stream from which you can read
2193 the program's output and to which you can write input for the
2194 program. The default is \nil.
2196 \item[\kwd{input}] This specifies how the program gets its input.
2197 If specified as a string, it is the name of a file that contains
2198 input for the child process. \code{run-program} opens the file as
2199 standard input. If specified as \nil{} (the default), then
2200 standard input is the file \file{/dev/null}. If specified as
2201 \true, the program uses the current standard input. This may
2202 cause some confusion if \kwd{wait} is \nil{} since two processes
2203 may use the terminal at the same time. If specified as
2204 \kwd{stream}, then the \code{process-input} slot contains an
2205 output stream. Anything written to this stream goes to the
2206 program as input. \kwd{input} may also be an input stream that
2207 already contains all the input for the process. In this case
2208 \code{run-program} reads all the input from this stream before
2209 returning, so this cannot be used to interact with the process.
2211 \item[\kwd{if-input-does-not-exist}] This specifies what to do if
2212 the input file does not exist. The following values are valid:
2213 \nil{} (the default) causes \code{run-program} to return \nil{}
2214 without doing anything; \kwd{create} creates the named file; and
2215 \kwd{error} signals an error.
2217 \item[\kwd{output}] This specifies what happens with the program's
2218 output. If specified as a pathname, it is the name of a file that
2219 contains output the program writes to its standard output. If
2220 specified as \nil{} (the default), all output goes to
2221 \file{/dev/null}. If specified as \true, the program writes to
2222 the Lisp process's standard output. This may cause confusion if
2223 \kwd{wait} is \nil{} since two processes may write to the terminal
2224 at the same time. If specified as \kwd{stream}, then the
2225 \code{process-output} slot contains an input stream from which you
2226 can read the program's output.
2228 \item[\kwd{if-output-exists}] This specifies what to do if the
2229 output file already exists. The following values are valid:
2230 \nil{} causes \code{run-program} to return \nil{} without doing
2231 anything; \kwd{error} (the default) signals an error;
2232 \kwd{supersede} overwrites the current file; and \kwd{append}
2233 appends all output to the file.
2235 \item[\kwd{error}] This is similar to \kwd{output}, except the file
2236 becomes the program's standard error. Additionally, \kwd{error}
2237 can be \kwd{output} in which case the program's error output is
2238 routed to the same place specified for \kwd{output}. If specified
2239 as \kwd{stream}, the \code{process-error} slot contains a stream
2240 similar to the \code{process-output} slot when specifying the
2241 \kwd{output} argument.
2243 \item[\kwd{if-error-exists}] This specifies what to do if the error
2244 output file already exists. It accepts the same values as
2245 \kwd{if-output-exists}.
2247 \item[\kwd{status-hook}] This specifies a function to call whenever
2248 the process changes status. This is especially useful when
2249 specifying \kwd{wait} as \nil. The function takes the process as
2250 a required argument.
2252 \item[\kwd{before-execve}] This specifies a function to run in the
2253 child process before it becomes the program to run. This is
2254 useful for actions such as authenticating the child process
2255 without modifying the parent Lisp process.
2261 * Process Accessors::
2264 %%\node Process Accessors, , Running Programs from Lisp, Running Programs from Lisp
2265 \subsection{Process Accessors}
2267 The following functions interface the process returned by \code{run-program}:
2269 \begin{defun}{extensions:}{process-p}{\args{\var{thing}}}
2271 This function returns \true{} if \var{thing} is a process.
2272 Otherwise it returns \nil{}.
2275 \begin{defun}{extensions:}{process-pid}{\args{\var{process}}}
2277 This function returns the process ID, an integer, for the
2281 \begin{defun}{extensions:}{process-status}{\args{\var{process}}}
2283 This function returns the current status of \var{process}, which is
2284 one of \kwd{running}, \kwd{stopped}, \kwd{exited}, or
2288 \begin{defun}{extensions:}{process-exit-code}{\args{\var{process}}}
2290 This function returns either the exit code for \var{process}, if it
2291 is \kwd{exited}, or the termination signal of \var{process} if it is
2292 \kwd{signaled}. The result is undefined for processes that are
2296 \begin{defun}{extensions:}{process-core-dumped}{\args{\var{process}}}
2298 This function returns \true{} if someone used a Unix signal to
2299 terminate the \var{process} and caused it to dump a Unix core image.
2302 \begin{defun}{extensions:}{process-pty}{\args{\var{process}}}
2304 This function returns either the two-way stream connected to
2305 \var{process}'s Unix \i{PTY} connection or \nil{} if there is none.
2308 \begin{defun}{extensions:}{process-input}{\args{\var{process}}}
2309 \defunx[extensions:]{process-output}{\args{\var{process}}}
2310 \defunx[extensions:]{process-error}{\args{\var{process}}}
2312 If the corresponding stream was created, these functions return the
2313 input, output or error file descriptor. \nil{} is returned if there
2317 \begin{defun}{extensions:}{process-status-hook}{\args{\var{process}}}
2319 This function returns the current function to call whenever
2320 \var{process}'s status changes. This function takes the
2321 \var{process} as a required argument. \code{process-status-hook} is
2325 \begin{defun}{extensions:}{process-plist}{\args{\var{process}}}
2327 This function returns annotations supplied by users, and it is
2328 \code{setf}'able. This is available solely for users to associate
2329 information with \var{process} without having to build a-lists or
2330 hash tables of process structures.
2333 \begin{defun}{extensions:}{process-wait}{
2334 \args{\var{process} \ampoptional{} \var{check-for-stopped}}}
2336 This function waits for \var{process} to finish. If
2337 \var{check-for-stopped} is non-\nil, this also returns when
2338 \var{process} stops.
2341 \begin{defun}{extensions:}{process-kill}{%
2342 \args{\var{process} \var{signal} \ampoptional{} \var{whom}}}
2344 This function sends the Unix \var{signal} to \var{process}.
2345 \var{Signal} should be the number of the signal or a keyword with
2346 the Unix name (for example, \kwd{sigsegv}). \var{Whom} should be
2347 one of the following:
2350 \item[\kwd{pid}] This is the default, and it indicates sending the
2351 signal to \var{process} only.
2353 \item[\kwd{process-group}] This indicates sending the signal to
2354 \var{process}'s group.
2356 \item[\kwd{pty-process-group}] This indicates sending the signal to
2357 the process group currently in the foreground on the Unix \i{PTY}
2358 connected to \var{process}. This last option is useful if the
2359 running program is a shell, and you wish to signal the program
2360 running under the shell, not the shell itself. If
2361 \code{process-pty} of \var{process} is \nil, using this option is
2366 \begin{defun}{extensions:}{process-alive-p}{\args{\var{process}}}
2368 This function returns \true{} if \var{process}'s status is either
2369 \kwd{running} or \kwd{stopped}.
2372 \begin{defun}{extensions:}{process-close}{\args{\var{process}}}
2374 This function closes all the streams associated with \var{process}.
2375 When you are done using a process, call this to reclaim system
2380 %%\node Saving a Core Image, Pathnames, Running Programs from Lisp, Design Choices and Extensions
2381 \section{Saving a Core Image}
2383 A mechanism has been provided to save a running Lisp core image and to
2384 later restore it. This is convenient if you don't want to load several files
2385 into a Lisp when you first start it up. The main problem is the large
2386 size of each saved Lisp image, typically at least 20 megabytes.
2388 \begin{defun}{extensions:}{save-lisp}{%
2390 \keys{\kwd{purify} \kwd{root-structures} \kwd{init-function}}
2391 \morekeys{\kwd{load-init-file} \kwd{print-herald} \kwd{site-init}}
2392 \yetmorekeys{\kwd{process-command-line}}}}
2394 The \code{save-lisp} function saves the state of the currently
2395 running Lisp core image in \var{file}. The keyword arguments have
2396 the following meaning:
2399 \item[\kwd{purify}] If non-NIL (the default), the core image is
2400 purified before it is saved (see \funref{purify}.) This reduces
2401 the amount of work the garbage collector must do when the
2402 resulting core image is being run. Also, if more than one Lisp is
2403 running on the same machine, this maximizes the amount of memory
2404 that can be shared between the two processes.
2406 \item[\kwd{root-structures}]
2408 This should be a list of the main entry points in any newly
2409 loaded systems. This need not be supplied, but locality and/or
2410 GC performance will be better if they are. Meaningless if
2411 \kwd{purify} is \nil. See \funref{purify}.
2414 \item[\kwd{init-function}] This is the function that starts running
2415 when the created core file is resumed. The default function
2416 simply invokes the top level read-eval-print loop. If the
2417 function returns, Lisp will exit.
2419 \item[\kwd{load-init-file}] If non-NIL, then load an init file;
2420 either the one specified on the command line or
2421 ``\w{\file{init.}\var{fasl-type}}'', or, if
2422 ``\w{\file{init.}\var{fasl-type}}'' does not exist,
2423 \code{init.lisp} from the user's home directory. If the init file
2424 is found, it is loaded into the resumed core file before the
2425 read-eval-print loop is entered.
2427 \item[\kwd{site-init}] If non-NIL, the name of the site init file to
2428 quietly load. The default is \file{library:site-init}. No error
2429 is signalled if the file does not exist.
2431 \item[\kwd{print-herald}] If non-NIL (the default), then print out
2432 the standard Lisp herald when starting.
2434 \item[\kwd{process-command-line}] If non-NIL (the default),
2435 processes the command line switches and performs the appropriate
2440 To resume a saved file, type:
2445 \begin{defun}{extensions:}{purify}{
2447 \keys{\kwd{root-structures} \kwd{environment-name}}}}
2449 This function optimizes garbage collection by moving all currently
2450 live objects into non-collected storage. Once statically allocated,
2451 the objects can never be reclaimed, even if all pointers to them are
2452 dropped. This function should generally be called after a large
2453 system has been loaded and initialized.
2456 \item[\kwd{root-structures}] is an optional list of objects which
2457 should be copied first to maximize locality. This should be a
2458 list of the main entry points for the resulting core image. The
2459 purification process tries to localize symbols, functions, etc.,
2460 in the core image so that paging performance is improved. The
2461 default value is NIL which means that Lisp objects will still be
2462 localized but probably not as optimally as they could be.
2464 \code{defstruct} structures defined with the \code{(:pure t)}
2465 option are moved into read-only storage, further reducing GC cost.
2466 List and vector slots of pure structures are also moved into
2469 \item[\kwd{environment-name}] is gratuitous documentation for the
2470 compacted version of the current global environment (as seen in
2471 \code{c::*info-environment*}.) If \false{} is supplied, then
2472 environment compaction is inhibited.
2476 %%\node Pathnames, Filesystem Operations, Saving a Core Image, Design Choices and Extensions
2479 In \clisp{} quite a few aspects of \tindexed{pathname} semantics are left to
2484 * Wildcard Pathnames::
2485 * Logical Pathnames::
2487 * Predefined Search-Lists::
2488 * Search-List Operations::
2489 * Search List Example::
2492 %%\node Unix Pathnames, Wildcard Pathnames, Pathnames, Pathnames
2493 \subsection{Unix Pathnames}
2494 \cpsubindex{unix}{pathnames}
2496 Unix pathnames are always parsed with a \code{unix-host} object as the host and
2497 \code{nil} as the device. The last two dots (\code{.}) in the namestring mark
2498 the type and version; however, if the first character is a dot, it is considered
2499 part of the name. If the last character is a dot, then the pathname has the
2500 empty-string as its type. The type defaults to \code{nil} and the version
2501 defaults to \kwd{newest}.
2504 (values (pathname-name x) (pathname-type x) (pathname-version x)))
2506 (parse "foo") \result "foo", NIL, :NEWEST
2507 (parse "foo.bar") \result "foo", "bar", :NEWEST
2508 (parse ".foo") \result ".foo", NIL, :NEWEST
2509 (parse ".foo.bar") \result ".foo", "bar", :NEWEST
2510 (parse "..") \result ".", "", :NEWEST
2511 (parse "foo.") \result "foo", "", :NEWEST
2512 (parse "foo.bar.1") \result "foo", "bar", 1
2513 (parse "foo.bar.baz") \result "foo.bar", "baz", :NEWEST
2516 The directory of pathnames beginning with a slash (or a search-list,
2517 \pxlref{search-lists}) starts with \kwd{absolute}; others start with
2518 \kwd{relative}. The \code{..} directory is parsed as \kwd{up}; there is no
2519 namestring for \kwd{back}:
2521 (pathname-directory "/usr/foo/bar.baz") \result (:ABSOLUTE "usr" "foo")
2522 (pathname-directory "../foo/bar.baz") \result (:RELATIVE :UP "foo")
2525 %%\node Wildcard Pathnames, Logical Pathnames, Unix Pathnames, Pathnames
2526 \subsection{Wildcard Pathnames}
2528 Wildcards are supported in Unix pathnames. If `\code{*}' is specified for a
2529 part of a pathname, that is parsed as \kwd{wild}. `\code{**}' can be used as a
2530 directory name to indicate \kwd{wild-inferiors}. Filesystem operations
2531 treat \kwd{wild-inferiors} the same as\ \kwd{wild}, but pathname pattern
2532 matching (e.g.\ for logical pathname translation, \pxlref{logical-pathnames})
2533 matches any number of directory parts with `\code{**}' (see
2534 \pxlref{wildcard-matching}.)
2537 `\code{*}' embedded in a pathname part matches any number of characters.
2538 Similarly, `\code{?}' matches exactly one character, and `\code{[a,b]}'
2539 matches the characters `\code{a}' or `\code{b}'. These pathname parts are
2540 parsed as \code{pattern} objects.
2542 Backslash can be used as an escape character in namestring
2543 parsing to prevent the next character from being treated as a wildcard. Note
2544 that if typed in a string constant, the backslash must be doubled, since the
2545 string reader also uses backslash as a quote:
2547 (pathname-name "foo\(\backslash\backslash\)*bar") => "foo*bar"
2550 %%\node Logical Pathnames, Search Lists, Wildcard Pathnames, Pathnames
2551 \subsection{Logical Pathnames}
2552 \cindex{logical pathnames}
2553 \label{logical-pathnames}
2555 If a namestring begins with the name of a defined logical pathname
2556 host followed by a colon, then it will be parsed as a logical
2557 pathname. Both `\code{*}' and `\code{**}' wildcards are implemented.
2558 \findexed{load-logical-pathname-translations} on \var{name} looks for a
2559 logical host definition file in
2560 \w{\file{library:\var{name}.translations}}. Note that \file{library:}
2561 designates the search list (\pxlref{search-lists}) initialized to the
2562 \cmucl{} \file{lib/} directory, not a logical pathname. The format of
2563 the file is a single list of two-lists of the from and to patterns:
2565 (("foo;*.text" "/usr/ram/foo/*.txt")
2566 ("foo;*.lisp" "/usr/ram/foo/*.l"))
2571 * Search List Example::
2574 %%\node Search Lists, Predefined Search-Lists, Logical Pathnames, Pathnames
2575 \subsection{Search Lists}
2576 \cindex{search lists}
2577 \label{search-lists}
2579 Search lists are an extension to Common Lisp pathnames. They serve a function
2580 somewhat similar to Common Lisp logical pathnames, but work more like Unix PATH
2581 variables. Search lists are used for two purposes:
2583 \item They provide a convenient shorthand for commonly used directory names,
2586 \item They allow the abstract (directory structure independent) specification
2587 of file locations in program pathname constants (similar to logical pathnames.)
2589 Each search list has an associated list of directories (represented as
2590 pathnames with no name or type component.) The namestring for any relative
2591 pathname may be prefixed with ``\var{slist}\code{:}'', indicating that the
2592 pathname is relative to the search list \var{slist} (instead of to the current
2593 working directory.) Once qualified with a search list, the pathname is no
2594 longer considered to be relative.
2596 When a search list qualified pathname is passed to a file-system operation such
2597 as \code{open}, \code{load} or \code{truename}, each directory in the search
2598 list is successively used as the root of the pathname until the file is
2599 located. When a file is written to a search list directory, the file is always
2600 written to the first directory in the list.
2602 %%\node Predefined Search-Lists, Search-List Operations, Search Lists, Pathnames
2603 \subsection{Predefined Search-Lists}
2605 These search-lists are initialized from the Unix environment or when Lisp was
2608 \item[\code{default:}] The current directory at startup.
2610 \item[\code{home:}] The user's home directory.
2612 \item[\code{library:}] The \cmucl{} \file{lib/} directory (\code{CMUCLLIB} environment
2615 \item[\code{path:}] The Unix command path (\code{PATH} environment variable.)
2617 \item[\code{target:}] The root of the tree where \cmucl{} was compiled.
2619 It can be useful to redefine these search-lists, for example, \file{library:}
2620 can be augmented to allow logical pathname translations to be located, and
2621 \file{target:} can be redefined to point to where \cmucl{} system sources are
2624 %%\node Search-List Operations, Search List Example, Predefined Search-Lists, Pathnames
2625 \subsection{Search-List Operations}
2627 These operations define and access search-list definitions. A search-list name
2628 may be parsed into a pathname before the search-list is actually defined, but
2629 the search-list must be defined before it can actually be used in a filesystem
2632 \begin{defun}{extensions:}{search-list}{\var{name}}
2634 This function returns the list of directories associated with the
2635 search list \var{name}. If \var{name} is not a defined search list,
2636 then an error is signaled. When set with \code{setf}, the list of
2637 directories is changed to the new value. If the new value is just a
2638 namestring or pathname, then it is interpreted as a one-element
2639 list. Note that (unlike Unix pathnames), search list names are
2643 \begin{defun}{extensions:}{search-list-defined-p}{\var{name}}
2644 \defunx[extensions:]{clear-search-list}{\var{name}}
2646 \code{search-list-defined-p} returns \true{} if \var{name} is a
2647 defined search list name, \false{} otherwise.
2648 \code{clear-search-list} makes the search list \var{name} undefined.
2651 \begin{defmac}{extensions:}{enumerate-search-list}{%
2652 \args{(\var{var} \var{pathname} \mopt{result}) \mstar{form}}}
2654 This macro provides an interface to search list resolution. The
2655 body \var{forms} are executed with \var{var} bound to each
2656 successive possible expansion for \var{pathname}. If \var{pathname} does
2657 not contain a search-list, then the body is executed exactly once.
2658 Everything is wrapped in a block named \nil, so \code{return} can be
2659 used to terminate early. The \var{result} form (default \nil) is
2660 evaluated to determine the result of the iteration.
2664 * Search List Example::
2667 %%\node Search List Example, , Search-List Operations, Pathnames
2668 \subsection{Search List Example}
2670 The search list \code{code:} can be defined as follows:
2672 (setf (ext:search-list "code:") '("/usr/lisp/code/"))
2674 It is now possible to use \code{code:} as an abbreviation for the directory
2675 \file{/usr/lisp/code/} in all file operations. For example, you can now specify
2676 \code{code:eval.lisp} to refer to the file \file{/usr/lisp/code/eval.lisp}.
2678 To obtain the value of a search-list name, use the function \code{search-list}
2681 (ext:search-list \var{name})
2683 Where \var{name} is the name of a search list as described above. For example,
2684 calling \code{ext:search-list} on \code{code:} as follows:
2686 (ext:search-list "code:")
2688 returns the list \code{("/usr/lisp/code/")}.
2690 %%\node Filesystem Operations, Time Parsing and Formatting, Pathnames, Design Choices and Extensions
2691 \section{Filesystem Operations}
2693 \cmucl{} provides a number of extensions and optional features beyond those
2697 * Wildcard Matching::
2698 * File Name Completion::
2699 * Miscellaneous Filesystem Operations::
2702 %%\node Wildcard Matching, File Name Completion, Filesystem Operations, Filesystem Operations
2703 \subsection{Wildcard Matching}
2704 \label{wildcard-matching}
2706 Unix filesystem operations such as \code{open} will accept wildcard pathnames
2707 that match a single file (of course, \code{directory} allows any number of
2708 matches.) Filesystem operations treat \kwd{wild-inferiors} the same as\
2711 \begin{defun}{}{directory}{\var{wildname} \keys{\kwd{all} \kwd{check-for-subdirs}}
2712 \morekeys{\kwd{follow-links}}}
2714 The keyword arguments to this \clisp{} function are a CMU extension.
2715 The arguments (all default to \code{t}) have the following
2718 \item[\kwd{all}] Include files beginning with dot such as
2719 \file{.login}, similar to ``\code{ls -a}''.
2721 \item[\kwd{check-for-subdirs}] Test whether files are directories,
2722 similar to ``\code{ls -F}''.
2724 \item[\kwd{follow-links}] Call \code{truename} on each file, which
2725 expands out all symbolic links. Note that this option can easily
2726 result in pathnames being returned which have a different
2727 directory from the one in the \var{wildname} argument.
2731 \begin{defun}{extensions:}{print-directory}{%
2732 \args{\var{wildname}
2733 \ampoptional{} \var{stream}
2734 \keys{\kwd{all} \kwd{verbose}}
2735 \morekeys{\kwd{return-list}}}}
2737 Print a directory of \var{wildname} listing to \var{stream} (default
2738 \code{*standard-output*}.) \kwd{all} and \kwd{verbose} both default
2739 to \false{} and correspond to the ``\code{-a}'' and ``\code{-l}''
2740 options of \file{ls}. Normally this function returns \false{}, but
2741 if \kwd{return-list} is true, a list of the matched pathnames is
2745 %%\node File Name Completion, Miscellaneous Filesystem Operations, Wildcard Matching, Filesystem Operations
2746 \subsection{File Name Completion}
2748 \begin{defun}{extensions:}{complete-file}{%
2749 \args{\var{pathname}
2750 \keys{\kwd{defaults} \kwd{ignore-types}}}}
2752 Attempt to complete a file name to the longest unambiguous prefix.
2753 If supplied, the directory from \kwd{defaults} is used as the ``working
2754 directory'' when doing completion. \kwd{ignore-types} is a list of
2755 strings of the pathname types (a.k.a. extensions) that should be
2756 disregarded as possible matches (binary file names, etc.)
2759 \begin{defun}{extensions:}{ambiguous-files}{%
2760 \args{\var{pathname}
2761 \ampoptional{} \var{defaults}}}
2763 Return a list of pathnames for all the possible completions of
2764 \var{pathname} with respect to \var{defaults}.
2767 %%\node Miscellaneous Filesystem Operations, , File Name Completion, Filesystem Operations
2768 \subsection{Miscellaneous Filesystem Operations}
2770 \begin{defun}{extensions:}{default-directory}{}
2772 Return the current working directory as a pathname. If set with
2773 \code{setf}, set the working directory.
2776 \begin{defun}{extensions:}{file-writable}{\var{name}}
2778 This function accepts a pathname and returns \true{} if the current
2779 process can write it, and \false{} otherwise.
2782 \begin{defun}{extensions:}{unix-namestring}{%
2783 \args{\var{pathname}
2784 \ampoptional{} \var{for-input}}}
2786 This function converts \var{pathname} into a string that can be used
2787 with UNIX system calls. Search-lists and wildcards are expanded.
2788 \var{for-input} controls the treatment of search-lists: when true
2789 (the default) and the file exists anywhere on the search-list, then
2790 that absolute pathname is returned; otherwise the first element of
2791 the search-list is used as the directory.
2794 %%\node Time Parsing and Formatting, Lisp Library, Filesystem Operations, Design Choices and Extensions
2795 \section{Time Parsing and Formatting}
2797 \cindex{time parsing} \cindex{time formatting}
2798 Functions are provided to allow parsing strings containing time information
2799 and printing time in various formats.
2801 \begin{defun}{extensions:}{parse-time}{%
2802 \args{\var{time-string}
2803 \keys{\kwd{error-on-mismatch} \kwd{default-seconds}}
2804 \morekeys{\kwd{default-minutes} \kwd{default-hours}}
2805 \yetmorekeys{\kwd{default-day} \kwd{default-month}}
2806 \yetmorekeys{\kwd{default-year} \kwd{default-zone}}
2807 \yetmorekeys{\kwd{default-weekday}}}}
2809 \code{parse-time} accepts a string containing a time (e.g.,
2810 \w{"\code{Jan 12, 1952}"}) and returns the universal time if it is
2811 successful. If it is unsuccessful and the keyword argument
2812 \kwd{error-on-mismatch} is non-\FALSE, it signals an error.
2813 Otherwise it returns \FALSE. The other keyword arguments have the
2817 \item[\kwd{default-seconds}] specifies the default value for the
2818 seconds value if one is not provided by \var{time-string}. The
2821 \item[\kwd{default-minutes}] specifies the default value for the
2822 minutes value if one is not provided by \var{time-string}. The
2825 \item[\kwd{default-hours}] specifies the default value for the hours
2826 value if one is not provided by \var{time-string}. The default
2829 \item[\kwd{default-day}] specifies the default value for the day
2830 value if one is not provided by \var{time-string}. The default
2831 value is the current day.
2833 \item[\kwd{default-month}] specifies the default value for the month
2834 value if one is not provided by \var{time-string}. The default
2835 value is the current month.
2837 \item[\kwd{default-year}] specifies the default value for the year
2838 value if one is not provided by \var{time-string}. The default
2839 value is the current year.
2841 \item[\kwd{default-zone}] specifies the default value for the time
2842 zone value if one is not provided by \var{time-string}. The
2843 default value is the current time zone.
2845 \item[\kwd{default-weekday}] specifies the default value for the day
2846 of the week if one is not provided by \var{time-string}. The
2847 default value is the current day of the week.
2849 Any of the above keywords can be given the value \kwd{current} which
2850 means to use the current value as determined by a call to the
2854 \begin{defun}{extensions:}{format-universal-time}{
2855 \args{\var{dest} \var{universal-time}
2857 \keys{\kwd{timezone}}
2858 \morekeys{\kwd{style} \kwd{date-first}}
2859 \yetmorekeys{\kwd{print-seconds} \kwd{print-meridian}}
2860 \yetmorekeys{\kwd{print-timezone} \kwd{print-weekday}}}}
2861 \defunx[extensions:]{format-decoded-time}{
2862 \args{\var{dest} \var{seconds} \var{minutes} \var{hours} \var{day} \var{month} \var{year}
2864 \keys{\kwd{timezone}}
2865 \morekeys{\kwd{style} \kwd{date-first}}
2866 \yetmorekeys{\kwd{print-seconds} \kwd{print-meridian}}
2867 \yetmorekeys{\kwd{print-timezone} \kwd{print-weekday}}}}
2869 \code{format-universal-time} formats the time specified by
2870 \var{universal-time}. \code{format-decoded-time} formats the time
2871 specified by \var{seconds}, \var{minutes}, \var{hours}, \var{day},
2872 \var{month}, and \var{year}. \var{Dest} is any destination
2873 accepted by the \code{format} function. The keyword arguments have
2874 the following meaning:
2877 \item[\kwd{timezone}] is an integer specifying the hours west of
2878 Greenwich. \kwd{timezone} defaults to the current time zone.
2880 \item[\kwd{style}] specifies the style to use in formatting the
2881 time. The legal values are:
2884 \item[\kwd{short}] specifies to use a numeric date.
2886 \item[\kwd{long}] specifies to format months and weekdays as
2887 words instead of numbers.
2889 \item[\kwd{abbreviated}] is similar to long except the words are
2892 \item[\kwd{government}] is similar to abbreviated, except the
2893 date is of the form ``day month year'' instead of ``month day,
2897 \item[\kwd{date-first}] if non-\false{} (default) will place the
2898 date first. Otherwise, the time is placed first.
2900 \item[\kwd{print-seconds}] if non-\false{} (default) will format
2901 the seconds as part of the time. Otherwise, the seconds will be
2904 \item[\kwd{print-meridian}] if non-\false{} (default) will format
2905 ``AM'' or ``PM'' as part of the time. Otherwise, the ``AM'' or
2906 ``PM'' will be omitted.
2908 \item[\kwd{print-timezone}] if non-\false{} (default) will format
2909 the time zone as part of the time. Otherwise, the time zone will
2912 %%\item[\kwd{print-seconds}]
2913 %%if non-\false{} (default) will format the seconds as part of
2914 %%the time. Otherwise, the seconds will be omitted.
2916 \item[\kwd{print-weekday}] if non-\false{} (default) will format
2917 the weekday as part of the date. Otherwise, the weekday will be
2924 \section{Random Number Generation}
2925 \cindex{random number generation}
2927 \clisp{} includes a random number generator as a standard part of the
2928 language; however, the implementation of the generator is not
2929 specified. Two random number generators are available in \cmucl{},
2930 depending on the version.
2932 \subsection{Original Generator}
2933 \cpsubindex{random number generation}{original generator}
2934 The default random number generator uses a lagged Fibonacci generator
2937 z[i] = z[i - 24] - z[i - 55] \bmod 536870908
2939 where $z[i]$ is the $i$'th random number. This generator produces
2940 small integer-valued numbers. For larger integers, the small random
2941 integers are concatenated to produce larger integers. For
2942 floating-point numbers, the bits from this generator are used as the
2943 bits of the floating-point significand.
2945 \subsection{New Generator}
2946 \cpsubindex{random number generation}{new generator}
2948 In some versions of \cmucl{}, the original generator above has been
2949 replaced with a subtract-with-borrow generator
2950 combined with a Weyl generator.\footnote{The generator described here
2951 is available if the feature \kwd{new-random} is available.} The
2952 reason for the change was to use a documented generator which has
2953 passed tests for randomness.
2955 The subtract-with-borrow generator is described by the following
2958 z[i] = z[i + 20] - z[i + 5] - b
2960 where $z[i]$ is the $i$'th random number, which is a
2961 \code{double-float}. All of the indices in this equation are
2962 interpreted modulo 32. The quantity $b$ is carried over from the
2963 previous iteration and is either 0 or \code{double-float-epsilon}. If
2964 $z[i]$ is positive, $b$ is set to zero. Otherwise, $b$ is set to
2965 \code{double-float-epsilon}.
2967 To increase the randomness of this generator, it is
2968 combined with a Weyl generator defined by
2970 x[i] = x[i - 1] - y \bmod 1,
2972 where $y = 7097293079245107 \times 2^{-53}$. Thus, the resulting
2973 random number $r[i]$ is
2975 r[i] = (z[i] - x[i]) \bmod 1
2978 This generator has been tested by Peter VanEynde using Marsaglia's
2979 diehard test suite for random number generators; this generator
2980 passes the test suite.
2982 This generator is designed for generating floating-point random
2983 numbers. To obtain integers, the bits from the significand of the
2984 floating-point number are used as the bits of the integer. As many
2985 floating-point numbers as needed are generated to obtain the desired
2986 number of bits in the random integer.
2988 For floating-point numbers, this generator can be significantly faster
2989 than the original generator.
2992 %%\node Lisp Library, , Time Parsing and Formatting, Design Choices and Extensions
2993 \section{Lisp Library}
2996 The CMU Common Lisp project maintains a collection of useful or interesting
2997 programs written by users of our system. The library is in
2998 \file{lib/contrib/}. Two files there that users should read are:
3002 This file contains a page for each entry in the library. It
3003 contains information such as the author, portability or dependency issues, how
3004 to load the entry, etc.
3007 This file describes the library's organization and all the
3008 possible pieces of information an entry's catalog description could contain.
3011 Hemlock has a command \F{Library Entry} that displays a list of the current
3012 library entries in an editor buffer. There are mode specific commands that
3013 display catalog descriptions and load entries. This is a simple and convenient
3014 way to browse the library.
3017 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/debug.ms}
3021 %%\node The Debugger, The Compiler, Design Choices and Extensions, Top
3022 \chapter{The Debugger} \hide{-*- Dictionary: cmu-user -*-}
3024 \b{By Robert MacLachlan}
3030 * Debugger Introduction::
3031 * The Command Loop::
3034 * Source Location Printing::
3035 * Compiler Policy Control::
3036 * Exiting Commands::
3037 * Information Commands::
3038 * Breakpoint Commands::
3039 * Function Tracing::
3043 %%\node Debugger Introduction, The Command Loop, The Debugger, The Debugger
3044 \section{Debugger Introduction}
3046 The \cmucl{} debugger is unique in its level of support for source-level
3047 debugging of compiled code. Although some other debuggers allow access to
3048 variables by name, this seems to be the first \llisp{} debugger that:
3052 Tells you when a variable doesn't have a value because it hasn't been
3053 initialized yet or has already been deallocated, or
3056 Can display the precise source location corresponding to a code
3057 location in the debugged program.
3059 These features allow the debugging of compiled code to be made almost
3060 indistinguishable from interpreted code debugging.
3062 The debugger is an interactive command loop that allows a user to examine
3063 the function call stack. The debugger is invoked when:
3067 A \tindexed{serious-condition} is signaled, and it is not handled, or
3070 \findexed{error} is called, and the condition it signals is not handled, or
3073 The debugger is explicitly invoked with the \clisp{} \findexed{break}
3074 or \findexed{debug} functions.
3077 {\it Note: there are two debugger interfaces in CMU CL: the TTY debugger
3078 (described below) and the Motif debugger. Since the difference is only in the
3079 user interface, much of this chapter also applies to the Motif version.
3080 \xlref{motif-interface} for a very brief discussion of the graphical
3083 When you enter the TTY debugger, it looks something like this:
3085 Error in function CAR.
3086 Wrong type argument, 3, should have been of type LIST.
3089 0: Return to Top-Level.
3091 Debug (type H for help)
3096 The first group of lines describes what the error was that put us in the
3097 debugger. In this case \code{car} was called on \code{3}. After \code{Restarts:}
3098 is a list of all the ways that we can restart execution after this error. In
3099 this case, the only option is to return to top-level. After printing its
3100 banner, the debugger prints the current frame and the debugger prompt.
3103 %%\node The Command Loop, Stack Frames, Debugger Introduction, The Debugger
3104 \section{The Command Loop}
3106 The debugger is an interactive read-eval-print loop much like the normal
3107 top-level, but some symbols are interpreted as debugger commands instead
3108 of being evaluated. A debugger command starts with the symbol name of
3109 the command, possibly followed by some arguments on the same line. Some
3110 commands prompt for additional input. Debugger commands can be
3111 abbreviated by any unambiguous prefix: \code{help} can be typed as
3112 \code{h}, \code{he}, etc. For convenience, some commands have
3113 ambiguous one-letter abbreviations: \code{f} for \code{frame}.
3115 The package is not significant in debugger commands; any symbol with the
3116 name of a debugger command will work. If you want to show the value of
3117 a variable that happens also to be the name of a debugger command, you
3118 can use the \code{list-locals} command or the \code{debug:var}
3119 function, or you can wrap the variable in a \code{progn} to hide it from
3122 The debugger prompt is ``\var{frame}\code{]}'', where \var{frame} is the number
3123 of the current frame. Frames are numbered starting from zero at the top (most
3124 recent call), increasing down to the bottom. The current frame is the frame
3125 that commands refer to. The current frame also provides the lexical
3126 environment for evaluation of non-command forms.
3128 \cpsubindex{evaluation}{debugger} The debugger evaluates forms in the lexical
3129 environment of the functions being debugged. The debugger can only
3130 access variables. You can't \code{go} or \code{return-from} into a
3131 function, and you can't call local functions. Special variable
3132 references are evaluated with their current value (the innermost binding
3133 around the debugger invocation)\dash{}you don't get the value that the
3134 special had in the current frame. \xlref{debug-vars} for more
3135 information on debugger variable access.
3138 %%\node Stack Frames, Variable Access, The Command Loop, The Debugger
3139 \section{Stack Frames}
3140 \cindex{stack frames} \cpsubindex{frames}{stack}
3142 A stack frame is the run-time representation of a call to a function;
3143 the frame stores the state that a function needs to remember what it is
3148 Variables (\pxlref{debug-vars}), which are the values being operated
3152 Arguments to the call (which are really just particularly interesting
3156 A current location (\pxlref{source-locations}), which is the place in
3157 the program where the function was running when it stopped to call another
3158 function, or because of an interrupt or error.
3165 * How Arguments are Printed::
3168 * Debug Tail Recursion::
3169 * Unknown Locations and Interrupts::
3172 %%\node Stack Motion, How Arguments are Printed, Stack Frames, Stack Frames
3173 \subsection{Stack Motion}
3175 These commands move to a new stack frame and print the name of the function
3176 and the values of its arguments in the style of a Lisp function call:
3180 Move up to the next higher frame. More recent function calls are considered
3181 to be higher on the stack.
3184 Move down to the next lower frame.
3187 Move to the highest frame.
3189 \item[\code{bottom}]
3190 Move to the lowest frame.
3192 \item[{\code{frame} [\textit{n}]}]
3193 Move to the frame with the specified number. Prompts for the number if not
3197 \key{S} [\var{function-name} [\var{n}]]
3200 Search down the stack for function. Prompts for the function name if not
3201 supplied. Searches an optional number of times, but doesn't prompt for
3202 this number; enter it following the function.
3204 \item[{\key{R} [\var{function-name} [\var{n}]]}]
3205 Search up the stack for function. Prompts for the function name if not
3206 supplied. Searches an optional number of times, but doesn't prompt for
3207 this number; enter it following the function.
3211 %%\node How Arguments are Printed, Function Names, Stack Motion, Stack Frames
3212 \subsection{How Arguments are Printed}
3214 A frame is printed to look like a function call, but with the actual argument
3215 values in the argument positions. So the frame for this call in the source:
3219 would look like this:
3223 All keyword and optional arguments are displayed with their actual
3224 values; if the corresponding argument was not supplied, the value will
3225 be the default. So this call:
3229 would look like this:
3235 (string-upcase "test case")
3237 would look like this:
3239 (STRING-UPCASE "test case" :START 0 :END NIL)
3242 The arguments to a function call are displayed by accessing the argument
3243 variables. Although those variables are initialized to the actual argument
3244 values, they can be set inside the function; in this case the new value will be
3247 \code{\amprest} arguments are handled somewhat differently. The value of
3248 the rest argument variable is displayed as the spread-out arguments to
3251 (format t "~A is a ~A." "This" 'test)
3253 would look like this:
3255 (FORMAT T "~A is a ~A." "This" 'TEST)
3257 Rest arguments cause an exception to the normal display of keyword
3258 arguments in functions that have both \code{\amprest} and \code{\&key}
3259 arguments. In this case, the keyword argument variables are not
3260 displayed at all; the rest arg is displayed instead. So for these
3261 functions, only the keywords actually supplied will be shown, and the
3262 values displayed will be the argument values, not values of the
3263 (possibly modified) variables.
3265 If the variable for an argument is never referenced by the function, it will be
3266 deleted. The variable value is then unavailable, so the debugger prints
3267 \code{<unused-arg>} instead of the value. Similarly, if for any of a number of
3268 reasons (described in more detail in section \ref{debug-vars}) the value of the
3269 variable is unavailable or not known to be available, then
3270 \code{<unavailable-arg>} will be printed instead of the argument value.
3272 Printing of argument values is controlled by \code{*debug-print-level*} and
3273 \varref{debug-print-length}.
3276 %%\node Function Names, Funny Frames, How Arguments are Printed, Stack Frames
3277 \subsection{Function Names}
3278 \cpsubindex{function}{names}
3279 \cpsubindex{names}{function}
3281 If a function is defined by \code{defun}, \code{labels}, or \code{flet}, then the
3282 debugger will print the actual function name after the open parenthesis, like:
3284 (STRING-UPCASE "test case" :START 0 :END NIL)
3285 ((SETF AREF) \#\back{a} "for" 1)
3287 Otherwise, the function name is a string, and will be printed in quotes:
3290 ("DEFMACRO DO" (DO ((I 0 (1+ I))) ((= I 13))) NIL)
3291 ("SETQ *GC-NOTIFY-BEFORE*")
3293 This string name is derived from the \w{\code{def}\var{mumble}} form that encloses
3294 or expanded into the lambda, or the outermost enclosing form if there is no
3295 \w{\code{def}\var{mumble}}.
3298 %%\node Funny Frames, Debug Tail Recursion, Function Names, Stack Frames
3299 \subsection{Funny Frames}
3300 \cindex{external entry points}
3301 \cpsubindex{entry points}{external}
3302 \cpsubindex{block compilation}{debugger implications}
3303 \cpsubindex{external}{stack frame kind}
3304 \cpsubindex{optional}{stack frame kind}
3305 \cpsubindex{cleanup}{stack frame kind}
3307 Sometimes the evaluator introduces new functions that are used to implement a
3308 user function, but are not directly specified in the source. The main place
3309 this is done is for checking argument type and syntax. Usually these functions
3310 do their thing and then go away, and thus are not seen on the stack in the
3311 debugger. But when you get some sort of error during lambda-list processing,
3312 you end up in the debugger on one of these funny frames.
3314 These funny frames are flagged by printing ``\code{[}\var{keyword}\code{]}'' after the
3315 parentheses. For example, this call:
3319 will look like this:
3321 (CAR 2 A) [:EXTERNAL]
3325 (string-upcase "test case" :end)
3327 would look like this:
3329 ("DEFUN STRING-UPCASE" "test case" 335544424 1) [:OPTIONAL]
3332 As you can see, these frames have only a vague resemblance to the original
3333 call. Fortunately, the error message displayed when you enter the debugger
3334 will usually tell you what the problem is (in these cases, too many arguments
3335 and odd keyword arguments.) Also, if you go down the stack to the frame for
3336 the calling function, you can display the original source (\pxlref{source-locations}.)
3338 With recursive or block compiled functions (\pxlref{block-compilation}), an \kwd{EXTERNAL} frame may appear before the frame
3339 representing the first call to the recursive function or entry to the compiled
3340 block. This is a consequence of the way the compiler does block compilation:
3341 there is nothing odd with your program. You will also see \kwd{CLEANUP} frames
3342 during the execution of \code{unwind-protect} cleanup code. Note that inline
3343 expansion and open-coding affect what frames are present in the debugger; see
3344 sections \ref{debugger-policy} and \ref{open-coding}.
3347 %%\node Debug Tail Recursion, Unknown Locations and Interrupts, Funny Frames, Stack Frames
3348 \subsection{Debug Tail Recursion}
3349 \label{debug-tail-recursion}
3350 \cindex{tail recursion}
3351 \cpsubindex{recursion}{tail}
3353 Both the compiler and the interpreter are ``properly tail recursive.'' If a
3354 function call is in a tail-recursive position, the stack frame will be
3355 deallocated \i{at the time of the call}, rather than after the call returns.
3356 Consider this backtrace:
3361 Because of tail recursion, it is not necessarily the case that
3362 \code{FOO} directly called \code{BAR}. It may be that \code{FOO} called
3363 some other function \code{FOO2} which then called \code{BAR}
3364 tail-recursively, as in this example:
3379 Usually the elimination of tail-recursive frames makes debugging more
3380 pleasant, since these frames are mostly uninformative. If there is any
3381 doubt about how one function called another, it can usually be
3382 eliminated by finding the source location in the calling frame (section
3383 \ref{source-locations}.)
3385 For a more thorough discussion of tail recursion, \pxlref{tail-recursion}.
3388 %%\node Unknown Locations and Interrupts, , Debug Tail Recursion, Stack Frames
3389 \subsection{Unknown Locations and Interrupts}
3390 \label{unknown-locations}
3391 \cindex{unknown code locations}
3392 \cpsubindex{locations}{unknown}
3394 \cpsubindex{errors}{run-time}
3396 The debugger operates using special debugging information attached to
3397 the compiled code. This debug information tells the debugger what it
3398 needs to know about the locations in the code where the debugger can be
3399 invoked. If the debugger somehow encounters a location not described in
3400 the debug information, then it is said to be \var{unknown}. If the code
3401 location for a frame is unknown, then some variables may be
3402 inaccessible, and the source location cannot be precisely displayed.
3404 There are three reasons why a code location could be unknown:
3408 There is inadequate debug information due to the value of the \code{debug}
3409 optimization quality. \xlref{debugger-policy}.
3412 The debugger was entered because of an interrupt such as \code{$\hat{ }C$}.
3415 A hardware error such as ``\code{bus error}'' occurred in code that was
3416 compiled unsafely due to the value of the \code{safety} optimization
3417 quality. \xlref{optimize-declaration}.
3420 In the last two cases, the values of argument variables are accessible,
3421 but may be incorrect. \xlref{debug-var-validity} for more details on
3422 when variable values are accessible.
3424 It is possible for an interrupt to happen when a function call or return is in
3425 progress. The debugger may then flame out with some obscure error or insist
3426 that the bottom of the stack has been reached, when the real problem is that
3427 the current stack frame can't be located. If this happens, return from the
3428 interrupt and try again.
3430 When running interpreted code, all locations should be known. However,
3431 an interrupt might catch some subfunction of the interpreter at an
3432 unknown location. In this case, you should be able to go up the stack a
3433 frame or two and reach an interpreted frame which can be debugged.
3436 %%\node Variable Access, Source Location Printing, Stack Frames, The Debugger
3437 \section{Variable Access}
3439 \cpsubindex{variables}{debugger access}
3440 \cindex{debug variables}
3442 There are three ways to access the current frame's local variables in the
3443 debugger. The simplest is to type the variable's name into the debugger's
3444 read-eval-print loop. The debugger will evaluate the variable reference as
3445 though it had appeared inside that frame.
3447 The debugger doesn't really understand lexical scoping; it has just one
3448 namespace for all the variables in a function. If a symbol is the name of
3449 multiple variables in the same function, then the reference appears ambiguous,
3450 even though lexical scoping specifies which value is visible at any given
3451 source location. If the scopes of the two variables are not nested, then the
3452 debugger can resolve the ambiguity by observing that only one variable is
3455 When there are ambiguous variables, the evaluator assigns each one a
3456 small integer identifier. The \code{debug:var} function and the
3457 \code{list-locals} command use this identifier to distinguish between
3458 ambiguous variables:
3461 \item[\code{list-locals} \mopt{\var{prefix}}]%%\hfill\\
3462 This command prints the name and value of all variables in the current
3463 frame whose name has the specified \var{prefix}. \var{prefix} may be a
3464 string or a symbol. If no \var{prefix} is given, then all available
3465 variables are printed. If a variable has a potentially ambiguous name,
3466 then the name is printed with a ``\code{\#}\var{identifier}'' suffix, where
3467 \var{identifier} is the small integer used to make the name unique.
3470 \begin{defun}{debug:}{var}{\args{\var{name} \ampoptional{} \var{identifier}}}
3472 This function returns the value of the variable in the current frame
3473 with the specified \var{name}. If supplied, \var{identifier}
3474 determines which value to return when there are ambiguous variables.
3476 When \var{name} is a symbol, it is interpreted as the symbol name of
3477 the variable, i.e. the package is significant. If \var{name} is an
3478 uninterned symbol (gensym), then return the value of the uninterned
3479 variable with the same name. If \var{name} is a string,
3480 \code{debug:var} interprets it as the prefix of a variable name, which
3481 must unambiguously complete to the name of a valid variable.
3483 This function is useful mainly for accessing the value of uninterned
3484 or ambiguous variables, since most variables can be evaluated
3490 * Variable Value Availability::
3491 * Note On Lexical Variable Access::
3494 %%\node Variable Value Availability, Note On Lexical Variable Access, Variable Access, Variable Access
3495 \subsection{Variable Value Availability}
3496 \label{debug-var-validity}
3497 \cindex{availability of debug variables}
3498 \cindex{validity of debug variables}
3499 \cindex{debug optimization quality}
3501 The value of a variable may be unavailable to the debugger in portions of the
3502 program where \clisp{} says that the variable is defined. If a variable value is
3503 not available, the debugger will not let you read or write that variable. With
3504 one exception, the debugger will never display an incorrect value for a
3505 variable. Rather than displaying incorrect values, the debugger tells you the
3506 value is unavailable.
3508 The one exception is this: if you interrupt (e.g., with \code{$\hat{ }C$}) or if there is
3509 an unexpected hardware error such as ``\code{bus error}'' (which should only happen
3510 in unsafe code), then the values displayed for arguments to the interrupted
3511 frame might be incorrect.\footnote{Since the location of an interrupt or hardware
3512 error will always be an unknown location (\pxlref{unknown-locations}),
3513 non-argument variable values will never be available in the interrupted frame.}
3514 This exception applies only to the interrupted frame: any frame farther down
3515 the stack will be fine.
3517 The value of a variable may be unavailable for these reasons:
3521 The value of the \code{debug} optimization quality may have caused the compiler
3522 to omit debug information needed to determine whether the variable is available.
3523 Unless a variable is an argument, its value will only be available when
3524 \code{debug} is at least \code{2}.
3527 The compiler did lifetime analysis and determined that the value was no longer
3528 needed, even though its scope had not been exited. Lifetime analysis is
3529 inhibited when the \code{debug} optimization quality is \code{3}.
3532 The variable's name is an uninterned symbol (gensym). To save space, the
3533 compiler only dumps debug information about uninterned variables when the
3534 \code{debug} optimization quality is \code{3}.
3537 The frame's location is unknown (\pxlref{unknown-locations}) because
3538 the debugger was entered due to an interrupt or unexpected hardware error.
3539 Under these conditions the values of arguments will be available, but might be
3540 incorrect. This is the exception above.
3543 The variable was optimized out of existence. Variables with no reads are
3544 always optimized away, even in the interpreter. The degree to which the
3545 compiler deletes variables will depend on the value of the \code{compilation-speed}
3546 optimization quality, but most source-level optimizations are done under all
3547 compilation policies.
3551 Since it is especially useful to be able to get the arguments to a function,
3552 argument variables are treated specially when the \code{speed} optimization
3553 quality is less than \code{3} and the \code{debug} quality is at least \code{1}.
3554 With this compilation policy, the values of argument variables are almost
3555 always available everywhere in the function, even at unknown locations. For
3556 non-argument variables, \code{debug} must be at least \code{2} for values to be
3557 available, and even then, values are only available at known locations.
3560 %%\node Note On Lexical Variable Access, , Variable Value Availability, Variable Access
3561 \subsection{Note On Lexical Variable Access}
3562 \cpsubindex{evaluation}{debugger}
3564 When the debugger command loop establishes variable bindings for available
3565 variables, these variable bindings have lexical scope and dynamic
3566 extent.\footnote{The variable bindings are actually created using the \clisp{}
3567 \code{symbol-macrolet} special form.} You can close over them, but such closures
3568 can't be used as upward funargs.
3570 You can also set local variables using \code{setq}, but if the variable was closed
3571 over in the original source and never set, then setting the variable in the
3572 debugger may not change the value in all the functions the variable is defined
3573 in. Another risk of setting variables is that you may assign a value of a type
3574 that the compiler proved the variable could never take on. This may result in
3575 bad things happening.
3578 %%\node Source Location Printing, Compiler Policy Control, Variable Access, The Debugger
3579 \section{Source Location Printing}
3580 \label{source-locations}
3581 \cpsubindex{source location printing}{debugger}
3583 One of CMU \clisp{}'s unique capabilities is source level debugging of compiled
3584 code. These commands display the source location for the current frame:
3587 \item[\code{source} \mopt{\var{context}}]%%\hfill\\
3588 This command displays the file that the current frame's function was defined
3589 from (if it was defined from a file), and then the source form responsible for
3590 generating the code that the current frame was executing. If \var{context} is
3591 specified, then it is an integer specifying the number of enclosing levels of
3592 list structure to print.
3594 \item[\code{vsource} \mopt{\var{context}}]%%\hfill\\
3595 This command is identical to \code{source}, except that it uses the
3596 global values of \code{*print-level*} and \code{*print-length*} instead
3597 of the debugger printing control variables \code{*debug-print-level*}
3598 and \code{*debug-print-length*}.
3601 The source form for a location in the code is the innermost list present
3602 in the original source that encloses the form responsible for generating
3603 that code. If the actual source form is not a list, then some enclosing
3604 list will be printed. For example, if the source form was a reference
3605 to the variable \code{*some-random-special*}, then the innermost
3606 enclosing evaluated form will be printed. Here are some possible
3609 (let ((a *some-random-special*))
3612 (+ *some-random-special* ...)
3615 If the code at a location was generated from the expansion of a macro or a
3616 source-level compiler optimization, then the form in the original source that
3617 expanded into that code will be printed. Suppose the file
3618 \file{/usr/me/mystuff.lisp} looked like this:
3627 If \code{foo} has called \code{myfun}, and is waiting for it to return, then the
3628 \code{source} command would print:
3630 ; File: /usr/me/mystuff.lisp
3634 Note that the macro use was printed, not the actual function call form,
3637 If enclosing source is printed by giving an argument to \code{source} or
3638 \code{vsource}, then the actual source form is marked by wrapping it in a list
3639 whose first element is \code{\#:***HERE***}. In the previous example,
3640 \w{\code{source 1}} would print:
3642 ; File: /usr/me/mystuff.lisp
3652 * How the Source is Found::
3653 * Source Location Availability::
3656 %%\node How the Source is Found, Source Location Availability, Source Location Printing, Source Location Printing
3657 \subsection{How the Source is Found}
3659 If the code was defined from \llisp{} by \code{compile} or
3660 \code{eval}, then the source can always be reliably located. If the
3661 code was defined from a \code{fasl} file created by
3662 \findexed{compile-file}, then the debugger gets the source forms it
3663 prints by reading them from the original source file. This is a
3664 potential problem, since the source file might have moved or changed
3665 since the time it was compiled.
3667 The source file is opened using the \code{truename} of the source file
3668 pathname originally given to the compiler. This is an absolute pathname
3669 with all logical names and symbolic links expanded. If the file can't
3670 be located using this name, then the debugger gives up and signals an
3673 If the source file can be found, but has been modified since the time it was
3674 compiled, the debugger prints this warning:
3676 ; File has been modified since compilation:
3678 ; Using form offset instead of character position.
3680 where \var{filename} is the name of the source file. It then proceeds using a
3681 robust but not foolproof heuristic for locating the source. This heuristic
3686 No top-level forms before the top-level form containing the source have been
3687 added or deleted, and
3690 The top-level form containing the source has not been modified much. (More
3691 precisely, none of the list forms beginning before the source form have been
3695 If the heuristic doesn't work, the displayed source will be wrong, but will
3696 probably be near the actual source. If the ``shape'' of the top-level form in
3697 the source file is too different from the original form, then an error will be
3698 signaled. When the heuristic is used, the source location commands are
3701 Source location printing can also be confused if (after the source was
3702 compiled) a read-macro you used in the code was redefined to expand into
3703 something different, or if a read-macro ever returns the same \code{eq}
3704 list twice. If you don't define read macros and don't use \code{\#\#} in
3705 perverted ways, you don't need to worry about this.
3708 %%\node Source Location Availability, , How the Source is Found, Source Location Printing
3709 \subsection{Source Location Availability}
3711 \cindex{debug optimization quality}
3712 Source location information is only available when the \code{debug}
3713 optimization quality is at least \code{2}. If source location information is
3714 unavailable, the source commands will give an error message.
3716 If source location information is available, but the source location is
3717 unknown because of an interrupt or unexpected hardware error
3718 (\pxlref{unknown-locations}), then the command will print:
3720 Unknown location: using block start.
3722 and then proceed to print the source location for the start of the \i{basic
3723 block} enclosing the code location. \cpsubindex{block}{basic}
3724 \cpsubindex{block}{start location}
3725 It's a bit complicated to explain exactly what a basic block is, but
3726 here are some properties of the block start location:
3729 \item The block start location may be the same as the true location.
3731 \item The block start location will never be later in the
3732 program's flow of control than the true location.
3734 \item No conditional control structures (such as \code{if},
3735 \code{cond}, \code{or}) will intervene between the block start and
3736 the true location (but note that some conditionals present in the
3737 original source could be optimized away.) Function calls \i{do not}
3740 \item The head of a loop will be the start of a block.
3742 \item The programming language concept of ``block structure'' and the
3743 \clisp{} \code{block} special form are totally unrelated to the
3744 compiler's basic block.
3747 In other words, the true location lies between the printed location and the
3748 next conditional (but watch out because the compiler may have changed the
3752 %%\node Compiler Policy Control, Exiting Commands, Source Location Printing, The Debugger
3753 \section{Compiler Policy Control}
3754 \label{debugger-policy}
3755 \cpsubindex{policy}{debugger}
3756 \cindex{debug optimization quality}
3757 \cindex{optimize declaration}
3759 The compilation policy specified by \code{optimize} declarations affects the
3760 behavior seen in the debugger. The \code{debug} quality directly affects the
3761 debugger by controlling the amount of debugger information dumped. Other
3762 optimization qualities have indirect but observable effects due to changes in
3763 the way compilation is done.
3765 Unlike the other optimization qualities (which are compared in relative value
3766 to evaluate tradeoffs), the \code{debug} optimization quality is directly
3767 translated to a level of debug information. This absolute interpretation
3768 allows the user to count on a particular amount of debug information being
3769 available even when the values of the other qualities are changed during
3770 compilation. These are the levels of debug information that correspond to the
3771 values of the \code{debug} quality:
3775 Only the function name and enough information to allow the stack to
3778 \item[\code{\w{$>$ 0}}]
3779 Any level greater than \code{0} gives level \code{0} plus all
3780 argument variables. Values will only be accessible if the argument
3781 variable is never set and
3782 \code{speed} is not \code{3}. \cmucl{} allows any real value for optimization
3783 qualities. It may be useful to specify \code{0.5} to get backtrace argument
3784 display without argument documentation.
3786 \item[\code{1}] Level \code{1} provides argument documentation
3787 (printed arglists) and derived argument/result type information.
3788 This makes \findexed{describe} more informative, and allows the
3789 compiler to do compile-time argument count and type checking for any
3790 calls compiled at run-time.
3793 Level \code{1} plus all interned local variables, source location
3794 information, and lifetime information that tells the debugger when arguments
3795 are available (even when \code{speed} is \code{3} or the argument is set.) This is
3799 Level \code{2} plus all uninterned variables. In addition, lifetime
3800 analysis is disabled (even when \code{speed} is \code{3}), ensuring that all variable
3801 values are available at any known location within the scope of the binding.
3802 This has a speed penalty in addition to the obvious space penalty.
3805 As you can see, if the \code{speed} quality is \code{3}, debugger performance is
3806 degraded. This effect comes from the elimination of argument variable
3807 special-casing (\pxlref{debug-var-validity}.) Some degree of
3808 speed/debuggability tradeoff is unavoidable, but the effect is not too drastic
3809 when \code{debug} is at least \code{2}.
3811 \cindex{inline expansion}
3812 \cindex{semi-inline expansion}
3813 In addition to \code{inline} and \code{notinline} declarations, the relative values
3814 of the \code{speed} and \code{space} qualities also change whether functions are
3815 inline expanded (\pxlref{inline-expansion}.) If a function is inline
3816 expanded, then there will be no frame to represent the call, and the arguments
3817 will be treated like any other local variable. Functions may also be
3818 ``semi-inline'', in which case there is a frame to represent the call, but the
3819 call is to an optimized local version of the function, not to the original
3823 %%\node Exiting Commands, Information Commands, Compiler Policy Control, The Debugger
3824 \section{Exiting Commands}
3826 These commands get you out of the debugger.
3833 \item[\code{restart} \mopt{\var{n}}]%%\hfill\\
3834 Invokes the \var{n}th restart case as displayed by the \code{error}
3835 command. If \var{n} is not specified, the available restart cases are
3839 Calls \code{continue} on the condition given to \code{debug}. If there is no
3840 restart case named \var{continue}, then an error is signaled.
3843 Calls \code{abort} on the condition given to \code{debug}. This is
3844 useful for popping debug command loop levels or aborting to top level,
3848 (\code{debug:debug-return} \var{expression} \mopt{\var{frame}})
3851 From the current or specified frame, return the result of evaluating
3852 \var{expression}. If multiple values are expected, then this function should be
3853 called for multiple values.
3858 %%\node Information Commands, Breakpoint Commands, Exiting Commands, The Debugger
3859 \section{Information Commands}
3861 Most of these commands print information about the current frame or
3862 function, but a few show general information.
3866 \item[\code{help}, \code{?}]
3867 Displays a synopsis of debugger commands.
3869 \item[\code{describe}]
3870 Calls \code{describe} on the current function, displays number of local
3871 variables, and indicates whether the function is compiled or interpreted.
3874 Displays the current function call as it would be displayed by moving to
3877 \item[\code{vprint} (or \code{pp}) \mopt{\var{verbosity}}]%%\hfill\\
3878 Displays the current function call using \code{*print-level*} and
3879 \code{*print-length*} instead of \code{*debug-print-level*} and
3880 \code{*debug-print-length*}. \var{verbosity} is a small integer
3881 (default 2) that controls other dimensions of verbosity.
3884 Prints the condition given to \code{invoke-debugger} and the active
3887 \item[\code{backtrace} \mopt{\var{n}}]\hfill\\
3888 Displays all the frames from the current to the bottom. Only shows
3889 \var{n} frames if specified. The printing is controlled by
3890 \code{*debug-print-level*} and \code{*debug-print-length*}.
3893 (\code{debug:debug-function} \mopt{\var{n}})
3896 Returns the function from the current or specified frame.
3898 \item[(\code{debug:function-name} \mopt{\var{n}})]
3899 Returns the function name from the current or specified frame.
3901 \item[(\code{debug:pc} \mopt{\var{frame}})]
3902 Returns the index of the instruction for the function in the current or
3903 specified frame. This is useful in conjunction with \code{disassemble}.
3904 The pc returned points to the instruction after the one that was fatal.
3909 %%\node Breakpoint Commands, Function Tracing, Information Commands, The Debugger
3910 \section{Breakpoint Commands}
3912 \cmucl{} supports setting of breakpoints inside compiled functions and
3913 stepping of compiled code. Breakpoints can only be set at known
3914 locations (\pxlref{unknown-locations}), so these commands are largely
3915 useless unless the \code{debug} optimize quality is at least \code{2}
3916 (\pxlref{debugger-policy}). These commands manipulate breakpoints:
3918 \item[\code{breakpoint} \var{location} \mstar{\var{option} \var{value}}]
3920 Set a breakpoint in some function. \var{location} may be an integer
3921 code location number (as displayed by \code{list-locations}) or a
3922 keyword. The keyword can be used to indicate setting a breakpoint at
3923 the function start (\kwd{start}, \kwd{s}) or function end
3924 (\kwd{end}, \kwd{e}). The \code{breakpoint} command has
3925 \kwd{condition}, \kwd{break}, \kwd{print} and \kwd{function}
3926 options which work similarly to the \code{trace} options.
3928 \item[\code{list-locations} (or \code{ll}) \mopt{\var{function}}]%%\hfill\\
3929 List all the code locations in the current frame's function, or in
3930 \var{function} if it is supplied. The display format is the code
3931 location number, a colon and then the source form for that location:
3935 If consecutive locations have the same source, then a numeric range like
3936 \code{3-5:} will be printed. For example, a default function call has a
3937 known location both immediately before and after the call, which would
3938 result in two code locations with the same source. The listed function
3939 becomes the new default function for breakpoint setting (via the
3940 \code{breakpoint} command).
3942 \item[\code{list-breakpoints} (or \code{lb})]%%\hfill\\
3943 List all currently active breakpoints with their breakpoint number.
3945 \item[\code{delete-breakpoint} (or \code{db}) \mopt{\var{number}}]%%\hfill\\
3946 Delete a breakpoint specified by its breakpoint number. If no number is
3947 specified, delete all breakpoints.
3949 \item[\code{step}]%%\hfill\\
3950 Step to the next possible breakpoint location in the current function.
3951 This always steps over function calls, instead of stepping into them
3955 * Breakpoint Example::
3958 %%\node Breakpoint Example, , Breakpoint Commands, Breakpoint Commands
3959 \subsection{Breakpoint Example}
3961 Consider this definition of the factorial function:
3968 This debugger session demonstrates the use of breakpoints:
3970 common-lisp-user> (break) ; Invoke debugger
3975 0: [CONTINUE] Return from BREAK.
3976 1: [ABORT ] Return to Top-Level.
3978 Debug (type H for help)
3980 (INTERACTIVE-EVAL (BREAK))
3982 0: #'(LAMBDA (N) (BLOCK ! (IF # 1 #)))
3988 6: #'(LAMBDA (N) (BLOCK ! (IF # 1 #)))
3995 common-lisp-user> (! 10) ; Call the function
4000 0: [CONTINUE] Return from BREAK.
4001 1: [ABORT ] Return to Top-Level.
4003 Debug (type H for help)
4005 (! 10) ; We are now in first call (arg 10) before the multiply
4006 Source: (* N (! (1- N)))
4011 (! 10) ; We have finished evaluation of (1- n)
4018 0: [CONTINUE] Return from BREAK.
4019 1: [ABORT ] Return to Top-Level.
4021 Debug (type H for help)
4023 (! 9) ; We hit the breakpoint in the recursive call
4024 Source: (* N (! (1- N)))
4031 %%\node Function Tracing, Specials, Breakpoint Commands, The Debugger
4032 \section{Function Tracing}
4034 \cpsubindex{function}{tracing}
4036 The tracer causes selected functions to print their arguments and
4037 their results whenever they are called. Options allow conditional
4038 printing of the trace information and conditional breakpoints on
4039 function entry or exit.
4041 \begin{defmac}{}{trace}{%
4042 \args{\mstar{option global-value} \mstar{name \mstar{option
4045 \code{trace} is a debugging tool that prints information when
4046 specified functions are called. In its simplest form:
4048 (trace \var{name-1} \var{name-2} ...)
4050 \code{trace} causes a printout on \vindexed{trace-output} each time
4051 that one of the named functions is entered or returns (the
4052 \var{names} are not evaluated.) Trace output is indented according
4053 to the number of pending traced calls, and this trace depth is
4054 printed at the beginning of each line of output. Printing verbosity
4055 of arguments and return values is controlled by
4056 \vindexed{debug-print-level} and \vindexed{debug-print-length}.
4058 If no \var{names} or \var{options} are given, \code{trace}
4059 returns the list of all currently traced functions,
4060 \code{*traced-function-list*}.
4062 Trace options can cause the normal printout to be suppressed, or
4063 cause extra information to be printed. Each option is a pair of an
4064 option keyword and a value form. Options may be interspersed with
4065 function names. Options only affect tracing of the function whose
4066 name they appear immediately after. Global options are specified
4067 before the first name, and affect all functions traced by a given
4068 use of \code{trace}. If an already traced function is traced again,
4069 any new options replace the old options. The following options are
4072 \item[\kwd{condition} \var{form}, \kwd{condition-after} \var{form},
4073 \kwd{condition-all} \var{form}] If \kwd{condition} is specified,
4074 then \code{trace} does nothing unless \var{form} evaluates to true
4075 at the time of the call. \kwd{condition-after} is similar, but
4076 suppresses the initial printout, and is tested when the function
4077 returns. \kwd{condition-all} tries both before and after.
4079 \item[\kwd{wherein} \var{names}] If specified, \var{names} is a
4080 function name or list of names. \code{trace} does nothing unless
4081 a call to one of those functions encloses the call to this
4082 function (i.e. it would appear in a backtrace.) Anonymous
4083 functions have string names like \code{"DEFUN FOO"}.
4085 \item[\kwd{break} \var{form}, \kwd{break-after} \var{form},
4086 \kwd{break-all} \var{form}] If specified, and \var{form} evaluates
4087 to true, then the debugger is invoked at the start of the
4088 function, at the end of the function, or both, according to the
4091 \item[\kwd{print} \var{form}, \kwd{print-after} \var{form},
4092 \kwd{print-all} \var{form}] In addition to the usual printout, the
4093 result of evaluating \var{form} is printed at the start of the
4094 function, at the end of the function, or both, according to the
4095 respective option. Multiple print options cause multiple values
4098 \item[\kwd{function} \var{function-form}] This is not really an
4099 option, but rather another way of specifying what function to
4100 trace. The \var{function-form} is evaluated immediately, and the
4101 resulting function is traced.
4103 \item[\kwd{encapsulate \mgroup{:default | t | nil}}] In \cmucl,
4104 tracing can be done either by temporarily redefining the function
4105 name (encapsulation), or using breakpoints. When breakpoints are
4106 used, the function object itself is destructively modified to
4107 cause the tracing action. The advantage of using breakpoints is
4108 that tracing works even when the function is anonymously called
4111 When \kwd{encapsulate} is true, tracing is done via encapsulation.
4112 \kwd{default} is the default, and means to use encapsulation for
4113 interpreted functions and funcallable instances, breakpoints
4114 otherwise. When encapsulation is used, forms are {\it not}
4115 evaluated in the function's lexical environment, but
4116 \code{debug:arg} can still be used.
4119 \kwd{condition}, \kwd{break} and \kwd{print} forms are evaluated in
4120 the lexical environment of the called function; \code{debug:var} and
4121 \code{debug:arg} can be used. The \code{-after} and \code{-all}
4122 forms are evaluated in the null environment.
4125 \begin{defmac}{}{untrace}{ \args{\amprest{} \var{function-names}}}
4127 This macro turns off tracing for the specified functions, and
4128 removes their names from \code{*traced-function-list*}. If no
4129 \var{function-names} are given, then all currently traced functions
4133 \begin{defvar}{extensions:}{traced-function-list}
4135 A list of function names maintained and used by \code{trace},
4136 \code{untrace}, and \code{untrace-all}. This list should contain
4137 the names of all functions currently being traced.
4140 \begin{defvar}{extensions:}{max-trace-indentation}
4142 The maximum number of spaces which should be used to indent trace
4143 printout. This variable is initially set to 40.
4147 * Encapsulation Functions::
4150 %%\node Encapsulation Functions, , Function Tracing, Function Tracing
4151 \subsection{Encapsulation Functions}
4152 \cindex{encapsulation}
4155 The encapsulation functions provide a mechanism for intercepting the
4156 arguments and results of a function. \code{encapsulate} changes the
4157 function definition of a symbol, and saves it so that it can be
4158 restored later. The new definition normally calls the original
4159 definition. The \clisp{} \findexed{fdefinition} function always returns
4160 the original definition, stripping off any encapsulation.
4162 The original definition of the symbol can be restored at any time by
4163 the \code{unencapsulate} function. \code{encapsulate} and \code{unencapsulate}
4164 allow a symbol to be multiply encapsulated in such a way that different
4165 encapsulations can be completely transparent to each other.
4167 Each encapsulation has a type which may be an arbitrary lisp object.
4168 If a symbol has several encapsulations of different types, then any
4169 one of them can be removed without affecting more recent ones.
4170 A symbol may have more than one encapsulation of the same type, but
4171 only the most recent one can be undone.
4173 \begin{defun}{extensions:}{encapsulate}{%
4174 \args{\var{symbol} \var{type} \var{body}}}
4176 Saves the current definition of \var{symbol}, and replaces it with a
4177 function which returns the result of evaluating the form,
4178 \var{body}. \var{Type} is an arbitrary lisp object which is the
4179 type of encapsulation.
4181 When the new function is called, the following variables are bound
4182 for the evaluation of \var{body}:
4185 \item[\code{extensions:argument-list}] A list of the arguments to
4188 \item[\code{extensions:basic-definition}] The unencapsulated
4189 definition of the function.
4191 The unencapsulated definition may be called with the original
4192 arguments by including the form
4194 (apply extensions:basic-definition extensions:argument-list)
4197 \code{encapsulate} always returns \var{symbol}.
4200 \begin{defun}{extensions:}{unencapsulate}{\args{\var{symbol} \var{type}}}
4202 Undoes \var{symbol}'s most recent encapsulation of type \var{type}.
4203 \var{Type} is compared with \code{eq}. Encapsulations of other
4204 types are left in place.
4207 \begin{defun}{extensions:}{encapsulated-p}{%
4208 \args{\var{symbol} \var{type}}}
4210 Returns \true{} if \var{symbol} has an encapsulation of type
4211 \var{type}. Returns \nil{} otherwise. \var{type} is compared with
4217 \section{The Single Stepper}
4219 \begin{defmac}{}{step}{ \args{\var{form}}}
4221 Evaluates \var{form} with single stepping enabled or, if \var{form} is
4222 \code{T}, enables stepping until explicitly disabled. Stepping can
4223 be disabled by quitting to the lisp top level, or by evaluating the
4224 form \w{\code{(step ())}}.
4226 While stepping is enabled, every call to eval will prompt the user
4227 for a single character command. The prompt is the form which is
4228 about to be \code{eval}ed. It is printed with \code{*print-level*}
4229 and \code{*print-length*} bound to \code{*step-print-level*} and
4230 \code{*step-print-length*}. All interaction is done through the
4231 stream \code{*query-io*}. Because of this, the stepper can not be
4232 used in Hemlock eval mode. When connected to a slave Lisp, the
4233 stepper can be used from Hemlock.
4238 \item[\key{n} (next)] Evaluate the expression with stepping still
4241 \item[\key{s} (skip)] Evaluate the expression with stepping
4244 \item[\key{q} (quit)] Evaluate the expression, but disable all
4245 further stepping inside the current call to \code{step}.
4247 \item[\key{p} (print)] Print current form. (Does not use
4248 \code{*step-print-level*} or \code{*step-print-length*}.)
4250 \item[\key{b} (break)] Enter break loop, and then prompt for the
4251 command again when the break loop returns.
4253 \item[\key{e} (eval)] Prompt for and evaluate an arbitrary
4254 expression. The expression is evaluated with stepping disabled.
4256 \item[\key{?} (help)] Prints a brief list of the commands.
4258 \item[\key{r} (return)] Prompt for an arbitrary value to return as
4259 result of the current call to eval.
4261 \item[\key{g}] Throw to top level.
4265 \begin{defvar}{extensions:}{step-print-level}
4266 \defvarx[extensions:]{step-print-length}
4268 \code{*print-level*} and \code{*print-length*} are bound to these
4269 values while printing the current form. \code{*step-print-level*}
4270 and \code{*step-print-length*} are initially bound to 4 and 5,
4274 \begin{defvar}{extensions:}{max-step-indentation}
4276 Step indents the prompts to highlight the nesting of the evaluation.
4277 This variable contains the maximum number of spaces to use for
4278 indenting. Initially set to 40.
4284 %%\node Specials, , Function Tracing, The Debugger
4286 These are the special variables that control the debugger action.
4289 \begin{defvar}{debug:}{debug-print-level}
4290 \defvarx[debug:]{debug-print-length}
4292 \code{*print-level*} and \code{*print-length*} are bound to these
4293 values during the execution of some debug commands. When evaluating
4294 arbitrary expressions in the debugger, the normal values of
4295 \code{*print-level*} and \code{*print-length*} are in effect. These
4296 variables are initially set to 3 and 5, respectively.
4301 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/compiler.ms}
4304 %%\node The Compiler, Advanced Compiler Use and Efficiency Hints, The Debugger, Top
4305 \chapter{The Compiler} \hide{ -*- Dictionary: cmu-user -*-}
4308 * Compiler Introduction::
4309 * Calling the Compiler::
4310 * Compilation Units::
4311 * Interpreting Error Messages::
4313 * Getting Existing Programs to Run::
4315 * Open Coding and Inline Expansion::
4318 %%\node Compiler Introduction, Calling the Compiler, The Compiler, The Compiler
4319 \section{Compiler Introduction}
4321 This chapter contains information about the compiler that every \cmucl{} user
4322 should be familiar with. Chapter \ref{advanced-compiler} goes into greater
4323 depth, describing ways to use more advanced features.
4325 The \cmucl{} compiler (also known as \Python{}) has many features
4326 that are seldom or never supported by conventional \llisp{}
4330 \item Source level debugging of compiled code (see chapter
4333 \item Type error compiler warnings for type errors detectable at
4336 \item Compiler error messages that provide a good indication of where
4337 the error appeared in the source.
4339 \item Full run-time checking of all potential type errors, with
4340 optimization of type checks to minimize the cost.
4342 \item Scheme-like features such as proper tail recursion and extensive
4343 source-level optimization.
4345 \item Advanced tuning and optimization features such as comprehensive
4346 efficiency notes, flow analysis, and untagged number representations
4347 (see chapter \ref{advanced-compiler}.)
4352 %%\node Calling the Compiler, Compilation Units, Compiler Introduction, The Compiler
4353 \section{Calling the Compiler}
4355 Functions may be compiled using \code{compile}, \code{compile-file}, or
4356 \code{compile-from-stream}.
4358 \begin{defun}{}{compile}{ \args{\var{name} \ampoptional{} \var{definition}}}
4360 This function compiles the function whose name is \var{name}. If
4361 \var{name} is \false, the compiled function object is returned. If
4362 \var{definition} is supplied, it should be a lambda expression that
4363 is to be compiled and then placed in the function cell of
4364 \var{name}. As per the proposed X3J13 cleanup
4365 ``compile-argument-problems'', \var{definition} may also be an
4366 interpreted function.
4368 The return values are as per the proposed X3J13 cleanup
4369 ``compiler-diagnostics''. The first value is the function name or
4370 function object. The second value is \false{} if no compiler
4371 diagnostics were issued, and \true{} otherwise. The third value is
4372 \false{} if no compiler diagnostics other than style warnings were
4373 issued. A non-\false{} value indicates that there were ``serious''
4374 compiler diagnostics issued, or that other conditions of type
4375 \tindexed{error} or \tindexed{warning} (but not
4376 \tindexed{style-warning}) were signaled during compilation.
4380 \begin{defun}{}{compile-file}{
4381 \args{\var{input-pathname}
4382 \keys{\kwd{output-file} \kwd{error-file} \kwd{trace-file}}
4383 \morekeys{\kwd{error-output} \kwd{verbose} \kwd{print} \kwd{progress}}
4384 \yetmorekeys{\kwd{load} \kwd{block-compile} \kwd{entry-points}}
4385 \yetmorekeys{\kwd{byte-compile}}}}
4387 The \cmucl{} \code{compile-file} is extended through the addition of
4388 several new keywords and an additional interpretation of
4389 \var{input-pathname}:
4392 \item[\var{input-pathname}] If this argument is a list of input
4393 files, rather than a single input pathname, then all the source
4394 files are compiled into a single object file. In this case, the
4395 name of the first file is used to determine the default output
4396 file names. This is especially useful in combination with
4397 \var{block-compile}.
4399 \item[\kwd{output-file}] This argument specifies the name of the
4400 output file. \true{} gives the default name, \false{} suppresses
4403 \item[\kwd{error-file}] A listing of all the error output is
4404 directed to this file. If there are no errors, then no error file
4405 is produced (and any existing error file is deleted.) \true{}
4406 gives \w{"\var{name}\code{.err}"} (the default), and \false{}
4407 suppresses the output file.
4409 \item[\kwd{error-output}] If \true{} (the default), then error
4410 output is sent to \code{*error-output*}. If a stream, then output
4411 is sent to that stream instead. If \false, then error output is
4412 suppressed. Note that this error output is in addition to (but
4413 the same as) the output placed in the \var{error-file}.
4415 \item[\kwd{verbose}] If \true{} (the default), then the compiler
4416 prints to error output at the start and end of compilation of each
4417 file. See \varref{compile-verbose}.
4419 \item[\kwd{print}] If \true{} (the default), then the compiler
4420 prints to error output when each function is compiled. See
4421 \varref{compile-print}.
4423 \item[\kwd{progress}] If \true{} (default \false{}), then the
4424 compiler prints to error output progress information about the
4425 phases of compilation of each function. This is a CMU extension
4426 that is useful mainly in large block compilations. See
4427 \varref{compile-progress}.
4429 \item[\kwd{trace-file}] If \true{}, several of the intermediate
4430 representations (including annotated assembly code) are dumped out
4431 to this file. \true{} gives \w{"\var{name}\code{.trace}"}. Trace
4432 output is off by default. \xlref{trace-files}.
4434 \item[\kwd{load}] If \true{}, load the resulting output file.
4436 \item[\kwd{block-compile}] Controls the compile-time resolution of
4437 function calls. By default, only self-recursive calls are
4438 resolved, unless an \code{ext:block-start} declaration appears in
4439 the source file. \xlref{compile-file-block}.
4441 \item[\kwd{entry-points}] If non-null, then this is a list of the
4442 names of all functions in the file that should have global
4443 definitions installed (because they are referenced in other
4444 files.) \xlref{compile-file-block}.
4446 \item[\kwd{byte-compile}] If \true{}, compiling to a compact
4447 interpreted byte code is enabled. Possible values are \true{},
4448 \false{}, and \kwd{maybe} (the default.) See
4449 \varref{byte-compile-default} and \xlref{byte-compile}.
4452 The return values are as per the proposed X3J13 cleanup
4453 ``compiler-diagnostics''. The first value from \code{compile-file}
4454 is the truename of the output file, or \false{} if the file could
4455 not be created. The interpretation of the second and third values
4456 is described above for \code{compile}.
4459 \begin{defvar}{}{compile-verbose}
4460 \defvarx{compile-print}
4461 \defvarx{compile-progress}
4463 These variables determine the default values for the \kwd{verbose},
4464 \kwd{print} and \kwd{progress} arguments to \code{compile-file}.
4467 \begin{defun}{extensions:}{compile-from-stream}{%
4468 \args{\var{input-stream}
4469 \keys{\kwd{error-stream}}
4470 \morekeys{\kwd{trace-stream}}
4471 \yetmorekeys{\kwd{block-compile} \kwd{entry-points}}
4472 \yetmorekeys{\kwd{byte-compile}}}}
4474 This function is similar to \code{compile-file}, but it takes all
4475 its arguments as streams. It reads \llisp{} code from
4476 \var{input-stream} until end of file is reached, compiling into the
4477 current environment. This function returns the same two values as
4478 the last two values of \code{compile}. No output files are
4484 %%\node Compilation Units, Interpreting Error Messages, Calling the Compiler, The Compiler
4485 \section{Compilation Units}
4486 \cpsubindex{compilation}{units}
4488 \cmucl{} supports the \code{with-compilation-unit} macro added to the
4489 language by the proposed X3J13 ``with-compilation-unit'' compiler
4490 cleanup. This provides a mechanism for eliminating spurious undefined
4491 warnings when there are forward references across files, and also
4492 provides a standard way to access compiler extensions.
4494 \begin{defmac}{}{with-compilation-unit}{%
4495 \args{(\mstar{\var{key} \var{value}}) \mstar{\var{form}}}}
4497 This macro evaluates the \var{forms} in an environment that causes
4498 warnings for undefined variables, functions and types to be delayed
4499 until all the forms have been evaluated. Each keyword \var{value}
4500 is an evaluated form. These keyword options are recognized:
4503 \item[\kwd{override}] If uses of \code{with-compilation-unit} are
4504 dynamically nested, the outermost use will take precedence,
4505 suppressing printing of undefined warnings by inner uses.
4506 However, when the \code{override} option is true this shadowing is
4507 inhibited; an inner use will print summary warnings for the
4508 compilations within the inner scope.
4510 \item[\kwd{optimize}] This is a CMU extension that specifies the
4511 ``global'' compilation policy for the dynamic extent of the body.
4512 The argument should evaluate to an \code{optimize} declare form,
4515 (optimize (speed 3) (safety 0))
4517 \xlref{optimize-declaration}.
4519 \item[\kwd{optimize-interface}] Similar to \kwd{optimize}, but
4520 specifies the compilation policy for function interfaces (argument
4521 count and type checking) for the dynamic extent of the body.
4522 \xlref{optimize-interface-declaration}.
4524 \item[\kwd{context-declarations}] This is a CMU extension that
4525 pattern-matches on function names, automatically splicing in any
4526 appropriate declarations at the head of the function definition.
4527 \xlref{context-declarations}.
4532 * Undefined Warnings::
4535 %%\node Undefined Warnings, , Compilation Units, Compilation Units
4536 \subsection{Undefined Warnings}
4538 \cindex{undefined warnings}
4539 Warnings about undefined variables, functions and types are delayed until the
4540 end of the current compilation unit. The compiler entry functions
4541 (\code{compile}, etc.) implicitly use \code{with-compilation-unit}, so undefined
4542 warnings will be printed at the end of the compilation unless there is an
4543 enclosing \code{with-compilation-unit}. In order to gain the benefit of this
4544 mechanism, you should wrap a single \code{with-compilation-unit} around the calls
4545 to \code{compile-file}, i.e.:
4547 (with-compilation-unit ()
4548 (compile-file "file1")
4549 (compile-file "file2")
4553 Unlike for functions and types, undefined warnings for variables are
4554 not suppressed when a definition (e.g. \code{defvar}) appears after
4555 the reference (but in the same compilation unit.) This is because
4556 doing special declarations out of order just doesn't
4557 work\dash{}although early references will be compiled as special,
4558 bindings will be done lexically.
4560 Undefined warnings are printed with full source context
4561 (\pxlref{error-messages}), which tremendously simplifies the problem
4562 of finding undefined references that resulted from macroexpansion.
4563 After printing detailed information about the undefined uses of each
4564 name, \code{with-compilation-unit} also prints summary listings of the
4565 names of all the undefined functions, types and variables.
4567 \begin{defvar}{}{undefined-warning-limit}
4569 This variable controls the number of undefined warnings for each
4570 distinct name that are printed with full source context when the
4571 compilation unit ends. If there are more undefined references than
4572 this, then they are condensed into a single warning:
4574 Warning: \var{count} more uses of undefined function \var{name}.
4576 When the value is \code{0}, then the undefined warnings are not
4577 broken down by name at all: only the summary listing of undefined
4582 %%\node Interpreting Error Messages, Types in Python, Compilation Units, The Compiler
4583 \section{Interpreting Error Messages}
4584 \label{error-messages}
4585 \cpsubindex{error messages}{compiler}
4586 \cindex{compiler error messages}
4588 One of \Python{}'s unique features is the level of source location
4589 information it provides in error messages. The error messages contain
4590 a lot of detail in a terse format, so they may be confusing at first.
4591 Error messages will be illustrated using this example program:
4594 `(roq (ploq (+ ,x 3))))
4597 (declare (symbol y))
4600 The main problem with this program is that it is trying to add \code{3} to a
4601 symbol. Note also that the functions \code{roq} and \code{ploq} aren't defined
4605 * The Parts of the Error Message::
4606 * The Original and Actual Source::
4607 * The Processing Path::
4609 * Errors During Macroexpansion::
4611 * Error Message Parameterization::
4614 %%\node The Parts of the Error Message, The Original and Actual Source, Interpreting Error Messages, Interpreting Error Messages
4615 \subsection{The Parts of the Error Message}
4617 The compiler will produce this warning:
4619 File: /usr/me/stuff.lisp
4626 Warning: Result is a SYMBOL, not a NUMBER.
4628 In this example we see each of the six possible parts of a compiler error
4632 \item[\w{\code{File: /usr/me/stuff.lisp}}] This is the \var{file} that
4633 the compiler read the relevant code from. The file name is
4634 displayed because it may not be immediately obvious when there is an
4635 error during compilation of a large system, especially when
4636 \code{with-compilation-unit} is used to delay undefined warnings.
4638 \item[\w{\code{In: DEFUN FOO}}] This is the \var{definition} or
4639 top-level form responsible for the error. It is obtained by taking
4640 the first two elements of the enclosing form whose first element is
4641 a symbol beginning with ``\code{DEF}''. If there is no enclosing
4642 \w{\var{def}mumble}, then the outermost form is used. If there are
4643 multiple \w{\var{def}mumbles}, then they are all printed from the
4644 out in, separated by \code{$=>$}'s. In this example, the problem
4645 was in the \code{defun} for \code{foo}.
4647 \item[\w{\code{(ZOQ Y)}}] This is the \i{original source} form
4648 responsible for the error. Original source means that the form
4649 directly appeared in the original input to the compiler, i.e. in the
4650 lambda passed to \code{compile} or the top-level form read from the
4651 source file. In this example, the expansion of the \code{zoq} macro
4652 was responsible for the error.
4654 \item[\w{\code{--$>$ ROQ PLOQ +}} ] This is the \i{processing path}
4655 that the compiler used to produce the errorful code. The processing
4656 path is a representation of the evaluated forms enclosing the actual
4657 source that the compiler encountered when processing the original
4658 source. The path is the first element of each form, or the form
4659 itself if the form is not a list. These forms result from the
4660 expansion of macros or source-to-source transformation done by the
4661 compiler. In this example, the enclosing evaluated forms are the
4662 calls to \code{roq}, \code{ploq} and \code{+}. These calls resulted
4663 from the expansion of the \code{zoq} macro.
4665 \item[\code{==$>$ Y}] This is the \i{actual source} responsible for
4666 the error. If the actual source appears in the explanation, then we
4667 print the next enclosing evaluated form, instead of printing the
4668 actual source twice. (This is the form that would otherwise have
4669 been the last form of the processing path.) In this example, the
4670 problem is with the evaluation of the reference to the variable
4673 \item[\w{\code{Warning: Result is a SYMBOL, not a NUMBER.}}] This is
4674 the \var{explanation} of the problem. In this example, the problem is
4675 that \code{y} evaluates to a \code{symbol}, but is in a context
4676 where a number is required (the argument to \code{+}).
4679 Note that each part of the error message is distinctively marked:
4682 \item \code{File:} and \code{In:} mark the file and definition,
4685 \item The original source is an indented form with no prefix.
4687 \item Each line of the processing path is prefixed with \code{--$>$}.
4689 \item The actual source form is indented like the original source, but
4690 is marked by a preceding \code{==$>$} line. This is like the
4691 ``macroexpands to'' notation used in \cltl.
4693 \item The explanation is prefixed with the error severity
4694 (\pxlref{error-severity}), either \code{Error:}, \code{Warning:}, or
4699 Each part of the error message is more specific than the preceding
4700 one. If consecutive error messages are for nearby locations, then the
4701 front part of the error messages would be the same. In this case, the
4702 compiler omits as much of the second message as is in common with the
4705 File: /usr/me/stuff.lisp
4712 Warning: Undefined function: PLOQ
4715 (ROQ (PLOQ (+ Y 3)))
4716 Warning: Undefined function: ROQ
4718 In this example, the file, definition and original source are
4719 identical for the two messages, so the compiler omits them in the
4720 second message. If consecutive messages are entirely identical, then
4721 the compiler prints only the first message, followed by:
4723 [Last message occurs \var{repeats} times]
4725 where \var{repeats} is the number of times the message was given.
4727 If the source was not from a file, then no file line is printed. If
4728 the actual source is the same as the original source, then the
4729 processing path and actual source will be omitted. If no forms
4730 intervene between the original source and the actual source, then the
4731 processing path will also be omitted.
4734 %%\node The Original and Actual Source, The Processing Path, The Parts of the Error Message, Interpreting Error Messages
4735 \subsection{The Original and Actual Source}
4736 \cindex{original source}
4737 \cindex{actual source}
4739 The \i{original source} displayed will almost always be a list. If the actual
4740 source for an error message is a symbol, the original source will be the
4741 immediately enclosing evaluated list form. So even if the offending symbol
4742 does appear in the original source, the compiler will print the enclosing list
4743 and then print the symbol as the actual source (as though the symbol were
4744 introduced by a macro.)
4746 When the \i{actual source} is displayed (and is not a symbol), it will always
4747 be code that resulted from the expansion of a macro or a source-to-source
4748 compiler optimization. This is code that did not appear in the original
4749 source program; it was introduced by the compiler.
4751 Keep in mind that when the compiler displays a source form in an error message,
4752 it always displays the most specific (innermost) responsible form. For
4753 example, compiling this function:
4757 (declare (fixnum a))
4761 Gives this error message:
4764 (LET (A) (DECLARE (FIXNUM A)) (SETQ A (FOO X)) A)
4765 Warning: The binding of A is not a FIXNUM:
4768 This error message is not saying ``there's a problem somewhere in this
4769 \code{let}''\dash{}it is saying that there is a problem with the
4770 \code{let} itself. In this example, the problem is that \code{a}'s
4771 \false{} initial value is not a \code{fixnum}.
4774 %%\node The Processing Path, Error Severity, The Original and Actual Source, Interpreting Error Messages
4775 \subsection{The Processing Path}
4776 \cindex{processing path}
4777 \cindex{macroexpansion}
4778 \cindex{source-to-source transformation}
4780 The processing path is mainly useful for debugging macros, so if you don't
4781 write macros, you can ignore the processing path. Consider this example:
4784 (dotimes (i n *undefined*)))
4786 Compiling results in this error message:
4789 (DOTIMES (I N *UNDEFINED*))
4790 --> DO BLOCK LET TAGBODY RETURN-FROM
4793 Warning: Undefined variable: *UNDEFINED*
4795 Note that \code{do} appears in the processing path. This is because \code{dotimes}
4798 (do ((i 0 (1+ i)) (#:g1 n))
4799 ((>= i #:g1) *undefined*)
4800 (declare (type unsigned-byte i)))
4802 The rest of the processing path results from the expansion of \code{do}:
4805 (let ((i 0) (#:g1 n))
4806 (declare (type unsigned-byte i))
4808 #:g2 (psetq i (1+ i))
4809 #:g3 (unless (>= i #:g1) (go #:g2))
4810 (return-from nil (progn *undefined*)))))
4812 In this example, the compiler descended into the \code{block},
4813 \code{let}, \code{tagbody} and \code{return-from} to reach the
4814 \code{progn} printed as the actual source. This is a place where the
4815 ``actual source appears in explanation'' rule was applied. The
4816 innermost actual source form was the symbol \code{*undefined*} itself,
4817 but that also appeared in the explanation, so the compiler backed out
4821 %%\node Error Severity, Errors During Macroexpansion, The Processing Path, Interpreting Error Messages
4822 \subsection{Error Severity}
4823 \label{error-severity}
4824 \cindex{severity of compiler errors}
4825 \cindex{compiler error severity}
4827 There are three levels of compiler error severity:
4830 \item[Error] This severity is used when the compiler encounters a
4831 problem serious enough to prevent normal processing of a form.
4832 Instead of compiling the form, the compiler compiles a call to
4833 \code{error}. Errors are used mainly for signaling syntax errors.
4834 If an error happens during macroexpansion, the compiler will handle
4835 it. The compiler also handles and attempts to proceed from read
4838 \item[Warning] Warnings are used when the compiler can prove that
4839 something bad will happen if a portion of the program is executed,
4840 but the compiler can proceed by compiling code that signals an error
4841 at runtime if the problem has not been fixed:
4844 \item Violation of type declarations, or
4846 \item Function calls that have the wrong number of arguments or
4847 malformed keyword argument lists, or
4849 \item Referencing a variable declared \code{ignore}, or unrecognized
4850 declaration specifiers.
4853 In the language of the \clisp{} standard, these are situations where
4854 the compiler can determine that a situation with undefined
4855 consequences or that would cause an error to be signaled would
4858 \item[Note] Notes are used when there is something that seems a bit
4859 odd, but that might reasonably appear in correct programs.
4861 Note that the compiler does not fully conform to the proposed X3J13
4862 ``compiler-diagnostics'' cleanup. Errors, warnings and notes mostly
4863 correspond to errors, warnings and style-warnings, but many things
4864 that the cleanup considers to be style-warnings are printed as
4865 warnings rather than notes. Also, warnings, style-warnings and most
4866 errors aren't really signaled using the condition system.
4869 %%\node Errors During Macroexpansion, Read Errors, Error Severity, Interpreting Error Messages
4870 \subsection{Errors During Macroexpansion}
4871 \cpsubindex{macroexpansion}{errors during}
4873 The compiler handles errors that happen during macroexpansion, turning
4874 them into compiler errors. If you want to debug the error (to debug a
4875 macro), you can set \code{*break-on-signals*} to \code{error}. For
4876 example, this definition:
4879 (do ((current l (cdr current))
4880 ((atom current) nil))
4881 (when (eq (car current) e) (return current))))
4886 (DO ((CURRENT L #) (# NIL)) (WHEN (EQ # E) (RETURN CURRENT)) )
4887 Error: (during macroexpansion)
4889 Error in function LISP::DO-DO-BODY.
4890 DO step variable is not a symbol: (ATOM CURRENT)
4895 %%\node Read Errors, Error Message Parameterization, Errors During Macroexpansion, Interpreting Error Messages
4896 \subsection{Read Errors}
4897 \cpsubindex{read errors}{compiler}
4899 The compiler also handles errors while reading the source. For example:
4901 Error: Read error at 2:
4903 Error in function LISP::COMMA-MACRO.
4904 Comma not inside a backquote.
4906 The ``\code{at 2}'' refers to the character position in the source file at
4907 which the error was signaled, which is generally immediately after the
4908 erroneous text. The next line, ``\code{(,/\back{foo})}'', is the line in
4909 the source that contains the error file position. The ``\code{/\back{} }''
4910 indicates the error position within that line (in this example,
4911 immediately after the offending comma.)
4913 When in \hemlock{} (or any other EMACS-like editor), you can go to a
4914 character position with:
4916 M-< C-u \var{position} C-f
4918 Note that if the source is from a \hemlock{} buffer, then the position
4919 is relative to the start of the compiled region or \code{defun}, not the
4920 file or buffer start.
4922 After printing a read error message, the compiler attempts to recover from the
4923 error by backing up to the start of the enclosing top-level form and reading
4924 again with \code{*read-suppress*} true. If the compiler can recover from the
4925 error, then it substitutes a call to \code{cerror} for the unreadable form and
4926 proceeds to compile the rest of the file normally.
4928 If there is a read error when the file position is at the end of the file
4929 (i.e., an unexpected EOF error), then the error message looks like this:
4931 Error: Read error in form starting at 14:
4933 Error in function LISP::FLUSH-WHITESPACE.
4934 EOF while reading #<Stream for file "/usr/me/test.lisp">
4936 In this case, ``\code{starting at 14}'' indicates the character
4937 position at which the compiler started reading, i.e. the position
4938 before the start of the form that was missing the closing delimiter.
4939 The line \w{"\code{(defun test ()}"} is the first line after the starting
4940 position that the compiler thinks might contain the unmatched open
4944 %%\node Error Message Parameterization, , Read Errors, Interpreting Error Messages
4945 \subsection{Error Message Parameterization}
4946 \cpsubindex{error messages}{verbosity}
4947 \cpsubindex{verbosity}{of error messages}
4949 There is some control over the verbosity of error messages. See also
4950 \varref{undefined-warning-limit}, \code{*efficiency-note-limit*} and
4951 \varref{efficiency-note-cost-threshold}.
4953 \begin{defvar}{}{enclosing-source-cutoff}
4955 This variable specifies the number of enclosing actual source forms
4956 that are printed in full, rather than in the abbreviated processing
4957 path format. Increasing the value from its default of \code{1}
4958 allows you to see more of the guts of the macroexpanded source,
4959 which is useful when debugging macros.
4962 \begin{defvar}{}{error-print-length}
4963 \defvarx{error-print-level}
4965 These variables are the print level and print length used in
4966 printing error messages. The default values are \code{5} and
4967 \code{3}. If null, the global values of \code{*print-level*} and
4968 \code{*print-length*} are used.
4971 \begin{defmac}{extensions:}{def-source-context}{%
4972 \args{\var{name} \var{lambda-list} \mstar{form}}}
4974 This macro defines how to extract an abbreviated source context from
4975 the \var{name}d form when it appears in the compiler input.
4976 \var{lambda-list} is a \code{defmacro} style lambda-list used to
4977 parse the arguments. The \var{body} should return a list of
4978 subforms that can be printed on about one line. There are
4979 predefined methods for \code{defstruct}, \code{defmethod}, etc. If
4980 no method is defined, then the first two subforms are returned.
4981 Note that this facility implicitly determines the string name
4982 associated with anonymous functions.
4986 %%\node Types in Python, Getting Existing Programs to Run, Interpreting Error Messages, The Compiler
4987 \section{Types in Python}
4988 \cpsubindex{types}{in python}
4990 A big difference between \Python{} and all other \llisp{} compilers
4991 is the approach to type checking and amount of knowledge about types:
4994 \item \Python{} treats type declarations much differently than other
4995 Lisp compilers do. \Python{} doesn't blindly believe type
4996 declarations; it considers them assertions about the program that
4999 \item \Python{} also has a tremendously greater knowledge of the
5000 \clisp{} type system than other compilers. Support is incomplete
5001 only for the \code{not}, \code{and} and \code{satisfies} types.
5003 See also sections \ref{advanced-type-stuff} and \ref{type-inference}.
5007 * Compile Time Type Errors::
5008 * Precise Type Checking::
5009 * Weakened Type Checking::
5012 %%\node Compile Time Type Errors, Precise Type Checking, Types in Python, Types in Python
5013 \subsection{Compile Time Type Errors}
5014 \cindex{compile time type errors}
5015 \cpsubindex{type checking}{at compile time}
5017 If the compiler can prove at compile time that some portion of the
5018 program cannot be executed without a type error, then it will give a
5019 warning at compile time. It is possible that the offending code would
5020 never actually be executed at run-time due to some higher level
5021 consistency constraint unknown to the compiler, so a type warning
5022 doesn't always indicate an incorrect program. For example, consider
5030 (declare (fixnum x))
5033 Compilation produces this warning:
5036 (CASE FOO (:THIS 13) (:THAT 9) (:THE-OTHER 42))
5037 --> LET COND IF COND IF COND IF
5040 Warning: This is not a FIXNUM:
5043 In this case, the warning is telling you that if \code{foo} isn't any
5044 of \kwd{this}, \kwd{that} or \kwd{the-other}, then \code{x} will be
5045 initialized to \false, which the \code{fixnum} declaration makes
5046 illegal. The warning will go away if \code{ecase} is used instead of
5047 \code{case}, or if \kwd{the-other} is changed to \true.
5049 This sort of spurious type warning happens moderately often in the
5050 expansion of complex macros and in inline functions. In such cases,
5051 there may be dead code that is impossible to correctly execute. The
5052 compiler can't always prove this code is dead (could never be
5053 executed), so it compiles the erroneous code (which will always signal
5054 an error if it is executed) and gives a warning.
5056 \begin{defun}{extensions:}{required-argument}{}
5058 This function can be used as the default value for keyword arguments
5059 that must always be supplied. Since it is known by the compiler to
5060 never return, it will avoid any compile-time type warnings that
5061 would result from a default value inconsistent with the declared
5062 type. When this function is called, it signals an error indicating
5063 that a required keyword argument was not supplied. This function is
5064 also useful for \code{defstruct} slot defaults corresponding to
5065 required arguments. \xlref{empty-type}.
5067 Although this function is a CMU extension, it is relatively harmless
5068 to use it in otherwise portable code, since you can easily define it
5071 (defun required-argument ()
5072 (error "A required keyword argument was not supplied."))
5076 Type warnings are inhibited when the
5077 \code{extensions:inhibit-warnings} optimization quality is \code{3}
5078 (\pxlref{compiler-policy}.) This can be used in a local declaration
5079 to inhibit type warnings in a code fragment that has spurious
5083 %%\node Precise Type Checking, Weakened Type Checking, Compile Time Type Errors, Types in Python
5084 \subsection{Precise Type Checking}
5085 \label{precise-type-checks}
5086 \cindex{precise type checking}
5087 \cpsubindex{type checking}{precise}
5089 With the default compilation policy, all type
5090 assertions\footnote{There are a few circumstances where a type
5091 declaration is discarded rather than being used as type assertion.
5092 This doesn't affect safety much, since such discarded declarations
5093 are also not believed to be true by the compiler.} are precisely
5094 checked. Precise checking means that the check is done as though
5095 \code{typep} had been called with the exact type specifier that
5096 appeared in the declaration. \Python{} uses \var{policy} to determine
5097 whether to trust type assertions (\pxlref{compiler-policy}). Type
5098 assertions from declarations are indistinguishable from the type
5099 assertions on arguments to built-in functions. In \Python, adding
5100 type declarations makes code safer.
5102 If a variable is declared to be \w{\code{(integer 3 17)}}, then its
5103 value must always be an integer between \code{3} and \code{17}.
5104 If multiple type declarations apply to a single variable, then all the
5105 declarations must be correct; it is as though all the types were
5106 intersected producing a single \code{and} type specifier.
5108 Argument type declarations are automatically enforced. If you declare
5109 the type of a function argument, a type check will be done when that
5110 function is called. In a function call, the called function does the
5111 argument type checking, which means that a more restrictive type
5112 assertion in the calling function (e.g., from \code{the}) may be lost.
5114 The types of structure slots are also checked. The value of a
5115 structure slot must always be of the type indicated in any \kwd{type}
5116 slot option.\footnote{The initial value need not be of this type as
5117 long as the corresponding argument to the constructor is always
5118 supplied, but this will cause a compile-time type warning unless
5119 \code{required-argument} is used.} Because of precise type checking,
5120 the arguments to slot accessors are checked to be the correct type of
5123 In traditional \llisp{} compilers, not all type assertions are
5124 checked, and type checks are not precise. Traditional compilers
5125 blindly trust explicit type declarations, but may check the argument
5126 type assertions for built-in functions. Type checking is not precise,
5127 since the argument type checks will be for the most general type legal
5128 for that argument. In many systems, type declarations suppress what
5129 little type checking is being done, so adding type declarations makes
5130 code unsafe. This is a problem since it discourages writing type
5131 declarations during initial coding. In addition to being more error
5132 prone, adding type declarations during tuning also loses all the
5133 benefits of debugging with checked type assertions.
5135 To gain maximum benefit from \Python{}'s type checking, you should
5136 always declare the types of function arguments and structure slots as
5137 precisely as possible. This often involves the use of \code{or},
5138 \code{member} and other list-style type specifiers. Paradoxically,
5139 even though adding type declarations introduces type checks, it
5140 usually reduces the overall amount of type checking. This is
5141 especially true for structure slot type declarations.
5143 \Python{} uses the \code{safety} optimization quality (rather than
5144 presence or absence of declarations) to choose one of three levels of
5145 run-time type error checking: \pxlref{optimize-declaration}.
5146 \xlref{advanced-type-stuff} for more information about types in
5150 %%\node Weakened Type Checking, , Precise Type Checking, Types in Python
5151 \subsection{Weakened Type Checking}
5152 \label{weakened-type-checks}
5153 \cindex{weakened type checking}
5154 \cpsubindex{type checking}{weakened}
5156 When the value for the \code{speed} optimization quality is greater
5157 than \code{safety}, and \code{safety} is not \code{0}, then type
5158 checking is weakened to reduce the speed and space penalty. In
5159 structure-intensive code this can double the speed, yet still catch
5160 most type errors. Weakened type checks provide a level of safety
5161 similar to that of ``safe'' code in other \llisp{} compilers.
5163 A type check is weakened by changing the check to be for some
5164 convenient supertype of the asserted type. For example,
5165 \code{\w{(integer 3 17)}} is changed to \code{fixnum},
5166 \code{\w{(simple-vector 17)}} to \code{simple-vector}, and structure
5167 types are changed to \code{structure}. A complex check like:
5169 (or node hunk (member :foo :bar :baz))
5171 will be omitted entirely (i.e., the check is weakened to \code{*}.) If
5172 a precise check can be done for no extra cost, then no weakening is
5175 Although weakened type checking is similar to type checking done by
5176 other compilers, it is sometimes safer and sometimes less safe.
5177 Weakened checks are done in the same places as precise checks, so all
5178 the preceding discussion about where checking is done still applies.
5179 Weakened checking is sometimes somewhat unsafe because although the
5180 check is weakened, the precise type is still input into type
5181 inference. In some contexts this will result in type inferences not
5182 justified by the weakened check, and hence deletion of some type
5183 checks that would be done by conventional compilers.
5185 For example, if this code was compiled with weakened checks:
5188 (a nil :type simple-string))
5191 (a nil :type single-float))
5194 (declare (type bar x))
5197 and \code{myfun} was passed a \code{foo}, then no type error would be
5198 signaled, and we would try to multiply a \code{simple-string} as
5199 though it were a float (with unpredictable results.) This is because
5200 the check for \code{bar} was weakened to \code{structure}, yet when
5201 compiling the call to \code{bar-a}, the compiler thinks it knows it
5204 Note that normally even weakened type checks report the precise type
5205 in error messages. For example, if \code{myfun}'s \code{bar} check is
5206 weakened to \code{structure}, and the argument is \false{}, then the
5209 Type-error in MYFUN:
5210 NIL is not of type BAR
5212 However, there is some speed and space cost for signaling a precise
5213 error, so the weakened type is reported if the \code{speed}
5214 optimization quality is \code{3} or \code{debug} quality is less than
5217 Type-error in MYFUN:
5218 NIL is not of type STRUCTURE
5220 \xlref{optimize-declaration} for further discussion of the
5221 \code{optimize} declaration.
5224 %%\node Getting Existing Programs to Run, Compiler Policy, Types in Python, The Compiler
5225 \section{Getting Existing Programs to Run}
5226 \cpsubindex{existing programs}{to run}
5227 \cpsubindex{types}{portability}
5228 \cindex{compatibility with other Lisps}
5230 Since \Python{} does much more comprehensive type checking than other
5231 Lisp compilers, \Python{} will detect type errors in many programs
5232 that have been debugged using other compilers. These errors are
5233 mostly incorrect declarations, although compile-time type errors can
5234 find actual bugs if parts of the program have never been tested.
5236 Some incorrect declarations can only be detected by run-time type
5237 checking. It is very important to initially compile programs with
5238 full type checks and then test this version. After the checking
5239 version has been tested, then you can consider weakening or
5240 eliminating type checks. \b{This applies even to previously debugged
5241 programs.} \Python{} does much more type inference than other
5242 \llisp{} compilers, so believing an incorrect declaration does much
5245 The most common problem is with variables whose initial value doesn't
5246 match the type declaration. Incorrect initial values will always be
5247 flagged by a compile-time type error, and they are simple to fix once
5248 located. Consider this code fragment:
5251 (declare (fixnum foo))
5255 Here the variable \code{foo} is given an initial value of \false, but
5256 is declared to be a \code{fixnum}. Even if it is never read, the
5257 initial value of a variable must match the declared type. There are
5258 two ways to fix this problem. Change the declaration:
5261 (declare (type (or fixnum null) foo))
5265 or change the initial value:
5268 (declare (fixnum foo))
5272 It is generally preferable to change to a legal initial value rather
5273 than to weaken the declaration, but sometimes it is simpler to weaken
5274 the declaration than to try to make an initial value of the
5278 Another declaration problem occasionally encountered is incorrect
5279 declarations on \code{defmacro} arguments. This probably usually
5280 happens when a function is converted into a macro. Consider this
5284 (declare (fixnum x))
5285 `(the fixnum (1+ ,x)))
5287 Although legal and well-defined \clisp, the meaning of this
5288 definition is almost certainly not what the writer intended. For
5289 example, this call is illegal:
5293 The call is illegal because the argument to the macro is \w{\code{(+ 4
5294 5)}}, which is a \code{list}, not a \code{fixnum}. Because of
5295 macro semantics, it is hardly ever useful to declare the types of
5296 macro arguments. If you really want to assert something about the
5297 type of the result of evaluating a macro argument, then put a
5298 \code{the} in the expansion:
5301 `(the fixnum (1+ (the fixnum ,x))))
5303 In this case, it would be stylistically preferable to change this
5304 macro back to a function and declare it inline. Macros have no
5305 efficiency advantage over inline functions when using \Python.
5306 \xlref{inline-expansion}.
5309 Some more subtle problems are caused by incorrect declarations that
5310 can't be detected at compile time. Consider this code:
5312 (do ((pos 0 (position #\back{a} string :start (1+ pos))))
5314 (declare (fixnum pos))
5317 Although \code{pos} is almost always a \code{fixnum}, it is \false{}
5318 at the end of the loop. If this example is compiled with full type
5319 checks (the default), then running it will signal a type error at the
5320 end of the loop. If compiled without type checks, the program will go
5321 into an infinite loop (or perhaps \code{position} will complain
5322 because \w{\code{(1+ nil)}} isn't a sensible start.) Why? Because if
5323 you compile without type checks, the compiler just quietly believes
5324 the type declaration. Since \code{pos} is always a \code{fixnum}, it
5325 is never \nil, so \w{\code{(null pos)}} is never true, and the loop
5326 exit test is optimized away. Such errors are sometimes flagged by
5327 unreachable code notes (\pxlref{dead-code-notes}), but it is still
5328 important to initially compile any system with full type checks, even
5329 if the system works fine when compiled using other compilers.
5331 In this case, the fix is to weaken the type declaration to
5332 \w{\code{(or fixnum null)}}.\footnote{Actually, this declaration is
5333 totally unnecessary in \Python, since it already knows
5334 \code{position} returns a non-negative \code{fixnum} or \false.}
5335 Note that there is usually little performance penalty for weakening a
5336 declaration in this way. Any numeric operations in the body can still
5337 assume the variable is a \code{fixnum}, since \false{} is not a legal
5338 numeric argument. Another possible fix would be to say:
5340 (do ((pos 0 (position #\back{a} string :start (1+ pos))))
5343 (declare (fixnum pos))
5346 This would be preferable in some circumstances, since it would allow a
5347 non-standard representation to be used for the local \code{pos}
5348 variable in the loop body (see section \ref{ND-variables}.)
5350 In summary, remember that \i{all} values that a variable \i{ever}
5351 has must be of the declared type, and that you should test using safe
5354 %%\node Compiler Policy, Open Coding and Inline Expansion, Getting Existing Programs to Run, The Compiler
5355 \section{Compiler Policy}
5356 \label{compiler-policy}
5357 \cpsubindex{policy}{compiler}
5358 \cindex{compiler policy}
5360 The policy is what tells the compiler \var{how} to compile a program.
5361 This is logically (and often textually) distinct from the program
5362 itself. Broad control of policy is provided by the \code{optimize}
5363 declaration; other declarations and variables control more specific
5364 aspects of compilation.
5368 * The Optimize Declaration::
5369 * The Optimize-Interface Declaration::
5372 %%\node The Optimize Declaration, The Optimize-Interface Declaration, Compiler Policy, Compiler Policy
5373 \subsection{The Optimize Declaration}
5374 \label{optimize-declaration}
5375 \cindex{optimize declaration}
5376 \cpsubindex{declarations}{\code{optimize}}
5378 The \code{optimize} declaration recognizes six different
5379 \var{qualities}. The qualities are conceptually independent aspects
5380 of program performance. In reality, increasing one quality tends to
5381 have adverse effects on other qualities. The compiler compares the
5382 relative values of qualities when it needs to make a trade-off; i.e.,
5383 if \code{speed} is greater than \code{safety}, then improve speed at
5386 The default for all qualities (except \code{debug}) is \code{1}.
5387 Whenever qualities are equal, ties are broken according to a broad
5388 idea of what a good default environment is supposed to be. Generally
5389 this downplays \code{speed}, \code{compilation-speed} and \code{space} in
5390 favor of \code{safety} and \code{debug}. Novice and casual users
5391 should stick to the default policy. Advanced users often want to
5392 improve speed and memory usage at the cost of safety and
5395 If the value for a quality is \code{0} or \code{3}, then it may have a
5396 special interpretation. A value of \code{0} means ``totally
5397 unimportant'', and a \code{3} means ``ultimately important.'' These
5398 extreme optimization values enable ``heroic'' compilation strategies
5399 that are not always desirable and sometimes self-defeating.
5400 Specifying more than one quality as \code{3} is not desirable, since
5401 it doesn't tell the compiler which quality is most important.
5404 These are the optimization qualities:
5407 \item[\code{speed}] \cindex{speed optimization quality}How fast the
5408 program should run. \code{speed 3} enables some optimizations
5409 that hurt debuggability.
5411 \item[\code{compilation-speed}] \cindex{compilation-speed optimization
5412 quality}How fast the compiler should run. Note that increasing
5413 this above \code{safety} weakens type checking.
5415 \item[\code{space}] \cindex{space optimization quality}How much space
5416 the compiled code should take up. Inline expansion is mostly
5417 inhibited when \code{space} is greater than \code{speed}. A value
5418 of \code{0} enables promiscuous inline expansion. Wide use of a
5419 \code{0} value is not recommended, as it may waste so much space
5420 that run time is slowed. \xlref{inline-expansion} for a discussion
5421 of inline expansion.
5423 \item[\code{debug}] \cindex{debug optimization quality}How debuggable
5424 the program should be. The quality is treated differently from the
5425 other qualities: each value indicates a particular level of debugger
5426 information; it is not compared with the other qualities.
5427 \xlref{debugger-policy} for more details.
5429 \item[\code{safety}] \cindex{safety optimization quality}How much
5430 error checking should be done. If \code{speed}, \code{space} or
5431 \code{compilation-speed} is more important than \code{safety}, then
5432 type checking is weakened (\pxlref{weakened-type-checks}). If
5433 \code{safety} is \code{0}, then no run time error checking is done.
5434 In addition to suppressing type checks, \code{0} also suppresses
5435 argument count checking, unbound-symbol checking and array bounds
5438 \item[\code{extensions:inhibit-warnings}] \cindex{inhibit-warnings
5439 optimization quality}This is a CMU extension that determines how
5440 little (or how much) diagnostic output should be printed during
5441 compilation. This quality is compared to other qualities to
5442 determine whether to print style notes and warnings concerning those
5443 qualities. If \code{speed} is greater than \code{inhibit-warnings},
5444 then notes about how to improve speed will be printed, etc. The
5445 default value is \code{1}, so raising the value for any standard
5446 quality above its default enables notes for that quality. If
5447 \code{inhibit-warnings} is \code{3}, then all notes and most
5448 non-serious warnings are inhibited. This is useful with
5449 \code{declare} to suppress warnings about unavoidable problems.
5452 %%\node The Optimize-Interface Declaration, , The Optimize Declaration, Compiler Policy
5453 \subsection{The Optimize-Interface Declaration}
5454 \label{optimize-interface-declaration}
5455 \cindex{optimize-interface declaration}
5456 \cpsubindex{declarations}{\code{optimize-interface}}
5458 The \code{extensions:optimize-interface} declaration is identical in
5459 syntax to the \code{optimize} declaration, but it specifies the policy
5460 used during compilation of code the compiler automatically generates
5461 to check the number and type of arguments supplied to a function. It
5462 is useful to specify this policy separately, since even thoroughly
5463 debugged functions are vulnerable to being passed the wrong arguments.
5464 The \code{optimize-interface} declaration can specify that arguments
5465 should be checked even when the general \code{optimize} policy is
5468 Note that this argument checking is the checking of user-supplied
5469 arguments to any functions defined within the scope of the
5470 declaration, \i{not} the checking of arguments to \llisp{}
5471 primitives that appear in those definitions.
5473 The idea behind this declaration is that it allows the definition of
5474 functions that appear fully safe to other callers, but that do no
5475 internal error checking. Of course, it is possible that arguments may
5476 be invalid in ways other than having incorrect type. Functions
5477 compiled unsafely must still protect themselves against things like
5478 user-supplied array indices that are out of bounds and improper lists.
5479 See also the \kwd{context-declarations} option to
5480 \macref{with-compilation-unit}.
5483 %%\node Open Coding and Inline Expansion, , Compiler Policy, The Compiler
5484 \section{Open Coding and Inline Expansion}
5486 \cindex{open-coding}
5487 \cindex{inline expansion}
5488 \cindex{static functions}
5490 Since \clisp{} forbids the redefinition of standard functions\footnote{See the
5491 proposed X3J13 ``lisp-symbol-redefinition'' cleanup.}, the compiler can have
5492 special knowledge of these standard functions embedded in it. This special
5493 knowledge is used in various ways (open coding, inline expansion, source
5494 transformation), but the implications to the user are basically the same:
5497 \item Attempts to redefine standard functions may be frustrated, since
5498 the function may never be called. Although it is technically
5499 illegal to redefine standard functions, users sometimes want to
5500 implicitly redefine these functions when they are debugging using
5501 the \code{trace} macro. Special-casing of standard functions can be
5502 inhibited using the \code{notinline} declaration.
5504 \item The compiler can have multiple alternate implementations of
5505 standard functions that implement different trade-offs of speed,
5506 space and safety. This selection is based on the compiler policy,
5507 \pxlref{compiler-policy}.
5511 When a function call is \i{open coded}, inline code whose effect is
5512 equivalent to the function call is substituted for that function call.
5513 When a function call is \i{closed coded}, it is usually left as is,
5514 although it might be turned into a call to a different function with
5515 different arguments. As an example, if \code{nthcdr} were to be open
5522 (cdr (cdr (cdr (cdr foobar))))
5527 (list foobar (cdr foobar)))
5531 If \code{nth} is closed coded, then
5535 might stay the same, or turn into something like:
5540 In general, open coding sacrifices space for speed, but some functions (such as
5541 \code{car}) are so simple that they are always open-coded. Even when not
5542 open-coded, a call to a standard function may be transformed into a different
5543 function call (as in the last example) or compiled as \i{static call}. Static
5544 function call uses a more efficient calling convention that forbids
5547 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/efficiency.ms}
5551 \hide{ -*- Dictionary: cmu-user -*- }
5552 %%\node Advanced Compiler Use and Efficiency Hints, UNIX Interface, The Compiler, Top
5553 \chapter{Advanced Compiler Use and Efficiency Hints}
5555 \b{By Robert MacLachlan}
5558 \label{advanced-compiler}
5561 * Advanced Compiler Introduction::
5562 * More About Types in Python::
5564 * Source Optimization::
5567 * Block Compilation::
5568 * Inline Expansion::
5569 * Byte Coded Compilation::
5570 * Object Representation::
5572 * General Efficiency Hints::
5573 * Efficiency Notes::
5577 %%\node Advanced Compiler Introduction, More About Types in Python, Advanced Compiler Use and Efficiency Hints, Advanced Compiler Use and Efficiency Hints
5578 \section{Advanced Compiler Introduction}
5580 In \cmucl, as in any language on any computer, the path to efficient
5581 code starts with good algorithms and sensible programming techniques,
5582 but to avoid inefficiency pitfalls, you need to know some of this
5583 implementation's quirks and features. This chapter is mostly a fairly
5584 long and detailed overview of what optimizations \python{} does.
5585 Although there are the usual negative suggestions of inefficient
5586 features to avoid, the main emphasis is on describing the things that
5587 programmers can count on being efficient.
5589 The optimizations described here can have the effect of speeding up
5590 existing programs written in conventional styles, but the potential
5591 for new programming styles that are clearer and less error-prone is at
5592 least as significant. For this reason, several sections end with a
5593 discussion of the implications of these optimizations for programming
5600 * Representation of Objects::
5601 * Writing Efficient Code::
5604 %%\node Types, Optimization, Advanced Compiler Introduction, Advanced Compiler Introduction
5607 Python's support for types is unusual in three major ways:
5610 \item Precise type checking encourages the specific use of type
5611 declarations as a form of run-time consistency checking. This
5612 speeds development by localizing type errors and giving more
5613 meaningful error messages. \xlref{precise-type-checks}. \python{}
5614 produces completely safe code; optimized type checking maintains
5615 reasonable efficiency on conventional hardware
5616 (\pxlref{type-check-optimization}.)
5618 \item Comprehensive support for the \clisp{} type system makes complex
5619 type specifiers useful. Using type specifiers such as \code{or} and
5620 \code{member} has both efficiency and robustness advantages.
5621 \xlref{advanced-type-stuff}.
5623 \item Type inference eliminates the need for some declarations, and
5624 also aids compile-time detection of type errors. Given detailed
5625 type declarations, type inference can often eliminate type checks
5626 and enable more efficient object representations and code sequences.
5627 Checking all types results in fewer type checks. See sections
5628 \ref{type-inference} and \ref{non-descriptor}.
5632 %%\node Optimization, Function Call, Types, Advanced Compiler Introduction
5633 \subsection{Optimization}
5635 The main barrier to efficient Lisp programs is not that there is no
5636 efficient way to code the program in Lisp, but that it is difficult to
5637 arrive at that efficient coding. Common Lisp is a highly complex
5638 language, and usually has many semantically equivalent ``reasonable''
5639 ways to code a given problem. It is desirable to make all of these
5640 equivalent solutions have comparable efficiency so that programmers
5641 don't have to waste time discovering the most efficient solution.
5643 Source level optimization increases the number of efficient ways to
5644 solve a problem. This effect is much larger than the increase in the
5645 efficiency of the ``best'' solution. Source level optimization
5646 transforms the original program into a more efficient (but equivalent)
5647 program. Although the optimizer isn't doing anything the programmer
5648 couldn't have done, this high-level optimization is important because:
5651 \item The programmer can code simply and directly, rather than
5652 obfuscating code to please the compiler.
5654 \item When presented with a choice of similar coding alternatives, the
5655 programmer can choose whichever happens to be most convenient,
5656 instead of worrying about which is most efficient.
5659 Source level optimization eliminates the need for macros to optimize
5660 their expansion, and also increases the effectiveness of inline
5661 expansion. See sections \ref{source-optimization} and
5662 \ref{inline-expansion}.
5664 Efficient support for a safer programming style is the biggest
5665 advantage of source level optimization. Existing tuned programs
5666 typically won't benefit much from source optimization, since their
5667 source has already been optimized by hand. However, even tuned
5668 programs tend to run faster under \python{} because:
5671 \item Low level optimization and register allocation provide modest
5672 speedups in any program.
5674 \item Block compilation and inline expansion can reduce function call
5675 overhead, but may require some program restructuring. See sections
5676 \ref{inline-expansion}, \ref{local-call} and
5677 \ref{block-compilation}.
5679 \item Efficiency notes will point out important type declarations that
5680 are often missed even in highly tuned programs.
5681 \xlref{efficiency-notes}.
5683 \item Existing programs can be compiled safely without prohibitive
5684 speed penalty, although they would be faster and safer with added
5685 declarations. \xlref{type-check-optimization}.
5687 \item The context declaration mechanism allows both space and runtime
5688 of large systems to be reduced without sacrificing robustness by
5689 semi-automatically varying compilation policy without adding any
5690 \code{optimize} declarations to the source.
5691 \xlref{context-declarations}.
5693 \item Byte compilation can be used to dramatically reduce the size of
5694 code that is not speed-critical. \xlref{byte-compile}
5698 %%\node Function Call, Representation of Objects, Optimization, Advanced Compiler Introduction
5699 \subsection{Function Call}
5701 The sort of symbolic programs generally written in \llisp{} often
5702 favor recursion over iteration, or have inner loops so complex that
5703 they involve multiple function calls. Such programs spend a larger
5704 fraction of their time doing function calls than is the norm in other
5705 languages; for this reason \llisp{} implementations strive to make the
5706 general (or full) function call as inexpensive as possible. \python{}
5707 goes beyond this by providing two good alternatives to full call:
5710 \item Local call resolves function references at compile time,
5711 allowing better calling sequences and optimization across function
5712 calls. \xlref{local-call}.
5714 \item Inline expansion totally eliminates call overhead and allows
5715 many context dependent optimizations. This provides a safe and
5716 efficient implementation of operations with function semantics,
5717 eliminating the need for error-prone macro definitions or manual
5718 case analysis. Although most \clisp{} implementations support
5719 inline expansion, it becomes a more powerful tool with \python{}'s
5720 source level optimization. See sections \ref{source-optimization}
5721 and \ref{inline-expansion}.
5725 Generally, \python{} provides simple implementations for simple uses
5726 of function call, rather than having only a single calling convention.
5727 These features allow a more natural programming style:
5730 \item Proper tail recursion. \xlref{tail-recursion}
5732 \item Relatively efficient closures.
5734 \item A \code{funcall} that is as efficient as normal named call.
5736 \item Calls to local functions such as from \code{labels} are
5740 \item Control transfer is a direct jump.
5742 \item The closure environment is passed in registers rather than heap
5745 \item Keyword arguments and multiple values are implemented more
5752 %%\node Representation of Objects, Writing Efficient Code, Function Call, Advanced Compiler Introduction
5753 \subsection{Representation of Objects}
5755 Sometimes traditional \llisp{} implementation techniques compare so
5756 poorly to the techniques used in other languages that \llisp{} can
5757 become an impractical language choice. Terrible inefficiencies appear
5758 in number-crunching programs, since \llisp{} numeric operations often
5759 involve number-consing and generic arithmetic. \python{} supports
5760 efficient natural representations for numbers (and some other types),
5761 and allows these efficient representations to be used in more
5762 contexts. \python{} also provides good efficiency notes that warn
5763 when a crucial declaration is missing.
5765 See section \ref{non-descriptor} for more about object representations and
5766 numeric types. Also \pxlref{efficiency-notes} about efficiency notes.
5768 %%\node Writing Efficient Code, , Representation of Objects, Advanced Compiler Introduction
5769 \subsection{Writing Efficient Code}
5770 \label{efficiency-overview}
5772 Writing efficient code that works is a complex and prolonged process.
5773 It is important not to get so involved in the pursuit of efficiency
5774 that you lose sight of what the original problem demands. Remember
5778 \item The program should be correct\dash{}it doesn't matter how
5779 quickly you get the wrong answer.
5781 \item Both the programmer and the user will make errors, so the
5782 program must be robust\dash{}it must detect errors in a way that
5783 allows easy correction.
5785 \item A small portion of the program will consume most of the
5786 resources, with the bulk of the code being virtually irrelevant to
5787 efficiency considerations. Even experienced programmers familiar
5788 with the problem area cannot reliably predict where these ``hot
5794 The best way to get efficient code that is still worth using is to separate
5795 coding from tuning. During coding, you should:
5798 \item Use a coding style that aids correctness and robustness without
5799 being incompatible with efficiency.
5801 \item Choose appropriate data structures that allow efficient
5802 algorithms and object representations
5803 (\pxlref{object-representation}). Try to make interfaces abstract
5804 enough so that you can change to a different representation if
5805 profiling reveals a need.
5807 \item Whenever you make an assumption about a function argument or
5808 global data structure, add consistency assertions, either with type
5809 declarations or explicit uses of \code{assert}, \code{ecase}, etc.
5812 During tuning, you should:
5815 \item Identify the hot spots in the program through profiling (section
5818 \item Identify inefficient constructs in the hot spot with efficiency
5819 notes, more profiling, or manual inspection of the source. See
5820 sections \ref{general-efficiency} and \ref{efficiency-notes}.
5822 \item Add declarations and consider the application of optimizations.
5823 See sections \ref{local-call}, \ref{inline-expansion} and
5824 \ref{non-descriptor}.
5826 \item If all else fails, consider algorithm or data structure changes.
5827 If you did a good job coding, changes will be easy to introduce.
5833 %%\node More About Types in Python, Type Inference, Advanced Compiler Introduction, Advanced Compiler Use and Efficiency Hints
5834 \section{More About Types in Python}
5835 \label{advanced-type-stuff}
5836 \cpsubindex{types}{in python}
5838 This section goes into more detail describing what types and declarations are
5839 recognized by \python. The area where \python{} differs most radically from
5840 previous \llisp{} compilers is in its support for types:
5843 \item Precise type checking helps to find bugs at run time.
5845 \item Compile-time type checking helps to find bugs at compile time.
5847 \item Type inference minimizes the need for generic operations, and
5848 also increases the efficiency of run time type checking and the
5849 effectiveness of compile time type checking.
5851 \item Support for detailed types provides a wealth of opportunity for
5852 operation-specific type inference and optimization.
5858 * More Types Meaningful::
5859 * Canonicalization::
5864 * The Values Declaration::
5866 * The Freeze-Type Declaration::
5867 * Type Restrictions::
5868 * Type Style Recommendations::
5871 %%\node More Types Meaningful, Canonicalization, More About Types in Python, More About Types in Python
5872 \subsection{More Types Meaningful}
5874 \clisp{} has a very powerful type system, but conventional \llisp{}
5875 implementations typically only recognize the small set of types
5876 special in that implementation. In these systems, there is an
5877 unfortunate paradox: a declaration for a relatively general type like
5878 \code{fixnum} will be recognized by the compiler, but a highly
5879 specific declaration such as \code{\w{(integer 3 17)}} is totally
5882 This is obviously a problem, since the user has to know how to specify
5883 the type of an object in the way the compiler wants it. A very
5884 minimal (but rarely satisfied) criterion for type system support is
5885 that it be no worse to make a specific declaration than to make a
5886 general one. \python{} goes beyond this by exploiting a number of
5887 advantages obtained from detailed type information.
5889 Using more restrictive types in declarations allows the compiler to do
5890 better type inference and more compile-time type checking. Also, when
5891 type declarations are considered to be consistency assertions that
5892 should be verified (conditional on policy), then complex types are
5893 useful for making more detailed assertions.
5895 Python ``understands'' the list-style \code{or}, \code{member},
5896 \code{function}, array and number type specifiers. Understanding
5900 \item If the type contains more information than is used in a
5901 particular context, then the extra information is simply ignored,
5902 rather than derailing type inference.
5904 \item In many contexts, the extra information from these type
5905 specifiers is used to good effect. In particular, type checking in
5906 \code{Python} is \var{precise}, so these complex types can be used
5907 in declarations to make interesting assertions about functions and
5908 data structures (\pxlref{precise-type-checks}.) More specific
5909 declarations also aid type inference and reduce the cost for type
5913 For related information, \pxlref{numeric-types} for numeric types, and
5914 section \ref{array-types} for array types.
5917 %%\node Canonicalization, Member Types, More Types Meaningful, More About Types in Python
5918 \subsection{Canonicalization}
5919 \cpsubindex{types}{equivalence}
5920 \cindex{canonicalization of types}
5921 \cindex{equivalence of types}
5923 When given a type specifier, \python{} will often rewrite it into a
5924 different (but equivalent) type. This is the mechanism that \python{}
5925 uses for detecting type equivalence. For example, in \python{}'s
5926 canonical representation, these types are equivalent:
5928 (or list (member :end)) \myequiv (or cons (member nil :end))
5930 This has two implications for the user:
5933 \item The standard symbol type specifiers for \code{atom},
5934 \code{null}, \code{fixnum}, etc., are in no way magical. The
5935 \tindexed{null} type is actually defined to be \code{\w{(member
5936 nil)}}, \tindexed{list} is \code{\w{(or cons null)}}, and
5937 \tindexed{fixnum} is \code{\w{(signed-byte 30)}}.
5939 \item When the compiler prints out a type, it may not look like the
5940 type specifier that originally appeared in the program. This is
5941 generally not a problem, but it must be taken into consideration
5942 when reading compiler error messages.
5946 %%\node Member Types, Union Types, Canonicalization, More About Types in Python
5947 \subsection{Member Types}
5948 \cindex{member types}
5950 The \tindexed{member} type specifier can be used to represent
5951 ``symbolic'' values, analogous to the enumerated types of Pascal. For
5952 example, the second value of \code{find-symbol} has this type:
5954 (member :internal :external :inherited nil)
5956 Member types are very useful for expressing consistency constraints on data
5957 structures, for example:
5959 (defstruct ice-cream
5960 (flavor :vanilla :type (member :vanilla :chocolate :strawberry)))
5962 Member types are also useful in type inference, as the number of members can
5963 sometimes be pared down to one, in which case the value is a known constant.
5965 %%\node Union Types, The Empty Type, Member Types, More About Types in Python
5966 \subsection{Union Types}
5967 \cindex{union (\code{or}) types}
5968 \cindex{or (union) types}
5970 The \tindexed{or} (union) type specifier is understood, and is
5971 meaningfully applied in many contexts. The use of \code{or} allows
5972 assertions to be made about types in dynamically typed programs. For
5976 (next nil :type (or box null))
5977 (top :removed :type (or box-top (member :removed))))
5979 The type assertion on the \code{top} slot ensures that an error will be signaled
5980 when there is an attempt to store an illegal value (such as \kwd{rmoved}.)
5981 Although somewhat weak, these union type assertions provide a useful input into
5982 type inference, allowing the cost of type checking to be reduced. For example,
5983 this loop is safely compiled with no type checks:
5985 (defun find-box-with-top (box)
5986 (declare (type (or box null) box))
5987 (do ((current box (box-next current)))
5989 (unless (eq (box-top current) :removed)
5993 Union types are also useful in type inference for representing types that are
5994 partially constrained. For example, the result of this expression:
6000 can be expressed as \code{\w{(or integer cons)}}.
6002 %%\node The Empty Type, Function Types, Union Types, More About Types in Python
6003 \subsection{The Empty Type}
6006 \cpsubindex{empty type}{the}
6007 \cpsubindex{errors}{result type of}
6009 The type \false{} is also called the empty type, since no object is of
6010 type \false{}. The union of no types, \code{(or)}, is also empty.
6011 \python{}'s interpretation of an expression whose type is \false{} is
6012 that the expression never yields any value, but rather fails to
6013 terminate, or is thrown out of. For example, the type of a call to
6014 \code{error} or a use of \code{return} is \false{}. When the type of
6015 an expression is empty, compile-time type warnings about its value are
6016 suppressed; presumably somebody else is signaling an error. If a
6017 function is declared to have return type \false{}, but does in fact
6018 return, then (in safe compilation policies) a ``\code{NIL Function
6019 returned}'' error will be signaled. See also the function
6020 \funref{required-argument}.
6022 %%\node Function Types, The Values Declaration, The Empty Type, More About Types in Python
6023 \subsection{Function Types}
6024 \label{function-types}
6025 \cpsubindex{function}{types}
6026 \cpsubindex{types}{function}
6028 \findexed{function} types are understood in the restrictive sense, specifying:
6031 \item The argument syntax that the function must be called with. This
6032 is information about what argument counts are acceptable, and which
6033 keyword arguments are recognized. In \python, warnings about
6034 argument syntax are a consequence of function type checking.
6036 \item The types of the argument values that the caller must pass. If
6037 the compiler can prove that some argument to a call is of a type
6038 disallowed by the called function's type, then it will give a
6039 compile-time type warning. In addition to being used for
6040 compile-time type checking, these type assertions are also used as
6041 output type assertions in code generation. For example, if
6042 \code{foo} is declared to have a \code{fixnum} argument, then the
6043 \code{1+} in \w{\code{(foo (1+ x))}} is compiled with knowledge that
6044 the result must be a fixnum.
6046 \item The types of the values that will be bound to argument variables in
6047 the function's definition. Declaring a function's type with
6048 \code{ftype} implicitly declares the types of the arguments in the
6049 definition. \python{} checks for consistency between the definition
6050 and the \code{ftype} declaration. Because of precise type checking,
6051 an error will be signaled when a function is called with an
6052 argument of the wrong type.
6054 \item The type of return value(s) that the caller can expect. This
6055 information is a useful input to type inference. For example, if a
6056 function is declared to return a \code{fixnum}, then when a call to
6057 that function appears in an expression, the expression will be
6058 compiled with knowledge that the call will return a \code{fixnum}.
6060 \item The type of return value(s) that the definition must return.
6061 The result type in an \code{ftype} declaration is treated like an
6062 implicit \code{the} wrapped around the body of the definition. If
6063 the definition returns a value of the wrong type, an error will be
6064 signaled. If the compiler can prove that the function returns the
6065 wrong type, then it will give a compile-time warning.
6068 This is consistent with the new interpretation of function types and
6069 the \code{ftype} declaration in the proposed X3J13
6070 ``function-type-argument-type-semantics'' cleanup. Note also, that if
6071 you don't explicitly declare the type of a function using a global
6072 \code{ftype} declaration, then \python{} will compute a function type
6073 from the definition, providing a degree of inter-routine type
6074 inference, \pxlref{function-type-inference}.
6076 %%\node The Values Declaration, Structure Types, Function Types, More About Types in Python
6077 \subsection{The Values Declaration}
6078 \cindex{values declaration}
6080 \cmucl{} supports the \code{values} declaration as an extension to
6081 \clisp. The syntax is {\code{(values \var{type1}
6082 \var{type2}$\ldots$\var{typen})}}. This declaration is
6083 semantically equivalent to a \code{the} form wrapped around the body
6084 of the special form in which the \code{values} declaration appears.
6085 The advantage of \code{values} over \findexed{the} is purely
6086 syntactic\dash{}it doesn't introduce more indentation. For example:
6089 (declare (values single-float))
6106 (defun floor (number &optional (divisor 1))
6107 (declare (values integer real))
6112 (defun floor (number &optional (divisor 1))
6113 (the (values integer real)
6116 In addition to being recognized by \code{lambda} (and hence by
6117 \code{defun}), the \code{values} declaration is recognized by all the
6118 other special forms with bodies and declarations: \code{let},
6119 \code{let*}, \code{labels} and \code{flet}. Macros with declarations
6120 usually splice the declarations into one of the above forms, so they
6121 will accept this declaration too, but the exact effect of a
6122 \code{values} declaration will depend on the macro.
6124 If you declare the types of all arguments to a function, and also
6125 declare the return value types with \code{values}, you have described
6126 the type of the function. \python{} will use this argument and result
6127 type information to derive a function type that will then be applied
6128 to calls of the function (\pxlref{function-types}.) This provides a
6129 way to declare the types of functions that is much less syntactically
6130 awkward than using the \code{ftype} declaration with a \code{function}
6133 Although the \code{values} declaration is non-standard, it is
6134 relatively harmless to use it in otherwise portable code, since any
6135 warning in non-CMU implementations can be suppressed with the standard
6136 \code{declaration} proclamation.
6138 %%\node Structure Types, The Freeze-Type Declaration, The Values Declaration, More About Types in Python
6139 \subsection{Structure Types}
6140 \label{structure-types}
6141 \cindex{structure types}
6142 \cindex{defstruct types}
6143 \cpsubindex{types}{structure}
6145 Because of precise type checking, structure types are much better supported by
6146 Python than by conventional compilers:
6149 \item The structure argument to structure accessors is precisely
6150 checked\dash{}if you call \code{foo-a} on a \code{bar}, an error
6153 \item The types of slot values are precisely checked\dash{}if you pass
6154 the wrong type argument to a constructor or a slot setter, then an
6155 error will be signaled.
6157 This error checking is tremendously useful for detecting bugs in
6158 programs that manipulate complex data structures.
6160 An additional advantage of checking structure types and enforcing slot
6161 types is that the compiler can safely believe slot type declarations.
6162 \python{} effectively moves the type checking from the slot access to
6163 the slot setter or constructor call. This is more efficient since the
6164 caller of the setter or constructor often knows the type of the value,
6165 entirely eliminating the need to check the value's type. Consider
6168 (defstruct coordinate
6169 (x nil :type single-float)
6170 (y nil :type single-float))
6173 (make-coordinate :x 1.0 :y 1.0))
6176 (declare (type coordinate it))
6177 (sqrt (+ (expt (coordinate-x it) 2) (expt (coordinate-y it) 2))))
6179 \code{make-it} and \code{use-it} are compiled with no checking on the
6180 types of the float slots, yet \code{use-it} can use
6181 \code{single-float} arithmetic with perfect safety. Note that
6182 \code{make-coordinate} must still check the values of \code{x} and
6183 \code{y} unless the call is block compiled or inline expanded
6184 (\pxlref{local-call}.) But even without this advantage, it is almost
6185 always more efficient to check slot values on structure
6186 initialization, since slots are usually written once and read many
6189 %%\node The Freeze-Type Declaration, Type Restrictions, Structure Types, More About Types in Python
6190 \subsection{The Freeze-Type Declaration}
6191 \cindex{freeze-type declaration}
6194 The \code{extensions:freeze-type} declaration is a CMU extension that
6195 enables more efficient compilation of user-defined types by asserting
6196 that the definition is not going to change. This declaration may only
6197 be used globally (with \code{declaim} or \code{proclaim}). Currently
6198 \code{freeze-type} only affects structure type testing done by
6199 \code{typep}, \code{typecase}, etc. Here is an example:
6201 (declaim (freeze-type foo bar))
6203 This asserts that the types \code{foo} and \code{bar} and their
6204 subtypes are not going to change. This allows more efficient type
6205 testing, since the compiler can open-code a test for all possible
6206 subtypes, rather than having to examine the type hierarchy at
6209 %%\node Type Restrictions, Type Style Recommendations, The Freeze-Type Declaration, More About Types in Python
6210 \subsection{Type Restrictions}
6211 \cpsubindex{types}{restrictions on}
6213 Avoid use of the \code{and}, \code{not} and \code{satisfies} types in
6214 declarations, since type inference has problems with them. When these
6215 types do appear in a declaration, they are still checked precisely,
6216 but the type information is of limited use to the compiler.
6217 \code{and} types are effective as long as the intersection can be
6218 canonicalized to a type that doesn't use \code{and}. For example:
6220 (and fixnum unsigned-byte)
6222 is fine, since it is the same as:
6224 (integer 0 \var{most-positive-fixnum})
6228 (and symbol (not (member :end)))
6230 will not be fully understood by type inference since the \code{and}
6231 can't be removed by canonicalization.
6233 Using any of these type specifiers in a type test with \code{typep} or
6234 \code{typecase} is fine, since as tests, these types can be translated
6235 into the \code{and} macro, the \code{not} function or a call to the
6236 satisfies predicate.
6238 %%\node Type Style Recommendations, , Type Restrictions, More About Types in Python
6239 \subsection{Type Style Recommendations}
6240 \cindex{style recommendations}
6242 Python provides good support for some currently unconventional ways of
6243 using the \clisp{} type system. With \python, it is desirable to make
6244 declarations as precise as possible, but type inference also makes
6245 some declarations unnecessary. Here are some general guidelines for
6246 maximum robustness and efficiency:
6249 \item Declare the types of all function arguments and structure slots
6250 as precisely as possible (while avoiding \code{not}, \code{and} and
6251 \code{satisfies}). Put these declarations in during initial coding
6252 so that type assertions can find bugs for you during debugging.
6254 \item Use the \tindexed{member} type specifier where there are a small
6255 number of possible symbol values, for example: \w{\code{(member :red
6258 \item Use the \tindexed{or} type specifier in situations where the
6259 type is not certain, but there are only a few possibilities, for
6260 example: \w{\code{(or list vector)}}.
6262 \item Declare integer types with the tightest bounds that you can,
6263 such as \code{\w{(integer 3 7)}}.
6265 \item Define \findexed{deftype} or \findexed{defstruct} types before
6266 they are used. Definition after use is legal (producing no
6267 ``undefined type'' warnings), but type tests and structure
6268 operations will be compiled much less efficiently.
6270 \item Use the \code{extensions:freeze-type} declaration to speed up
6271 type testing for structure types which won't have new subtypes added
6272 later. \xlref{freeze-type}.
6274 \item In addition to declaring the array element type and simpleness,
6275 also declare the dimensions if they are fixed, for example:
6277 (simple-array single-float (1024 1024))
6279 This bounds information allows array indexing for multi-dimensional
6280 arrays to be compiled much more efficiently, and may also allow
6281 array bounds checking to be done at compile time.
6282 \xlref{array-types}.
6284 \item Avoid use of the \findexed{the} declaration within expressions.
6285 Not only does it clutter the code, but it is also almost worthless
6286 under safe policies. If the need for an output type assertion is
6287 revealed by efficiency notes during tuning, then you can consider
6288 \code{the}, but it is preferable to constrain the argument types
6289 more, allowing the compiler to prove the desired result type.
6291 \item Don't bother declaring the type of \findexed{let} or other
6292 non-argument variables unless the type is non-obvious. If you
6293 declare function return types and structure slot types, then the
6294 type of a variable is often obvious both to the programmer and to
6295 the compiler. An important case where the type isn't obvious, and a
6296 declaration is appropriate, is when the value for a variable is
6297 pulled out of untyped structure (e.g., the result of \code{car}), or
6298 comes from some weakly typed function, such as \code{read}.
6300 \item Declarations are sometimes necessary for integer loop variables,
6301 since the compiler can't always prove that the value is of a good
6302 integer type. These declarations are best added during tuning, when
6303 an efficiency note indicates the need.
6308 %%\node Type Inference, Source Optimization, More About Types in Python, Advanced Compiler Use and Efficiency Hints
6309 \section{Type Inference}
6310 \label{type-inference}
6311 \cindex{type inference}
6312 \cindex{inference of types}
6313 \cindex{derivation of types}
6315 Type inference is the process by which the compiler tries to figure
6316 out the types of expressions and variables, given an inevitable lack
6317 of complete type information. Although \python{} does much more type
6318 inference than most \llisp{} compilers, remember that the more precise
6319 and comprehensive type declarations are, the more type inference will
6323 * Variable Type Inference::
6324 * Local Function Type Inference::
6325 * Global Function Type Inference::
6326 * Operation Specific Type Inference::
6327 * Dynamic Type Inference::
6328 * Type Check Optimization::
6331 %%\node Variable Type Inference, Local Function Type Inference, Type Inference, Type Inference
6332 \subsection{Variable Type Inference}
6333 \label{variable-type-inference}
6335 The type of a variable is the union of the types of all the
6336 definitions. In the degenerate case of a let, the type of the
6337 variable is the type of the initial value. This inferred type is
6338 intersected with any declared type, and is then propagated to all the
6339 variable's references. The types of \findexed{multiple-value-bind}
6340 variables are similarly inferred from the types of the individual
6341 values of the values form.
6343 If multiple type declarations apply to a single variable, then all the
6344 declarations must be correct; it is as though all the types were intersected
6345 producing a single \tindexed{and} type specifier. In this example:
6347 (defmacro my-dotimes ((var count) &body body)
6348 `(do ((,var 0 (1+ ,var)))
6350 (declare (type (integer 0 *) ,var))
6354 (declare (fixnum i))
6357 the two declarations for \code{i} are intersected, so \code{i} is
6358 known to be a non-negative fixnum.
6360 In practice, this type inference is limited to lets and local
6361 functions, since the compiler can't analyze all the calls to a global
6362 function. But type inference works well enough on local variables so
6363 that it is often unnecessary to declare the type of local variables.
6364 This is especially likely when function result types and structure
6365 slot types are declared. The main areas where type inference breaks
6369 \item When the initial value of a variable is an untyped expression,
6370 such as \code{\w{(car x)}}, and
6372 \item When the type of one of the variable's definitions is a function
6373 of the variable's current value, as in: \code{(setq x (1+ x))}
6377 %%\node Local Function Type Inference, Global Function Type Inference, Variable Type Inference, Type Inference
6378 \subsection{Local Function Type Inference}
6379 \cpsubindex{local call}{type inference}
6381 The types of arguments to local functions are inferred in the same way
6382 as any other local variable; the type is the union of the argument
6383 types across all the calls to the function, intersected with the
6384 declared type. If there are any assignments to the argument
6385 variables, the type of the assigned value is unioned in as well.
6387 The result type of a local function is computed in a special way that
6388 takes tail recursion (\pxlref{tail-recursion}) into consideration.
6389 The result type is the union of all possible return values that aren't
6390 tail-recursive calls. For example, \python{} will infer that the
6391 result type of this function is \code{integer}:
6394 (declare (integer n res))
6397 (! (1- n) (* n res))))
6399 Although this is a rather obvious result, it becomes somewhat less
6400 trivial in the presence of mutual tail recursion of multiple
6401 functions. Local function result type inference interacts with the
6402 mechanisms for ensuring proper tail recursion mentioned in section
6403 \ref{local-call-return}.
6405 %%\node Global Function Type Inference, Operation Specific Type Inference, Local Function Type Inference, Type Inference
6406 \subsection{Global Function Type Inference}
6407 \label{function-type-inference}
6408 \cpsubindex{function}{type inference}
6410 As described in section \ref{function-types}, a global function type
6411 (\tindexed{ftype}) declaration places implicit type assertions on the
6412 call arguments, and also guarantees the type of the return value. So
6413 wherever a call to a declared function appears, there is no doubt as
6414 to the types of the arguments and return value. Furthermore,
6415 \python{} will infer a function type from the function's definition if
6416 there is no \code{ftype} declaration. Any type declarations on the
6417 argument variables are used as the argument types in the derived
6418 function type, and the compiler's best guess for the result type of
6419 the function is used as the result type in the derived function type.
6421 This method of deriving function types from the definition implicitly assumes
6422 that functions won't be redefined at run-time. Consider this example:
6425 (let ((res (and (consp x) (eq (car x) 'foo))))
6426 (format t "It is ~:[not ~;~]foo." res)))
6430 (setf (cadr it) 'yow!)
6434 Presumably, the programmer really meant to return \code{res} from
6435 \code{foo-p}, but he seems to have forgotten. When he tries to call
6436 \code{\w{(frob (list 'foo nil))}}, \code{frob} will flame out when
6437 it tries to add to a \code{cons}. Realizing his error, he fixes
6438 \code{foo-p} and recompiles it. But when he retries his test case, he
6439 is baffled because the error is still there. What happened in this
6440 example is that \python{} proved that the result of \code{foo-p} is
6441 \code{null}, and then proceeded to optimize away the \code{setf} in
6444 Fortunately, in this example, the error is detected at compile time
6445 due to notes about unreachable code (\pxlref{dead-code-notes}.)
6446 Still, some users may not want to worry about this sort of problem
6447 during incremental development, so there is a variable to control
6448 deriving function types.
6450 \begin{defvar}{extensions:}{derive-function-types}
6452 If true (the default), argument and result type information derived
6453 from compilation of \code{defun}s is used when compiling calls to
6454 that function. If false, only information from \code{ftype}
6455 proclamations will be used.
6458 %%\node Operation Specific Type Inference, Dynamic Type Inference, Global Function Type Inference, Type Inference
6459 \subsection{Operation Specific Type Inference}
6460 \label{operation-type-inference}
6461 \cindex{operation specific type inference}
6462 \cindex{arithmetic type inference}
6463 \cpsubindex{numeric}{type inference}
6465 Many of the standard \clisp{} functions have special type inference
6466 procedures that determine the result type as a function of the
6467 argument types. For example, the result type of \code{aref} is the
6468 array element type. Here are some other examples of type inferences:
6470 (logand x #xFF) \result{} (unsigned-byte 8)
6472 (+ (the (integer 0 12) x) (the (integer 0 1) y)) \result{} (integer 0 13)
6474 (ash (the (unsigned-byte 16) x) -8) \result{} (unsigned-byte 8)
6477 %%\node Dynamic Type Inference, Type Check Optimization, Operation Specific Type Inference, Type Inference
6478 \subsection{Dynamic Type Inference}
6479 \label{constraint-propagation}
6480 \cindex{dynamic type inference}
6481 \cindex{conditional type inference}
6482 \cpsubindex{type inference}{dynamic}
6484 Python uses flow analysis to infer types in dynamically typed
6485 programs. For example:
6491 Here, the compiler knows the argument to \code{length} is a list,
6492 because the call to \code{length} is only done when \code{x} is a
6493 list. The most significant efficiency effect of inference from
6494 assertions is usually in type check optimization.
6497 Dynamic type inference has two inputs: explicit conditionals and
6498 implicit or explicit type assertions. Flow analysis propagates these
6499 constraints on variable type to any code that can be executed only
6500 after passing through the constraint. Explicit type constraints come
6501 from \findexed{if}s where the test is either a lexical variable or a
6502 function of lexical variables and constants, where the function is
6503 either a type predicate, a numeric comparison or \code{eq}.
6505 If there is an \code{eq} (or \code{eql}) test, then the compiler will
6506 actually substitute one argument for the other in the true branch.
6509 (when (eq x :yow!) (return x))
6513 (when (eq x :yow!) (return :yow!))
6515 This substitution is done when one argument is a constant, or one
6516 argument has better type information than the other. This
6517 transformation reveals opportunities for constant folding or
6518 type-specific optimizations. If the test is against a constant, then
6519 the compiler can prove that the variable is not that constant value in
6520 the false branch, or \w{\code{(not (member :yow!))}} in the example
6521 above. This can eliminate redundant tests, for example:
6527 is transformed to this:
6533 Variables appearing as \code{if} tests are interpreted as
6534 \code{\w{(not (eq \var{var} nil))}} tests. The compiler also converts
6535 \code{=} into \code{eql} where possible. It is difficult to do
6536 inference directly on \code{=} since it does implicit coercions.
6538 When there is an explicit \code{$<$} or \code{$>$} test on
6542 variables, the compiler makes inferences about the ranges the
6543 variables can assume in the true and false branches. This is mainly
6544 useful when it proves that the values are small enough in magnitude to
6545 allow open-coding of arithmetic operations. For example, in many uses
6546 of \code{dotimes} with a \code{fixnum} repeat count, the compiler
6547 proves that fixnum arithmetic can be used.
6549 Implicit type assertions are quite common, especially if you declare
6550 function argument types. Dynamic inference from implicit type
6551 assertions sometimes helps to disambiguate programs to a useful
6552 degree, but is most noticeable when it detects a dynamic type error.
6558 results in this warning:
6564 Warning: Result is a LIST, not a NUMBER.
6567 Note that \llisp{}'s dynamic type checking semantics make dynamic type
6568 inference useful even in programs that aren't really dynamically
6571 (+ (car x) (length x))
6573 Here, \code{x} presumably always holds a list, but in the absence of a
6574 declaration the compiler cannot assume \code{x} is a list simply
6575 because list-specific operations are sometimes done on it. The
6576 compiler must consider the program to be dynamically typed until it
6577 proves otherwise. Dynamic type inference proves that the argument to
6578 \code{length} is always a list because the call to \code{length} is
6579 only done after the list-specific \code{car} operation.
6582 %%\node Type Check Optimization, , Dynamic Type Inference, Type Inference
6583 \subsection{Type Check Optimization}
6584 \label{type-check-optimization}
6585 \cpsubindex{type checking}{optimization}
6586 \cpsubindex{optimization}{type check}
6588 Python backs up its support for precise type checking by minimizing
6589 the cost of run-time type checking. This is done both through type
6590 inference and through optimizations of type checking itself.
6592 Type inference often allows the compiler to prove that a value is of
6593 the correct type, and thus no type check is necessary. For example:
6595 (defstruct foo a b c)
6597 (foo (required-argument) :type foo)
6598 (next nil :type (or link null)))
6600 (foo-a (link-foo x))
6602 Here, there is no need to check that the result of \code{link-foo} is
6603 a \code{foo}, since it always is. Even when some type checks are
6604 necessary, type inference can often reduce the number:
6612 In this example, only one \w{\code{(foo-p x)}} check is needed. This
6613 applies to a lesser degree in list operations, such as:
6615 (if (eql (car x) 3) (cdr x) y)
6617 Here, we only have to check that \code{x} is a list once.
6619 Since \python{} recognizes explicit type tests, code that explicitly
6620 protects itself against type errors has little introduced overhead due
6621 to implicit type checking. For example, this loop compiles with no
6622 implicit checks for \code{car} and \code{cdr}:
6625 (do ((current l (cdr current)))
6626 ((atom current) nil)
6627 (when (eq (car current) e) (return current))))
6630 \cindex{complemented type checks}
6631 Python reduces the cost of checks that must be done through an
6632 optimization called \var{complementing}. A complemented check for
6633 \var{type} is simply a check that the value is not of the type
6634 \w{\code{(not \var{type})}}. This is only interesting when something
6635 is known about the actual type, in which case we can test for the
6636 complement of \w{\code{(and \var{known-type} (not \var{type}))}}, or
6637 the difference between the known type and the assertion. An example:
6639 (link-foo (link-next x))
6641 Here, we change the type check for \code{link-foo} from a test for
6642 \code{foo} to a test for:
6644 (not (and (or foo null) (not foo)))
6646 or more simply \w{\code{(not null)}}. This is probably the most
6647 important use of complementing, since the situation is fairly common,
6648 and a \code{null} test is much cheaper than a structure type test.
6650 Here is a more complicated example that illustrates the combination of
6651 complementing with dynamic type inference:
6654 (declare (type (or link null) x))
6655 (do ((current x (link-next current)))
6656 ((null current) nil)
6657 (let ((foo (link-foo current)))
6658 (when (eq (foo-a foo) a) (return foo)))))
6660 This loop can be compiled with no type checks. The \code{link} test
6661 for \code{link-foo} and \code{link-next} is complemented to
6662 \w{\code{(not null)}}, and then deleted because of the explicit
6663 \code{null} test. As before, no check is necessary for \code{foo-a},
6664 since the \code{link-foo} is always a \code{foo}. This sort of
6665 situation shows how precise type checking combined with precise
6666 declarations can actually result in reduced type checking.
6669 %%\node Source Optimization, Tail Recursion, Type Inference, Advanced Compiler Use and Efficiency Hints
6670 \section{Source Optimization}
6671 \label{source-optimization}
6672 \cindex{optimization}
6674 This section describes source-level transformations that \python{} does on
6675 programs in an attempt to make them more efficient. Although source-level
6676 optimizations can make existing programs more efficient, the biggest advantage
6677 of this sort of optimization is that it makes it easier to write efficient
6678 programs. If a clean, straightforward implementation can be transformed
6679 into an efficient one, then there is no need for tricky and dangerous hand
6683 * Let Optimization::
6684 * Constant Folding::
6685 * Unused Expression Elimination::
6686 * Control Optimization::
6687 * Unreachable Code Deletion::
6688 * Multiple Values Optimization::
6689 * Source to Source Transformation::
6690 * Style Recommendations::
6693 %%\node Let Optimization, Constant Folding, Source Optimization, Source Optimization
6694 \subsection{Let Optimization}
6695 \label{let-optimization}
6697 \cindex{let optimization} \cpsubindex{optimization}{let}
6699 The primary optimization of let variables is to delete them when they
6700 are unnecessary. Whenever the value of a let variable is a constant,
6701 a constant variable or a constant (local or non-notinline) function,
6702 the variable is deleted, and references to the variable are replaced
6703 with references to the constant expression. This is useful primarily
6704 in the expansion of macros or inline functions, where argument values
6705 are often constant in any given call, but are in general non-constant
6706 expressions that must be bound to preserve order of evaluation. Let
6707 variable optimization eliminates the need for macros to carefully
6708 avoid spurious bindings, and also makes inline functions just as
6709 efficient as macros.
6711 A particularly interesting class of constant is a local function.
6712 Substituting for lexical variables that are bound to a function can
6713 substantially improve the efficiency of functional programming styles,
6716 (let ((a #'(lambda (x) (zow x))))
6719 effectively transforms to:
6723 This transformation is done even when the function is a closure, as in:
6725 (let ((a (let ((y (zug)))
6726 #'(lambda (x) (zow x y)))))
6734 A constant variable is a lexical variable that is never assigned to,
6735 always keeping its initial value. Whenever possible, avoid setting
6736 lexical variables\dash{}instead bind a new variable to the new value.
6737 Except for loop variables, it is almost always possible to avoid
6738 setting lexical variables. This form:
6743 is \var{more} efficient than this form:
6748 Setting variables makes the program more difficult to understand, both
6749 to the compiler and to the programmer. \python{} compiles assignments
6750 at least as efficiently as any other \llisp{} compiler, but most let
6751 optimizations are only done on constant variables.
6753 Constant variables with only a single use are also optimized away,
6754 even when the initial value is not constant.\footnote{The source
6755 transformation in this example doesn't represent the preservation of
6756 evaluation order implicit in the compiler's internal representation.
6757 Where necessary, the back end will reintroduce temporaries to
6758 preserve the semantics.} For example, this expansion of
6761 (let ((#:g3 (+ x 1)))
6768 The type semantics of this transformation are more important than the
6769 elimination of the variable itself. Consider what happens when
6770 \code{x} is declared to be a \code{fixnum}; after the transformation,
6771 the compiler can compile the addition knowing that the result is a
6772 \code{fixnum}, whereas before the transformation the addition would
6773 have to allow for fixnum overflow.
6775 Another variable optimization deletes any variable that is never read.
6776 This causes the initial value and any assigned values to be unused,
6777 allowing those expressions to be deleted if they have no side-effects.
6779 Note that a let is actually a degenerate case of local call
6780 (\pxlref{let-calls}), and that let optimization can be done on calls
6781 that weren't created by a let. Also, local call allows an applicative
6782 style of iteration that is totally assignment free.
6784 %%\node Constant Folding, Unused Expression Elimination, Let Optimization, Source Optimization
6785 \subsection{Constant Folding}
6786 \cindex{constant folding}
6787 \cpsubindex{folding}{constant}
6789 Constant folding is an optimization that replaces a call of constant
6790 arguments with the constant result of that call. Constant folding is
6791 done on all standard functions for which it is legal. Inline
6792 expansion allows folding of any constant parts of the definition, and
6793 can be done even on functions that have side-effects.
6795 It is convenient to rely on constant folding when programming, as in this
6798 (defconstant limit 42)
6801 (... (1- limit) ...))
6803 Constant folding is also helpful when writing macros or inline
6804 functions, since it usually eliminates the need to write a macro that
6805 special-cases constant arguments.
6807 \cindex{constant-function declaration} Constant folding of a user
6808 defined function is enabled by the \code{extensions:constant-function}
6809 proclamation. In this example:
6811 (declaim (ext:constant-function myexp))
6813 (declare (single-float x y))
6814 (exp (* (log x) y)))
6816 ... (myexp 3.0 1.3) ...
6818 The call to \code{myexp} is constant-folded to \code{4.1711674}.
6821 %%\node Unused Expression Elimination, Control Optimization, Constant Folding, Source Optimization
6822 \subsection{Unused Expression Elimination}
6823 \cindex{unused expression elimination}
6824 \cindex{dead code elimination}
6826 If the value of any expression is not used, and the expression has no
6827 side-effects, then it is deleted. As with constant folding, this
6828 optimization applies most often when cleaning up after inline
6829 expansion and other optimizations. Any function declared an
6830 \code{extensions:constant-function} is also subject to unused
6831 expression elimination.
6833 Note that \python{} will eliminate parts of unused expressions known
6834 to be side-effect free, even if there are other unknown parts. For
6837 (let ((a (list (foo) (bar))))
6849 %%\node Control Optimization, Unreachable Code Deletion, Unused Expression Elimination, Source Optimization
6850 \subsection{Control Optimization}
6851 \cindex{control optimization}
6852 \cpsubindex{optimization}{control}
6854 The most important optimization of control is recognizing when an
6855 \findexed{if} test is known at compile time, then deleting the
6856 \code{if}, the test expression, and the unreachable branch of the
6857 \code{if}. This can be considered a special case of constant folding,
6858 although the test doesn't have to be truly constant as long as it is
6859 definitely not \false. Note also that type inference propagates the
6860 result of an \code{if} test to the true and false branches,
6861 \pxlref{constraint-propagation}.
6863 A related \code{if} optimization is this transformation:\footnote{Note
6864 that the code for \code{x} and \code{y} isn't actually replicated.}
6874 The opportunity for this sort of optimization usually results from a
6875 conditional macro. For example:
6879 is actually implemented as this:
6881 (if (if a nil t) x y)
6883 which is transformed to this:
6889 which is then optimized to this:
6893 Note that due to \python{}'s internal representations, the
6894 \code{if}\dash{}\code{if} situation will be recognized even if other
6895 forms are wrapped around the inner \code{if}, like:
6905 In \python, all the \clisp{} macros really are macros, written in
6906 terms of \code{if}, \code{block} and \code{tagbody}, so user-defined
6907 control macros can be just as efficient as the standard ones.
6908 \python{} emits basic blocks using a heuristic that minimizes the
6909 number of unconditional branches. The code in a \code{tagbody} will
6910 not be emitted in the order it appeared in the source, so there is no
6911 point in arranging the code to make control drop through to the
6914 %%\node Unreachable Code Deletion, Multiple Values Optimization, Control Optimization, Source Optimization
6915 \subsection{Unreachable Code Deletion}
6916 \label{dead-code-notes}
6917 \cindex{unreachable code deletion}
6918 \cindex{dead code elimination}
6920 \python{} will delete code whenever it can prove that the code can never be
6921 executed. Code becomes unreachable when:
6925 An \code{if} is optimized away, or
6928 There is an explicit unconditional control transfer such as \code{go} or
6929 \code{return-from}, or
6932 The last reference to a local function is deleted (or there never was any
6937 When code that appeared in the original source is deleted, the compiler prints
6938 a note to indicate a possible problem (or at least unnecessary code.) For
6943 (write-line "True.")
6944 (write-line "False.")))
6946 will result in this note:
6949 (WRITE-LINE "False.")
6950 Note: Deleting unreachable code.
6953 It is important to pay attention to unreachable code notes, since they often
6954 indicate a subtle type error. For example:
6960 (b (if x (foo-b x) :none)))
6963 results in this note:
6966 (IF X (FOO-B X) :NONE)
6969 Note: Deleting unreachable code.
6971 The \kwd{none} is unreachable, because type inference knows that the argument
6972 to \code{foo-a} must be a \code{foo}, and thus can't be \false. Presumably the
6973 programmer forgot that \code{x} could be \false{} when he wrote the binding for
6976 Here is an example with an incorrect declaration:
6978 (defun count-a (string)
6979 (do ((pos 0 (position #\back{a} string :start (1+ pos)))
6980 (count 0 (1+ count)))
6982 (declare (fixnum pos))))
6984 This time our note is:
6987 (DO ((POS 0 #) (COUNT 0 #))
6989 (DECLARE (FIXNUM POS)))
6990 --> BLOCK LET TAGBODY RETURN-FROM PROGN
6993 Note: Deleting unreachable code.
6995 The problem here is that \code{pos} can never be null since it is declared a
6998 It takes some experience with unreachable code notes to be able to
6999 tell what they are trying to say. In non-obvious cases, the best
7000 thing to do is to call the function in a way that should cause the
7001 unreachable code to be executed. Either you will get a type error, or
7002 you will find that there truly is no way for the code to be executed.
7004 Not all unreachable code results in a note:
7007 \item A note is only given when the unreachable code textually appears
7008 in the original source. This prevents spurious notes due to the
7009 optimization of macros and inline functions, but sometimes also
7010 foregoes a note that would have been useful.
7012 \item Since accurate source information is not available for non-list
7013 forms, there is an element of heuristic in determining whether or
7014 not to give a note about an atom. Spurious notes may be given when
7015 a macro or inline function defines a variable that is also present
7016 in the calling function. Notes about \false{} and \true{} are never
7017 given, since it is too easy to confuse these constants in expanded
7018 code with ones in the original source.
7020 \item Notes are only given about code unreachable due to control flow.
7021 There is no note when an expression is deleted because its value is
7022 unused, since this is a common consequence of other optimizations.
7026 Somewhat spurious unreachable code notes can also result when a macro
7027 inserts multiple copies of its arguments in different contexts, for
7030 (defmacro t-and-f (var form)
7031 `(if ,var ,form ,form))
7034 (t-and-f x (if x "True." "False.")))
7036 results in these notes:
7039 (IF X "True." "False.")
7042 Note: Deleting unreachable code.
7046 Note: Deleting unreachable code.
7048 It seems like it has deleted both branches of the \code{if}, but it has really
7049 deleted one branch in one copy, and the other branch in the other copy. Note
7050 that these messages are only spurious in not satisfying the intent of the rule
7051 that notes are only given when the deleted code appears in the original source;
7052 there is always \var{some} code being deleted when an unreachable code note is
7056 %%\node Multiple Values Optimization, Source to Source Transformation, Unreachable Code Deletion, Source Optimization
7057 \subsection{Multiple Values Optimization}
7058 \cindex{multiple value optimization}
7059 \cpsubindex{optimization}{multiple value}
7061 Within a function, \python{} implements uses of multiple values
7062 particularly efficiently. Multiple values can be kept in arbitrary
7063 registers, so using multiple values doesn't imply stack manipulation
7064 and representation conversion. For example, this code:
7066 (let ((a (if x (foo x) u))
7067 (b (if x (bar x) v)))
7070 is actually more efficient written this way:
7072 (multiple-value-bind
7075 (values (foo x) (bar x))
7080 Also, \pxlref{local-call-return} for information on how local call
7081 provides efficient support for multiple function return values.
7084 %%\node Source to Source Transformation, Style Recommendations, Multiple Values Optimization, Source Optimization
7085 \subsection{Source to Source Transformation}
7086 \cindex{source-to-source transformation}
7087 \cpsubindex{transformation}{source-to-source}
7089 The compiler implements a number of operation-specific optimizations as
7090 source-to-source transformations. You will often see unfamiliar code in error
7091 messages, for example:
7093 (defun my-zerop () (zerop x))
7101 Warning: Undefined variable: X
7103 The original \code{zerop} has been transformed into a call to
7104 \code{=}. This transformation is indicated with the same \code{==$>$}
7105 used to mark macro and function inline expansion. Although it can be
7106 confusing, display of the transformed source is important, since
7107 warnings are given with respect to the transformed source. This is a
7108 more obscure example:
7110 (defun foo (x) (logand 1 x))
7112 gives this efficiency note:
7118 Note: Forced to do static-function Two-arg-and (cost 53).
7119 Unable to do inline fixnum arithmetic (cost 1) because:
7120 The first argument is a INTEGER, not a FIXNUM.
7123 Here, the compiler commuted the call to \code{logand}, introducing
7124 temporaries. The note complains that the \var{first} argument is not
7125 a \code{fixnum}, when in the original call, it was the second
7126 argument. To make things more confusing, the compiler introduced
7127 temporaries called \code{c::x} and \code{c::y} that are bound to
7128 \code{y} and \code{1}, respectively.
7130 You will also notice source-to-source optimizations when efficiency
7131 notes are enabled (\pxlref{efficiency-notes}.) When the compiler is
7132 unable to do a transformation that might be possible if there was more
7133 information, then an efficiency note is printed. For example,
7134 \code{my-zerop} above will also give this efficiency note:
7140 Note: Unable to optimize because:
7141 Operands might not be the same type, so can't open code.
7144 %%\node Style Recommendations, , Source to Source Transformation, Source Optimization
7145 \subsection{Style Recommendations}
7146 \cindex{style recommendations}
7148 Source level optimization makes possible a clearer and more relaxed programming
7152 \item Don't use macros purely to avoid function call. If you want an
7153 inline function, write it as a function and declare it inline. It's
7154 clearer, less error-prone, and works just as well.
7156 \item Don't write macros that try to ``optimize'' their expansion in
7157 trivial ways such as avoiding binding variables for simple
7158 expressions. The compiler does these optimizations too, and is less
7159 likely to make a mistake.
7161 \item Make use of local functions (i.e., \code{labels} or \code{flet})
7162 and tail-recursion in places where it is clearer. Local function
7163 call is faster than full call.
7165 \item Avoid setting local variables when possible. Binding a new
7166 \code{let} variable is at least as efficient as setting an existing
7167 variable, and is easier to understand, both for the compiler and the
7170 \item Instead of writing similar code over and over again so that it
7171 can be hand customized for each use, define a macro or inline
7172 function, and let the compiler do the work.
7177 %%\node Tail Recursion, Local Call, Source Optimization, Advanced Compiler Use and Efficiency Hints
7178 \section{Tail Recursion}
7179 \label{tail-recursion}
7180 \cindex{tail recursion}
7183 A call is tail-recursive if nothing has to be done after the call
7184 returns, i.e. when the call returns, the returned value is immediately
7185 returned from the calling function. In this example, the recursive
7186 call to \code{myfun} is tail-recursive:
7189 (if (oddp (random x))
7194 Tail recursion is interesting because it is a form of recursion that can be
7195 implemented much more efficiently than general recursion. In general, a
7196 recursive call requires the compiler to allocate storage on the stack at
7197 run-time for every call that has not yet returned. This memory consumption
7198 makes recursion unacceptably inefficient for representing repetitive algorithms
7199 having large or unbounded size. Tail recursion is the special case of
7200 recursion that is semantically equivalent to the iteration constructs normally
7201 used to represent repetition in programs. Because tail recursion is equivalent
7202 to iteration, tail-recursive programs can be compiled as efficiently as
7205 So why would you want to write a program recursively when you can write it
7206 using a loop? Well, the main answer is that recursion is a more general
7207 mechanism, so it can express some solutions simply that are awkward to write as
7208 a loop. Some programmers also feel that recursion is a stylistically
7209 preferable way to write loops because it avoids assigning variables.
7210 For example, instead of writing:
7213 something-that-uses-x)
7216 something-that-uses-y)
7218 (do ((x something (fun2 (fun1 x))))
7224 (fun2 something-that-uses-x))
7227 (fun1 something-that-uses-y))
7231 The tail-recursive definition is actually more efficient, in addition to being
7232 (arguably) clearer. As the number of functions and the complexity of their
7233 call graph increases, the simplicity of using recursion becomes compelling.
7234 Consider the advantages of writing a large finite-state machine with separate
7235 tail-recursive functions instead of using a single huge \code{prog}.
7237 It helps to understand how to use tail recursion if you think of a
7238 tail-recursive call as a \code{psetq} that assigns the argument values to the
7239 called function's variables, followed by a \code{go} to the start of the called
7240 function. This makes clear an inherent efficiency advantage of tail-recursive
7241 call: in addition to not having to allocate a stack frame, there is no need to
7242 prepare for the call to return (e.g., by computing a return PC.)
7244 Is there any disadvantage to tail recursion? Other than an increase
7245 in efficiency, the only way you can tell that a call has been compiled
7246 tail-recursively is if you use the debugger. Since a tail-recursive
7247 call has no stack frame, there is no way the debugger can print out
7248 the stack frame representing the call. The effect is that backtrace
7249 will not show some calls that would have been displayed in a
7250 non-tail-recursive implementation. In practice, this is not as bad as
7251 it sounds\dash{}in fact it isn't really clearly worse, just different.
7252 \xlref{debug-tail-recursion} for information about the debugger
7253 implications of tail recursion.
7255 In order to ensure that tail-recursion is preserved in arbitrarily
7256 complex calling patterns across separately compiled functions, the
7257 compiler must compile any call in a tail-recursive position as a
7258 tail-recursive call. This is done regardless of whether the program
7259 actually exhibits any sort of recursive calling pattern. In this
7260 example, the call to \code{fun2} will always be compiled as a
7261 tail-recursive call:
7266 So tail recursion doesn't necessarily have anything to do with recursion
7267 as it is normally thought of. \xlref{local-tail-recursion} for more
7268 discussion of using tail recursion to implement loops.
7271 * Tail Recursion Exceptions::
7274 %%\node Tail Recursion Exceptions, , Tail Recursion, Tail Recursion
7275 \subsection{Tail Recursion Exceptions}
7277 Although \python{} is claimed to be ``properly'' tail-recursive, some
7278 might dispute this, since there are situations where tail recursion is
7282 \item When the call is enclosed by a special binding, or
7284 \item When the call is enclosed by a \code{catch} or
7285 \code{unwind-protect}, or
7287 \item When the call is enclosed by a \code{block} or \code{tagbody}
7288 and the block name or \code{go} tag has been closed over.
7290 These dynamic extent binding forms inhibit tail recursion because they
7291 allocate stack space to represent the binding. Shallow-binding
7292 implementations of dynamic scoping also require cleanup code to be
7293 evaluated when the scope is exited.
7296 %%\node Local Call, Block Compilation, Tail Recursion, Advanced Compiler Use and Efficiency Hints
7297 \section{Local Call}
7300 \cpsubindex{call}{local}
7301 \cpsubindex{function call}{local}
7303 \python{} supports two kinds of function call: full call and local call.
7304 Full call is the standard calling convention; its late binding and
7305 generality make \llisp{} what it is, but create unavoidable overheads.
7306 When the compiler can compile the calling function and the called
7307 function simultaneously, it can use local call to avoid some of the
7308 overhead of full call. Local call is really a collection of
7309 compilation strategies. If some aspect of call overhead is not needed
7310 in a particular local call, then it can be omitted. In some cases,
7311 local call can be totally free. Local call provides two main
7312 advantages to the user:
7315 \item Local call makes the use of the lexical function binding forms
7316 \findexed{flet} and \findexed{labels} much more efficient. A local
7317 call is always faster than a full call, and in many cases is much
7320 \item Local call is a natural approach to \i{block compilation}, a
7321 compilation technique that resolves function references at compile
7322 time. Block compilation speeds function call, but increases
7323 compilation times and prevents function redefinition.
7328 * Self-Recursive Calls::
7331 * Local Tail Recursion::
7335 %%\node Self-Recursive Calls, Let Calls, Local Call, Local Call
7336 \subsection{Self-Recursive Calls}
7337 \cpsubindex{recursion}{self}
7339 Local call is used when a function defined by \code{defun} calls itself. For
7345 (* n (fact (1- n)))))
7347 This use of local call speeds recursion, but can also complicate
7348 debugging, since \findexed{trace} will only show the first call to
7349 \code{fact}, and not the recursive calls. This is because the
7350 recursive calls directly jump to the start of the function, and don't
7351 indirect through the \code{symbol-function}. Self-recursive local
7352 call is inhibited when the \kwd{block-compile} argument to
7353 \code{compile-file} is \false{} (\pxlref{compile-file-block}.)
7355 %%\node Let Calls, Closures, Self-Recursive Calls, Local Call
7356 \subsection{Let Calls}
7358 Because local call avoids unnecessary call overheads, the compiler
7359 internally uses local call to implement some macros and special forms
7360 that are not normally thought of as involving a function call. For
7361 example, this \code{let}:
7367 is internally represented as though it was macroexpanded into:
7369 (funcall #'(lambda (a b)
7374 This implementation is acceptable because the simple cases of local
7375 call (equivalent to a \code{let}) result in good code. This doesn't
7376 make \code{let} any more efficient, but does make local calls that are
7377 semantically the same as \code{let} much more efficient than full
7378 calls. For example, these definitions are all the same as far as the
7379 compiler is concerned:
7382 ...some other stuff...
7383 (let ((a something))
7387 (flet ((localfun (a)
7389 ...some other stuff...
7390 (localfun something)))
7393 (let ((funvar #'(lambda (a)
7395 ...some other stuff...
7396 (funcall funvar something)))
7399 Although local call is most efficient when the function is called only
7400 once, a call doesn't have to be equivalent to a \code{let} to be more
7401 efficient than full call. All local calls avoid the overhead of
7402 argument count checking and keyword argument parsing, and there are a
7403 number of other advantages that apply in many common situations.
7404 \xlref{let-optimization} for a discussion of the optimizations done on
7407 %%\node Closures, Local Tail Recursion, Let Calls, Local Call
7408 \subsection{Closures}
7411 Local call allows for much more efficient use of closures, since the
7412 closure environment doesn't need to be allocated on the heap, or even
7413 stored in memory at all. In this example, there is no penalty for
7414 \code{localfun} referencing \code{a} and \code{b}:
7417 (flet ((localfun (x)
7423 In local call, the compiler effectively passes closed-over values as
7424 extra arguments, so there is no need for you to ``optimize'' local
7425 function use by explicitly passing in lexically visible values.
7426 Closures may also be subject to let optimization
7427 (\pxlref{let-optimization}.)
7429 Note: indirect value cells are currently always allocated on the heap
7430 when a variable is both assigned to (with \code{setq} or \code{setf})
7431 and closed over, regardless of whether the closure is a local function
7432 or not. This is another reason to avoid setting variables when you
7435 %%\node Local Tail Recursion, Return Values, Closures, Local Call
7436 \subsection{Local Tail Recursion}
7437 \label{local-tail-recursion}
7438 \cindex{tail recursion}
7439 \cpsubindex{recursion}{tail}
7441 Tail-recursive local calls are particularly efficient, since they are
7442 in effect an assignment plus a control transfer. Scheme programmers
7443 write loops with tail-recursive local calls, instead of using the
7444 imperative \code{go} and \code{setq}. This has not caught on in the
7445 \clisp{} community, since conventional \llisp{} compilers don't
7446 implement local call. In \python, users can choose to write loops
7450 (labels ((loop (n total)
7453 (loop (1- n) (* n total)))))
7457 \begin{defmac}{extensions:}{iterate}{%
7458 \args{\var{name} (\mstar{(\var{var} \var{initial-value})})
7459 \mstar{\var{declaration}} \mstar{\var{form}}}}
7461 This macro provides syntactic sugar for using \findexed{labels} to
7462 do iteration. It creates a local function \var{name} with the
7463 specified \var{var}s as its arguments and the \var{declaration}s and
7464 \var{form}s as its body. This function is then called with the
7465 \var{initial-values}, and the result of the call is returned from the
7468 Here is our factorial example rewritten using \code{iterate}:
7477 (loop (1- n) (* n total)))))
7480 The main advantage of using \code{iterate} over \code{do} is that
7481 \code{iterate} naturally allows stepping to be done differently
7482 depending on conditionals in the body of the loop. \code{iterate}
7483 can also be used to implement algorithms that aren't really
7484 iterative by simply doing a non-tail call. For example, the
7485 standard recursive definition of factorial can be written like this:
7491 (* n (fact (1- n)))))
7495 %%\node Return Values, , Local Tail Recursion, Local Call
7496 \subsection{Return Values}
7497 \label{local-call-return}
7498 \cpsubindex{return values}{local call}
7499 \cpsubindex{local call}{return values}
7501 One of the more subtle costs of full call comes from allowing
7502 arbitrary numbers of return values. This overhead can be avoided in
7503 local calls to functions that always return the same number of values.
7504 For efficiency reasons (as well as stylistic ones), you should write
7505 functions so that they always return the same number of values. This
7506 may require passing extra \false{} arguments to \code{values} in some
7507 cases, but the result is more efficient, not less so.
7509 When efficiency notes are enabled (\pxlref{efficiency-notes}), and the
7510 compiler wants to use known values return, but can't prove that the
7511 function always returns the same number of values, then it will print
7515 (DEFUN GRUE (X) (DECLARE (FIXNUM X)) (COND (# #) (# NIL) (T #)))
7516 Note: Return type not fixed values, so can't use known return convention:
7517 (VALUES (OR (INTEGER -536870912 -1) NULL) &REST T)
7520 In order to implement proper tail recursion in the presence of known
7521 values return (\pxlref{tail-recursion}), the compiler sometimes must
7522 prove that multiple functions all return the same number of values.
7523 When this can't be proven, the compiler will print a note like this:
7526 (DEFUN BLUE (X) (DECLARE (FIXNUM X)) (COND (# #) (# #) (# #) (T #)))
7527 Note: Return value count mismatch prevents known return from
7532 \xlref{number-local-call} for the interaction between local call
7533 and the representation of numeric types.
7536 %%\node Block Compilation, Inline Expansion, Local Call, Advanced Compiler Use and Efficiency Hints
7537 \section{Block Compilation}
7538 \label{block-compilation}
7539 \cindex{block compilation}
7540 \cpsubindex{compilation}{block}
7542 Block compilation allows calls to global functions defined by
7543 \findexed{defun} to be compiled as local calls. The function call
7544 can be in a different top-level form than the \code{defun}, or even in a
7547 In addition, block compilation allows the declaration of the \i{entry points}
7548 to the block compiled portion. An entry point is any function that may be
7549 called from outside of the block compilation. If a function is not an entry
7550 point, then it can be compiled more efficiently, since all calls are known at
7551 compile time. In particular, if a function is only called in one place, then
7552 it will be let converted. This effectively inline expands the function, but
7553 without the code duplication that results from defining the function normally
7554 and then declaring it inline.
7556 The main advantage of block compilation is that it preserves efficiency in
7557 programs even when (for readability and syntactic convenience) they are broken
7558 up into many small functions. There is absolutely no overhead for calling a
7559 non-entry point function that is defined purely for modularity (i.e. called
7562 Block compilation also allows the use of non-descriptor arguments and return
7563 values in non-trivial programs (\pxlref{number-local-call}).
7566 * Block Compilation Semantics::
7567 * Block Compilation Declarations::
7568 * Compiler Arguments::
7569 * Practical Difficulties::
7570 * Context Declarations::
7571 * Context Declaration Example::
7574 %%\node Block Compilation Semantics, Block Compilation Declarations, Block Compilation, Block Compilation
7575 \subsection{Block Compilation Semantics}
7577 The effect of block compilation can be envisioned as the compiler turning all
7578 the \code{defun}s in the block compilation into a single \code{labels} form:
7580 (declaim (start-block fun1 fun3))
7595 (declaim (end-block))
7609 (setf (fdefinition 'fun1) #'fun1)
7610 (setf (fdefinition 'fun3) #'fun3))
7612 Calls between the block compiled functions are local calls, so changing the
7613 global definition of \code{fun1} will have no effect on what \code{fun2} does;
7614 \code{fun2} will keep calling the old \code{fun1}.
7616 The entry points \code{fun1} and \code{fun3} are still installed in
7617 the \code{symbol-function} as the global definitions of the functions,
7618 so a full call to an entry point works just as before. However,
7619 \code{fun2} is not an entry point, so it is not globally defined. In
7620 addition, \code{fun2} is only called in one place, so it will be let
7624 %%\node Block Compilation Declarations, Compiler Arguments, Block Compilation Semantics, Block Compilation
7625 \subsection{Block Compilation Declarations}
7626 \cpsubindex{declarations}{block compilation}
7627 \cindex{start-block declaration}
7628 \cindex{end-block declaration}
7630 The \code{extensions:start-block} and \code{extensions:end-block}
7631 declarations allow fine-grained control of block compilation. These
7632 declarations are only legal as global declarations (\code{declaim}
7633 or \code{proclaim}).
7637 The \code{start-block} declaration has this syntax:
7639 (start-block \mstar{\var{entry-point-name}})
7641 When processed by the compiler, this declaration marks the start of
7642 block compilation, and specifies the entry points to that block. If
7643 no entry points are specified, then \var{all} functions are made into
7644 entry points. If already block compiling, then the compiler ends the
7645 current block and starts a new one.
7649 The \code{end-block} declaration has no arguments:
7653 The \code{end-block} declaration ends a block compilation unit without
7654 starting a new one. This is useful mainly when only a portion of a file
7655 is worth block compiling.
7657 %%\node Compiler Arguments, Practical Difficulties, Block Compilation Declarations, Block Compilation
7658 \subsection{Compiler Arguments}
7659 \label{compile-file-block}
7660 \cpsubindex{compile-file}{block compilation arguments}
7662 The \kwd{block-compile} and \kwd{entry-points} arguments to
7663 \code{extensions:compile-from-stream} and \funref{compile-file} provide overall
7664 control of block compilation, and allow block compilation without requiring
7665 modification of the program source.
7667 There are three possible values of the \kwd{block-compile} argument:
7670 \item[\false{}] Do no compile-time resolution of global function
7671 names, not even for self-recursive calls. This inhibits any
7672 \code{start-block} declarations appearing in the file, allowing all
7673 functions to be incrementally redefined.
7675 \item[\true{}] Start compiling in block compilation mode. This is
7676 mainly useful for block compiling small files that contain no
7677 \code{start-block} declarations. See also the \kwd{entry-points}
7680 \item[\kwd{specified}] Start compiling in form-at-a-time mode, but
7681 exploit \code{start-block} declarations and compile self-recursive
7682 calls as local calls. Normally \kwd{specified} is the default for
7683 this argument (see \varref{block-compile-default}.)
7686 The \kwd{entry-points} argument can be used in conjunction with
7687 \w{\kwd{block-compile} \true{}} to specify the entry-points to a
7688 block-compiled file. If not specified or \nil, all global functions
7689 will be compiled as entry points. When \kwd{block-compile} is not
7690 \true, this argument is ignored.
7692 \begin{defvar}{}{block-compile-default}
7694 This variable determines the default value for the
7695 \kwd{block-compile} argument to \code{compile-file} and
7696 \code{compile-from-stream}. The initial value of this variable is
7697 \kwd{specified}, but \false{} is sometimes useful for totally
7698 inhibiting block compilation.
7701 %%\node Practical Difficulties, Context Declarations, Compiler Arguments, Block Compilation
7702 \subsection{Practical Difficulties}
7704 The main problem with block compilation is that the compiler uses
7705 large amounts of memory when it is block compiling. This places an
7706 upper limit on the amount of code that can be block compiled as a
7707 unit. To make best use of block compilation, it is necessary to
7708 locate the parts of the program containing many internal calls, and
7709 then add the appropriate \code{start-block} declarations. When writing
7710 new code, it is a good idea to put in block compilation declarations
7711 from the very beginning, since writing block declarations correctly
7712 requires accurate knowledge of the program's function call structure.
7713 If you want to initially develop code with full incremental
7714 redefinition, you can compile with \varref{block-compile-default} set to
7717 Note that if a \code{defun} appears in a non-null lexical environment, then
7718 calls to it cannot be block compiled.
7720 Unless files are very small, it is probably impractical to block compile
7721 multiple files as a unit by specifying a list of files to \code{compile-file}.
7722 Semi-inline expansion (\pxlref{semi-inline}) provides another way to
7723 extend block compilation across file boundaries.
7725 %%\node Context Declarations, Context Declaration Example, Practical Difficulties, Block Compilation
7726 \subsection{Context Declarations}
7727 \label{context-declarations}
7728 \cindex{context sensitive declarations}
7729 \cpsubindex{declarations}{context-sensitive}
7731 \cmucl{} has a context-sensitive declaration mechanism which is useful
7732 because it allows flexible control of the compilation policy in large
7733 systems without requiring changes to the source files. The primary
7734 use of this feature is to allow the exported interfaces of a system to
7735 be compiled more safely than the system internals. The context used
7736 is the name being defined and the kind of definition (function, macro,
7739 The \kwd{context-declarations} option to \macref{with-compilation-unit} has
7740 dynamic scope, affecting all compilation done during the evaluation of the
7741 body. The argument to this option should evaluate to a list of lists of the
7744 (\var{context-spec} \mplus{\var{declare-form}})
7746 In the indicated context, the specified declare forms are inserted at
7747 the head of each definition. The declare forms for all contexts that
7748 match are appended together, with earlier declarations getting
7749 precedence over later ones. A simple example:
7751 :context-declarations
7752 '((:external (declare (optimize (safety 2)))))
7754 This will cause all functions that are named by external symbols to be
7755 compiled with \code{safety 2}.
7757 The full syntax of context specs is:
7760 \item[\kwd{internal}, \kwd{external}] True if the symbol is internal
7761 (external) in its home package.
7763 \item[\kwd{uninterned}] True if the symbol has no home package.
7765 \item[\code{\w{(:package \mstar{\var{package-name}})}}] True if the
7766 symbol's home package is in any of the named packages (false if
7769 \item[\kwd{anonymous}] True if the function doesn't have any
7770 interesting name (not \code{defmacro}, \code{defun}, \code{labels}
7773 \item[\kwd{macro}, \kwd{function}] \kwd{macro} is a global
7774 (\code{defmacro}) macro. \kwd{function} is anything else.
7776 \item[\kwd{local}, \kwd{global}] \kwd{local} is a \code{labels} or
7777 \code{flet}. \kwd{global} is anything else.
7779 \item[\code{\w{(:or \mstar{\var{context-spec}})}}] True when any
7780 supplied \var{context-spec} is true.
7782 \item[\code{\w{(:and \mstar{\var{context-spec}})}}] True only when all
7783 supplied \var{context-spec}s are true.
7785 \item[\code{\w{(:not \mstar{\var{context-spec}})}}] True when
7786 \var{context-spec} is false.
7788 \item[\code{\w{(:member \mstar{\var{name}})}}] True when the defined
7789 name is one of these names (\code{equal} test.)
7791 \item[\code{\w{(:match \mstar{\var{pattern}})}}] True when any of the
7792 patterns is a substring of the name. The name is wrapped with
7793 \code{\$}'s, so ``\code{\$FOO}'' matches names beginning with
7794 ``\code{FOO}'', etc.
7797 %%\node Context Declaration Example, , Context Declarations, Block Compilation
7798 \subsection{Context Declaration Example}
7800 Here is a more complex example of \code{with-compilation-unit} options:
7802 :optimize '(optimize (speed 2) (space 2) (inhibit-warnings 2)
7803 (debug 1) (safety 0))
7804 :optimize-interface '(optimize-interface (safety 1) (debug 1))
7805 :context-declarations
7806 '(((:or :external (:and (:match "\%") (:match "SET")))
7807 (declare (optimize-interface (safety 2))))
7808 ((:or (:and :external :macro)
7809 (:match "\$PARSE-"))
7810 (declare (optimize (safety 2)))))
7812 The \code{optimize} and \code{extensions:optimize-interface}
7813 declarations (\pxlref{optimize-declaration}) set up the global
7814 compilation policy. The bodies of functions are to be compiled
7815 completely unsafe (\code{safety 0}), but argument count and weakened
7816 argument type checking is to be done when a function is called
7817 (\code{speed 2 safety 1}).
7819 The first declaration specifies that all functions that are external
7820 or whose names contain both ``\code{\%}'' and ``\code{SET}'' are to be
7821 compiled with completely safe interfaces (\code{safety 2}).
7822 The reason for this particular \kwd{match} rule is that \code{setf}
7823 inverse functions in this system tend to have both strings in their
7824 name somewhere. We want \code{setf} inverses to be safe because they
7825 are implicitly called by users even though their name is not exported.
7827 The second declaration makes external macros or functions whose names
7828 start with ``\code{PARSE-}'' have safe bodies (as well as interfaces).
7829 This is desirable because a syntax error in a macro may cause a type
7830 error inside the body. The \kwd{match} rule is used because macros
7831 often have auxiliary functions whose names begin with this string.
7833 This particular example is used to build part of the standard \cmucl{}
7834 system. Note however, that context declarations must be set up
7835 according to the needs and coding conventions of a particular system;
7836 different parts of \cmucl{} are compiled with different context
7837 declarations, and your system will probably need its own declarations.
7838 In particular, any use of the \kwd{match} option depends on naming
7839 conventions used in coding.
7842 %%\node Inline Expansion, Byte Coded Compilation, Block Compilation, Advanced Compiler Use and Efficiency Hints
7843 \section{Inline Expansion}
7844 \label{inline-expansion}
7845 \cindex{inline expansion}
7846 \cpsubindex{expansion}{inline}
7847 \cpsubindex{call}{inline}
7848 \cpsubindex{function call}{inline}
7849 \cpsubindex{optimization}{function call}
7851 Python can expand almost any function inline, including functions
7852 with keyword arguments. The only restrictions are that keyword
7853 argument keywords in the call must be constant, and that global
7854 function definitions (\code{defun}) must be done in a null lexical
7855 environment (not nested in a \code{let} or other binding form.) Local
7856 functions (\code{flet}) can be inline expanded in any environment.
7857 Combined with \python{}'s source-level optimization, inline expansion
7858 can be used for things that formerly required macros for efficient
7859 implementation. In \python, macros don't have any efficiency
7860 advantage, so they need only be used where a macro's syntactic
7861 flexibility is required.
7863 Inline expansion is a compiler optimization technique that reduces
7864 the overhead of a function call by simply not doing the call:
7865 instead, the compiler effectively rewrites the program to appear as
7866 though the definition of the called function was inserted at each
7867 call site. In \llisp, this is straightforwardly expressed by
7868 inserting the \code{lambda} corresponding to the original definition:
7870 (proclaim '(inline my-1+))
7871 (defun my-1+ (x) (+ x 1))
7873 (my-1+ someval) \result{} ((lambda (x) (+ x 1)) someval)
7876 When the function expanded inline is large, the program after inline
7877 expansion may be substantially larger than the original program. If
7878 the program becomes too large, inline expansion hurts speed rather
7879 than helping it, since hardware resources such as physical memory and
7880 cache will be exhausted. Inline expansion is called for:
7883 \item When profiling has shown that a relatively simple function is
7884 called so often that a large amount of time is being wasted in the
7885 calling of that function (as opposed to running in that function.)
7886 If a function is complex, it will take a long time to run relative
7887 to the time spent in call, so the speed advantage of inline expansion
7888 is diminished at the same time the space cost of inline expansion is
7889 increased. Of course, if a function is rarely called, then the
7890 overhead of calling it is also insignificant.
7892 \item With functions so simple that they take less space to inline
7893 expand than would be taken to call the function (such as
7894 \code{my-1+} above.) It would require intimate knowledge of the
7895 compiler to be certain when inline expansion would reduce space, but
7896 it is generally safe to inline expand functions whose definition is
7897 a single function call, or a few calls to simple \clisp{} functions.
7901 In addition to this speed/space tradeoff from inline expansion's
7902 avoidance of the call, inline expansion can also reveal opportunities
7903 for optimization. \python{}'s extensive source-level optimization can
7904 make use of context information from the caller to tremendously
7905 simplify the code resulting from the inline expansion of a function.
7907 The main form of caller context is local information about the actual
7908 argument values: what the argument types are and whether the arguments
7909 are constant. Knowledge about argument types can eliminate run-time
7910 type tests (e.g., for generic arithmetic.) Constant arguments in a
7911 call provide opportunities for constant folding optimization after
7914 A hidden way that constant arguments are often supplied to functions
7915 is through the defaulting of unsupplied optional or keyword arguments.
7916 There can be a huge efficiency advantage to inline expanding functions
7917 that have complex keyword-based interfaces, such as this definition of
7918 the \code{member} function:
7920 (proclaim '(inline member))
7921 (defun member (item list &key
7924 (test-not nil notp))
7925 (do ((list list (cdr list)))
7927 (let ((car (car list)))
7929 (funcall test item (funcall key car)))
7931 (not (funcall test-not item (funcall key car))))
7933 (funcall test item (funcall key car))))
7937 After inline expansion, this call is simplified to the obvious code:
7939 (member a l :key #'foo-a :test #'char=) \result{}
7941 (do ((list list (cdr list)))
7943 (let ((car (car list)))
7944 (if (char= item (foo-a car))
7947 In this example, there could easily be more than an order of magnitude
7948 improvement in speed. In addition to eliminating the original call to
7949 \code{member}, inline expansion also allows the calls to \code{char=}
7950 and \code{foo-a} to be open-coded. We go from a loop with three tests
7951 and two calls to a loop with one test and no calls.
7953 \xlref{source-optimization} for more discussion of source level
7957 * Inline Expansion Recording::
7958 * Semi-Inline Expansion::
7959 * The Maybe-Inline Declaration::
7962 %%\node Inline Expansion Recording, Semi-Inline Expansion, Inline Expansion, Inline Expansion
7963 \subsection{Inline Expansion Recording}
7964 \cindex{recording of inline expansions}
7966 Inline expansion requires the source for the inline expanded function to
7967 be available when calls to the function are compiled. The compiler doesn't
7968 remember the inline expansion for every function, since that would take an
7969 excessive amount of space. Instead, the programmer must tell the compiler to
7970 record the inline expansion before the definition of the inline expanded
7971 function is compiled. This is done by globally declaring the function inline
7972 before the function is defined, by using the \code{inline} and
7973 \code{extensions:maybe-inline} (\pxlref{maybe-inline-declaration})
7976 In addition to recording the inline expansion of inline functions at the time
7977 the function is compiled, \code{compile-file} also puts the inline expansion in
7978 the output file. When the output file is loaded, the inline expansion is made
7979 available for subsequent compilations; there is no need to compile the
7980 definition again to record the inline expansion.
7982 If a function is declared inline, but no expansion is recorded, then the
7983 compiler will give an efficiency note like:
7985 Note: MYFUN is declared inline, but has no expansion.
7987 When you get this note, check that the \code{inline} declaration and the
7988 definition appear before the calls that are to be inline expanded. This note
7989 will also be given if the inline expansion for a \code{defun} could not be
7990 recorded because the \code{defun} was in a non-null lexical environment.
7992 %%\node Semi-Inline Expansion, The Maybe-Inline Declaration, Inline Expansion Recording, Inline Expansion
7993 \subsection{Semi-Inline Expansion}
7996 Python supports \var{semi-inline} functions. Semi-inline expansion
7997 shares a single copy of a function across all the calls in a component
7998 by converting the inline expansion into a local function
7999 (\pxlref{local-call}.) This takes up less space when there are
8000 multiple calls, but also provides less opportunity for context
8001 dependent optimization. When there is only one call, the result is
8002 identical to normal inline expansion. Semi-inline expansion is done
8003 when the \code{space} optimization quality is \code{0}, and the
8004 function has been declared \code{extensions:maybe-inline}.
8006 This mechanism of inline expansion combined with local call also
8007 allows recursive functions to be inline expanded. If a recursive
8008 function is declared \code{inline}, calls will actually be compiled
8009 semi-inline. Although recursive functions are often so complex that
8010 there is little advantage to semi-inline expansion, it can still be
8011 useful in the same sort of cases where normal inline expansion is
8012 especially advantageous, i.e. functions where the calling context can
8015 %%\node The Maybe-Inline Declaration, , Semi-Inline Expansion, Inline Expansion
8016 \subsection{The Maybe-Inline Declaration}
8017 \label{maybe-inline-declaration}
8018 \cindex{maybe-inline declaration}
8020 The \code{extensions:maybe-inline} declaration is a \cmucl{}
8021 extension. It is similar to \code{inline}, but indicates that inline
8022 expansion may sometimes be desirable, rather than saying that inline
8023 expansion should almost always be done. When used in a global
8024 declaration, \code{extensions:maybe-inline} causes the expansion for
8025 the named functions to be recorded, but the functions aren't actually
8026 inline expanded unless \code{space} is \code{0} or the function is
8027 eventually (perhaps locally) declared \code{inline}.
8029 Use of the \code{extensions:maybe-inline} declaration followed by the
8030 \code{defun} is preferable to the standard idiom of:
8032 (proclaim '(inline myfun))
8033 (defun myfun () ...)
8034 (proclaim '(notinline myfun))
8036 ;;; \i{Any calls to \code{myfun} here are not inline expanded.}
8039 (declare (inline myfun))
8041 ;; \i{Calls to \code{myfun} here are inline expanded.}
8044 The problem with using \code{notinline} in this way is that in
8045 \clisp{} it does more than just suppress inline expansion, it also
8046 forbids the compiler to use any knowledge of \code{myfun} until a
8047 later \code{inline} declaration overrides the \code{notinline}. This
8048 prevents compiler warnings about incorrect calls to the function, and
8049 also prevents block compilation.
8051 The \code{extensions:maybe-inline} declaration is used like this:
8053 (proclaim '(extensions:maybe-inline myfun))
8054 (defun myfun () ...)
8056 ;;; \i{Any calls to \code{myfun} here are not inline expanded.}
8059 (declare (inline myfun))
8061 ;; \i{Calls to \code{myfun} here are inline expanded.}
8064 (defun someotherfun ()
8065 (declare (optimize (space 0)))
8067 ;; \i{Calls to \code{myfun} here are expanded semi-inline.}
8070 In this example, the use of \code{extensions:maybe-inline} causes the
8071 expansion to be recorded when the \code{defun} for \code{myfun} is
8072 compiled, and doesn't waste space through doing inline expansion by
8073 default. Unlike \code{notinline}, this declaration still allows the
8074 compiler to assume that the known definition really is the one that
8075 will be called when giving compiler warnings, and also allows the
8076 compiler to do semi-inline expansion when the policy is appropriate.
8078 When the goal is merely to control whether inline expansion is done by
8079 default, it is preferable to use \code{extensions:maybe-inline} rather
8080 than \code{notinline}. The \code{notinline} declaration should be
8081 reserved for those special occasions when a function may be redefined
8082 at run-time, so the compiler must be told that the obvious definition
8083 of a function is not necessarily the one that will be in effect at the
8087 %%\node Byte Coded Compilation, Object Representation, Inline Expansion, Advanced Compiler Use and Efficiency Hints
8088 \section{Byte Coded Compilation}
8089 \label{byte-compile}
8090 \cindex{byte coded compilation}
8091 \cindex{space optimization}
8093 \Python{} supports byte compilation to reduce the size of Lisp
8094 programs by allowing functions to be compiled more compactly. Byte
8095 compilation provides an extreme speed/space tradeoff: byte code is
8096 typically six times more compact than native code, but runs fifty
8097 times (or more) slower. This is about ten times faster than the
8098 standard interpreter, which is itself considered fast in comparison to
8099 other \clisp{} interpreters.
8101 Large Lisp systems (such as \cmucl{} itself) often have large amounts
8102 of user-interface code, compile-time (macro) code, debugging code, or
8103 rarely executed special-case code. This code is a good target for
8104 byte compilation: very little time is spent running in it, but it can
8105 take up quite a bit of space. Straight-line code with many function
8106 calls is much more suitable than inner loops.
8108 When byte-compiling, the compiler compiles about twice as fast, and
8109 can produce a hardware independent object file (\file{.bytef} type.)
8110 This file can be loaded like a normal fasl file on any implementation
8111 of CMU CL with the same byte-ordering (DEC PMAX has \file{.lbytef}
8114 The decision to byte compile or native compile can be made on a
8115 per-file or per-code-object basis. The \kwd{byte-compile} argument to
8116 \funref{compile-file} has these possible values:
8118 \item[\false{}] Don't byte compile anything in this file.
8120 \item[\true{}] Byte compile everything in this file and produce a
8121 processor-independent \file{.bytef} file.
8123 \item[\kwd{maybe}] Produce a normal fasl file, but byte compile any
8124 functions for which the \code{speed} optimization quality is
8125 \code{0} and the \code{debug} quality is not greater than \code{1}.
8128 \begin{defvar}{extensions:}{byte-compile-top-level}
8130 If this variable is true (the default) and the \kwd{byte-compile}
8131 argument to \code{compile-file} is \kwd{maybe}, then byte compile
8132 top-level code (code outside of any \code{defun}, \code{defmethod},
8136 \begin{defvar}{extensions:}{byte-compile-default}
8138 This variable determines the default value for the
8139 \kwd{byte-compile} argument to \code{compile-file}, initially
8144 %%\node Object Representation, Numbers, Byte Coded Compilation, Advanced Compiler Use and Efficiency Hints
8145 \section{Object Representation}
8146 \label{object-representation}
8147 \cindex{object representation}
8148 \cpsubindex{representation}{object}
8149 \cpsubindex{efficiency}{of objects}
8151 A somewhat subtle aspect of writing efficient \clisp{} programs is
8152 choosing the correct data structures so that the underlying objects
8153 can be implemented efficiently. This is partly because of the need
8154 for multiple representations for a given value
8155 (\pxlref{non-descriptor}), but is also due to the sheer number of
8156 object types that \clisp{} has built in. The number of possible
8157 representations complicates the choice of a good representation
8158 because semantically similar objects may vary in their efficiency
8159 depending on how the program operates on them.
8162 * Think Before You Use a List::
8163 * Structure Representation::
8170 %%\node Think Before You Use a List, Structure Representation, Object Representation, Object Representation
8171 \subsection{Think Before You Use a List}
8172 \cpsubindex{lists}{efficiency of}
8174 Although Lisp's creator seemed to think that it was for LISt Processing, the
8175 astute observer may have noticed that the chapter on list manipulation makes up
8176 less than three percent of \i{Common Lisp: the Language II}. The language has
8177 grown since Lisp 1.5\dash{}new data types supersede lists for many purposes.
8179 %%\node Structure Representation, Arrays, Think Before You Use a List, Object Representation
8180 \subsection{Structure Representation}
8181 \cpsubindex{structure types}{efficiency of} One of the best ways of
8182 building complex data structures is to define appropriate structure
8183 types using \findexed{defstruct}. In \python, access of structure
8184 slots is always at least as fast as list or vector access, and is
8185 usually faster. In comparison to a list representation of a tuple,
8186 structures also have a space advantage.
8188 Even if structures weren't more efficient than other representations, structure
8189 use would still be attractive because programs that use structures in
8190 appropriate ways are much more maintainable and robust than programs written
8191 using only lists. For example:
8193 (rplaca (caddr (cadddr x)) (caddr y))
8195 could have been written using structures in this way:
8197 (setf (beverage-flavor (astronaut-beverage x)) (beverage-flavor y))
8199 The second version is more maintainable because it is easier to
8200 understand what it is doing. It is more robust because structure
8201 accesses are type checked. An \code{astronaut} will never be confused
8202 with a \code{beverage}, and the result of \code{beverage-flavor} is
8203 always a flavor. See sections \ref{structure-types} and
8204 \ref{freeze-type} for more information about structure types.
8205 \xlref{type-inference} for a number of examples that make clear the
8206 advantages of structure typing.
8208 Note that the structure definition should be compiled before any uses
8209 of its accessors or type predicate so that these function calls can be
8210 efficiently open-coded.
8212 %%\node Arrays, Vectors, Structure Representation, Object Representation
8215 \cpsubindex{arrays}{efficiency of}
8217 Arrays are often the most efficient representation for collections of objects
8221 \item Array representations are often the most compact. An array is
8222 always more compact than a list containing the same number of
8225 \item Arrays allow fast constant-time access.
8227 \item Arrays are easily destructively modified, which can reduce
8230 \item Array element types can be specialized, which reduces both
8231 overall size and consing (\pxlref{specialized-array-types}.)
8235 Access of arrays that are not of type \code{simple-array} is less
8236 efficient, so declarations are appropriate when an array is of a
8237 simple type like \code{simple-string} or \code{simple-bit-vector}.
8238 Arrays are almost always simple, but the compiler may not be able to
8239 prove simpleness at every use. The only way to get a non-simple array
8240 is to use the \kwd{displaced-to}, \kwd{fill-pointer} or
8241 \kwd{adjustable} arguments to \code{make-array}. If you don't use
8242 these hairy options, then arrays can always be declared to be simple.
8244 Because of the many specialized array types and the possibility of
8245 non-simple arrays, array access is much like generic arithmetic
8246 (\pxlref{generic-arithmetic}). In order for array accesses to be
8247 efficiently compiled, the element type and simpleness of the array
8248 must be known at compile time. If there is inadequate information,
8249 the compiler is forced to call a generic array access routine. You
8250 can detect inefficient array accesses by enabling efficiency notes,
8251 \pxlref{efficiency-notes}.
8253 %%\node Vectors, Bit-Vectors, Arrays, Object Representation
8254 \subsection{Vectors}
8255 \cpsubindex{vectors}{efficiency of}
8257 Vectors (one dimensional arrays) are particularly useful, since in
8258 addition to their obvious array-like applications, they are also well
8259 suited to representing sequences. In comparison to a list
8260 representation, vectors are faster to access and take up between two
8261 and sixty-four times less space (depending on the element type.) As
8262 with arbitrary arrays, the compiler needs to know that vectors are not
8263 complex, so you should use \code{simple-string} in preference to
8266 The only advantage that lists have over vectors for representing
8267 sequences is that it is easy to change the length of a list, add to it
8268 and remove items from it. Likely signs of archaic, slow Lisp code are
8269 \code{nth} and \code{nthcdr}. If you are using these functions you
8270 should probably be using a vector.
8272 %%\node Bit-Vectors, Hashtables, Vectors, Object Representation
8273 \subsection{Bit-Vectors}
8274 \cpsubindex{bit-vectors}{efficiency of}
8276 Another thing that lists have been used for is set manipulation. In
8277 applications where there is a known, reasonably small universe of
8278 items, bit-vectors can be used to improve performance. This is much
8279 less convenient than using lists, because instead of symbols, each
8280 element in the universe must be assigned a numeric index into the bit
8281 vector. Using a bit-vector will nearly always be faster, and can be
8282 tremendously faster if the number of elements in the set is not small.
8283 The logical operations on \code{simple-bit-vector}s are efficient,
8284 since they operate on a word at a time.
8287 %%\node Hashtables, , Bit-Vectors, Object Representation
8288 \subsection{Hashtables}
8289 \cpsubindex{hash-tables}{efficiency of}
8291 Hashtables are an efficient and general mechanism for maintaining associations
8292 such as the association between an object and its name. Although hashtables
8293 are usually the best way to maintain associations, efficiency and style
8294 considerations sometimes favor the use of an association list (a-list).
8296 \code{assoc} is fairly fast when the \var{test} argument is \code{eq}
8297 or \code{eql} and there are only a few elements, but the time goes up
8298 in proportion to the number of elements. In contrast, the
8299 hash-table lookup has a somewhat higher overhead, but the speed is
8300 largely unaffected by the number of entries in the table. For an
8301 \code{equal} hash-table or alist, hash-tables have an even greater
8302 advantage, since the test is more expensive. Whatever you do, be sure
8303 to use the most restrictive test function possible.
8305 The style argument observes that although hash-tables and alists
8306 overlap in function, they do not do all things equally well.
8309 \item Alists are good for maintaining scoped environments. They were
8310 originally invented to implement scoping in the Lisp interpreter,
8311 and are still used for this in \python. With an alist one can
8312 non-destructively change an association simply by consing a new
8313 element on the front. This is something that cannot be done with
8316 \item Hashtables are good for maintaining a global association. The
8317 value associated with an entry can easily be changed with
8318 \code{setf}. With an alist, one has to go through contortions,
8319 either \code{rplacd}'ing the cons if the entry exists, or pushing a
8320 new one if it doesn't. The side-effecting nature of hash-table
8321 operations is an advantage here.
8325 Historically, symbol property lists were often used for global name
8326 associations. Property lists provide an awkward and error-prone
8327 combination of name association and record structure. If you must use
8328 the property list, please store all the related values in a single
8329 structure under a single property, rather than using many properties.
8330 This makes access more efficient, and also adds a modicum of typing
8331 and abstraction. \xlref{advanced-type-stuff} for information on types
8335 %%\node Numbers, General Efficiency Hints, Object Representation, Advanced Compiler Use and Efficiency Hints
8337 \label{numeric-types}
8338 \cpsubindex{numeric}{types}
8339 \cpsubindex{types}{numeric}
8341 Numbers are interesting because numbers are one of the few \llisp{} data types
8342 that have direct support in conventional hardware. If a number can be
8343 represented in the way that the hardware expects it, then there is a big
8344 efficiency advantage.
8346 Using hardware representations is problematical in \llisp{} due to
8347 dynamic typing (where the type of a value may be unknown at compile
8348 time.) It is possible to compile code for statically typed portions
8349 of a \llisp{} program with efficiency comparable to that obtained in
8350 statically typed languages such as C, but not all \llisp{}
8351 implementations succeed. There are two main barriers to efficient
8352 numerical code in \llisp{}:
8355 \item The compiler must prove that the numerical expression is in fact
8356 statically typed, and
8358 \item The compiler must be able to somehow reconcile the conflicting
8359 demands of the hardware mandated number representation with the
8360 \llisp{} requirements of dynamic typing and garbage-collecting
8361 dynamic storage allocation.
8364 Because of its type inference (\pxlref{type-inference}) and efficiency
8365 notes (\pxlref{efficiency-notes}), \python{} is better than
8366 conventional \llisp{} compilers at ensuring that numerical expressions
8367 are statically typed. Python also goes somewhat farther than existing
8368 compilers in the area of allowing native machine number
8369 representations in the presence of garbage collection.
8373 * Non-Descriptor Representations::
8375 * Generic Arithmetic::
8378 * Floating Point Efficiency::
8379 * Specialized Arrays::
8380 * Specialized Structure Slots::
8381 * Interactions With Local Call::
8382 * Representation of Characters::
8385 %%\node Descriptors, Non-Descriptor Representations, Numbers, Numbers
8386 \subsection{Descriptors}
8387 \cpsubindex{descriptors}{object}
8388 \cindex{object representation}
8389 \cpsubindex{representation}{object}
8390 \cpsubindex{consing}{overhead of}
8392 \llisp{}'s dynamic typing requires that it be possible to represent
8393 any value with a fixed length object, known as a \var{descriptor}.
8394 This fixed-length requirement is implicit in features such as:
8397 \item Data types (like \code{simple-vector}) that can contain any type
8398 of object, and that can be destructively modified to contain
8399 different objects (of possibly different types.)
8401 \item Functions that can be called with any type of argument, and that
8402 can be redefined at run time.
8405 In order to save space, a descriptor is invariably represented as a
8406 single word. Objects that can be directly represented in the
8407 descriptor itself are said to be \var{immediate}. Descriptors for
8408 objects larger than one word are in reality pointers to the memory
8409 actually containing the object.
8411 Representing objects using pointers has two major disadvantages:
8414 \item The memory pointed to must be allocated on the heap, so it must
8415 eventually be freed by the garbage collector. Excessive heap
8416 allocation of objects (or ``consing'') is inefficient in several
8417 ways. \xlref{consing}.
8419 \item Representing an object in memory requires the compiler to emit
8420 additional instructions to read the actual value in from memory, and
8421 then to write the value back after operating on it.
8424 The introduction of garbage collection makes things even worse, since
8425 the garbage collector must be able to determine whether a descriptor
8426 is an immediate object or a pointer. This requires that a few bits in
8427 each descriptor be dedicated to the garbage collector. The loss of a
8428 few bits doesn't seem like much, but it has a major efficiency
8429 implication\dash{}objects whose natural machine representation is a
8430 full word (integers and single-floats) cannot have an immediate
8431 representation. So the compiler is forced to use an unnatural
8432 immediate representation (such as \code{fixnum}) or a natural pointer
8433 representation (with the attendant consing overhead.)
8436 %%\node Non-Descriptor Representations, Variables, Descriptors, Numbers
8437 \subsection{Non-Descriptor Representations}
8438 \label{non-descriptor}
8439 \cindex{non-descriptor representations}
8440 \cindex{stack numbers}
8442 From the discussion above, we can see that the standard descriptor
8443 representation has many problems, the worst being number consing.
8444 \llisp{} compilers try to avoid these descriptor efficiency problems by using
8445 \var{non-descriptor} representations. A compiler that uses non-descriptor
8446 representations can compile this function so that it does no number consing:
8448 (defun multby (vec n)
8449 (declare (type (simple-array single-float (*)) vec)
8451 (dotimes (i (length vec))
8453 (* n (aref vec i)))))
8455 If a descriptor representation were used, each iteration of the loop might
8456 cons two floats and do three times as many memory references.
8458 As its negative definition suggests, the range of possible non-descriptor
8459 representations is large. The performance improvement from non-descriptor
8460 representation depends upon both the number of types that have non-descriptor
8461 representations and the number of contexts in which the compiler is forced to
8462 use a descriptor representation.
8464 Many \llisp{} compilers support non-descriptor representations for
8465 float types such as \code{single-float} and \code{double-float}
8466 (section \ref{float-efficiency}.) \python{} adds support for full
8467 word integers (\pxlref{word-integers}), characters
8468 (\pxlref{characters}) and system-area pointers (unconstrained
8469 pointers, \pxlref{system-area-pointers}.) Many \llisp{} compilers
8470 support non-descriptor representations for variables (section
8471 \ref{ND-variables}) and array elements (section
8472 \ref{specialized-array-types}.) \python{} adds support for
8473 non-descriptor arguments and return values in local call
8474 (\pxlref{number-local-call}) and structure slots (\pxlref{raw-slots}).
8476 %%\node Variables, Generic Arithmetic, Non-Descriptor Representations, Numbers
8477 \subsection{Variables}
8478 \label{ND-variables}
8479 \cpsubindex{variables}{non-descriptor}
8480 \cpsubindex{type declarations}{variable}
8481 \cpsubindex{efficiency}{of numeric variables}
8483 In order to use a non-descriptor representation for a variable or
8484 expression intermediate value, the compiler must be able to prove that
8485 the value is always of a particular type having a non-descriptor
8486 representation. Type inference (\pxlref{type-inference}) often needs
8487 some help from user-supplied declarations. The best kind of type
8488 declaration is a variable type declaration placed at the binding
8492 (declare (single-float x))
8495 Use of \code{the}, or of variable declarations not at the binding form
8496 is insufficient to allow non-descriptor representation of the
8497 variable\dash{}with these declarations it is not certain that all
8498 values of the variable are of the right type. It is sometimes useful
8499 to introduce a gratuitous binding that allows the compiler to change
8500 to a non-descriptor representation, like:
8505 (declare (type (signed-byte 32) x))
8509 The declaration on the inner \code{x} is necessary here due to a phase
8510 ordering problem. Although the compiler will eventually prove that
8511 the outer \code{x} is a \w{\code{(signed-byte 32)}} within that
8512 \code{etypecase} branch, the inner \code{x} would have been optimized
8513 away by that time. Declaring the type makes let optimization more
8516 Note that storing a value into a global (or \code{special}) variable
8517 always forces a descriptor representation. Wherever possible, you
8518 should operate only on local variables, binding any referenced globals
8519 to local variables at the beginning of the function, and doing any
8520 global assignments at the end.
8522 Efficiency notes signal use of inefficient representations, so
8523 programmers needn't continuously worry about the details of
8524 representation selection (\pxlref{representation-eff-note}.)
8526 %%\node Generic Arithmetic, Fixnums, Variables, Numbers
8527 \subsection{Generic Arithmetic}
8528 \label{generic-arithmetic}
8529 \cindex{generic arithmetic}
8530 \cpsubindex{arithmetic}{generic}
8531 \cpsubindex{numeric}{operation efficiency}
8533 In \clisp, arithmetic operations are \var{generic}.\footnote{As Steele
8534 notes in CLTL II, this is a generic conception of generic, and is
8535 not to be confused with the CLOS concept of a generic function.}
8536 The \code{+} function can be passed \code{fixnum}s, \code{bignum}s,
8537 \code{ratio}s, and various kinds of \code{float}s and
8538 \code{complex}es, in any combination. In addition to the inherent
8539 complexity of \code{bignum} and \code{ratio} operations, there is also
8540 a lot of overhead in just figuring out which operation to do and what
8541 contagion and canonicalization rules apply. The complexity of generic
8542 arithmetic is so great that it is inconceivable to open code it.
8543 Instead, the compiler does a function call to a generic arithmetic
8544 routine, consuming many instructions before the actual computation
8547 This is ridiculous, since even \llisp{} programs do a lot of
8548 arithmetic, and the hardware is capable of doing operations on small
8549 integers and floats with a single instruction. To get acceptable
8550 efficiency, the compiler special-cases uses of generic arithmetic that
8551 are directly implemented in the hardware. In order to open code
8552 arithmetic, several constraints must be met:
8555 \item All the arguments must be known to be a good type of number.
8557 \item The result must be known to be a good type of number.
8559 \item Any intermediate values such as the result of \w{\code{(+ a b)}}
8560 in the call \w{\code{(+ a b c)}} must be known to be a good type of
8563 \item All the above numbers with good types must be of the \var{same}
8564 good type. Don't try to mix integers and floats or different float
8568 The ``good types'' are \w{\code{(signed-byte 32)}},
8569 \w{\code{(unsigned-byte 32)}}, \code{single-float} and
8570 \code{double-float}. See sections \ref{fixnums}, \ref{word-integers}
8571 and \ref{float-efficiency} for more discussion of good numeric types.
8573 \code{float} is not a good type, since it might mean either
8574 \code{single-float} or \code{double-float}. \code{integer} is not a
8575 good type, since it might mean \code{bignum}. \code{rational} is not
8576 a good type, since it might mean \code{ratio}. Note however that
8577 these types are still useful in declarations, since type inference may
8578 be able to strengthen a weak declaration into a good one, when it
8579 would be at a loss if there was no declaration at all
8580 (\pxlref{type-inference}). The \code{integer} and
8581 \code{unsigned-byte} (or non-negative integer) types are especially
8582 useful in this regard, since they can often be strengthened to a good
8585 Arithmetic with \code{complex} numbers is inefficient in comparison to
8586 float and integer arithmetic. Complex numbers are always represented
8587 with a pointer descriptor (causing consing overhead), and complex
8588 arithmetic is always closed coded using the general generic arithmetic
8589 functions. But arithmetic with complex types such as:
8594 is still faster than \code{bignum} or \code{ratio} arithmetic, since the
8595 implementation is much simpler.
8597 Note: don't use \code{/} to divide integers unless you want the
8598 overhead of rational arithmetic. Use \code{truncate} even when you
8599 know that the arguments divide evenly.
8601 You don't need to remember all the rules for how to get open-coded
8602 arithmetic, since efficiency notes will tell you when and where there
8603 is a problem\dash{}\pxlref{efficiency-notes}.
8606 %%\node Fixnums, Word Integers, Generic Arithmetic, Numbers
8607 \subsection{Fixnums}
8612 A fixnum is a ``FIXed precision NUMber''. In modern \llisp{}
8613 implementations, fixnums can be represented with an immediate
8614 descriptor, so operating on fixnums requires no consing or memory
8615 references. Clever choice of representations also allows some
8616 arithmetic operations to be done on fixnums using hardware supported
8617 word-integer instructions, somewhat reducing the speed penalty for
8618 using an unnatural integer representation.
8620 It is useful to distinguish the \code{fixnum} type from the fixnum
8621 representation of integers. In \python, there is absolutely nothing
8622 magical about the \code{fixnum} type in comparison to other finite
8623 integer types. \code{fixnum} is equivalent to (is defined with
8624 \code{deftype} to be) \w{\code{(signed-byte 30)}}. \code{fixnum} is
8625 simply the largest subset of integers that \i{can be represented}
8626 using an immediate fixnum descriptor.
8628 Unlike in other \clisp{} compilers, it is in no way desirable to use
8629 the \code{fixnum} type in declarations in preference to more
8630 restrictive integer types such as \code{bit}, \w{\code{(integer -43
8631 7)}} and \w{\code{(unsigned-byte 8)}}. Since Python does
8632 understand these integer types, it is preferable to use the more
8633 restrictive type, as it allows better type inference
8634 (\pxlref{operation-type-inference}.)
8636 The small, efficient fixnum is contrasted with bignum, or ``BIG
8637 NUMber''. This is another descriptor representation for integers, but
8638 this time a pointer representation that allows for arbitrarily large
8639 integers. Bignum operations are less efficient than fixnum
8640 operations, both because of the consing and memory reference overheads
8641 of a pointer descriptor, and also because of the inherent complexity
8642 of extended precision arithmetic. While fixnum operations can often
8643 be done with a single instruction, bignum operations are so complex
8644 that they are always done using generic arithmetic.
8646 A crucial point is that the compiler will use generic arithmetic if it
8647 can't \var{prove} that all the arguments, intermediate values, and
8648 results are fixnums. With bounded integer types such as
8649 \code{fixnum}, the result type proves to be especially problematical,
8650 since these types are not closed under common arithmetic operations
8651 such as \code{+}, \code{-}, \code{*} and \code{/}. For example,
8652 \w{\code{(1+ (the fixnum x))}} does not necessarily evaluate to a
8653 \code{fixnum}. Bignums were added to \llisp{} to get around this
8654 problem, but they really just transform the correctness problem ``if
8655 this add overflows, you will get the wrong answer'' to the efficiency
8656 problem ``if this add \var{might} overflow then your program will run
8657 slowly (because of generic arithmetic.)''
8659 There is just no getting around the fact that the hardware only
8660 directly supports short integers. To get the most efficient open
8661 coding, the compiler must be able to prove that the result is a good
8662 integer type. This is an argument in favor of using more restrictive
8663 integer types: \w{\code{(1+ (the fixnum x))}} may not always be a
8664 \code{fixnum}, but \w{\code{(1+ (the (unsigned-byte 8) x))}} always
8665 is. Of course, you can also assert the result type by putting in lots
8666 of \code{the} declarations and then compiling with \code{safety}
8669 %%\node Word Integers, Floating Point Efficiency, Fixnums, Numbers
8670 \subsection{Word Integers}
8671 \label{word-integers}
8672 \cindex{word integers}
8674 Python is unique in its efficient implementation of arithmetic
8675 on full-word integers through non-descriptor representations and open coding.
8676 Arithmetic on any subtype of these types:
8681 is reasonably efficient, although subtypes of \code{fixnum} remain
8682 somewhat more efficient.
8684 If a word integer must be represented as a descriptor, then the
8685 \code{bignum} representation is used, with its associated consing
8686 overhead. The support for word integers in no way changes the
8687 language semantics; it just makes arithmetic on small bignums vastly
8688 more efficient. It is fine to do arithmetic operations with mixed
8689 \code{fixnum} and word integer operands; just declare the most
8690 specific integer type you can, and let the compiler decide what
8691 representation to use.
8693 In fact, to most users, the greatest advantage of word integer
8694 arithmetic is that it effectively provides a few guard bits on the
8695 fixnum representation. If there are missing assertions on
8696 intermediate values in a fixnum expression, the intermediate results
8697 can usually be proved to fit in a word. After the whole expression is
8698 evaluated, there will often be a fixnum assertion on the final result,
8699 allowing creation of a fixnum result without even checking for
8702 The remarks in section \ref{fixnums} about fixnum result type also
8703 apply to word integers; you must be careful to give the compiler
8704 enough information to prove that the result is still a word integer.
8705 This time, though, when we blow out of word integers we land in
8706 generic bignum arithmetic, which is much worse than sleazing from
8707 \code{fixnum}s to word integers. Note that mixing
8708 \w{\code{(unsigned-byte 32)}} arguments with arguments of any signed
8709 type (such as \code{fixnum}) is a no-no, since the result might not be
8712 %%\node Floating Point Efficiency, Specialized Arrays, Word Integers, Numbers
8713 \subsection{Floating Point Efficiency}
8714 \label{float-efficiency}
8715 \cindex{floating point efficiency}
8717 Arithmetic on objects of type \code{single-float} and \code{double-float} is
8718 efficiently implemented using non-descriptor representations and open coding.
8719 As for integer arithmetic, the arguments must be known to be of the same float
8720 type. Unlike for integer arithmetic, the results and intermediate values
8721 usually take care of themselves due to the rules of float contagion, i.e.
8722 \w{\code{(1+ (the single-float x))}} is always a \code{single-float}.
8724 Although they are not specially implemented, \code{short-float} and
8725 \code{long-float} are also acceptable in declarations, since they are
8726 synonyms for the \code{single-float} and \code{double-float} types,
8730 Some versions of CMU Common Lisp include extra support for floating
8731 point arithmetic. In particular, if \code{*features*} includes
8732 \kwd{propagate-float-type}, list-style float type specifiers such as
8733 \w{\code{(single-float 0.0 1.0)}} will be used to good effect.
8735 For example, in this function,
8738 (declare (type (single-float 0f0 10f0) x))
8741 \Python{} can deduce that the
8742 return type of the function \code{square} is \w{\code{(single-float
8745 Many union types are also supported so that
8747 (+ (the (or (integer 1 1) (integer 5 5)) x)
8748 (the (or (integer 10 10) (integer 20 20)) y))
8750 has the inferred type \code{(or (integer 11 11) (integer 15 15)
8751 (integer 21 21) (integer 25 25))}. This also works for
8752 floating-point numbers. Member types, however, are not supported because in
8753 general the member elements do not have to be numbers. Thus,
8754 instead of \code{(member 1 4)}, you should write \code{(or (integer
8755 1 1) (integer 4 4))}.
8757 In addition, if \kwd{propagate-fun-type} is in \code{*features*},
8758 \Python{} knows how to infer types for many mathematical functions
8759 including square root, exponential and logarithmic functions,
8760 trigonometric functions and their inverses, and hyperbolic functions
8761 and their inverses. For numeric code, this can greatly enhance
8762 efficiency by allowing the compiler to use specialized versions of
8763 the functions instead of the generic versions. The greatest benefit
8764 of this type inference is determining that the result of the
8765 function is a real-valued number instead of possibly being
8766 a complex-valued number.
8768 For example, consider the function
8771 (declare (type (single-float 0f0 100f0) x))
8772 (values (sqrt x) (log x 10f0)))
8774 With this declaration, the compiler can determine that the arguments
8775 to \code{sqrt} and \code{log} are always non-negative so that the result
8776 is always a \code{single-float}. In fact, the return type for this
8777 function is derived to be \code{(values (single-float 0f0 10f0)
8778 (single-float * 2f0))}.
8780 If the declaration were reduced to just \w{\code{(declare
8781 (single-float x))}}, the argument to \code{sqrt} and \code{log}
8782 could be negative. This forces the use of the generic versions of
8783 these functions because the result could be a complex number.
8785 Union types are not yet supported for functions.
8787 We note, however, that proper interval arithmetic is not fully
8788 implemented in the compiler so the inferred types may be slightly in
8789 error due to round-off errors. This round-off error could
8790 accumulate to cause the compiler to erroneously deduce the result
8791 type and cause code to be removed as being
8792 unreachable.\footnote{This, however, has not actually happened, but
8793 it is a possibility.}%
8794 Thus, the declarations should only be precise enough for the
8795 compiler to deduce that a real-valued argument to a function would
8796 produce a real-valued result. The efficiency notes
8797 (\pxlref{representation-eff-note}) from the compiler will guide you
8798 on what declarations might be useful.
8801 When a float must be represented as a descriptor, a pointer representation is
8802 used, creating consing overhead. For this reason, you should try to avoid
8803 situations (such as full call and non-specialized data structures) that force a
8804 descriptor representation. See sections \ref{specialized-array-types},
8805 \ref{raw-slots} and \ref{number-local-call}.
8807 \xlref{ieee-float} for information on the extensions to support IEEE
8810 %%\node Specialized Arrays, Specialized Structure Slots, Floating Point Efficiency, Numbers
8811 \subsection{Specialized Arrays}
8812 \label{specialized-array-types}
8813 \cindex{specialized array types}
8814 \cpsubindex{array types}{specialized}
8815 \cpsubindex{types}{specialized array}
8817 \clisp{} supports specialized array element types through the
8818 \kwd{element-type} argument to \code{make-array}. When an array has a
8819 specialized element type, only elements of that type can be stored in
8820 the array. From this restriction come two major efficiency
8824 \item A specialized array can save space by packing multiple elements
8825 into a single word. For example, a \code{base-char} array can have
8826 4 elements per word, and a \code{bit} array can have 32. This
8827 space-efficient representation is possible because it is not
8828 necessary to separately indicate the type of each element.
8830 \item The elements in a specialized array can be given the same
8831 non-descriptor representation as the one used in registers and on
8832 the stack, eliminating the need for representation conversions when
8833 reading and writing array elements. For objects with pointer
8834 descriptor representations (such as floats and word integers) there
8835 is also a substantial consing reduction because it is not necessary
8836 to allocate a new object every time an array element is modified.
8840 These are the specialized element types currently supported:
8854 Some versions of \cmucl{}\footnote{Currently, this includes the X86
8855 and Sparc versions which are compiled with the \kwd{signed-array}
8856 feature.} also support the following specialized element types:
8864 Although a \code{simple-vector} can hold any type of object, \true{}
8865 should still be considered a specialized array type, since arrays with
8866 element type \true{} are specialized to hold descriptors.
8870 When using non-descriptor representations, it is particularly
8871 important to make sure that array accesses are open-coded, since in
8872 addition to the generic operation overhead, efficiency is lost when
8873 the array element is converted to a descriptor so that it can be
8874 passed to (or from) the generic access routine. You can detect
8875 inefficient array accesses by enabling efficiency notes,
8876 \pxlref{efficiency-notes}. \xlref{array-types}.
8878 %%\node Specialized Structure Slots, Interactions With Local Call, Specialized Arrays, Numbers
8879 \subsection{Specialized Structure Slots}
8881 \cpsubindex{structure types}{numeric slots}
8882 \cindex{specialized structure slots}
8884 Structure slots declared by the \kwd{type} \code{defstruct} slot option
8885 to have certain known numeric types are also given non-descriptor
8886 representations. These types (and subtypes of these types) are supported:
8893 The primary advantage of specialized slot representations is a large
8894 reduction in spurious memory allocation and access overhead of programs
8895 that intensively use these types.
8897 %%\node Interactions With Local Call, Representation of Characters, Specialized Structure Slots, Numbers
8898 \subsection{Interactions With Local Call}
8899 \label{number-local-call}
8900 \cpsubindex{local call}{numeric operands}
8901 \cpsubindex{call}{numeric operands}
8902 \cindex{numbers in local call}
8904 Local call has many advantages (\pxlref{local-call}); one relevant to
8905 our discussion here is that local call extends the usefulness of
8906 non-descriptor representations. If the compiler knows from the
8907 argument type that an argument has a non-descriptor representation,
8908 then the argument will be passed in that representation. The easiest
8909 way to ensure that the argument type is known at compile time is to
8910 always declare the argument type in the called function, like:
8913 (declare (single-float x))
8916 The advantages of passing arguments and return values in a non-descriptor
8917 representation are the same as for non-descriptor representations in general:
8918 reduced consing and memory access (\pxlref{non-descriptor}.) This
8919 extends the applicative programming styles discussed in section
8920 \ref{local-call} to numeric code. Also, if source files are kept reasonably
8921 small, block compilation can be used to reduce number consing to a minimum.
8923 Note that non-descriptor return values can only be used with the known return
8924 convention (section \ref{local-call-return}.) If the compiler can't prove that
8925 a function always returns the same number of values, then it must use the
8926 unknown values return convention, which requires a descriptor representation.
8927 Pay attention to the known return efficiency notes to avoid number consing.
8929 %%\node Representation of Characters, , Interactions With Local Call, Numbers
8930 \subsection{Representation of Characters}
8935 Python also uses a non-descriptor representation for characters when
8936 convenient. This improves the efficiency of string manipulation, but is
8937 otherwise pretty invisible; characters have an immediate descriptor
8938 representation, so there is not a great penalty for converting a character to a
8939 descriptor. Nonetheless, it may sometimes be helpful to declare
8940 character-valued variables as \code{base-char}.
8943 %%\node General Efficiency Hints, Efficiency Notes, Numbers, Advanced Compiler Use and Efficiency Hints
8944 \section{General Efficiency Hints}
8945 \label{general-efficiency}
8946 \cpsubindex{efficiency}{general hints}
8948 This section is a summary of various implementation costs and ways to get
8949 around them. These hints are relatively unrelated to the use of the \python{}
8950 compiler, and probably also apply to most other \llisp{} implementations. In
8951 each section, there are references to related in-depth discussion.
8954 * Compile Your Code::
8955 * Avoid Unnecessary Consing::
8956 * Complex Argument Syntax::
8957 * Mapping and Iteration::
8958 * Trace Files and Disassembly::
8961 %%\node Compile Your Code, Avoid Unnecessary Consing, General Efficiency Hints, General Efficiency Hints
8962 \subsection{Compile Your Code}
8963 \cpsubindex{compilation}{why to}
8965 At this point, the advantages of compiling code relative to running it
8966 interpreted probably need not be emphasized too much, but remember that
8967 in \cmucl, compiled code typically runs hundreds of times faster than
8968 interpreted code. Also, compiled (\code{fasl}) files load significantly faster
8969 than source files, so it is worthwhile compiling files which are loaded many
8970 times, even if the speed of the functions in the file is unimportant.
8972 Even disregarding the efficiency advantages, compiled code is as good as or better
8973 than interpreted code. Compiled code can be debugged at the source level (see
8974 chapter \ref{debugger}), and compiled code does more error checking. For these
8975 reasons, the interpreter should be regarded mainly as an interactive command
8976 interpreter, rather than as a programming language implementation.
8978 \b{Do not} be concerned about the performance of your program until you
8979 see its speed compiled. Some techniques that make compiled code run
8980 faster make interpreted code run slower.
8982 %%\node Avoid Unnecessary Consing, Complex Argument Syntax, Compile Your Code, General Efficiency Hints
8983 \subsection{Avoid Unnecessary Consing}
8986 \cindex{garbage collection}
8987 \cindex{memory allocation}
8988 \cpsubindex{efficiency}{of memory use}
8991 Consing is another name for allocation of storage, as done by the
8992 \code{cons} function (hence its name.) \code{cons} is by no means the
8993 only function which conses\dash{}so do \code{make-array} and many
8994 other functions. Arithmetic and function call can also have hidden
8995 consing overheads. Consing hurts performance in the following ways:
8998 \item Consing reduces memory access locality, increasing paging
9001 \item Consing takes time just like anything else.
9003 \item Any space allocated eventually needs to be reclaimed, either by
9004 garbage collection or by starting a new \code{lisp} process.
9008 Consing is not undiluted evil, since programs do things other than
9009 consing, and appropriate consing can speed up the real work. It would
9010 certainly save time to allocate a vector of intermediate results that
9011 are reused hundreds of times. Also, if it is necessary to copy a
9012 large data structure many times, it may be more efficient to update
9013 the data structure non-destructively; this somewhat increases update
9014 overhead, but makes copying trivial.
9016 Note that the remarks in section \ref{efficiency-overview} about the
9017 importance of separating tuning from coding also apply to consing
9018 overhead. The majority of consing will be done by a small portion of
9019 the program. The consing hot spots are even less predictable than the
9020 CPU hot spots, so don't waste time and create bugs by doing
9021 unnecessary consing optimization. During initial coding, avoid
9022 unnecessary side-effects and cons where it is convenient. If
9023 profiling reveals a consing problem, \var{then} go back and fix the
9026 \xlref{non-descriptor} for a discussion of how to avoid number consing
9030 %%\node Complex Argument Syntax, Mapping and Iteration, Avoid Unnecessary Consing, General Efficiency Hints
9031 \subsection{Complex Argument Syntax}
9032 \cpsubindex{argument syntax}{efficiency}
9033 \cpsubindex{efficiency}{of argument syntax}
9034 \cindex{keyword argument efficiency}
9035 \cindex{rest argument efficiency}
9037 Common Lisp has very powerful argument passing mechanisms. Unfortunately, two
9038 of the most powerful mechanisms, rest arguments and keyword arguments, have a
9039 significant performance penalty:
9043 With keyword arguments, the called function has to parse the supplied keywords
9044 by iterating over them and checking them against the desired keywords.
9047 With rest arguments, the function must cons a list to hold the arguments. If a
9048 function is called many times or with many arguments, large amounts of memory
9052 Although rest argument consing is worse than keyword parsing, neither problem
9053 is serious unless thousands of calls are made to such a function. The use of
9054 keyword arguments is strongly encouraged in functions with many arguments or
9055 with interfaces that are likely to be extended, and rest arguments are often
9056 natural in user interface functions.
9058 Optional arguments have some efficiency advantage over keyword
9059 arguments, but their syntactic clumsiness and lack of extensibility
9060 have caused many \clisp{} programmers to abandon use of optionals
9061 except in functions that have obviously simple and immutable
9062 interfaces (such as \code{subseq}), or in functions that are only
9063 called in a few places. When defining an interface function to be
9064 used by other programmers or users, use of only required and keyword
9065 arguments is recommended.
9067 Parsing of \code{defmacro} keyword and rest arguments is done at
9068 compile time, so a macro can be used to provide a convenient syntax
9069 with an efficient implementation. If the macro-expanded form contains
9070 no keyword or rest arguments, then it is perfectly acceptable in inner
9073 Keyword argument parsing overhead can also be avoided by use of inline
9074 expansion (\pxlref{inline-expansion}) and block compilation (section
9075 \ref{block-compilation}.)
9077 Note: the compiler open-codes most heavily used system functions which have
9078 keyword or rest arguments, so that no run-time overhead is involved.
9080 %%\node Mapping and Iteration, Trace Files and Disassembly, Complex Argument Syntax, General Efficiency Hints
9081 \subsection{Mapping and Iteration}
9082 \cpsubindex{mapping}{efficiency of}
9084 One of the traditional \llisp{} programming styles is a highly applicative one,
9085 involving the use of mapping functions and many lists to store intermediate
9086 results. To compute the sum of the square-roots of a list of numbers, one
9089 (apply #'+ (mapcar #'sqrt list-of-numbers))
9092 This programming style is clear and elegant, but unfortunately results
9093 in slow code. There are two reasons why:
9096 \item The creation of lists of intermediate results causes much
9097 consing (see section \ref{consing}).
9099 \item Each level of application requires another scan down the list.
9100 Thus, disregarding other effects, the above code would probably take
9101 twice as long as a straightforward iterative version.
9105 An example of an iterative version of the same code:
9107 (do ((num list-of-numbers (cdr num))
9108 (sum 0 (+ (sqrt (car num)) sum)))
9112 See sections \ref{variable-type-inference} and \ref{let-optimization}
9113 for a discussion of the interactions of iteration constructs with type
9114 inference and variable optimization. Also, section
9115 \ref{local-tail-recursion} discusses an applicative style of
9118 %%\node Trace Files and Disassembly, , Mapping and Iteration, General Efficiency Hints
9119 \subsection{Trace Files and Disassembly}
9121 \cindex{trace files}
9122 \cindex{assembly listing}
9123 \cpsubindex{listing files}{trace}
9124 \cindex{Virtual Machine (VM, or IR2) representation}
9125 \cindex{implicit continuation representation (IR1)}
9126 \cpsubindex{continuations}{implicit representation}
9128 In order to write efficient code, you need to know the relative costs
9129 of different operations. The main reason why writing efficient
9130 \llisp{} code is difficult is that there are so many operations, and
9131 the costs of these operations vary in obscure context-dependent ways.
9132 Although efficiency notes point out some problem areas, the only way
9133 to ensure generation of the best code is to look at the assembly code
9136 The \code{disassemble} function is a convenient way to get the assembly code for a
9137 function, but it can be very difficult to interpret, since the correspondence
9138 with the original source code is weak. A better (but more awkward) option is
9139 to use the \kwd{trace-file} argument to \code{compile-file} to generate a trace
9142 A trace file is a dump of the compiler's internal representations,
9143 including annotated assembly code. Each component in the program gets
9144 four pages in the trace file (separated by ``\code{$\hat{ }L$}''):
9147 \item The implicit-continuation (or IR1) representation of the
9148 optimized source. This is a dump of the flow graph representation
9149 used for ``source level'' optimizations. As you will quickly
9150 notice, it is not really very close to the source. This
9151 representation is not very useful to even sophisticated users.
9153 \item The Virtual Machine (VM, or IR2) representation of the program.
9154 This dump represents the generated code as sequences of ``Virtual
9155 OPerations'' (VOPs.) This representation is intermediate between
9156 the source and the assembly code\dash{}each VOP corresponds fairly
9157 directly to some primitive function or construct, but a given VOP
9158 also has a fairly predictable instruction sequence. An operation
9159 (such as \code{+}) may have multiple implementations with different
9160 cost and applicability. The choice of a particular VOP such as
9161 \code{+/fixnum} or \code{+/single-float} represents this choice of
9162 implementation. Once you are familiar with it, the VM
9163 representation is probably the most useful for determining what
9164 implementation has been used.
9166 \item An assembly listing, annotated with the VOP responsible for
9167 generating the instructions. This listing is useful for figuring
9168 out what a VOP does and how it is implemented in a particular
9169 context, but its large size makes it more difficult to read.
9171 \item A disassembly of the generated code, which has all
9172 pseudo-operations expanded out, but is not annotated with VOPs.
9176 Note that trace file generation takes much space and time, since the trace file
9177 is tens of times larger than the source file. To avoid huge confusing trace
9178 files and much wasted time, it is best to separate the critical program portion
9179 into its own file and then generate the trace file from this small file.
9182 %%\node Efficiency Notes, Profiling, General Efficiency Hints, Advanced Compiler Use and Efficiency Hints
9183 \section{Efficiency Notes}
9184 \label{efficiency-notes}
9185 \cindex{efficiency notes}
9186 \cpsubindex{notes}{efficiency}
9189 Efficiency notes are messages that warn the user that the compiler has
9190 chosen a relatively inefficient implementation for some operation.
9191 Usually an efficiency note reflects the compiler's desire for more
9192 type information. If the type of the values concerned is known to the
9193 programmer, then additional declarations can be used to get a more
9194 efficient implementation.
9196 Efficiency notes are controlled by the
9197 \code{extensions:inhibit-warnings} (\pxlref{optimize-declaration})
9198 optimization quality. When \code{speed} is greater than
9199 \code{extensions:inhibit-warnings}, efficiency notes are enabled.
9200 Note that this implicitly enables efficiency notes whenever
9201 \code{speed} is increased from its default of \code{1}.
9203 Consider this program with an obscure missing declaration:
9205 (defun eff-note (x y z)
9206 (declare (fixnum x y z))
9207 (the fixnum (+ x y z)))
9209 If compiled with \code{\w{(speed 3) (safety 0)}}, this note is given:
9215 Note: Forced to do inline (signed-byte 32) arithmetic (cost 3).
9216 Unable to do inline fixnum arithmetic (cost 2) because:
9217 The first argument is a (INTEGER -1073741824 1073741822),
9220 This efficiency note tells us that the result of the intermediate
9221 computation \code{\w{(+ x y)}} is not known to be a \code{fixnum}, so
9222 the addition of the intermediate sum to \code{z} must be done less
9223 efficiently. This can be fixed by changing the definition of
9226 (defun eff-note (x y z)
9227 (declare (fixnum x y z))
9228 (the fixnum (+ (the fixnum (+ x y)) z)))
9232 * Type Uncertainty::
9233 * Efficiency Notes and Type Checking::
9234 * Representation Efficiency Notes::
9235 * Verbosity Control::
9238 %%\node Type Uncertainty, Efficiency Notes and Type Checking, Efficiency Notes, Efficiency Notes
9239 \subsection{Type Uncertainty}
9240 \cpsubindex{types}{uncertainty}
9241 \cindex{uncertainty of types}
9243 The main cause of inefficiency is the compiler's lack of adequate
9244 information about the types of function argument and result values.
9245 Many important operations (such as arithmetic) have an inefficient
9246 general (generic) case, but have efficient implementations that can
9247 usually be used if there is sufficient argument type information.
9249 Type efficiency notes are given when a value's type is uncertain.
9250 There is an important distinction between values that are \i{not
9251 known} to be of a good type (uncertain) and values that are \i{known
9252 not} to be of a good type. Efficiency notes are given mainly for
9253 the first case (uncertain types.) If it is clear to the compiler that
9254 there is not an efficient implementation for a particular
9255 function call, then an efficiency note will only be given if the
9256 \code{extensions:inhibit-warnings} optimization quality is \code{0}
9257 (\pxlref{optimize-declaration}.)
9259 In other words, the default efficiency notes only suggest that you add
9260 declarations, not that you change the semantics of your program so
9261 that an efficient implementation will apply. For example, compilation
9262 of this form will not give an efficiency note:
9264 (elt (the list l) i)
9266 even though a vector access is more efficient than indexing a list.
9268 %%\node Efficiency Notes and Type Checking, Representation Efficiency Notes, Type Uncertainty, Efficiency Notes
9269 \subsection{Efficiency Notes and Type Checking}
9270 \cpsubindex{type checking}{efficiency of}
9271 \cpsubindex{efficiency}{of type checking}
9272 \cpsubindex{optimization}{type check}
9274 It is important that the \code{eff-note} example above used
9275 \w{\code{(safety 0)}}. When type checking is enabled, you may get apparently
9276 spurious efficiency notes. With \w{\code{(safety 1)}}, the note has this extra
9279 The result is a (INTEGER -1610612736 1610612733), not a FIXNUM.
9281 This seems strange, since there is a \code{the} declaration on the result of that
9284 In fact, the inefficiency is real, and is a consequence of \python{}'s
9285 treating declarations as assertions to be verified. The compiler
9286 can't assume that the result type declaration is true\dash{}it must
9287 generate the result and then test whether it is of the appropriate
9290 In practice, this means that when you are tuning a program to run
9291 without type checks, you should work from the efficiency notes
9292 generated by unsafe compilation. If you want code to run efficiently
9293 with type checking, then you should pay attention to all the
9294 efficiency notes that you get during safe compilation. Since user
9295 supplied output type assertions (e.g., from \code{the}) are
9296 disregarded when selecting operation implementations for safe code,
9297 you must somehow give the compiler information that allows it to prove
9298 that the result truly must be of a good type. In our example, it
9299 could be done by constraining the argument types more:
9301 (defun eff-note (x y z)
9302 (declare (type (unsigned-byte 18) x y z))
9305 Of course, this declaration is acceptable only if the arguments to \code{eff-note}
9306 always \var{are} \w{\code{(unsigned-byte 18)}} integers.
9308 %%\node Representation Efficiency Notes, Verbosity Control, Efficiency Notes and Type Checking, Efficiency Notes
9309 \subsection{Representation Efficiency Notes}
9310 \label{representation-eff-note}
9311 \cindex{representation efficiency notes}
9312 \cpsubindex{efficiency notes}{for representation}
9313 \cindex{object representation efficiency notes}
9314 \cindex{stack numbers}
9315 \cindex{non-descriptor representations}
9316 \cpsubindex{descriptor representations}{forcing of}
9318 When operating on values that have non-descriptor representations
9319 (\pxlref{non-descriptor}), there can be a substantial time and consing
9320 penalty for converting to and from descriptor representations. For
9321 this reason, the compiler gives an efficiency note whenever it is
9322 forced to do a representation coercion more expensive than
9323 \varref{efficiency-note-cost-threshold}.
9325 Inefficient representation coercions may be due to type uncertainty,
9329 (declare (single-float x))
9335 which produces this efficiency note:
9339 Note: Doing float to pointer coercion (cost 13) from X to VAR.
9341 The variable \code{var} is not known to always hold values of type
9342 \code{single-float}, so a descriptor representation must be used for its value.
9343 In this sort of situation, adding a declaration will eliminate the inefficiency.
9345 Often inefficient representation conversions are not due to type
9346 uncertainty\dash{}instead, they result from evaluating a
9347 non-descriptor expression in a context that requires a descriptor
9351 \item Assignment to or initialization of any data structure other than
9352 a specialized array (\pxlref{specialized-array-types}), or
9354 \item Assignment to a \code{special} variable, or
9356 \item Passing as an argument or returning as a value in any function
9357 call that is not a local call (\pxlref{number-local-call}.)
9360 If such inefficient coercions appear in a ``hot spot'' in the program, data
9361 structure redesign or program reorganization may be necessary to improve
9362 efficiency. See sections \ref{block-compilation}, \ref{numeric-types} and
9365 Because representation selection is done rather late in compilation,
9366 the source context in these efficiency notes is somewhat vague, making
9367 interpretation more difficult. This is a fairly straightforward
9371 (declare (single-float x y))
9374 which gives this efficiency note:
9378 Note: Doing float to pointer coercion (cost 13), for:
9379 The first argument of CONS.
9381 The source context form is almost always the form that receives the value being
9382 coerced (as it is in the preceding example), but can also be the source form
9383 which generates the coerced value. Compiling this example:
9386 (declare (single-float x y))
9387 (cons (if (grue) (+ x y) (snoc)) t))
9393 Note: Doing float to pointer coercion (cost 13).
9396 In either case, the note's text explanation attempts to include
9397 additional information about what locations are the source and
9398 destination of the coercion. Here are some example notes:
9400 (IF (GRUE) X (SNOC))
9401 Note: Doing float to pointer coercion (cost 13) from X.
9404 Note: Doing float to pointer coercion (cost 13) from X to VAR.
9406 Note that the return value of a function is also a place to which coercions may
9409 (DEFUN F+ (X Y) (DECLARE (SINGLE-FLOAT X Y)) (+ X Y))
9410 Note: Doing float to pointer coercion (cost 13) to "<return value>".
9412 Sometimes the compiler is unable to determine a name for the source or
9413 destination, in which case the source context is the only clue.
9416 %%\node Verbosity Control, , Representation Efficiency Notes, Efficiency Notes
9417 \subsection{Verbosity Control}
9418 \cpsubindex{verbosity}{of efficiency notes}
9419 \cpsubindex{efficiency notes}{verbosity}
9421 These variables control the verbosity of efficiency notes:
9423 \begin{defvar}{}{efficiency-note-cost-threshold}
9425 Before printing some efficiency notes, the compiler compares the
9426 value of this variable to the difference in cost between the chosen
9427 implementation and the best potential implementation. If the
9428 difference is not greater than this limit, then no note is printed.
9429 The units are implementation dependent; the initial value suppresses
9430 notes about ``trivial'' inefficiencies. A value of \code{1} will
9431 note any inefficiency.
9434 \begin{defvar}{}{efficiency-note-limit}
9436 When printing some efficiency notes, the compiler reports possible
9437 efficient implementations. The initial value of \code{2} prevents
9438 excessively long efficiency notes in the common case where there is
9439 no type information, so all implementations are possible.
9443 %%\node Profiling, , Efficiency Notes, Advanced Compiler Use and Efficiency Hints
9452 The first step in improving a program's performance is to profile the
9453 activity of the program to find where it spends its time. The best
9454 way to do this is to use the profiling utility found in the
9455 \code{profile} package. This package provides a macro \code{profile}
9456 that encapsulates functions with statistics gathering code.
9459 * Profile Interface::
9460 * Profiling Techniques::
9461 * Nested or Recursive Calls::
9462 * Clock resolution::
9463 * Profiling overhead::
9464 * Additional Timing Utilities::
9465 * A Note on Timing::
9466 * Benchmarking Techniques::
9469 %%\node Profile Interface, Profiling Techniques, Profiling, Profiling
9470 \subsection{Profile Interface}
9472 \begin{defvar}{profile:}{timed-functions}
9474 This variable holds a list of all functions that are currently being
9478 \begin{defmac}{profile:}{profile}{%
9479 \args{\mstar{\var{name} \mor \kwd{callers} \code{t}}}}
9481 This macro wraps profiling code around the named functions. As in
9482 \code{trace}, the \var{name}s are not evaluated. If a function is
9483 already profiled, then the function is unprofiled and reprofiled
9484 (useful to notice function redefinition.) A warning is printed for
9485 each name that is not a defined function.
9487 If \kwd{callers} \code{t} is specified, then each function that calls
9488 this function is recorded along with the number of calls made.
9491 \begin{defmac}{profile:}{unprofile}{%
9492 \args{\mstar{\var{name}}}}
9494 This macro removes profiling code from the named functions. If no
9495 \var{name}s are supplied, all currently profiled functions are
9500 \begin{defmac}{profile:}{profile-all}{%
9501 \args{\keys{\kwd{package} \kwd{callers-p}}}}
9503 This macro in effect calls \code{profile:profile} for each
9504 function in the specified package which defaults to
9505 \code{*package*}. \kwd{callers-p} has the same meaning as in
9506 \code{profile:profile}.
9510 \begin{defmac}{profile:}{report-time}{\args{\mstar{\var{name}}}}
9512 This macro prints a report for each \var{name}d function of the
9513 following information:
9515 \item The total CPU time used in that function for all calls,
9517 \item the total number of bytes consed in that function for all
9520 \item the total number of calls,
9522 \item the average amount of CPU time per call.
9524 Summary totals of the CPU time, consing and calls columns are
9525 printed. An estimate of the profiling overhead is also printed (see
9526 below). If no \var{name}s are supplied, then the times for all
9527 currently profiled functions are printed.
9530 \begin{defmac}{}{reset-time}{\args{\mstar{\var{name}}}}
9532 This macro resets the profiling counters associated with the
9533 \var{name}d functions. If no \var{name}s are supplied, then all
9534 currently profiled functions are reset.
9538 %%\node Profiling Techniques, Nested or Recursive Calls, Profile Interface, Profiling
9539 \subsection{Profiling Techniques}
9541 Start by profiling big pieces of a program, then carefully choose which
9542 functions close to, but not in, the inner loop are to be profiled next.
9543 Avoid profiling functions that are called by other profiled functions, since
9544 this opens the possibility of profiling overhead being included in the reported
9547 If the per-call time reported is less than 1/10 second, then consider the clock
9548 resolution and profiling overhead before you believe the time. It may be that
9549 you will need to run your program many times in order to average out to a
9553 %%\node Nested or Recursive Calls, Clock resolution, Profiling Techniques, Profiling
9554 \subsection{Nested or Recursive Calls}
9556 The profiler attempts to compensate for nested or recursive calls. Time and
9557 consing overhead will be charged to the dynamically innermost (most recent)
9558 call to a profiled function. So profiling a subfunction of a profiled function
9559 will cause the reported time for the outer function to decrease. However if an
9560 inner function has a large number of calls, some of the profiling overhead may
9561 ``leak'' into the reported time for the outer function. In general, be wary of
9562 profiling short functions that are called many times.
9564 %%\node Clock resolution, Profiling overhead, Nested or Recursive Calls, Profiling
9565 \subsection{Clock resolution}
9567 Unless you are very lucky, the length of your machine's clock ``tick'' is
9568 probably much longer than the time it takes a simple function to run. For
9569 example, on the IBM RT, the clock resolution is 1/50 second. This means that
9570 if a function is only called a few times, then only the first couple decimal
9571 places are really meaningful.
9573 Note however, that if a function is called many times, then the statistical
9574 averaging across all calls should result in increased resolution. For example,
9575 on the IBM RT, if a function is called a thousand times, then a resolution of
9576 tens of microseconds can be expected.
9578 %%\node Profiling overhead, Additional Timing Utilities, Clock resolution, Profiling
9579 \subsection{Profiling overhead}
9581 The added profiling code takes time to run every time that the profiled
9582 function is called, which can disrupt the attempt to collect timing
9583 information. In order to avoid serious inflation of the times for functions
9584 that take little time to run, an estimate of the overhead due to profiling is
9585 subtracted from the times reported for each function.
9587 Although this correction works fairly well, it is not totally accurate,
9588 resulting in times that become increasingly meaningless for functions with
9589 short runtimes. This is only a concern when the estimated profiling overhead
9590 is many times larger than the reported total CPU time.
9592 The estimated profiling overhead is not represented in the reported total CPU
9593 time. The sum of total CPU time and the estimated profiling overhead should be
9594 close to the total CPU time for the entire profiling run (as determined by the
9595 \code{time} macro.) Time unaccounted for is probably being used by functions that
9596 you forgot to profile.
9598 %%\node Additional Timing Utilities, A Note on Timing, Profiling overhead, Profiling
9599 \subsection{Additional Timing Utilities}
9601 \begin{defmac}{}{time}{ \args{\var{form}}}
9603 This macro evaluates \var{form}, prints some timing and memory
9604 allocation information to \code{*trace-output*}, and returns any
9605 values that \var{form} returns. The timing information includes
9606 real time, user run time, and system run time. This macro executes
9607 a form and reports the time and consing overhead. If the
9608 \code{time} form is not compiled (e.g. it was typed at top-level),
9609 then \code{compile} will be called on the form to give more accurate
9610 timing information. If you really want to time interpreted speed,
9613 (time (eval '\var{form}))
9615 Things that execute fairly quickly should be timed more than once,
9616 since there may be more paging overhead in the first timing. To
9617 increase the accuracy of very short times, you can time multiple
9620 (time (dotimes (i 100) \var{form}))
9624 \begin{defun}{extensions:}{get-bytes-consed}{}
9626 This function returns the number of bytes allocated since the first
9627 time you called it. The first time it is called it returns zero.
9628 The above profiling routines use this to report consing information.
9631 \begin{defvar}{extensions:}{gc-run-time}
9633 This variable accumulates the run-time consumed by garbage
9634 collection, in the units returned by
9635 \findexed{get-internal-run-time}.
9638 \begin{defconst}{}{internal-time-units-per-second}
9639 The value of internal-time-units-per-second is 100.
9642 %%\node A Note on Timing, Benchmarking Techniques, Additional Timing Utilities, Profiling
9643 \subsection{A Note on Timing}
9644 \cpsubindex{CPU time}{interpretation of}
9645 \cpsubindex{run time}{interpretation of}
9646 \cindex{interpretation of run time}
9648 There are two general kinds of timing information provided by the
9649 \code{time} macro and other profiling utilities: real time and run
9650 time. Real time is elapsed, wall clock time. It will be affected in
9651 a fairly obvious way by any other activity on the machine. The more
9652 other processes contending for CPU and memory, the more real time will
9653 increase. This means that real time measurements are difficult to
9654 replicate, though this is less true on a dedicated workstation. The
9655 advantage of real time is that it is real. It tells you really how
9656 long the program took to run under the benchmarking conditions. The
9657 problem is that you don't know exactly what those conditions were.
9659 Run time is the amount of time that the processor supposedly spent
9660 running the program, as opposed to waiting for I/O or running other
9661 processes. ``User run time'' and ``system run time'' are numbers
9662 reported by the Unix kernel. They are supposed to be a measure of how
9663 much time the processor spent running your ``user'' program (which
9664 will include GC overhead, etc.), and the amount of time that the
9665 kernel spent running ``on your behalf.''
9667 Ideally, user time should be totally unaffected by benchmarking
9668 conditions; in reality user time does depend on other system activity,
9669 though in rather non-obvious ways.
9671 System time will clearly depend on benchmarking conditions. In Lisp
9672 benchmarking, paging activity increases system run time (but not by as much
9673 as it increases real time, since the kernel spends some time waiting for
9674 the disk, and this is not run time, kernel or otherwise.)
9676 In my experience, the biggest trap in interpreting kernel/user run time is
9677 to look only at user time. In reality, it seems that the \var{sum} of kernel
9678 and user time is more reproducible. The problem is that as system activity
9679 increases, there is a spurious \var{decrease} in user run time. In effect, as
9680 paging, etc., increases, user time leaks into system time.
9682 So, in practice, the only way to get truly reproducible results is to run
9683 with the same competing activity on the system. Try to run on a machine
9684 with nobody else logged in, and check with ``ps aux'' to see if there are any
9685 system processes munching large amounts of CPU or memory. If the ratio
9686 between real time and the sum of user and system time varies much between
9687 runs, then you have a problem.
9689 %%\node Benchmarking Techniques, , A Note on Timing, Profiling
9690 \subsection{Benchmarking Techniques}
9691 \cindex{benchmarking techniques}
9693 Given these imperfect timing tools, how should you do benchmarking? The
9694 answer depends on whether you are trying to measure improvements in the
9695 performance of a single program on the same hardware, or if you are trying to
9696 compare the performance of different programs and/or different hardware.
9698 For the first use (measuring the effect of program modifications with
9699 constant hardware), you should look at \var{both} system+user and real time to
9700 understand what effect the change had on CPU use, and on I/O (including
9701 paging.) If you are working on a CPU intensive program, the change in
9702 system+user time will give you a moderately reproducible measure of
9703 performance across a fairly wide range of system conditions. For a CPU
9704 intensive program, you can think of system+user as ``how long it would have
9705 taken to run if I had my own machine.'' So in the case of comparing CPU
9706 intensive programs, system+user time is relatively real, and reasonable to
9709 For programs that spend a substantial amount of their time paging, you
9710 really can't predict elapsed time under a given operating condition without
9711 benchmarking in that condition. User or system+user time may be fairly
9712 reproducible, but it is also relatively meaningless, since in a paging or
9713 I/O intensive program, the program is spending its time waiting, not
9714 running, and system time and user time are both measures of run time.
9715 A change that reduces run time might increase real time by increasing
9718 Another common use for benchmarking is comparing the performance of
9719 the same program on different hardware. You want to know which
9720 machine to run your program on. For comparing different machines
9721 (operating systems, etc.), the only way to compare that makes sense is
9722 to set up the machines in \var{exactly} the way that they will
9723 \var{normally} be run, and then measure \var{real} time. If the
9724 program will normally be run along with X, then run X. If the program
9725 will normally be run on a dedicated workstation, then be sure nobody
9726 else is on the benchmarking machine. If the program will normally be
9727 run on a machine with three other Lisp jobs, then run three other Lisp
9728 jobs. If the program will normally be run on a machine with 8meg of
9729 memory, then run with 8meg. Here, ``normal'' means ``normal for that
9730 machine''. If you have the choice of an unloaded RT or a heavily loaded
9731 PMAX, do your benchmarking on an unloaded RT and a heavily loaded
9734 If you have a program you believe to be CPU intensive, then you might be
9735 tempted to compare ``run'' times across systems, hoping to get a meaningful
9736 result even if the benchmarking isn't done under the expected running
9737 condition. Don't do this, for two reasons:
9740 \item The operating systems might not compute run time in the same
9743 \item Under the real running condition, the program might not be CPU
9744 intensive after all.
9748 In the end, only real time means anything\dash{}it is the amount of time you
9749 have to wait for the result. The only valid uses for run time are:
9752 \item To develop insight into the program. For example, if run time
9753 is much less than elapsed time, then you are probably spending lots
9756 \item To evaluate the relative performance of CPU intensive programs
9757 in the same environment.
9761 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/Unix.ms}
9765 %%\node UNIX Interface, Event Dispatching with SERVE-EVENT, Advanced Compiler Use and Efficiency Hints, Top
9766 \chapter{UNIX Interface}
9767 \label{unix-interface}
9769 \b{By Robert MacLachlan, Skef Wholey,}
9772 \b{Bill Chiles, and William Lott}
9775 CMU Common Lisp attempts to make the full power of the underlying
9776 environment available to the Lisp programmer. This is done using
9777 a combination of hand-coded interfaces and foreign function calls to C
9778 libraries. Although the techniques differ, the style of interface is
9779 similar. This chapter provides an overview of the facilities
9780 available and general rules for using them, as well as describing
9781 specific features in detail. It is assumed that the reader has a
9782 working familiarity with Mach, Unix and X, as well as access to the
9783 standard system documentation.
9786 * Reading the Command Line::
9787 * Lisp Equivalents for C Routines::
9788 * Type Translations::
9789 * System Area Pointers::
9790 * Unix System Calls::
9791 * File Descriptor Streams::
9792 * Making Sense of Mach Return Codes::
9797 %%\node Reading the Command Line, Useful Variables, UNIX Interface, UNIX Interface
9798 \section{Reading the Command Line}
9800 The shell parses the command line with which Lisp is invoked, and
9801 passes a data structure containing the parsed information to Lisp.
9802 This information is then extracted from that data structure and put
9803 into a set of Lisp data structures.
9805 \begin{defvar}{extensions:}{command-line-strings}
9806 \defvarx[extensions:]{command-line-utility-name}
9807 \defvarx[extensions:]{command-line-words}
9808 \defvarx[extensions:]{command-line-switches}
9810 The value of \code{*command-line-strings*} is a list of strings that
9811 make up the command line, one word per string. The first word on
9812 the command line, i.e. the name of the program invoked (usually
9813 \code{lisp}) is stored in \code{*command-line-utility-name*}. The
9814 value of \code{*command-line-switches*} is a list of
9815 \code{command-line-switch} structures, with a structure for each
9816 word on the command line starting with a hyphen. All the command
9817 line words between the program name and the first switch are stored
9818 in \code{*command-line-words*}.
9821 The following functions may be used to examine \code{command-line-switch}
9823 \begin{defun}{extensions:}{cmd-switch-name}{\args{\var{switch}}}
9825 Returns the name of the switch, less the preceding hyphen and
9826 trailing equal sign (if any).
9828 \begin{defun}{extensions:}{cmd-switch-value}{\args{\var{switch}}}
9830 Returns the value designated using an embedded equal sign, if any.
9831 If the switch has no equal sign, then this is null.
9833 \begin{defun}{extensions:}{cmd-switch-words}{\args{\var{switch}}}
9835 Returns a list of the words between this switch and the next switch
9836 or the end of the command line.
9838 \begin{defun}{extensions:}{cmd-switch-arg}{\args{\var{switch}}}
9840 Returns the first non-null value from \code{cmd-switch-value}, the
9841 first element in \code{cmd-switch-words}, or the first word in
9842 \var{command-line-words}.
9845 \begin{defun}{extensions:}{get-command-line-switch}{\args{\var{sname}}}
9847 This function takes the name of a switch as a string and returns the
9848 value of the switch given on the command line. If no value was
9849 specified, then any following words are returned. If there are no
9850 following words, then \true{} is returned. If the switch was not
9851 specified, then \false{} is returned.
9854 \begin{defmac}{extensions:}{defswitch}{%
9855 \args{\var{name} \ampoptional{} \var{function}}}
9857 This macro causes \var{function} to be called when the switch
9858 \var{name} appears in the command line. Name is a simple-string
9859 that does not begin with a hyphen (unless the switch name really
9860 does begin with one.)
9862 If \var{function} is not supplied, then the switch is parsed into
9863 \var{command-line-switches}, but otherwise ignored. This suppresses
9864 the undefined switch warning which would otherwise take place. The
9865 warning can also be globally suppressed by
9866 \var{complain-about-illegal-switches}.
9869 %%\node Useful Variables, Lisp Equivalents for C Routines, Reading the Command Line, UNIX Interface
9871 \section{Useful Variables}
9873 \begin{defvar}{system:}{stdin}
9874 \defvarx[system:]{stdout} \defvarx[system:]{stderr}
9876 Streams connected to the standard input, output and error file
9880 \begin{defvar}{system:}{tty}
9882 A stream connected to \file{/dev/tty}.
9885 %%\node Lisp Equivalents for C Routines, Type Translations, Useful Variables, UNIX Interface
9886 \section{Lisp Equivalents for C Routines}
9888 The UNIX documentation describes the system interface in terms of C
9889 procedure headers. The corresponding Lisp function will have a somewhat
9890 different interface, since Lisp argument passing conventions and
9891 datatypes are different.
9893 The main difference in the argument passing conventions is that Lisp does not
9894 support passing values by reference. In Lisp, all arguments and results are
9895 passed by value. Interface functions take some fixed number of arguments and
9896 return some fixed number of values. A given ``parameter'' in the C
9897 specification will appear as an argument, return value, or both, depending on
9898 whether it is an In parameter, Out parameter, or In/Out parameter. The basic
9899 transformation one makes to come up with the Lisp equivalent of a C routine is
9900 to remove the Out parameters from the call, and treat them as extra return
9901 values. In/Out parameters appear both as arguments and return values. Since
9902 Out and In/Out parameters are only conventions in C, you must determine the
9903 usage from the documentation.
9906 Thus, the C routine declared as
9908 kern_return_t lookup(servport, portsname, portsid)
9911 int *portsid; /* out */
9914 *portsid = <expression to compute portsid field>
9915 return(KERN_SUCCESS);
9918 has as its Lisp equivalent something like
9920 (defun lookup (ServPort PortsName)
9924 <expression to compute portsid field>))
9926 If there are multiple out or in-out arguments, then there are multiple
9927 additional return values.
9929 Fortunately, CMU Common Lisp programmers rarely have to worry about the
9930 nuances of this translation process, since the names of the arguments and
9931 return values are documented in a way so that the \code{describe} function
9932 (and the \Hemlock{} \code{Describe Function Call} command, invoked with
9933 \b{C-M-Shift-A}) will list this information. Since the names of arguments
9934 and return values are usually descriptive, the information that
9935 \code{describe} prints is usually all one needs to write a
9936 call. Most programmers use this on-line documentation nearly
9937 all of the time, and thereby avoid the need to handle bulky
9938 manuals and perform the translation from barbarous tongues.
9940 %%\node Type Translations, System Area Pointers, Lisp Equivalents for C Routines, UNIX Interface
9941 \section{Type Translations}
9943 \cpsubindex{types}{alien}
9944 \cpsubindex{types}{foreign language}
9946 Lisp data types have very different representations from those used by
9947 conventional languages such as C. Since the system interfaces are
9948 designed for conventional languages, Lisp must translate objects to and
9949 from the Lisp representations. Many simple objects have a direct
9950 translation: integers, characters, strings and floating point numbers
9951 are translated to the corresponding Lisp object. A number of types,
9952 however, are implemented differently in Lisp for reasons of clarity and
9955 Instances of enumerated types are expressed as keywords in Lisp.
9956 Records, arrays, and pointer types are implemented with the \Alien{}
9957 facility (see page~\pageref{aliens}). Access functions are defined
9958 for these types which convert fields of records, elements of arrays,
9959 or data referenced by pointers into Lisp objects (possibly another
9960 object to be referenced with another access function).
9962 One should dispose of \Alien{} objects created by constructor
9963 functions or returned from remote procedure calls when they are no
9964 longer of any use, freeing the virtual memory associated with that
9965 object. Since \alien{}s contain pointers to non-Lisp data, the
9966 garbage collector cannot do this itself. If the memory
9967 was obtained from \funref{make-alien} or from a foreign function call
9968 to a routine that used \code{malloc}, then \funref{free-alien} should
9969 be used. If the \alien{} was created
9970 using MACH memory allocation (e.g. \code{vm\_allocate}), then the
9971 storage should be freed using \code{vm\_deallocate}.
9973 %%\node System Area Pointers, Unix System Calls, Type Translations, UNIX Interface
9974 \section{System Area Pointers}
9975 \label{system-area-pointers}
9977 \cindex{pointers}\cpsubindex{malloc}{C function}\cpsubindex{free}{C function}
9978 Note that in some cases an address is represented by a Lisp integer, and in
9979 other cases it is represented by a real pointer. Pointers are usually used
9980 when an object in the current address space is being referred to. The MACH
9981 virtual memory manipulation calls must use integers, since in principle the
9982 address could be in any process, and Lisp cannot abide random pointers.
9983 Because these types are represented differently in Lisp, one must explicitly
9984 coerce between these representations.
9986 System Area Pointers (SAPs) provide a mechanism that bypasses the
9987 \Alien{} type system and accesses virtual memory directly. A SAP is a
9988 raw byte pointer into the \code{lisp} process address space. SAPs are
9989 represented with a pointer descriptor, so SAP creation can cause
9990 consing. However, the compiler uses a non-descriptor representation
9991 for SAPs when possible, so the consing overhead is generally minimal.
9992 \xlref{non-descriptor}.
9994 \begin{defun}{system:}{sap-int}{\args{\var{sap}}}
9995 \defunx[system:]{int-sap}{\args{\var{int}}}
9997 The function \code{sap-int} is used to generate an integer
9998 corresponding to the system area pointer, suitable for passing to
9999 the kernel interfaces (which want all addresses specified as
10000 integers). The function \code{int-sap} is used to do the opposite
10001 conversion. The integer representation of a SAP is the byte offset
10002 of the SAP from the start of the address space.
10005 \begin{defun}{system:}{sap+}{\args{\var{sap} \var{offset}}}
10007 This function adds a byte \var{offset} to \var{sap}, returning a new
10011 \begin{defun}{system:}{sap-ref-8}{\args{\var{sap} \var{offset}}}
10012 \defunx[system:]{sap-ref-16}{\args{\var{sap} \var{offset}}}
10013 \defunx[system:]{sap-ref-32}{\args{\var{sap} \var{offset}}}
10015 These functions return the 8, 16 or 32 bit unsigned integer at
10016 \var{offset} from \var{sap}. The \var{offset} is always a byte
10017 offset, regardless of the number of bits accessed. \code{setf} may
10018 be used with these functions to deposit values into virtual
10022 \begin{defun}{system:}{signed-sap-ref-8}{\args{\var{sap} \var{offset}}}
10023 \defunx[system:]{signed-sap-ref-16}{\args{\var{sap} \var{offset}}}
10024 \defunx[system:]{signed-sap-ref-32}{\args{\var{sap} \var{offset}}}
10026 These functions are the same as the above unsigned operations,
10027 except that they sign-extend, returning a negative number if the
10031 %%\node Unix System Calls, File Descriptor Streams, System Area Pointers, UNIX Interface
10032 \section{Unix System Calls}
10034 You probably won't have much cause to use them, but all the Unix system
10035 calls are available. The Unix system call functions are in the
10036 \code{Unix} package. The name of the interface for a particular system
10037 call is the name of the system call prepended with \code{unix-}. The
10038 system usually defines the associated constants without any prefix name.
10039 To find out how to use a particular system call, try using
10040 \code{describe} on it. If that is unhelpful, look at the source in
10041 \file{syscall.lisp} or consult your system maintainer.
10043 The Unix system calls indicate an error by returning \false{} as the
10044 first value and the Unix error number as the second value. If the call
10045 succeeds, then the first value will always be non-\nil, often \code{t}.
10047 \begin{defun}{Unix:}{get-unix-error-msg}{\args{\var{error}}}
10049 This function returns a string describing the Unix error number
10053 %%\node File Descriptor Streams, Making Sense of Mach Return Codes, Unix System Calls, UNIX Interface
10054 \section{File Descriptor Streams}
10056 Many of the UNIX system calls return file descriptors. Instead of using other
10057 UNIX system calls to perform I/O on them, you can create a stream around them.
10058 For this purpose, fd-streams exist. See also \funref{read-n-bytes}.
10060 \begin{defun}{system:}{make-fd-stream}{%
10061 \args{\var{descriptor}} \keys{\kwd{input} \kwd{output}
10062 \kwd{element-type}} \morekeys{\kwd{buffering} \kwd{name}
10063 \kwd{file} \kwd{original}} \yetmorekeys{\kwd{delete-original}
10064 \kwd{auto-close}} \yetmorekeys{\kwd{timeout} \kwd{pathname}}}
10066 This function creates a file descriptor stream using
10067 \var{descriptor}. If \kwd{input} is non-\nil, input operations are
10068 allowed. If \kwd{output} is non-\nil, output operations are
10069 allowed. The default is input only. These keywords are defined:
10071 \item[\kwd{element-type}] is the type of the unit of transaction for
10072 the stream, which defaults to \code{string-char}. See the Common
10073 Lisp description of \code{open} for valid values.
10075 \item[\kwd{buffering}] is the kind of output buffering desired for
10076 the stream. Legal values are \kwd{none} for no buffering,
10077 \kwd{line} for buffering up to each newline, and \kwd{full} for
10080 \item[\kwd{name}] is a simple-string name to use for descriptive
10081 purposes when the system prints an fd-stream. When printing
10082 fd-streams, the system prepends the stream's name with \code{Stream
10083 for }. If \var{name} is unspecified, it defaults to a string
10084 containing \var{file} or \var{descriptor}, in order of preference.
10086 \item[\kwd{file}, \kwd{original}] \var{file} specifies the defaulted
10087 namestring of the associated file when creating a file stream
10088 (must be a \code{simple-string}). \var{original} is the
10089 \code{simple-string} name of a backup file containing the original
10090 contents of \var{file} while writing \var{file}.
10092 When you abort the stream by passing \true{} to \code{close} as
10093 the second argument, if you supplied both \var{file} and
10094 \var{original}, \code{close} will rename the \var{original} name
10095 to the \var{file} name. When you \code{close} the stream
10096 normally, if you supplied \var{original}, and
10097 \var{delete-original} is non-\nil, \code{close} deletes
10098 \var{original}. If \var{auto-close} is true (the default), then
10099 \var{descriptor} will be closed when the stream is garbage
10102 \item[\kwd{pathname}] is the original pathname passed to \code{open} and
10103 returned by \code{pathname}; not defaulted or translated.
10105 \item[\kwd{timeout}] if non-null, then \var{timeout} is an integer
10106 number of seconds after which an input wait should time out. If a
10107 read does time out, then the \code{system:io-timeout} condition is
10112 \begin{defun}{system:}{fd-stream-p}{\args{\var{object}}}
10114 This function returns \true{} if \var{object} is an fd-stream, and
10115 \nil{} if not. Obsolete: use the portable \code{(typep x
10119 \begin{defun}{system:}{fd-stream-fd}{\args{\var{stream}}}
10121 This returns the file descriptor associated with \var{stream}.
10125 %%\node Making Sense of Mach Return Codes, Unix Interrupts, File Descriptor Streams, UNIX Interface
10126 \section{Making Sense of Mach Return Codes}
10128 Whenever a remote procedure call returns a Unix error code (such as
10129 \code{kern\_return\_t}), it is usually prudent to check that code to
10130 see if the call was successful. To relieve the programmer of the
10131 hassle of testing this value himself, and to centralize the
10132 information about the meaning of non-success return codes, CMU Common
10133 Lisp provides a number of macros and functions. See also
10134 \funref{get-unix-error-msg}.
10136 \begin{defun}{system:}{gr-error}{%
10137 \args{\var{function} \var{gr} \ampoptional{} \var{context}}}
10139 Signals a Lisp error, printing a message indicating that the call to
10140 the specified \var{function} failed, with the return code \var{gr}.
10141 If supplied, the \var{context} string is printed after the
10142 \var{function} name and before the string associated with the
10143 \var{gr}. For example:
10145 * (gr-error 'nukegarbage 3 "lost big")
10147 Error in function GR-ERROR:
10148 NUKEGARBAGE lost big, no space.
10150 0: Return to Top-Level.
10151 Debug (type H for help)
10152 (Signal #<Conditions:Simple-Error.5FDE0>)
10157 \begin{defmac}{system:}{gr-call}{\args{\var{function} \amprest{} \var{args}}}
10158 \defmacx[system:]{gr-call*}{\args{\var{function} \amprest{} \var{args}}}
10160 These macros can be used to call a function and automatically check
10161 the GeneralReturn code and signal an appropriate error in case of
10162 non-successful return. \code{gr-call} returns \false{} if no error
10163 occurs, while \code{gr-call*} returns the second value of the
10166 * (gr-call mach:port_allocate *task-self*)
10172 \begin{defmac}{system:}{gr-bind}{
10173 \args{\code{(}\mstar{\var{var}}\code{)}
10174 \code{(}\var{function} \mstar{\var{arg}}\code{)}
10175 \mstar{\var{form}}}}
10177 This macro can be used much like \code{multiple-value-bind} to bind
10178 the \var{var}s to return values resulting from calling the
10179 \var{function} with the given \var{arg}s. The first return value is
10180 not bound to a variable, but is checked as a GeneralReturn code, as
10183 * (gr-bind (port_list port_list_cnt)
10184 (mach:port_select *task-self*)
10185 (format t "The port count is ~S." port_list_cnt)
10187 The port count is 0.
10193 %%\node Unix Interrupts, , Making Sense of Mach Return Codes, UNIX Interface
10194 \section{Unix Interrupts}
10196 \cindex{unix interrupts} \cindex{interrupts}
10197 CMU Common Lisp allows access to all the Unix signals that can be generated
10198 under Unix. It should be noted that if this capability is abused, it is
10199 possible to completely destroy the running Lisp. The following macros and
10200 functions allow access to the Unix interrupt system. The signal names as
10201 specified in section 2 of the \i{Unix Programmer's Manual} are exported
10202 from the Unix package.
10205 * Changing Interrupt Handlers::
10206 * Examples of Signal Handlers::
10209 %%\node Changing Interrupt Handlers, Examples of Signal Handlers, Unix Interrupts, Unix Interrupts
10210 \subsection{Changing Interrupt Handlers}
10211 \label{signal-handlers}
10213 \begin{defmac}{system:}{with-enabled-interrupts}{
10214 \args{\var{specs} \amprest{} \var{body}}}
10216 This macro should be called with a list of signal specifications,
10217 \var{specs}. Each element of \var{specs} should be a list of
10218 two\hide{ or three} elements: the first should be the Unix signal
10219 for which a handler should be established, the second should be a
10220 function to be called when the signal is received\hide{, and the
10221 third should be an optional character used to generate the signal
10222 from the keyboard. This last item is only useful for the SIGINT,
10223 SIGQUIT, and SIGTSTP signals.} One or more signal handlers can be
10224 established in this way. \code{with-enabled-interrupts} establishes
10225 the correct signal handlers and then executes the forms in
10226 \var{body}. The forms are executed in an unwind-protect so that the
10227 state of the signal handlers will be restored to what it was before
10228 the \code{with-enabled-interrupts} was entered. A signal handler
10229 function specified as NIL will set the Unix signal handler to the
10230 default which is normally either to ignore the signal or to cause a
10231 core dump depending on the particular signal.
10234 \begin{defmac}{system:}{without-interrupts}{\args{\amprest{} \var{body}}}
10236 It is sometimes necessary to execute a piece of code that cannot be
10237 interrupted. This macro executes the forms in \var{body} with interrupts
10238 disabled. Note that the Unix interrupts are not actually disabled,
10239 rather they are queued until after \var{body} has finished
10243 \begin{defmac}{system:}{with-interrupts}{\args{\amprest{} \var{body}}}
10245 When executing an interrupt handler, the system disables interrupts,
10246 as if the handler was wrapped in a \code{without-interrupts}.
10247 The macro \code{with-interrupts} can be used to enable interrupts
10248 while the forms in \var{body} are evaluated. This is useful if
10249 \var{body} is going to enter a break loop or do some long
10250 computation that might need to be interrupted.
10253 \begin{defmac}{system:}{without-hemlock}{\args{\amprest{} \var{body}}}
10255 For some interrupts, such as SIGTSTP (suspend the Lisp process and
10256 return to the Unix shell) it is necessary to leave Hemlock and then
10257 return to it. This macro executes the forms in \var{body} after
10258 exiting Hemlock. When \var{body} has been executed, control is
10259 returned to Hemlock.
10262 \begin{defun}{system:}{enable-interrupt}{%
10263 \args{\var{signal} \var{function}\hide{ \ampoptional{}
10266 This function establishes \var{function} as the handler for
10268 \hide{The optional \var{character} can be specified
10269 for the SIGINT, SIGQUIT, and SIGTSTP signals and causes that
10270 character to generate the appropriate signal from the keyboard.}
10271 Unless you want to establish a global signal handler, you should use
10272 the macro \code{with-enabled-interrupts} to temporarily establish a
10273 signal handler. \hide{Without \var{character},}
10274 \code{enable-interrupt} returns the old function associated with the
10275 signal. \hide{When \var{character} is specified for SIGINT,
10276 SIGQUIT, or SIGTSTP, it returns the old character code.}
10279 \begin{defun}{system:}{ignore-interrupt}{\args{\var{signal}}}
10281 Ignore-interrupt sets the Unix signal mechanism to ignore
10282 \var{signal} which means that the Lisp process will never see the
10283 signal. Ignore-interrupt returns the old function associated with
10284 the signal or \false{} if none is currently defined.
10287 \begin{defun}{system:}{default-interrupt}{\args{\var{signal}}}
10289 Default-interrupt can be used to tell the Unix signal mechanism to
10290 perform the default action for \var{signal}. For details on what
10291 the default action for a signal is, see section 2 of the \i{Unix
10292 Programmer's Manual}. In general, it is likely to ignore the
10293 signal or to cause a core dump.
10296 %%\node Examples of Signal Handlers, , Changing Interrupt Handlers, Unix Interrupts
10297 \subsection{Examples of Signal Handlers}
10299 The following code is the signal handler used by the Lisp system for the
10302 (defun ih-sigint (signal code scp)
10303 (declare (ignore signal code scp))
10306 (break "Software Interrupt" t))))
10308 The \code{without-hemlock} form is used to make sure that Hemlock is exited before
10309 a break loop is entered. The \code{with-interrupts} form is used to enable
10310 interrupts because the user may want to generate an interrupt while in the
10311 break loop. Finally, break is called to enter a break loop, so the user
10312 can look at the current state of the computation. If the user proceeds
10313 from the break loop, the computation will be restarted from where it was
10316 The following function is the Lisp signal handler for the SIGTSTP signal
10317 which suspends a process and returns to the Unix shell.
10319 (defun ih-sigtstp (signal code scp)
10320 (declare (ignore signal code scp))
10322 (Unix:unix-kill (Unix:unix-getpid) Unix:sigstop)))
10324 Lisp uses this interrupt handler to catch the SIGTSTP signal because it is
10325 necessary to get out of Hemlock in a clean way before returning to the shell.
10327 To set up these interrupt handlers, the following is recommended:
10329 (with-enabled-interrupts ((Unix:SIGINT #'ih-sigint)
10330 (Unix:SIGTSTP #'ih-sigtstp))
10331 <user code to execute with the above signal handlers enabled.>
10336 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/server.ms}
10338 %%\node Event Dispatching with SERVE-EVENT, Alien Objects, UNIX Interface, Top
10339 \chapter{Event Dispatching with SERVE-EVENT}
10341 \b{By Bill Chiles and Robert MacLachlan}
10344 It is common to have multiple activities simultaneously operating in the same
10345 Lisp process. Furthermore, Lisp programmers tend to expect a flexible
10346 development environment. It must be possible to load and modify application
10347 programs without requiring modifications to other running programs. CMU Common
10348 Lisp achieves this by having a central scheduling mechanism based on an
10349 event-driven, object-oriented paradigm.
10351 An \var{event} is some interesting happening that should cause the Lisp process
10352 to wake up and do something. These events include X events and activity on
10353 Unix file descriptors. The object-oriented mechanism is only available with
10354 X events, where it is optional, as described later in this
10355 chapter. In an X event, the window ID is the object capability and the X event
10356 type is the operation code. The Unix file descriptor input mechanism simply
10357 consists of an association list of a handler to call when input shows up on a
10358 particular file descriptor.
10363 * The SERVE-EVENT Function::
10364 * Using SERVE-EVENT with Unix File Descriptors::
10365 * Using SERVE-EVENT with the CLX Interface to X::
10366 * A SERVE-EVENT Example::
10369 %%\node Object Sets, The SERVE-EVENT Function, Event Dispatching with SERVE-EVENT, Event Dispatching with SERVE-EVENT
10370 \section{Object Sets}
10371 \label{object-sets}
10372 \cindex{object sets}
10373 An \i{object set} is a collection of objects that have the same implementation
10374 for each operation. Externally the object is represented by the object
10375 capability and the operation is represented by the operation code. Within
10376 Lisp, the object is represented by an arbitrary Lisp object, and the
10377 implementation for the operation is represented by an arbitrary Lisp function.
10378 The object set mechanism maintains this translation from the external to the
10379 internal representation.
10381 \begin{defun}{system:}{make-object-set}{%
10382 \args{\var{name} \ampoptional{} \var{default-handler}}}
10384 This function makes a new object set. \var{Name} is a string used
10385 only for purposes of identifying the object set when it is printed.
10386 \var{Default-handler} is the function used as a handler when an
10387 undefined operation occurs on an object in the set. You can define
10388 operations with the \code{serve-}\var{operation} functions exported
10389 from the \code{extensions} package for X events
10390 (\pxlref{x-serve-mumbles}). Objects are added with
10391 \code{system:add-xwindow-object}. Initially the object set has no
10392 objects and no defined operations.
10395 \begin{defun}{system:}{object-set-operation}{%
10396 \args{\var{object-set} \var{operation-code}}}
10398 This function returns the handler function that is the
10399 implementation of the operation corresponding to
10400 \var{operation-code} in \var{object-set}. When set with
10401 \code{setf}, the setter function establishes the new handler. The
10402 \code{serve-}\var{operation} functions exported from the
10403 \code{extensions} package for X events (\pxlref{x-serve-mumbles})
10404 call this on behalf of the user when announcing a new operation for
10408 \begin{defun}{system:}{add-xwindow-object}{%
10409 \args{\var{window} \var{object} \var{object-set}}}
10411 This function adds \var{window} to \var{object-set}.
10412 \var{Object} is an arbitrary Lisp object that is associated with the
10413 \var{window} capability. \var{Window} is a CLX
10414 window. When an event occurs, \code{system:serve-event} passes
10415 \var{object} as an argument to the handler function.
10419 %%\node The SERVE-EVENT Function, Using SERVE-EVENT with Unix File Descriptors, Object Sets, Event Dispatching with SERVE-EVENT
10420 \section{The SERVE-EVENT Function}
10422 The \code{system:serve-event} function is the standard way for an application
10423 to wait for something to happen. For example, the Lisp system calls
10424 \code{system:serve-event} when it wants input from X or a terminal stream.
10425 The idea behind \code{system:serve-event} is that it knows the appropriate
10426 action to take when any interesting event happens. If an application calls
10427 \code{system:serve-event} when it is idle, then any other applications with
10428 pending events can run. This allows several applications to run ``at the
10429 same time'' without interference, even though there is only one thread of
10430 control. Note that if an application is waiting for input of any kind,
10431 then other applications will get events.
10433 \begin{defun}{system:}{serve-event}{\args{\ampoptional{} \var{timeout}}}
10435 This function waits for an event to happen and then dispatches to
10436 the correct handler function. If specified, \var{timeout} is the
10437 number of seconds to wait before timing out. A time out of zero
10438 seconds is legal and causes \code{system:serve-event} to poll for
10439 any events immediately available for processing.
10440 \code{system:serve-event} returns \true{} if it serviced at least
10441 one event, and \nil{} otherwise. Depending on the application, when
10442 \code{system:serve-event} returns \true, you might want to call it
10443 repeatedly with a timeout of zero until it returns \nil.
10445 If input is available on any designated file descriptor, then this
10446 calls the appropriate handler function supplied by
10447 \code{system:add-fd-handler}.
10449 Since events for many different applications may arrive
10450 simultaneously, an application waiting for a specific event must
10451 loop on \code{system:serve-event} until the desired event happens.
10452 Since programs such as \hemlock{} call \code{system:serve-event} for
10453 input, applications usually do not need to call
10454 \code{system:serve-event} at all; \hemlock{} allows other
10455 applications' handlers to run when it goes into an input wait.
10458 \begin{defun}{system:}{serve-all-events}{\args{\ampoptional{} \var{timeout}}}
10460 This function is similar to \code{system:serve-event}, except it
10461 serves all the pending events rather than just one. It returns
10462 \true{} if it serviced at least one event, and \nil{} otherwise.
10466 %%\node Using SERVE-EVENT with Unix File Descriptors, Using SERVE-EVENT with the CLX Interface to X, The SERVE-EVENT Function, Event Dispatching with SERVE-EVENT
10467 \section{Using SERVE-EVENT with Unix File Descriptors}
10468 Object sets are not available for use with file descriptors, as there are
10469 only two operations possible on file descriptors: input and output.
10470 Instead, a handler for either input or output can be registered with
10471 \code{system:serve-event} for a specific file descriptor. Whenever any input
10472 shows up, or output is possible on this file descriptor, the function
10473 associated with the handler for that descriptor is funcalled with the
10474 descriptor as its single argument.
10476 \begin{defun}{system:}{add-fd-handler}{%
10477 \args{\var{fd} \var{direction} \var{function}}}
10479 This function installs and returns a new handler for the file
10480 descriptor \var{fd}. \var{direction} can be either \kwd{input} if
10481 the system should invoke the handler when input is available or
10482 \kwd{output} if the system should invoke the handler when output is
10483 possible. This returns a unique object representing the handler,
10484 and this is a suitable argument for \code{system:remove-fd-handler}.
10485 \var{function} must take one argument, the file descriptor.
10488 \begin{defun}{system:}{remove-fd-handler}{\args{\var{handler}}}
10490 This function removes \var{handler}, which \code{add-fd-handler} must
10491 have previously returned.
10494 \begin{defmac}{system:}{with-fd-handler}{%
10495 \args{(\var{direction} \var{fd} \var{function})
10496 \mstar{\var{form}}}}
10498 This macro executes the supplied forms with a handler installed
10499 using \var{fd}, \var{direction}, and \var{function}. See
10500 \code{system:add-fd-handler}.
10503 \begin{defun}{system:}{wait-until-fd-usable}{%
10504 \args{\var{direction} \var{fd} \ampoptional{} \var{timeout}}}
10506 This function waits for up to \var{timeout} seconds for \var{fd} to
10507 become usable for \var{direction} (either \kwd{input} or
10508 \kwd{output}). If \var{timeout} is \nil{} or unspecified, this
10512 \begin{defun}{system:}{invalidate-descriptor}{\args{\var{fd}}}
10514 This function removes all handlers associated with \var{fd}. This
10515 should only be used in drastic cases (such as I/O errors, but not
10516 necessarily EOF). Normally, you should use \code{remove-fd-handler}
10517 to remove the specific handler.
10522 section{Using SERVE-EVENT with Matchmaker Interfaces}
10523 \label{ipc-serve-mumbles}
10524 Remember from section \ref{object-sets}, an object set is a collection of
10525 objects, ports in this case, with some set of operations, message ID's, with
10526 corresponding implementations, the same handler functions.
10528 Matchmaker uses the object set operations to implement servers. For
10529 each server interface \i{XXX}, Matchmaker defines a function,
10530 \code{serve-}\i{XXX}, of two arguments, an object set and a function.
10531 The \code{serve-}\i{XXX} function establishes the function as the
10532 implementation of the \i{XXX} operation in the object set. Recall
10533 from section \ref{object-sets}, \code{system:add-port-object}
10534 associates some Lisp object with a port in an object set. When
10535 \code{system:serve-event} notices activity on a port, it calls the
10536 function given to \code{serve-}\i{XXX} with the object given to
10537 \code{system:add-port-object} and the input parameters specified in
10538 the message definition. The return values from the function are used
10539 as the output parameters for the message, if any.
10540 \code{serve-}\i{XXX} functions are also generated for each \i{server
10541 message} and asynchronous user interface.
10543 To use a Lisp server:
10546 \item Create an object set.
10548 \item Define some operations on it using the \code{serve-}\i{XXX}
10551 \item Create an object for every port on which you receive requests.
10553 \item Call \code{system:serve-event} to service an RPC request.
10557 Object sets allow many servers in the same Lisp to operate without knowing
10558 about each other. There can be multiple implementations of the same interface
10559 with different operation handlers established in distinct object sets. This
10560 property is especially useful when handling emergency messages.
10564 %%\node Using SERVE-EVENT with the CLX Interface to X, A SERVE-EVENT Example, Using SERVE-EVENT with Unix File Descriptors, Event Dispatching with SERVE-EVENT
10565 \section{Using SERVE-EVENT with the CLX Interface to X}
10566 \label{x-serve-mumbles}
10567 Remember from section \ref{object-sets}, an object set is a collection of
10568 objects, CLX windows in this case, with some set of operations, event keywords,
10569 with corresponding implementations, the same handler functions. Since X allows
10570 multiple display connections from a given process, you can avoid using object
10571 sets if every window in an application or display connection behaves the same.
10572 If a particular X application on a single display connection has windows that
10573 want to handle certain events differently, then using object sets is a
10574 convenient way to organize this since you need some way to map the window/event
10575 combination to the appropriate functionality.
10577 The following is a discussion of functions exported from the \code{extensions}
10578 package that facilitate handling CLX events through \code{system:serve-event}.
10579 The first two routines are useful regardless of whether you use
10580 \code{system:serve-event}:
10581 \begin{defun}{ext:}{open-clx-display}{%
10582 \args{\ampoptional{} \var{string}}}
10584 This function parses \var{string} for an X display specification
10585 including display and screen numbers. \var{String} defaults to the
10588 (cdr (assoc :display ext:*environment-list* :test #'eq))
10590 If any field in the display specification is missing, this signals
10591 an error. \code{ext:open-clx-display} returns the CLX display and
10595 \begin{defun}{ext:}{flush-display-events}{\args{\var{display}}}
10597 This function flushes all the events in \var{display}'s event queue
10598 including the current event, in case the user calls this from within
10604 * Without Object Sets::
10605 * With Object Sets::
10608 %%\node Without Object Sets, With Object Sets, Using SERVE-EVENT with the CLX Interface to X, Using SERVE-EVENT with the CLX Interface to X
10609 \subsection{Without Object Sets}
10610 Since most applications that use CLX can avoid the complexity of object sets,
10611 these routines are described in a separate section. The routines described in
10612 the next section that use the object set mechanism are based on these
10615 \begin{defun}{ext:}{enable-clx-event-handling}{%
10616 \args{\var{display} \var{handler}}}
10618 This function causes \code{system:serve-event} to notice when there
10619 is input on \var{display}'s connection to the X11 server. When this
10620 happens, \code{system:serve-event} invokes \var{handler} on
10621 \var{display} in a dynamic context with an error handler bound that
10622 flushes all events from \var{display} and returns. By returning,
10623 the error handler declines to handle the error, but it will have
10624 cleared all events; thus, entering the debugger will not result in
10625 infinite errors due to streams that wait via
10626 \code{system:serve-event} for input. Calling this repeatedly on the
10627 same \var{display} establishes \var{handler} as a new handler,
10628 replacing any previous one for \var{display}.
10631 \begin{defun}{ext:}{disable-clx-event-handling}{\args{\var{display}}}
10633 This function undoes the effect of
10634 \code{ext:enable-clx-event-handling}.
10637 \begin{defmac}{ext:}{with-clx-event-handling}{%
10638 \args{(\var{display} \var{handler}) \mstar{form}}}
10640 This macro evaluates each \var{form} in a context where
10641 \code{system:serve-event} invokes \var{handler} on \var{display}
10642 whenever there is input on \var{display}'s connection to the X
10643 server. This destroys any previously established handler for
10648 %%\node With Object Sets, , Without Object Sets, Using SERVE-EVENT with the CLX Interface to X
10649 \subsection{With Object Sets}
10650 This section discusses the use of object sets and
10651 \code{system:serve-event} to handle CLX events. This is necessary
10652 when a single X application has distinct windows that want to handle
10653 the same events in different ways. Basically, you need some way of
10654 asking for a given window which way you want to handle some event
10655 because this event is handled differently depending on the window.
10656 Object sets provide this feature.
10658 For each CLX event-key symbol-name \i{XXX} (for example,
10659 \var{key-press}), there is a function \code{serve-}\i{XXX} of two
10660 arguments, an object set and a function. The \code{serve-}\i{XXX}
10661 function establishes the function as the handler for the \kwd{XXX}
10662 event in the object set. Recall from section \ref{object-sets},
10663 \code{system:add-xwindow-object} associates some Lisp object with a
10664 CLX window in an object set. When \code{system:serve-event} notices
10665 activity on a window, it calls the function given to
10666 \code{ext:enable-clx-event-handling}. If this function is
10667 \code{ext:object-set-event-handler}, it calls the function given to
10668 \code{serve-}\i{XXX}, passing the object given to
10669 \code{system:add-xwindow-object} and the event's slots as well as a
10670 couple other arguments described below.
10672 To use object sets in this way:
10675 \item Create an object set.
10677 \item Define some operations on it using the \code{serve-}\i{XXX}
10680 \item Add an object for every window on which you receive requests.
10681 This can be the CLX window itself or some structure more meaningful
10682 to your application.
10684 \item Call \code{system:serve-event} to service an X event.
10688 \begin{defun}{ext:}{object-set-event-handler}{%
10689 \args{\var{display}}}
10691 This function is a suitable argument to
10692 \code{ext:enable-clx-event-handling}. The actual event handlers
10693 defined for particular events within a given object set must take an
10694 argument for every slot in the appropriate event. In addition to
10695 the event slots, \code{ext:object-set-event-handler} passes the
10696 following arguments:
10698 \item The object, as established by
10699 \code{system:add-xwindow-object}, on which the event occurred.
10700 \item event-key, see \code{xlib:event-case}.
10701 \item send-event-p, see \code{xlib:event-case}.
10704 Describing any \code{ext:serve-}\var{event-key-name} function, where
10705 \var{event-key-name} is an event-key symbol-name (for example,
10706 \code{ext:serve-key-press}), indicates exactly what all the
10707 arguments are in their correct order.
10710 %% \code{ext:object-set-event-handler} ignores \kwd{no-exposure}
10711 %% events on pixmaps, issuing a warning if one occurs. It is only
10712 %% prepared to dispatch events for windows.
10715 When creating an object set for use with
10716 \code{ext:object-set-event-handler}, specify
10717 \code{ext:default-clx-event-handler} as the default handler for
10718 events in that object set. If no default handler is specified, and
10719 the system invokes the default default handler, it will cause an
10720 error since this function takes arguments suitable for handling port
10725 %%\node A SERVE-EVENT Example, , Using SERVE-EVENT with the CLX Interface to X, Event Dispatching with SERVE-EVENT
10726 \section{A SERVE-EVENT Example}
10727 This section contains two examples using \code{system:serve-event}. The first
10728 one does not use object sets, and the second, slightly more complicated one
10733 * Without Object Sets Example::
10734 * With Object Sets Example::
10737 %%\node Without Object Sets Example, With Object Sets Example, A SERVE-EVENT Example, A SERVE-EVENT Example
10738 \subsection{Without Object Sets Example}
10739 This example defines an input handler for a CLX display connection. It only
10740 recognizes \kwd{key-press} events. The body of the example loops over
10741 \code{system:serve-event} to get input.
10744 (in-package "SERVER-EXAMPLE")
10746 (defun my-input-handler (display)
10747 (xlib:event-case (display :timeout 0)
10748 (:key-press (event-window code state)
10749 (format t "KEY-PRESSED (Window = ~D) = ~S.~%"
10750 (xlib:window-id event-window)
10751 ;; See Hemlock Command Implementor's Manual for convenient
10752 ;; input mapping function.
10753 (ext:translate-character display code state))
10754 ;; Make XLIB:EVENT-CASE discard the event.
10758 (defun server-example ()
10759 "An example of using the SYSTEM:SERVE-EVENT function and object sets to
10760 handle CLX events."
10761 (let* ((display (ext:open-clx-display))
10762 (screen (display-default-screen display))
10763 (black (screen-black-pixel screen))
10764 (white (screen-white-pixel screen))
10765 (window (create-window :parent (screen-root screen)
10766 :x 0 :y 0 :width 200 :height 200
10767 :background white :border black
10770 (xlib:make-event-mask :key-press))))
10771 ;; Wrap code in UNWIND-PROTECT, so we clean up after ourselves.
10774 ;; Enable event handling on the display.
10775 (ext:enable-clx-event-handling display #'my-input-handler)
10776 ;; Map the windows to the screen.
10777 (map-window window)
10778 ;; Make sure we send all our requests.
10779 (display-force-output display)
10780 ;; Call serve-event for 100,000 events or immediate timeouts.
10781 (dotimes (i 100000) (system:serve-event)))
10782 ;; Disable event handling on this display.
10783 (ext:disable-clx-event-handling display)
10784 ;; Get rid of the window.
10785 (destroy-window window)
10786 ;; Pick off any events the X server has already queued for our
10787 ;; windows, so we don't choke since SYSTEM:SERVE-EVENT is no longer
10788 ;; prepared to handle events for us.
10790 (unless (deleting-window-drop-event display window)
10792 ;; Close the display.
10793 (xlib:close-display display))))
10795 (defun deleting-window-drop-event (display win)
10796 "Check for any events on win. If there is one, remove it from the
10797 event queue and return t; otherwise, return nil."
10798 (xlib:display-finish-output display)
10799 (let ((result nil))
10800 (xlib:process-event
10802 :handler #'(lambda (&key event-window &allow-other-keys)
10803 (if (eq event-window win)
10810 %%\node With Object Sets Example, , Without Object Sets Example, A SERVE-EVENT Example
10811 \subsection{With Object Sets Example}
10812 This example involves more work, but you get a little more for your effort. It
10813 defines two objects, \code{input-box} and \code{slider}, and establishes a
10814 \kwd{key-press} handler for each object, \code{key-pressed} and
10815 \code{slider-pressed}. We have two object sets because we handle events on the
10816 windows manifesting these objects differently, but the events come over the
10817 same display connection.
10820 (in-package "SERVER-EXAMPLE")
10822 (defstruct (input-box (:print-function print-input-box)
10823 (:constructor make-input-box (display window)))
10824 "Our program knows about input-boxes, and it doesn't care how they
10826 display ; The CLX display on which my input-box is displayed.
10827 window) ; The CLX window in which the user types.
10829 (defun print-input-box (object stream n)
10830 (declare (ignore n))
10831 (format stream "#<Input-Box ~S>" (input-box-display object)))
10833 (defvar *input-box-windows*
10834 (system:make-object-set "Input Box Windows"
10835 #'ext:default-clx-event-handler))
10837 (defun key-pressed (input-box event-key event-window root child
10838 same-screen-p x y root-x root-y modifiers time
10839 key-code send-event-p)
10840 "This is our :key-press event handler."
10841 (declare (ignore event-key root child same-screen-p x y
10842 root-x root-y time send-event-p))
10843 (format t "KEY-PRESSED (Window = ~D) = ~S.~%"
10844 (xlib:window-id event-window)
10845 ;; See Hemlock Command Implementor's Manual for convenient
10846 ;; input mapping function.
10847 (ext:translate-character (input-box-display input-box)
10848 key-code modifiers)))
10850 (ext:serve-key-press *input-box-windows* #'key-pressed)
10853 (defstruct (slider (:print-function print-slider)
10854 (:include input-box)
10855 (:constructor %make-slider
10856 (display window window-width max)))
10857 "Our program knows about sliders too, and these provide input values
10859 bits-per-value ; bits per discrete value up to max.
10860 max) ; End value for slider.
10862 (defun print-slider (object stream n)
10863 (declare (ignore n))
10864 (format stream "#<Slider ~S 0..~D>"
10865 (input-box-display object)
10866 (1- (slider-max object))))
10868 (defun make-slider (display window max)
10869 (%make-slider display window
10870 (truncate (xlib:drawable-width window) max)
10873 (defvar *slider-windows*
10874 (system:make-object-set "Slider Windows"
10875 #'ext:default-clx-event-handler))
10877 (defun slider-pressed (slider event-key event-window root child
10878 same-screen-p x y root-x root-y modifiers time
10879 key-code send-event-p)
10880 "This is our :key-press event handler for sliders. Probably this is
10881 a mouse thing, but for simplicity here we take a character typed."
10882 (declare (ignore event-key root child same-screen-p x y
10883 root-x root-y time send-event-p))
10884 (format t "KEY-PRESSED (Window = ~D) = ~S --> ~D.~%"
10885 (xlib:window-id event-window)
10886 ;; See Hemlock Command Implementor's Manual for convenient
10887 ;; input mapping function.
10888 (ext:translate-character (input-box-display slider)
10889 key-code modifiers)
10890 (truncate x (slider-bits-per-value slider))))
10892 (ext:serve-key-press *slider-windows* #'slider-pressed)
10895 (defun server-example ()
10896 "An example of using the SYSTEM:SERVE-EVENT function and object sets to
10897 handle CLX events."
10898 (let* ((display (ext:open-clx-display))
10899 (screen (display-default-screen display))
10900 (black (screen-black-pixel screen))
10901 (white (screen-white-pixel screen))
10902 (iwindow (create-window :parent (screen-root screen)
10903 :x 0 :y 0 :width 200 :height 200
10904 :background white :border black
10907 (xlib:make-event-mask :key-press)))
10908 (swindow (create-window :parent (screen-root screen)
10909 :x 0 :y 300 :width 200 :height 50
10910 :background white :border black
10913 (xlib:make-event-mask :key-press)))
10914 (input-box (make-input-box display iwindow))
10915 (slider (make-slider display swindow 15)))
10916 ;; Wrap code in UNWIND-PROTECT, so we clean up after ourselves.
10919 ;; Enable event handling on the display.
10920 (ext:enable-clx-event-handling display
10921 #'ext:object-set-event-handler)
10922 ;; Add the windows to the appropriate object sets.
10923 (system:add-xwindow-object iwindow input-box
10924 *input-box-windows*)
10925 (system:add-xwindow-object swindow slider
10927 ;; Map the windows to the screen.
10928 (map-window iwindow)
10929 (map-window swindow)
10930 ;; Make sure we send all our requests.
10931 (display-force-output display)
10932 ;; Call server for 100,000 events or immediate timeouts.
10933 (dotimes (i 100000) (system:serve-event)))
10934 ;; Disable event handling on this display.
10935 (ext:disable-clx-event-handling display)
10936 (delete-window iwindow display)
10937 (delete-window swindow display)
10938 ;; Close the display.
10939 (xlib:close-display display))))
10942 (defun delete-window (window display)
10943 ;; Remove the windows from the object sets before destroying them.
10944 (system:remove-xwindow-object window)
10945 ;; Destroy the window.
10946 (destroy-window window)
10947 ;; Pick off any events the X server has already queued for our
10948 ;; windows, so we don't choke since SYSTEM:SERVE-EVENT is no longer
10949 ;; prepared to handle events for us.
10951 (unless (deleting-window-drop-event display window)
10954 (defun deleting-window-drop-event (display win)
10955 "Check for any events on win. If there is one, remove it from the
10956 event queue and return t; otherwise, return nil."
10957 (xlib:display-finish-output display)
10958 (let ((result nil))
10959 (xlib:process-event
10961 :handler #'(lambda (&key event-window &allow-other-keys)
10962 (if (eq event-window win)
10968 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/alien.ms}
10970 %%\node Alien Objects, Interprocess Communication under LISP, Event Dispatching with SERVE-EVENT, Top
10971 \chapter{Alien Objects}
10974 \b{By Robert MacLachlan and William Lott}
10979 * Introduction to Aliens::
10981 * Alien Operations::
10982 * Alien Variables::
10983 * Alien Data Structure Example::
10984 * Loading Unix Object Files::
10985 * Alien Function Calls::
10986 * Step-by-Step Alien Example::
10989 %%\node Introduction to Aliens, Alien Types, Alien Objects, Alien Objects
10990 \section{Introduction to Aliens}
10992 Because of Lisp's emphasis on dynamic memory allocation and garbage
10993 collection, Lisp implementations use unconventional memory representations
10994 for objects. This representation mismatch creates problems when a Lisp
10995 program must share objects with programs written in another language. There
10996 are three different approaches to establishing communication:
10998 \item The burden can be placed on the foreign program (and programmer) by
10999 requiring the use of Lisp object representations. The main difficulty with
11000 this approach is that either the foreign program must be written with Lisp
11001 interaction in mind, or a substantial amount of foreign ``glue'' code must be
11002 written to perform the translation.
11004 \item The Lisp system can automatically convert objects back and forth
11005 between the Lisp and foreign representations. This is convenient, but
11006 translation becomes prohibitively slow when large or complex data structures
11009 \item The Lisp program can directly manipulate foreign objects through the
11010 use of extensions to the Lisp language. Most Lisp systems make use of
11011 this approach, but the language for describing types and expressing
11012 accesses is often not powerful enough for complex objects to be easily
11015 \cmucl{} relies primarily on the automatic conversion and direct manipulation
11016 approaches: Aliens of simple scalar types are automatically converted,
11017 while complex types are directly manipulated in their foreign
11018 representation. Any foreign objects that can't automatically be
11019 converted into Lisp values are represented by objects of type
11020 \code{alien-value}. Since Lisp is a dynamically typed language, even
11021 foreign objects must have a run-time type; this type information is
11022 provided by encapsulating the raw pointer to the foreign data within an
11023 \code{alien-value} object.
11025 The Alien type language and operations are most similar to those of the
11026 C language, but Aliens can also be used when communicating with most
11027 other languages that can be linked with C.
11030 %%\node Alien Types, Alien Operations, Introduction to Aliens, Alien Objects
11031 \section{Alien Types}
11033 Alien types have a description language based on nested list structure. For
11038 struct foo *b[100];
11041 has the corresponding Alien type:
11045 (b (array (* (struct foo)) 100)))
11050 * Defining Alien Types::
11051 * Alien Types and Lisp Types::
11052 * Alien Type Specifiers::
11053 * The C-Call Package::
11056 %%\node Defining Alien Types, Alien Types and Lisp Types, Alien Types, Alien Types
11057 \subsection{Defining Alien Types}
11059 Types may be either named or anonymous. With structure and union
11060 types, the name is part of the type specifier, allowing recursively
11061 defined types such as:
11063 (struct foo (a (* (struct foo))))
11065 An anonymous structure or union type is specified by using the name
11066 \nil. The \funref{with-alien} macro defines a local scope which
11067 ``captures'' any named type definitions. Other types are not
11068 inherently named, but can be given named abbreviations using
11069 \code{def-alien-type}.
11071 \begin{defmac}{alien:}{def-alien-type}{name type}
11073 This macro globally defines \var{name} as a shorthand for the Alien
11074 type \var{type}. When introducing global structure and union type
11075 definitions, \var{name} may be \nil, in which case the name to
11076 define is taken from the type's name.
11080 %%\node Alien Types and Lisp Types, Alien Type Specifiers, Defining Alien Types, Alien Types
11081 \subsection{Alien Types and Lisp Types}
11083 The Alien types form a subsystem of the \cmucl{} type system. An
11084 \code{alien} type specifier provides a way to use any Alien type as a
11085 Lisp type specifier. For example
11087 (typep foo '(alien (* int)))
11089 can be used to determine whether \code{foo} is a pointer to an
11090 \code{int}. \code{alien} type specifiers can be used in the same ways
11091 as ordinary type specifiers (like \code{string}.) Alien type
11092 declarations are subject to the same precise type checking as any
11093 other declaration (section \xlref{precise-type-checks}.)
11095 Note that the Alien type system overlaps with normal Lisp type
11096 specifiers in some cases. For example, the type specifier
11097 \code{(alien single-float)} is identical to \code{single-float}, since
11098 Alien floats are automatically converted to Lisp floats. When
11099 \code{type-of} is called on an Alien value that is not automatically
11100 converted to a Lisp value, then it will return an \code{alien} type
11103 %%\node Alien Type Specifiers, The C-Call Package, Alien Types and Lisp Types, Alien Types
11104 \subsection{Alien Type Specifiers}
11106 Some Alien type names are \clisp symbols, but the names are
11107 still exported from the \code{alien} package, so it is legal to say
11108 \code{alien:single-float}. These are the basic Alien type specifiers:
11110 \begin{deftp}{Alien type}{*}{%
11113 A pointer to an object of the specified \var{type}. If \var{type}
11114 is \true, then it means a pointer to anything, similar to
11115 ``\code{void *}'' in ANSI C. Currently, the only way to detect a
11118 (zerop (sap-int (alien-sap \var{ptr})))
11120 \xlref{system-area-pointers}
11123 \begin{deftp}{Alien type}{array}{\var{type} \mstar{\var{dimension}}}
11125 An array of the specified \var{dimensions}, holding elements of type
11126 \var{type}. Note that \code{(* int)} and \code{(array int)} are
11127 considered to be different types when type checking is done; pointer
11128 and array types must be explicitly coerced using \code{cast}.
11130 Arrays are accessed using \code{deref}, passing the indices as
11131 additional arguments. Elements are stored in row-major order (as
11132 in C), so the first dimension determines only the size of the memory
11133 block, and not the layout of the higher dimensions. An array whose
11134 first dimension is variable may be specified by using \nil{} as the
11135 first dimension. Fixed-size arrays can be allocated as array
11136 elements, structure slots or \code{with-alien} variables. Dynamic
11137 arrays can only be allocated using \funref{make-alien}.
11140 \begin{deftp}{Alien type}{struct}{\var{name}
11141 \mstar{(\var{field} \var{type} \mopt{\var{bits}})}}
11143 A structure type with the specified \var{name} and \var{fields}.
11144 Fields are allocated at the same positions used by the
11145 implementation's C compiler. \var{bits} is intended for C-like bit
11146 field support, but is currently unused. If \var{name} is \false,
11147 then the type is anonymous.
11149 If a named Alien \code{struct} specifier is passed to
11150 \funref{def-alien-type} or \funref{with-alien}, then this defines,
11151 respectively, a new global or local Alien structure type. If no
11152 \var{fields} are specified, then the fields are taken from the
11153 current (local or global) Alien structure type definition of
11157 \begin{deftp}{Alien type}{union}{\var{name}
11158 \mstar{(\var{field} \var{type} \mopt{\var{bits}})}}
11160 Similar to \code{struct}, but defines a union type. All fields are
11161 allocated at the same offset, and the size of the union is the size
11162 of the largest field. The programmer must determine which field is
11163 active from context.
11166 \begin{deftp}{Alien type}{enum}{\var{name} \mstar{\var{spec}}}
11168 An enumeration type that maps between integer values and keywords.
11169 If \var{name} is \false, then the type is anonymous. Each
11170 \var{spec} is either a keyword, or a list \code{(\var{keyword}
11171 \var{value})}. If \var{value} is not supplied, then it defaults
11172 to one greater than the value for the preceding spec (or to zero if
11173 it is the first spec.)
11176 \begin{deftp}{Alien type}{signed}{\mopt{\var{bits}}}
11177 A signed integer with the specified number of bits precision. The
11178 upper limit on integer precision is determined by the machine's word
11179 size. If no size is specified, the maximum size will be used.
11182 \begin{deftp}{Alien type}{integer}{\mopt{\var{bits}}}
11183 Identical to \code{signed}---the distinction between \code{signed}
11184 and \code{integer} is purely stylistic.
11187 \begin{deftp}{Alien type}{unsigned}{\mopt{\var{bits}}}
11188 Like \code{signed}, but specifies an unsigned integer.
11191 \begin{deftp}{Alien type}{boolean}{\mopt{\var{bits}}}
11192 Similar to an enumeration type that maps \code{0} to \false{} and
11193 all other values to \true. \var{bits} determines the amount of
11194 storage allocated to hold the truth value.
11197 \begin{deftp}{Alien type}{single-float}{}
11198 A floating-point number in IEEE single format.
11201 \begin{deftp}{Alien type}{double-float}{}
11202 A floating-point number in IEEE double format.
11205 \begin{deftp}{Alien type}{function}{\var{result-type} \mstar{\var{arg-type}}}
11206 \label{alien-function-types}
11207 An Alien function that takes arguments of the specified
11208 \var{arg-types} and returns a result of type \var{result-type}.
11209 Note that the only context where a \code{function} type is directly
11210 specified is in the argument to \code{alien-funcall} (see section
11211 \funref{alien-funcall}.) In all other contexts, functions are
11212 represented by function pointer types: \code{(* (function ...))}.
11215 \begin{deftp}{Alien type}{system-area-pointer}{}
11216 A pointer which is represented in Lisp as a
11217 \code{system-area-pointer} object (\pxlref{system-area-pointers}.)
11220 %%\node The C-Call Package, , Alien Type Specifiers, Alien Types
11221 \subsection{The C-Call Package}
11223 The \code{c-call} package exports these type-equivalents to the C type
11224 of the same name: \code{char}, \code{short}, \code{int}, \code{long},
11225 \code{unsigned-char}, \code{unsigned-short}, \code{unsigned-int},
11226 \code{unsigned-long}, \code{float}, \code{double}. \code{c-call} also
11227 exports these types:
11229 \begin{deftp}{Alien type}{void}{}
11230 This type is used in function types to declare that no useful value
11231 is returned. Evaluation of an \code{alien-funcall} form will return
11235 \begin{deftp}{Alien type}{c-string}{}
11236 This type is similar to \code{(* char)}, but is interpreted as a
11237 null-terminated string, and is automatically converted into a Lisp
11238 string when accessed. If the pointer is C \code{NULL} (or 0), then
11239 accessing gives Lisp \false.
11241 Assigning a Lisp string to a \code{c-string} structure field or
11242 variable stores the contents of the string to the memory already
11243 pointed to by that variable. When an Alien of type \code{(* char)}
11244 is assigned to a \code{c-string}, then the \code{c-string} pointer
11245 is assigned to. This allows \code{c-string} pointers to be
11246 initialized. For example:
11248 (def-alien-type nil (struct foo (str c-string)))
11250 (defun make-foo (str) (let ((my-foo (make-alien (struct foo))))
11251 (setf (slot my-foo 'str) (make-alien char (length str))) (setf (slot
11252 my-foo 'str) str) my-foo))
11254 Storing Lisp \false{} writes C \code{NULL} to the \code{c-string}
11259 %%\node Alien Operations, Alien Variables, Alien Types, Alien Objects
11260 \section{Alien Operations}
11262 This section describes the basic operations on Alien values.
11265 * Alien Access Operations::
11266 * Alien Coercion Operations::
11267 * Alien Dynamic Allocation::
11270 %%\node Alien Access Operations, Alien Coercion Operations, Alien Operations, Alien Operations
11271 \subsection{Alien Access Operations}
11273 \begin{defun}{alien:}{deref}{\args{\var{pointer-or-array} \amprest \var{indices}}}
11275 This function returns the value pointed to by an Alien pointer or
11276 the value of an Alien array element. If a pointer, an optional
11277 single index can be specified to give the equivalent of C pointer
11278 arithmetic; this index is scaled by the size of the type pointed to.
11279 If an array, the number of indices must be the same as the number of
11280 dimensions in the array type. \code{deref} can be set with
11281 \code{setf} to assign a new value.
11284 \begin{defun}{alien:}{slot}{\args{\var{struct-or-union} \var{slot-name}}}
11286 This function extracts the value of slot \var{slot-name} from an
11287 Alien \code{struct} or \code{union}. If \var{struct-or-union} is a
11288 pointer to a structure or union, then it is automatically
11289 dereferenced. This can be set with \code{setf} to assign a new
11290 value. Note that \var{slot-name} is evaluated, and need not be a
11291 compile-time constant (but only constant slot accesses are
11292 efficiently compiled.)
11295 %%\node Alien Coercion Operations, Alien Dynamic Allocation, Alien Access Operations, Alien Operations
11296 \subsection{Alien Coercion Operations}
11298 \begin{defmac}{alien:}{addr}{\var{alien-expr}}
11300 This macro returns a pointer to the location specified by
11301 \var{alien-expr}, which must be either an Alien variable, a use of
11302 \code{deref}, a use of \code{slot}, or a use of
11303 \funref{extern-alien}.
11306 \begin{defmac}{alien:}{cast}{\var{alien} \var{new-type}}
11308 This macro converts \var{alien} to a new Alien with the specified
11309 \var{new-type}. Both types must be an Alien pointer, array or
11310 function type. Note that the result is not \code{eq} to the
11311 argument, but does refer to the same data bits.
11314 \begin{defmac}{alien:}{sap-alien}{\var{sap} \var{type}}
11315 \defunx[alien:]{alien-sap}{\var{alien-value}}
11317 \code{sap-alien} converts \var{sap} (a system area pointer
11318 \pxlref{system-area-pointers}) to an Alien value with the specified
11319 \var{type}. \var{type} is not evaluated.
11321 \code{alien-sap} returns the SAP which points to \var{alien-value}'s
11324 The \var{type} to \code{sap-alien} and the type of the \var{alien-value} to
11325 \code{alien-sap} must be some Alien pointer, array or record type.
11328 %%\node Alien Dynamic Allocation, , Alien Coercion Operations, Alien Operations
11329 \subsection{Alien Dynamic Allocation}
11331 Dynamic Aliens are allocated using the \code{malloc} library, so foreign code
11332 can call \code{free} on the result of \code{make-alien}, and Lisp code can
11333 call \code{free-alien} on objects allocated by foreign code.
11335 \begin{defmac}{alien:}{make-alien}{\var{type} \mopt{\var{size}}}
11337 This macro returns a dynamically allocated Alien of the specified
11338 \var{type} (which is not evaluated.) The allocated memory is not
11339 initialized, and may contain arbitrary junk. If supplied,
11340 \var{size} is an expression to evaluate to compute the size of the
11341 allocated object. There are two major cases:
11343 \item When \var{type} is an array type, an array of that type is
11344 allocated and a \var{pointer} to it is returned. Note that you
11345 must use \code{deref} to change the result to an array before you
11346 can use \code{deref} to read or write elements:
11348 (defvar *foo* (make-alien (array char 10)))
11350 (type-of *foo*) \result{} (alien (* (array (signed 8) 10)))
11352 (setf (deref (deref *foo*) 0) 10) \result{} 10
11354 If supplied, \var{size} is used as the first dimension for the
11357 \item When \var{type} is any other type, then an object for
11358 that type is allocated, and a \var{pointer} to it is returned. So
11359 \code{(make-alien int)} returns a \code{(* int)}. If \var{size}
11360 is specified, then a block of that many objects is allocated, with
11361 the result pointing to the first one.
11365 \begin{defun}{alien:}{free-alien}{\var{alien}}
11367 This function frees the storage for \var{alien} (which must have
11368 been allocated with \code{make-alien} or \code{malloc}.)
11371 See also \funref{with-alien}, which stack-allocates Aliens.
11374 %%\node Alien Variables, Alien Data Structure Example, Alien Operations, Alien Objects
11375 \section{Alien Variables}
11377 Both local (stack allocated) and external (C global) Alien variables are
11381 * Local Alien Variables::
11382 * External Alien Variables::
11385 %%\node Local Alien Variables, External Alien Variables, Alien Variables, Alien Variables
11386 \subsection{Local Alien Variables}
11388 \begin{defmac}{alien:}{with-alien}{\mstar{(\var{name} \var{type}
11389 \mopt{\var{initial-value}})} \mstar{form}}
11391 This macro establishes local alien variables with the specified
11392 Alien types and names for the dynamic extent of the body. The variable
11393 \var{names} are established as symbol-macros; the bindings have
11394 lexical scope, and may be assigned with \code{setq} or \code{setf}.
11395 This form is analogous to defining a local variable in C: additional
11396 storage is allocated, and the initial value is copied.
11398 \code{with-alien} also establishes a new scope for named structures
11399 and unions. Any \var{type} specified for a variable may contain
11400 named structure or union types with the slots specified. Within the
11401 lexical scope of the binding specifiers and body, a locally defined
11402 structure type \var{foo} can be referenced by its name using:
11408 %%\node External Alien Variables, , Local Alien Variables, Alien Variables
11409 \subsection{External Alien Variables}
11410 \label{external-aliens}
11412 External Alien names are strings, and Lisp names are symbols. When an
11413 external Alien is represented using a Lisp variable, there must be a
11414 way to convert from one name syntax into the other. The macros
11415 \code{extern-alien}, \code{def-alien-variable} and
11416 \funref{def-alien-routine} use this conversion heuristic:
11418 \item Alien names are converted to Lisp names by uppercasing and
11419 replacing underscores with hyphens.
11421 \item Conversely, Lisp names are converted to Alien names by
11422 lowercasing and replacing hyphens with underscores.
11424 \item Both the Lisp symbol and Alien string names may be separately
11425 specified by using a list of the form:
11427 (\var{alien-string} \var{lisp-symbol})
11431 \begin{defmac}{alien:}{def-alien-variable}{\var{name} \var{type}}
11433 This macro defines \var{name} as an external Alien variable of the
11434 specified Alien \var{type}. \var{name} and \var{type} are not
11435 evaluated. The Lisp name of the variable (see above) becomes a
11436 global Alien variable in the Lisp namespace. Global Alien variables
11437 are effectively ``global symbol macros''; a reference to the
11438 variable fetches the contents of the external variable. Similarly,
11439 setting the variable stores new contents---the new contents must be
11440 of the declared \var{type}.
11442 For example, it is often necessary to read the global C variable
11443 \code{errno} to determine why a particular function call failed. It
11444 is possible to define errno and make it accessible from Lisp by the
11447 (def-alien-variable "errno" int)
11449 ;; Now it is possible to get the value of the C variable errno simply by
11450 ;; referencing that Lisp variable:
11456 \begin{defmac}{alien:}{extern-alien}{\var{name} \var{type}}
11458 This macro returns an Alien with the specified \var{type} which
11459 points to an externally defined value. \var{name} is not evaluated,
11460 and may be specified either as a string or a symbol. \var{type} is
11461 an unevaluated Alien type specifier.
11465 %%\node Alien Data Structure Example, Loading Unix Object Files, Alien Variables, Alien Objects
11466 \section{Alien Data Structure Example}
11468 Now that we have Alien types, operations and variables, we can manipulate
11469 foreign data structures. This C declaration can be translated into the
11470 following Alien type:
11474 struct foo *b[100];
11479 (def-alien-type nil
11482 (b (array (* (struct foo)) 100))))
11485 With this definition, the following C expression can be translated in this way:
11492 (with-alien ((f (struct foo)))
11493 (slot (deref (slot f 'b) 7) 'a)
11495 ;; Do something with f...
11500 Or consider this example of an external C variable and some accesses:
11509 extern struct c_struct *my_struct;
11513 my_struct = my_struct->n;
11515 which can be manipulated in Lisp like this:
11517 (def-alien-type nil
11526 (def-alien-variable "my_struct" (* c-struct))
11528 (incf (slot my-struct 'x))
11529 (setf (slot my-struct 'a) 5)
11530 (setq my-struct (slot my-struct 'n))
11535 %%\node Loading Unix Object Files, Alien Function Calls, Alien Data Structure Example, Alien Objects
11536 \section{Loading Unix Object Files}
11538 Foreign object files are loaded into the running Lisp process by
11539 \code{load-foreign}. First, it runs the linker on the files and
11540 libraries, creating an absolute Unix object file. This object file is
11541 then loaded into the currently running Lisp. The external
11542 symbols defining routines and variables are made available for future
11543 external references (e.g. by \code{extern-alien}.)
11544 \code{load-foreign} must be run before any of the defined symbols are
11547 Note that if a Lisp core image is saved (using \funref{save-lisp}), all
11548 loaded foreign code is lost when the image is restarted.
11550 \begin{defun}{alien:}{load-foreign}{%
11551 \args{\var{files} \keys{\kwd{libraries} \kwd{base-file} \kwd{env}}}}
11553 \var{files} is a \code{simple-string} or list of
11554 \code{simple-string}s specifying the names of the object files.
11555 \var{libraries} is a list of \code{simple-string}s specifying
11556 libraries in a format that \code{ld}, the Unix linker, expects. The
11557 default value for \var{libraries} is \code{("-lc")} (i.e., the
11558 standard C library). \var{base-file} is the file to use for the
11559 initial symbol table information. The default is the Lisp start up
11560 code: \file{path:lisp}. \var{env} should be a list of simple
11561 strings in the format of Unix environment variables (i.e.,
11562 \code{\var{A}=\var{B}}, where \var{A} is an environment variable and
11563 \var{B} is its value). The default value for \var{env} is the
11564 environment information available at the time Lisp was invoked.
11565 Unless you are certain that you want to change this, you should just
11570 %%\node Alien Function Calls, Step-by-Step Alien Example, Loading Unix Object Files, Alien Objects
11571 \section{Alien Function Calls}
11573 The foreign function call interface allows a Lisp program to call functions
11574 written in other languages. The current implementation of the foreign
11575 function call interface assumes a C calling convention and thus routines
11576 written in any language that adheres to this convention may be called from
11579 Lisp sets up various interrupt handling routines and other environment
11580 information when it first starts up, and expects these to be in place at all
11581 times. The C functions called by Lisp should either not change the
11582 environment, especially the interrupt entry points, or should make sure
11583 that these entry points are restored when the C function returns to Lisp.
11584 If a C function makes changes without restoring things to the way they were
11585 when the C function was entered, there is no telling what will happen.
11588 * alien-funcall:: The alien-funcall Primitive
11589 * def-alien-routine:: The def-alien-routine Macro
11590 * def-alien-routine Example::
11591 * Calling Lisp from C::
11594 %%\node alien-funcall, def-alien-routine, Alien Function Calls, Alien Function Calls
11595 \subsection{The alien-funcall Primitive}
11597 \begin{defun}{alien:}{alien-funcall}{%
11598 \args{\var{alien-function} \amprest{} \var{arguments}}}
11600 This function is the foreign function call primitive:
11601 \var{alien-function} is called with the supplied \var{arguments} and
11602 its value is returned. The \var{alien-function} is an arbitrary
11603 run-time expression; to call a constant function, use
11604 \funref{extern-alien} or \code{def-alien-routine}.
11606 The type of \var{alien-function} must be \code{(alien (function
11607 ...))} or \code{(alien (* (function ...)))},
11608 \xlref{alien-function-types}. The function type is used to
11609 determine how to call the function (as though it was declared with
11610 a prototype.) The type need not be known at compile time, but only
11611 known-type calls are efficiently compiled. Limitations:
11613 \item Structure type return values are not implemented.
11614 \item Passing of structures by value is not implemented.
11618 Here is an example which allocates a \code{(struct foo)}, calls a foreign
11619 function to initialize it, then returns a Lisp vector of all the
11620 \code{(* (struct foo))} objects filled in by the foreign call:
11623 ;; Allocate a foo on the stack.
11624 (with-alien ((f (struct foo)))
11626 ;; Call some C function to fill in foo fields.
11627 (alien-funcall (extern-alien "mangle_foo" (function void (* foo)))
11630 ;; Find how many foos to use by getting the A field.
11631 (let* ((num (slot f 'a))
11632 (result (make-array num)))
11634 ;; Get a pointer to the array so that we don't have to keep extracting it:
11635 (with-alien ((a (* (array (* (struct foo)) 100)) (addr (slot f 'b))))
11637 ;; Loop over the first N elements and stash them in the result vector.
11639 (setf (svref result i) (deref (deref a) i)))
11643 %%\node def-alien-routine, def-alien-routine Example, alien-funcall, Alien Function Calls
11644 \subsection{The def-alien-routine Macro}
11647 \begin{defmac}{alien:}{def-alien-routine}{\var{name} \var{result-type}
11648 \mstar{(\var{aname} \var{atype} \mopt{style})}}
11650 This macro is a convenience for automatically generating Lisp
11651 interfaces to simple foreign functions. The primary feature is the
11652 parameter style specification, which translates the C
11653 pass-by-reference idiom into additional return values.
11655 \var{name} is usually a string external symbol, but may also be a
11656 symbol Lisp name or a list of the foreign name and the Lisp name.
11657 If only one name is specified, the other is automatically derived
11658 (\pxlref{external-aliens}).
11660 \var{result-type} is the Alien type of the return value. Each
11661 remaining subform specifies an argument to the foreign function.
11662 \var{aname} is the symbol name of the argument to the constructed
11663 function (for documentation) and \var{atype} is the Alien type of
11664 the corresponding foreign argument. The semantics of the actual call
11665 are the same as for \funref{alien-funcall}. \var{style} should be
11666 one of the following:
11668 \item[\kwd{in}] specifies that the argument is passed by value.
11669 This is the default. \kwd{in} arguments have no corresponding
11670 return value from the Lisp function.
11672 \item[\kwd{out}] specifies a pass-by-reference output value. The
11673 type of the argument must be a pointer to a fixed sized object
11674 (such as an integer or pointer). \kwd{out} and \kwd{in-out}
11675 cannot be used with pointers to arrays, records or functions. An
11676 object of the correct size is allocated, and its address is passed
11677 to the foreign function. When the function returns, the contents
11678 of this location are returned as one of the values of the Lisp
11681 \item[\kwd{copy}] is similar to \kwd{in}, but the argument is copied
11682 to a pre-allocated object and a pointer to this object is passed
11683 to the foreign routine.
11685 \item[\kwd{in-out}] is a combination of \kwd{copy} and \kwd{out}.
11686 The argument is copied to a pre-allocated object and a pointer to
11687 this object is passed to the foreign routine. On return, the
11688 contents of this location are returned as an additional value.
11690 Any efficiency-critical foreign interface function should be inline
11691 expanded by preceding \code{def-alien-routine} with:
11693 (declaim (inline \var{lisp-name}))
11695 In addition to avoiding the Lisp call overhead, this allows
11696 pointers, word-integers and floats to be passed using non-descriptor
11697 representations, avoiding consing (\pxlref{non-descriptor}.)
11700 %%\node def-alien-routine Example, Calling Lisp from C, def-alien-routine, Alien Function Calls
11701 \subsection{def-alien-routine Example}
11703 Consider the C function \code{cfoo} with the following calling convention:
11707 char *a; /* update */
11710 /* Body of cfoo. */
11713 which can be described by the following call to \code{def-alien-routine}:
11715 (def-alien-routine "cfoo" void
11720 The Lisp function \code{cfoo} will have two arguments (\var{str} and \var{a})
11721 and two return values (\var{a} and \var{i}).
11723 %%\node Calling Lisp from C, , def-alien-routine Example, Alien Function Calls
11724 \subsection{Calling Lisp from C}
11726 Calling Lisp functions from C is sometimes possible, but is rather hackish.
11727 See \code{funcall0} \ldots{} \code{funcall3} in \file{lisp/arch.h}. The
11728 arguments must be valid CMU CL object descriptors (e.g. fixnums must be
11729 left-shifted by 2.) See \file{compiler/generic/objdef.lisp} or the derived
11730 file \file{lisp/internals.h} for details of the object representation.
11731 \file{lisp/internals.h} is mechanically generated, and is not part of the
11732 source distribution. It is distributed in the \file{docs/} directory of the
11733 binary distribution.
11735 Note that the garbage collector moves objects, and won't be able to fix up any
11736 references in C variables, so either turn GC off or don't keep Lisp pointers
11737 in C data unless they are to statically allocated objects. You can use
11738 \funref{purify} to place live data structures in static space so that they
11739 won't move during GC.
11742 \subsection{Accessing Lisp Arrays}
11744 Due to the way \cmucl{} manages memory, the amount of memory that can
11745 be dynamically allocated by \code{malloc} or \funref{make-alien} is
11746 limited\footnote{\cmucl{} mmaps a large piece of memory for its own
11747 use and this memory is typically about 8 MB above the start of the C
11748 heap. Thus, only about 8 MB of memory can be dynamically
11751 To overcome this limitation, it is possible to access the content of
11752 Lisp arrays which are limited only by the amount of physical memory
11753 and swap space available. However, this technique is only useful if
11754 the foreign function takes pointers to memory instead of allocating
11755 memory for itself. In the latter case, you will have to modify the
11758 This technique takes advantage of the fact that \cmucl{} has
11759 specialized array types (\pxlref{specialized-array-types}) that match
11760 a typical C array. For example, a \code{(simple-array double-float
11761 (100))} is stored in memory in essentially the same way as the C
11762 array \code{double x[100]} would be. The following function allows us
11763 to get the physical address of such a Lisp array:
11765 (defun array-data-address (array)
11766 "Return the physical address of where the actual data of an array is
11769 ARRAY must be a specialized array type in CMU Lisp. This means ARRAY
11770 must be an array of one of the following types:
11781 (declare (type (or #+signed-array (array (signed-byte 8))
11782 #+signed-array (array (signed-byte 16))
11783 #+signed-array (array (signed-byte 32))
11784 (array (unsigned-byte 8))
11785 (array (unsigned-byte 16))
11786 (array (unsigned-byte 32))
11787 (array single-float)
11788 (array double-float))
11790 (optimize (speed 3) (safety 0))
11791 (ext:optimize-interface (safety 3)))
11792 ;; with-array-data will get us to the actual data. However, because
11793 ;; the array could have been displaced, we need to know where the
11795 (lisp::with-array-data ((data array)
11798 (declare (ignore end))
11799 ;; DATA is a specialized simple-array. Memory is laid out like this:
11801 ;; byte offset Value
11802 ;; 0 type code (should be 70 for double-float vector)
11803 ;; 4 4 * number of elements in vector
11804 ;; 8 1st element of vector
11807 (let ((addr (+ 8 (logandc1 7 (kernel:get-lisp-obj-address data))))
11808 (type-size (let ((type (array-element-type data)))
11809 (cond ((or (equal type '(signed-byte 8))
11810 (equal type '(unsigned-byte 8)))
11812 ((or (equal type '(signed-byte 16))
11813 (equal type '(unsigned-byte 16)))
11815 ((or (equal type '(signed-byte 32))
11816 (equal type '(unsigned-byte 32)))
11818 ((equal type 'single-float)
11820 ((equal type 'double-float)
11823 (error "Unknown specialized array element type"))))))
11824 (declare (type (unsigned-byte 32) addr)
11825 (optimize (speed 3) (safety 0) (ext:inhibit-warnings 3)))
11826 (system:int-sap (the (unsigned-byte 32)
11827 (+ addr (* type-size start)))))))
11830 Assume we have the C function below that we wish to use:
11832 double dotprod(double* x, double* y, int n)
11837 for (k = 0; k < n; ++k) \{
11838 sum += x[k] * y[k];
11842 The following example generates two large arrays in Lisp, and calls the C
11843 function to do the desired computation. This would not have been
11844 possible using \code{malloc} or \code{make-alien} since we need about
11845 16 MB of memory to hold the two arrays.
11847 (def-alien-routine "dotprod" double
11848 (x (* double-float) :in)
11849 (y (* double-float) :in)
11852 (let ((x (make-array 1000000 :element-type 'double-float))
11853 (y (make-array 1000000 :element-type 'double-float)))
11854 ;; Initialize X and Y somehow
11855 (let ((x-addr (system:int-sap (array-data-address x)))
11856 (y-addr (system:int-sap (array-data-address y))))
11857 (dotprod x-addr y-addr 1000000)))
11859 In this example, it may be useful to wrap the inner \code{let}
11860 expression in an \code{unwind-protect} that first turns off garbage
11861 collection and then turns garbage collection on afterwards. This will
11862 prevent garbage collection from moving \code{x} and \code{y} after we
11863 have obtained the (now erroneous) addresses but before the call to
11864 \code{dotprod} is made.
11868 %%\node Step-by-Step Alien Example, , Alien Function Calls, Alien Objects
11869 \section{Step-by-Step Alien Example}
11871 This section presents a complete example of an interface to a somewhat
11872 complicated C function. This example should give a fairly good idea
11873 of how to get the effect you want for almost any kind of C function.
11874 Suppose you have the following C function which you want to be able to
11875 call from Lisp in the file \file{test.c}:
11883 struct c_struct *c_function (i, s, r, a)
11886 struct c_struct *r;
11890 struct c_struct *r2;
11892 printf("i = %d\n", i);
11893 printf("s = %s\n", s);
11894 printf("r->x = %d\n", r->x);
11895 printf("r->s = %s\n", r->s);
11896 for (j = 0; j < 10; j++) printf("a[%d] = %d.\n", j, a[j]);
11897 r2 = (struct c_struct *) malloc (sizeof(struct c_struct));
11899 r2->s = "A C string";
11903 It is possible to call this function from Lisp using the file \file{test.lisp}
11906 ;;; -*- Package: test-c-call -*-
11907 (in-package "TEST-C-CALL")
11908 (use-package "ALIEN")
11909 (use-package "C-CALL")
11911 ;;; Define the record c-struct in Lisp.
11912 (def-alien-type nil
11917 ;;; Define the Lisp function interface to the C routine. It returns a
11918 ;;; pointer to a record of type c-struct. It accepts four parameters:
11919 ;;; i, an int; s, a pointer to a string; r, a pointer to a c-struct
11920 ;;; record; and a, a pointer to the array of 10 ints.
11922 ;;; The INLINE declaration eliminates some efficiency notes about heap
11923 ;;; allocation of Alien values.
11924 (declaim (inline c-function))
11925 (def-alien-routine c-function
11926 (* (struct c-struct))
11929 (r (* (struct c-struct)))
11930 (a (array int 10)))
11932 ;;; A function which sets up the parameters to the C function and
11933 ;;; actually calls it.
11934 (defun call-cfun ()
11935 (with-alien ((ar (array int 10))
11936 (c-struct (struct c-struct)))
11937 (dotimes (i 10) ; Fill array.
11938 (setf (deref ar i) i))
11939 (setf (slot c-struct 'x) 20)
11940 (setf (slot c-struct 's) "A Lisp String")
11942 (with-alien ((res (* (struct c-struct))
11943 (c-function 5 "Another Lisp String" (addr c-struct) ar)))
11944 (format t "Returned from C function.~%")
11945 (multiple-value-prog1
11946 (values (slot res 'x)
11949 ;; Deallocate result \i{after} we are done using it.
11950 (free-alien res)))))
11952 To execute the above example, it is necessary to compile the C routine as
11957 In order to enable incremental loading with some linkers, you may need to say:
11961 Once the C code has been compiled, you can start up Lisp and load it in:
11964 ;;; Lisp should start up with its normal prompt.
11966 ;;; Compile the Lisp file. This step can be done separately. You don't have
11967 ;;; to recompile every time.
11968 * (compile-file "test.lisp")
11970 ;;; Load the foreign object file to define the necessary symbols. This must
11971 ;;; be done before loading any code that refers to these symbols. The next block
11972 ;;; of comments is actually the output of LOAD-FOREIGN. Different linkers
11973 ;;; will give different warnings, but some warning about redefining the code
11974 ;;; size is typical.
11975 * (load-foreign "test.o")
11977 ;;; Running library:load-foreign.csh...
11978 ;;; Loading object file...
11979 ;;; Parsing symbol table...
11980 Warning: "_gp" moved from #x00C082C0 to #x00C08460.
11982 Warning: "end" moved from #x00C00340 to #x00C004E0.
11984 ;;; o.k. now load the compiled Lisp object file.
11987 ;;; Now we can call the routine that sets up the parameters and calls the C
11989 * (test-c-call::call-cfun)
11991 ;;; The C routine prints the following information to standard output.
11993 s = Another Lisp String
11995 r->s = A Lisp String
12006 ;;; Lisp prints out the following information.
12007 Returned from C function.
12008 ;;; Return values from the call to test-c-call::call-cfun.
12014 If any of the foreign functions do output, they should not be called from
12015 within Hemlock. Depending on the situation, various strange behaviors occur.
12016 Under X, the output goes to the window in which Lisp was started; on a
12017 terminal, the output will overwrite the Hemlock screen image; in a Hemlock
12018 slave, standard output is \file{/dev/null} by default, so any output is
12021 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/ipc.ms}
12023 %%\node Interprocess Communication under LISP, Debugger Programmer's Interface, Alien Objects, Top
12024 \chapter{Interprocess Communication under LISP}
12026 \b{Written by William Lott and Bill Chiles}
12030 CMU Common Lisp offers a facility for interprocess communication (IPC)
12031 on top of using Unix system calls and the complications of that level
12032 of IPC. There is a simple remote-procedure-call (RPC) package built
12033 on top of TCP/IP sockets.
12037 * The REMOTE Package::
12038 * The WIRE Package::
12039 * Out-Of-Band Data::
12042 %%\node The REMOTE Package, The WIRE Package, Interprocess Communication under LISP, Interprocess Communication under LISP
12043 \section{The REMOTE Package}
12044 The \code{remote} package provides a simple RPC facility including
12045 interfaces for creating servers, connecting to already existing
12046 servers, and calling functions in other Lisp processes. The routines
12047 for establishing a connection between two processes,
12048 \code{create-request-server} and \code{connect-to-remote-server},
12049 return \var{wire} structures. A wire maintains the current state of
12050 a connection, and all the RPC forms require a wire to indicate where
12055 * Connecting Servers and Clients::
12056 * Remote Evaluations::
12061 %%\node Connecting Servers and Clients, Remote Evaluations, The REMOTE Package, The REMOTE Package
12062 \subsection{Connecting Servers and Clients}
12064 Before a client can connect to a server, it must know the network address on
12065 which the server accepts connections. Network addresses consist of a host
12066 address or name, and a port number. Host addresses are either a string of the
12067 form \code{VANCOUVER.SLISP.CS.CMU.EDU} or a 32 bit unsigned integer. Port
12068 numbers are 16 bit unsigned integers. Note: \var{port} in this context has
12069 nothing to do with Mach ports and message passing.
12071 When a process wants to receive connection requests (that is, become a
12072 server), it first picks an integer to use as the port. Only one server
12073 (Lisp or otherwise) can use a given port number on a given machine at
12074 any particular time. This can be an iterative process to find a free
12075 port: picking an integer and calling \code{create-request-server}. This
12076 function signals an error if the chosen port is unusable. You will
12077 probably want to write a loop using \code{handler-case}, catching
12078 conditions of type error, since this function does not signal more
12079 specific conditions.
12081 \begin{defun}{wire:}{create-request-server}{%
12082 \args{\var{port} \ampoptional{} \var{on-connect}}}
12084 \code{create-request-server} sets up the current Lisp to accept
12085 connections on the given port. If port is unavailable for any
12086 reason, this signals an error. When a client connects to this port,
12087 the acceptance mechanism makes a wire structure and invokes the
12088 \var{on-connect} function. Invoking this function has a couple of
12089 purposes, and \var{on-connect} may be \nil{}, in which case the
12090 system forgoes invoking any function at connect time.
12092 The \var{on-connect} function is both a hook that allows you access
12093 to the wire created by the acceptance mechanism, and it confirms the
12094 connection. This function takes two arguments, the wire and the
12095 host address of the connecting process. See the section on host
12096 addresses below. When \var{on-connect} is \nil, the request server
12097 allows all connections. When it is non-\nil, the function returns
12098 two values, whether to accept the connection and a function the
12099 system should call when the connection terminates. Either value may
12100 be \nil, but when the first value is \nil, the acceptance mechanism
12103 \code{create-request-server} returns an object that
12104 \code{destroy-request-server} uses to terminate a connection.
12107 \begin{defun}{wire:}{destroy-request-server}{\args{\var{server}}}
12109 \code{destroy-request-server} takes the result of
12110 \code{create-request-server} and terminates that server. Any
12111 existing connections remain intact, but all additional connection
12112 attempts will fail.
12115 \begin{defun}{wire:}{connect-to-remote-server}{%
12116 \args{\var{host} \var{port} \ampoptional{} \var{on-death}}}
12118 \code{connect-to-remote-server} attempts to connect to a remote
12119 server at the given \var{port} on \var{host} and returns a wire
12120 structure if it is successful. If \var{on-death} is non-\nil, it is
12121 a function the system invokes when this connection terminates.
12125 %%\node Remote Evaluations, Remote Objects, Connecting Servers and Clients, The REMOTE Package
12126 \subsection{Remote Evaluations}
12127 After the server and client have connected, they each have a wire
12128 allowing function evaluation in the other process. This RPC mechanism
12129 has three flavors: for side-effect only, for a single value, and for
12132 Only a limited number of data types can be sent across wires as
12133 arguments for remote function calls and as return values: integers
12134 inclusively less than 32 bits in length, symbols, lists, and
12135 \var{remote-objects} (\pxlref{remote-objs}). The system sends symbols
12136 as two strings, the package name and the symbol name, and if the
12137 package doesn't exist remotely, the remote process signals an error.
12138 The system ignores other slots of symbols. Lists may be any tree of
12139 the above valid data types. To send other data types you must
12140 represent them in terms of these supported types. For example, you
12141 could use \code{prin1-to-string} locally, send the string, and use
12142 \code{read-from-string} remotely.
12144 \begin{defmac}{wire:}{remote}{%
12145 \args{\var{wire} \mstar{call-specs}}}
12147 The \code{remote} macro arranges for the process at the other end of
12148 \var{wire} to invoke each of the functions in the \var{call-specs}.
12149 To make sure the system sends the remote evaluation requests over
12150 the wire, you must call \code{wire-force-output}.
12152 Each of \var{call-specs} looks like a function call textually, but
12153 it has some odd constraints and semantics. The function position of
12154 the form must be the symbolic name of a function. \code{remote}
12155 evaluates each of the argument subforms for each of the
12156 \var{call-specs} locally in the current context, sending these
12157 values as the arguments for the functions.
12159 Consider the following example:
12161 (defun write-remote-string (str)
12162 (declare (simple-string str))
12164 (write-string str)))
12166 The value of \code{str} in the local process is passed over the wire
12167 with a request to invoke \code{write-string} on the value. The
12168 system does not expect to remotely evaluate \code{str} for a value
12169 in the remote process.
12172 \begin{defun}{wire:}{wire-force-output}{\args{\var{wire}}}
12174 \code{wire-force-output} flushes all internal buffers associated
12175 with \var{wire}, sending the remote requests. This is necessary
12176 after a call to \code{remote}.
12179 \begin{defmac}{wire:}{remote-value}{\args{\var{wire} \var{call-spec}}}
12181 The \code{remote-value} macro is similar to the \code{remote} macro.
12182 \code{remote-value} only takes one \var{call-spec}, and it returns
12183 the value returned by the function call in the remote process. The
12184 value must be a valid type the system can send over a wire, and
12185 there is no need to call \code{wire-force-output} in conjunction
12186 with this interface.
12188 If the client unwinds past the call to \code{remote-value}, the server
12189 continues running, but the system ignores the value the server sends
12192 If the server unwinds past the remotely requested call, instead of
12193 returning normally, \code{remote-value} returns two values, \nil{}
12194 and \true. Otherwise this returns the result of the remote
12195 evaluation and \nil.
12198 \begin{defmac}{wire:}{remote-value-bind}{%
12199 \args{\var{wire} (\mstar{variable}) remote-form
12200 \mstar{local-forms}}}
12202 \code{remote-value-bind} is similar to \code{multiple-value-bind}
12203 except the values bound come from \var{remote-form}'s evaluation in
12204 the remote process. The \var{local-forms} execute in an implicit
12207 If the client unwinds past the call to \code{remote-value-bind}, the
12208 server continues running, but the system ignores the values the
12211 If the server unwinds past the remotely requested call, instead of
12212 returning normally, the \var{local-forms} never execute, and
12213 \code{remote-value-bind} returns \nil.
12217 %%\node Remote Objects, Host Addresses, Remote Evaluations, The REMOTE Package
12218 \subsection{Remote Objects}
12219 \label{remote-objs}
12221 The wire mechanism only directly supports a limited number of data
12222 types for transmission as arguments for remote function calls and as
12223 return values: integers inclusively less than 32 bits in length,
12224 symbols, and lists. Sometimes it is useful to allow remote processes to
12225 refer to local data structures without allowing the remote process
12226 to operate on the data. We have \var{remote-objects} to support
12227 this without the need to represent the data structure in terms of
12228 the above data types, to send the representation to the remote
12229 process, to decode the representation, to later encode it again, and
12230 to send it back along the wire.
12232 You can convert any Lisp object into a remote-object. When you send
12233 a remote-object along a wire, the system simply sends a unique token
12234 for it. In the remote process, the system looks up the token and
12235 returns a remote-object for the token. When the remote process
12236 needs to refer to the original Lisp object as an argument to a
12237 remote call back or as a return value, it uses the remote-object it
12238 has which the system converts to the unique token, sending that
12239 along the wire to the originating process. Upon receipt in the
12240 first process, the system converts the token back to the same
12241 (\code{eq}) remote-object.
12243 \begin{defun}{wire:}{make-remote-object}{\args{\var{object}}}
12245 \code{make-remote-object} returns a remote-object that has
12246 \var{object} as its value. The remote-object can be passed across
12247 wires just like the directly supported wire data types.
12250 \begin{defun}{wire:}{remote-object-p}{\args{\var{object}}}
12252 The function \code{remote-object-p} returns \true{} if \var{object}
12253 is a remote object and \nil{} otherwise.
12256 \begin{defun}{wire:}{remote-object-local-p}{\args{\var{remote}}}
12258 The function \code{remote-object-local-p} returns \true{} if
12259 \var{remote} refers to an object in the local process. This can
12260 only occur if the local process created \var{remote} with
12261 \code{make-remote-object}.
12264 \begin{defun}{wire:}{remote-object-eq}{\args{\var{obj1} \var{obj2}}}
12266 The function \code{remote-object-eq} returns \true{} if \var{obj1} and
12267 \var{obj2} refer to the same (\code{eq}) lisp object, regardless of
12268 which process created the remote-objects.
12271 \begin{defun}{wire:}{remote-object-value}{\args{\var{remote}}}
12273 This function returns the original object used to create the given
12274 remote object. It is an error if some other process originally
12275 created the remote-object.
12278 \begin{defun}{wire:}{forget-remote-translation}{\args{\var{object}}}
12280 This function removes the information and storage necessary to
12281 translate remote-objects back into \var{object}, so the next
12282 \code{gc} can reclaim the memory. You should use this when you no
12283 longer expect to receive references to \var{object}. If some remote
12284 process does send a reference to \var{object},
12285 \code{remote-object-value} signals an error.
12289 %%\node Host Addresses, , Remote Objects, The REMOTE Package
12290 \subsection{Host Addresses}
12291 The operating system maintains a database of all the valid host
12292 addresses. You can use this database to convert between host names
12293 and addresses.
12295 \begin{defun}{ext:}{lookup-host-entry}{\args{\var{host}}}
12297 \code{lookup-host-entry} searches the database for the given
12298 \var{host} and returns a host-entry structure for it. If it fails
12299 to find \var{host} in the database, it returns \nil. \var{Host} is
12300 either the address (as an integer) or the name (as a string) of the
12304 \begin{defun}{ext:}{host-entry-name}{\args{\var{host-entry}}}
12305 \defunx[ext:]{host-entry-aliases}{\args{\var{host-entry}}}
12306 \defunx[ext:]{host-entry-addr-list}{\args{\var{host-entry}}}
12307 \defunx[ext:]{host-entry-addr}{\args{\var{host-entry}}}
12309 \code{host-entry-name}, \code{host-entry-aliases}, and
12310 \code{host-entry-addr-list} each return the indicated slot from the
12311 host-entry structure. \code{host-entry-addr} returns the primary
12312 (first) address from the list returned by
12313 \code{host-entry-addr-list}.
12317 %%\node The WIRE Package, Out-Of-Band Data, The REMOTE Package, Interprocess Communication under LISP
12318 \section{The WIRE Package}
12320 The \code{wire} package provides for sending data along wires. The
12321 \code{remote} package sits on top of this package. All data sent
12322 with a given output routine must be read in the remote process with
12323 the complementary fetching routine. For example, if you send a
12324 string with \code{wire-output-string}, the remote process must know
12325 to use \code{wire-get-string}. To avoid rigid data transfers and
12326 complicated code, the interface supports sending
12327 \var{tagged} data. With tagged data, the system sends a tag
12328 announcing the type of the next data, and the remote system takes
12329 care of fetching the appropriate type.
12331 When using interfaces at the wire level instead of the RPC level,
12332 the remote process must read everything sent by these routines. If
12333 the remote process leaves any input on the wire, it will later
12334 mistake the data for an RPC request causing unknown lossage.
12339 * Making Your Own Wires::
12342 %%\node Untagged Data, Tagged Data, The WIRE Package, The WIRE Package
12343 \subsection{Untagged Data}
12344 When using these routines both ends of the wire know exactly what types are
12345 coming and going and in what order. This data is restricted to the following
12350 8 bit unsigned bytes.
12353 32 bit unsigned bytes.
12359 simple-strings less than 65535 in length.
12363 \begin{defun}{wire:}{wire-output-byte}{\args{\var{wire} \var{byte}}}
12364 \defunx[wire:]{wire-get-byte}{\args{\var{wire}}}
12365 \defunx[wire:]{wire-output-number}{\args{\var{wire} \var{number}}}
12366 \defunx[wire:]{wire-get-number}{\args{\var{wire} \ampoptional{}
12368 \defunx[wire:]{wire-output-string}{\args{\var{wire} \var{string}}}
12369 \defunx[wire:]{wire-get-string}{\args{\var{wire}}}
12371 These functions either output or input an object of the specified
12372 data type. When you use any of these output routines to send data
12373 across the wire, you must use the corresponding input routine
12374 to interpret the data.
12378 %%\node Tagged Data, Making Your Own Wires, Untagged Data, The WIRE Package
12379 \subsection{Tagged Data}
12380 When using these routines, the system automatically transmits and interprets
12381 the tags for you, so both ends can figure out what kind of data transfers
12382 occur. Sending tagged data allows a greater variety of data types: integers
12383 inclusively less than 32 bits in length, symbols, lists, and \var{remote-objects}
12384 (\pxlref{remote-objs}). The system sends symbols as two strings, the
12385 package name and the symbol name, and if the package doesn't exist remotely,
12386 the remote process signals an error. The system ignores other slots of
12387 symbols. Lists may be any tree of the above valid data types. To send other
12388 data types you must represent them in terms of these supported types. For
12389 example, you could use \code{prin1-to-string} locally, send the string, and use
12390 \code{read-from-string} remotely.
12392 \begin{defun}{wire:}{wire-output-object}{%
12393 \args{\var{wire} \var{object} \ampoptional{} \var{cache-it}}}
12394 \defunx[wire:]{wire-get-object}{\args{\var{wire}}}
12396 The function \code{wire-output-object} sends \var{object} over
12397 \var{wire} preceded by a tag indicating its type.
12399 If \var{cache-it} is non-\nil, this function only sends \var{object}
12400 the first time it gets \var{object}. Each end of the wire
12401 associates a token with \var{object}, similar to remote-objects,
12402 allowing you to send the object more efficiently on successive
12403 transmissions. \var{cache-it} defaults to \true{} for symbols and
12404 \nil{} for other types. Since the RPC level requires function
12405 names, a high-level protocol based on a set of function calls saves
12406 time in sending the functions' names repeatedly.
12408 The function \code{wire-get-object} reads the results of
12409 \code{wire-output-object} and returns that object.
12413 %%\node Making Your Own Wires, , Tagged Data, The WIRE Package
12414 \subsection{Making Your Own Wires}
12415 You can create wires manually in addition to the \code{remote} package's
12416 interface creating them for you. To create a wire, you need a Unix \i{file
12417 descriptor}. If you are unfamiliar with Unix file descriptors, see section 2 of
12418 the Unix manual pages.
12420 \begin{defun}{wire:}{make-wire}{\args{\var{descriptor}}}
12422 The function \code{make-wire} creates a new wire when supplied with
12423 the file descriptor to use for the underlying I/O operations.
12426 \begin{defun}{wire:}{wire-p}{\args{\var{object}}}
12428 This function returns \true{} if \var{object} is indeed a wire,
12432 \begin{defun}{wire:}{wire-fd}{\args{\var{wire}}}
12434 This function returns the file descriptor used by the \var{wire}.
12438 %%\node Out-Of-Band Data, , The WIRE Package, Interprocess Communication under LISP
12439 \section{Out-Of-Band Data}
12441 The TCP/IP protocol allows users to send data asynchronously, otherwise
12442 known as \var{out-of-band} data. When using this feature, the operating
12443 system interrupts the receiving process if this process has chosen to be
12444 notified about out-of-band data. The receiver can grab this input
12445 without affecting any information currently queued on the socket.
12446 Therefore, you can use this without interfering with any current
12447 activity due to other wire and remote interfaces.
12449 Unfortunately, most implementations of TCP/IP are broken, so use of
12450 out-of-band data is limited for safety reasons. You can only reliably
12451 send one character at a time.
12453 The routines in this section provide a mechanism for establishing
12454 handlers for out-of-band characters and for sending them out-of-band.
12455 These all take a Unix file descriptor instead of a wire, but you can
12456 fetch a wire's file descriptor with \code{wire-fd}.
12458 \begin{defun}{wire:}{add-oob-handler}{\args{\var{fd} \var{char} \var{handler}}}
12460 The function \code{add-oob-handler} arranges for \var{handler} to be
12461 called whenever \var{char} shows up as out-of-band data on the file
12462 descriptor \var{fd}.
12465 \begin{defun}{wire:}{remove-oob-handler}{\args{\var{fd} \var{char}}}
12467 This function removes the handler for the character \var{char} on
12468 the file descriptor \var{fd}.
12471 \begin{defun}{wire:}{remove-all-oob-handlers}{\args{\var{fd}}}
12473 This function removes all handlers for the file descriptor \var{fd}.
12476 \begin{defun}{wire:}{send-character-out-of-band}{\args{\var{fd} \var{char}}}
12478 This function sends the character \var{char} down the file
12479 descriptor \var{fd} out-of-band.
12483 \hide{File:debug-int.tex}
12484 %%\node Debugger Programmer's Interface, Function Index, Interprocess Communication under LISP, Top
12485 \chapter{Debugger Programmer's Interface}
12486 \label{debug-internals}
12488 The debugger programmer's interface is exported from the
12489 \code{"DEBUG-INTERNALS"} or \code{"DI"} package. This is a CMU
12490 extension that allows debugging tools to be written without detailed
12491 knowledge of the compiler or run-time system.
12493 Some of the interface routines take a code-location as an argument. As
12494 described in the section on code-locations, some code-locations are
12495 unknown. When a function calls for a \var{basic-code-location}, it
12496 takes either type, but when it specifically names the argument
12497 \var{code-location}, the routine will signal an error if you give it an
12498 unknown code-location.
12501 * DI Exceptional Conditions::
12502 * Debug-variables::
12504 * Debug-functions::
12509 * Source Translation Utilities::
12513 %%\node DI Exceptional Conditions, Debug-variables, Debugger Programmer's Interface, Debugger Programmer's Interface
12514 \section{DI Exceptional Conditions}
12516 Some of these operations fail depending on the availability of debugging
12517 information. In the most severe case, when someone saved a Lisp image
12518 stripping all debugging data structures, no operations are valid. In
12519 this case, even backtracing and finding frames is impossible. Some
12520 interfaces can simply return values indicating the lack of information,
12521 or their return values are naturally meaningful in light of missing data.
12522 Other routines, as documented below, will signal
12523 \code{serious-condition}s when they discover awkward situations. This
12524 interface does not provide for programs to detect these situations other
12525 than by calling a routine that detects them and signals a condition.
12526 These are serious-conditions because the program using the interface
12527 must handle them before it can correctly continue execution. These
12528 debugging conditions are not errors since it is no fault of the
12529 programmers that the conditions occur.
12532 * Debug-conditions::
12536 %%\node Debug-conditions, Debug-errors, DI Exceptional Conditions, DI Exceptional Conditions
12537 \subsection{Debug-conditions}
12539 The debug internals interface signals conditions when it can't adhere
12540 to its contract. These are serious-conditions because the program
12541 using the interface must handle them before it can correctly continue
12542 execution. These debugging conditions are not errors since it is no
12543 fault of the programmers that the conditions occur. The interface
12544 does not provide for programs to detect these situations other than
12545 calling a routine that detects them and signals a condition.
12548 \begin{deftp}{Condition}{debug-condition}{}
12550 This condition inherits from serious-condition, and all debug-conditions
12551 inherit from this. These must be handled, but they are not programmer errors.
12555 \begin{deftp}{Condition}{no-debug-info}{}
12557 This condition indicates there is absolutely no debugging information
12562 \begin{deftp}{Condition}{no-debug-function-returns}{}
12564 This condition indicates the system cannot return values from a frame since
12565 its debug-function lacks debug information details about returning values.
12569 \begin{deftp}{Condition}{no-debug-blocks}{}
12570 This condition indicates that a function was not compiled with debug-block
12571 information, but this information is necessary for some requested
12575 \begin{deftp}{Condition}{no-debug-variables}{}
12576 Similar to \code{no-debug-blocks}, except that variable information was
12580 \begin{deftp}{Condition}{lambda-list-unavailable}{}
12581 Similar to \code{no-debug-blocks}, except that lambda list information was
12585 \begin{deftp}{Condition}{invalid-value}{}
12587 This condition indicates a debug-variable has \kwd{invalid} or \kwd{unknown}
12588 value in a particular frame.
12592 \begin{deftp}{Condition}{ambiguous-variable-name}{}
12594 This condition indicates a user supplied debug-variable name identifies more
12595 than one valid variable in a particular frame.
12599 %%\node Debug-errors, , Debug-conditions, DI Exceptional Conditions
12600 \subsection{Debug-errors}
12602 These are programmer errors resulting from misuse of the debugging tools'
12603 programmers' interface. You could have avoided an occurrence of one of these
12604 by using some routine to check the use of the routine generating the error.
12607 \begin{deftp}{Condition}{debug-error}{}
12608 This condition inherits from error, and all user programming errors inherit
12609 from this condition.
12613 \begin{deftp}{Condition}{unhandled-condition}{}
12614 This error results from a signalled \code{debug-condition} occurring
12615 without anyone handling it.
12619 \begin{deftp}{Condition}{unknown-code-location}{}
12620 This error indicates the invalid use of an unknown-code-location.
12624 \begin{deftp}{Condition}{unknown-debug-variable}{}
12626 This error indicates an attempt to use a debug-variable in conjunction with an
12627 inappropriate debug-function; for example, checking the variable's validity
12628 using a code-location in the wrong debug-function will signal this error.
12632 \begin{deftp}{Condition}{frame-function-mismatch}{}
12634 This error indicates you called a function returned by
12635 \code{preprocess-for-eval}
12636 on a frame other than the one for which the function had been prepared.
12641 %%\node Debug-variables, Frames, DI Exceptional Conditions, Debugger Programmer's Interface
12642 \section{Debug-variables}
12644 Debug-variables represent the constant information about where the system
12645 stores argument and local variable values. The system uniquely identifies with
12646 an integer every instance of a variable with a particular name and package. To
12647 access a value, you must supply the frame along with the debug-variable since
12648 these are particular to a function, not every instance of a variable on the
12651 \begin{defun}{}{debug-variable-name}{\args{\var{debug-variable}}}
12653 This function returns the name of the \var{debug-variable}. The
12654 name is the name of the symbol used as an identifier when writing
12659 \begin{defun}{}{debug-variable-package}{\args{\var{debug-variable}}}
12661 This function returns the package name of the \var{debug-variable}.
12662 This is the package name of the symbol used as an identifier when
12667 \begin{defun}{}{debug-variable-symbol}{\args{\var{debug-variable}}}
12669 This function returns the symbol from interning
12670 \code{debug-variable-name} in the package named by
12671 \code{debug-variable-package}.
12675 \begin{defun}{}{debug-variable-id}{\args{\var{debug-variable}}}
12677 This function returns the integer that makes \var{debug-variable}'s
12678 name and package name unique with respect to other
12679 debug-variables in the same function.
12683 \begin{defun}{}{debug-variable-validity}{%
12684 \args{\var{debug-variable} \var{basic-code-location}}}
12686 This function returns one of three values reflecting the validity of
12687 \var{debug-variable}'s value at \var{basic-code-location}:
12689 \item[\kwd{valid}] The value is known to be available.
12690 \item[\kwd{invalid}] The value is known to be unavailable.
12691 \item[\kwd{unknown}] The value's availability is unknown.
12696 \begin{defun}{}{debug-variable-value}{\args{\var{debug-variable}
12699 This function returns the value stored for \var{debug-variable} in
12700 \var{frame}. The value may be invalid. This is \code{SETF}'able.
12704 \begin{defun}{}{debug-variable-valid-value}{%
12705 \args{\var{debug-variable} \var{frame}}}
12707 This function returns the value stored for \var{debug-variable} in
12708 \var{frame}. If the value is not \kwd{valid}, then this signals an
12709 \code{invalid-value} error.
12714 %%\node Frames, Debug-functions, Debug-variables, Debugger Programmer's Interface
12717 Frames describe a particular call on the stack for a particular thread. This
12718 is the environment for name resolution, getting arguments and locals, and
12719 returning values. The stack conceptually grows up, so the top of the stack is
12720 the most recently called function.
12722 \code{top-frame}, \code{frame-down}, \code{frame-up}, and
12723 \code{frame-debug-function} can only fail when there is absolutely no
12724 debug information available. This can only happen when someone saved a
12725 Lisp image specifying that the system dump all debugging data.
12728 \begin{defun}{}{top-frame}{}
12730 This function never returns the frame for itself, always the frame
12731 before calling \code{top-frame}.
12735 \begin{defun}{}{frame-down}{\args{\var{frame}}}
12737 This returns the frame immediately below \var{frame} on the stack.
12738 When \var{frame} is the bottom of the stack, this returns \nil.
12742 \begin{defun}{}{frame-up}{\args{\var{frame}}}
12744 This returns the frame immediately above \var{frame} on the stack.
12745 When \var{frame} is the top of the stack, this returns \nil.
12749 \begin{defun}{}{frame-debug-function}{\args{\var{frame}}}
12751 This function returns the debug-function for the function whose call
12752 \var{frame} represents.
12756 \begin{defun}{}{frame-code-location}{\args{\var{frame}}}
12758 This function returns the code-location where \var{frame}'s
12759 debug-function will continue running when program execution returns
12760 to \var{frame}. If someone interrupted this frame, the result could
12761 be an unknown code-location.
12765 \begin{defun}{}{frame-catches}{\args{\var{frame}}}
12767 This function returns an a-list for all active catches in
12768 \var{frame} mapping catch tags to the code-locations at which the
12773 \begin{defun}{}{eval-in-frame}{\args{\var{frame} \var{form}}}
12775 This evaluates \var{form} in \var{frame}'s environment. This can
12776 signal several different debug-conditions since its success relies
12777 on a variety of inexact debug information: \code{invalid-value},
12778 \code{ambiguous-variable-name}, \code{frame-function-mismatch}. See
12779 also \funref{preprocess-for-eval}.
12783 \begin{defun}{}{return-from-frame}{\args{\var{frame} \var{values}}}
12785 This returns the elements in the list \var{values} as multiple
12786 values from \var{frame} as if the function \var{frame} represents
12787 returned these values. This signals a
12788 \code{no-debug-function-returns} condition when \var{frame}'s
12789 debug-function lacks information on returning values.
12791 \i{Not Yet Implemented}
12796 %%\node Debug-functions, Debug-blocks, Frames, Debugger Programmer's Interface
12797 \section {Debug-functions}
12799 Debug-functions represent the static information about a function determined at
12800 compile time---argument and variable storage, their lifetime information,
12801 etc. The debug-function also contains all the debug-blocks representing
12802 basic-blocks of code, and these contain information about specific
12803 code-locations in a debug-function.
12805 \begin{defmac}{}{do-debug-function-blocks}{%
12806 \args{(\var{block-var} \var{debug-function} \mopt{result-form})
12809 This executes the forms in a context with \var{block-var} bound to
12810 each debug-block in \var{debug-function} successively.
12811 \var{Result-form} is an optional form to execute for a return value,
12812 and \code{do-debug-function-blocks} returns \nil{} if there is no
12813 \var{result-form}. This signals a \code{no-debug-blocks} condition
12814 when the \var{debug-function} lacks debug-block information.
12818 \begin{defun}{}{debug-function-lambda-list}{\args{\var{debug-function}}}
12820 This function returns a list representing the lambda-list for
12821 \var{debug-function}. The list has the following structure:
12823 (required-var1 required-var2
12825 (:optional var3 suppliedp-var4)
12828 (:rest var6) (:rest var7)
12830 (:keyword keyword-symbol var8 suppliedp-var9)
12831 (:keyword keyword-symbol var10)
12835 Each \code{var}\var{n} is a debug-variable; however, the symbol
12836 \kwd{deleted} appears instead whenever the argument remains
12837 unreferenced throughout \var{debug-function}.
12839 If there is no lambda-list information, this signals a
12840 \code{lambda-list-unavailable} condition.
12844 \begin{defmac}{}{do-debug-function-variables}{%
12845 \args{(\var{var} \var{debug-function} \mopt{result})
12848 This macro executes each \var{form} in a context with \var{var}
12849 bound to each debug-variable in \var{debug-function}. This returns
12850 the value of executing \var{result} (defaults to \nil). This may
12851 iterate over only some of \var{debug-function}'s variables or none
12852 depending on debug policy; for example, possibly the compilation
12853 only preserved argument information.
12857 \begin{defun}{}{debug-variable-info-available}{\args{\var{debug-function}}}
12859 This function returns whether there is any variable information for
12860 \var{debug-function}. This is useful for distinguishing whether
12861 there were no locals in a function or whether there was no variable
12862 information. For example, if \code{do-debug-function-variables}
12863 executes its forms zero times, then you can use this function to
12864 determine the reason.
12868 \begin{defun}{}{debug-function-symbol-variables}{%
12869 \args{\var{debug-function} \var{symbol}}}
12871 This function returns a list of debug-variables in
12872 \var{debug-function} having the same name and package as
12873 \var{symbol}. If \var{symbol} is uninterned, then this returns a
12874 list of debug-variables without package names and with the same name
12875 as \var{symbol}. The result of this function is limited to the
12876 availability of variable information in \var{debug-function}; for
12877 example, possibly \var{debug-function} only knows about its
12882 \begin{defun}{}{ambiguous-debug-variables}{%
12883 \args{\var{debug-function} \var{name-prefix-string}}}
12885 This function returns a list of debug-variables in
12886 \var{debug-function} whose names contain \var{name-prefix-string} as
12887 an initial substring. The result of this function is limited to the
12888 availability of variable information in \var{debug-function}; for
12889 example, possibly \var{debug-function} only knows about its
12894 \begin{defun}{}{preprocess-for-eval}{%
12895 \args{\var{form} \var{basic-code-location}}}
12897 This function returns a function of one argument that evaluates
12898 \var{form} in the lexical context of \var{basic-code-location}.
12899 This allows efficient repeated evaluation of \var{form} at a certain
12900 place in a function which could be useful for conditional breaking.
12901 This signals a \code{no-debug-variables} condition when the
12902 code-location's debug-function has no debug-variable information
12903 available. The returned function takes a frame as an argument. See
12904 also \funref{eval-in-frame}.
12908 \begin{defun}{}{function-debug-function}{\args{\var{function}}}
12910 This function returns a debug-function that represents debug
12911 information for \var{function}.
12915 \begin{defun}{}{debug-function-kind}{\args{\var{debug-function}}}
12917 This function returns the kind of function \var{debug-function}
12918 represents. The value is one of the following:
12920 \item[\kwd{optional}] This kind of function is an entry point to an
12921 ordinary function. It handles optional defaulting, parsing
12923 \item[\kwd{external}] This kind of function is an entry point to an
12924 ordinary function. It checks argument values and count and calls
12925 the defined function.
12926 \item[\kwd{top-level}] This kind of function executes one or more
12927 random top-level forms from a file.
12928 \item[\kwd{cleanup}] This kind of function represents the cleanup
12929 forms in an \code{unwind-protect}.
12930 \item[\nil] This kind of function is not one of the above; that is,
12931 it is not specially marked in any way.
12936 \begin{defun}{}{debug-function-function}{\args{\var{debug-function}}}
12938 This function returns the Common Lisp function associated with the
12939 \var{debug-function}. This returns \nil{} if the function is
12940 unavailable or is non-existent as a user callable function object.
12944 \begin{defun}{}{debug-function-name}{\args{\var{debug-function}}}
12946 This function returns the name of the function represented by
12947 \var{debug-function}. This may be a string or a cons; do not assume
12953 %%\node Debug-blocks, Breakpoints, Debug-functions, Debugger Programmer's Interface
12954 \section{Debug-blocks}
12956 Debug-blocks contain information pertinent to a specific range of code in a
12959 \begin{defmac}{}{do-debug-block-locations}{%
12960 \args{(\var{code-var} \var{debug-block} \mopt{result})
12963 This macro executes each \var{form} in a context with \var{code-var}
12964 bound to each code-location in \var{debug-block}. This returns the
12965 value of executing \var{result} (defaults to \nil).
12969 \begin{defun}{}{debug-block-successors}{\args{\var{debug-block}}}
12971 This function returns the list of possible code-locations where
12972 execution may continue when the basic-block represented by
12973 \var{debug-block} completes its execution.
12977 \begin{defun}{}{debug-block-elsewhere-p}{\args{\var{debug-block}}}
12979 This function returns whether \var{debug-block} represents elsewhere
12980 code. This is code the compiler has moved out of a function's code
12981 sequence for optimization reasons. Code-locations in these blocks
12982 are unsuitable for stepping tools, and the first code-location has
12983 nothing to do with a normal starting location for the block.
12988 %%\node Breakpoints, Code-locations, Debug-blocks, Debugger Programmer's Interface
12989 \section{Breakpoints}
12991 A breakpoint represents a function the system calls with the current frame when
12992 execution passes a certain code-location. A breakpoint is active or inactive
12993 independent of its existence. It also has an extra slot for users to tag
12994 the breakpoint with information.
12996 \begin{defun}{}{make-breakpoint}{%
12997 \args{\var{hook-function} \var{what} \keys{\kwd{kind} \kwd{info}
12998 \kwd{function-end-cookie}}}}
13000 This function creates and returns a breakpoint. When program
13001 execution encounters the breakpoint, the system calls
13002 \var{hook-function}. \var{hook-function} takes the current frame
13003 for the function in which the program is running and the breakpoint
13006 \var{what} and \var{kind} determine where in a function the system
13007 invokes \var{hook-function}. \var{what} is either a code-location
13008 or a debug-function. \var{kind} is one of \kwd{code-location},
13009 \kwd{function-start}, or \kwd{function-end}. Since the starts and
13010 ends of functions may not have code-locations representing them,
13011 designate these places by supplying \var{what} as a debug-function
13012 and \var{kind} indicating the \kwd{function-start} or
13013 \kwd{function-end}. When \var{what} is a debug-function and
13014 \var{kind} is \kwd{function-end}, then hook-function must take two
13015 additional arguments, a list of values returned by the function and
13016 a function-end-cookie.
13018 \var{info} is information supplied by and used by the user.
13020 \var{function-end-cookie} is a function. To implement function-end
13021 breakpoints, the system uses starter breakpoints to establish the
13022 function-end breakpoint for each invocation of the function. Upon
13023 each entry, the system creates a unique cookie to identify the
13024 invocation, and when the user supplies a function for this argument,
13025 the system invokes it on the cookie. The system later invokes the
13026 function-end breakpoint hook on the same cookie. The user may save
13027 the cookie when passed to the function-end-cookie function for later
13028 comparison in the hook function.
13030 This signals an error if \var{what} is an unknown code-location.
13032 \i{Note: Breakpoints in interpreted code or byte-compiled code are
13033 not implemented. Function-end breakpoints are not implemented for
13034 compiled functions that use the known local return convention
13035 (e.g. for block-compiled or self-recursive functions.)}
13040 \begin{defun}{}{activate-breakpoint}{\args{\var{breakpoint}}}
13042 This function causes the system to invoke the \var{breakpoint}'s
13043 hook-function until the next call to \code{deactivate-breakpoint} or
13044 \code{delete-breakpoint}. The system invokes breakpoint hook
13045 functions in the reverse of the order in which you activate them.
13049 \begin{defun}{}{deactivate-breakpoint}{\args{\var{breakpoint}}}
13051 This function stops the system from invoking the \var{breakpoint}'s
13056 \begin{defun}{}{breakpoint-active-p}{\args{\var{breakpoint}}}
13058 This returns whether \var{breakpoint} is currently active.
13062 \begin{defun}{}{breakpoint-hook-function}{\args{\var{breakpoint}}}
13064 This function returns the \var{breakpoint}'s function the system
13065 calls when execution encounters \var{breakpoint}, and it is active.
13066 This is \code{SETF}'able.
13070 \begin{defun}{}{breakpoint-info}{\args{\var{breakpoint}}}
13072 This function returns \var{breakpoint}'s information supplied by the
13073 user. This is \code{SETF}'able.
13077 \begin{defun}{}{breakpoint-kind}{\args{\var{breakpoint}}}
13079 This function returns the \var{breakpoint}'s kind specification.
13083 \begin{defun}{}{breakpoint-what}{\args{\var{breakpoint}}}
13085 This function returns the \var{breakpoint}'s what specification.
13089 \begin{defun}{}{delete-breakpoint}{\args{\var{breakpoint}}}
13091 This function frees system storage and removes computational
13092 overhead associated with \var{breakpoint}. After calling this,
13093 \var{breakpoint} is useless and can never become active again.
13098 %%\node Code-locations, Debug-sources, Breakpoints, Debugger Programmer's Interface
13099 \section{Code-locations}
13101 Code-locations represent places in functions where the system has correct
13102 information about the function's environment and where interesting operations
13103 can occur---asking for a local variable's value, setting breakpoints,
13104 evaluating forms within the function's environment, etc.
13106 Sometimes the interface returns unknown code-locations. These
13107 represent places in functions, but there is no debug information
13108 associated with them. Some operations accept these since they may
13109 succeed even with missing debug data. These operations' argument is
13110 named \var{basic-code-location} indicating they take known and unknown
13111 code-locations. If an operation names its argument
13112 \var{code-location}, and you supply an unknown one, it will signal an
13113 error. For example, \code{frame-code-location} may return an unknown
13114 code-location if someone interrupted Lisp in the given frame. The
13115 system knows where execution will continue, but this place in the code
13116 may not be a place for which the compiler dumped debug information.
13118 \begin{defun}{}{code-location-debug-function}{\args{\var{basic-code-location}}}
13120 This function returns the debug-function representing information
13121 about the function corresponding to the code-location.
13125 \begin{defun}{}{code-location-debug-block}{\args{\var{basic-code-location}}}
13127 This function returns the debug-block containing code-location if it
13128 is available. Some debug policies inhibit debug-block information,
13129 and if none is available, then this signals a \code{no-debug-blocks}
13134 \begin{defun}{}{code-location-top-level-form-offset}{%
13135 \args{\var{code-location}}}
13137 This function returns the number of top-level forms before the one
13138 containing \var{code-location} as seen by the compiler in some
13139 compilation unit. A compilation unit is not necessarily a single
13140 file; see the section on debug-sources.
13144 \begin{defun}{}{code-location-form-number}{\args{\var{code-location}}}
13146 This function returns the number of the form corresponding to
13147 \var{code-location}. The form number is derived by walking the
13148 subforms of a top-level form in depth-first order. While walking
13149 the top-level form, count one in depth-first order for each subform
13150 that is a cons. See \funref{form-number-translations}.
13154 \begin{defun}{}{code-location-debug-source}{\args{\var{code-location}}}
13156 This function returns \var{code-location}'s debug-source.
13160 \begin{defun}{}{code-location-unknown-p}{\args{\var{basic-code-location}}}
13162 This function returns whether \var{basic-code-location} is unknown.
13163 It returns \nil{} when the code-location is known.
13167 \begin{defun}{}{code-location=}{\args{\var{code-location1}
13168 \var{code-location2}}}
13170 This function returns whether the two code-locations are the same.
13175 %%\node Debug-sources, Source Translation Utilities, Code-locations, Debugger Programmer's Interface
13176 \section{Debug-sources}
13178 Debug-sources represent how to get back the source for some code. The
13179 source is either a file (\code{compile-file} or \code{load}), a
13180 lambda-expression (\code{compile}, \code{defun}, \code{defmacro}), or
13181 a stream (something particular to CMU Common Lisp,
13182 \code{compile-from-stream}).
13184 When compiling a source, the compiler counts each top-level form it
13185 processes, but when the compiler handles multiple files as one block
13186 compilation, the top-level form count continues past file boundaries.
13187 Therefore \code{code-location-top-level-form-offset} returns an offset
13188 that does not always start at zero for the code-location's
13189 debug-source. The offset into a particular source is
13190 \code{code-location-top-level-form-offset} minus
13191 \code{debug-source-root-number}.
13193 Inside a top-level form, a code-location's form number indicates the
13194 subform corresponding to the code-location.
13196 \begin{defun}{}{debug-source-from}{\args{\var{debug-source}}}
13198 This function returns an indication of the type of source. The
13199 following are the possible values:
13201 \item[\kwd{file}] from a file (obtained by \code{compile-file} if
13203 \item[\kwd{lisp}] from Lisp (obtained by \code{compile} if
13205 \item[\kwd{stream}] from a non-file stream (CMU Common Lisp supports
13206 \code{compile-from-stream}).
13211 \begin{defun}{}{debug-source-name}{\args{\var{debug-source}}}
13213 This function returns the actual source in some sense represented by
13214 \var{debug-source}, which is related to \code{debug-source-from}:
13216 \item[\kwd{file}] the pathname of the file.
13217 \item[\kwd{lisp}] a lambda-expression.
13218 \item[\kwd{stream}] some descriptive string that's otherwise
13224 \begin{defun}{}{debug-source-created}{\args{\var{debug-source}}}
13226 This function returns the universal time someone created the source.
13227 This may be \nil{} if it is unavailable.
13231 \begin{defun}{}{debug-source-compiled}{\args{\var{debug-source}}}
13233 This function returns the time someone compiled the source. This is
13234 \nil{} if the source is uncompiled.
13238 \begin{defun}{}{debug-source-root-number}{\args{\var{debug-source}}}
13240 This returns the number of top-level forms processed by the compiler
13241 before compiling this source. If this source is uncompiled, this is
13242 zero. This may be zero even if the source is compiled since the
13243 first form in the first file compiled in one compilation, for
13244 example, must have a root number of zero---the compiler saw no other
13245 top-level forms before it.
13249 %%\node Source Translation Utilities, , Debug-sources, Debugger Programmer's Interface
13250 \section{Source Translation Utilities}
13252 These functions provide a mechanism for converting the rather
13253 obscure (but highly compact) representation of source locations into an
13254 actual source form:
13256 \begin{defun}{}{debug-source-start-positions}{\args{\var{debug-source}}}
13258 This function returns the file position of each top-level form as a
13259 vector if \var{debug-source} is from a \kwd{file}. If
13260 \code{debug-source-from} is \kwd{lisp} or \kwd{stream}, or the file
13261 is byte-compiled, then the result is \false.
13265 \begin{defun}{}{form-number-translations}{\args{\var{form}
13268 This function returns a table mapping form numbers (see
13269 \code{code-location-form-number}) to source-paths. A source-path
13270 indicates a descent into the top-level-form \var{form}, going
13271 directly to the subform corresponding to a form number.
13272 \var{tlf-number} is the top-level-form number of \var{form}.
13276 \begin{defun}{}{source-path-context}{%
13277 \args{\var{form} \var{path} \var{context}}}
13279 This function returns the subform of \var{form} indicated by the
13280 source-path. \var{Form} is a top-level form, and \var{path} is a
13281 source-path into it. \var{Context} is the number of enclosing forms
13282 to return instead of directly returning the source-path form. When
13283 \var{context} is non-zero, the form returned contains a marker,
13284 \code{\#:****HERE****}, immediately before the form indicated by
13291 %%\node Function Index, Variable Index, Debugger Programmer's Interface, Top
13292 %%\unnumbered{Function Index}
13293 \cindex{Function Index}
13299 %%\node Variable Index, Type Index, Function Index, Top
13300 %%\unnumbered{Variable Index}
13301 \cindex{Variable Index}
13307 %%\node Type Index, Concept Index, Variable Index, Top
13308 %%\unnumbered{Type Index}
13309 \cindex{Type Index}
13314 %%\node Concept Index, , Type Index, Top
13315 %%\unnumbered{Concept Index}
13316 \cindex{Concept Index}
13320 \printindex[concept]