doc/cmucl/cmu-user/cmu-user.tex

   1 %% CMU Common Lisp User's Manual.
   2 %%
   3 %% Aug 97   Raymond Toy
   4 %% This is a modified version of the original CMUCL User's Manual.
   5 %% The key changes are modification of this file to use standard
   6 %% LaTeX2e.  This means latexinfo isn't going to work anymore.
   7 %% However, Latex2html support has been added.
   8 %%
   9 %% Jan 1998 Paul Werkowski
  10 %% A few of the packages below are not part of the standard LaTeX2e
  11 %% distribution, and must be obtained from a repository. At this time
  12 %% I was able to fetch from
  13 %% ftp.cdrom.com:pub/tex/ctan/macros/latex/contrib/supported/
  14 %%                      camel/index.ins
  15 %%                      camel/index.dtx
  16 %%                      calc/calc.ins
  17 %%                      calc/calc.dtx
  18 %%                      changebar/changebar.ins
  19 %%                      changebar/changebar.dtx
  20 %% One runs latex on the .ins file to produce .tex and/or .sty
  21 %% files that must be put in a path searched by latex.
  22 %%
  23 \documentclass{report}
  24 \usepackage{changebar}
  25 \usepackage{xspace}
  26 \usepackage{alltt}
  27 \usepackage{index}
  28 \usepackage{verbatim}
  29 \usepackage{ifthen}
  30 \usepackage{calc}
  31 %\usepackage{html2e}
  32 \usepackage{html,color}
  33 \usepackage{varioref}
  34
  35 %% Define the indices.  We need one for Types, Variables, Functions,
  36 %% and a general concept index.
  37 \makeindex
  38 \newindex{types}{tdx}{tnd}{Type Index}
  39 \newindex{vars}{vdx}{vnd}{Variable Index}
  40 \newindex{funs}{fdx}{fnd}{Function Index}
  41 \newindex{concept}{cdx}{cnd}{Concept Index}
  42
  43 \newcommand{\tindexed}[1]{\index[types]{#1}\textsf{#1}}
  44 \newcommand{\findexed}[1]{\index[funs]{#1}\textsf{#1}}
  45 \newcommand{\vindexed}[1]{\index[vars]{#1}\textsf{*#1*}}
  46 \newcommand{\cindex}[1]{\index[concept]{#1}}
  47 \newcommand{\cpsubindex}[2]{\index[concept]{#1!#2}}
  48
  49 %% This code taken from the LaTeX companion.  It's meant as a
  50 %% replacement for the description environment.  We want one that
  51 %% prints description items in a fixed size box and puts the
  52 %% description itself on the same line or the next depending on the
  53 %% size of the item.
  54 \newcommand{\entrylabel}[1]{\mbox{#1}\hfil}
  55 \newenvironment{entry}{%
  56   \begin{list}{}%
  57     {\renewcommand{\makelabel}{\entrylabel}%
  58       \setlength{\labelwidth}{45pt}%
  59       \setlength{\leftmargin}{\labelwidth+\labelsep}}}%
  60   {\end{list}}
  61
  62 \newlength{\Mylen}
  63 \newcommand{\Lentrylabel}[1]{%
  64   \settowidth{\Mylen}{#1}%
  65   \ifthenelse{\lengthtest{\Mylen > \labelwidth}}%
  66   {\parbox[b]{\labelwidth}%  term > labelwidth
  67     {\makebox[0pt][l]{#1}\\}}%
  68   {#1}%
  69   \hfil\relax}
  70 \newenvironment{Lentry}{%
  71   \renewcommand{\entrylabel}{\Lentrylabel}
  72   \begin{entry}}%
  73   {\end{entry}}
  74
  75 \newcommand{\fcntype}[1]{\textit{#1}}
  76 \newcommand{\argtype}[1]{\textit{#1}}
  77 \newcommand{\fcnname}[1]{\textsf{#1}}
  78
  79 \newlength{\formnamelen}        % length of a name of a form
  80 \newlength{\pboxargslen}        % length of parbox for arguments
  81 \newlength{\typelen}            % length of the type label for the form
  82
  83 \newcommand{\args}[1]{#1}
  84 \newcommand{\keys}[1]{\textsf{\&key} \= #1}
  85 \newcommand{\morekeys}[1]{\\ \> #1}
  86 \newcommand{\yetmorekeys}[1]{\\ \> #1}
  87
  88 \newcommand{\defunvspace}{\ifhmode\unskip \par\fi\addvspace{18pt plus 12pt minus 6pt}}
  89
  90
  91 %% \layout[pkg]{name}{param list}{type}
  92 %%
  93 %% This lays out an entry like so:
  94 %%
  95 %% pkg:name arg1 arg2                             [Function]
  96 %%
  97 %% where [Function] is flush right.
  98 %% (pkg and name are simply concatenated, so pkg must carry its own colon.)
  99 \newcommand{\layout}[4][\mbox{}]{% #1 pkg prefix (optional), #2 name, #3 param list, #4 type tag
 100   \par\noindent
 101   \fcnname{#1#2\hspace{1em}}% typeset the name followed by a 1em gap
 102   \settowidth{\formnamelen}{\fcnname{#1#2\hspace{1em}}}% measure that same name text
 103   \settowidth{\typelen}{[\argtype{#4}]}% measure the bracketed type tag (\argtype and \fcntype are both \textit)
 104   \setlength{\pboxargslen}{\linewidth}% the parameter box gets what is left of the line:
 105   \addtolength{\pboxargslen}{-1\formnamelen}%   the line width minus the name ...
 106   \addtolength{\pboxargslen}{-1\typelen}%   ... minus the type tag
 107   \begin{minipage}[t]{\pboxargslen}
 108     \begin{tabbing}
 109       #3
 110     \end{tabbing}
 111   \end{minipage}
 112   \hfill[\fcntype{#4}]% \hfill pushes the type tag to the right margin
 113   \par\addvspace{2pt plus 2pt minus 2pt}}
 114
 115 \newcommand{\vrindexbold}[1]{\index[vars]{#1|textbf}}
 116 \newcommand{\fnindexbold}[1]{\index[funs]{#1|textbf}}
 117
 118 %% Define a new type
 119 %%
 120 %% \begin{deftp}{typeclass}{typename}{args}
 121 %%    some description
 122 %% \end{deftp}
 123 \newenvironment{deftp}[3]{%
 124   \par\bigskip\index[types]{#2|textbf}%
 125   \layout{#2}{\var{#3}}{#1}
 126   }{}
 127
 128 %% Define a function
 129 %%
 130 %% \begin{defun}{pkg}{name}{params}
 131 %%   \defunx[pkg]{name}{params}
 132 %%   description of function
 133 %% \end{defun}
 134 \newenvironment{defun}[3]{%
 135   \par\defunvspace\fnindexbold{#2}\label{FN:#2}%
 136   \layout[#1]{#2}{#3}{Function}
 137   }{}
 138 \newcommand{\defunx}[3][\mbox{}]{%
 139   \par\fnindexbold{#2}\label{FN:#2}%
 140   \layout[#1]{#2}{#3}{Function}}
 141
 142 %% Define a macro: index it (bold page number), label it FN:name and lay
 143 %% it out tagged [Macro].  \defmacx adds a further entry, without the vspace.
 144 %% \begin{defmac}{pkg}{name}{params}
 145 %%   \defmacx[pkg]{name}{params}
 146 %%   description of macro
 147 %% \end{defmac}
 148 \newenvironment{defmac}[3]{%
 149   \par\defunvspace\fnindexbold{#2}\label{FN:#2}%
 150   \layout[#1]{#2}{#3}{Macro}}{}
 151 \newcommand{\defmacx}[3][\mbox{}]{%
 152   \par\fnindexbold{#2}\label{FN:#2}%
 153   \layout[#1]{#2}{#3}{Macro}}
 154
 155 %% Define a variable
 156 %%
 157 %% \begin{defvar}{pkg}{name}
 158 %%   \defvarx[pkg]{name}
 159 %%   description of defvar
 160 %% \end{defvar}
 161 \newenvironment{defvar}[2]{%
 162   \par\defunvspace\vrindexbold{#2}\label{VR:#2}
 163   \layout[#1]{*#2*}{}{Variable}}{}
 164 \newcommand{\defvarx}[2][\mbox{}]{%
 165   \par\vrindexbold{#2}\label{VR:#2}
 166   \layout[#1]{*#2*}{}{Variable}}
 167
 168 %% Define a constant: lay out an entry tagged [Constant].
 169 %% Unlike defvar, no index entry or label is generated here.
 170 %% \begin{defconst}{pkg}{name}
 171 %%   \defconstx[pkg]{name}
 172 %%   description of defconst
 173 %% \end{defconst}
 174 \newcommand{\defconstx}[2][\mbox{}]{%
 175   \layout[#1]{#2}{}{Constant}}
 176 \newenvironment{defconst}[2]{%
 177   \defunvspace\defconstx[#1]{#2}}{}% explicit empty end code, as in defun/defvar
 178
 179 \newenvironment{example}{\begin{quote}\begin{alltt}}{\end{alltt}\end{quote}}
 180 \newenvironment{lisp}{\begin{example}}{\end{example}}
 181 \newenvironment{display}{\begin{quote}\begin{alltt}}{\end{alltt}\end{quote}}
 182
 183 \newcommand{\hide}[1]{}
 184 \newcommand{\trnumber}[1]{#1}
 185 \newcommand{\citationinfo}[1]{#1}
 186 \newcommand{\var}[1]{{\textsf{\textsl{#1}}\xspace}}
 187 \newcommand{\code}[1]{\textnormal{{\sffamily #1}}}
 188 \newcommand{\file}[1]{`\texttt{#1}'}
 189 \newcommand{\samp}[1]{`\texttt{#1}'}
 190 \newcommand{\kwd}[1]{\code{:#1}}
 191 \newcommand{\F}[1]{\code{#1}}
 192 \newcommand{\w}[1]{\hbox{#1}}
 193 \renewcommand{\b}[1]{\textrm{\textbf{#1}}}
 194 \renewcommand{\i}[1]{\textit{#1}}
 195 \newcommand{\ctrl}[1]{$\uparrow$\textsf{#1}}
 196 \newcommand{\result}{$\Rightarrow$}
 197 \newcommand{\myequiv}{$\equiv$}
 198 \newcommand{\back}[1]{\(\backslash\)#1}
 199 \newcommand{\pxlref}[1]{see section~\ref{#1}, page~\pageref{#1}}
 200 \newcommand{\xlref}[1]{See section~\ref{#1}, page~\pageref{#1}}
 201
 202 \newcommand{\false}{\textsf{nil}}
 203 \newcommand{\true}{\textsf{t}}
 204 \newcommand{\nil}{\textsf{nil}}
 205 \newcommand{\FALSE}{\textsf{nil}}
 206 \newcommand{\TRUE}{\textsf{t}}
 207 \newcommand{\NIL}{\textsf{nil}}
 208
 209 \newcommand{\ampoptional}{\textsf{\&optional}}
 210 \newcommand{\amprest}{\textsf{\&rest}}
 211 \newcommand{\ampbody}{\textsf{\&body}}
 212 \newcommand{\mopt}[1]{{$\,\{$}\textnormal{\textsf{\textsl{#1\/}}}{$\}\,$}}
 213 \newcommand{\mstar}[1]{{$\,\{$}\textnormal{\textsf{\textsl{#1\/}}}{$\}^*\,$}}
 214 \newcommand{\mplus}[1]{{$\,\{$}\textnormal{\textsf{\textsl{#1\/}}}{$\}^+\,$}}
 215 \newcommand{\mgroup}[1]{{$\,\{$}\textnormal{\textsf{\textsl{#1\/}}}{$\}\,$}}
 216 \newcommand{\mor}{$|$}
 217
 218 \newcommand{\funref}[1]{\findexed{#1} (page~\pageref{FN:#1})} % index and print a function name, then the page of its FN: label
 219 \newcommand{\specref}[1]{\findexed{#1} (page~\pageref{FN:#1})} % same expansion as \funref
 220 \newcommand{\macref}[1]{\findexed{#1} (page~\pageref{FN:#1})} % same expansion as \funref
 221 \newcommand{\varref}[1]{\vindexed{#1} (page~\pageref{VR:#1})} % variable counterpart: *name* plus the page of its VR: label
 222 \newcommand{\conref}[1]{\conindexed{#1} (page~\pageref{VR:#1})} % NOTE(review): \conindexed is not defined in the visible preamble and defconst sets no VR: label -- confirm before use
 223
 224 %% Some common abbreviations
 225 \newcommand{\clisp}{Common Lisp}
 226 \newcommand{\dash}{---}
 227 \newcommand{\alien}{Alien}
 228 \newcommand{\aliens}{Aliens}
 229 \newcommand{\Aliens}{Aliens}
 230 \newcommand{\Alien}{Alien}
 231 \newcommand{\Hemlock}{Hemlock}
 232 \newcommand{\hemlock}{Hemlock}
 233 \newcommand{\python}{Python}
 234 \newcommand{\Python}{Python}
 235 \newcommand{\cmucl}{CMU Common Lisp}
 236 \newcommand{\llisp}{Common Lisp}
 237 \newcommand{\Llisp}{Common Lisp}
 238 \newcommand{\cltl}{\emph{Common Lisp: The Language}}
 239 \newcommand{\cltltwo}{\emph{Common Lisp: The Language 2}}
 240
 241 %% Replacement commands when we run latex2html.  This should be last
 242 %% so that latex2html uses these commands instead of the LaTeX
 243 %% commands above.
 244 \begin{htmlonly}
 245   \usepackage{makeidx}
 246
 247   \newcommand{\var}[1]{\textnormal{\textit{#1}}}
 248   \newcommand{\code}[1]{\textnormal{\texttt{#1}}}
 249   %%\newcommand{\printindex}[1][\mbox{}]{}
 250
 251   %% We need the quote environment because alltt is broken.  The
 252   %% quote environment helps us postprocess the result to get
 253   %% what we want.
 254   \newenvironment{example}{\begin{quote}\begin{alltt}}{\end{alltt}\end{quote}}
 255   \newenvironment{display}{\begin{quote}\begin{alltt}}{\end{alltt}\end{quote}}
 256
 257   \newcommand{\textnormal}[1]{\rm #1}
 258   \newcommand{\hbox}[1]{\mbox{#1}}
 259   \newcommand{\xspace}{}
 260   \newcommand{\newindex}[4]{} %% no-op stub: swallows the four \newindex arguments
 261
 262   \newcommand{\pxlref}[1]{see section~\ref{#1}}
 263   \newcommand{\xlref}[1]{See section~\ref{#1}}
 264
 265   \newcommand{\tindexed}[1]{\index{#1}\texttt{#1}}
 266   \newcommand{\findexed}[1]{\index{#1}\texttt{#1}}
 267   \newcommand{\vindexed}[1]{\index{#1}\texttt{*#1*}}
 268   \newcommand{\cindex}[1]{\index{#1}}
 269   \newcommand{\cpsubindex}[2]{\index{#1!#2}}
 270
 271   \newcommand{\keys}[1]{\texttt{\&key} #1}
 272   \newcommand{\morekeys}[1]{#1}
 273   \newcommand{\yetmorekeys}[1]{#1}
 274
 275   \newenvironment{defun}[3]{%
 276     \textbf{[Function]}\\
 277     \texttt{#1#2} \emph{#3}\\}{}
 278   \newcommand{\defunx}[3][\mbox{}]{%
 279     \texttt{#1#2} {\em #3}\\}
 280   \newenvironment{defmac}[3]{%
 281     \textbf{[Macro]}\\
 282     \texttt{#1#2} \emph{#3}\\}{}
 283   \newcommand{\defmacx}[3][\mbox{}]{%
 284     \texttt{#1#2} {\em #3}\\}
 285   \newenvironment{defvar}[2]{%
 286     \textbf{[Variable]}\\
 287     \texttt{#1*#2*}\\ \\}{}
 288   \newcommand{\defvarx}[2][\mbox{}]{%
 289     \texttt{#1*#2*}\\}
 290   \newenvironment{defconst}[2]{%
 291     \textbf{[Constant]}\\
 292     \texttt{#1#2}\\}{}
 293   \newcommand{\defconstx}[2][\mbox{}]{\texttt{#1#2}\\}
 294   \newenvironment{deftp}[3]{%
 295     \textbf{[#1]}\\
 296     \texttt{#2} \textit{#3}\\}{}
 297   \newenvironment{Lentry}{\begin{description}}{\end{description}}
 298 \end{htmlonly}
 299
 300 %% Set up margins
 301 \setlength{\oddsidemargin}{-10pt}
 302 \setlength{\evensidemargin}{-10pt}
 303 \setlength{\topmargin}{-40pt}
 304 \setlength{\headheight}{12pt}
 305 \setlength{\headsep}{25pt}
 306 \setlength{\footskip}{30pt}
 307 \setlength{\textheight}{9.25in}
 308 \setlength{\textwidth}{6.75in}
 309 \setlength{\columnsep}{0.375in}
 310 \setlength{\columnseprule}{0pt}
 311
 312
 313 \setcounter{tocdepth}{2}
 314 \setcounter{secnumdepth}{3}
 315 \renewcommand{\textfraction}{.1}           % minimum share of a page left for text
 316 \renewcommand{\bottomfraction}{.9}         % was .3
 317 \renewcommand{\topfraction}{.9}            % maximum share of a page for top floats
 318
 319 \pagestyle{headings}
 320
 321 \begin{document}
 322 %%\alwaysrefill
 323 \relax
 324 %%\newindex{cp}
 325 %%\newindex{ky}
 326
 327 \newcommand{\theabstract}{%
 328
 329   CMU Common Lisp is an implementation of Common Lisp that runs on
 330   various Unix workstations.  See the README file in the distribution
 331   for current platforms.  The largest single part of this document
 332   describes the Python compiler and the programming styles and
 333   techniques that the compiler encourages.  The rest of the document
 334   describes extensions and the implementation dependent choices made
 335   in developing this implementation of Common Lisp.  We have added
 336   several extensions, including a source level debugger, an interface
 337   to Unix system calls, a foreign function call interface, support for
 338   interprocess communication and remote procedure call, and other
 339   features that provide a good environment for developing Lisp code.
 340   }
 341
 342 \newcommand{\researchcredit}{%
 343   This research was sponsored by the Defense Advanced Research
 344   Projects Agency, Information Science and Technology Office, under
 345   the title \emph{Research on Parallel Computing} issued by DARPA/CMO
 346   under Contract MDA972-90-C-0035 ARPA Order No.  7330.
 347
 348   The views and conclusions contained in this document are those of
 349   the authors and should not be interpreted as representing the
 350   official policies, either expressed or implied, of the Defense
 351   Advanced Research Projects Agency or the U.S. government.}
 352
 353 \pagestyle{empty}
 354 \title{CMU Common Lisp User's Manual}
 355
 356 %%\author{Robert A. MacLachlan, \var{Editor}}
 357 %%\date{July 1992}
 358 %%\trnumber{CMU-CS-92-161}
 359 %%\citationinfo{
 360 %%\begin{center}
 361 %%Supersedes Technical Reports CMU-CS-87-156 and CMU-CS-91-108.
 362 %%\end{center}
 363 %%}
 364 %%%%\arpasupport{strategic}
 365 %%\abstract{\theabstract}
 366 %%%%\keywords{lisp, Common Lisp, manual, compiler,
 367 %%%%          programming language implementation, programming environment}
 368
 369 %%\maketitle
 370 \begin{latexonly}
 371
 372 %%  \title{CMU Common Lisp User's Manual}
 373
 374   \author{Robert A. MacLachlan,
 375   \emph{Editor}%
 376   \thanks{\small This research was sponsored by the Defense Advanced
 377     Research Projects Agency, Information Science and Technology
 378     Office, under the title \emph{Research on Parallel Computing}
 379     issued by DARPA/CMO under Contract MDA972-90-C-0035 ARPA Order No.
 380     7330.  The views and conclusions contained in this document are
 381     those of the authors and should not be interpreted as representing
 382     the official policies, either expressed or implied, of the Defense
 383     Advanced Research Projects Agency or the U.S. government.}}
 384
 385
 386
 387 \date{\bigskip
 388   July 1992 \\ CMU-CS-92-161 \\
 389   \vspace{0.25in}
 390   October 31, 1997 \\
 391   Net Version \\
 392   \vspace{0.75in} {\small
 393     School of Computer Science \\
 394     Carnegie Mellon University \\
 395     Pittsburgh, PA 15213} \\
 396   \vspace{0.5in} \small Supersedes Technical Reports CMU-CS-87-156 and
 397   CMU-CS-91-108.\\
 398   \vspace{0.5in} \textbf{Abstract} \medskip
 399   \begin{quote}
 400     \theabstract
 401   \end{quote}
 402   }
 403
 404 \maketitle
 405 \end{latexonly}
 406
 407 %% Nice HTML version of the title page
 408 \begin{rawhtml}
 409
 410   <h1 align=center>CMU Common Lisp User's Manual</h1>
 411
 412     <p align=center>Robert A. MacLachlan, <EM>Editor</EM>
 413     </p>
 414     <p align=center>
 415       July 1992 <BR>
 416       CMU-CS-92-161 <BR>
 417     </p>
 418     <br>
 419     <p align=center>
 420       July 1997 <BR>
 421       Net Version <BR>
 422     </p>
 423
 424     <p align=center>
 425       School of Computer Science <BR>
 426       Carnegie Mellon University <BR>
 427       Pittsburgh, PA 15213 <BR>
 428     </p>
 429     <br>
 430     <p>
 431       Supersedes Technical Reports CMU-CS-87-156 and
 432       CMU-CS-91-108.<BR>
 433     </p>
 434
 435     <p align=center>
 436       <b>Abstract</b>
 437     <blockquote>
 438       CMU Common Lisp is an implementation of Common Lisp that runs on
 439       various Unix workstations.  See the README file in the
 440       distribution for current platforms.  The largest single part of
 441       this document describes the Python compiler and the programming
 442       styles and techniques that the compiler encourages.  The rest of
 443       the document describes extensions and the implementation
 444       dependent choices made in developing this implementation of
 445       Common Lisp.  We have added several extensions, including a
 446       source level debugger, an interface to Unix system calls, a
 447       foreign function call interface, support for interprocess
 448       communication and remote procedure call, and other features that
 449       provide a good environment for developing Lisp code.
 450     </blockquote>
 451     </p>
 452     <blockquote><font size=-1>
 453     This research was sponsored by the Defense Advanced Research
 454     Projects Agency, Information Science and Technology Office, under
 455     the title <em>Research on Parallel Computing</em> issued by DARPA/CMO
 456     under Contract MDA972-90-C-0035 ARPA Order No.  7330.
 457     <p>
 458     The views and conclusions contained in this document are those of
 459     the authors and should not be interpreted as representing the
 460     official policies, either expressed or implied, of the Defense
 461     Advanced Research Projects Agency or the U.S. government.
 462     </p></font>
 463   </blockquote>
 464     </p>
 465 \end{rawhtml}
 466 \clearpage
 467 \vspace*{\fill}
 468 \textbf{Keywords:} lisp, Common Lisp, manual, compiler,
 469 programming language implementation, programming environment
 470 \clearpage
 471 \pagestyle{headings}
 472 \pagenumbering{roman}
 473 \tableofcontents
 474
 475 \clearpage
 476 \pagenumbering{arabic}
 477 %%\end{iftex}
 478
 479 %%\setfilename{cmu-user.info}
 480 %%\node Top, Introduction, (dir), (dir)
 481
 482
 483 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/intro.ms}
 484
 485
 486
 487 \hide{ -*- Dictionary: cmu-user -*- }
 488 \begin{comment}
 489 * Introduction::
 490 * Design Choices and Extensions::
 491 * The Debugger::
 492 * The Compiler::
 493 * Advanced Compiler Use and Efficiency Hints::
 494 * UNIX Interface::
 495 * Event Dispatching with SERVE-EVENT::
 496 * Alien Objects::
 497 * Interprocess Communication under LISP::
 498 * Debugger Programmer's Interface::
 499 * Function Index::
 500 * Variable Index::
 501 * Type Index::
 502 * Concept Index::
 503
 504  --- The Detailed Node Listing ---
 505
 506 Introduction
 507
 508 * Support::
 509 * Local Distribution of CMU Common Lisp::
 510 * Net Distribution of CMU Common Lisp::
 511 * Source Availability::
 512 * Command Line Options::
 513 * Credits::
 514
 515 Design Choices and Extensions
 516
 517 * Data Types::
 518 * Default Interrupts for Lisp::
 519 * Packages::
 520 * The Editor::
 521 * Garbage Collection::
 522 * Describe::
 523 * The Inspector::
 524 * Load::
 525 * The Reader::
 526 * Running Programs from Lisp::
 527 * Saving a Core Image::
 528 * Pathnames::
 529 * Filesystem Operations::
 530 * Time Parsing and Formatting::
 531 * Lisp Library::
 532
 533 Data Types
 534
 535 * Symbols::
 536 * Integers::
 537 * Floats::
 538 * Characters::
 539 * Array Initialization::
 540
 541 Floats
 542
 543 * IEEE Special Values::
 544 * Negative Zero::
 545 * Denormalized Floats::
 546 * Floating Point Exceptions::
 547 * Floating Point Rounding Mode::
 548 * Accessing the Floating Point Modes::
 549
 550 The Inspector
 551
 552 * The Graphical Interface::
 553 * The TTY Inspector::
 554
 555 Running Programs from Lisp
 556
 557 * Process Accessors::
 558
 559 Pathnames
 560
 561 * Unix Pathnames::
 562 * Wildcard Pathnames::
 563 * Logical Pathnames::
 564 * Search Lists::
 565 * Predefined Search-Lists::
 566 * Search-List Operations::
 567 * Search List Example::
 568
 569 Logical Pathnames
 570
 571 * Search Lists::
 572 * Search List Example::
 573
 574 Search-List Operations
 575
 576 * Search List Example::
 577
 578 Filesystem Operations
 579
 580 * Wildcard Matching::
 581 * File Name Completion::
 582 * Miscellaneous Filesystem Operations::
 583
 584 The Debugger
 585
 586 * Debugger Introduction::
 587 * The Command Loop::
 588 * Stack Frames::
 589 * Variable Access::
 590 * Source Location Printing::
 591 * Compiler Policy Control::
 592 * Exiting Commands::
 593 * Information Commands::
 594 * Breakpoint Commands::
 595 * Function Tracing::
 596 * Specials::
 597
 598 Stack Frames
 599
 600 * Stack Motion::
 601 * How Arguments are Printed::
 602 * Function Names::
 603 * Funny Frames::
 604 * Debug Tail Recursion::
 605 * Unknown Locations and Interrupts::
 606
 607 Variable Access
 608
 609 * Variable Value Availability::
 610 * Note On Lexical Variable Access::
 611
 612 Source Location Printing
 613
 614 * How the Source is Found::
 615 * Source Location Availability::
 616
 617 Breakpoint Commands
 618
 619 * Breakpoint Example::
 620
 621 Function Tracing
 622
 623 * Encapsulation Functions::
 624
 625 The Compiler
 626
 627 * Compiler Introduction::
 628 * Calling the Compiler::
 629 * Compilation Units::
 630 * Interpreting Error Messages::
 631 * Types in Python::
 632 * Getting Existing Programs to Run::
 633 * Compiler Policy::
 634 * Open Coding and Inline Expansion::
 635
 636 Compilation Units
 637
 638 * Undefined Warnings::
 639
 640 Interpreting Error Messages
 641
 642 * The Parts of the Error Message::
 643 * The Original and Actual Source::
 644 * The Processing Path::
 645 * Error Severity::
 646 * Errors During Macroexpansion::
 647 * Read Errors::
 648 * Error Message Parameterization::
 649
 650 Types in Python
 651
 652 * Compile Time Type Errors::
 653 * Precise Type Checking::
 654 * Weakened Type Checking::
 655
 656 Compiler Policy
 657
 658 * The Optimize Declaration::
 659 * The Optimize-Interface Declaration::
 660
 661 Advanced Compiler Use and Efficiency Hints
 662
 663 * Advanced Compiler Introduction::
 664 * More About Types in Python::
 665 * Type Inference::
 666 * Source Optimization::
 667 * Tail Recursion::
 668 * Local Call::
 669 * Block Compilation::
 670 * Inline Expansion::
 671 * Byte Coded Compilation::
 672 * Object Representation::
 673 * Numbers::
 674 * General Efficiency Hints::
 675 * Efficiency Notes::
 676 * Profiling::
 677
 678 Advanced Compiler Introduction
 679
 680 * Types::
 681 * Optimization::
 682 * Function Call::
 683 * Representation of Objects::
 684 * Writing Efficient Code::
 685
 686 More About Types in Python
 687
 688 * More Types Meaningful::
 689 * Canonicalization::
 690 * Member Types::
 691 * Union Types::
 692 * The Empty Type::
 693 * Function Types::
 694 * The Values Declaration::
 695 * Structure Types::
 696 * The Freeze-Type Declaration::
 697 * Type Restrictions::
 698 * Type Style Recommendations::
 699
 700 Type Inference
 701
 702 * Variable Type Inference::
 703 * Local Function Type Inference::
 704 * Global Function Type Inference::
 705 * Operation Specific Type Inference::
 706 * Dynamic Type Inference::
 707 * Type Check Optimization::
 708
 709 Source Optimization
 710
 711 * Let Optimization::
 712 * Constant Folding::
 713 * Unused Expression Elimination::
 714 * Control Optimization::
 715 * Unreachable Code Deletion::
 716 * Multiple Values Optimization::
 717 * Source to Source Transformation::
 718 * Style Recommendations::
 719
 720 Tail Recursion
 721
 722 * Tail Recursion Exceptions::
 723
 724 Local Call
 725
 726 * Self-Recursive Calls::
 727 * Let Calls::
 728 * Closures::
 729 * Local Tail Recursion::
 730 * Return Values::
 731
 732 Block Compilation
 733
 734 * Block Compilation Semantics::
 735 * Block Compilation Declarations::
 736 * Compiler Arguments::
 737 * Practical Difficulties::
 738 * Context Declarations::
 739 * Context Declaration Example::
 740
 741 Inline Expansion
 742
 743 * Inline Expansion Recording::
 744 * Semi-Inline Expansion::
 745 * The Maybe-Inline Declaration::
 746
 747 Object Representation
 748
 749 * Think Before You Use a List::
 750 * Structure Representation::
 751 * Arrays::
 752 * Vectors::
 753 * Bit-Vectors::
 754 * Hashtables::
 755
 756 Numbers
 757
 758 * Descriptors::
 759 * Non-Descriptor Representations::
 760 * Variables::
 761 * Generic Arithmetic::
 762 * Fixnums::
 763 * Word Integers::
 764 * Floating Point Efficiency::
 765 * Specialized Arrays::
 766 * Specialized Structure Slots::
 767 * Interactions With Local Call::
 768 * Representation of Characters::
 769
 770 General Efficiency Hints
 771
 772 * Compile Your Code::
 773 * Avoid Unnecessary Consing::
 774 * Complex Argument Syntax::
 775 * Mapping and Iteration::
 776 * Trace Files and Disassembly::
 777
 778 Efficiency Notes
 779
 780 * Type Uncertainty::
 781 * Efficiency Notes and Type Checking::
 782 * Representation Efficiency Notes::
 783 * Verbosity Control::
 784
 785 Profiling
 786
 787 * Profile Interface::
 788 * Profiling Techniques::
 789 * Nested or Recursive Calls::
 790 * Clock resolution::
 791 * Profiling overhead::
 792 * Additional Timing Utilities::
 793 * A Note on Timing::
 794 * Benchmarking Techniques::
 795
 796 UNIX Interface
 797
 798 * Reading the Command Line::
 799 * Lisp Equivalents for C Routines::
 800 * Type Translations::
 801 * System Area Pointers::
 802 * Unix System Calls::
 803 * File Descriptor Streams::
 804 * Making Sense of Mach Return Codes::
 805 * Unix Interrupts::
 806
 807 Unix Interrupts
 808
 809 * Changing Interrupt Handlers::
 810 * Examples of Signal Handlers::
 811
 812 Event Dispatching with SERVE-EVENT
 813
 814 * Object Sets::
 815 * The SERVE-EVENT Function::
 816 * Using SERVE-EVENT with Unix File Descriptors::
 817 * Using SERVE-EVENT with the CLX Interface to X::
 818 * A SERVE-EVENT Example::
 819
 820 Using SERVE-EVENT with the CLX Interface to X
 821
 822 * Without Object Sets::
 823 * With Object Sets::
 824
 825 A SERVE-EVENT Example
 826
 827 * Without Object Sets Example::
 828 * With Object Sets Example::
 829
 830 Alien Objects
 831
 832 * Introduction to Aliens::
 833 * Alien Types::
 834 * Alien Operations::
 835 * Alien Variables::
 836 * Alien Data Structure Example::
 837 * Loading Unix Object Files::
 838 * Alien Function Calls::
 839 * Step-by-Step Alien Example::
 840
 841 Alien Types
 842
 843 * Defining Alien Types::
 844 * Alien Types and Lisp Types::
 845 * Alien Type Specifiers::
 846 * The C-Call Package::
 847
 848 Alien Operations
 849
 850 * Alien Access Operations::
 851 * Alien Coercion Operations::
 852 * Alien Dynamic Allocation::
 853
 854 Alien Variables
 855
 856 * Local Alien Variables::
 857 * External Alien Variables::
 858
 859 Alien Function Calls
 860
 861 * alien-funcall::               The alien-funcall Primitive
 862 * def-alien-routine::           The def-alien-routine Macro
 863 * def-alien-routine Example::
 864 * Calling Lisp from C::
 865
 866 Interprocess Communication under LISP
 867
 868 * The REMOTE Package::
 869 * The WIRE Package::
 870 * Out-Of-Band Data::
 871
 872 The REMOTE Package
 873
 874 * Connecting Servers and Clients::
 875 * Remote Evaluations::
 876 * Remote Objects::
 877 * Host Addresses::
 878
 879 The WIRE Package
 880
 881 * Untagged Data::
 882 * Tagged Data::
 883 * Making Your Own Wires::
 884
 885 Debugger Programmer's Interface
 886
 887 * DI Exceptional Conditions::
 888 * Debug-variables::
 889 * Frames::
 890 * Debug-functions::
 891 * Debug-blocks::
 892 * Breakpoints::
 893 * Code-locations::
 894 * Debug-sources::
 895 * Source Translation Utilities::
 896
 897 DI Exceptional Conditions
 898
 899 * Debug-conditions::
 900 * Debug-errors::
 901 \end{comment}
 902
 903 %%\node Introduction, Design Choices and Extensions, Top, Top
 904 \chapter{Introduction}
 905
 906 CMU Common Lisp is a public-domain implementation of Common Lisp developed in
 907 the Computer Science Department of Carnegie Mellon University.  \cmucl{} runs
 908 on various Unix workstations---see the README file in the distribution for
 909 current platforms.  This document describes the implementation based on the
 910 Python compiler.  Previous versions of CMU Common Lisp ran on the IBM RT PC
 911 and (when known as Spice Lisp) on the Perq workstation.  See \code{man cmucl}
 912 (\file{man/man1/cmucl.1}) for other general information.
 913
 914 \cmucl{} sources and executables are freely available via anonymous FTP; this
 915 software is ``as is'', and has no warranty of any kind.  CMU and the
 916 authors assume no responsibility for the consequences of any use of this
 917 software.  See \file{doc/release-notes.txt} for a description of the
 918 state of the release you have.
 919
 920 \begin{comment}
 921 * Support::
 922 * Local Distribution of CMU Common Lisp::
 923 * Net Distribution of CMU Common Lisp::
 924 * Source Availability::
 925 * Command Line Options::
 926 * Credits::
 927 \end{comment}
 928
 929 %%\node Support, Local Distribution of CMU Common Lisp, Introduction, Introduction
 930 \section{Support}
 931
 932 The CMU Common Lisp project is no longer funded, so only minimal support is
 933 being done at CMU.  There is a net community of \cmucl{} users and maintainers
 934 who communicate via comp.lang.lisp and the cmucl-bugs@cs.cmu.edu
 935 \begin{changebar}
 936   and cmucl-imp@cons.org
 937 \end{changebar}
 938 mailing lists.
 939
 940 This manual contains only implementation-specific information about
 941 \cmucl.  Users will also need a separate manual describing the
 942 \clisp{} standard.  \clisp{} was initially defined in \i{Common Lisp:
 943   The Language}, by Guy L.  Steele Jr.  \clisp{} is now undergoing
 944 standardization by the X3J13 committee of ANSI.  The X3J13 spec is not
 945 yet completed, but a number of clarifications and modifications have
 946 been approved.  We intend that \cmucl{} will eventually adhere to the
 947 X3J13 spec, and we have already implemented many of the changes
 948 approved by X3J13.
 949
 950 Until the X3J13 standard is completed, the second edition of
 951 \cltl{} is probably the best available manual for the language and
 952 for our implementation of it.  This book has no official role in the
 953 standardization process, but it does include many of the changes
 954 adopted since the first edition was completed.
 955
 956 In addition to the language itself, this document describes a number
 957 of useful library modules that run in \cmucl. \hemlock, an Emacs-like
 958 text editor, is included as an integral part of the \cmucl{}
 959 environment.  Two documents describe \hemlock{}: the \i{Hemlock User's
 960   Manual}, and the \i{Hemlock Command Implementor's Manual}.
 961
 962 %%\node Local Distribution of CMU Common Lisp, Net Distribution of CMU Common Lisp, Support, Introduction
 963 \section{Local Distribution of CMU Common Lisp}
 964
 965 In CMU CS, \cmucl{} should be runnable as \file{/usr/local/bin/cmucl}.
 966 The full binary distribution should appear under
 967 \file{/usr/local/lib/cmucl/}.  Note that the first time you run Lisp,
 968 it will take AFS several minutes to copy the image into its local
 969 cache.  Subsequent starts will be much faster.
 970
 971 Or, you can run directly out of the AFS release area (which may be
 972 necessary on SunOS machines).  Put this in your \file{.login} shell
 973 script:
 974 \begin{example}
 975 setenv CMUCLLIB "/afs/cs/misc/cmucl/@sys/beta/lib"
 976 setenv PATH \${PATH}:/afs/cs/misc/cmucl/@sys/beta/bin
 977 \end{example}
 978
 979 If you also set \code{MANPATH} or \code{MPATH} (depending on the Unix)
 980 to point to \file{/usr/local/lib/cmucl/man/}, then `\code{man cmucl}'
 981 will give an introduction to CMU CL and \samp{man lisp} will describe
 982 command line options.  For installation notes, see the \file{README}
 983 file in the release area.
 984
 985 See \file{/usr/local/lib/cmucl/doc} for release notes and
 986 documentation.  Hardcopy documentation is available in the document
 987 room.  Documentation supplements may be available for recent
 988 additions: see the \file{README} file.
 989
 990 Send bug reports and questions to \samp{cmucl-bugs@cs.cmu.edu}.  If
 991 you send a bug report to \samp{gripe} or \samp{help}, they will just
 992 forward it to this mailing list.
 993
 994 %%\node Net Distribution of CMU Common Lisp, Source Availability, Local Distribution of CMU Common Lisp, Introduction
 995 \section{Net Distribution of CMU Common Lisp}
 996
 997 \subsection{CMU Distribution}
 998 Externally, CMU Common Lisp is only available via anonymous FTP.  We
 999 don't have the manpower to make tapes.  These are our distribution
1000 machines:
1001 \begin{example}
1002 lisp-rt1.slisp.cs.cmu.edu (128.2.217.9)
1003 lisp-rt2.slisp.cs.cmu.edu (128.2.217.10)
1004 \end{example}
1005
1006 Log in with the user \samp{anonymous} and \samp{username@host} as
1007 password (i.e. your EMAIL address.)  When you log in, the current
1008 directory should be set to the \cmucl{} release area.  If you have any
1009 trouble with FTP access, please send mail to \samp{slisp@cs.cmu.edu}.
1010
1011 The release area holds compressed tar files with names of the form:
1012 \begin{example}
1013 \var{version}-\var{machine}_\var{os}.tar.Z
1014 \end{example}
1015 FTP compressed tar archives in binary mode.  To extract, \samp{cd} to
1016 the directory that is to be the root of the tree, then type:
1017 \begin{example}
1018 uncompress <file.tar.Z | tar xf - .
1019 \end{example}
1020 The resulting tree is about 23 megabytes.  For installation
1021 directions, see the section ``site initialization'' in the \file{README} file at
1022 the root of the tree.
1023
1024 If poor network connections make it difficult to transfer a 10 meg
1025 file, the release is also available split into five parts, with the
1026 suffix \file{.0} to \file{.4}. To extract from multiple files, use:
1027 \begin{example}
1028 cat file.tar.Z.* | uncompress | tar xf - .
1029 \end{example}
1030
1031 The release area also contains source distributions and other binary
1032 distributions.  A listing of the current contents of the release area
1033 is in \file{FILES}.  Major release announcements will be made to
1034 \code{comp.lang.lisp} until there is enough volume to warrant a
1035 \code{comp.lang.lisp.cmu}.
1036
1037 \begin{changebar}
1038 \subsection{Net Distribution}
1039 Although the CMU Common Lisp project is no longer actively developed
1040 by CMU, development has continued.  You can obtain this version from
1041 either
1042 \begin{example}
1043   ftp://ftp2.cons.org/pub/languages/lisp/cmucl
1044   http://www2.cons.org:8000/ftp-area/cmucl/
1045 \end{example}
1046 Further information can be found via the World Wide Web at
1047 \begin{example}
1048   http://www.cons.org/cmucl
1049 \end{example}
1050 \end{changebar}
1051 %%\node Source Availability, Command Line Options, Net Distribution of CMU Common Lisp, Introduction
1052 \section{Source Availability}
1053
1054 Lisp and documentation sources are available via anonymous FTP to
1055 any CMU CS machine.  All CMU written code is public domain, but CMU CL
1056 also makes use of two imported packages: PCL and CLX.  Although these
1057 packages are copyrighted, they may be freely distributed without any
1058 licensing agreement or fee.  See the \file{README} file in the binary
1059 distribution for up-to-date source pointers.
1060
1061 The release area contains a source distribution, which is an image of
1062 all the \file{.lisp} source files used to build a particular system
1063 \var{version}:
1064 \begin{example}
1065 \var{version}-source.tar.Z (3.6 meg)
1066 \end{example}
1067
1068 All of our files (including the release area) are actually in the AFS
1069 file system.  On the release machines, the FTP server's home is the
1070 release directory: \file{/afs/cs.cmu.edu/project/clisp/release}.  The
1071 actual working source areas are in other subdirectories of
1072 \file{clisp}, and you can directly ``cd'' to those directories if you
1073 know the name.  Due to the way anonymous FTP access control is done,
1074 it is important to ``cd'' to the source directory with a single
1075 command, and then do a ``get'' operation.
1076
1077 \begin{changebar}
1078   Alternatively, you can obtain the current sources via WWW at
1079   \begin{example}
1080     http://www.cons.org/cmucl
1081   \end{example}
1082   which contains pointers on how to get a \code{tar} file of the
1083   current sources or how to get an individual file from the sources.
1084   Binary versions for selected platforms are available as well.
1085 \end{changebar}
1086
1087 %%\node Command Line Options, Credits, Source Availability, Introduction
1088 \section{Command Line Options}
1089
1090 The command line syntax and environment are described in the lisp(1)
1091 man page in the man/man1 directory of the distribution.  See also
1092 cmucl(1).  Currently Lisp accepts the following switches:
1093 \begin{Lentry}
1094   \begin{changebar}
1095   \item[\code{-batch}] specifies batch mode, where all input is
1096     directed from standard-input.  An error code of 0 is returned upon
1097     encountering an EOF and 1 otherwise.
1098   \end{changebar}
1099 \item[\code{-core}] requires an argument that should be the name of a
1100   core file.  Rather than using the default core file
1101   (\file{lib/lisp.core}), the specified core file is loaded.
1102
1103 \item[\code{-edit}] specifies to enter Hemlock.  A file to edit may be
1104   specified by placing the name of the file between the program name
1105   (usually \file{lisp}) and the first switch.
1106
1107 \item[\code{-eval}] accepts one argument which should be a Lisp form
1108   to evaluate during the start up sequence.  The value of the form
1109   will not be printed unless it is wrapped in a form that does output.
1110
1111 \item[\code{-hinit}] accepts an argument that should be the name of
1112   the hemlock init file to load the first time the function
1113   \findexed{ed} is invoked.  The default is to load
1114   \file{hemlock-init.\var{object-type}}, or if that does not exist,
1115   \file{hemlock-init.lisp} from the user's home directory.  If the
1116   file is not in the user's home directory, the full path must be
1117   specified.
1118
1119 \item[\code{-init}] accepts an argument that should be the name of an
1120   init file to load during the normal start up sequence.  The default
1121   is to load \file{init.\var{object-type}} or, if that does not exist,
1122   \file{init.lisp} from the user's home directory.  If the file is not
1123   in the user's home directory, the full path must be specified.
1124
1125 \item[\code{-noinit}] accepts no arguments and specifies that an init
1126   file should not be loaded during the normal start up sequence.
1127   Also, this switch suppresses the loading of a hemlock init file when
1128   Hemlock is started up with the \code{-edit} switch.
1129
1130 \item[\code{-load}] accepts an argument which should be the name of a
1131   file to load into Lisp before entering Lisp's read-eval-print loop.
1132
1133 \item[\code{-slave}] specifies that Lisp should start up as a
1134   \i{slave} Lisp and try to connect to an editor Lisp.  The name of
1135   the editor to connect to must be specified\dash{}to find the
1136   editor's name, use the \hemlock{} ``\code{Accept Slave
1137     Connections}'' command.  The name for the editor Lisp is of the
1138   form:
1139   \begin{example}
1140     \var{machine-name}\code{:}\var{socket}
1141   \end{example}
1142   where \var{machine-name} is the internet host name for the machine
1143   and \var{socket} is the decimal number of the socket to connect to.
1144 \end{Lentry}
1145 For more details on the use of the \code{-edit} and \code{-slave}
1146 switches, see the \i{Hemlock User's Manual}.
1147
1148 Arguments to the above switches can be specified in one of two ways:
1149 \w{\var{switch}\code{=}\var{value}} or
1150 \w{\var{switch}<\var{space}>\var{value}}.  For example, to start up
1151 the saved core file mylisp.core use either of the following two
1152 commands:
1153 \begin{example}
1154 \code{lisp -core=mylisp.core
1155 lisp -core mylisp.core}
1156 \end{example}
1157
1158 %%\node Credits,  , Command Line Options, Introduction
1159 \section{Credits}
1160
1161 Since 1981 many people have contributed to the development of CMU
1162 Common Lisp.  The currently active members are:
1163 \begin{display}
1164 Marco Antoniotti
1165 David Axmark
1166 Miles Bader
1167 Casper Dik
1168 Scott Fahlman * (fearless leader)
1169 Paul Gleichauf *
1170 Richard Harris
1171 Joerg-Cyril Hoehl
1172 Chris Hoover
1173 Simon Leinen
1174 Sandra Loosemore
1175 William Lott *
1176 Robert A. Maclachlan *
1177 \end{display}
1178 \noindent
1179 Many people are voluntarily working on improving CMU Common Lisp.  ``*''
1180 means a full-time CMU employee, and ``+'' means a part-time student
1181 employee.  A partial listing of significant past contributors follows:
1182 \begin{display}
1183 Tim Moore
1184 Sean Hallgren +
1185 Mike Garland +
1186 Ted Dunning
1187 Rick Busdiecker
1188 Bill Chiles *
1189 John Kolojejchick
1190 Todd Kaufmann +
1191 Dave McDonald *
1192 Skef Wholey *
1193 \end{display}
1194
1195
1196 \vspace{2 em}
1197 \researchcredit
1198
1199 \begin{changebar}
1200   From 1995, development of CMU Common Lisp has been continued by a
1201   group of volunteers.  A partial list of volunteers includes the
1202   following
1203   \begin{table}[h]
1204     \begin{center}
1205       \begin{tabular}{ll}
1206         Paul Werkowski & pw@snoopy.mv.com \\
1207         Peter VanEynde & s950045@uia.ua.ac.be \\
1208         Marco Antoniotti & marcoxa@PATH.Berkeley.EDU\\
1209         Martin Cracauer & cracauer@cons.org\\
1210         Douglas Thomas Crosher & dtc@scrooge.ee.swin.oz.au\\
1211         Simon Leinen & simon@switch.ch\\
1212         Rob MacLachlan & ram+@CS.cmu.edu\\
1213         Raymond Toy & toy@rtp.ericsson.se
1214       \end{tabular}
1215     \end{center}
1216   \end{table}
1217
1218   In particular Paul Werkowski completed the port for the x86
1219   architecture for FreeBSD.  Peter VanEynde took the FreeBSD port and
1220   created a Linux version.
1221 \end{changebar}
1222
1223
1224 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/design.ms}
1225
1226 \hide{ -*- Dictionary: cmu-user -*- }
1227 %%\node Design Choices and Extensions, The Debugger, Introduction, Top
1228 \chapter{Design Choices and Extensions}
1229
1230 Several design choices in Common Lisp are left to the individual
1231 implementation, and some essential parts of the programming environment
1232 are left undefined.  This chapter discusses the most important design
1233 choices and extensions.
1234
1235 \begin{comment}
1236 * Data Types::
1237 * Default Interrupts for Lisp::
1238 * Packages::
1239 * The Editor::
1240 * Garbage Collection::
1241 * Describe::
1242 * The Inspector::
1243 * Load::
1244 * The Reader::
1245 * Running Programs from Lisp::
1246 * Saving a Core Image::
1247 * Pathnames::
1248 * Filesystem Operations::
1249 * Time Parsing and Formatting::
1250 * Lisp Library::
1251 \end{comment}
1252
1253 %%\node Data Types, Default Interrupts for Lisp, Design Choices and Extensions, Design Choices and Extensions
1254 \section{Data Types}
1255
1256 \begin{comment}
1257 * Symbols::
1258 * Integers::
1259 * Floats::
1260 * Characters::
1261 * Array Initialization::
1262 \end{comment}
1263
1264 %%\node Symbols, Integers, Data Types, Data Types
1265 \subsection{Symbols}
1266
1267 As in \cltl, all symbols and package names are printed in lower case, as
1268 a user is likely to type them.  Internally, they are normally stored
1269 upper case only.
1270
1271 %%\node Integers, Floats, Symbols, Data Types
1272 \subsection{Integers}
1273
1274 The \tindexed{fixnum} type is equivalent to \code{(signed-byte 30)}.
1275 Integers outside this range are represented as a \tindexed{bignum} or
1276 a word integer (\pxlref{word-integers}.)  Almost all integers that
1277 appear in programs can be represented as a \code{fixnum}, so integer
1278 number consing is rare.
1279
1280 %%\node Floats, Characters, Integers, Data Types
1281 \subsection{Floats}
1282 \label{ieee-float}
1283
1284 \cmucl{} supports two floating point formats: \tindexed{single-float}
1285 and \tindexed{double-float}.  These are implemented with IEEE single
1286 and double float arithmetic, respectively.  \code{short-float} is a
1287 synonym for \code{single-float}, and \code{long-float} is a synonym
1288 for \code{double-float}.  The initial value of
1289 \vindexed{read-default-float-format} is \code{single-float}.
1290
1291 Both \code{single-float} and \code{double-float} are represented with
1292 a pointer descriptor, so float operations can cause number consing.
1293 Number consing is greatly reduced if programs are written to allow the
1294 use of non-descriptor representations (\pxlref{numeric-types}.)
1295
1296
1297 \begin{comment}
1298 * IEEE Special Values::
1299 * Negative Zero::
1300 * Denormalized Floats::
1301 * Floating Point Exceptions::
1302 * Floating Point Rounding Mode::
1303 * Accessing the Floating Point Modes::
1304 \end{comment}
1305
1306 %%\node IEEE Special Values, Negative Zero, Floats, Floats
1307 \subsubsection{IEEE Special Values}
1308
1309 \cmucl{} supports the IEEE infinity and NaN special values.  These
1310 non-numeric values will only be generated when trapping is disabled
1311 for some floating point exception (\pxlref{float-traps}), so users of
1312 the default configuration need not concern themselves with special
1313 values.
1314
1315 \begin{defconst}{extensions:}{short-float-positive-infinity}
1316   \defconstx[extensions:]{short-float-negative-infinity}
1317   \defconstx[extensions:]{single-float-positive-infinity}
1318   \defconstx[extensions:]{single-float-negative-infinity}
1319   \defconstx[extensions:]{double-float-positive-infinity}
1320   \defconstx[extensions:]{double-float-negative-infinity}
1321   \defconstx[extensions:]{long-float-positive-infinity}
1322   \defconstx[extensions:]{long-float-negative-infinity}
1323
1324   The values of these constants are the IEEE positive and negative
1325   infinity objects for each float format.
1326 \end{defconst}
1327
1328 \begin{defun}{extensions:}{float-infinity-p}{\args{\var{x}}}
1329
1330   This function returns true if \var{x} is an IEEE float infinity (of
1331   either sign.)  \var{x} must be a float.
1332 \end{defun}
1333
1334 \begin{defun}{extensions:}{float-nan-p}{\args{\var{x}}}
1335   \defunx[extensions:]{float-trapping-nan-p}{\args{\var{x}}}
1336
1337   \code{float-nan-p} returns true if \var{x} is an IEEE NaN (Not A
1338   Number) object.  \code{float-trapping-nan-p} returns true only if
1339   \var{x} is a trapping NaN.  With either function, \var{x} must be a
1340   float.
1341 \end{defun}
1342
1343 %%\node Negative Zero, Denormalized Floats, IEEE Special Values, Floats
1344 \subsubsection{Negative Zero}
1345
1346 The IEEE float format provides for distinct positive and negative
1347 zeros.  To test the sign on zero (or any other float), use the
1348 \clisp{} \findexed{float-sign} function.  Negative zero prints as
1349 \code{-0.0f0} or \code{-0.0d0}.
1350
1351 %%\node Denormalized Floats, Floating Point Exceptions, Negative Zero, Floats
1352 \subsubsection{Denormalized Floats}
1353
1354 \cmucl{} supports IEEE denormalized floats.  Denormalized floats
1355 provide a mechanism for gradual underflow.  The \clisp{}
1356 \findexed{float-precision} function returns the actual precision of a
1357 denormalized float, which will be less than \findexed{float-digits}.
1358 Note that in order to generate (or even print) denormalized floats,
1359 trapping must be disabled for the underflow exception
1360 (\pxlref{float-traps}.)  The \clisp{}
1361 \w{\code{least-positive-}\var{format}-\code{float}} constants are
1362 denormalized.
1363
1364 \begin{defun}{extensions:}{float-denormalized-p}{\args{\var{x}}}
1365
1366   This function returns true if \var{x} is a denormalized float.
1367   \var{x} must be a float.
1368 \end{defun}
1369
1370 %%\node Floating Point Exceptions, Floating Point Rounding Mode, Denormalized Floats, Floats
1371 \subsubsection{Floating Point Exceptions}
1372 \label{float-traps}
1373
1374 The IEEE floating point standard defines several exceptions that occur
1375 when the result of a floating point operation is unclear or
1376 undesirable.  Exceptions can be ignored, in which case some default
1377 action is taken, such as returning a special value.  When trapping is
1378 enabled for an exception, an error is signalled whenever that exception
1379 occurs.  These are the possible floating point exceptions:
1380 \begin{Lentry}
1381
1382 \item[\kwd{underflow}] This exception occurs when the result of an
1383   operation is too small to be represented as a normalized float in
1384   its format.  If trapping is enabled, the
1385   \tindexed{floating-point-underflow} condition is signalled.
1386   Otherwise, the operation results in a denormalized float or zero.
1387
1388 \item[\kwd{overflow}] This exception occurs when the result of an
1389   operation is too large to be represented as a float in its format.
1390   If trapping is enabled, the \tindexed{floating-point-overflow}
1391   condition is signalled.  Otherwise, the operation results in the
1392   appropriate infinity.
1393
1394 \item[\kwd{inexact}] This exception occurs when the result of a
1395   floating point operation is not exact, i.e. the result was rounded.
1396   If trapping is enabled, the \code{extensions:floating-point-inexact}
1397   condition is signalled.  Otherwise, the rounded result is returned.
1398
1399 \item[\kwd{invalid}] This exception occurs when the result of an
1400   operation is ill-defined, such as \code{\w{(/ 0.0 0.0)}}.  If
1401   trapping is enabled, the \code{extensions:floating-point-invalid}
1402   condition is signalled.  Otherwise, a quiet NaN is returned.
1403
1404 \item[\kwd{divide-by-zero}] This exception occurs when a float is
1405   divided by zero.  If trapping is enabled, the
1406   \tindexed{divide-by-zero} condition is signalled.  Otherwise, the
1407   appropriate infinity is returned.
1408 \end{Lentry}
1409
1410 %%\node Floating Point Rounding Mode, Accessing the Floating Point Modes, Floating Point Exceptions, Floats
1411 \subsubsection{Floating Point Rounding Mode}
1412 \label{float-rounding-modes}
1413
1414 IEEE floating point specifies four possible rounding modes:
1415 \begin{Lentry}
1416
1417 \item[\kwd{nearest}] In this mode, the inexact results are rounded to
1418   the nearer of the two possible result values.  If neither
1419   possibility is nearer, then the even alternative is chosen.  This
1420   form of rounding is also called ``round to even'', and is the form
1421   of rounding specified for the \clisp{} \findexed{round} function.
1422
1423 \item[\kwd{positive-infinity}] This mode rounds inexact results to the
1424   possible value closer to positive infinity.  This is analogous to
1425   the \clisp{} \findexed{ceiling} function.
1426
1427 \item[\kwd{negative-infinity}] This mode rounds inexact results to the
1428   possible value closer to negative infinity.  This is analogous to
1429   the \clisp{} \findexed{floor} function.
1430
1431 \item[\kwd{zero}] This mode rounds inexact results to the possible
1432   value closer to zero.  This is analogous to the \clisp{}
1433   \findexed{truncate} function.
1434 \end{Lentry}
1435
1436 \paragraph{Warning:}
1437
1438 Although the rounding mode can be changed with
1439 \code{set-floating-point-modes}, use of any value other than the
1440 default (\kwd{nearest}) can cause unusual behavior, since it will
1441 affect rounding done by \llisp{} system code as well as rounding in
1442 user code.  In particular, the unary \code{round} function will stop
1443 doing round-to-nearest on floats, and instead do the selected form of
1444 rounding.
1445
1446 %%\node Accessing the Floating Point Modes,  , Floating Point Rounding Mode, Floats
1447 \subsubsection{Accessing the Floating Point Modes}
1448
1449 These functions can be used to modify or read the floating point modes:
1450
1451 \begin{defun}{extensions:}{set-floating-point-modes}{%
1452     \keys{\kwd{traps} \kwd{rounding-mode}}
1453     \morekeys{\kwd{fast-mode} \kwd{accrued-exceptions}}
1454     \yetmorekeys{\kwd{current-exceptions}}}
1455   \defunx[extensions:]{get-floating-point-modes}{}
1456
1457   The keyword arguments to \code{set-floating-point-modes} set various
1458   modes controlling how floating point arithmetic is done:
1459   \begin{Lentry}
1460
1461   \item[\kwd{traps}] A list of the exception conditions that should
1462     cause traps.  Possible exceptions are \kwd{underflow},
1463     \kwd{overflow}, \kwd{inexact}, \kwd{invalid} and
1464     \kwd{divide-by-zero}.  Initially all traps except \kwd{inexact}
1465     are enabled.  \xlref{float-traps}.
1466
1467   \item[\kwd{rounding-mode}] The rounding mode to use when the result
1468     is not exact.  Possible values are \kwd{nearest},
1469     \latex{\kwd{positive\-infinity}}\html{\kwd{positive-infinity}},
1470     \kwd{negative-infinity} and \kwd{zero}.  Initially, the rounding
1471     mode is \kwd{nearest}.  See the warning in section
1472     \ref{float-rounding-modes} about use of other rounding modes.
1473
1474   \item[\kwd{current-exceptions}, \kwd{accrued-exceptions}] Lists of
1475     exception keywords used to set the exception flags.  The
1476     \var{current-exceptions} are the exceptions for the previous
1477     operation, so setting it is not very useful.  The
1478     \var{accrued-exceptions} are a cumulative record of the exceptions
1479     that occurred since the last time these flags were cleared.
1480     Specifying \code{()} will clear any accrued exceptions.
1481
1482   \item[\kwd{fast-mode}] Set the hardware's ``fast mode'' flag, if
1483     any.  When set, IEEE conformance or debuggability may be impaired.
1484     Some machines may not have this feature, in which case the value
1485     is always \false.  No currently supported machines have a fast
1486     mode.
1487   \end{Lentry}
1488   If a keyword argument is not supplied, then the associated state is
1489   not changed.
1490
1491   \code{get-floating-point-modes} returns a list representing the
1492   state of the floating point modes.  The list is in the same format
1493   as the keyword arguments to \code{set-floating-point-modes}, so
1494   \code{apply} could be used with \code{set-floating-point-modes} to
1495   restore the modes in effect at the time of the call to
1496   \code{get-floating-point-modes}.
1497 \end{defun}
1498
1499 \begin{changebar}
1500 To make it easier to control floating-point exceptions, the following
1501 macro is useful.
1502
1503 \begin{defmac}{ext:}{with-float-traps-masked}{traps \ampbody\ body}
1504   \code{body} is executed with the selected floating-point exceptions
1505   given by \code{traps} masked out (disabled).  \code{traps} should be
1506   a list of possible floating-point exceptions that should be ignored.
1507   Possible values are \kwd{underflow}, \kwd{overflow}, \kwd{inexact},
1508   \kwd{invalid} and \kwd{divide-by-zero}.
1509
1510   This is equivalent to saving the current traps from
1511   \code{get-floating-point-modes}, setting the floating-point modes to
1512   the desired exceptions, running the \code{body}, and restoring the
1513   saved floating-point modes.  The advantage of this macro is that it
1514   causes less consing to occur.
1515
1516   Some points about \code{with-float-traps-masked}:
1517
1518   \begin{itemize}
1519   \item Two approaches are available for detecting FP exceptions:
1520     \begin{enumerate}
1521     \item enabling the traps and handling the exceptions
1522     \item disabling the traps and either handling the return values or
1523       checking the accrued exceptions.
1524     \end{enumerate}
1525     Of these the latter is the more portable because on the alpha port
1526     it is not possible to enable some traps at run-time.
1527
1528   \item To assist the checking of the exceptions within the body any
1529     accrued exceptions matching the given traps are cleared at the
1530     start of the body when the traps are masked.
1531
1532   \item To allow the macros to be nested these accrued exceptions are
1533     restored at the end of the body to their values at the start of
1534     the body. Thus any exceptions that occurred within the body will
1535     not affect the accrued exceptions outside the macro.
1536
1537   \item Note that only the given exceptions are restored at the end of
1538     the body so other exceptions will be visible in the accrued
1539     exceptions outside the body.
1540
1541   \item On the x86, setting the accrued exceptions of an unmasked
1542     exception would cause a FP trap. The macro behaviour of restoring
1543     the accrued exceptions ensures that if an accrued exception is
1544     initially not flagged and occurs within the body it will be
1545     restored/cleared at the exit of the body and thus not cause a
1546     trap.
1547
1548   \item On the x86, and, perhaps, the hppa, the FP exceptions may be
1549     delivered at the next FP instruction which requires a FP
1550     \code{wait} instruction (\code{\%vm::float-wait}) if using the lisp
1551     conditions to catch traps within a \code{handler-bind}.  The
1552     \code{handler-bind} macro does the right thing and inserts a
1553     float-wait (at the end of its body on the x86).  The masking and
1554     noting of exceptions is also safe here.
1555
1556   \item The setting of the FP flags uses the
1557     \code{(floating-point-modes)} and the \code{(set
1558       (floating-point-modes)\ldots)} VOPs. These VOPs blindly update
1559     the flags which may include other state.  We assume this state
1560     hasn't changed in between getting and setting the state. For
1561     example, if you used the FP unit between the above calls, the
1562     state may be incorrectly restored! The
1563     \code{with-float-traps-masked} macro keeps the intervening code to
1564     a minimum and uses only integer operations.
1565     %% Safe byte-compiled?
1566     %% Perhaps the VOPs (x86) should be smarter and only update some of
1567     %% the flags, the trap masks and exceptions?
1568   \end{itemize}
1569
1570 \end{defmac}
1571 \end{changebar}
1572
1573 %%\node Characters, Array Initialization, Floats, Data Types
1574 \subsection{Characters}
1575
1576 \cmucl{} implements characters according to \i{Common Lisp: the
1577   Language II}.  The main difference from the first version is that
1578 character bits and font have been eliminated, and the names of the
1579 types have been changed.  \tindexed{base-character} is the new
1580 equivalent of the old \tindexed{string-char}.  In this implementation,
1581 all characters are base characters (there are no extended characters.)
1582 Character codes range between \code{0} and \code{255}, using the ASCII
1583 encoding.
1584 \begin{changebar}
1585   Table~\ref{tbl:chars}~\vpageref{tbl:chars} shows characters
1586   recognized by \cmucl.
1587 \end{changebar}
1588
1589 \begin{changebar}
1590 \begin{table}[tbhp]
1591   \begin{center}
1592     \begin{tabular}{|c|c|l|l|l|l|}
1593       \hline
1594       \multicolumn{2}{|c|}{ASCII} & \multicolumn{1}{|c}{Lisp} &
1595       \multicolumn{3}{|c|}{} \\
1596       \cline{1-2}
1597       Name & Code & \multicolumn{1}{|c|}{Name} & \multicolumn{3}{|c|}{\raisebox{1.5ex}{Alternatives}}\\
1598       \hline
1599       \hline
1600       \code{nul} & 0 & \code{\#\back{NULL}} & \code{\#\back{NUL}} & &\\
1601       \code{bel} & 7 & \code{\#\back{BELL}} & & &\\
1602       \code{bs} &  8 & \code{\#\back{BACKSPACE}} & \code{\#\back{BS}} & &\\
1603       \code{tab} & 9 & \code{\#\back{TAB}} & & &\\
1604       \code{lf} & 10 & \code{\#\back{NEWLINE}} & \code{\#\back{NL}} & \code{\#\back{LINEFEED}} & \code{\#\back{LF}}\\
1605       \code{vt} & 11 & \code{\#\back{VT}} & & &\\ \code{ff} & 12 & \code{\#\back{PAGE}} & \code{\#\back{FORM}} & &\\
1606       \code{cr} & 13 & \code{\#\back{RETURN}} & \code{\#\back{CR}} & &\\
1607       \code{esc} & 27 & \code{\#\back{ESCAPE}} & \code{\#\back{ESC}} & \code{\#\back{ALTMODE}} & \code{\#\back{ALT}}\\
1608       \code{sp} & 32 & \code{\#\back{SPACE}} & \code{\#\back{SP}} & &\\
1609       \code{del} & 127 & \code{\#\back{DELETE}} & \code{\#\back{RUBOUT}} & &\\
1610       \hline
1611     \end{tabular}
1612     \caption{Characters recognized by \cmucl}
1613     \label{tbl:chars}
1614   \end{center}
1615 \end{table}
1616 \end{changebar}
1617
1618 %%\node Array Initialization,  , Characters, Data Types
1619 \subsection{Array Initialization}
1620
1621 If no \kwd{initial-element} is specified, arrays are initialized to zero.
1622
1623
1624 %%\node Default Interrupts for Lisp, Packages, Data Types, Design Choices and Extensions
1625 \section{Default Interrupts for Lisp}
1626
1627 CMU Common Lisp has several interrupt handlers defined when it starts up,
1628 as follows:
1629 \begin{Lentry}
1630
1631 \item[\code{SIGINT} (\ctrl{c})] causes Lisp to enter a break loop.
1632   This puts you into the debugger which allows you to look at the
1633   current state of the computation.  If you proceed from the break
1634   loop, the computation will proceed from where it was interrupted.
1635
1636 \item[\code{SIGQUIT} (\ctrl{L})] causes Lisp to do a throw to the
1637   top-level.  This causes the current computation to be aborted, and
1638   control returned to the top-level read-eval-print loop.
1639
1640 \item[\code{SIGTSTP} (\ctrl{z})] causes Lisp to suspend execution and
1641   return to the Unix shell.  If control is returned to Lisp, the
1642   computation will proceed from where it was interrupted.
1643
1644 \item[\code{SIGILL}, \code{SIGBUS}, \code{SIGSEGV}, and \code{SIGFPE}]
1645   cause Lisp to signal an error.
1646 \end{Lentry}
1647 For keyboard interrupt signals, the standard interrupt character is in
1648 parentheses.  Your \file{.login} may set up different interrupt
1649 characters.  When a signal is generated, there may be some delay before
1650 it is processed since Lisp cannot be interrupted safely in an arbitrary
1651 place.  The computation will continue until a safe point is reached and
1652 then the interrupt will be processed.  \xlref{signal-handlers} to define
1653 your own signal handlers.
1654
1655 %%\node Packages, The Editor, Default Interrupts for Lisp, Design Choices and Extensions
1656 \section{Packages}
1657
1658 When CMU Common Lisp is first started up, the default package is the
1659 \code{user} package.  The \code{user} package uses the
1660 \code{common-lisp}, \code{extensions}, and \code{pcl} packages.  The
1661 symbols exported from these three packages can be referenced without
1662 package qualifiers.  This section describes packages which have
1663 exported interfaces that may concern users.  The numerous internal
1664 packages which implement parts of the system are not described here.
1665 Package nicknames are in parentheses after the full name.
1666 \begin{Lentry}
1667 \item[\code{alien}, \code{c-call}] Export the features of the Alien
1668   foreign data structure facility (\pxlref{aliens}.)
1669
1670 \item[\code{pcl}] This package contains PCL (Portable CommonLoops),
1671   which is a portable implementation of CLOS (the Common Lisp Object
1672   System.)  This implements most (but not all) of the features in the
1673   CLOS chapter of \cltltwo.
1674
1675 \item[\code{debug}] The \code{debug} package contains the command-line
1676   oriented debugger.  It exports various utility functions and
1677   switches.
1678
1679 \item[\code{debug-internals}] The \code{debug-internals} package
1680   exports the primitives used to write debuggers.
1681   \xlref{debug-internals}.
1682
1683 \item[\code{extensions (ext)}] The \code{extensions} package exports
1684   local extensions to Common Lisp that are documented in this manual.
1685   Examples include the \code{save-lisp} function and time parsing.
1686
1687 \item[\code{hemlock (ed)}] The \code{hemlock} package contains all the
1688   code to implement Hemlock commands.  The \code{hemlock} package
1689   currently exports no symbols.
1690
1691 \item[\code{hemlock-internals (hi)}] The \code{hemlock-internals}
1692   package contains code that implements low level primitives and
1693   exports those symbols used to write Hemlock commands.
1694
1695 \item[\code{keyword}] The \code{keyword} package contains keywords
1696   (e.g., \kwd{start}).  All symbols in the \code{keyword} package are
1697   exported and evaluate to themselves (i.e., the value of the symbol
1698   is the symbol itself).
1699
1700 \item[\code{profile}] The \code{profile} package exports a simple
1701   run-time profiling facility (\pxlref{profiling}).
1702
1703 \item[\code{common-lisp (cl lisp)}] The \code{common-lisp} package
1704   exports all the symbols defined by \i{Common Lisp: the Language} and
1705   only those symbols.  Strictly portable Lisp code will depend only on
1706   the symbols exported from the \code{lisp} package.
1707
1708 \item[\code{unix}, \code{mach}] These packages export system call
1709   interfaces to generic BSD Unix and Mach (\pxlref{unix-interface}).
1710
1711 \item[\code{system (sys)}] The \code{system} package contains
1712   functions and information necessary for system interfacing.  This
1713   package is used by the \code{lisp} package and exports several
1714   symbols that are necessary to interface to system code.
1715
1716 \item[\code{common-lisp-user (user cl-user)}] The
1717   \code{common-lisp-user} package is the default package and is where
1718   a user's code and data is placed unless otherwise specified.  This
1719   package exports no symbols.
1720
1721 \item[\code{xlib}] The \code{xlib} package contains the Common Lisp X
1722   interface (CLX) to the X11 protocol.  This is mostly Lisp code with
1723   a couple of functions that are defined in C to connect to the
1724   server.
1725
1726 \item[\code{wire}] The \code{wire} package exports a remote procedure
1727   call facility (\pxlref{remote}).
1728 \end{Lentry}
1729
1730
1731 %%\node The Editor, Garbage Collection, Packages, Design Choices and Extensions
1732 \section{The Editor}
1733
1734 The \code{ed} function invokes the Hemlock editor which is described
1735 in \i{Hemlock User's Manual} and \i{Hemlock Command Implementor's
1736   Manual}.  Most users at CMU prefer to use Hemlock's slave \Llisp{}
1737 mechanism which provides an interactive buffer for the
1738 \code{read-eval-print} loop and editor commands for evaluating and
1739 compiling text from a buffer into the slave \Llisp.  Since the editor
1740 runs in the \Llisp, using slaves keeps users from trashing their
1741 editor by developing in the same \Llisp{} with \Hemlock.
1742
1743
1744 %%\node Garbage Collection, Describe, The Editor, Design Choices and Extensions
1745 \section{Garbage Collection}
1746
1747 CMU Common Lisp uses a stop-and-copy garbage collector that compacts
1748 the items in dynamic space every time it runs.  Most users cause the
1749 system to garbage collect (GC) frequently, long before space is
1750 exhausted.  With 16 or 24 megabytes of memory, causing GC's more
1751 frequently on less garbage allows the system to GC without much (if
1752 any) paging.
1753
1754 \hide{
1755 With the default value for the following variable, you can expect a GC to take
1756 about one minute of elapsed time on a 6 megabyte machine running X as well as
1757 Lisp.  On machines with 8 megabytes or more of memory a GC should run without
1758 much (if any) paging.  GC's run more frequently but tend to take only about 5
1759 seconds.
1760 }
1761
1762 The following functions invoke the garbage collector or control whether
1763 automatic garbage collection is in effect:
1764
1765 \begin{defun}{extensions:}{gc}{}
1766
1767   This function runs the garbage collector.  If
1768   \code{ext:*gc-verbose*} is non-\nil, then it invokes
1769   \code{ext:*gc-notify-before*} before GC'ing and
1770   \code{ext:*gc-notify-after*} afterwards.
1771 \end{defun}
1772
1773 \begin{defun}{extensions:}{gc-off}{}
1774
1775   This function inhibits automatic garbage collection.  After calling
1776   it, the system will not GC unless you call \code{ext:gc} or
1777   \code{ext:gc-on}.
1778 \end{defun}
1779
1780 \begin{defun}{extensions:}{gc-on}{}
1781
1782   This function reinstates automatic garbage collection.  If the
1783   system would have GC'ed while automatic GC was inhibited, then this
1784   will call \code{ext:gc}.
1785 \end{defun}
1786
1787 %%\node
1788 \subsection{GC Parameters}
1789 The following variables control the behavior of the garbage collector:
1790
1791 \begin{defvar}{extensions:}{bytes-consed-between-gcs}
1792
1793   CMU Common Lisp automatically GC's whenever the amount of memory
1794   allocated to dynamic objects exceeds the value of an internal
1795   variable.  After each GC, the system sets this internal variable to
1796   the amount of dynamic space in use at that point plus the value of
1797   the variable \code{ext:*bytes-consed-between-gcs*}.  The default
1798   value is 2000000.
1799 \end{defvar}
1800
1801 \begin{defvar}{extensions:}{gc-verbose}
1802
1803   This variable controls whether \code{ext:gc} invokes the functions
1804   in \code{ext:*gc-notify-before*} and
1805   \code{ext:*gc-notify-after*}.  If \code{*gc-verbose*} is \nil,
1806   \code{ext:gc} foregoes printing any messages.  The default value is
1807   \code{T}.
1808 \end{defvar}
1809
1810 \begin{defvar}{extensions:}{gc-notify-before}
1811
1812   This variable's value is a function that should notify the user that
1813   the system is about to GC.  It takes one argument, the amount of
1814   dynamic space in use before the GC measured in bytes.  The default
1815   value of this variable is a function that prints a message similar
1816   to the following:
1817 \begin{display}
1818   \b{[GC threshold exceeded with 2,107,124 bytes in use.  Commencing GC.]}
1819 \end{display}
1820 \end{defvar}
1821
1822 \begin{defvar}{extensions:}{gc-notify-after}
1823
1824   This variable's value is a function that should notify the user when
1825   a GC finishes.  The function must take three arguments, the amount
1826   of dynamic space retained by the GC, the amount of dynamic space
1827   freed, and the new threshold which is the minimum amount of space in
1828   use before the next GC will occur.  All values are byte quantities.
1829   The default value of this variable is a function that prints a
1830   message similar to the following:
1831   \begin{display}
1832     \b{[GC completed with 25,680 bytes retained and 2,096,808 bytes freed.]}
1833     \b{[GC will next occur when at least 2,025,680 bytes are in use.]}
1834   \end{display}
1835 \end{defvar}
1836
1837 Note that a garbage collection will not happen at exactly the new
1838 threshold printed by the default \code{ext:*gc-notify-after*}
1839 function.  The system periodically checks whether this threshold has
1840 been exceeded, and only then does a garbage collection.
1841
1842 \begin{defvar}{extensions:}{gc-inhibit-hook}
1843
1844   This variable's value is either a function of one argument or \nil.
1845   When the system has triggered an automatic GC, if this variable is a
1846   function, then the system calls the function with the amount of
1847   dynamic space currently in use (measured in bytes).  If the function
1848   returns \nil, then the GC occurs; otherwise, the system inhibits
1849   automatic GC as if you had called \code{ext:gc-off}.  The writer of
1850   this hook is responsible for knowing when automatic GC has been
1851   turned off and for calling or providing a way to call
1852   \code{ext:gc-on}.  The default value of this variable is \nil.
1853 \end{defvar}
1854
1855 \begin{defvar}{extensions:}{before-gc-hooks}
1856   \defvarx[extensions:]{after-gc-hooks}
1857
1858   These variables' values are lists of functions to call before or
1859   after any GC occurs.  The system provides these purely for
1860   side-effect, and the functions take no arguments.
1861 \end{defvar}
1862
1863 %%\node
1864 \subsection{Weak Pointers}
1865
1866 A weak pointer provides a way to maintain a reference to an object
1867 without preventing an object from being garbage collected.  If the
1868 garbage collector discovers that the only pointers to an object are
1869 weak pointers, then it breaks the weak pointers and deallocates the
1870 object.
1871
1872 \begin{defun}{extensions:}{make-weak-pointer}{\args{\var{object}}}
1873   \defunx[extensions:]{weak-pointer-value}{\args{\var{weak-pointer}}}
1874
1875   \code{make-weak-pointer} returns a weak pointer to an object.
1876   \code{weak-pointer-value} follows a weak pointer, returning the two
1877   values: the object pointed to (or \false{} if broken) and a boolean
1878   value which is true if the pointer has been broken.
1879 \end{defun}
1880
1881 %%\node
1882 \subsection{Finalization}
1883
1884 Finalization provides a ``hook'' that is triggered when the garbage
1885 collector reclaims an object.  It is usually used to recover non-Lisp
1886 resources that were allocated to implement the finalized Lisp object.
1887 For example, when a unix file-descriptor stream is collected,
1888 finalization is used to close the underlying file descriptor.
1889
1890 \begin{defun}{extensions:}{finalize}{\args{\var{object} \var{function}}}
1891
1892   This function registers \var{object} for finalization.
1893   \var{function} is called with no arguments when \var{object} is
1894   reclaimed.  Normally \var{function} will be a closure over the
1895   underlying state that needs to be freed, e.g. the unix file
1896   descriptor in the fd-stream case.  Note that \var{function} must not
1897   close over \var{object} itself, as this prevents the object from
1898   ever becoming garbage.
1899 \end{defun}
1900
1901 \begin{defun}{extensions:}{cancel-finalization}{\args{\var{object}}}
1902
1903   This function cancels any finalization request for \var{object}.
1904 \end{defun}
1905
1906 %%\node Describe, The Inspector, Garbage Collection, Design Choices and Extensions
1907 \section{Describe}
1908
1909 In addition to the basic function described below, there are a number of
1910 switches and other things that can be used to control \code{describe}'s
1911 behavior.
1912
1913 \begin{defun}{}{describe}{ \args{\var{object} \ampoptional{} \var{stream}}}
1914
1915   The \code{describe} function prints useful information about
1916   \var{object} on \var{stream}, which defaults to
1917   \code{*standard-output*}.  For any object, \code{describe} will
1918   print out the type.  Then it prints other information based on the
1919   type of \var{object}.  The types which are presently handled are:
1920
1921   \begin{Lentry}
1922
1923   \item[\tindexed{hash-table}] \code{describe} prints the number of
1924     entries currently in the hash table and the number of buckets
1925     currently allocated.
1926
1927   \item[\tindexed{function}] \code{describe} prints a list of the
1928     function's name (if any) and its formal parameters.  If the name
1929     has function documentation, then it will be printed.  If the
1930     function is compiled, then the file where it is defined will be
1931     printed as well.
1932
1933   \item[\tindexed{fixnum}] \code{describe} prints whether the integer
1934     is prime or not.
1935
1936   \item[\tindexed{symbol}] The symbol's value, properties, and
1937     documentation are printed.  If the symbol has a function
1938     definition, then the function is described.
1939   \end{Lentry}
1940   If there is anything interesting to be said about some component of
1941   the object, \code{describe} will invoke itself recursively to describe that
1942   object.  The level of recursion is indicated by indenting output.
1943 \end{defun}
1944
1945 \begin{defvar}{extensions:}{describe-level}
1946
1947   The maximum level of recursive description allowed.  Initially two.
1948 \end{defvar}
1949
1950 \begin{defvar}{extensions:}{describe-indentation}
1951
1952 The number of spaces to indent for each level of recursive
1953 description, initially three.
1954 \end{defvar}
1955
1956 \begin{defvar}{extensions:}{describe-print-level}
1957   \defvarx[extensions:]{describe-print-length}
1958
1959   The values of \code{*print-level*} and \code{*print-length*} during
1960   description.  Initially two and five.
1961 \end{defvar}
1962
1963 %%\node The Inspector, Load, Describe, Design Choices and Extensions
1964 \section{The Inspector}
1965
1966 \cmucl{} has both a graphical inspector that uses X windows and a simple
1967 terminal-based inspector.
1968
1969 \begin{defun}{}{inspect}{ \args{\ampoptional{} \var{object}}}
1970
1971   \code{inspect} calls the inspector on the optional argument
1972   \var{object}.  If \var{object} is unsupplied, \code{inspect}
1973   immediately returns \false.  Otherwise, the behavior of \code{inspect}
1974   depends on whether Lisp is running under X.  When \code{inspect} is
1975   eventually exited, it returns some selected Lisp object.
1976 \end{defun}
1977
1978 \begin{comment}
1979 * The Graphical Interface::
1980 * The TTY Inspector::
1981 \end{comment}
1982
1983 %%\node The Graphical Interface, The TTY Inspector, The Inspector, The Inspector
1984 \subsection{The Graphical Interface}
1985 \label{motif-interface}
1986
1987 CMU Common Lisp has an interface to Motif which is functionally similar to
1988 CLM, but works better in CMU CL.  See:
1989 \begin{example}
1990 \file{doc/motif-toolkit.doc}
1991 \file{doc/motif-internals.doc}
1992 \end{example}
1993
1994 This motif interface has been used to write the inspector and graphical
1995 debugger.  There is also a Lisp control panel with a simple file management
1996 facility, apropos and inspector dialogs, and controls for setting global
1997 options.  See the \code{interface} and \code{toolkit} packages.
1998
1999 \begin{defun}{interface:}{lisp-control-panel}{}
2000
2001   This function creates a control panel for the Lisp process.
2002 \end{defun}
2003
2004 \begin{defvar}{interface:}{interface-style}
2005
2006   When the graphical interface is loaded, this variable controls
2007   whether it is used by \code{inspect} and the error system.  If the
2008   value is \kwd{graphics} (the default) and the \code{DISPLAY}
2009   environment variable is defined, the graphical inspector and
2010   debugger will be invoked by \findexed{inspect} or when an error is
2011   signalled.  Possible values are \kwd{graphics} and \kwd{tty}.  If the
2012   value is \kwd{graphics}, but there is no X display, then we quietly
2013   use the TTY interface.
2014 \end{defvar}
2015
2016 %%\node The TTY Inspector,  , The Graphical Interface, The Inspector
2017 \subsection{The TTY Inspector}
2018
2019 If X is unavailable, a terminal inspector is invoked.  The TTY inspector
2020 is a crude interface to \code{describe} which allows objects to be
2021 traversed and maintains a history.  This inspector prints information
2022 about an object and a numbered list of the components of the object.
2023 The command-line based interface is a normal
2024 \code{read}--\code{eval}--\code{print} loop, but an integer \var{n}
2025 descends into the \var{n}'th component of the current object, and
2026 symbols with these special names are interpreted as commands:
2027 \begin{Lentry}
2028 \item[U] Move back to the enclosing object.  As you descend into the
2029 components of an object, a stack of all the objects previously seen is
2030 kept.  This command pops you up one level of this stack.
2031
2032 \item[Q, E] Return the current object from \code{inspect}.
2033
2034 \item[R] Recompute object display, and print again.  Useful if the
2035 object may have changed.
2036
2037 \item[D] Display again without recomputing.
2038
2039 \item[H, ?] Show help message.
2040 \end{Lentry}
2041
2042 %%\node Load, The Reader, The Inspector, Design Choices and Extensions
2043 \section{Load}
2044
2045 \begin{defun}{}{load}{%
2046     \args{\var{filename}
2047       \keys{\kwd{verbose} \kwd{print} \kwd{if-does-not-exist}}
2048       \morekeys{\kwd{if-source-newer} \kwd{contents}}}}
2049
2050   As in standard Common Lisp, this function loads a file containing
2051   source or object code into the running Lisp.  Several CMU extensions
2052   have been made to \code{load} to conveniently support a variety of
2053   program file organizations.  \var{filename} may be a wildcard
2054   pathname such as \file{*.lisp}, in which case all matching files are
2055   loaded.
2056
2057   If \var{filename} has a \code{pathname-type} (or extension), then
2058   that exact file is loaded.  If the file has no extension, then this
2059   tells \code{load} to use a heuristic to load the ``right'' file.
2060   The \code{*load-source-types*} and \code{*load-object-types*}
2061   variables below are used to determine the default source and object
2062   file types.  If only the source or the object file exists (but not
2063   both), then that file is quietly loaded.  Similarly, if both the
2064   source and object file exist, and the object file is newer than the
2065   source file, then the object file is loaded.  The value of the
2066   \var{if-source-newer} argument is used to determine what action to
2067   take when both the source and object files exist, but the object
2068   file is out of date:
2069   \begin{Lentry}
2070   \item[\kwd{load-object}] The object file is loaded even though the
2071     source file is newer.
2072
2073   \item[\kwd{load-source}] The source file is loaded instead of the
2074     older object file.
2075
2076   \item[\kwd{compile}] The source file is compiled and then the new
2077     object file is loaded.
2078
2079   \item[\kwd{query}] The user is asked a yes or no question to
2080     determine whether the source or object file is loaded.
2081   \end{Lentry}
2082   This argument defaults to the value of
2083   \code{ext:*load-if-source-newer*} (initially \kwd{load-object}.)
2084
2085   The \var{contents} argument can be used to override the heuristic
2086   (based on the file extension) that normally determines whether to
2087   load the file as a source file or an object file.  If non-null, this
2088   argument must be either \kwd{source} or \kwd{binary}, which forces
2089   loading in source and binary mode, respectively. You really
2090   shouldn't ever need to use this argument.
2091 \end{defun}
2092
2093 \begin{defvar}{extensions:}{load-source-types}
2094   \defvarx[extensions:]{load-object-types}
2095
2096   These variables are lists of possible \code{pathname-type} values
2097   for source and object files to be passed to \code{load}.  These
2098   variables are only used when the file passed to \code{load} has no
2099   type; in this case, the possible source and object types are used to
2100   default the type in order to determine the names of the source and
2101   object files.
2102 \end{defvar}
2103
2104 \begin{defvar}{extensions:}{load-if-source-newer}
2105
2106   This variable determines the default value of the
2107   \var{if-source-newer} argument to \code{load}.  Its initial value is
2108   \kwd{load-object}.
2109 \end{defvar}
2110
2111 %%\node The Reader, Stream Extensions, Load, Design Choices and Extensions
2112 \section{The Reader}
2113
2114 \begin{defvar}{extensions:}{ignore-extra-close-parentheses}
2115
2116   If this variable is \true{} (the default), then the reader merely
2117   prints a warning when an extra close parenthesis is detected
2118   (instead of signalling an error.)
2119 \end{defvar}
2120
2121 %%\node Stream Extensions, Running Programs from Lisp, The Reader, Design Choices and Extensions
2122 \section{Stream Extensions}
2123 \begin{defun}{extensions:}{read-n-bytes}{%
2124     \args{\var{stream buffer start numbytes}
2125       \ampoptional{} \var{eof-error-p}}}
2126
2127   On streams that support it, this function reads multiple bytes of
2128   data into a buffer.  The buffer must be a \code{simple-string} or
2129   \code{(simple-array (unsigned-byte 8) (*))}.  The argument
2130   \var{numbytes} specifies the desired number of bytes, and the return
2131   value is the number of bytes actually read.
2132   \begin{itemize}
2133   \item If \var{eof-error-p} is true, an \tindexed{end-of-file}
2134     condition is signalled if end-of-file is encountered before
2135     \var{numbytes} bytes have been read.
2136
2137   \item If \var{eof-error-p} is false, \code{read-n-bytes} reads as
2138     much data as is currently available (up to \var{numbytes} bytes.)  On
2139     pipes or similar devices, this function returns as soon as any data is
2140     available, even if the amount read is less than \var{numbytes} and
2141     eof has not been hit.  See also \funref{make-fd-stream}.
2142   \end{itemize}
2143 \end{defun}
2144 %%\node Running Programs from Lisp, Saving a Core Image, The Reader, Design Choices and Extensions
2145 \section{Running Programs from Lisp}
2146
2147 It is possible to run programs from Lisp by using the following function.
2148
2149 \begin{defun}{extensions:}{run-program}{%
2150     \args{\var{program} \var{args}
2151       \keys{\kwd{env} \kwd{wait} \kwd{pty} \kwd{input}}
2152       \morekeys{\kwd{if-input-does-not-exist}}
2153       \yetmorekeys{\kwd{output} \kwd{if-output-exists}}
2154       \yetmorekeys{\kwd{error} \kwd{if-error-exists}}
2155       \yetmorekeys{\kwd{status-hook} \kwd{before-execve}}}}
2156
2157   \code{run-program} runs \var{program} in a child process.
2158   \var{Program} should be a pathname or string naming the program.
2159   \var{Args} should be a list of strings which this passes to
2160   \var{program} as normal Unix parameters.  For no arguments, specify
2161   \var{args} as \nil.  The value returned is either a process
2162   structure or \nil.  The process interface follows the description of
2163   \code{run-program}.  If \code{run-program} fails to fork the child
2164   process, it returns \nil.
2165
2166   Except for sharing file descriptors as explained in keyword argument
2167   descriptions, \code{run-program} closes all file descriptors in the
2168   child process before running the program.  When you are done using a
2169   process, call \code{process-close} to reclaim system resources.  You
2170   only need to do this when you supply \kwd{stream} for one of
2171   \kwd{input}, \kwd{output}, or \kwd{error}, or you supply \kwd{pty}
2172   non-\nil.  If you feel safer, you can call \code{process-close} without
2173   penalty even when you are not required to reclaim resources.
2174
2175   \code{run-program} accepts the following keyword arguments:
2176   \begin{Lentry}
2177
2178   \item[\kwd{env}] This is an a-list mapping keywords to
2179     simple-strings.  The default is \code{ext:*environment-list*}.  If
2180     \kwd{env} is specified, \code{run-program} uses the value given
2181     and does not combine the environment passed to Lisp with the one
2182     specified.
2183
2184   \item[\kwd{wait}] If non-\nil{} (the default), wait until the child
2185     process terminates.  If \nil, continue running Lisp while the
2186     child process runs.
2187
2188   \item[\kwd{pty}] This should be one of \true, \nil, or a stream.  If
2189     specified non-\nil, the subprocess executes under a Unix \i{PTY}.
2190     If specified as a stream, the system collects all output to this
2191     pty and writes it to this stream.  If specified as \true, the
2192     \code{process-pty} slot contains a stream from which you can read
2193     the program's output and to which you can write input for the
2194     program.  The default is \nil.
2195
2196   \item[\kwd{input}] This specifies how the program gets its input.
2197     If specified as a string, it is the name of a file that contains
2198     input for the child process.  \code{run-program} opens the file as
2199     standard input.  If specified as \nil{} (the default), then
2200     standard input is the file \file{/dev/null}.  If specified as
2201     \true, the program uses the current standard input.  This may
2202     cause some confusion if \kwd{wait} is \nil{} since two processes
2203     may use the terminal at the same time.  If specified as
2204     \kwd{stream}, then the \code{process-input} slot contains an
2205     output stream.  Anything written to this stream goes to the
2206     program as input.  \kwd{input} may also be an input stream that
2207     already contains all the input for the process.  In this case
2208     \code{run-program} reads all the input from this stream before
2209     returning, so this cannot be used to interact with the process.
2210
2211   \item[\kwd{if-input-does-not-exist}] This specifies what to do if
2212     the input file does not exist.  The following values are valid:
2213     \nil{} (the default) causes \code{run-program} to return \nil{}
2214     without doing anything; \kwd{create} creates the named file; and
2215     \kwd{error} signals an error.
2216
2217   \item[\kwd{output}] This specifies what happens with the program's
2218     output.  If specified as a pathname, it is the name of a file that
2219     contains output the program writes to its standard output.  If
2220     specified as \nil{} (the default), all output goes to
2221     \file{/dev/null}.  If specified as \true, the program writes to
2222     the Lisp process's standard output.  This may cause confusion if
2223     \kwd{wait} is \nil{} since two processes may write to the terminal
2224     at the same time.  If specified as \kwd{stream}, then the
2225     \code{process-output} slot contains an input stream from which you
2226     can read the program's output.
2227
2228   \item[\kwd{if-output-exists}] This specifies what to do if the
2229     output file already exists.  The following values are valid:
2230     \nil{} causes \code{run-program} to return \nil{} without doing
2231     anything; \kwd{error} (the default) signals an error;
2232     \kwd{supersede} overwrites the current file; and \kwd{append}
2233     appends all output to the file.
2234
2235   \item[\kwd{error}] This is similar to \kwd{output}, except the file
2236     becomes the program's standard error.  Additionally, \kwd{error}
2237     can be \kwd{output} in which case the program's error output is
2238     routed to the same place specified for \kwd{output}.  If specified
2239     as \kwd{stream}, the \code{process-error} slot contains a stream
2240     similar to the \code{process-output} slot when specifying the
2241     \kwd{output} argument.
2242
2243   \item[\kwd{if-error-exists}] This specifies what to do if the error
2244     output file already exists.  It accepts the same values as
2245     \kwd{if-output-exists}.
2246
2247   \item[\kwd{status-hook}] This specifies a function to call whenever
2248     the process changes status.  This is especially useful when
2249     specifying \kwd{wait} as \nil.  The function takes the process as
2250     a required argument.
2251
2252   \item[\kwd{before-execve}] This specifies a function to run in the
2253     child process before it becomes the program to run.  This is
2254     useful for actions such as authenticating the child process
2255     without modifying the parent Lisp process.
2256   \end{Lentry}
2257 \end{defun}
2258
2259
2260 \begin{comment}
2261 * Process Accessors::
2262 \end{comment}
2263
2264 %%\node Process Accessors,  , Running Programs from Lisp, Running Programs from Lisp
2265 \subsection{Process Accessors}
2266
2267 The following functions interface the process returned by \code{run-program}:
2268
2269 \begin{defun}{extensions:}{process-p}{\args{\var{thing}}}
2270
2271   This function returns \true{} if \var{thing} is a process.
2272   Otherwise it returns \nil.
2273 \end{defun}
2274
2275 \begin{defun}{extensions:}{process-pid}{\args{\var{process}}}
2276
2277   This function returns the process ID, an integer, for the
2278   \var{process}.
2279 \end{defun}
2280
2281 \begin{defun}{extensions:}{process-status}{\args{\var{process}}}
2282
2283   This function returns the current status of \var{process}, which is
2284   one of \kwd{running}, \kwd{stopped}, \kwd{exited}, or
2285   \kwd{signaled}.
2286 \end{defun}
2287
2288 \begin{defun}{extensions:}{process-exit-code}{\args{\var{process}}}
2289
2290   This function returns either the exit code for \var{process}, if it
2291   is \kwd{exited}, or the termination signal of \var{process} if it is
2292   \kwd{signaled}.  The result is undefined for processes that are
2293   still alive.
2294 \end{defun}
2295
2296 \begin{defun}{extensions:}{process-core-dumped}{\args{\var{process}}}
2297
2298   This function returns \true{} if someone used a Unix signal to
2299   terminate the \var{process} and caused it to dump a Unix core image.
2300 \end{defun}
2301
2302 \begin{defun}{extensions:}{process-pty}{\args{\var{process}}}
2303
2304   This function returns either the two-way stream connected to
2305   \var{process}'s Unix \i{PTY} connection or \nil{} if there is none.
2306 \end{defun}
2307
2308 \begin{defun}{extensions:}{process-input}{\args{\var{process}}}
2309   \defunx[extensions:]{process-output}{\args{\var{process}}}
2310   \defunx[extensions:]{process-error}{\args{\var{process}}}
2311
2312   If the corresponding stream was created, these functions return the
2313   input, output or error stream.  \nil{} is returned if there
2314   is no stream.
2315 \end{defun}
2316
2317 \begin{defun}{extensions:}{process-status-hook}{\args{\var{process}}}
2318
2319   This function returns the current function to call whenever
2320   \var{process}'s status changes.  This function takes the
2321   \var{process} as a required argument.  \code{process-status-hook} is
2322   \code{setf}'able.
2323 \end{defun}
2324
2325 \begin{defun}{extensions:}{process-plist}{\args{\var{process}}}
2326
2327   This function returns annotations supplied by users, and it is
2328   \code{setf}'able.  This is available solely for users to associate
2329   information with \var{process} without having to build a-lists or
2330   hash tables of process structures.
2331 \end{defun}
2332
2333 \begin{defun}{extensions:}{process-wait}{
2334     \args{\var{process} \ampoptional{} \var{check-for-stopped}}}
2335
2336   This function waits for \var{process} to finish.  If
2337   \var{check-for-stopped} is non-\nil, this also returns when
2338   \var{process} stops.
2339 \end{defun}
2340
2341 \begin{defun}{extensions:}{process-kill}{%
2342     \args{\var{process} \var{signal} \ampoptional{} \var{whom}}}
2343
2344   This function sends the Unix \var{signal} to \var{process}.
2345   \var{Signal} should be the number of the signal or a keyword with
2346   the Unix name (for example, \kwd{sigsegv}).  \var{Whom} should be
2347   one of the following:
2348   \begin{Lentry}
2349
2350   \item[\kwd{pid}] This is the default, and it indicates sending the
2351     signal to \var{process} only.
2352
2353   \item[\kwd{process-group}] This indicates sending the signal to
2354     \var{process}'s group.
2355
2356   \item[\kwd{pty-process-group}] This indicates sending the signal to
2357     the process group currently in the foreground on the Unix \i{PTY}
2358     connected to \var{process}.  This last option is useful if the
2359     running program is a shell, and you wish to signal the program
2360     running under the shell, not the shell itself.  If
2361     \code{process-pty} of \var{process} is \nil, using this option is
2362     an error.
2363   \end{Lentry}
2364 \end{defun}
2365
2366 \begin{defun}{extensions:}{process-alive-p}{\args{\var{process}}}
2367
2368   This function returns \true{} if \var{process}'s status is either
2369   \kwd{running} or \kwd{stopped}.
2370 \end{defun}
2371
2372 \begin{defun}{extensions:}{process-close}{\args{\var{process}}}
2373
2374   This function closes all the streams associated with \var{process}.
2375   When you are done using a process, call this to reclaim system
2376   resources.
2377 \end{defun}
2378
2379
2380 %%\node Saving a Core Image, Pathnames, Running Programs from Lisp, Design Choices and Extensions
2381 \section{Saving a Core Image}
2382
2383 A mechanism has been provided to save a running Lisp core image and to
2384 later restore it.  This is convenient if you don't want to load several files
2385 into a Lisp when you first start it up.  The main problem is the large
2386 size of each saved Lisp image, typically at least 20 megabytes.
2387
2388 \begin{defun}{extensions:}{save-lisp}{%
2389     \args{\var{file}
2390       \keys{\kwd{purify} \kwd{root-structures} \kwd{init-function}}
2391       \morekeys{\kwd{load-init-file} \kwd{print-herald} \kwd{site-init}}
2392       \yetmorekeys{\kwd{process-command-line}}}}
2393
2394   The \code{save-lisp} function saves the state of the currently
2395   running Lisp core image in \var{file}.  The keyword arguments have
2396   the following meaning:
2397   \begin{Lentry}
2398
2399   \item[\kwd{purify}] If non-NIL (the default), the core image is
2400     purified before it is saved (see \funref{purify}.)  This reduces
2401     the amount of work the garbage collector must do when the
2402     resulting core image is being run.  Also, if more than one Lisp is
2403     running on the same machine, this maximizes the amount of memory
2404     that can be shared between the two processes.
2405
2406   \item[\kwd{root-structures}]
2407     \begin{changebar}
2408       This should be a list of the main entry points in any newly
2409       loaded systems.  This need not be supplied, but locality and/or
2410       GC performance will be better if it is.  Meaningless if
2411       \kwd{purify} is \nil.  See \funref{purify}.
2412     \end{changebar}
2413
2414   \item[\kwd{init-function}] This is the function that starts running
2415     when the created core file is resumed.  The default function
2416     simply invokes the top level read-eval-print loop.  If the
2417     function returns, the Lisp will exit.
2418
2419   \item[\kwd{load-init-file}] If non-NIL, then load an init file;
2420     either the one specified on the command line or
2421     ``\w{\file{init.}\var{fasl-type}}'', or, if
2422     ``\w{\file{init.}\var{fasl-type}}'' does not exist,
2423     \code{init.lisp} from the user's home directory.  If the init file
2424     is found, it is loaded into the resumed core file before the
2425     read-eval-print loop is entered.
2426
2427   \item[\kwd{site-init}] If non-NIL, the name of the site init file to
2428     quietly load.  The default is \file{library:site-init}.  No error
2429     is signalled if the file does not exist.
2430
2431   \item[\kwd{print-herald}] If non-NIL (the default), then print out
2432     the standard Lisp herald when starting.
2433
2434   \item[\kwd{process-command-line}] If non-NIL (the default),
2435     processes the command line switches and performs the appropriate
2436     actions.
2437   \end{Lentry}
2438 \end{defun}
2439
2440 To resume a saved file, type:
2441 \begin{example}
2442 lisp -core file
2443 \end{example}
2444
2445 \begin{defun}{extensions:}{purify}{
2446     \args{
2447       \keys{\kwd{root-structures} \kwd{environment-name}}}}
2448
2449   This function optimizes garbage collection by moving all currently
2450   live objects into non-collected storage.  Once statically allocated,
2451   the objects can never be reclaimed, even if all pointers to them are
2452   dropped.  This function should generally be called after a large
2453   system has been loaded and initialized.
2454
2455   \begin{Lentry}
2456   \item[\kwd{root-structures}] is an optional list of objects which
2457     should be copied first to maximize locality.  This should be a
2458     list of the main entry points for the resulting core image.  The
2459     purification process tries to localize symbols, functions, etc.,
2460     in the core image so that paging performance is improved.  The
2461     default value is NIL which means that Lisp objects will still be
2462     localized but probably not as optimally as they could be.
2463
2464     \code{defstruct} structures defined with the \code{(:pure t)}
2465     option are moved into read-only storage, further reducing GC cost.
2466     List and vector slots of pure structures are also moved into
2467     read-only storage.
2468
2469   \item[\kwd{environment-name}] is gratuitous documentation for the
2470     compacted version of the current global environment (as seen in
2471     \code{c::*info-environment*}.)  If \false{} is supplied, then
2472     environment compaction is inhibited.
2473   \end{Lentry}
2474 \end{defun}
2475
2476 %%\node Pathnames, Filesystem Operations, Saving a Core Image, Design Choices and Extensions
2477 \section{Pathnames}
2478
2479 In \clisp{} quite a few aspects of \tindexed{pathname} semantics are left to
2480 the implementation.
2481
2482 \begin{comment}
2483 * Unix Pathnames::
2484 * Wildcard Pathnames::
2485 * Logical Pathnames::
2486 * Search Lists::
2487 * Predefined Search-Lists::
2488 * Search-List Operations::
2489 * Search List Example::
2490 \end{comment}
2491
2492 %%\node Unix Pathnames, Wildcard Pathnames, Pathnames, Pathnames
2493 \subsection{Unix Pathnames}
2494 \cpsubindex{unix}{pathnames}
2495
2496 Unix pathnames are always parsed with a \code{unix-host} object as the host and
2497 \code{nil} as the device.  The last two dots (\code{.}) in the namestring mark
2498 the type and version; however, if the first character is a dot, it is considered
2499 part of the name.  If the last character is a dot, then the pathname has the
2500 empty-string as its type.  The type defaults to \code{nil} and the version
2501 defaults to \kwd{newest}.
2502 \begin{example}
2503 (defun parse (x)
2504   (values (pathname-name x) (pathname-type x) (pathname-version x)))
2505
2506 (parse "foo") \result "foo", NIL, :NEWEST
2507 (parse "foo.bar") \result "foo", "bar", :NEWEST
2508 (parse ".foo") \result ".foo", NIL, :NEWEST
2509 (parse ".foo.bar") \result ".foo", "bar", :NEWEST
2510 (parse "..") \result ".", "", :NEWEST
2511 (parse "foo.") \result "foo", "", :NEWEST
2512 (parse "foo.bar.1") \result "foo", "bar", 1
2513 (parse "foo.bar.baz") \result "foo.bar", "baz", :NEWEST
2514 \end{example}
2515
2516 The directory of pathnames beginning with a slash (or a search-list,
2517 \pxlref{search-lists}) starts with \kwd{absolute}; others start with
2518 \kwd{relative}.  The \code{..} directory is parsed as \kwd{up}; there is no
2519 namestring for \kwd{back}:
2520 \begin{example}
2521 (pathname-directory "/usr/foo/bar.baz") \result (:ABSOLUTE "usr" "foo")
2522 (pathname-directory "../foo/bar.baz") \result (:RELATIVE :UP "foo")
2523 \end{example}
2524
2525 %%\node Wildcard Pathnames, Logical Pathnames, Unix Pathnames, Pathnames
2526 \subsection{Wildcard Pathnames}
2527
2528 Wildcards are supported in Unix pathnames.  If `\code{*}' is specified for a
2529 part of a pathname, that is parsed as \kwd{wild}.  `\code{**}' can be used as a
2530 directory name to indicate \kwd{wild-inferiors}.  Filesystem operations
2531 treat \kwd{wild-inferiors} the same as\ \kwd{wild}, but pathname pattern
2532 matching (e.g. for logical pathname translation, \pxlref{logical-pathnames})
2533 matches any number of directory parts with `\code{**}' (see
2534 \pxlref{wildcard-matching}.)
2535
2536
2537 `\code{*}' embedded in a pathname part matches any number of characters.
2538 Similarly, `\code{?}' matches exactly one character, and `\code{[a,b]}'
2539 matches the characters `\code{a}' or `\code{b}'.  These pathname parts are
2540 parsed as \code{pattern} objects.
2541
2542 Backslash can be used as an escape character in namestring
2543 parsing to prevent the next character from being treated as a wildcard.  Note
2544 that if typed in a string constant, the backslash must be doubled, since the
2545 string reader also uses backslash as a quote:
2546 \begin{example}
2547 (pathname-name "foo\(\backslash\backslash\)*bar") => "foo*bar"
2548 \end{example}
2549
2550 %%\node Logical Pathnames, Search Lists, Wildcard Pathnames, Pathnames
2551 \subsection{Logical Pathnames}
2552 \cindex{logical pathnames}
2553 \label{logical-pathnames}
2554
2555 If a namestring begins with the name of a defined logical pathname
2556 host followed by a colon, then it will be parsed as a logical
2557 pathname.  Both `\code{*}' and `\code{**}' wildcards are implemented.
2558 \findexed{load-logical-pathname-defaults} on \var{name} looks for a
2559 logical host definition file in
2560 \w{\file{library:\var{name}.translations}}. Note that \file{library:}
2561 designates the search list (\pxlref{search-lists}) initialized to the
2562 \cmucl{} \file{lib/} directory, not a logical pathname.  The format of
2563 the file is a single list of two-lists of the from and to patterns:
2564 \begin{example}
2565 (("foo;*.text" "/usr/ram/foo/*.txt")
2566  ("foo;*.lisp" "/usr/ram/foo/*.l"))
2567 \end{example}
2568
2569 \begin{comment}
2570 * Search Lists::
2571 * Search List Example::
2572 \end{comment}
2573
2574 %%\node Search Lists, Predefined Search-Lists, Logical Pathnames, Pathnames
2575 \subsection{Search Lists}
2576 \cindex{search lists}
2577 \label{search-lists}
2578
2579 Search lists are an extension to Common Lisp pathnames.  They serve a function
2580 somewhat similar to Common Lisp logical pathnames, but work more like Unix PATH
2581 variables.  Search lists are used for two purposes:
2582 \begin{itemize}
2583 \item They provide a convenient shorthand for commonly used directory names,
2584 and
2585
2586 \item They allow the abstract (directory structure independent) specification
2587 of file locations in program pathname constants (similar to logical pathnames.)
2588 \end{itemize}
2589 Each search list has an associated list of directories (represented as
2590 pathnames with no name or type component.)  The namestring for any relative
2591 pathname may be prefixed with ``\var{slist}\code{:}'', indicating that the
2592 pathname is relative to the search list \var{slist} (instead of to the current
2593 working directory.)  Once qualified with a search list, the pathname is no
2594 longer considered to be relative.
2595
2596 When a search list qualified pathname is passed to a file-system operation such
2597 as \code{open}, \code{load} or \code{truename}, each directory in the search
2598 list is successively used as the root of the pathname until the file is
2599 located.  When a file is written to a search list directory, the file is always
2600 written to the first directory in the list.
2601
2602 %%\node Predefined Search-Lists, Search-List Operations, Search Lists, Pathnames
2603 \subsection{Predefined Search-Lists}
2604
2605 These search-lists are initialized from the Unix environment or when Lisp was
2606 built:
2607 \begin{Lentry}
2608 \item[\code{default:}] The current directory at startup.
2609
2610 \item[\code{home:}] The user's home directory.
2611
2612 \item[\code{library:}] The \cmucl{} \file{lib/} directory (\code{CMUCLLIB} environment
2613 variable.)
2614
2615 \item[\code{path:}] The Unix command path (\code{PATH} environment variable.)
2616
2617 \item[\code{target:}] The root of the tree where \cmucl{} was compiled.
2618 \end{Lentry}
2619 It can be useful to redefine these search-lists, for example, \file{library:}
2620 can be augmented to allow logical pathname translations to be located, and
2621 \file{target:} can be redefined to point to where \cmucl{} system sources are
2622 locally installed.
2623
2624 %%\node Search-List Operations, Search List Example, Predefined Search-Lists, Pathnames
2625 \subsection{Search-List Operations}
2626
2627 These operations define and access search-list definitions.  A search-list name
2628 may be parsed into a pathname before the search-list is actually defined, but
2629 the search-list must be defined before it can actually be used in a filesystem
2630 operation.
2631
2632 \begin{defun}{extensions:}{search-list}{\var{name}}
2633
2634   This function returns the list of directories associated with the
2635   search list \var{name}.  If \var{name} is not a defined search list,
2636   then an error is signaled.  When set with \code{setf}, the list of
2637   directories is changed to the new value.  If the new value is just a
2638   namestring or pathname, then it is interpreted as a one-element
2639   list.  Note that (unlike Unix pathnames), search list names are
2640   case-insensitive.
2641 \end{defun}
2642
2643 \begin{defun}{extensions:}{search-list-defined-p}{\var{name}}
2644   \defunx[extensions:]{clear-search-list}{\var{name}}
2645
2646   \code{search-list-defined-p} returns \true{} if \var{name} is a
2647   defined search list name, \false{} otherwise.
2648   \code{clear-search-list} makes the search list \var{name} undefined.
2649 \end{defun}
2650
2651 \begin{defmac}{extensions:}{enumerate-search-list}{%
2652     \args{(\var{var} \var{pathname} \mopt{result}) \mstar{form}}}
2653
2654   This macro provides an interface to search list resolution.  The
2655   body \var{forms} are executed with \var{var} bound to each
2656   successive possible expansion for \var{pathname}.  If \var{pathname} does
2657   not contain a search-list, then the body is executed exactly once.
2658   Everything is wrapped in a block named \nil, so \code{return} can be
2659   used to terminate early.  The \var{result} form (default \nil) is
2660   evaluated to determine the result of the iteration.
2661 \end{defmac}
2662
2663 \begin{comment}
2664 * Search List Example::
2665 \end{comment}
2666
2667 %%\node Search List Example,  , Search-List Operations, Pathnames
2668 \subsection{Search List Example}
2669
2670 The search list \code{code:} can be defined as follows:
2671 \begin{example}
2672 (setf (ext:search-list "code:") '("/usr/lisp/code/"))
2673 \end{example}
2674 It is now possible to use \code{code:} as an abbreviation for the directory
2675 \file{/usr/lisp/code/} in all file operations.  For example, you can now specify
2676 \code{code:eval.lisp} to refer to the file \file{/usr/lisp/code/eval.lisp}.
2677
2678 To obtain the value of a search-list name, use the function search-list
2679 as follows:
2680 \begin{example}
2681 (ext:search-list \var{name})
2682 \end{example}
2683 Where \var{name} is the name of a search list as described above.  For example,
2684 calling \code{ext:search-list} on \code{code:} as follows:
2685 \begin{example}
2686 (ext:search-list "code:")
2687 \end{example}
2688 returns the list \code{("/usr/lisp/code/")}.
2689
2690 %%\node Filesystem Operations, Time Parsing and Formatting, Pathnames, Design Choices and Extensions
2691 \section{Filesystem Operations}
2692
2693 \cmucl{} provides a number of extensions and optional features beyond those
2694 required by \clisp.
2695
2696 \begin{comment}
2697 * Wildcard Matching::
2698 * File Name Completion::
2699 * Miscellaneous Filesystem Operations::
2700 \end{comment}
2701
2702 %%\node Wildcard Matching, File Name Completion, Filesystem Operations, Filesystem Operations
2703 \subsection{Wildcard Matching}
2704 \label{wildcard-matching}
2705
2706 Unix filesystem operations such as \code{open} will accept wildcard pathnames
2707 that match a single file (of course, \code{directory} allows any number of
2708 matches.)  Filesystem operations treat \kwd{wild-inferiors} the same as\
2709 \kwd{wild}.
2710
2711 \begin{defun}{}{directory}{\var{wildname} \keys{\kwd{all} \kwd{check-for-subdirs}}
2712     \morekeys{\kwd{follow-links}}}
2713
2714   The keyword arguments to this \clisp{} function are a CMU extension.
2715   The arguments (all default to \code{t}) have the following
2716   functions:
2717   \begin{Lentry}
2718   \item[\kwd{all}] Include files beginning with dot such as
2719     \file{.login}, similar to ``\code{ls -a}''.
2720
2721   \item[\kwd{check-for-subdirs}] Test whether files are directories,
2722     similar to ``\code{ls -F}''.
2723
2724   \item[\kwd{follow-links}] Call \code{truename} on each file, which
2725     expands out all symbolic links.  Note that this option can easily
2726     result in pathnames being returned which have a different
2727     directory from the one in the \var{wildname} argument.
2728   \end{Lentry}
2729 \end{defun}
2730
2731 \begin{defun}{extensions:}{print-directory}{%
2732     \args{\var{wildname}
2733       \ampoptional{} \var{stream}
2734       \keys{\kwd{all} \kwd{verbose}}
2735       \morekeys{\kwd{return-list}}}}
2736
2737   Print a directory listing of \var{wildname} to \var{stream} (default
2738   \code{*standard-output*}.)  \kwd{all} and \kwd{verbose} both default
2739   to \false{} and correspond to the ``\code{-a}'' and ``\code{-l}''
2740   options of \file{ls}.  Normally this function returns \false{}, but
2741   if \kwd{return-list} is true, a list of the matched pathnames is
2742   returned.
2743 \end{defun}
2744
2745 %%\node File Name Completion, Miscellaneous Filesystem Operations, Wildcard Matching, Filesystem Operations
2746 \subsection{File Name Completion}
2747
2748 \begin{defun}{extensions:}{complete-file}{%
2749     \args{\var{pathname}
2750       \keys{\kwd{defaults} \kwd{ignore-types}}}}
2751
2752   Attempt to complete a file name to the longest unambiguous prefix.
2753   If supplied, directory from \kwd{defaults} is used as the ``working
2754   directory'' when doing completion.  \kwd{ignore-types} is a list of
2755   strings of the pathname types (a.k.a. extensions) that should be
2756   disregarded as possible matches (binary file names, etc.)
2757 \end{defun}
2758
2759 \begin{defun}{extensions:}{ambiguous-files}{%
2760     \args{\var{pathname}
2761       \ampoptional{} \var{defaults}}}
2762
2763   Return a list of pathnames for all the possible completions of
2764   \var{pathname} with respect to \var{defaults}.
2765 \end{defun}
2766
2767 %%\node Miscellaneous Filesystem Operations,  , File Name Completion, Filesystem Operations
2768 \subsection{Miscellaneous Filesystem Operations}
2769
2770 \begin{defun}{extensions:}{default-directory}{}
2771
2772   Return the current working directory as a pathname.  If set with
2773   \code{setf}, set the working directory.
2774 \end{defun}
2775
2776 \begin{defun}{extensions:}{file-writable}{\var{name}}
2777
2778   This function accepts a pathname and returns \true{} if the current
2779   process can write it, and \false{} otherwise.
2780 \end{defun}
2781
2782 \begin{defun}{extensions:}{unix-namestring}{%
2783     \args{\var{pathname}
2784       \ampoptional{} \var{for-input}}}
2785
2786   This function converts \var{pathname} into a string that can be used
2787   with UNIX system calls.  Search-lists and wildcards are expanded.
2788   \var{for-input} controls the treatment of search-lists: when true
2789   (the default) and the file exists anywhere on the search-list, then
2790   that absolute pathname is returned; otherwise the first element of
2791   the search-list is used as the directory.
2792 \end{defun}
2793
2794 %%\node Time Parsing and Formatting, Lisp Library, Filesystem Operations, Design Choices and Extensions
2795 \section{Time Parsing and Formatting}
2796
2797 \cindex{time parsing} \cindex{time formatting}
2798 Functions are provided for parsing strings containing time information
2799 and for printing time in various formats.
2800
2801 \begin{defun}{extensions:}{parse-time}{%
2802     \args{\var{time-string}
2803       \keys{\kwd{error-on-mismatch} \kwd{default-seconds}}
2804       \morekeys{\kwd{default-minutes} \kwd{default-hours}}
2805       \yetmorekeys{\kwd{default-day} \kwd{default-month}}
2806       \yetmorekeys{\kwd{default-year} \kwd{default-zone}}
2807       \yetmorekeys{\kwd{default-weekday}}}}
2808
2809   \code{parse-time} accepts a string containing a time (e.g.,
2810   \w{"\code{Jan 12, 1952}"}) and returns the universal time if it is
2811   successful.  If it is unsuccessful and the keyword argument
2812   \kwd{error-on-mismatch} is non-\FALSE, it signals an error.
2813   Otherwise it returns \FALSE.  The other keyword arguments have the
2814   following meaning:
2815   \begin{Lentry}
2816
2817   \item[\kwd{default-seconds}] specifies the default value for the
2818     seconds value if one is not provided by \var{time-string}.  The
2819     default value is 0.
2820
2821   \item[\kwd{default-minutes}] specifies the default value for the
2822     minutes value if one is not provided by \var{time-string}.  The
2823     default value is 0.
2824
2825   \item[\kwd{default-hours}] specifies the default value for the hours
2826     value if one is not provided by \var{time-string}.  The default
2827     value is 0.
2828
2829   \item[\kwd{default-day}] specifies the default value for the day
2830     value if one is not provided by \var{time-string}.  The default
2831     value is the current day.
2832
2833   \item[\kwd{default-month}] specifies the default value for the month
2834     value if one is not provided by \var{time-string}.  The default
2835     value is the current month.
2836
2837   \item[\kwd{default-year}] specifies the default value for the year
2838     value if one is not provided by \var{time-string}.  The default
2839     value is the current year.
2840
2841   \item[\kwd{default-zone}] specifies the default value for the time
2842     zone value if one is not provided by \var{time-string}.  The
2843     default value is the current time zone.
2844
2845   \item[\kwd{default-weekday}] specifies the default value for the day
2846     of the week if one is not provided by \var{time-string}.  The
2847     default value is the current day of the week.
2848   \end{Lentry}
2849   Any of the above keywords can be given the value \kwd{current} which
2850   means to use the current value as determined by a call to the
2851   operating system.
2852 \end{defun}
2853
2854 \begin{defun}{extensions:}{format-universal-time}{
2855     \args{\var{dest} \var{universal-time}
2856        \\
2857        \keys{\kwd{timezone}}
2858        \morekeys{\kwd{style} \kwd{date-first}}
2859        \yetmorekeys{\kwd{print-seconds} \kwd{print-meridian}}
2860        \yetmorekeys{\kwd{print-timezone} \kwd{print-weekday}}}}
2861    \defunx[extensions:]{format-decoded-time}{
2862      \args{\var{dest} \var{seconds} \var{minutes} \var{hours} \var{day} \var{month} \var{year}
2863        \\
2864        \keys{\kwd{timezone}}
2865        \morekeys{\kwd{style} \kwd{date-first}}
2866        \yetmorekeys{\kwd{print-seconds} \kwd{print-meridian}}
2867        \yetmorekeys{\kwd{print-timezone} \kwd{print-weekday}}}}
2868
2869    \code{format-universal-time} formats the time specified by
2870    \var{universal-time}.  \code{format-decoded-time} formats the time
2871    specified by \var{seconds}, \var{minutes}, \var{hours}, \var{day},
2872    \var{month}, and \var{year}.  \var{Dest} is any destination
2873    accepted by the \code{format} function.  The keyword arguments have
2874    the following meaning:
2875    \begin{Lentry}
2876
2877    \item[\kwd{timezone}] is an integer specifying the hours west of
2878      Greenwich.  \kwd{timezone} defaults to the current time zone.
2879
2880    \item[\kwd{style}] specifies the style to use in formatting the
2881      time.  The legal values are:
2882      \begin{Lentry}
2883
2884      \item[\kwd{short}] specifies to use a numeric date.
2885
2886      \item[\kwd{long}] specifies to format months and weekdays as
2887        words instead of numbers.
2888
2889      \item[\kwd{abbreviated}] is similar to long except the words are
2890        abbreviated.
2891
2892      \item[\kwd{government}] is similar to abbreviated, except the
2893        date is of the form ``day month year'' instead of ``month day,
2894        year''.
2895      \end{Lentry}
2896
2897    \item[\kwd{date-first}] if non-\false{} (default) will place the
2898      date first.  Otherwise, the time is placed first.
2899
2900    \item[\kwd{print-seconds}] if non-\false{} (default) will format
2901      the seconds as part of the time.  Otherwise, the seconds will be
2902      omitted.
2903
2904    \item[\kwd{print-meridian}] if non-\false{} (default) will format
2905      ``AM'' or ``PM'' as part of the time.  Otherwise, the ``AM'' or
2906      ``PM'' will be omitted.
2907
2908    \item[\kwd{print-timezone}] if non-\false{} (default) will format
2909      the time zone as part of the time.  Otherwise, the time zone will
2910      be omitted.
2911
2912      %%\item[\kwd{print-seconds}]
2913      %%if non-\false{} (default) will format the seconds as part of
2914      %%the time.  Otherwise, the seconds will be omitted.
2915
2916    \item[\kwd{print-weekday}] if non-\false{} (default) will format
2917      the weekday as part of the date.  Otherwise, the weekday will be
2918      omitted.
2919    \end{Lentry}
2920 \end{defun}
2921
2922 %% New stuff
2923 \begin{changebar}
2924 \section{Random Number Generation}
2925 \cindex{random number generation}
2926
2927 \clisp{} includes a random number generator as a standard part of the
2928 language; however, the implementation of the generator is not
2929 specified.  Two random number generators are available in \cmucl{},
2930 depending on the version.
2931
2932 \subsection{Original Generator}
2933 \cpsubindex{random number generation}{original generator}
2934 The default random number generator uses a lagged Fibonacci generator
2935 given by
2936 \begin{displaymath}
2937   z[i] = z[i - 24] - z[i - 55] \bmod 536870908
2938 \end{displaymath}
2939 where $z[i]$ is the $i$'th random number.  This generator produces
2940 small integer-valued numbers.  For larger integers, the small random
2941 integers are concatenated to produce larger integers.  For
2942 floating-point numbers, the bits from this generator are used as the
2943 bits of the floating-point significand.
2944
2945 \subsection{New Generator}
2946 \cpsubindex{random number generation}{new generator}
2947
2948 In some versions of \cmucl{}, the original generator above has been
2949 replaced with a subtract-with-borrow generator
2950 combined with a Weyl generator.\footnote{The generator described here
2951   is available if the feature \kwd{new-random} is present.}  The
2952 reason for the change was to use a documented generator which has
2953 passed tests for randomness.
2954
2955 The subtract-with-borrow generator is described by the following
2956 equation
2957 \begin{displaymath}
2958   z[i] = z[i + 20] - z[i + 5] - b
2959 \end{displaymath}
2960 where $z[i]$ is the $i$'th random number, which is a
2961 \code{double-float}.  All of the indices in this equation are
2962 interpreted modulo 32.  The quantity $b$ is carried over from the
2963 previous iteration and is either 0 or \code{double-float-epsilon}.  If
2964 $z[i]$ is positive, $b$ is set to zero.  Otherwise, $b$ is set to
2965 \code{double-float-epsilon}.
2966
2967 To increase the randomness of this generator, this generator is
2968 combined with a Weyl generator defined by
2969 \begin{displaymath}
2970   x[i] = x[i - 1] - y \bmod 1,
2971 \end{displaymath}
2972 where $y = 7097293079245107 \times 2^{-53}$.  Thus, the resulting
2973 random number $r[i]$ is
2974 \begin{displaymath}
2975   r[i] = (z[i] - x[i]) \bmod 1
2976 \end{displaymath}
2977
2978 This generator has been tested by Peter VanEynde using Marsaglia's
2979 diehard test suite for random number generators;  this generator
2980 passes the test suite.
2981
2982 This generator is designed for generating floating-point random
2983 numbers.  To obtain integers, the bits from the significand of the
2984 floating-point number are used as the bits of the integer.  As many
2985 floating-point numbers as needed are generated to obtain the desired
2986 number of bits in the random integer.
2987
2988 For floating-point numbers, this generator can be significantly faster
2989 than the original generator.
2990 \end{changebar}
2991
2992 %%\node Lisp Library,  , Time Parsing and Formatting, Design Choices and Extensions
2993 \section{Lisp Library}
2994 \label{lisp-lib}
2995
2996 The CMU Common Lisp project maintains a collection of useful or interesting
2997 programs written by users of our system.  The library is in
2998 \file{lib/contrib/}.  Two files there that users should read are:
2999 \begin{Lentry}
3000
3001 \item[CATALOG.TXT]
3002 This file contains a page for each entry in the library.  It
3003 contains information such as the author, portability or dependency issues, how
3004 to load the entry, etc.
3005
3006 \item[READ-ME.TXT]
3007 This file describes the library's organization and all the
3008 possible pieces of information an entry's catalog description could contain.
3009 \end{Lentry}
3010
3011 Hemlock has a command \F{Library Entry} that displays a list of the current
3012 library entries in an editor buffer.  There are mode specific commands that
3013 display catalog descriptions and load entries.  This is a simple and convenient
3014 way to browse the library.
3015
3016
3017 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/debug.ms}
3018
3019
3020
3021 %%\node The Debugger, The Compiler, Design Choices and Extensions, Top
3022 \chapter{The Debugger} \hide{-*- Dictionary: cmu-user -*-}
3023 \begin{center}
3024 \b{By Robert MacLachlan}
3025 \end{center}
3026 \cindex{debugger}
3027 \label{debugger}
3028
3029 \begin{comment}
3030 * Debugger Introduction::
3031 * The Command Loop::
3032 * Stack Frames::
3033 * Variable Access::
3034 * Source Location Printing::
3035 * Compiler Policy Control::
3036 * Exiting Commands::
3037 * Information Commands::
3038 * Breakpoint Commands::
3039 * Function Tracing::
3040 * Specials::
3041 \end{comment}
3042
3043 %%\node Debugger Introduction, The Command Loop, The Debugger, The Debugger
3044 \section{Debugger Introduction}
3045
3046 The \cmucl{} debugger is unique in its level of support for source-level
3047 debugging of compiled code.  Although some other debuggers allow access of
3048 variables by name, this seems to be the first \llisp{} debugger that:
3049 \begin{itemize}
3050
3051 \item
3052 Tells you when a variable doesn't have a value because it hasn't been
3053 initialized yet or has already been deallocated, or
3054
3055 \item
3056 Can display the precise source location corresponding to a code
3057 location in the debugged program.
3058 \end{itemize}
3059 These features allow the debugging of compiled code to be made almost
3060 indistinguishable from interpreted code debugging.
3061
3062 The debugger is an interactive command loop that allows a user to examine
3063 the function call stack.  The debugger is invoked when:
3064 \begin{itemize}
3065
3066 \item
3067 A \tindexed{serious-condition} is signaled, and it is not handled, or
3068
3069 \item
3070 \findexed{error} is called, and the condition it signals is not handled, or
3071
3072 \item
3073 The debugger is explicitly invoked with the \clisp{} \findexed{break}
3074 or \findexed{debug} functions.
3075 \end{itemize}
3076
3077 {\it Note: there are two debugger interfaces in CMU CL: the TTY debugger
3078 (described below) and the Motif debugger.  Since the difference is only in the
3079 user interface, much of this chapter also applies to the Motif version.
3080 \xlref{motif-interface} for a very brief discussion of the graphical
3081 interface.}
3082
3083 When you enter the TTY debugger, it looks something like this:
3084 \begin{example}
3085 Error in function CAR.
3086 Wrong type argument, 3, should have been of type LIST.
3087
3088 Restarts:
3089   0: Return to Top-Level.
3090
3091 Debug  (type H for help)
3092
3093 (CAR 3)
3094 0]
3095 \end{example}
3096 The first group of lines describes what the error was that put us in the
3097 debugger.  In this case \code{car} was called on \code{3}.  After \code{Restarts:}
3098 is a list of all the ways that we can restart execution after this error.  In
3099 this case, the only option is to return to top-level.  After printing its
3100 banner, the debugger prints the current frame and the debugger prompt.
3101
3102 %%\f
3103 %%\node The Command Loop, Stack Frames, Debugger Introduction, The Debugger
3104 \section{The Command Loop}
3105
3106 The debugger is an interactive read-eval-print loop much like the normal
3107 top-level, but some symbols are interpreted as debugger commands instead
3108 of being evaluated.  A debugger command starts with the symbol name of
3109 the command, possibly followed by some arguments on the same line.  Some
3110 commands prompt for additional input.  Debugger commands can be
3111 abbreviated by any unambiguous prefix: \code{help} can be typed as
3112 \code{h}, \code{he}, etc.  For convenience, some commands have
3113 ambiguous one-letter abbreviations: \code{f} for \code{frame}.
3114
3115 The package is not significant in debugger commands; any symbol with the
3116 name of a debugger command will work.  If you want to show the value of
3117 a variable that happens also to be the name of a debugger command, you
3118 can use the \code{list-locals} command or the \code{debug:var}
3119 function, or you can wrap the variable in a \code{progn} to hide it from
3120 the command loop.
3121
3122 The debugger prompt is ``\var{frame}\code{]}'', where \var{frame} is the number
3123 of the current frame.  Frames are numbered starting from zero at the top (most
3124 recent call), increasing down to the bottom.  The current frame is the frame
3125 that commands refer to.  The current frame also provides the lexical
3126 environment for evaluation of non-command forms.
3127
3128 \cpsubindex{evaluation}{debugger} The debugger evaluates forms in the lexical
3129 environment of the functions being debugged.  The debugger can only
3130 access variables.  You can't \code{go} or \code{return-from} into a
3131 function, and you can't call local functions.  Special variable
3132 references are evaluated with their current value (the innermost binding
3133 around the debugger invocation)\dash{}you don't get the value that the
3134 special had in the current frame.  \xlref{debug-vars} for more
3135 information on debugger variable access.
3136
3137 %%\f
3138 %%\node Stack Frames, Variable Access, The Command Loop, The Debugger
3139 \section{Stack Frames}
3140 \cindex{stack frames} \cpsubindex{frames}{stack}
3141
3142 A stack frame is the run-time representation of a call to a function;
3143 the frame stores the state that a function needs to remember what it is
3144 doing.  Frames have:
3145 \begin{itemize}
3146
3147 \item
3148 Variables (\pxlref{debug-vars}), which are the values being operated
3149 on, and
3150
3151 \item
3152 Arguments to the call (which are really just particularly interesting
3153 variables), and
3154
3155 \item
3156 A current location (\pxlref{source-locations}), which is the place in
3157 the program where the function was running when it stopped to call another
3158 function, or because of an interrupt or error.
3159 \end{itemize}
3160
3161
3162 %%\f
3163 \begin{comment}
3164 * Stack Motion::
3165 * How Arguments are Printed::
3166 * Function Names::
3167 * Funny Frames::
3168 * Debug Tail Recursion::
3169 * Unknown Locations and Interrupts::
3170 \end{comment}
3171
3172 %%\node Stack Motion, How Arguments are Printed, Stack Frames, Stack Frames
3173 \subsection{Stack Motion}
3174
3175 These commands move to a new stack frame and print the name of the function
3176 and the values of its arguments in the style of a Lisp function call:
3177 \begin{Lentry}
3178
3179 \item[\code{up}]
3180 Move up to the next higher frame.  More recent function calls are considered
3181 to be higher on the stack.
3182
3183 \item[\code{down}]
3184 Move down to the next lower frame.
3185
3186 \item[\code{top}]
3187 Move to the highest frame.
3188
3189 \item[\code{bottom}]
3190 Move to the lowest frame.
3191
3192 \item[\code{frame} [\textit{n}]]
3193 Move to the frame with the specified number.  Prompts for the number if not
3194 supplied.
3195
3196 \begin{comment}
3197 \key{S} [\var{function-name} [\var{n}]]
3198
3199 \item
3200 Search down the stack for function.  Prompts for the function name if not
3201 supplied.  Searches an optional number of times, but doesn't prompt for
3202 this number; enter it following the function.
3203
3204 \item[\key{R} [\var{function-name} [\var{n}]]]
3205 Search up the stack for function.  Prompts for the function name if not
3206 supplied.  Searches an optional number of times, but doesn't prompt for
3207 this number; enter it following the function.
3208 \end{comment}
3209 \end{Lentry}
3210 %%\f
3211 %%\node How Arguments are Printed, Function Names, Stack Motion, Stack Frames
3212 \subsection{How Arguments are Printed}
3213
3214 A frame is printed to look like a function call, but with the actual argument
3215 values in the argument positions.  So the frame for this call in the source:
3216 \begin{lisp}
3217 (myfun (+ 3 4) 'a)
3218 \end{lisp}
3219 would look like this:
3220 \begin{example}
3221 (MYFUN 7 A)
3222 \end{example}
3223 All keyword and optional arguments are displayed with their actual
3224 values; if the corresponding argument was not supplied, the value will
3225 be the default.  So this call:
3226 \begin{lisp}
3227 (subseq "foo" 1)
3228 \end{lisp}
3229 would look like this:
3230 \begin{example}
3231 (SUBSEQ "foo" 1 3)
3232 \end{example}
3233 And this call:
3234 \begin{lisp}
3235 (string-upcase "test case")
3236 \end{lisp}
3237 would look like this:
3238 \begin{example}
3239 (STRING-UPCASE "test case" :START 0 :END NIL)
3240 \end{example}
3241
3242 The arguments to a function call are displayed by accessing the argument
3243 variables.  Although those variables are initialized to the actual argument
3244 values, they can be set inside the function; in this case the new value will be
3245 displayed.
3246
3247 \code{\amprest} arguments are handled somewhat differently.  The value of
3248 the rest argument variable is displayed as the spread-out arguments to
3249 the call, so:
3250 \begin{lisp}
3251 (format t "~A is a ~A." "This" 'test)
3252 \end{lisp}
3253 would look like this:
3254 \begin{example}
3255 (FORMAT T "~A is a ~A." "This" 'TEST)
3256 \end{example}
3257 Rest arguments cause an exception to the normal display of keyword
3258 arguments in functions that have both \code{\amprest} and \code{\&key}
3259 arguments.  In this case, the keyword argument variables are not
3260 displayed at all; the rest arg is displayed instead.  So for these
3261 functions, only the keywords actually supplied will be shown, and the
3262 values displayed will be the argument values, not values of the
3263 (possibly modified) variables.
3264
3265 If the variable for an argument is never referenced by the function, it will be
3266 deleted.  The variable value is then unavailable, so the debugger prints
3267 \code{<unused-arg>} instead of the value.  Similarly, if for any of a number of
3268 reasons (described in more detail in section \ref{debug-vars}) the value of the
3269 variable is unavailable or not known to be available, then
3270 \code{<unavailable-arg>} will be printed instead of the argument value.
3271
3272 Printing of argument values is controlled by \code{*debug-print-level*} and
3273 \varref{debug-print-length}.
3274
3275 %%\f
3276 %%\node Function Names, Funny Frames, How Arguments are Printed, Stack Frames
3277 \subsection{Function Names}
3278 \cpsubindex{function}{names}
3279 \cpsubindex{names}{function}
3280
3281 If a function is defined by \code{defun}, \code{labels}, or \code{flet}, then the
3282 debugger will print the actual function name after the open parenthesis, like:
3283 \begin{example}
3284 (STRING-UPCASE "test case" :START 0 :END NIL)
3285 ((SETF AREF) \#\back{a} "for" 1)
3286 \end{example}
3287 Otherwise, the function name is a string, and will be printed in quotes:
3288 \begin{example}
3289 ("DEFUN MYFUN" BAR)
3290 ("DEFMACRO DO" (DO ((I 0 (1+ I))) ((= I 13))) NIL)
3291 ("SETQ *GC-NOTIFY-BEFORE*")
3292 \end{example}
3293 This string name is derived from the \w{\code{def}\var{mumble}} form that encloses
3294 or expanded into the lambda, or the outermost enclosing form if there is no
3295 \w{\code{def}\var{mumble}}.
3296
3297 %%\f
3298 %%\node Funny Frames, Debug Tail Recursion, Function Names, Stack Frames
3299 \subsection{Funny Frames}
3300 \cindex{external entry points}
3301 \cpsubindex{entry points}{external}
3302 \cpsubindex{block compilation}{debugger implications}
3303 \cpsubindex{external}{stack frame kind}
3304 \cpsubindex{optional}{stack frame kind}
3305 \cpsubindex{cleanup}{stack frame kind}
3306
3307 Sometimes the evaluator introduces new functions that are used to implement a
3308 user function, but are not directly specified in the source.  The main place
3309 this is done is for checking argument type and syntax.  Usually these functions
3310 do their thing and then go away, and thus are not seen on the stack in the
3311 debugger.  But when you get some sort of error during lambda-list processing,
3312 you end up in the debugger on one of these funny frames.
3313
3314 These funny frames are flagged by printing ``\code{[}\var{keyword}\code{]}'' after the
3315 parentheses.  For example, this call:
3316 \begin{lisp}
3317 (car 'a 'b)
3318 \end{lisp}
3319 will look like this:
3320 \begin{example}
3321 (CAR 2 A) [:EXTERNAL]
3322 \end{example}
3323 And this call:
3324 \begin{lisp}
3325 (string-upcase "test case" :end)
3326 \end{lisp}
3327 would look like this:
3328 \begin{example}
3329 ("DEFUN STRING-UPCASE" "test case" 335544424 1) [:OPTIONAL]
3330 \end{example}
3331
3332 As you can see, these frames have only a vague resemblance to the original
3333 call.  Fortunately, the error message displayed when you enter the debugger
3334 will usually tell you what the problem is (in these cases, too many arguments
3335 and odd keyword arguments.)  Also, if you go down the stack to the frame for
3336 the calling function, you can display the original source (\pxlref{source-locations}.)
3337
3338 With recursive or block compiled functions (\pxlref{block-compilation}), an \kwd{EXTERNAL} frame may appear before the frame
3339 representing the first call to the recursive function or entry to the compiled
3340 block.  This is a consequence of the way the compiler does block compilation:
3341 there is nothing odd with your program.  You will also see \kwd{CLEANUP} frames
3342 during the execution of \code{unwind-protect} cleanup code.  Note that inline
3343 expansion and open-coding affect what frames are present in the debugger; see
3344 sections \ref{debugger-policy} and \ref{open-coding}.
3345
3346 %%\f
3347 %%\node Debug Tail Recursion, Unknown Locations and Interrupts, Funny Frames, Stack Frames
3348 \subsection{Debug Tail Recursion}
3349 \label{debug-tail-recursion}
3350 \cindex{tail recursion}
3351 \cpsubindex{recursion}{tail}
3352
3353 Both the compiler and the interpreter are ``properly tail recursive.''  If a
3354 function call is in a tail-recursive position, the stack frame will be
3355 deallocated \i{at the time of the call}, rather than after the call returns.
3356 Consider this backtrace:
3357 \begin{example}
3358 (BAR ...)
3359 (FOO ...)
3360 \end{example}
3361 Because of tail recursion, it is not necessarily the case that
3362 \code{FOO} directly called \code{BAR}.  It may be that \code{FOO} called
3363 some other function \code{FOO2} which then called \code{BAR}
3364 tail-recursively, as in this example:
3365 \begin{example}
3366 (defun foo ()
3367   ...
3368   (foo2 ...)
3369   ...)
3370
3371 (defun foo2 (...)
3372   ...
3373   (bar ...))
3374
3375 (defun bar (...)
3376   ...)
3377 \end{example}
3378
3379 Usually the elimination of tail-recursive frames makes debugging more
3380 pleasant, since these frames are mostly uninformative.  If there is any
3381 doubt about how one function called another, it can usually be
3382 eliminated by finding the source location in the calling frame (section
3383 \ref{source-locations}.)
3384
3385 For a more thorough discussion of tail recursion, \pxlref{tail-recursion}.
3386
3387 %%\f
3388 %%\node Unknown Locations and Interrupts,  , Debug Tail Recursion, Stack Frames
3389 \subsection{Unknown Locations and Interrupts}
3390 \label{unknown-locations}
3391 \cindex{unknown code locations}
3392 \cpsubindex{locations}{unknown}
3393 \cindex{interrupts}
3394 \cpsubindex{errors}{run-time}
3395
3396 The debugger operates using special debugging information attached to
3397 the compiled code.  This debug information tells the debugger what it
3398 needs to know about the locations in the code where the debugger can be
3399 invoked.  If the debugger somehow encounters a location not described in
3400 the debug information, then it is said to be \var{unknown}.  If the code
3401 location for a frame is unknown, then some variables may be
3402 inaccessible, and the source location cannot be precisely displayed.
3403
3404 There are three reasons why a code location could be unknown:
3405 \begin{itemize}
3406
3407 \item
3408 There is inadequate debug information due to the value of the \code{debug}
3409 optimization quality.  \xlref{debugger-policy}.
3410
3411 \item
3412 The debugger was entered because of an interrupt such as \code{$\hat{ }C$}.
3413
3414 \item
3415 A hardware error such as ``\code{bus error}'' occurred in code that was
3416 compiled unsafely due to the value of the \code{safety} optimization
3417 quality.  \xlref{optimize-declaration}.
3418 \end{itemize}
3419
3420 In the last two cases, the values of argument variables are accessible,
3421 but may be incorrect.  \xlref{debug-var-validity} for more details on
3422 when variable values are accessible.
3423
3424 It is possible for an interrupt to happen when a function call or return is in
3425 progress.  The debugger may then flame out with some obscure error or insist
3426 that the bottom of the stack has been reached, when the real problem is that
3427 the current stack frame can't be located.  If this happens, return from the
3428 interrupt and try again.
3429
3430 When running interpreted code, all locations should be known.  However,
3431 an interrupt might catch some subfunction of the interpreter at an
3432 unknown location.  In this case, you should be able to go up the stack a
3433 frame or two and reach an interpreted frame which can be debugged.
3434
3435 %%\f
3436 %%\node Variable Access, Source Location Printing, Stack Frames, The Debugger
3437 \section{Variable Access}
3438 \label{debug-vars}
3439 \cpsubindex{variables}{debugger access}
3440 \cindex{debug variables}
3441
3442 There are three ways to access the current frame's local variables in the
3443 debugger.  The simplest is to type the variable's name into the debugger's
3444 read-eval-print loop.  The debugger will evaluate the variable reference as
3445 though it had appeared inside that frame.
3446
3447 The debugger doesn't really understand lexical scoping; it has just one
3448 namespace for all the variables in a function.  If a symbol is the name of
3449 multiple variables in the same function, then the reference appears ambiguous,
3450 even though lexical scoping specifies which value is visible at any given
3451 source location.  If the scopes of the two variables are not nested, then the
3452 debugger can resolve the ambiguity by observing that only one variable is
3453 accessible.
3454
3455 When there are ambiguous variables, the evaluator assigns each one a
3456 small integer identifier.  The \code{debug:var} function and the
3457 \code{list-locals} command use this identifier to distinguish between
3458 ambiguous variables:
3459 \begin{Lentry}
3460
3461 \item[\code{list-locals} \mopt{\var{prefix}}]%%\hfill\\
3462 This command prints the name and value of all variables in the current
3463 frame whose name has the specified \var{prefix}.  \var{prefix} may be a
3464 string or a symbol.  If no \var{prefix} is given, then all available
3465 variables are printed.  If a variable has a potentially ambiguous name,
3466 then the name is printed with a ``\code{\#}\var{identifier}'' suffix, where
3467 \var{identifier} is the small integer used to make the name unique.
3468 \end{Lentry}
3469
3470 \begin{defun}{debug:}{var}{\args{\var{name} \ampoptional{} \var{identifier}}}
3471
3472   This function returns the value of the variable in the current frame
3473   with the specified \var{name}.  If supplied, \var{identifier}
3474   determines which value to return when there are ambiguous variables.
3475
3476   When \var{name} is a symbol, it is interpreted as the symbol name of
3477   the variable, i.e., the package is significant.  If \var{name} is an
3478   uninterned symbol (gensym), then return the value of the uninterned
3479   variable with the same name.  If \var{name} is a string,
3480   \code{debug:var} interprets it as the prefix of a variable name, which
3481   must unambiguously complete to the name of a valid variable.
3482
3483   This function is useful mainly for accessing the value of uninterned
3484   or ambiguous variables, since most variables can be evaluated
3485   directly.
3486 \end{defun}
3487
3488 %%\f
3489 \begin{comment}
3490 * Variable Value Availability::
3491 * Note On Lexical Variable Access::
3492 \end{comment}
3493
3494 %%\node Variable Value Availability, Note On Lexical Variable Access, Variable Access, Variable Access
3495 \subsection{Variable Value Availability}
3496 \label{debug-var-validity}
3497 \cindex{availability of debug variables}
3498 \cindex{validity of debug variables}
3499 \cindex{debug optimization quality}
3500
3501 The value of a variable may be unavailable to the debugger in portions of the
3502 program where \clisp{} says that the variable is defined.  If a variable value is
3503 not available, the debugger will not let you read or write that variable.  With
3504 one exception, the debugger will never display an incorrect value for a
3505 variable.  Rather than displaying incorrect values, the debugger tells you the
3506 value is unavailable.
3507
3508 The one exception is this: if you interrupt (e.g., with \code{$\hat{ }C$}) or if there is
3509 an unexpected hardware error such as ``\code{bus error}'' (which should only happen
3510 in unsafe code), then the values displayed for arguments to the interrupted
3511 frame might be incorrect.\footnote{Since the location of an interrupt or hardware
3512 error will always be an unknown location (\pxlref{unknown-locations}),
3513 non-argument variable values will never be available in the interrupted frame.}
3514 This exception applies only to the interrupted frame: any frame farther down
3515 the stack will be fine.
3516
3517 The value of a variable may be unavailable for these reasons:
3518 \begin{itemize}
3519
3520 \item
3521 The value of the \code{debug} optimization quality may have omitted debug
3522 information needed to determine whether the variable is available.
3523 Unless a variable is an argument, its value will only be available when
3524 \code{debug} is at least \code{2}.
3525
3526 \item
3527 The compiler did lifetime analysis and determined that the value was no longer
3528 needed, even though its scope had not been exited.  Lifetime analysis is
3529 inhibited when the \code{debug} optimization quality is \code{3}.
3530
3531 \item
3532 The variable's name is an uninterned symbol (gensym).  To save space, the
3533 compiler only dumps debug information about uninterned variables when the
3534 \code{debug} optimization quality is \code{3}.
3535
3536 \item
3537 The frame's location is unknown (\pxlref{unknown-locations}) because
3538 the debugger was entered due to an interrupt or unexpected hardware error.
3539 Under these conditions the values of arguments will be available, but might be
3540 incorrect.  This is the exception above.
3541
3542 \item
3543 The variable was optimized out of existence.  Variables with no reads are
3544 always optimized away, even in the interpreter.  The degree to which the
3545 compiler deletes variables will depend on the value of the \code{compilation-speed}
3546 optimization quality, but most source-level optimizations are done under all
3547 compilation policies.
3548 \end{itemize}
3549
3550
3551 Since it is especially useful to be able to get the arguments to a function,
3552 argument variables are treated specially when the \code{speed} optimization
3553 quality is less than \code{3} and the \code{debug} quality is at least \code{1}.
3554 With this compilation policy, the values of argument variables are almost
3555 always available everywhere in the function, even at unknown locations.  For
3556 non-argument variables, \code{debug} must be at least \code{2} for values to be
3557 available, and even then, values are only available at known locations.
3558
3559 %%\f
3560 %%\node Note On Lexical Variable Access,  , Variable Value Availability, Variable Access
3561 \subsection{Note On Lexical Variable Access}
3562 \cpsubindex{evaluation}{debugger}
3563
3564 When the debugger command loop establishes variable bindings for available
3565 variables, these variable bindings have lexical scope and dynamic
3566 extent.\footnote{The variable bindings are actually created using the \clisp{}
3567 \code{symbol-macrolet} special form.}  You can close over them, but such closures
3568 can't be used as upward funargs.
3569
3570 You can also set local variables using \code{setq}, but if the variable was closed
3571 over in the original source and never set, then setting the variable in the
3572 debugger may not change the value in all the functions the variable is defined
3573 in.  Another risk of setting variables is that you may assign a value of a type
3574 that the compiler proved the variable could never take on.  This may result in
3575 bad things happening.
3576
3577 %%\f
3578 %%\node Source Location Printing, Compiler Policy Control, Variable Access, The Debugger
3579 \section{Source Location Printing}
3580 \label{source-locations}
3581 \cpsubindex{source location printing}{debugger}
3582
3583 One of CMU \clisp{}'s unique capabilities is source level debugging of compiled
3584 code.  These commands display the source location for the current frame:
3585 \begin{Lentry}
3586
3587 \item[\code{source} \mopt{\var{context}}]%%\hfill\\
3588 This command displays the file that the current frame's function was defined
3589 from (if it was defined from a file), and then the source form responsible for
3590 generating the code that the current frame was executing.  If \var{context} is
3591 specified, then it is an integer specifying the number of enclosing levels of
3592 list structure to print.
3593
3594 \item[\code{vsource} \mopt{\var{context}}]%%\hfill\\
3595 This command is identical to \code{source}, except that it uses the
3596 global values of \code{*print-level*} and \code{*print-length*} instead
3597 of the debugger printing control variables \code{*debug-print-level*}
3598 and \code{*debug-print-length*}.
3599 \end{Lentry}
3600
3601 The source form for a location in the code is the innermost list present
3602 in the original source that encloses the form responsible for generating
3603 that code.  If the actual source form is not a list, then some enclosing
3604 list will be printed.  For example, if the source form was a reference
3605 to the variable \code{*some-random-special*}, then the innermost
3606 enclosing evaluated form will be printed.  Here are some possible
3607 enclosing forms:
3608 \begin{example}
3609 (let ((a *some-random-special*))
3610   ...)
3611
3612 (+ *some-random-special* ...)
3613 \end{example}
3614
3615 If the code at a location was generated from the expansion of a macro or a
3616 source-level compiler optimization, then the form in the original source that
3617 expanded into that code will be printed.  Suppose the file
3618 \file{/usr/me/mystuff.lisp} looked like this:
3619 \begin{example}
3620 (defmacro mymac ()
3621   '(myfun))
3622
3623 (defun foo ()
3624   (mymac)
3625   ...)
3626 \end{example}
3627 If \code{foo} has called \code{myfun}, and is waiting for it to return, then the
3628 \code{source} command would print:
3629 \begin{example}
3630 ; File: /usr/me/mystuff.lisp
3631
3632 (MYMAC)
3633 \end{example}
3634 Note that the macro use was printed, not the actual function call form,
3635 \code{(myfun)}.
3636
3637 If enclosing source is printed by giving an argument to \code{source} or
3638 \code{vsource}, then the actual source form is marked by wrapping it in a list
3639 whose first element is \code{\#:***HERE***}.  In the previous example,
3640 \w{\code{source 1}} would print:
3641 \begin{example}
3642 ; File: /usr/me/mystuff.lisp
3643
3644 (DEFUN FOO ()
3645   (#:***HERE***
3646    (MYMAC))
3647   ...)
3648 \end{example}
3649
3650 %%\f
3651 \begin{comment}
3652 * How the Source is Found::
3653 * Source Location Availability::
3654 \end{comment}
3655
3656 %%\node How the Source is Found, Source Location Availability, Source Location Printing, Source Location Printing
3657 \subsection{How the Source is Found}
3658
3659 If the code was defined from \llisp{} by \code{compile} or
3660 \code{eval}, then the source can always be reliably located.  If the
3661 code was defined from a \code{fasl} file created by
3662 \findexed{compile-file}, then the debugger gets the source forms it
3663 prints by reading them from the original source file.  This is a
3664 potential problem, since the source file might have moved or changed
3665 since the time it was compiled.
3666
3667 The source file is opened using the \code{truename} of the source file
3668 pathname originally given to the compiler.  This is an absolute pathname
3669 with all logical names and symbolic links expanded.  If the file can't
3670 be located using this name, then the debugger gives up and signals an
3671 error.
3672
3673 If the source file can be found, but has been modified since the time it was
3674 compiled, the debugger prints this warning:
3675 \begin{example}
3676 ; File has been modified since compilation:
3677 ;   \var{filename}
3678 ; Using form offset instead of character position.
3679 \end{example}
3680 where \var{filename} is the name of the source file.  It then proceeds using a
3681 robust but not foolproof heuristic for locating the source.  This heuristic
3682 works if:
3683 \begin{itemize}
3684
3685 \item
3686 No top-level forms before the top-level form containing the source have been
3687 added or deleted, and
3688
3689 \item
3690 The top-level form containing the source has not been modified much.  (More
3691 precisely, none of the list forms beginning before the source form have been
3692 added or deleted.)
3693 \end{itemize}
3694
3695 If the heuristic doesn't work, the displayed source will be wrong, but will
3696 probably be near the actual source.  If the ``shape'' of the top-level form in
3697 the source file is too different from the original form, then an error will be
3698 signaled.  When the heuristic is used, the source location commands are
3699 noticeably slowed.
3700
3701 Source location printing can also be confused if (after the source was
3702 compiled) a read-macro you used in the code was redefined to expand into
3703 something different, or if a read-macro ever returns the same \code{eq}
3704 list twice.  If you don't define read macros and don't use \code{\#\#} in
3705 perverted ways, you don't need to worry about this.
3706
3707 %%\f
3708 %%\node Source Location Availability,  , How the Source is Found, Source Location Printing
3709 \subsection{Source Location Availability}
3710
3711 \cindex{debug optimization quality}
3712 Source location information is only available when the \code{debug}
3713 optimization quality is at least \code{2}.  If source location information is
3714 unavailable, the source commands will give an error message.
3715
3716 If source location information is available, but the source location is
3717 unknown because of an interrupt or unexpected hardware error
3718 (\pxlref{unknown-locations}), then the command will print:
3719 \begin{example}
3720 Unknown location: using block start.
3721 \end{example}
3722 and then proceed to print the source location for the start of the \i{basic
3723 block} enclosing the code location. \cpsubindex{block}{basic}
3724 \cpsubindex{block}{start location}
3725 It's a bit complicated to explain exactly what a basic block is, but
3726 here are some properties of the block start location:
3727 \begin{itemize}
3728
3729 \item The block start location may be the same as the true location.
3730
3731 \item The block start location will never be later in the
3732   program's flow of control than the true location.
3733
3734 \item No conditional control structures (such as \code{if},
3735   \code{cond}, \code{or}) will intervene between the block start and
3736   the true location (but note that some conditionals present in the
3737   original source could be optimized away.)  Function calls \i{do not}
3738   end basic blocks.
3739
3740 \item The head of a loop will be the start of a block.
3741
3742 \item The programming language concept of ``block structure'' and the
3743   \clisp{} \code{block} special form are totally unrelated to the
3744   compiler's basic block.
3745 \end{itemize}
3746
3747 In other words, the true location lies between the printed location and the
3748 next conditional (but watch out because the compiler may have changed the
3749 program on you.)
3750
3751 %%\f
3752 %%\node Compiler Policy Control, Exiting Commands, Source Location Printing, The Debugger
3753 \section{Compiler Policy Control}
3754 \label{debugger-policy}
3755 \cpsubindex{policy}{debugger}
3756 \cindex{debug optimization quality}
3757 \cindex{optimize declaration}
3758
3759 The compilation policy specified by \code{optimize} declarations affects the
3760 behavior seen in the debugger.  The \code{debug} quality directly affects the
3761 debugger by controlling the amount of debugger information dumped.  Other
3762 optimization qualities have indirect but observable effects due to changes in
3763 the way compilation is done.
3764
3765 Unlike the other optimization qualities (which are compared in relative value
3766 to evaluate tradeoffs), the \code{debug} optimization quality is directly
3767 translated to a level of debug information.  This absolute interpretation
3768 allows the user to count on a particular amount of debug information being
3769 available even when the values of the other qualities are changed during
3770 compilation.  These are the levels of debug information that correspond to the
3771 values of the \code{debug} quality:
3772 \begin{Lentry}
3773
3774 \item[\code{0}]
3775 Only the function name and enough information to allow the stack to
3776 be parsed.
3777
3778 \item[\code{\w{$>$ 0}}]
3779 Any level greater than \code{0} gives level \code{0} plus all
3780 argument variables.  Values will only be accessible if the argument
3781 variable is never set and
3782 \code{speed} is not \code{3}.  \cmucl{} allows any real value for optimization
3783 qualities.  It may be useful to specify \code{0.5} to get backtrace argument
3784 display without argument documentation.
3785
3786 \item[\code{1}] Level \code{1} provides argument documentation
3787 (printed arglists) and derived argument/result type information.
3788 This makes \findexed{describe} more informative, and allows the
3789 compiler to do compile-time argument count and type checking for any
3790 calls compiled at run-time.
3791
3792 \item[\code{2}]
3793 Level \code{1} plus all interned local variables, source location
3794 information, and lifetime information that tells the debugger when arguments
3795 are available (even when \code{speed} is \code{3} or the argument is set.)  This is
3796 the default.
3797
3798 \item[\code{3}]
3799 Level \code{2} plus all uninterned variables.  In addition, lifetime
3800 analysis is disabled (even when \code{speed} is \code{3}), ensuring that all variable
3801 values are available at any known location within the scope of the binding.
3802 This has a speed penalty in addition to the obvious space penalty.
3803 \end{Lentry}
3804
3805 As you can see, if the \code{speed} quality is \code{3}, debugger performance is
3806 degraded.  This effect comes from the elimination of argument variable
3807 special-casing (\pxlref{debug-var-validity}.)  Some degree of
3808 speed/debuggability tradeoff is unavoidable, but the effect is not too drastic
3809 when \code{debug} is at least \code{2}.
3810
3811 \cindex{inline expansion}
3812 \cindex{semi-inline expansion}
3813 In addition to \code{inline} and \code{notinline} declarations, the relative values
3814 of the \code{speed} and \code{space} qualities also change whether functions are
3815 inline expanded (\pxlref{inline-expansion}.)  If a function is inline
3816 expanded, then there will be no frame to represent the call, and the arguments
3817 will be treated like any other local variable.  Functions may also be
3818 ``semi-inline'', in which case there is a frame to represent the call, but the
3819 call is to an optimized local version of the function, not to the original
3820 function.
3821
3822 %%\f
3823 %%\node Exiting Commands, Information Commands, Compiler Policy Control, The Debugger
3824 \section{Exiting Commands}
3825
3826 These commands get you out of the debugger.
3827
3828 \begin{Lentry}
3829
3830 \item[\code{quit}]
3831 Throw to top level.
3832
3833 \item[\code{restart} \mopt{\var{n}}]%%\hfill\\
3834 Invokes the \var{n}th restart case as displayed by the \code{error}
3835 command.  If \var{n} is not specified, the available restart cases are
3836 reported.
3837
3838 \item[\code{go}]
3839 Calls \code{continue} on the condition given to \code{debug}.  If there is no
3840 restart case named \var{continue}, then an error is signaled.
3841
3842 \item[\code{abort}]
3843 Calls \code{abort} on the condition given to \code{debug}.  This is
3844 useful for popping debug command loop levels or aborting to top level,
3845 as the case may be.
3846
3847 \begin{comment}
3848 (\code{debug:debug-return} \var{expression} \mopt{\var{frame}})
3849
3850 \item
3851 From the current or specified frame, return the result of evaluating
3852 expression.  If multiple values are expected, then this function should be
3853 called for multiple values.
3854 \end{comment}
3855 \end{Lentry}
3856
3857 %%\f
3858 %%\node Information Commands, Breakpoint Commands, Exiting Commands, The Debugger
3859 \section{Information Commands}
3860
3861 Most of these commands print information about the current frame or
3862 function, but a few show general information.
3863
3864 \begin{Lentry}
3865
3866 \item[\code{help}, \code{?}]
3867 Displays a synopsis of debugger commands.
3868
3869 \item[\code{describe}]
3870 Calls \code{describe} on the current function, displays number of local
3871 variables, and indicates whether the function is compiled or interpreted.
3872
3873 \item[\code{print}]
3874 Displays the current function call as it would be displayed by moving to
3875 this frame.
3876
3877 \item[\code{vprint} (or \code{pp}) \mopt{\var{verbosity}}]%%\hfill\\
3878 Displays the current function call using \code{*print-level*} and
3879 \code{*print-length*} instead of \code{*debug-print-level*} and
3880 \code{*debug-print-length*}.  \var{verbosity} is a small integer
3881 (default 2) that controls other dimensions of verbosity.
3882
3883 \item[\code{error}]
3884 Prints the condition given to \code{invoke-debugger} and the active
3885 proceed cases.
3886
3887 \item[\code{backtrace} \mopt{\var{n}}]\hfill\\
3888 Displays all the frames from the current to the bottom.  Only shows
3889 \var{n} frames if specified.  The printing is controlled by
3890 \code{*debug-print-level*} and \code{*debug-print-length*}.
3891
3892 \begin{comment}
3893 (\code{debug:debug-function} \mopt{\var{n}})
3894
3895 \item
3896 Returns the function from the current or specified frame.
3897
3898 \item[(\code{debug:function-name} \mopt{\var{n}})]
3899 Returns the function name from the current or specified frame.
3900
3901 \item[(\code{debug:pc} \mopt{\var{frame}})]
3902 Returns the index of the instruction for the function in the current or
3903 specified frame.  This is useful in conjunction with \code{disassemble}.
3904 The pc returned points to the instruction after the one that was fatal.
3905 \end{comment}
3906 \end{Lentry}
3907
3908 %%\f
3909 %%\node Breakpoint Commands, Function Tracing, Information Commands, The Debugger
3910 \section{Breakpoint Commands}
3911
3912 \cmucl{} supports setting of breakpoints inside compiled functions and
3913 stepping of compiled code.  Breakpoints can only be set at known
3914 locations (\pxlref{unknown-locations}), so these commands are largely
3915 useless unless the \code{debug} optimize quality is at least \code{2}
3916 (\pxlref{debugger-policy}).  These commands manipulate breakpoints:
3917 \begin{Lentry}
3918 \item[\code{breakpoint} \var{location} \mstar{\var{option} \var{value}}]
3919 %%\hfill\\
3920 Set a breakpoint in some function.  \var{location} may be an integer
3921 code location number (as displayed by \code{list-locations}) or a
3922 keyword.  The keyword can be used to indicate setting a breakpoint at
3923 the function start (\kwd{start}, \kwd{s}) or function end
3924 (\kwd{end}, \kwd{e}).  The \code{breakpoint} command has
3925 \kwd{condition}, \kwd{break}, \kwd{print} and \kwd{function}
3926 options which work similarly to the \code{trace} options.
3927
3928 \item[\code{list-locations} (or \code{ll}) \mopt{\var{function}}]%%\hfill\\
3929 List all the code locations in the current frame's function, or in
3930 \var{function} if it is supplied.  The display format is the code
3931 location number, a colon and then the source form for that location:
3932 \begin{example}
3933 3: (1- N)
3934 \end{example}
3935 If consecutive locations have the same source, then a numeric range like
3936 \code{3-5:} will be printed.  For example, a default function call has a
3937 known location both immediately before and after the call, which would
3938 result in two code locations with the same source.  The listed function
3939 becomes the new default function for breakpoint setting (via the
3940 \code{breakpoint} command).
3941
3942 \item[\code{list-breakpoints} (or \code{lb})]%%\hfill\\
3943 List all currently active breakpoints with their breakpoint number.
3944
3945 \item[\code{delete-breakpoint} (or \code{db}) \mopt{\var{number}}]%%\hfill\\
3946 Delete a breakpoint specified by its breakpoint number.  If no number is
3947 specified, delete all breakpoints.
3948
3949 \item[\code{step}]%%\hfill\\
3950 Step to the next possible breakpoint location in the current function.
3951 This always steps over function calls, instead of stepping into them.
3952 \end{Lentry}
3953
3954 \begin{comment}
3955 * Breakpoint Example::
3956 \end{comment}
3957
3958 %%\node Breakpoint Example,  , Breakpoint Commands, Breakpoint Commands
3959 \subsection{Breakpoint Example}
3960
3961 Consider this definition of the factorial function:
3962 \begin{lisp}
3963 (defun ! (n)
3964   (if (zerop n)
3965       1
3966       (* n (! (1- n)))))
3967 \end{lisp}
3968 This debugger session demonstrates the use of breakpoints:
3969 \begin{example}
3970 common-lisp-user> (break) ; Invoke debugger
3971
3972 Break
3973
3974 Restarts:
3975   0: [CONTINUE] Return from BREAK.
3976   1: [ABORT   ] Return to Top-Level.
3977
3978 Debug  (type H for help)
3979
3980 (INTERACTIVE-EVAL (BREAK))
3981 0] ll #'!
3982 0: #'(LAMBDA (N) (BLOCK ! (IF # 1 #)))
3983 1: (ZEROP N)
3984 2: (* N (! (1- N)))
3985 3: (1- N)
3986 4: (! (1- N))
3987 5: (* N (! (1- N)))
3988 6: #'(LAMBDA (N) (BLOCK ! (IF # 1 #)))
3989 0] br 2
3990 (* N (! (1- N)))
3991 1: 2 in !
3992 Added.
3993 0] q
3994
3995 common-lisp-user> (! 10) ; Call the function
3996
3997 *Breakpoint hit*
3998
3999 Restarts:
4000   0: [CONTINUE] Return from BREAK.
4001   1: [ABORT   ] Return to Top-Level.
4002
4003 Debug  (type H for help)
4004
4005 (! 10) ; We are now in first call (arg 10) before the multiply
4006 Source: (* N (! (1- N)))
4007 3] st
4008
4009 *Step*
4010
4011 (! 10) ; We have finished evaluation of (1- n)
4012 Source: (1- N)
4013 3] st
4014
4015 *Breakpoint hit*
4016
4017 Restarts:
4018   0: [CONTINUE] Return from BREAK.
4019   1: [ABORT   ] Return to Top-Level.
4020
4021 Debug  (type H for help)
4022
4023 (! 9) ; We hit the breakpoint in the recursive call
4024 Source: (* N (! (1- N)))
4025 3]
4026 \end{example}
4027
4028
4029
4030 %%\f
4031 %%\node Function Tracing, Specials, Breakpoint Commands, The Debugger
4032 \section{Function Tracing}
4033 \cindex{tracing}
4034 \cpsubindex{function}{tracing}
4035
4036 The tracer causes selected functions to print their arguments and
4037 their results whenever they are called.  Options allow conditional
4038 printing of the trace information and conditional breakpoints on
4039 function entry or exit.
4040
4041 \begin{defmac}{}{trace}{%
4042     \args{\mstar{option global-value} \mstar{name \mstar{option
4043           value}}}}
4044
4045   \code{trace} is a debugging tool that prints information when
4046   specified functions are called.  In its simplest form:
4047   \begin{example}
4048     (trace \var{name-1} \var{name-2} ...)
4049   \end{example}
4050   \code{trace} causes a printout on \vindexed{trace-output} each time
4051   that one of the named functions is entered or returns (the
4052   \var{names} are not evaluated.)  Trace output is indented according
4053   to the number of pending traced calls, and this trace depth is
4054   printed at the beginning of each line of output.  Printing verbosity
4055   of arguments and return values is controlled by
4056   \vindexed{debug-print-level} and \vindexed{debug-print-length}.
4057
4058   If no \var{names} or \var{options} are given, \code{trace}
4059   returns the list of all currently traced functions,
4060   \code{*traced-function-list*}.
4061
4062   Trace options can cause the normal printout to be suppressed, or
4063   cause extra information to be printed.  Each option is a pair of an
4064   option keyword and a value form.  Options may be interspersed with
4065   function names.  Options only affect tracing of the function whose
4066   name they appear immediately after.  Global options are specified
4067   before the first name, and affect all functions traced by a given
4068   use of \code{trace}.  If an already traced function is traced again,
4069   any new options replace the old options.  The following options are
4070   defined:
4071   \begin{Lentry}
4072   \item[\kwd{condition} \var{form}, \kwd{condition-after} \var{form},
4073     \kwd{condition-all} \var{form}] If \kwd{condition} is specified,
4074     then \code{trace} does nothing unless \var{form} evaluates to true
4075     at the time of the call.  \kwd{condition-after} is similar, but
4076     suppresses the initial printout, and is tested when the function
4077     returns.  \kwd{condition-all} tries both before and after.
4078
4079   \item[\kwd{wherein} \var{names}] If specified, \var{names} is a
4080     function name or list of names.  \code{trace} does nothing unless
4081     a call to one of those functions encloses the call to this
4082     function (i.e. it would appear in a backtrace.)  Anonymous
4083     functions have string names like \code{"DEFUN FOO"}.
4084
4085   \item[\kwd{break} \var{form}, \kwd{break-after} \var{form},
4086     \kwd{break-all} \var{form}] If specified, and \var{form} evaluates
4087     to true, then the debugger is invoked at the start of the
4088     function, at the end of the function, or both, according to the
4089     respective option.
4090
4091   \item[\kwd{print} \var{form}, \kwd{print-after} \var{form},
4092     \kwd{print-all} \var{form}] In addition to the usual printout, the
4093     result of evaluating \var{form} is printed at the start of the
4094     function, at the end of the function, or both, according to the
4095     respective option.  Multiple print options cause multiple values
4096     to be printed.
4097
4098   \item[\kwd{function} \var{function-form}] This is not really an
4099     option, but rather another way of specifying what function to
4100     trace.  The \var{function-form} is evaluated immediately, and the
4101     resulting function is traced.
4102
4103   \item[\kwd{encapsulate} \mgroup{\kwd{default} | \true{} | \nil{}}] In \cmucl,
4104     tracing can be done either by temporarily redefining the function
4105     name (encapsulation), or using breakpoints.  When breakpoints are
4106     used, the function object itself is destructively modified to
4107     cause the tracing action.  The advantage of using breakpoints is
4108     that tracing works even when the function is anonymously called
4109     via \code{funcall}.
4110
4111     When \kwd{encapsulate} is true, tracing is done via encapsulation.
4112     \kwd{default} is the default, and means to use encapsulation for
4113     interpreted functions and funcallable instances, breakpoints
4114     otherwise.  When encapsulation is used, forms are \emph{not}
4115     evaluated in the function's lexical environment, but
4116     \code{debug:arg} can still be used.
4117   \end{Lentry}
4118
4119   \kwd{condition}, \kwd{break} and \kwd{print} forms are evaluated in
4120   the lexical environment of the called function; \code{debug:var} and
4121   \code{debug:arg} can be used.  The \code{-after} and \code{-all}
4122   forms are evaluated in the null environment.
4123 \end{defmac}
4124
4125 \begin{defmac}{}{untrace}{ \args{\amprest{} \var{function-names}}}
4126
4127   This macro turns off tracing for the specified functions, and
4128   removes their names from \code{*traced-function-list*}.  If no
4129   \var{function-names} are given, then all currently traced functions
4130   are untraced.
4131 \end{defmac}
4132
4133 \begin{defvar}{extensions:}{traced-function-list}
4134
4135   A list of function names maintained and used by \code{trace},
4136   \code{untrace}, and \code{untrace-all}.  This list should contain
4137   the names of all functions currently being traced.
4138 \end{defvar}
4139
4140 \begin{defvar}{extensions:}{max-trace-indentation}
4141
4142   The maximum number of spaces which should be used to indent trace
4143   printout.  This variable is initially set to 40.
4144 \end{defvar}
4145
4146 \begin{comment}
4147 * Encapsulation Functions::
4148 \end{comment}
4149
4150 %%\node Encapsulation Functions,  , Function Tracing, Function Tracing
4151 \subsection{Encapsulation Functions}
4152 \cindex{encapsulation}
4153 \cindex{advising}
4154
4155 The encapsulation functions provide a mechanism for intercepting the
4156 arguments and results of a function.  \code{encapsulate} changes the
4157 function definition of a symbol, and saves it so that it can be
4158 restored later.  The new definition normally calls the original
4159 definition.  The \clisp{} \findexed{fdefinition} function always returns
4160 the original definition, stripping off any encapsulation.
4161
4162 The original definition of the symbol can be restored at any time by
4163 the \code{unencapsulate} function.  \code{encapsulate} and \code{unencapsulate}
4164 allow a symbol to be multiply encapsulated in such a way that different
4165 encapsulations can be completely transparent to each other.
4166
4167 Each encapsulation has a type which may be an arbitrary lisp object.
4168 If a symbol has several encapsulations of different types, then any
4169 one of them can be removed without affecting more recent ones.
4170 A symbol may have more than one encapsulation of the same type, but
4171 only the most recent one can be undone.
4172
4173 \begin{defun}{extensions:}{encapsulate}{%
4174     \args{\var{symbol} \var{type} \var{body}}}
4175
4176   Saves the current definition of \var{symbol}, and replaces it with a
4177   function which returns the result of evaluating the form,
4178   \var{body}.  \var{Type} is an arbitrary lisp object which is the
4179   type of encapsulation.
4180
4181   When the new function is called, the following variables are bound
4182   for the evaluation of \var{body}:
4183   \begin{Lentry}
4184
4185   \item[\code{extensions:argument-list}] A list of the arguments to
4186     the function.
4187
4188   \item[\code{extensions:basic-definition}] The unencapsulated
4189     definition of the function.
4190   \end{Lentry}
4191   The unencapsulated definition may be called with the original
4192   arguments by including the form
4193   \begin{lisp}
4194     (apply extensions:basic-definition extensions:argument-list)
4195   \end{lisp}
4196
4197   \code{encapsulate} always returns \var{symbol}.
4198 \end{defun}
4199
4200 \begin{defun}{extensions:}{unencapsulate}{\args{\var{symbol} \var{type}}}
4201
4202   Undoes \var{symbol}'s most recent encapsulation of type \var{type}.
4203   \var{Type} is compared with \code{eq}.  Encapsulations of other
4204   types are left in place.
4205 \end{defun}
4206
4207 \begin{defun}{extensions:}{encapsulated-p}{%
4208     \args{\var{symbol} \var{type}}}
4209
4210   Returns \true{} if \var{symbol} has an encapsulation of type
4211   \var{type}.  Returns \nil{} otherwise.  \var{type} is compared with
4212   \code{eq}.
4213 \end{defun}
4214
4215 %%\f
4216 \begin{comment}
4217 section{The Single Stepper}
4218
4219 \begin{defmac}{}{step}{ \args{\var{form}}}
4220
4221   Evaluates form with single stepping enabled or if \var{form} is
4222   \code{T}, enables stepping until explicitly disabled.  Stepping can
4223   be disabled by quitting to the lisp top level, or by evaluating the
4224   form \w{\code{(step ())}}.
4225
4226   While stepping is enabled, every call to eval will prompt the user
4227   for a single character command.  The prompt is the form which is
4228   about to be \code{eval}ed.  It is printed with \code{*print-level*}
4229   and \code{*print-length*} bound to \code{*step-print-level*} and
4230   \code{*step-print-length*}.  All interaction is done through the
4231   stream \code{*query-io*}.  Because of this, the stepper can not be
4232   used in Hemlock eval mode.  When connected to a slave Lisp, the
4233   stepper can be used from Hemlock.
4234
4235   The commands are:
4236   \begin{Lentry}
4237
4238   \item[\key{n} (next)] Evaluate the expression with stepping still
4239     enabled.
4240
4241   \item[\key{s} (skip)] Evaluate the expression with stepping
4242     disabled.
4243
4244   \item[\key{q} (quit)] Evaluate the expression, but disable all
4245     further stepping inside the current call to \code{step}.
4246
4247   \item[\key{p} (print)] Print current form.  (does not use
4248     \code{*step-print-level*} or \code{*step-print-length*}.)
4249
4250   \item[\key{b} (break)] Enter break loop, and then prompt for the
4251     command again when the break loop returns.
4252
4253   \item[\key{e} (eval)] Prompt for and evaluate an arbitrary
4254     expression.  The expression is evaluated with stepping disabled.
4255
4256   \item[\key{?} (help)] Prints a brief list of the commands.
4257
4258   \item[\key{r} (return)] Prompt for an arbitrary value to return as
4259     result of the current call to eval.
4260
4261   \item[\key{g}] Throw to top level.
4262   \end{Lentry}
4263 \end{defmac}
4264
4265 \begin{defvar}{extensions:}{step-print-level}
4266   \defvarx[extensions:]{step-print-length}
4267
4268   \code{*print-level*} and \code{*print-length*} are bound to these
4269   values while printing the current form.  \code{*step-print-level*}
4270   and \code{*step-print-length*} are initially bound to 4 and 5,
4271   respectively.
4272 \end{defvar}
4273
4274 \begin{defvar}{extensions:}{max-step-indentation}
4275
4276   Step indents the prompts to highlight the nesting of the evaluation.
4277   This variable contains the maximum number of spaces to use for
4278   indenting.  Initially set to 40.
4279 \end{defvar}
4280
4281 \end{comment}
4282
4283 %%\f
4284 %%\node Specials,  , Function Tracing, The Debugger
4285 \section{Specials}
4286 These are the special variables that control the debugger action.
4287
4288 \begin{changebar}
4289 \begin{defvar}{debug:}{debug-print-level}
4290   \defvarx[debug:]{debug-print-length}
4291
4292   \code{*print-level*} and \code{*print-length*} are bound to these
4293   values during the execution of some debug commands.  When evaluating
4294   arbitrary expressions in the debugger, the normal values of
4295   \code{*print-level*} and \code{*print-length*} are in effect.  These
4296   variables are initially set to 3 and 5, respectively.
4297 \end{defvar}
4298 \end{changebar}
4299
4300 %%\f
4301 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/compiler.ms}
4302
4303
4304 %%\node The Compiler, Advanced Compiler Use and Efficiency Hints, The Debugger, Top
4305 \chapter{The Compiler} \hide{ -*- Dictionary: cmu-user -*-}
4306
4307 \begin{comment}
4308 * Compiler Introduction::
4309 * Calling the Compiler::
4310 * Compilation Units::
4311 * Interpreting Error Messages::
4312 * Types in Python::
4313 * Getting Existing Programs to Run::
4314 * Compiler Policy::
4315 * Open Coding and Inline Expansion::
4316 \end{comment}
4317
4318 %%\node Compiler Introduction, Calling the Compiler, The Compiler, The Compiler
4319 \section{Compiler Introduction}
4320
4321 This chapter contains information about the compiler that every \cmucl{} user
4322 should be familiar with.  Chapter \ref{advanced-compiler} goes into greater
4323 depth, describing ways to use more advanced features.
4324
4325 The \cmucl{} compiler (also known as \Python{}) has many features
4326 that are seldom or never supported by conventional \llisp{}
4327 compilers:
4328 \begin{itemize}
4329
4330 \item Source level debugging of compiled code (see chapter
4331   \ref{debugger}.)
4332
4333 \item Type error compiler warnings for type errors detectable at
4334   compile time.
4335
4336 \item Compiler error messages that provide a good indication of where
4337   the error appeared in the source.
4338
4339 \item Full run-time checking of all potential type errors, with
4340   optimization of type checks to minimize the cost.
4341
4342 \item Scheme-like features such as proper tail recursion and extensive
4343   source-level optimization.
4344
4345 \item Advanced tuning and optimization features such as comprehensive
4346   efficiency notes, flow analysis, and untagged number representations
4347   (see chapter \ref{advanced-compiler}.)
4348 \end{itemize}
4349
4350
4351 %%\f
4352 %%\node Calling the Compiler, Compilation Units, Compiler Introduction, The Compiler
4353 \section{Calling the Compiler}
4354 \cindex{compiling}
4355 Functions may be compiled using \code{compile}, \code{compile-file}, or
4356 \code{compile-from-stream}.
4357
4358 \begin{defun}{}{compile}{ \args{\var{name} \ampoptional{} \var{definition}}}
4359
4360   This function compiles the function whose name is \var{name}.  If
4361   \var{name} is \false, the compiled function object is returned.  If
4362   \var{definition} is supplied, it should be a lambda expression that
4363   is to be compiled and then placed in the function cell of
4364   \var{name}.  As per the proposed X3J13 cleanup
4365   ``compile-argument-problems'', \var{definition} may also be an
4366   interpreted function.
4367
4368   The return values are as per the proposed X3J13 cleanup
4369   ``compiler-diagnostics''.  The first value is the function name or
4370   function object.  The second value is \false{} if no compiler
4371   diagnostics were issued, and \true{} otherwise.  The third value is
4372   \false{} if no compiler diagnostics other than style warnings were
4373   issued.  A non-\false{} value indicates that there were ``serious''
4374   compiler diagnostics issued, or that other conditions of type
4375   \tindexed{error} or \tindexed{warning} (but not
4376   \tindexed{style-warning}) were signaled during compilation.
4377 \end{defun}
4378
4379
4380 \begin{defun}{}{compile-file}{
4381     \args{\var{input-pathname}
4382       \keys{\kwd{output-file} \kwd{error-file} \kwd{trace-file}}
4383       \morekeys{\kwd{error-output} \kwd{verbose} \kwd{print} \kwd{progress}}
4384       \yetmorekeys{\kwd{load} \kwd{block-compile} \kwd{entry-points}}
4385       \yetmorekeys{\kwd{byte-compile}}}}
4386
4387   The \cmucl{} \code{compile-file} is extended through the addition of
4388   several new keywords and an additional interpretation of
4389   \var{input-pathname}:
4390   \begin{Lentry}
4391
4392   \item[\var{input-pathname}] If this argument is a list of input
4393     files, rather than a single input pathname, then all the source
4394     files are compiled into a single object file.  In this case, the
4395     name of the first file is used to determine the default output
4396     file names.  This is especially useful in combination with
4397     \var{block-compile}.
4398
4399   \item[\kwd{output-file}] This argument specifies the name of the
4400     output file.  \true{} gives the default name, \false{} suppresses
4401     the output file.
4402
4403   \item[\kwd{error-file}] A listing of all the error output is
4404     directed to this file.  If there are no errors, then no error file
4405     is produced (and any existing error file is deleted.)  \true{}
4406     gives \w{"\var{name}\code{.err}"} (the default), and \false{}
4407     suppresses the output file.
4408
4409   \item[\kwd{error-output}] If \true{} (the default), then error
4410     output is sent to \code{*error-output*}.  If a stream, then output
4411     is sent to that stream instead.  If \false, then error output is
4412     suppressed.  Note that this error output is in addition to (but
4413     the same as) the output placed in the \var{error-file}.
4414
4415   \item[\kwd{verbose}] If \true{} (the default), then the compiler
4416     prints to error output at the start and end of compilation of each
4417     file.  See \varref{compile-verbose}.
4418
4419   \item[\kwd{print}] If \true{} (the default), then the compiler
4420     prints to error output when each function is compiled.  See
4421     \varref{compile-print}.
4422
4423   \item[\kwd{progress}] If \true{} (default \false{}), then the
4424     compiler prints to error output progress information about the
4425     phases of compilation of each function.  This is a CMU extension
4426     that is useful mainly in large block compilations.  See
4427     \varref{compile-progress}.
4428
4429   \item[\kwd{trace-file}] If \true{}, several of the intermediate
4430     representations (including annotated assembly code) are dumped out
4431     to this file.  \true{} gives \w{"\var{name}\code{.trace}"}.  Trace
4432     output is off by default.  \xlref{trace-files}.
4433
4434   \item[\kwd{load}] If \true{}, load the resulting output file.
4435
4436   \item[\kwd{block-compile}] Controls the compile-time resolution of
4437     function calls.  By default, only self-recursive calls are
4438     resolved, unless an \code{ext:block-start} declaration appears in
4439     the source file.  \xlref{compile-file-block}.
4440
4441   \item[\kwd{entry-points}] If non-null, then this is a list of the
4442     names of all functions in the file that should have global
4443     definitions installed (because they are referenced in other
4444     files.)  \xlref{compile-file-block}.
4445
4446   \item[\kwd{byte-compile}] If \true{}, compiling to a compact
4447     interpreted byte code is enabled.  Possible values are \true{},
4448     \false{}, and \kwd{maybe} (the default.)  See
4449     \varref{byte-compile-default} and \xlref{byte-compile}.
4450   \end{Lentry}
4451
4452   The return values are as per the proposed X3J13 cleanup
4453   ``compiler-diagnostics''.  The first value from \code{compile-file}
4454   is the truename of the output file, or \false{} if the file could
4455   not be created.  The interpretation of the second and third values
4456   is described above for \code{compile}.
4457 \end{defun}
4458
4459 \begin{defvar}{}{compile-verbose}
4460   \defvarx{compile-print}
4461   \defvarx{compile-progress}
4462
4463   These variables determine the default values for the \kwd{verbose},
4464   \kwd{print} and \kwd{progress} arguments to \code{compile-file}.
4465 \end{defvar}
4466
4467 \begin{defun}{extensions:}{compile-from-stream}{%
4468     \args{\var{input-stream}
4469       \keys{\kwd{error-stream}}
4470       \morekeys{\kwd{trace-stream}}
4471       \yetmorekeys{\kwd{block-compile} \kwd{entry-points}}
4472       \yetmorekeys{\kwd{byte-compile}}}}
4473
4474   This function is similar to \code{compile-file}, but it takes all
4475   its arguments as streams.  It reads \llisp{} code from
4476   \var{input-stream} until end of file is reached, compiling into the
4477   current environment.  This function returns the same two values as
4478   the last two values of \code{compile}.  No output files are
4479   produced.
4480 \end{defun}
4481
4482
4483 %%\f
4484 %%\node Compilation Units, Interpreting Error Messages, Calling the Compiler, The Compiler
4485 \section{Compilation Units}
4486 \cpsubindex{compilation}{units}
4487
4488 \cmucl{} supports the \code{with-compilation-unit} macro added to the
4489 language by the proposed X3J13 ``with-compilation-unit'' compiler
4490 cleanup.  This provides a mechanism for eliminating spurious undefined
4491 warnings when there are forward references across files, and also
4492 provides a standard way to access compiler extensions.
4493
4494 \begin{defmac}{}{with-compilation-unit}{%
4495     \args{(\mstar{\var{key} \var{value}}) \mstar{\var{form}}}}
4496
4497   This macro evaluates the \var{forms} in an environment that causes
4498   warnings for undefined variables, functions and types to be delayed
4499   until all the forms have been evaluated.  Each keyword \var{value}
4500   is an evaluated form.  These keyword options are recognized:
4501   \begin{Lentry}
4502
4503   \item[\kwd{override}] If uses of \code{with-compilation-unit} are
4504     dynamically nested, the outermost use will take precedence,
4505     suppressing printing of undefined warnings by inner uses.
4506     However, when the \code{override} option is true this shadowing is
4507     inhibited; an inner use will print summary warnings for the
4508     compilations within the inner scope.
4509
4510   \item[\kwd{optimize}] This is a CMU extension that specifies the
4511     ``global'' compilation policy for the dynamic extent of the body.
4512     The argument should evaluate to an \code{optimize} declare form,
4513     like:
4514     \begin{lisp}
4515       (optimize (speed 3) (safety 0))
4516     \end{lisp}
4517     \xlref{optimize-declaration}.
4518
4519   \item[\kwd{optimize-interface}] Similar to \kwd{optimize}, but
4520     specifies the compilation policy for function interfaces (argument
4521     count and type checking) for the dynamic extent of the body.
4522     \xlref{optimize-interface-declaration}.
4523
4524   \item[\kwd{context-declarations}] This is a CMU extension that
4525     pattern-matches on function names, automatically splicing in any
4526     appropriate declarations at the head of the function definition.
4527     \xlref{context-declarations}.
4528   \end{Lentry}
4529 \end{defmac}
4530
4531 \begin{comment}
4532 * Undefined Warnings::
4533 \end{comment}
4534
4535 %%\node Undefined Warnings,  , Compilation Units, Compilation Units
4536 \subsection{Undefined Warnings}
4537
4538 \cindex{undefined warnings}
4539 Warnings about undefined variables, functions and types are delayed until the
4540 end of the current compilation unit.  The compiler entry functions
4541 (\code{compile}, etc.) implicitly use \code{with-compilation-unit}, so undefined
4542 warnings will be printed at the end of the compilation unless there is an
4543 enclosing \code{with-compilation-unit}.  In order to gain the benefit of this
4544 mechanism, you should wrap a single \code{with-compilation-unit} around the calls
4545 to \code{compile-file}, i.e.:
4546 \begin{lisp}
4547 (with-compilation-unit ()
4548   (compile-file "file1")
4549   (compile-file "file2")
4550   ...)
4551 \end{lisp}
4552
4553 Unlike for functions and types, undefined warnings for variables are
4554 not suppressed when a definition (e.g. \code{defvar}) appears after
4555 the reference (but in the same compilation unit.)  This is because
4556 doing special declarations out of order just doesn't
4557 work\dash{}although early references will be compiled as special,
4558 bindings will be done lexically.
4559
4560 Undefined warnings are printed with full source context
4561 (\pxlref{error-messages}), which tremendously simplifies the problem
4562 of finding undefined references that resulted from macroexpansion.
4563 After printing detailed information about the undefined uses of each
4564 name, \code{with-compilation-unit} also prints summary listings of the
4565 names of all the undefined functions, types and variables.
4566
4567 \begin{defvar}{}{undefined-warning-limit}
4568
4569   This variable controls the number of undefined warnings for each
4570   distinct name that are printed with full source context when the
4571   compilation unit ends.  If there are more undefined references than
4572   this, then they are condensed into a single warning:
4573   \begin{example}
4574     Warning: \var{count} more uses of undefined function \var{name}.
4575   \end{example}
4576   When the value is \code{0}, then the undefined warnings are not
4577   broken down by name at all: only the summary listing of undefined
4578   names is printed.
4579 \end{defvar}
4580
4581 %%\f
4582 %%\node Interpreting Error Messages, Types in Python, Compilation Units, The Compiler
4583 \section{Interpreting Error Messages}
4584 \label{error-messages}
4585 \cpsubindex{error messages}{compiler}
4586 \cindex{compiler error messages}
4587
4588 One of \Python{}'s unique features is the level of source location
4589 information it provides in error messages.  The error messages contain
4590 a lot of detail in a terse format, so they may be confusing at first.
4591 Error messages will be illustrated using this example program:
4592 \begin{lisp}
4593 (defmacro zoq (x)
4594   `(roq (ploq (+ ,x 3))))
4595
4596 (defun foo (y)
4597   (declare (symbol y))
4598   (zoq y))
4599 \end{lisp}
4600 The main problem with this program is that it is trying to add \code{3} to a
4601 symbol.  Note also that the functions \code{roq} and \code{ploq} aren't defined
4602 anywhere.
4603
4604 \begin{comment}
4605 * The Parts of the Error Message::
4606 * The Original and Actual Source::
4607 * The Processing Path::
4608 * Error Severity::
4609 * Errors During Macroexpansion::
4610 * Read Errors::
4611 * Error Message Parameterization::
4612 \end{comment}
4613
4614 %%\node The Parts of the Error Message, The Original and Actual Source, Interpreting Error Messages, Interpreting Error Messages
4615 \subsection{The Parts of the Error Message}
4616
4617 The compiler will produce this warning:
4618 \begin{example}
4619 File: /usr/me/stuff.lisp
4620
4621 In: DEFUN FOO
4622   (ZOQ Y)
4623 --> ROQ PLOQ +
4624 ==>
4625   Y
4626 Warning: Result is a SYMBOL, not a NUMBER.
4627 \end{example}
4628 In this example we see each of the six possible parts of a compiler error
4629 message:
4630 \begin{Lentry}
4631
4632 \item[\w{\code{File: /usr/me/stuff.lisp}}] This is the \var{file} that
4633   the compiler read the relevant code from.  The file name is
4634   displayed because it may not be immediately obvious when there is an
4635   error during compilation of a large system, especially when
4636   \code{with-compilation-unit} is used to delay undefined warnings.
4637
4638 \item[\w{\code{In: DEFUN FOO}}] This is the \var{definition} or
4639   top-level form responsible for the error.  It is obtained by taking
4640   the first two elements of the enclosing form whose first element is
4641   a symbol beginning with ``\code{DEF}''.  If there is no enclosing
4642   \w{\var{def}mumble}, then the outermost form is used.  If there are
4643   multiple \w{\var{def}mumbles}, then they are all printed from the
4644   outside in, separated by \code{$=>$}'s.  In this example, the problem
4645   was in the \code{defun} for \code{foo}.
4646
4647 \item[\w{\code{(ZOQ Y)}}] This is the \i{original source} form
4648   responsible for the error.  Original source means that the form
4649   directly appeared in the original input to the compiler, i.e. in the
4650   lambda passed to \code{compile} or the top-level form read from the
4651   source file.  In this example, the expansion of the \code{zoq} macro
4652   was responsible for the error.
4653
4654 \item[\w{\code{--$>$ ROQ PLOQ +}} ] This is the \i{processing path}
4655   that the compiler used to produce the errorful code.  The processing
4656   path is a representation of the evaluated forms enclosing the actual
4657   source that the compiler encountered when processing the original
4658   source.  The path is the first element of each form, or the form
4659   itself if the form is not a list.  These forms result from the
4660   expansion of macros or source-to-source transformation done by the
4661   compiler.  In this example, the enclosing evaluated forms are the
4662   calls to \code{roq}, \code{ploq} and \code{+}.  These calls resulted
4663   from the expansion of the \code{zoq} macro.
4664
4665 \item[\code{==$>$ Y}] This is the \i{actual source} responsible for
4666   the error.  If the actual source appears in the explanation, then we
4667   print the next enclosing evaluated form, instead of printing the
4668   actual source twice.  (This is the form that would otherwise have
4669   been the last form of the processing path.)  In this example, the
4670   problem is with the evaluation of the reference to the variable
4671   \code{y}.
4672
4673 \item[\w{\code{Warning: Result is a SYMBOL, not a NUMBER.}}]  This is
4674   the \var{explanation} of the problem.  In this example, the problem is
4675   that \code{y} evaluates to a \code{symbol}, but is in a context
4676   where a number is required (the argument to \code{+}).
4677 \end{Lentry}
4678
4679 Note that each part of the error message is distinctively marked:
4680 \begin{itemize}
4681
4682 \item \code{File:} and \code{In:} mark the file and definition,
4683   respectively.
4684
4685 \item The original source is an indented form with no prefix.
4686
4687 \item Each line of the processing path is prefixed with \code{--$>$}.
4688
4689 \item The actual source form is indented like the original source, but
4690   is marked by a preceding \code{==$>$} line.  This is like the
4691   ``macroexpands to'' notation used in \cltl.
4692
4693 \item The explanation is prefixed with the error severity
4694   (\pxlref{error-severity}), either \code{Error:}, \code{Warning:}, or
4695   \code{Note:}.
4696 \end{itemize}
4697
4698
4699 Each part of the error message is more specific than the preceding
4700 one.  If consecutive error messages are for nearby locations, then the
4701 front part of the error messages would be the same.  In this case, the
4702 compiler omits as much of the second message as is in common with the
4703 first.  For example:
4704 \begin{example}
4705 File: /usr/me/stuff.lisp
4706
4707 In: DEFUN FOO
4708   (ZOQ Y)
4709 --> ROQ
4710 ==>
4711   (PLOQ (+ Y 3))
4712 Warning: Undefined function: PLOQ
4713
4714 ==>
4715   (ROQ (PLOQ (+ Y 3)))
4716 Warning: Undefined function: ROQ
4717 \end{example}
4718 In this example, the file, definition and original source are
4719 identical for the two messages, so the compiler omits them in the
4720 second message.  If consecutive messages are entirely identical, then
4721 the compiler prints only the first message, followed by:
4722 \begin{example}
4723 [Last message occurs \var{repeats} times]
4724 \end{example}
4725 where \var{repeats} is the number of times the message was given.
4726
4727 If the source was not from a file, then no file line is printed.  If
4728 the actual source is the same as the original source, then the
4729 processing path and actual source will be omitted.  If no forms
4730 intervene between the original source and the actual source, then the
4731 processing path will also be omitted.
4732
4733 %%\f
4734 %%\node The Original and Actual Source, The Processing Path, The Parts of the Error Message, Interpreting Error Messages
4735 \subsection{The Original and Actual Source}
4736 \cindex{original source}
4737 \cindex{actual source}
4738
4739 The \i{original source} displayed will almost always be a list.  If the actual
4740 source for an error message is a symbol, the original source will be the
4741 immediately enclosing evaluated list form.  So even if the offending symbol
4742 does appear in the original source, the compiler will print the enclosing list
4743 and then print the symbol as the actual source (as though the symbol were
4744 introduced by a macro.)
4745
4746 When the \i{actual source} is displayed (and is not a symbol), it will always
4747 be code that resulted from the expansion of a macro or a source-to-source
4748 compiler optimization.  This is code that did not appear in the original
4749 source program; it was introduced by the compiler.
4750
4751 Keep in mind that when the compiler displays a source form in an error message,
4752 it always displays the most specific (innermost) responsible form.  For
4753 example, compiling this function:
4754 \begin{lisp}
4755 (defun bar (x)
4756   (let (a)
4757     (declare (fixnum a))
4758     (setq a (foo x))
4759     a))
4760 \end{lisp}
4761 gives this error message:
4762 \begin{example}
4763 In: DEFUN BAR
4764   (LET (A) (DECLARE (FIXNUM A)) (SETQ A (FOO X)) A)
4765 Warning: The binding of A is not a FIXNUM:
4766   NIL
4767 \end{example}
4768 This error message is not saying ``there's a problem somewhere in this
4769 \code{let}''\dash{}it is saying that there is a problem with the
4770 \code{let} itself.  In this example, the problem is that \code{a}'s
4771 \false{} initial value is not a \code{fixnum}.
4772
4773 %%\f
4774 %%\node The Processing Path, Error Severity, The Original and Actual Source, Interpreting Error Messages
4775 \subsection{The Processing Path}
4776 \cindex{processing path}
4777 \cindex{macroexpansion}
4778 \cindex{source-to-source transformation}
4779
4780 The processing path is mainly useful for debugging macros, so if you don't
4781 write macros, you can ignore the processing path.  Consider this example:
4782 \begin{lisp}
4783 (defun foo (n)
4784   (dotimes (i n *undefined*)))
4785 \end{lisp}
4786 Compiling results in this error message:
4787 \begin{example}
4788 In: DEFUN FOO
4789   (DOTIMES (I N *UNDEFINED*))
4790 --> DO BLOCK LET TAGBODY RETURN-FROM
4791 ==>
4792   (PROGN *UNDEFINED*)
4793 Warning: Undefined variable: *UNDEFINED*
4794 \end{example}
4795 Note that \code{do} appears in the processing path.  This is because \code{dotimes}
4796 expands into:
4797 \begin{lisp}
4798 (do ((i 0 (1+ i)) (#:g1 n))
4799     ((>= i #:g1) *undefined*)
4800   (declare (type unsigned-byte i)))
4801 \end{lisp}
4802 The rest of the processing path results from the expansion of \code{do}:
4803 \begin{lisp}
4804 (block nil
4805   (let ((i 0) (#:g1 n))
4806     (declare (type unsigned-byte i))
4807     (tagbody (go #:g3)
4808      #:g2    (psetq i (1+ i))
4809      #:g3    (unless (>= i #:g1) (go #:g2))
4810              (return-from nil (progn *undefined*)))))
4811 \end{lisp}
4812 In this example, the compiler descended into the \code{block},
4813 \code{let}, \code{tagbody} and \code{return-from} to reach the
4814 \code{progn} printed as the actual source.  This is a place where the
4815 ``actual source appears in explanation'' rule was applied.  The
4816 innermost actual source form was the symbol \code{*undefined*} itself,
4817 but that also appeared in the explanation, so the compiler backed out
4818 one level.
4819
4820 %%\f
4821 %%\node Error Severity, Errors During Macroexpansion, The Processing Path, Interpreting Error Messages
4822 \subsection{Error Severity}
4823 \label{error-severity}
4824 \cindex{severity of compiler errors}
4825 \cindex{compiler error severity}
4826
4827 There are three levels of compiler error severity:
4828 \begin{Lentry}
4829
4830 \item[Error] This severity is used when the compiler encounters a
4831   problem serious enough to prevent normal processing of a form.
4832   Instead of compiling the form, the compiler compiles a call to
4833   \code{error}.  Errors are used mainly for signaling syntax errors.
4834   If an error happens during macroexpansion, the compiler will handle
4835   it.  The compiler also handles and attempts to proceed from read
4836   errors.
4837
4838 \item[Warning] Warnings are used when the compiler can prove that
4839   something bad will happen if a portion of the program is executed,
4840   but the compiler can proceed by compiling code that signals an error
4841   at runtime if the problem has not been fixed:
4842   \begin{itemize}
4843
4844   \item Violation of type declarations, or
4845
4846   \item Function calls that have the wrong number of arguments or
4847     malformed keyword argument lists, or
4848
4849   \item Referencing a variable declared \code{ignore}, or unrecognized
4850     declaration specifiers.
4851   \end{itemize}
4852
4853   In the language of the \clisp{} standard, these are situations where
4854   the compiler can determine that a situation with undefined
4855   consequences or that would cause an error to be signaled would
4856   result at runtime.
4857
4858 \item[Note] Notes are used when there is something that seems a bit
4859   odd, but that might reasonably appear in correct programs.
4860 \end{Lentry}
4861 Note that the compiler does not fully conform to the proposed X3J13
4862 ``compiler-diagnostics'' cleanup.  Errors, warnings and notes mostly
4863 correspond to errors, warnings and style-warnings, but many things
4864 that the cleanup considers to be style-warnings are printed as
4865 warnings rather than notes.  Also, warnings, style-warnings and most
4866 errors aren't really signaled using the condition system.
4867
4868 %%\f
4869 %%\node Errors During Macroexpansion, Read Errors, Error Severity, Interpreting Error Messages
4870 \subsection{Errors During Macroexpansion}
4871 \cpsubindex{macroexpansion}{errors during}
4872
4873 The compiler handles errors that happen during macroexpansion, turning
4874 them into compiler errors.  If you want to debug the error (to debug a
4875 macro), you can set \code{*break-on-signals*} to \code{error}.  For
4876 example, this definition:
4877 \begin{lisp}
4878 (defun foo (e l)
4879   (do ((current l (cdr current))
4880        ((atom current) nil))
4881       (when (eq (car current) e) (return current))))
4882 \end{lisp}
4883 gives this error:
4884 \begin{example}
4885 In: DEFUN FOO
4886   (DO ((CURRENT L #) (# NIL)) (WHEN (EQ # E) (RETURN CURRENT)) )
4887 Error: (during macroexpansion)
4888
4889 Error in function LISP::DO-DO-BODY.
4890 DO step variable is not a symbol: (ATOM CURRENT)
4891 \end{example}
4892
4893
4894 %%\f
4895 %%\node Read Errors, Error Message Parameterization, Errors During Macroexpansion, Interpreting Error Messages
4896 \subsection{Read Errors}
4897 \cpsubindex{read errors}{compiler}
4898
4899 The compiler also handles errors while reading the source.  For example:
4900 \begin{example}
4901 Error: Read error at 2:
4902  "(,/\back{foo})"
4903 Error in function LISP::COMMA-MACRO.
4904 Comma not inside a backquote.
4905 \end{example}
4906 The ``\code{at 2}'' refers to the character position in the source file at
4907 which the error was signaled, which is generally immediately after the
4908 erroneous text.  The next line, ``\code{(,/\back{foo})}'', is the line in
4909 the source that contains the error file position.  The ``\code{/\back{} }''
4910 indicates the error position within that line (in this example,
4911 immediately after the offending comma.)
4912
4913 When in \hemlock{} (or any other EMACS-like editor), you can go to a
4914 character position with:
4915 \begin{example}
4916 M-< C-u \var{position} C-f
4917 \end{example}
4918 Note that if the source is from a \hemlock{} buffer, then the position
4919 is relative to the start of the compiled region or \code{defun}, not the
4920 file or buffer start.
4921
4922 After printing a read error message, the compiler attempts to recover from the
4923 error by backing up to the start of the enclosing top-level form and reading
4924 again with \code{*read-suppress*} true.  If the compiler can recover from the
4925 error, then it substitutes a call to \code{cerror} for the unreadable form and
4926 proceeds to compile the rest of the file normally.
4927
4928 If there is a read error when the file position is at the end of the file
4929 (i.e., an unexpected EOF error), then the error message looks like this:
4930 \begin{example}
4931 Error: Read error in form starting at 14:
4932  "(defun test ()"
4933 Error in function LISP::FLUSH-WHITESPACE.
4934 EOF while reading #<Stream for file "/usr/me/test.lisp">
4935 \end{example}
4936 In this case, ``\code{starting at 14}'' indicates the character
4937 position at which the compiler started reading, i.e. the position
4938 before the start of the form that was missing the closing delimiter.
4939 The line \w{"\code{(defun test ()}"} is the first line after the starting
4940 position that the compiler thinks might contain the unmatched open
4941 delimiter.
4942
4943 %%\f
4944 %%\node Error Message Parameterization,  , Read Errors, Interpreting Error Messages
4945 \subsection{Error Message Parameterization}
4946 \cpsubindex{error messages}{verbosity}
4947 \cpsubindex{verbosity}{of error messages}
4948
4949 There is some control over the verbosity of error messages.  See also
4950 \varref{undefined-warning-limit}, \code{*efficiency-note-limit*} and
4951 \varref{efficiency-note-cost-threshold}.
4952
4953 \begin{defvar}{}{enclosing-source-cutoff}
4954
4955   This variable specifies the number of enclosing actual source forms
4956   that are printed in full, rather than in the abbreviated processing
4957   path format.  Increasing the value from its default of \code{1}
4958   allows you to see more of the guts of the macroexpanded source,
4959   which is useful when debugging macros.
4960 \end{defvar}
4961
4962 \begin{defvar}{}{error-print-length}
4963   \defvarx{error-print-level}
4964
4965   These variables are the print length and print level used in
4966   printing error messages.  The default values are \code{5} and
4967   \code{3}.  If null, the global values of \code{*print-length*} and
4968   \code{*print-level*} are used.
4969 \end{defvar}
4970
4971 \begin{defmac}{extensions:}{def-source-context}{%
4972     \args{\var{name} \var{lambda-list} \mstar{form}}}
4973
4974   This macro defines how to extract an abbreviated source context from
4975   the \var{name}d form when it appears in the compiler input.
4976   \var{lambda-list} is a \code{defmacro} style lambda-list used to
4977   parse the arguments.  The \var{body} should return a list of
4978   subforms that can be printed on about one line.  There are
4979   predefined methods for \code{defstruct}, \code{defmethod}, etc.  If
4980   no method is defined, then the first two subforms are returned.
4981   Note that this facility implicitly determines the string name
4982   associated with anonymous functions.
4983 \end{defmac}
4984
4985 %%\f
4986 %%\node Types in Python, Getting Existing Programs to Run, Interpreting Error Messages, The Compiler
4987 \section{Types in Python}
4988 \cpsubindex{types}{in python}
4989
4990 A big difference between \Python{} and all other \llisp{} compilers
4991 is the approach to type checking and amount of knowledge about types:
4992 \begin{itemize}
4993
4994 \item \Python{} treats type declarations much differently than other
4995   Lisp compilers do.  \Python{} doesn't blindly believe type
4996   declarations; it considers them assertions about the program that
4997   should be checked.
4998
4999 \item \Python{} also has a tremendously greater knowledge of the
5000   \clisp{} type system than other compilers.  Support is incomplete
5001   only for the \code{not}, \code{and} and \code{satisfies} types.
5002 \end{itemize}
5003 See also sections \ref{advanced-type-stuff} and \ref{type-inference}.
5004
5005 %%\f
5006 \begin{comment}
5007 * Compile Time Type Errors::
5008 * Precise Type Checking::
5009 * Weakened Type Checking::
5010 \end{comment}
5011
5012 %%\node Compile Time Type Errors, Precise Type Checking, Types in Python, Types in Python
5013 \subsection{Compile Time Type Errors}
5014 \cindex{compile time type errors}
5015 \cpsubindex{type checking}{at compile time}
5016
5017 If the compiler can prove at compile time that some portion of the
5018 program cannot be executed without a type error, then it will give a
5019 warning at compile time.  It is possible that the offending code would
5020 never actually be executed at run-time due to some higher level
5021 consistency constraint unknown to the compiler, so a type warning
5022 doesn't always indicate an incorrect program.  For example, consider
5023 this code fragment:
5024 \begin{lisp}
5025 (defun raz (foo)
5026   (let ((x (case foo
5027              (:this 13)
5028              (:that 9)
5029              (:the-other 42))))
5030     (declare (fixnum x))
5031     (foo x)))
5032 \end{lisp}
5033 Compilation produces this warning:
5034 \begin{example}
5035 In: DEFUN RAZ
5036   (CASE FOO (:THIS 13) (:THAT 9) (:THE-OTHER 42))
5037 --> LET COND IF COND IF COND IF
5038 ==>
5039   (COND)
5040 Warning: This is not a FIXNUM:
5041   NIL
5042 \end{example}
5043 In this case, the warning is telling you that if \code{foo} isn't any
5044 of \kwd{this}, \kwd{that} or \kwd{the-other}, then \code{x} will be
5045 initialized to \false, which the \code{fixnum} declaration makes
5046 illegal.  The warning will go away if \code{ecase} is used instead of
5047 \code{case}, or if \kwd{the-other} is changed to \true.
5048
5049 This sort of spurious type warning happens moderately often in the
5050 expansion of complex macros and in inline functions.  In such cases,
5051 there may be dead code that is impossible to correctly execute.  The
5052 compiler can't always prove this code is dead (could never be
5053 executed), so it compiles the erroneous code (which will always signal
5054 an error if it is executed) and gives a warning.
5055
5056 \begin{defun}{extensions:}{required-argument}{}
5057
5058   This function can be used as the default value for keyword arguments
5059   that must always be supplied.  Since it is known by the compiler to
5060   never return, it will avoid any compile-time type warnings that
5061   would result from a default value inconsistent with the declared
5062   type.  When this function is called, it signals an error indicating
5063   that a required keyword argument was not supplied.  This function is
5064   also useful for \code{defstruct} slot defaults corresponding to
5065   required arguments.  \xlref{empty-type}.
5066
5067   Although this function is a CMU extension, it is relatively harmless
5068   to use it in otherwise portable code, since you can easily define it
5069   yourself:
5070   \begin{lisp}
5071     (defun required-argument ()
5072       (error "A required keyword argument was not supplied."))
5073     \end{lisp}
5074 \end{defun}
5075
5076 Type warnings are inhibited when the
5077 \code{extensions:inhibit-warnings} optimization quality is \code{3}
5078 (\pxlref{compiler-policy}.)  This can be used in a local declaration
5079 to inhibit type warnings in a code fragment that has spurious
5080 warnings.
5081
5082 %%\f
5083 %%\node Precise Type Checking, Weakened Type Checking, Compile Time Type Errors, Types in Python
5084 \subsection{Precise Type Checking}
5085 \label{precise-type-checks}
5086 \cindex{precise type checking}
5087 \cpsubindex{type checking}{precise}
5088
5089 With the default compilation policy, all type
5090 assertions\footnote{There are a few circumstances where a type
5091   declaration is discarded rather than being used as type assertion.
5092   This doesn't affect safety much, since such discarded declarations
5093   are also not believed to be true by the compiler.}  are precisely
5094 checked.  Precise checking means that the check is done as though
5095 \code{typep} had been called with the exact type specifier that
5096 appeared in the declaration.  \Python{} uses \var{policy} to determine
5097 whether to trust type assertions (\pxlref{compiler-policy}).  Type
5098 assertions from declarations are indistinguishable from the type
5099 assertions on arguments to built-in functions.  In \Python, adding
5100 type declarations makes code safer.
5101
5102 If a variable is declared to be \w{\code{(integer 3 17)}}, then its
5103 value must always be an integer between \code{3} and \code{17}.
5104 If multiple type declarations apply to a single variable, then all the
5105 declarations must be correct; it is as though all the types were
5106 intersected producing a single \code{and} type specifier.
5107
5108 Argument type declarations are automatically enforced.  If you declare
5109 the type of a function argument, a type check will be done when that
5110 function is called.  In a function call, the called function does the
5111 argument type checking, which means that a more restrictive type
5112 assertion in the calling function (e.g., from \code{the}) may be lost.
5113
5114 The types of structure slots are also checked.  The value of a
5115 structure slot must always be of the type indicated in any \kwd{type}
5116 slot option.\footnote{The initial value need not be of this type as
5117   long as the corresponding argument to the constructor is always
5118   supplied, but this will cause a compile-time type warning unless
5119   \code{required-argument} is used.} Because of precise type checking,
5120 the arguments to slot accessors are checked to be the correct type of
5121 structure.
5122
5123 In traditional \llisp{} compilers, not all type assertions are
5124 checked, and type checks are not precise.  Traditional compilers
5125 blindly trust explicit type declarations, but may check the argument
5126 type assertions for built-in functions.  Type checking is not precise,
5127 since the argument type checks will be for the most general type legal
5128 for that argument.  In many systems, type declarations suppress what
5129 little type checking is being done, so adding type declarations makes
5130 code unsafe.  This is a problem since it discourages writing type
5131 declarations during initial coding.  In addition to being more error
5132 prone, adding type declarations during tuning also loses all the
5133 benefits of debugging with checked type assertions.
5134
5135 To gain maximum benefit from \Python{}'s type checking, you should
5136 always declare the types of function arguments and structure slots as
5137 precisely as possible.  This often involves the use of \code{or},
5138 \code{member} and other list-style type specifiers.  Paradoxically,
5139 even though adding type declarations introduces type checks, it
5140 usually reduces the overall amount of type checking.  This is
5141 especially true for structure slot type declarations.
5142
5143 \Python{} uses the \code{safety} optimization quality (rather than
5144 presence or absence of declarations) to choose one of three levels of
5145 run-time type error checking: \pxlref{optimize-declaration}.
5146 \xlref{advanced-type-stuff} for more information about types in
5147 \Python.
5148
5149 %%\f
5150 %%\node Weakened Type Checking,  , Precise Type Checking, Types in Python
5151 \subsection{Weakened Type Checking}
5152 \label{weakened-type-checks}
5153 \cindex{weakened type checking}
5154 \cpsubindex{type checking}{weakened}
5155
5156 When the value for the \code{speed} optimization quality is greater
5157 than \code{safety}, and \code{safety} is not \code{0}, then type
5158 checking is weakened to reduce the speed and space penalty.  In
5159 structure-intensive code this can double the speed, yet still catch
5160 most type errors.  Weakened type checks provide a level of safety
5161 similar to that of ``safe'' code in other \llisp{} compilers.
5162
5163 A type check is weakened by changing the check to be for some
5164 convenient supertype of the asserted type.  For example,
5165 \code{\w{(integer 3 17)}} is changed to \code{fixnum},
5166 \code{\w{(simple-vector 17)}} to \code{simple-vector}, and structure
5167 types are changed to \code{structure}.  A complex check like:
5168 \begin{example}
5169 (or node hunk (member :foo :bar :baz))
5170 \end{example}
5171 will be omitted entirely (i.e., the check is weakened to \code{*}.)  If
5172 a precise check can be done for no extra cost, then no weakening is
5173 done.
5174
5175 Although weakened type checking is similar to type checking done by
5176 other compilers, it is sometimes safer and sometimes less safe.
5177 Weakened checks are done in the same places as precise checks, so all
5178 the preceding discussion about where checking is done still applies.
5179 Weakened checking is sometimes somewhat unsafe because although the
5180 check is weakened, the precise type is still input into type
5181 inference.  In some contexts this will result in type inferences not
5182 justified by the weakened check, and hence deletion of some type
5183 checks that would be done by conventional compilers.
5184
5185 For example, if this code was compiled with weakened checks:
5186 \begin{lisp}
5187 (defstruct foo
5188   (a nil :type simple-string))
5189
5190 (defstruct bar
5191   (a nil :type single-float))
5192
5193 (defun myfun (x)
5194   (declare (type bar x))
5195   (* (bar-a x) 3.0))
5196 \end{lisp}
5197 and \code{myfun} was passed a \code{foo}, then no type error would be
5198 signaled, and we would try to multiply a \code{simple-string} as
5199 though it were a float (with unpredictable results.)  This is because
5200 the check for \code{bar} was weakened to \code{structure}, yet when
5201 compiling the call to \code{bar-a}, the compiler thinks it knows it
5202 has a \code{bar}.
5203
5204 Note that normally even weakened type checks report the precise type
5205 in error messages.  For example, if \code{myfun}'s \code{bar} check is
5206 weakened to \code{structure}, and the argument is \false{}, then the
5207 error will be:
5208 \begin{example}
5209 Type-error in MYFUN:
5210   NIL is not of type BAR
5211 \end{example}
5212 However, there is some speed and space cost for signaling a precise
5213 error, so the weakened type is reported if the \code{speed}
5214 optimization quality is \code{3} or \code{debug} quality is less than
5215 \code{1}:
5216 \begin{example}
5217 Type-error in MYFUN:
5218   NIL is not of type STRUCTURE
5219 \end{example}
5220 \xlref{optimize-declaration} for further discussion of the
5221 \code{optimize} declaration.
5222
5223 %%\f
5224 %%\node Getting Existing Programs to Run, Compiler Policy, Types in Python, The Compiler
5225 \section{Getting Existing Programs to Run}
5226 \cpsubindex{existing programs}{to run}
5227 \cpsubindex{types}{portability}
5228 \cindex{compatibility with other Lisps}
5229
5230 Since \Python{} does much more comprehensive type checking than other
5231 Lisp compilers, \Python{} will detect type errors in many programs
5232 that have been debugged using other compilers.  These errors are
5233 mostly incorrect declarations, although compile-time type errors can
5234 find actual bugs if parts of the program have never been tested.
5235
5236 Some incorrect declarations can only be detected by run-time type
5237 checking.  It is very important to initially compile programs with
5238 full type checks and then test this version.  After the checking
5239 version has been tested, then you can consider weakening or
5240 eliminating type checks.  \b{This applies even to previously debugged
5241   programs.}  \Python{} does much more type inference than other
5242 \llisp{} compilers, so believing an incorrect declaration does much
5243 more damage.
5244
5245 The most common problem is with variables whose initial value doesn't
5246 match the type declaration.  Incorrect initial values will always be
5247 flagged by a compile-time type error, and they are simple to fix once
5248 located.  Consider this code fragment:
5249 \begin{example}
5250 (prog (foo)
5251   (declare (fixnum foo))
5252   (setq foo ...)
5253   ...)
5254 \end{example}
5255 Here the variable \code{foo} is given an initial value of \false, but
5256 is declared to be a \code{fixnum}.  Even if it is never read, the
5257 initial value of a variable must match the declared type.  There are
5258 two ways to fix this problem.  Change the declaration:
5259 \begin{example}
5260 (prog (foo)
5261   (declare (type (or fixnum null) foo))
5262   (setq foo ...)
5263   ...)
5264 \end{example}
5265 or change the initial value:
5266 \begin{example}
5267 (prog ((foo 0))
5268   (declare (fixnum foo))
5269   (setq foo ...)
5270   ...)
5271 \end{example}
5272 It is generally preferable to change to a legal initial value rather
5273 than to weaken the declaration, but sometimes it is simpler to weaken
5274 the declaration than to try to make an initial value of the
5275 appropriate type.
5276
5277
5278 Another declaration problem occasionally encountered is incorrect
5279 declarations on \code{defmacro} arguments.  This usually
5280 happens when a function is converted into a macro.  Consider this
5281 macro:
5282 \begin{lisp}
5283 (defmacro my-1+ (x)
5284   (declare (fixnum x))
5285   `(the fixnum (1+ ,x)))
5286 \end{lisp}
5287 Although legal and well-defined \clisp, the meaning of this
5288 definition is almost certainly not what the writer intended.  For
5289 example, this call is illegal:
5290 \begin{lisp}
5291 (my-1+ (+ 4 5))
5292 \end{lisp}
5293 The call is illegal because the argument to the macro is \w{\code{(+ 4
5294     5)}}, which is a \code{list}, not a \code{fixnum}.  Because of
5295 macro semantics, it is hardly ever useful to declare the types of
5296 macro arguments.  If you really want to assert something about the
5297 type of the result of evaluating a macro argument, then put a
5298 \code{the} in the expansion:
5299 \begin{lisp}
5300 (defmacro my-1+ (x)
5301   `(the fixnum (1+ (the fixnum ,x))))
5302 \end{lisp}
5303 In this case, it would be stylistically preferable to change this
5304 macro back to a function and declare it inline.  Macros have no
5305 efficiency advantage over inline functions when using \Python.
5306 \xlref{inline-expansion}.
5307
5308
5309 Some more subtle problems are caused by incorrect declarations that
5310 can't be detected at compile time.  Consider this code:
5311 \begin{example}
5312 (do ((pos 0 (position #\back{a} string :start (1+ pos))))
5313     ((null pos))
5314   (declare (fixnum pos))
5315   ...)
5316 \end{example}
5317 Although \code{pos} is almost always a \code{fixnum}, it is \false{}
5318 at the end of the loop.  If this example is compiled with full type
5319 checks (the default), then running it will signal a type error at the
5320 end of the loop.  If compiled without type checks, the program will go
5321 into an infinite loop (or perhaps \code{position} will complain
5322 because \w{\code{(1+ nil)}} isn't a sensible start.)  Why?  Because if
5323 you compile without type checks, the compiler just quietly believes
5324 the type declaration.  Since \code{pos} is always a \code{fixnum}, it
5325 is never \nil, so \w{\code{(null pos)}} is never true, and the loop
5326 exit test is optimized away.  Such errors are sometimes flagged by
5327 unreachable code notes (\pxlref{dead-code-notes}), but it is still
5328 important to initially compile any system with full type checks, even
5329 if the system works fine when compiled using other compilers.
5330
5331 In this case, the fix is to weaken the type declaration to
5332 \w{\code{(or fixnum null)}}.\footnote{Actually, this declaration is
5333   totally unnecessary in \Python, since it already knows
5334   \code{position} returns a non-negative \code{fixnum} or \false.}
5335 Note that there is usually little performance penalty for weakening a
5336 declaration in this way.  Any numeric operations in the body can still
5337 assume the variable is a \code{fixnum}, since \false{} is not a legal
5338 numeric argument.  Another possible fix would be to say:
5339 \begin{example}
5340 (do ((pos 0 (position #\back{a} string :start (1+ pos))))
5341     ((null pos))
5342   (let ((pos pos))
5343     (declare (fixnum pos))
5344     ...))
5345 \end{example}
5346 This would be preferable in some circumstances, since it would allow a
5347 non-standard representation to be used for the local \code{pos}
5348 variable in the loop body (see section \ref{ND-variables}.)
5349
5350 In summary, remember that \i{all} values that a variable \i{ever}
5351 has must be of the declared type, and that you should test using safe
5352 code initially.
5353 %%\f
5354 %%\node Compiler Policy, Open Coding and Inline Expansion, Getting Existing Programs to Run, The Compiler
5355 \section{Compiler Policy}
5356 \label{compiler-policy}
5357 \cpsubindex{policy}{compiler}
5358 \cindex{compiler policy}
5359
5360 The policy is what tells the compiler \var{how} to compile a program.
5361 This is logically (and often textually) distinct from the program
5362 itself.  Broad control of policy is provided by the \code{optimize}
5363 declaration; other declarations and variables control more specific
5364 aspects of compilation.
5365
5366 %%\f
5367 \begin{comment}
5368 * The Optimize Declaration::
5369 * The Optimize-Interface Declaration::
5370 \end{comment}
5371
5372 %%\node The Optimize Declaration, The Optimize-Interface Declaration, Compiler Policy, Compiler Policy
5373 \subsection{The Optimize Declaration}
5374 \label{optimize-declaration}
5375 \cindex{optimize declaration}
5376 \cpsubindex{declarations}{\code{optimize}}
5377
5378 The \code{optimize} declaration recognizes six different
5379 \var{qualities}.  The qualities are conceptually independent aspects
5380 of program performance.  In reality, increasing one quality tends to
5381 have adverse effects on other qualities.  The compiler compares the
5382 relative values of qualities when it needs to make a trade-off; i.e.,
5383 if \code{speed} is greater than \code{safety}, then improve speed at
5384 the cost of safety.
5385
5386 The default for all qualities (except \code{debug}) is \code{1}.
5387 Whenever qualities are equal, ties are broken according to a broad
5388 idea of what a good default environment is supposed to be.  Generally
5389 this downplays \code{speed}, \code{compilation-speed} and \code{space} in
5390 favor of \code{safety} and \code{debug}.  Novice and casual users
5391 should stick to the default policy.  Advanced users often want to
5392 improve speed and memory usage at the cost of safety and
5393 debuggability.
5394
5395 If the value for a quality is \code{0} or \code{3}, then it may have a
5396 special interpretation.  A value of \code{0} means ``totally
5397 unimportant'', and a \code{3} means ``ultimately important.''  These
5398 extreme optimization values enable ``heroic'' compilation strategies
5399 that are not always desirable and sometimes self-defeating.
5400 Specifying more than one quality as \code{3} is not desirable, since
5401 it doesn't tell the compiler which quality is most important.
5402
5403
5404 These are the optimization qualities:
5405 \begin{Lentry}
5406
5407 \item[\code{speed}] \cindex{speed optimization quality}How fast the
5408   program should run.  \code{speed 3} enables some optimizations
5409   that hurt debuggability.
5410
5411 \item[\code{compilation-speed}] \cindex{compilation-speed optimization
5412     quality}How fast the compiler should run.  Note that increasing
5413   this above \code{safety} weakens type checking.
5414
5415 \item[\code{space}] \cindex{space optimization quality}How much space
5416   the compiled code should take up.  Inline expansion is mostly
5417   inhibited when \code{space} is greater than \code{speed}.  A value
5418   of \code{0} enables promiscuous inline expansion.  Wide use of a
5419   \code{0} value is not recommended, as it may waste so much space
5420   that run time is slowed.  \xlref{inline-expansion} for a discussion
5421   of inline expansion.
5422
5423 \item[\code{debug}] \cindex{debug optimization quality}How debuggable
5424   the program should be.  The quality is treated differently from the
5425   other qualities: each value indicates a particular level of debugger
5426   information; it is not compared with the other qualities.
5427   \xlref{debugger-policy} for more details.
5428
5429 \item[\code{safety}] \cindex{safety optimization quality}How much
5430   error checking should be done.  If \code{speed}, \code{space} or
5431   \code{compilation-speed} is more important than \code{safety}, then
5432   type checking is weakened (\pxlref{weakened-type-checks}).  If
5433   \code{safety} is \code{0}, then no run time error checking is done.
5434   In addition to suppressing type checks, \code{0} also suppresses
5435   argument count checking, unbound-symbol checking and array bounds
5436   checks.
5437
5438 \item[\code{extensions:inhibit-warnings}] \cindex{inhibit-warnings
5439     optimization quality}This is a CMU extension that determines how
5440   little (or how much) diagnostic output should be printed during
5441   compilation.  This quality is compared to other qualities to
5442   determine whether to print style notes and warnings concerning those
5443   qualities.  If \code{speed} is greater than \code{inhibit-warnings},
5444   then notes about how to improve speed will be printed, etc.  The
5445   default value is \code{1}, so raising the value for any standard
5446   quality above its default enables notes for that quality.  If
5447   \code{inhibit-warnings} is \code{3}, then all notes and most
5448   non-serious warnings are inhibited.  This is useful with
5449   \code{declare} to suppress warnings about unavoidable problems.
5450 \end{Lentry}
5451
5452 %%\node The Optimize-Interface Declaration,  , The Optimize Declaration, Compiler Policy
5453 \subsection{The Optimize-Interface Declaration}
5454 \label{optimize-interface-declaration}
5455 \cindex{optimize-interface declaration}
5456 \cpsubindex{declarations}{\code{optimize-interface}}
5457
5458 The \code{extensions:optimize-interface} declaration is identical in
5459 syntax to the \code{optimize} declaration, but it specifies the policy
5460 used during compilation of code the compiler automatically generates
5461 to check the number and type of arguments supplied to a function.  It
5462 is useful to specify this policy separately, since even thoroughly
5463 debugged functions are vulnerable to being passed the wrong arguments.
5464 The \code{optimize-interface} declaration can specify that arguments
5465 should be checked even when the general \code{optimize} policy is
5466 unsafe.
5467
5468 Note that this argument checking is the checking of user-supplied
5469 arguments to any functions defined within the scope of the
5470 declaration, \i{not} the checking of arguments to \llisp{}
5471 primitives that appear in those definitions.
5472
5473 The idea behind this declaration is that it allows the definition of
5474 functions that appear fully safe to other callers, but that do no
5475 internal error checking.  Of course, it is possible that arguments may
5476 be invalid in ways other than having incorrect type.  Functions
5477 compiled unsafely must still protect themselves against things like
5478 user-supplied array indices that are out of bounds and improper lists.
5479 See also the \kwd{context-declarations} option to
5480 \macref{with-compilation-unit}.
5481
5482 %%\f
5483 %%\node Open Coding and Inline Expansion,  , Compiler Policy, The Compiler
5484 \section{Open Coding and Inline Expansion}
5485 \label{open-coding}
5486 \cindex{open-coding}
5487 \cindex{inline expansion}
5488 \cindex{static functions}
5489
5490 Since \clisp{} forbids the redefinition of standard functions\footnote{See the
5491 proposed X3J13 ``lisp-symbol-redefinition'' cleanup.}, the compiler can have
5492 special knowledge of these standard functions embedded in it.  This special
5493 knowledge is used in various ways (open coding, inline expansion, source
5494 transformation), but the implications to the user are basically the same:
5495 \begin{itemize}
5496
5497 \item Attempts to redefine standard functions may be frustrated, since
5498   the function may never be called.  Although it is technically
5499   illegal to redefine standard functions, users sometimes want to
5500   implicitly redefine these functions when they are debugging using
5501   the \code{trace} macro.  Special-casing of standard functions can be
5502   inhibited using the \code{notinline} declaration.
5503
5504 \item The compiler can have multiple alternate implementations of
5505   standard functions that implement different trade-offs of speed,
5506   space and safety.  This selection is based on the compiler policy,
5507   \pxlref{compiler-policy}.
5508 \end{itemize}
5509
5510
5511 When a function call is \i{open coded}, inline code whose effect is
5512 equivalent to the function call is substituted for that function call.
5513 When a function call is \i{closed coded}, it is usually left as is,
5514 although it might be turned into a call to a different function with
5515 different arguments.  As an example, if \code{nthcdr} were to be open
5516 coded, then
5517 \begin{lisp}
5518 (nthcdr 4 foobar)
5519 \end{lisp}
5520 might turn into
5521 \begin{lisp}
5522 (cdr (cdr (cdr (cdr foobar))))
5523 \end{lisp}
5524 or even
5525 \begin{lisp}
5526 (do ((i 0 (1+ i))
5527      (list foobar (cdr foobar)))
5528     ((= i 4) list))
5529 \end{lisp}
5530
5531 If \code{nth} is closed coded, then
5532 \begin{lisp}
5533 (nth x l)
5534 \end{lisp}
5535 might stay the same, or turn into something like:
5536 \begin{lisp}
5537 (car (nthcdr x l))
5538 \end{lisp}
5539
5540 In general, open coding sacrifices space for speed, but some functions (such as
5541 \code{car}) are so simple that they are always open-coded.  Even when not
5542 open-coded, a call to a standard function may be transformed into a different
5543 function call (as in the last example) or compiled as a \i{static call}.  Static
5544 function call uses a more efficient calling convention that forbids
5545 redefinition.
5546
5547 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/efficiency.ms}
5548
5549
5550
5551 \hide{ -*- Dictionary: cmu-user -*- }
5552 %%\node Advanced Compiler Use and Efficiency Hints, UNIX Interface, The Compiler, Top
5553 \chapter{Advanced Compiler Use and Efficiency Hints}
5554 \begin{center}
5555 \b{By Robert MacLachlan}
5556 \end{center}
5557 \vspace{1 cm}
5558 \label{advanced-compiler}
5559
5560 \begin{comment}
5561 * Advanced Compiler Introduction::
5562 * More About Types in Python::
5563 * Type Inference::
5564 * Source Optimization::
5565 * Tail Recursion::
5566 * Local Call::
5567 * Block Compilation::
5568 * Inline Expansion::
5569 * Byte Coded Compilation::
5570 * Object Representation::
5571 * Numbers::
5572 * General Efficiency Hints::
5573 * Efficiency Notes::
5574 * Profiling::
5575 \end{comment}
5576
5577 %%\node Advanced Compiler Introduction, More About Types in Python, Advanced Compiler Use and Efficiency Hints, Advanced Compiler Use and Efficiency Hints
5578 \section{Advanced Compiler Introduction}
5579
5580 In \cmucl, as in any language on any computer, the path to efficient
5581 code starts with good algorithms and sensible programming techniques,
5582 but to avoid inefficiency pitfalls, you need to know some of this
5583 implementation's quirks and features.  This chapter is mostly a fairly
5584 long and detailed overview of what optimizations \python{} does.
5585 Although there are the usual negative suggestions of inefficient
5586 features to avoid, the main emphasis is on describing the things that
5587 programmers can count on being efficient.
5588
5589 The optimizations described here can have the effect of speeding up
5590 existing programs written in conventional styles, but the potential
5591 for new programming styles that are clearer and less error-prone is at
5592 least as significant.  For this reason, several sections end with a
5593 discussion of the implications of these optimizations for programming
5594 style.
5595
5596 \begin{comment}
5597 * Types::
5598 * Optimization::
5599 * Function Call::
5600 * Representation of Objects::
5601 * Writing Efficient Code::
5602 \end{comment}
5603
5604 %%\node Types, Optimization, Advanced Compiler Introduction, Advanced Compiler Introduction
5605 \subsection{Types}
5606
5607 Python's support for types is unusual in three major ways:
5608 \begin{itemize}
5609
5610 \item Precise type checking encourages the specific use of type
5611   declarations as a form of run-time consistency checking.  This
5612   speeds development by localizing type errors and giving more
5613   meaningful error messages.  \xlref{precise-type-checks}.  \python{}
5614   produces completely safe code; optimized type checking maintains
5615   reasonable efficiency on conventional hardware
5616   (\pxlref{type-check-optimization}.)
5617
5618 \item Comprehensive support for the \clisp{} type system makes complex
5619   type specifiers useful.  Using type specifiers such as \code{or} and
5620   \code{member} has both efficiency and robustness advantages.
5621   \xlref{advanced-type-stuff}.
5622
5623 \item Type inference eliminates the need for some declarations, and
5624   also aids compile-time detection of type errors.  Given detailed
5625   type declarations, type inference can often eliminate type checks
5626   and enable more efficient object representations and code sequences.
5627   Checking all types results in fewer type checks.  See sections
5628   \ref{type-inference} and \ref{non-descriptor}.
5629 \end{itemize}
5630
5631
5632 %%\node Optimization, Function Call, Types, Advanced Compiler Introduction
5633 \subsection{Optimization}
5634
5635 The main barrier to efficient Lisp programs is not that there is no
5636 efficient way to code the program in Lisp, but that it is difficult to
5637 arrive at that efficient coding.  Common Lisp is a highly complex
5638 language, and usually has many semantically equivalent ``reasonable''
5639 ways to code a given problem.  It is desirable to make all of these
5640 equivalent solutions have comparable efficiency so that programmers
5641 don't have to waste time discovering the most efficient solution.
5642
5643 Source level optimization increases the number of efficient ways to
5644 solve a problem.  This effect is much larger than the increase in the
5645 efficiency of the ``best'' solution.  Source level optimization
5646 transforms the original program into a more efficient (but equivalent)
5647 program.  Although the optimizer isn't doing anything the programmer
5648 couldn't have done, this high-level optimization is important because:
5649 \begin{itemize}
5650
5651 \item The programmer can code simply and directly, rather than
5652   obfuscating code to please the compiler.
5653
5654 \item When presented with a choice of similar coding alternatives, the
5655   programmer can choose whichever happens to be most convenient,
5656   instead of worrying about which is most efficient.
5657 \end{itemize}
5658
5659 Source level optimization eliminates the need for macros to optimize
5660 their expansion, and also increases the effectiveness of inline
5661 expansion.  See sections \ref{source-optimization} and
5662 \ref{inline-expansion}.
5663
5664 Efficient support for a safer programming style is the biggest
5665 advantage of source level optimization.  Existing tuned programs
5666 typically won't benefit much from source optimization, since their
5667 source has already been optimized by hand.  However, even tuned
5668 programs tend to run faster under \python{} because:
5669 \begin{itemize}
5670
5671 \item Low level optimization and register allocation provides modest
5672   speedups in any program.
5673
5674 \item Block compilation and inline expansion can reduce function call
5675   overhead, but may require some program restructuring.  See sections
5676   \ref{inline-expansion}, \ref{local-call} and
5677   \ref{block-compilation}.
5678
5679 \item Efficiency notes will point out important type declarations that
5680   are often missed even in highly tuned programs.
5681   \xlref{efficiency-notes}.
5682
5683 \item Existing programs can be compiled safely without prohibitive
5684   speed penalty, although they would be faster and safer with added
5685   declarations.  \xlref{type-check-optimization}.
5686
5687 \item The context declaration mechanism allows both space and runtime
5688   of large systems to be reduced without sacrificing robustness by
5689   semi-automatically varying compilation policy without adding any
5690   \code{optimize} declarations to the source.
5691   \xlref{context-declarations}.
5692
5693 \item Byte compilation can be used to dramatically reduce the size of
5694   code that is not speed-critical.  \xlref{byte-compile}.
5695 \end{itemize}
5696
5697
5698 %%\node Function Call, Representation of Objects, Optimization, Advanced Compiler Introduction
5699 \subsection{Function Call}
5700
5701 The sort of symbolic programs generally written in \llisp{} often
5702 favor recursion over iteration, or have inner loops so complex that
5703 they involve multiple function calls.  Such programs spend a larger
5704 fraction of their time doing function calls than is the norm in other
5705 languages; for this reason \llisp{} implementations strive to make the
5706 general (or full) function call as inexpensive as possible.  \python{}
5707 goes beyond this by providing two good alternatives to full call:
5708 \begin{itemize}
5709
5710 \item Local call resolves function references at compile time,
5711   allowing better calling sequences and optimization across function
5712   calls.  \xlref{local-call}.
5713
5714 \item Inline expansion totally eliminates call overhead and allows
5715   many context dependent optimizations.  This provides a safe and
5716   efficient implementation of operations with function semantics,
5717   eliminating the need for error-prone macro definitions or manual
5718   case analysis.  Although most \clisp{} implementations support
5719   inline expansion, it becomes a more powerful tool with \python{}'s
5720   source level optimization.  See sections \ref{source-optimization}
5721   and \ref{inline-expansion}.
5722 \end{itemize}
5723
5724
5725 Generally, \python{} provides simple implementations for simple uses
5726 of function call, rather than having only a single calling convention.
5727 These features allow a more natural programming style:
5728 \begin{itemize}
5729
5730 \item Proper tail recursion.  \xlref{tail-recursion}.
5731
5732 \item Relatively efficient closures.
5733
5734 \item A \code{funcall} that is as efficient as normal named call.
5735
5736 \item Calls to local functions such as from \code{labels} are
5737   optimized:
5738 \begin{itemize}
5739
5740 \item Control transfer is a direct jump.
5741
5742 \item The closure environment is passed in registers rather than heap
5743   allocated.
5744
5745 \item Keyword arguments and multiple values are implemented more
5746   efficiently.
5747 \end{itemize}
5748
5749 \xlref{local-call}.
5750 \end{itemize}
5751
5752 %%\node Representation of Objects, Writing Efficient Code, Function Call, Advanced Compiler Introduction
5753 \subsection{Representation of Objects}
5754
5755 Sometimes traditional \llisp{} implementation techniques compare so
5756 poorly to the techniques used in other languages that \llisp{} can
5757 become an impractical language choice.  Terrible inefficiencies appear
5758 in number-crunching programs, since \llisp{} numeric operations often
5759 involve number-consing and generic arithmetic.  \python{} supports
5760 efficient natural representations for numbers (and some other types),
5761 and allows these efficient representations to be used in more
5762 contexts.  \python{} also provides good efficiency notes that warn
5763 when a crucial declaration is missing.
5764
5765 See section \ref{non-descriptor} for more about object representations and
5766 numeric types.  Also \pxlref{efficiency-notes} about efficiency notes.
5767
5768 %%\node Writing Efficient Code,  , Representation of Objects, Advanced Compiler Introduction
5769 \subsection{Writing Efficient Code}
5770 \label{efficiency-overview}
5771
5772 Writing efficient code that works is a complex and prolonged process.
5773 It is important not to get so involved in the pursuit of efficiency
5774 that you lose sight of what the original problem demands.  Remember
5775 that:
5776 \begin{itemize}
5777
5778 \item The program should be correct\dash{}it doesn't matter how
5779   quickly you get the wrong answer.
5780
5781 \item Both the programmer and the user will make errors, so the
5782   program must be robust\dash{}it must detect errors in a way that
5783   allows easy correction.
5784
5785 \item A small portion of the program will consume most of the
5786   resources, with the bulk of the code being virtually irrelevant to
5787   efficiency considerations.  Even experienced programmers familiar
5788   with the problem area cannot reliably predict where these ``hot
5789   spots'' will be.
5790 \end{itemize}
5791
5792
5793
5794 The best way to get efficient code that is still worth using is to separate
5795 coding from tuning.  During coding, you should:
5796 \begin{itemize}
5797
5798 \item Use a coding style that aids correctness and robustness without
5799   being incompatible with efficiency.
5800
5801 \item Choose appropriate data structures that allow efficient
5802   algorithms and object representations
5803   (\pxlref{object-representation}).  Try to make interfaces abstract
5804   enough so that you can change to a different representation if
5805   profiling reveals a need.
5806
5807 \item Whenever you make an assumption about a function argument or
5808   global data structure, add consistency assertions, either with type
5809   declarations or explicit uses of \code{assert}, \code{ecase}, etc.
5810 \end{itemize}
5811
5812 During tuning, you should:
5813 \begin{itemize}
5814
5815 \item Identify the hot spots in the program through profiling (section
5816   \ref{profiling}.)
5817
5818 \item Identify inefficient constructs in the hot spot with efficiency
5819   notes, more profiling, or manual inspection of the source.  See
5820   sections \ref{general-efficiency} and \ref{efficiency-notes}.
5821
5822 \item Add declarations and consider the application of optimizations.
5823   See sections \ref{local-call}, \ref{inline-expansion} and
5824   \ref{non-descriptor}.
5825
5826 \item If all else fails, consider algorithm or data structure changes.
5827   If you did a good job coding, changes will be easy to introduce.
5828 \end{itemize}
5829
5830
5831
5832 %%\f
5833 %%\node More About Types in Python, Type Inference, Advanced Compiler Introduction, Advanced Compiler Use and Efficiency Hints
5834 \section{More About Types in Python}
5835 \label{advanced-type-stuff}
5836 \cpsubindex{types}{in python}
5837
5838 This section goes into more detail describing what types and declarations are
5839 recognized by \python.  The area where \python{} differs most radically from
5840 previous \llisp{} compilers is in its support for types:
5841 \begin{itemize}
5842
5843 \item Precise type checking helps to find bugs at run time.
5844
5845 \item Compile-time type checking helps to find bugs at compile time.
5846
5847 \item Type inference minimizes the need for generic operations, and
5848   also increases the efficiency of run time type checking and the
5849   effectiveness of compile time type checking.
5850
5851 \item Support for detailed types provides a wealth of opportunity for
5852   operation-specific type inference and optimization.
5853 \end{itemize}
5854
5855
5856
5857 \begin{comment}
5858 * More Types Meaningful::
5859 * Canonicalization::
5860 * Member Types::
5861 * Union Types::
5862 * The Empty Type::
5863 * Function Types::
5864 * The Values Declaration::
5865 * Structure Types::
5866 * The Freeze-Type Declaration::
5867 * Type Restrictions::
5868 * Type Style Recommendations::
5869 \end{comment}
5870
5871 %%\node More Types Meaningful, Canonicalization, More About Types in Python, More About Types in Python
5872 \subsection{More Types Meaningful}
5873
5874 \clisp{} has a very powerful type system, but conventional \llisp{}
5875 implementations typically only recognize the small set of types
5876 special in that implementation.  In these systems, there is an
5877 unfortunate paradox: a declaration for a relatively general type like
5878 \code{fixnum} will be recognized by the compiler, but a highly
5879 specific declaration such as \code{\w{(integer 3 17)}} is totally
5880 ignored.
5881
5882 This is obviously a problem, since the user has to know how to specify
5883 the type of an object in the way the compiler wants it.  A very
5884 minimal (but rarely satisfied) criterion for type system support is
5885 that it be no worse to make a specific declaration than to make a
5886 general one.  \python{} goes beyond this by exploiting a number of
5887 advantages obtained from detailed type information.
5888
5889 Using more restrictive types in declarations allows the compiler to do
5890 better type inference and more compile-time type checking.  Also, when
5891 type declarations are considered to be consistency assertions that
5892 should be verified (conditional on policy), then complex types are
5893 useful for making more detailed assertions.
5894
5895 Python ``understands'' the list-style \code{or}, \code{member},
5896 \code{function}, array and number type specifiers.  Understanding
5897 means that:
5898 \begin{itemize}
5899
5900 \item If the type contains more information than is used in a
5901   particular context, then the extra information is simply ignored,
5902   rather than derailing type inference.
5903
5904 \item In many contexts, the extra information from these type
5905   specifiers is used to good effect.  In particular, type checking in
5906   \python{} is \var{precise}, so these complex types can be used
5907   in declarations to make interesting assertions about functions and
5908   data structures (\pxlref{precise-type-checks}.)  More specific
5909   declarations also aid type inference and reduce the cost for type
5910   checking.
5911 \end{itemize}
5912
5913 For related information, \pxlref{numeric-types} for numeric types, and
5914 section \ref{array-types} for array types.
5915
5916
5917 %%\node Canonicalization, Member Types, More Types Meaningful, More About Types in Python
5918 \subsection{Canonicalization}
5919 \cpsubindex{types}{equivalence}
5920 \cindex{canonicalization of types}
5921 \cindex{equivalence of types}
5922
5923 When given a type specifier, \python{} will often rewrite it into a
5924 different (but equivalent) type.  This is the mechanism that \python{}
5925 uses for detecting type equivalence.  For example, in \python{}'s
5926 canonical representation, these types are equivalent:
5927 \begin{example}
5928 (or list (member :end)) \myequiv (or cons (member nil :end))
5929 \end{example}
5930 This has two implications for the user:
5931 \begin{itemize}
5932
5933 \item The standard symbol type specifiers for \code{atom},
5934   \code{null}, \code{fixnum}, etc., are in no way magical.  The
5935   \tindexed{null} type is actually defined to be \code{\w{(member
5936       nil)}}, \tindexed{list} is \code{\w{(or cons null)}}, and
5937   \tindexed{fixnum} is \code{\w{(signed-byte 30)}}.
5938
5939 \item When the compiler prints out a type, it may not look like the
5940   type specifier that originally appeared in the program.  This is
5941   generally not a problem, but it must be taken into consideration
5942   when reading compiler error messages.
5943 \end{itemize}
5944
5945
5946 %%\node Member Types, Union Types, Canonicalization, More About Types in Python
5947 \subsection{Member Types}
5948 \cindex{member types}
5949
5950 The \tindexed{member} type specifier can be used to represent
5951 ``symbolic'' values, analogous to the enumerated types of Pascal.  For
5952 example, the second value of \code{find-symbol} has this type:
5953 \begin{lisp}
5954 (member :internal :external :inherited nil)
5955 \end{lisp}
5956 Member types are very useful for expressing consistency constraints on data
5957 structures, for example:
5958 \begin{lisp}
5959 (defstruct ice-cream
5960   (flavor :vanilla :type (member :vanilla :chocolate :strawberry)))
5961 \end{lisp}
5962 Member types are also useful in type inference, as the number of members can
5963 sometimes be pared down to one, in which case the value is a known constant.
5964
5965 %%\node Union Types, The Empty Type, Member Types, More About Types in Python
5966 \subsection{Union Types}
5967 \cindex{union (\code{or}) types}
5968 \cindex{or (union) types}
5969
5970 The \tindexed{or} (union) type specifier is understood, and is
5971 meaningfully applied in many contexts.  The use of \code{or} allows
5972 assertions to be made about types in dynamically typed programs.  For
5973 example:
5974 \begin{lisp}
5975 (defstruct box
5976   (next nil :type (or box null))
5977   (top :removed :type (or box-top (member :removed))))
5978 \end{lisp}
5979 The type assertion on the \code{top} slot ensures that an error will be signaled
5980 when there is an attempt to store an illegal value (such as \kwd{rmoved}.)
5981 Although somewhat weak, these union type assertions provide a useful input into
5982 type inference, allowing the cost of type checking to be reduced.  For example,
5983 this loop is safely compiled with no type checks:
5984 \begin{lisp}
5985 (defun find-box-with-top (box)
5986   (declare (type (or box null) box))
5987   (do ((current box (box-next current)))
5988       ((null current))
5989     (unless (eq (box-top current) :removed)
5990       (return current))))
5991 \end{lisp}
5992
5993 Union types are also useful in type inference for representing types that are
5994 partially constrained.  For example, the result of this expression:
5995 \begin{lisp}
5996 (if foo
5997     (logior x y)
5998     (list x y))
5999 \end{lisp}
6000 can be expressed as \code{\w{(or integer cons)}}.
6001
6002 %%\node The Empty Type, Function Types, Union Types, More About Types in Python
6003 \subsection{The Empty Type}
6004 \label{empty-type}
6005 \cindex{NIL type}
6006 \cpsubindex{empty type}{the}
6007 \cpsubindex{errors}{result type of}
6008
6009 The type \false{} is also called the empty type, since no object is of
6010 type \false{}.  The union of no types, \code{(or)}, is also empty.
6011 \python{}'s interpretation of an expression whose type is \false{} is
6012 that the expression never yields any value, but rather fails to
6013 terminate, or is thrown out of.  For example, the type of a call to
6014 \code{error} or a use of \code{return} is \false{}.  When the type of
6015 an expression is empty, compile-time type warnings about its value are
6016 suppressed; presumably somebody else is signaling an error.  If a
6017 function is declared to have return type \false{}, but does in fact
6018 return, then (in safe compilation policies) a ``\code{NIL Function
6019   returned}'' error will be signaled.  See also the function
6020 \funref{required-argument}.
6021
6022 %%\node Function Types, The Values Declaration, The Empty Type, More About Types in Python
6023 \subsection{Function Types}
6024 \label{function-types}
6025 \cpsubindex{function}{types}
6026 \cpsubindex{types}{function}
6027
6028 \findexed{function} types are understood in the restrictive sense, specifying:
6029 \begin{itemize}
6030
6031 \item The argument syntax that the function must be called with.  This
6032   is information about what argument counts are acceptable, and which
6033   keyword arguments are recognized.  In \python, warnings about
6034   argument syntax are a consequence of function type checking.
6035
6036 \item The types of the argument values that the caller must pass.  If
6037   the compiler can prove that some argument to a call is of a type
6038   disallowed by the called function's type, then it will give a
6039   compile-time type warning.  In addition to being used for
6040   compile-time type checking, these type assertions are also used as
6041   output type assertions in code generation.  For example, if
6042   \code{foo} is declared to have a \code{fixnum} argument, then the
6043   \code{1+} in \w{\code{(foo (1+ x))}} is compiled with knowledge that
6044   the result must be a fixnum.
6045
6046 \item The types of the values that will be bound to argument variables in
6047   the function's definition.  Declaring a function's type with
6048   \code{ftype} implicitly declares the types of the arguments in the
6049   definition.  \python{} checks for consistency between the definition
6050   and the \code{ftype} declaration.  Because of precise type checking,
6051   an error will be signaled when a function is called with an
6052   argument of the wrong type.
6053
6054 \item The type of return value(s) that the caller can expect.  This
6055   information is a useful input to type inference.  For example, if a
6056   function is declared to return a \code{fixnum}, then when a call to
6057   that function appears in an expression, the expression will be
6058   compiled with knowledge that the call will return a \code{fixnum}.
6059
6060 \item The type of return value(s) that the definition must return.
6061   The result type in an \code{ftype} declaration is treated like an
6062   implicit \code{the} wrapped around the body of the definition.  If
6063   the definition returns a value of the wrong type, an error will be
6064   signaled.  If the compiler can prove that the function returns the
6065   wrong type, then it will give a compile-time warning.
6066 \end{itemize}
6067
6068 This is consistent with the new interpretation of function types and
6069 the \code{ftype} declaration in the proposed X3J13
6070 ``function-type-argument-type-semantics'' cleanup.  Note also, that if
6071 you don't explicitly declare the type of a function using a global
6072 \code{ftype} declaration, then \python{} will compute a function type
6073 from the definition, providing a degree of inter-routine type
6074 inference, \pxlref{function-type-inference}.
6075
6076 %%\node The Values Declaration, Structure Types, Function Types, More About Types in Python
6077 \subsection{The Values Declaration}
6078 \cindex{values declaration}
6079
6080 \cmucl{} supports the \code{values} declaration as an extension to
6081 \clisp.  The syntax is {\code{(values \var{type1}
6082     \var{type2}$\ldots$\var{typen})}}.  This declaration is
6083 semantically equivalent to a \code{the} form wrapped around the body
6084 of the special form in which the \code{values} declaration appears.
6085 The advantage of \code{values} over \findexed{the} is purely
6086 syntactic\dash{}it doesn't introduce more indentation.  For example:
6087 \begin{example}
6088 (defun foo (x)
6089   (declare (values single-float))
6090   (ecase x
6091     (:this ...)
6092     (:that ...)
6093     (:the-other ...)))
6094 \end{example}
6095 is equivalent to:
6096 \begin{example}
6097 (defun foo (x)
6098   (the single-float
6099        (ecase x
6100          (:this ...)
6101          (:that ...)
6102          (:the-other ...))))
6103 \end{example}
6104 and
6105 \begin{example}
6106 (defun floor (number &optional (divisor 1))
6107   (declare (values integer real))
6108   ...)
6109 \end{example}
6110 is equivalent to:
6111 \begin{example}
6112 (defun floor (number &optional (divisor 1))
6113   (the (values integer real)
6114        ...))
6115 \end{example}
6116 In addition to being recognized by \code{lambda} (and hence by
6117 \code{defun}), the \code{values} declaration is recognized by all the
6118 other special forms with bodies and declarations: \code{let},
6119 \code{let*}, \code{labels} and \code{flet}.  Macros with declarations
6120 usually splice the declarations into one of the above forms, so they
6121 will accept this declaration too, but the exact effect of a
6122 \code{values} declaration will depend on the macro.
6123
6124 If you declare the types of all arguments to a function, and also
6125 declare the return value types with \code{values}, you have described
6126 the type of the function.  \python{} will use this argument and result
6127 type information to derive a function type that will then be applied
6128 to calls of the function (\pxlref{function-types}.)  This provides a
6129 way to declare the types of functions that is much less syntactically
6130 awkward than using the \code{ftype} declaration with a \code{function}
6131 type specifier.
6132
6133 Although the \code{values} declaration is non-standard, it is
6134 relatively harmless to use it in otherwise portable code, since any
6135 warning in non-CMU implementations can be suppressed with the standard
6136 \code{declaration} proclamation.
6137
6138 %%\node Structure Types, The Freeze-Type Declaration, The Values Declaration, More About Types in Python
6139 \subsection{Structure Types}
6140 \label{structure-types}
6141 \cindex{structure types}
6142 \cindex{defstruct types}
6143 \cpsubindex{types}{structure}
6144
6145 Because of precise type checking, structure types are much better supported by
6146 Python than by conventional compilers:
6147 \begin{itemize}
6148
6149 \item The structure argument to structure accessors is precisely
6150   checked\dash{}if you call \code{foo-a} on a \code{bar}, an error
6151   will be signaled.
6152
6153 \item The types of slot values are precisely checked\dash{}if you pass
6154   the wrong type argument to a constructor or a slot setter, then an
6155   error will be signaled.
6156 \end{itemize}
6157 This error checking is tremendously useful for detecting bugs in
6158 programs that manipulate complex data structures.
6159
6160 An additional advantage of checking structure types and enforcing slot
6161 types is that the compiler can safely believe slot type declarations.
6162 \python{} effectively moves the type checking from the slot access to
6163 the slot setter or constructor call.  This is more efficient since
6164 the caller of the setter or constructor often knows the type of the value,
6165 entirely eliminating the need to check the value's type.  Consider
6166 this example:
6167 \begin{lisp}
6168 (defstruct coordinate
6169   (x nil :type single-float)
6170   (y nil :type single-float))
6171
6172 (defun make-it ()
6173   (make-coordinate :x 1.0 :y 1.0))
6174
6175 (defun use-it (it)
6176   (declare (type coordinate it))
6177   (sqrt (+ (expt (coordinate-x it) 2) (expt (coordinate-y it) 2))))
6178 \end{lisp}
6179 \code{make-it} and \code{use-it} are compiled with no checking on the
6180 types of the float slots, yet \code{use-it} can use
6181 \code{single-float} arithmetic with perfect safety.  Note that
6182 \code{make-coordinate} must still check the values of \code{x} and
6183 \code{y} unless the call is block compiled or inline expanded
6184 (\pxlref{local-call}.)  But even without this advantage, it is almost
6185 always more efficient to check slot values on structure
6186 initialization, since slots are usually written once and read many
6187 times.
6188
6189 %%\node The Freeze-Type Declaration, Type Restrictions, Structure Types, More About Types in Python
6190 \subsection{The Freeze-Type Declaration}
6191 \cindex{freeze-type declaration}
6192 \label{freeze-type}
6193
6194 The \code{extensions:freeze-type} declaration is a CMU extension that
6195 enables more efficient compilation of user-defined types by asserting
6196 that the definition is not going to change.  This declaration may only
6197 be used globally (with \code{declaim} or \code{proclaim}).  Currently
6198 \code{freeze-type} only affects structure type testing done by
6199 \code{typep}, \code{typecase}, etc.  Here is an example:
6200 \begin{lisp}
6201 (declaim (freeze-type foo bar))
6202 \end{lisp}
6203 This asserts that the types \code{foo} and \code{bar} and their
6204 subtypes are not going to change.  This allows more efficient type
6205 testing, since the compiler can open-code a test for all possible
6206 subtypes, rather than having to examine the type hierarchy at
6207 run-time.
6208
6209 %%\node Type Restrictions, Type Style Recommendations, The Freeze-Type Declaration, More About Types in Python
6210 \subsection{Type Restrictions}
6211 \cpsubindex{types}{restrictions on}
6212
6213 Avoid use of the \code{and}, \code{not} and \code{satisfies} types in
6214 declarations, since type inference has problems with them.  When these
6215 types do appear in a declaration, they are still checked precisely,
6216 but the type information is of limited use to the compiler.
6217 \code{and} types are effective as long as the intersection can be
6218 canonicalized to a type that doesn't use \code{and}.  For example:
6219 \begin{example}
6220 (and fixnum unsigned-byte)
6221 \end{example}
6222 is fine, since it is the same as:
6223 \begin{example}
6224 (integer 0 \var{most-positive-fixnum})
6225 \end{example}
6226 but this type:
6227 \begin{example}
6228 (and symbol (not (member :end)))
6229 \end{example}
6230 will not be fully understood by type inference since the \code{and}
6231 can't be removed by canonicalization.
6232
6233 Using any of these type specifiers in a type test with \code{typep} or
6234 \code{typecase} is fine, since as tests, these types can be translated
6235 into the \code{and} macro, the \code{not} function or a call to the
6236 satisfies predicate.
6237
6238 %%\node Type Style Recommendations,  , Type Restrictions, More About Types in Python
6239 \subsection{Type Style Recommendations}
6240 \cindex{style recommendations}
6241
6242 Python provides good support for some currently unconventional ways of
6243 using the \clisp{} type system.  With \python, it is desirable to make
6244 declarations as precise as possible, but type inference also makes
6245 some declarations unnecessary.  Here are some general guidelines for
6246 maximum robustness and efficiency:
6247 \begin{itemize}
6248
6249 \item Declare the types of all function arguments and structure slots
6250   as precisely as possible (while avoiding \code{not}, \code{and} and
6251   \code{satisfies}).  Put these declarations in during initial coding
6252   so that type assertions can find bugs for you during debugging.
6253
6254 \item Use the \tindexed{member} type specifier where there are a small
6255   number of possible symbol values, for example: \w{\code{(member :red
6256       :blue :green)}}.
6257
6258 \item Use the \tindexed{or} type specifier in situations where the
6259   type is not certain, but there are only a few possibilities, for
6260   example: \w{\code{(or list vector)}}.
6261
6262 \item Declare integer types with the tightest bounds that you can,
6263   such as \code{\w{(integer 3 7)}}.
6264
6265 \item Define \findexed{deftype} or \findexed{defstruct} types before
6266   they are used.  Definition after use is legal (producing no
6267   ``undefined type'' warnings), but type tests and structure
6268   operations will be compiled much less efficiently.
6269
6270 \item Use the \code{extensions:freeze-type} declaration to speed up
6271   type testing for structure types which won't have new subtypes added
6272   later. \xlref{freeze-type}.
6273
6274 \item In addition to declaring the array element type and simpleness,
6275   also declare the dimensions if they are fixed, for example:
6276   \begin{example}
6277     (simple-array single-float (1024 1024))
6278   \end{example}
6279   This bounds information allows array indexing for multi-dimensional
6280   arrays to be compiled much more efficiently, and may also allow
6281   array bounds checking to be done at compile time.
6282   \xlref{array-types}.
6283
6284 \item Avoid use of the \findexed{the} declaration within expressions.
6285   Not only does it clutter the code, but it is also almost worthless
6286   under safe policies.  If the need for an output type assertion is
6287   revealed by efficiency notes during tuning, then you can consider
6288   \code{the}, but it is preferable to constrain the argument types
6289   more, allowing the compiler to prove the desired result type.
6290
6291 \item Don't bother declaring the type of \findexed{let} or other
6292   non-argument variables unless the type is non-obvious.  If you
6293   declare function return types and structure slot types, then the
6294   type of a variable is often obvious both to the programmer and to
6295   the compiler.  An important case where the type isn't obvious, and a
6296   declaration is appropriate, is when the value for a variable is
6297   pulled out of untyped structure (e.g., the result of \code{car}), or
6298   comes from some weakly typed function, such as \code{read}.
6299
6300 \item Declarations are sometimes necessary for integer loop variables,
6301   since the compiler can't always prove that the value is of a good
6302   integer type.  These declarations are best added during tuning, when
6303   an efficiency note indicates the need.
6304 \end{itemize}
6305
6306
6307 %%\f
6308 %%\node Type Inference, Source Optimization, More About Types in Python, Advanced Compiler Use and Efficiency Hints
6309 \section{Type Inference}
6310 \label{type-inference}
6311 \cindex{type inference}
6312 \cindex{inference of types}
6313 \cindex{derivation of types}
6314
6315 Type inference is the process by which the compiler tries to figure
6316 out the types of expressions and variables, given an inevitable lack
6317 of complete type information.  Although \python{} does much more type
6318 inference than most \llisp{} compilers, remember that the more precise
6319 and comprehensive type declarations are, the more type inference will
6320 be able to do.
6321
6322 \begin{comment}
6323 * Variable Type Inference::
6324 * Local Function Type Inference::
6325 * Global Function Type Inference::
6326 * Operation Specific Type Inference::
6327 * Dynamic Type Inference::
6328 * Type Check Optimization::
6329 \end{comment}
6330
6331 %%\node Variable Type Inference, Local Function Type Inference, Type Inference, Type Inference
6332 \subsection{Variable Type Inference}
6333 \label{variable-type-inference}
6334
6335 The type of a variable is the union of the types of all the
6336 definitions.  In the degenerate case of a let, the type of the
6337 variable is the type of the initial value.  This inferred type is
6338 intersected with any declared type, and is then propagated to all the
6339 variable's references.  The types of \findexed{multiple-value-bind}
6340 variables are similarly inferred from the types of the individual
6341 values of the values form.
6342
6343 If multiple type declarations apply to a single variable, then all the
6344 declarations must be correct; it is as though all the types were intersected
6345 producing a single \tindexed{and} type specifier.  In this example:
6346 \begin{example}
6347 (defmacro my-dotimes ((var count) &body body)
6348   `(do ((,var 0 (1+ ,var)))
6349        ((>= ,var ,count))
6350      (declare (type (integer 0 *) ,var))
6351      ,@body))
6352
6353 (my-dotimes (i ...)
6354   (declare (fixnum i))
6355   ...)
6356 \end{example}
6357 the two declarations for \code{i} are intersected, so \code{i} is
6358 known to be a non-negative fixnum.
6359
6360 In practice, this type inference is limited to lets and local
6361 functions, since the compiler can't analyze all the calls to a global
6362 function.  But type inference works well enough on local variables so
6363 that it is often unnecessary to declare the type of local variables.
6364 This is especially likely when function result types and structure
6365 slot types are declared.  The main areas where type inference breaks
6366 down are:
6367 \begin{itemize}
6368
6369 \item When the initial value of a variable is an untyped expression,
6370   such as \code{\w{(car x)}}, and
6371
6372 \item When the type of one of the variable's definitions is a function
6373   of the variable's current value, as in: \code{(setq x (1+ x))}
6374 \end{itemize}
6375
6376
6377 %%\node Local Function Type Inference, Global Function Type Inference, Variable Type Inference, Type Inference
6378 \subsection{Local Function Type Inference}
6379 \cpsubindex{local call}{type inference}
6380
6381 The types of arguments to local functions are inferred in the same way
6382 as any other local variable; the type is the union of the argument
6383 types across all the calls to the function, intersected with the
6384 declared type.  If there are any assignments to the argument
6385 variables, the type of the assigned value is unioned in as well.
6386
6387 The result type of a local function is computed in a special way that
6388 takes tail recursion (\pxlref{tail-recursion}) into consideration.
6389 The result type is the union of all possible return values that aren't
6390 tail-recursive calls.  For example, \python{} will infer that the
6391 result type of this function is \code{integer}:
6392 \begin{lisp}
6393 (defun ! (n res)
6394   (declare (integer n res))
6395   (if (zerop n)
6396       res
6397       (! (1- n) (* n res))))
6398 \end{lisp}
6399 Although this is a rather obvious result, it becomes somewhat less
6400 trivial in the presence of mutual tail recursion of multiple
6401 functions.  Local function result type inference interacts with the
6402 mechanisms for ensuring proper tail recursion mentioned in section
6403 \ref{local-call-return}.
6404
6405 %%\node Global Function Type Inference, Operation Specific Type Inference, Local Function Type Inference, Type Inference
6406 \subsection{Global Function Type Inference}
6407 \label{function-type-inference}
6408 \cpsubindex{function}{type inference}
6409
6410 As described in section \ref{function-types}, a global function type
6411 (\tindexed{ftype}) declaration places implicit type assertions on the
6412 call arguments, and also guarantees the type of the return value.  So
6413 wherever a call to a declared function appears, there is no doubt as
6414 to the types of the arguments and return value.  Furthermore,
6415 \python{} will infer a function type from the function's definition if
6416 there is no \code{ftype} declaration.  Any type declarations on the
6417 argument variables are used as the argument types in the derived
6418 function type, and the compiler's best guess for the result type of
6419 the function is used as the result type in the derived function type.
6420
6421 This method of deriving function types from the definition implicitly assumes
6422 that functions won't be redefined at run-time.  Consider this example:
6423 \begin{lisp}
6424 (defun foo-p (x)
6425   (let ((res (and (consp x) (eq (car x) 'foo))))
6426     (format t "It is ~:[not ~;~]foo." res)))
6427
6428 (defun frob (it)
6429   (if (foo-p it)
6430       (setf (cadr it) 'yow!)
6431       (1+ it)))
6432 \end{lisp}
6433
6434 Presumably, the programmer really meant to return \code{res} from
6435 \code{foo-p}, but he seems to have forgotten.  When he tries to call
6436 \code{\w{(frob (list 'foo nil))}}, \code{frob} will flame out when
6437 it tries to add to a \code{cons}.  Realizing his error, he fixes
6438 \code{foo-p} and recompiles it.  But when he retries his test case, he
6439 is baffled because the error is still there.  What happened in this
6440 example is that \python{} proved that the result of \code{foo-p} is
6441 \code{null}, and then proceeded to optimize away the \code{setf} in
6442 \code{frob}.
6443
6444 Fortunately, in this example, the error is detected at compile time
6445 due to notes about unreachable code (\pxlref{dead-code-notes}.)
6446 Still, some users may not want to worry about this sort of problem
6447 during incremental development, so there is a variable to control
6448 deriving function types.
6449
6450 \begin{defvar}{extensions:}{derive-function-types}
6451
6452   If true (the default), argument and result type information derived
6453   from compilation of \code{defun}s is used when compiling calls to
6454   that function.  If false, only information from \code{ftype}
6455   proclamations will be used.
6456 \end{defvar}
6457
6458 %%\node Operation Specific Type Inference, Dynamic Type Inference, Global Function Type Inference, Type Inference
6459 \subsection{Operation Specific Type Inference}
6460 \label{operation-type-inference}
6461 \cindex{operation specific type inference}
6462 \cindex{arithmetic type inference}
6463 \cpsubindex{numeric}{type inference}
6464
6465 Many of the standard \clisp{} functions have special type inference
6466 procedures that determine the result type as a function of the
6467 argument types.  For example, the result type of \code{aref} is the
6468 array element type.  Here are some other examples of type inferences:
6469 \begin{lisp}
6470 (logand x #xFF) \result{} (unsigned-byte 8)
6471
6472 (+ (the (integer 0 12) x) (the (integer 0 1) y)) \result{} (integer 0 13)
6473
6474 (ash (the (unsigned-byte 16) x) -8) \result{} (unsigned-byte 8)
6475 \end{lisp}
6476
6477 %%\node Dynamic Type Inference, Type Check Optimization, Operation Specific Type Inference, Type Inference
6478 \subsection{Dynamic Type Inference}
6479 \label{constraint-propagation}
6480 \cindex{dynamic type inference}
6481 \cindex{conditional type inference}
6482 \cpsubindex{type inference}{dynamic}
6483
6484 Python uses flow analysis to infer types in dynamically typed
6485 programs.  For example:
6486 \begin{example}
6487 (etypecase x
6488   (list (length x))
6489   ...)
6490 \end{example}
6491 Here, the compiler knows the argument to \code{length} is a list,
6492 because the call to \code{length} is only done when \code{x} is a
6493 list.  The most significant efficiency effect of inference from
6494 assertions is usually in type check optimization.
6495
6496
6497 Dynamic type inference has two inputs: explicit conditionals and
6498 implicit or explicit type assertions.  Flow analysis propagates these
6499 constraints on variable type to any code that can be executed only
6500 after passing through the constraint.  Explicit type constraints come
6501 from \findexed{if}s where the test is either a lexical variable or a
6502 function of lexical variables and constants, where the function is
6503 either a type predicate, a numeric comparison or \code{eq}.
6504
6505 If there is an \code{eq} (or \code{eql}) test, then the compiler will
6506 actually substitute one argument for the other in the true branch.
6507 For example:
6508 \begin{lisp}
6509 (when (eq x :yow!) (return x))
6510 \end{lisp}
6511 becomes:
6512 \begin{lisp}
6513 (when (eq x :yow!) (return :yow!))
6514 \end{lisp}
6515 This substitution is done when one argument is a constant, or one
6516 argument has better type information than the other.  This
6517 transformation reveals opportunities for constant folding or
6518 type-specific optimizations.  If the test is against a constant, then
6519 the compiler can prove that the variable is not that constant value in
6520 the false branch, or \w{\code{(not (member :yow!))}}  in the example
6521 above.  This can eliminate redundant tests, for example:
6522 \begin{example}
6523 (if (eq x nil)
6524     ...
6525     (if x a b))
6526 \end{example}
6527 is transformed to this:
6528 \begin{example}
6529 (if (eq x nil)
6530     ...
6531     a)
6532 \end{example}
6533 Variables appearing as \code{if} tests are interpreted as
6534 \code{\w{(not (eq \var{var} nil))}} tests.  The compiler also converts
6535 \code{=} into \code{eql} where possible.  It is difficult to do
6536 inference directly on \code{=} since it does implicit coercions.
6537
6538 When there is an explicit \code{$<$} or \code{$>$} test on
6539 \begin{changebar}
6540   numeric
6541 \end{changebar}
6542 variables, the compiler makes inferences about the ranges the
6543 variables can assume in the true and false branches.  This is mainly
6544 useful when it proves that the values are small enough in magnitude to
6545 allow open-coding of arithmetic operations.  For example, in many uses
6546 of \code{dotimes} with a \code{fixnum} repeat count, the compiler
6547 proves that fixnum arithmetic can be used.
6548
6549 Implicit type assertions are quite common, especially if you declare
6550 function argument types.  Dynamic inference from implicit type
6551 assertions sometimes helps to disambiguate programs to a useful
6552 degree, but is most noticeable when it detects a dynamic type error.
6553 For example:
6554 \begin{lisp}
6555 (defun foo (x)
6556   (+ (car x) x))
6557 \end{lisp}
6558 results in this warning:
6559 \begin{example}
6560 In: DEFUN FOO
6561   (+ (CAR X) X)
6562 ==>
6563   X
6564 Warning: Result is a LIST, not a NUMBER.
6565 \end{example}
6566
6567 Note that \llisp{}'s dynamic type checking semantics make dynamic type
6568 inference useful even in programs that aren't really dynamically
6569 typed, for example:
6570 \begin{lisp}
6571 (+ (car x) (length x))
6572 \end{lisp}
6573 Here, \code{x} presumably always holds a list, but in the absence of a
6574 declaration the compiler cannot assume \code{x} is a list simply
6575 because list-specific operations are sometimes done on it.  The
6576 compiler must consider the program to be dynamically typed until it
6577 proves otherwise.  Dynamic type inference proves that the argument to
6578 \code{length} is always a list because the call to \code{length} is
6579 only done after the list-specific \code{car} operation.
6580
6581
6582 %%\node Type Check Optimization,  , Dynamic Type Inference, Type Inference
6583 \subsection{Type Check Optimization}
6584 \label{type-check-optimization}
6585 \cpsubindex{type checking}{optimization}
6586 \cpsubindex{optimization}{type check}
6587
6588 Python backs up its support for precise type checking by minimizing
6589 the cost of run-time type checking.  This is done both through type
6590 inference and through optimizations of type checking itself.
6591
6592 Type inference often allows the compiler to prove that a value is of
6593 the correct type, and thus no type check is necessary.  For example:
6594 \begin{lisp}
6595 (defstruct foo a b c)
6596 (defstruct link
6597   (foo (required-argument) :type foo)
6598   (next nil :type (or link null)))
6599
6600 (foo-a (link-foo x))
6601 \end{lisp}
6602 Here, there is no need to check that the result of \code{link-foo} is
6603 a \code{foo}, since it always is.  Even when some type checks are
6604 necessary, type inference can often reduce the number:
6605 \begin{example}
6606 (defun test (x)
6607   (let ((a (foo-a x))
6608         (b (foo-b x))
6609         (c (foo-c x)))
6610     ...))
6611 \end{example}
6612 In this example, only one \w{\code{(foo-p x)}} check is needed.  This
6613 applies to a lesser degree in list operations, such as:
6614 \begin{lisp}
6615 (if (eql (car x) 3) (cdr x) y)
6616 \end{lisp}
6617 Here, we only have to check that \code{x} is a list once.
6618
6619 Since \python{} recognizes explicit type tests, code that explicitly
6620 protects itself against type errors has little introduced overhead due
6621 to implicit type checking.  For example, this loop compiles with no
6622 implicit checks for \code{car} and \code{cdr}:
6623 \begin{lisp}
6624 (defun memq (e l)
6625   (do ((current l (cdr current)))
6626       ((atom current) nil)
6627     (when (eq (car current) e) (return current))))
6628 \end{lisp}
6629
6630 \cindex{complemented type checks}
6631 Python reduces the cost of checks that must be done through an
6632 optimization called \var{complementing}.  A complemented check for
6633 \var{type} is simply a check that the value is not of the type
6634 \w{\code{(not \var{type})}}.  This is only interesting when something
6635 is known about the actual type, in which case we can test for the
6636 complement of \w{\code{(and \var{known-type} (not \var{type}))}}, or
6637 the difference between the known type and the assertion.  An example:
6638 \begin{lisp}
6639 (link-foo (link-next x))
6640 \end{lisp}
6641 Here, we change the type check for \code{link-foo} from a test for
6642 \code{link} to a test for:
6643 \begin{lisp}
6644 (not (and (or link null) (not link)))
6645 \end{lisp}
6646 or more simply \w{\code{(not null)}}.  This is probably the most
6647 important use of complementing, since the situation is fairly common,
6648 and a \code{null} test is much cheaper than a structure type test.
6649
6650 Here is a more complicated example that illustrates the combination of
6651 complementing with dynamic type inference:
6652 \begin{lisp}
6653 (defun find-a (a x)
6654   (declare (type (or link null) x))
6655   (do ((current x (link-next current)))
6656       ((null current) nil)
6657     (let ((foo (link-foo current)))
6658       (when (eq (foo-a foo) a) (return foo)))))
6659 \end{lisp}
6660 This loop can be compiled with no type checks.  The \code{link} test
6661 for \code{link-foo} and \code{link-next} is complemented to
6662 \w{\code{(not null)}}, and then deleted because of the explicit
6663 \code{null} test.  As before, no check is necessary for \code{foo-a},
6664 since the \code{link-foo} is always a \code{foo}.  This sort of
6665 situation shows how precise type checking combined with precise
6666 declarations can actually result in reduced type checking.
6667
6668 %%\f
6669 %%\node Source Optimization, Tail Recursion, Type Inference, Advanced Compiler Use and Efficiency Hints
6670 \section{Source Optimization}
6671 \label{source-optimization}
6672 \cindex{optimization}
6673
6674 This section describes source-level transformations that \python{} does on
6675 programs in an attempt to make them more efficient.  Although source-level
6676 optimizations can make existing programs more efficient, the biggest advantage
6677 of this sort of optimization is that it makes it easier to write efficient
6678 programs.  If a clean, straightforward implementation can be transformed
6679 into an efficient one, then there is no need for tricky and dangerous hand
6680 optimization.
6681
6682 \begin{comment}
6683 * Let Optimization::
6684 * Constant Folding::
6685 * Unused Expression Elimination::
6686 * Control Optimization::
6687 * Unreachable Code Deletion::
6688 * Multiple Values Optimization::
6689 * Source to Source Transformation::
6690 * Style Recommendations::
6691 \end{comment}
6692
6693 %%\node Let Optimization, Constant Folding, Source Optimization, Source Optimization
6694 \subsection{Let Optimization}
6695 \label{let-optimization}
6696
6697 \cindex{let optimization} \cpsubindex{optimization}{let}
6698
6699 The primary optimization of let variables is to delete them when they
6700 are unnecessary.  Whenever the value of a let variable is a constant,
6701 a constant variable or a constant (local or non-notinline) function,
6702 the variable is deleted, and references to the variable are replaced
6703 with references to the constant expression.  This is useful primarily
6704 in the expansion of macros or inline functions, where argument values
6705 are often constant in any given call, but are in general non-constant
6706 expressions that must be bound to preserve order of evaluation.  Let
6707 variable optimization eliminates the need for macros to carefully
6708 avoid spurious bindings, and also makes inline functions just as
6709 efficient as macros.
6710
6711 A particularly interesting class of constant is a local function.
6712 Substituting for lexical variables that are bound to a function can
6713 substantially improve the efficiency of functional programming styles,
6714 for example:
6715 \begin{lisp}
6716 (let ((a #'(lambda (x) (zow x))))
6717   (funcall a 3))
6718 \end{lisp}
6719 effectively transforms to:
6720 \begin{lisp}
6721 (zow 3)
6722 \end{lisp}
6723 This transformation is done even when the function is a closure, as in:
6724 \begin{lisp}
6725 (let ((a (let ((y (zug)))
6726            #'(lambda (x) (zow x y)))))
6727   (funcall a 3))
6728 \end{lisp}
6729 becoming:
6730 \begin{lisp}
6731 (zow 3 (zug))
6732 \end{lisp}
6733
6734 A constant variable is a lexical variable that is never assigned to,
6735 always keeping its initial value.  Whenever possible, avoid setting
6736 lexical variables\dash{}instead bind a new variable to the new value.
6737 Except for loop variables, it is almost always possible to avoid
6738 setting lexical variables.  This form:
6739 \begin{example}
6740 (let ((x (f x)))
6741   ...)
6742 \end{example}
6743 is \var{more} efficient than this form:
6744 \begin{example}
6745 (setq x (f x))
6746 ...
6747 \end{example}
6748 Setting variables makes the program more difficult to understand, both
6749 to the compiler and to the programmer.  \python{} compiles assignments
6750 at least as efficiently as any other \llisp{} compiler, but most let
6751 optimizations are only done on constant variables.
6752
6753 Constant variables with only a single use are also optimized away,
6754 even when the initial value is not constant.\footnote{The source
6755   transformation in this example doesn't represent the preservation of
6756   evaluation order implicit in the compiler's internal representation.
6757   Where necessary, the back end will reintroduce temporaries to
6758   preserve the semantics.}  For example, this expansion of
6759 \code{incf}:
6760 \begin{lisp}
6761 (let ((#:g3 (+ x 1)))
6762   (setq x #:G3))
6763 \end{lisp}
6764 becomes:
6765 \begin{lisp}
6766 (setq x (+ x 1))
6767 \end{lisp}
6768 The type semantics of this transformation are more important than the
6769 elimination of the variable itself.  Consider what happens when
6770 \code{x} is declared to be a \code{fixnum}; after the transformation,
6771 the compiler can compile the addition knowing that the result is a
6772 \code{fixnum}, whereas before the transformation the addition would
6773 have to allow for fixnum overflow.
6774
6775 Another variable optimization deletes any variable that is never read.
6776 This causes the initial value and any assigned values to be unused,
6777 allowing those expressions to be deleted if they have no side-effects.
6778
6779 Note that a let is actually a degenerate case of local call
6780 (\pxlref{let-calls}), and that let optimization can be done on calls
6781 that weren't created by a let.  Also, local call allows an applicative
6782 style of iteration that is totally assignment free.
6783
6784 %%\node Constant Folding, Unused Expression Elimination, Let Optimization, Source Optimization
6785 \subsection{Constant Folding}
6786 \cindex{constant folding}
6787 \cpsubindex{folding}{constant}
6788
6789 Constant folding is an optimization that replaces a call of constant
6790 arguments with the constant result of that call.  Constant folding is
6791 done on all standard functions for which it is legal.  Inline
6792 expansion allows folding of any constant parts of the definition, and
6793 can be done even on functions that have side-effects.
6794
6795 It is convenient to rely on constant folding when programming, as in this
6796 example:
6797 \begin{example}
6798 (defconstant limit 42)
6799
6800 (defun foo ()
6801   (... (1- limit) ...))
6802 \end{example}
6803 Constant folding is also helpful when writing macros or inline
6804 functions, since it usually eliminates the need to write a macro that
6805 special-cases constant arguments.
6806
6807 \cindex{constant-function declaration} Constant folding of a user
6808 defined function is enabled by the \code{extensions:constant-function}
6809 proclamation.  In this example:
6810 \begin{example}
6811 (declaim (ext:constant-function myexp))
6812 (defun myexp (x y)
6813   (declare (single-float x y))
6814   (exp (* (log x) y)))
6815
6816  ... (myexp 3.0 1.3) ...
6817 \end{example}
6818 The call to \code{myexp} is constant-folded to \code{4.1711674}.
6819
6820
6821 %%\node Unused Expression Elimination, Control Optimization, Constant Folding, Source Optimization
6822 \subsection{Unused Expression Elimination}
6823 \cindex{unused expression elimination}
6824 \cindex{dead code elimination}
6825
6826 If the value of any expression is not used, and the expression has no
6827 side-effects, then it is deleted.  As with constant folding, this
6828 optimization applies most often when cleaning up after inline
6829 expansion and other optimizations.  Any function declared an
6830 \code{extensions:constant-function} is also subject to unused
6831 expression elimination.
6832
6833 Note that \python{} will eliminate parts of unused expressions known
6834 to be side-effect free, even if there are other unknown parts.  For
6835 example:
6836 \begin{lisp}
6837 (let ((a (list (foo) (bar))))
6838   (if t
6839       (zow)
6840       (raz a)))
6841 \end{lisp}
6842 becomes:
6843 \begin{lisp}
6844 (progn (foo) (bar))
6845 (zow)
6846 \end{lisp}
6847
6848
6849 %%\node Control Optimization, Unreachable Code Deletion, Unused Expression Elimination, Source Optimization
6850 \subsection{Control Optimization}
6851 \cindex{control optimization}
6852 \cpsubindex{optimization}{control}
6853
6854 The most important optimization of control is recognizing when an
6855 \findexed{if} test is known at compile time, then deleting the
6856 \code{if}, the test expression, and the unreachable branch of the
6857 \code{if}.  This can be considered a special case of constant folding,
6858 although the test doesn't have to be truly constant as long as it is
6859 definitely not \false.  Note also, that type inference propagates the
6860 result of an \code{if} test to the true and false branches,
6861 \pxlref{constraint-propagation}.
6862
6863 A related \code{if} optimization is this transformation:\footnote{Note
6864   that the code for \code{x} and \code{y} isn't actually replicated.}
6865 \begin{lisp}
6866 (if (if a b c) x y)
6867 \end{lisp}
6868 into:
6869 \begin{lisp}
6870 (if a
6871     (if b x y)
6872     (if c x y))
6873 \end{lisp}
6874 The opportunity for this sort of optimization usually results from a
6875 conditional macro.  For example:
6876 \begin{lisp}
6877 (if (not a) x y)
6878 \end{lisp}
6879 is actually implemented as this:
6880 \begin{lisp}
6881 (if (if a nil t) x y)
6882 \end{lisp}
6883 which is transformed to this:
6884 \begin{lisp}
6885 (if a
6886     (if nil x y)
6887     (if t x y))
6888 \end{lisp}
6889 which is then optimized to this:
6890 \begin{lisp}
6891 (if a y x)
6892 \end{lisp}
6893 Note that due to \python{}'s internal representations, the
6894 \code{if}\dash{}\code{if} situation will be recognized even if other
6895 forms are wrapped around the inner \code{if}, like:
6896 \begin{example}
6897 (if (let ((g ...))
6898       (loop
6899         ...
6900         (return (not g))
6901         ...))
6902     x y)
6903 \end{example}
6904
6905 In \python, all the \clisp{} macros really are macros, written in
6906 terms of \code{if}, \code{block} and \code{tagbody}, so user-defined
6907 control macros can be just as efficient as the standard ones.
6908 \python{} emits basic blocks using a heuristic that minimizes the
6909 number of unconditional branches.  The code in a \code{tagbody} will
6910 not be emitted in the order it appeared in the source, so there is no
6911 point in arranging the code to make control drop through to the
6912 target.
6913
6914 %%\node Unreachable Code Deletion, Multiple Values Optimization, Control Optimization, Source Optimization
6915 \subsection{Unreachable Code Deletion}
6916 \label{dead-code-notes}
6917 \cindex{unreachable code deletion}
6918 \cindex{dead code elimination}
6919
6920 Python will delete code whenever it can prove that the code can never be
6921 executed.  Code becomes unreachable when:
6922 \begin{itemize}
6923
6924 \item
6925 An \code{if} is optimized away, or
6926
6927 \item
6928 There is an explicit unconditional control transfer such as \code{go} or
6929 \code{return-from}, or
6930
6931 \item
6932 The last reference to a local function is deleted (or there never was any
6933 reference.)
6934 \end{itemize}
6935
6936
6937 When code that appeared in the original source is deleted, the compiler prints
6938 a note to indicate a possible problem (or at least unnecessary code.)  For
6939 example:
6940 \begin{lisp}
6941 (defun foo ()
6942   (if t
6943       (write-line "True.")
6944       (write-line "False.")))
6945 \end{lisp}
6946 will result in this note:
6947 \begin{example}
6948 In: DEFUN FOO
6949   (WRITE-LINE "False.")
6950 Note: Deleting unreachable code.
6951 \end{example}
6952
6953 It is important to pay attention to unreachable code notes, since they often
6954 indicate a subtle type error.  For example:
6955 \begin{example}
6956 (defstruct foo a b)
6957
6958 (defun lose (x)
6959   (let ((a (foo-a x))
6960         (b (if x (foo-b x) :none)))
6961     ...))
6962 \end{example}
6963 results in this note:
6964 \begin{example}
6965 In: DEFUN LOSE
6966   (IF X (FOO-B X) :NONE)
6967 ==>
6968   :NONE
6969 Note: Deleting unreachable code.
6970 \end{example}
6971 The \kwd{none} is unreachable, because type inference knows that the argument
6972 to \code{foo-a} must be a \code{foo}, and thus can't be \false.  Presumably the
6973 programmer forgot that \code{x} could be \false{} when he wrote the binding for
6974 \code{a}.
6975
6976 Here is an example with an incorrect declaration:
6977 \begin{lisp}
6978 (defun count-a (string)
6979   (do ((pos 0 (position #\back{a} string :start (1+ pos)))
6980        (count 0 (1+ count)))
6981       ((null pos) count)
6982     (declare (fixnum pos))))
6983 \end{lisp}
6984 This time our note is:
6985 \begin{example}
6986 In: DEFUN COUNT-A
6987   (DO ((POS 0 #) (COUNT 0 #))
6988       ((NULL POS) COUNT)
6989     (DECLARE (FIXNUM POS)))
6990 --> BLOCK LET TAGBODY RETURN-FROM PROGN
6991 ==>
6992   COUNT
6993 Note: Deleting unreachable code.
6994 \end{example}
6995 The problem here is that \code{pos} can never be null since it is declared a
6996 \code{fixnum}.
6997
6998 It takes some experience with unreachable code notes to be able to
6999 tell what they are trying to say.  In non-obvious cases, the best
7000 thing to do is to call the function in a way that should cause the
7001 unreachable code to be executed.  Either you will get a type error, or
7002 you will find that there truly is no way for the code to be executed.
7003
7004 Not all unreachable code results in a note:
7005 \begin{itemize}
7006
7007 \item A note is only given when the unreachable code textually appears
7008   in the original source.  This prevents spurious notes due to the
7009   optimization of macros and inline functions, but sometimes also
7010   foregoes a note that would have been useful.
7011
7012 \item Since accurate source information is not available for non-list
7013   forms, there is an element of heuristic in determining whether or
7014   not to give a note about an atom.  Spurious notes may be given when
7015   a macro or inline function defines a variable that is also present
7016   in the calling function.  Notes about \false{} and \true{} are never
7017   given, since it is too easy to confuse these constants in expanded
7018   code with ones in the original source.
7019
7020 \item Notes are only given about code unreachable due to control flow.
7021   There is no note when an expression is deleted because its value is
7022   unused, since this is a common consequence of other optimizations.
7023 \end{itemize}
7024
7025
7026 Somewhat spurious unreachable code notes can also result when a macro
7027 inserts multiple copies of its arguments in different contexts, for
7028 example:
7029 \begin{lisp}
7030 (defmacro t-and-f (var form)
7031   `(if ,var ,form ,form))
7032
7033 (defun foo (x)
7034   (t-and-f x (if x "True." "False.")))
7035 \end{lisp}
7036 results in these notes:
7037 \begin{example}
7038 In: DEFUN FOO
7039   (IF X "True." "False.")
7040 ==>
7041   "False."
7042 Note: Deleting unreachable code.
7043
7044 ==>
7045   "True."
7046 Note: Deleting unreachable code.
7047 \end{example}
7048 It seems like it has deleted both branches of the \code{if}, but it has really
7049 deleted one branch in one copy, and the other branch in the other copy.  Note
7050 that these messages are only spurious in not satisfying the intent of the rule
7051 that notes are only given when the deleted code appears in the original source;
7052 there is always \var{some} code being deleted when an unreachable code note is
7053 printed.
7054
7055
7056 %%\node Multiple Values Optimization, Source to Source Transformation, Unreachable Code Deletion, Source Optimization
7057 \subsection{Multiple Values Optimization}
7058 \cindex{multiple value optimization}
7059 \cpsubindex{optimization}{multiple value}
7060
7061 Within a function, \python{} implements uses of multiple values
7062 particularly efficiently.  Multiple values can be kept in arbitrary
7063 registers, so using multiple values doesn't imply stack manipulation
7064 and representation conversion.  For example, this code:
7065 \begin{example}
7066 (let ((a (if x (foo x) u))
7067       (b (if x (bar x) v)))
7068   ...)
7069 \end{example}
7070 is actually more efficient written this way:
7071 \begin{example}
7072 (multiple-value-bind
7073     (a b)
7074     (if x
7075         (values (foo x) (bar x))
7076         (values u v))
7077   ...)
7078 \end{example}
7079
7080 Also, \pxlref{local-call-return} for information on how local call
7081 provides efficient support for multiple function return values.
7082
7083
7084 %%\node Source to Source Transformation, Style Recommendations, Multiple Values Optimization, Source Optimization
7085 \subsection{Source to Source Transformation}
7086 \cindex{source-to-source transformation}
7087 \cpsubindex{transformation}{source-to-source}
7088
7089 The compiler implements a number of operation-specific optimizations as
7090 source-to-source transformations.  You will often see unfamiliar code in error
7091 messages, for example:
7092 \begin{lisp}
7093 (defun my-zerop () (zerop x))
7094 \end{lisp}
7095 gives this warning:
7096 \begin{example}
7097 In: DEFUN MY-ZEROP
7098   (ZEROP X)
7099 ==>
7100   (= X 0)
7101 Warning: Undefined variable: X
7102 \end{example}
7103 The original \code{zerop} has been transformed into a call to
7104 \code{=}.  This transformation is indicated with the same \code{==$>$}
7105 used to mark macro and function inline expansion.  Although it can be
7106 confusing, display of the transformed source is important, since
7107 warnings are given with respect to the transformed source.  This is a
7108 more obscure example:
7109 \begin{lisp}
7110 (defun foo (x) (logand 1 x))
7111 \end{lisp}
7112 gives this efficiency note:
7113 \begin{example}
7114 In: DEFUN FOO
7115   (LOGAND 1 X)
7116 ==>
7117   (LOGAND C::Y C::X)
7118 Note: Forced to do static-function Two-arg-and (cost 53).
7119       Unable to do inline fixnum arithmetic (cost 1) because:
7120       The first argument is a INTEGER, not a FIXNUM.
7121       etc.
7122 \end{example}
7123 Here, the compiler commuted the call to \code{logand}, introducing
7124 temporaries.  The note complains that the \var{first} argument is not
7125 a \code{fixnum}, when in the original call, it was the second
7126 argument.  To make things more confusing, the compiler introduced
7127 temporaries called \code{c::x} and \code{c::y} that are bound to
7128 \code{1} and \code{x}, respectively.
7129
7130 You will also notice source-to-source optimizations when efficiency
7131 notes are enabled (\pxlref{efficiency-notes}.)  When the compiler is
7132 unable to do a transformation that might be possible if there was more
7133 information, then an efficiency note is printed.  For example,
7134 \code{my-zerop} above will also give this efficiency note:
7135 \begin{example}
7136 In: DEFUN MY-ZEROP
7137   (ZEROP X)
7138 ==>
7139   (= X 0)
7140 Note: Unable to optimize because:
7141       Operands might not be the same type, so can't open code.
7142 \end{example}
7143
7144 %%\node Style Recommendations,  , Source to Source Transformation, Source Optimization
7145 \subsection{Style Recommendations}
7146 \cindex{style recommendations}
7147
7148 Source level optimization makes possible a clearer and more relaxed programming
7149 style:
7150 \begin{itemize}
7151
7152 \item Don't use macros purely to avoid function call.  If you want an
7153   inline function, write it as a function and declare it inline.  It's
7154   clearer, less error-prone, and works just as well.
7155
7156 \item Don't write macros that try to ``optimize'' their expansion in
7157   trivial ways such as avoiding binding variables for simple
7158   expressions.  The compiler does these optimizations too, and is less
7159   likely to make a mistake.
7160
7161 \item Make use of local functions (i.e., \code{labels} or \code{flet})
7162   and tail-recursion in places where it is clearer.  Local function
7163   call is faster than full call.
7164
7165 \item Avoid setting local variables when possible.  Binding a new
7166   \code{let} variable is at least as efficient as setting an existing
7167   variable, and is easier to understand, both for the compiler and the
7168   programmer.
7169
7170 \item Instead of writing similar code over and over again so that it
7171   can be hand customized for each use, define a macro or inline
7172   function, and let the compiler do the work.
7173 \end{itemize}
7174
7175
7176 %%\f
7177 %%\node Tail Recursion, Local Call, Source Optimization, Advanced Compiler Use and Efficiency Hints
7178 \section{Tail Recursion}
7179 \label{tail-recursion}
7180 \cindex{tail recursion}
7181 \cindex{recursion}
7182
7183 A call is tail-recursive if nothing has to be done after the call
7184 returns, i.e. when the call returns, the returned value is immediately
7185 returned from the calling function.  In this example, the recursive
7186 call to \code{myfun} is tail-recursive:
7187 \begin{lisp}
7188 (defun myfun (x)
7189   (if (oddp (random x))
7190       (isqrt x)
7191       (myfun (1- x))))
7192 \end{lisp}
7193
7194 Tail recursion is interesting because it is a form of recursion that can be
7195 implemented much more efficiently than general recursion.  In general, a
7196 recursive call requires the compiler to allocate storage on the stack at
7197 run-time for every call that has not yet returned.  This memory consumption
7198 makes recursion unacceptably inefficient for representing repetitive algorithms
7199 having large or unbounded size.  Tail recursion is the special case of
7200 recursion that is semantically equivalent to the iteration constructs normally
7201 used to represent repetition in programs.  Because tail recursion is equivalent
7202 to iteration, tail-recursive programs can be compiled as efficiently as
7203 iterative programs.
7204
7205 So why would you want to write a program recursively when you can write it
7206 using a loop?  Well, the main answer is that recursion is a more general
7207 mechanism, so it can express some solutions simply that are awkward to write as
7208 a loop.  Some programmers also feel that recursion is a stylistically
7209 preferable way to write loops because it avoids assigning variables.
7210 For example, instead of writing:
7211 \begin{lisp}
7212 (defun fun1 (x)
7213   something-that-uses-x)
7214
7215 (defun fun2 (y)
7216   something-that-uses-y)
7217
7218 (do ((x something (fun2 (fun1 x))))
7219     (nil))
7220 \end{lisp}
7221 You can write:
7222 \begin{lisp}
7223 (defun fun1 (x)
7224   (fun2 something-that-uses-x))
7225
7226 (defun fun2 (y)
7227   (fun1 something-that-uses-y))
7228
7229 (fun1 something)
7230 \end{lisp}
7231 The tail-recursive definition is actually more efficient, in addition to being
7232 (arguably) clearer.  As the number of functions and the complexity of their
7233 call graph increases, the simplicity of using recursion becomes compelling.
7234 Consider the advantages of writing a large finite-state machine with separate
7235 tail-recursive functions instead of using a single huge \code{prog}.
7236
7237 It helps to understand how to use tail recursion if you think of a
7238 tail-recursive call as a \code{psetq} that assigns the argument values to the
7239 called function's variables, followed by a \code{go} to the start of the called
7240 function.  This makes clear an inherent efficiency advantage of tail-recursive
7241 call: in addition to not having to allocate a stack frame, there is no need to
7242 prepare for the call to return (e.g., by computing a return PC.)
7243
7244 Is there any disadvantage to tail recursion?  Other than an increase
7245 in efficiency, the only way you can tell that a call has been compiled
7246 tail-recursively is if you use the debugger.  Since a tail-recursive
7247 call has no stack frame, there is no way the debugger can print out
7248 the stack frame representing the call.  The effect is that backtrace
7249 will not show some calls that would have been displayed in a
7250 non-tail-recursive implementation.  In practice, this is not as bad as
7251 it sounds\dash{}in fact it isn't really clearly worse, just different.
7252 \xlref{debug-tail-recursion} for information about the debugger
7253 implications of tail recursion.
7254
7255 In order to ensure that tail-recursion is preserved in arbitrarily
7256 complex calling patterns across separately compiled functions, the
7257 compiler must compile any call in a tail-recursive position as a
7258 tail-recursive call.  This is done regardless of whether the program
7259 actually exhibits any sort of recursive calling pattern.  In this
7260 example, the call to \code{fun2} will always be compiled as a
7261 tail-recursive call:
7262 \begin{lisp}
7263 (defun fun1 (x)
7264   (fun2 x))
7265 \end{lisp}
7266 So tail recursion doesn't necessarily have anything to do with recursion
7267 as it is normally thought of.  \xlref{local-tail-recursion} for more
7268 discussion of using tail recursion to implement loops.
7269
7270 \begin{comment}
7271 * Tail Recursion Exceptions::
7272 \end{comment}
7273
7274 %%\node Tail Recursion Exceptions,  , Tail Recursion, Tail Recursion
7275 \subsection{Tail Recursion Exceptions}
7276
7277 Although \python{} is claimed to be ``properly'' tail-recursive, some
7278 might dispute this, since there are situations where tail recursion is
7279 inhibited:
7280 \begin{itemize}
7281
7282 \item When the call is enclosed by a special binding, or
7283
7284 \item When the call is enclosed by a \code{catch} or
7285   \code{unwind-protect}, or
7286
7287 \item When the call is enclosed by a \code{block} or \code{tagbody}
7288   and the block name or \code{go} tag has been closed over.
7289 \end{itemize}
7290 These dynamic extent binding forms inhibit tail recursion because they
7291 allocate stack space to represent the binding.  Shallow-binding
7292 implementations of dynamic scoping also require cleanup code to be
7293 evaluated when the scope is exited.
7294
7295 %%\f
7296 %%\node Local Call, Block Compilation, Tail Recursion, Advanced Compiler Use and Efficiency Hints
7297 \section{Local Call}
7298 \label{local-call}
7299 \cindex{local call}
7300 \cpsubindex{call}{local}
7301 \cpsubindex{function call}{local}
7302
7303 Python supports two kinds of function call: full call and local call.
7304 Full call is the standard calling convention; its late binding and
7305 generality make \llisp{} what it is, but create unavoidable overheads.
7306 When the compiler can compile the calling function and the called
7307 function simultaneously, it can use local call to avoid some of the
7308 overhead of full call.  Local call is really a collection of
7309 compilation strategies.  If some aspect of call overhead is not needed
7310 in a particular local call, then it can be omitted.  In some cases,
7311 local call can be totally free.  Local call provides two main
7312 advantages to the user:
7313 \begin{itemize}
7314
7315 \item Local call makes the use of the lexical function binding forms
7316   \findexed{flet} and \findexed{labels} much more efficient.  A local
7317   call is always faster than a full call, and in many cases is much
7318   faster.
7319
7320 \item Local call is a natural approach to \i{block compilation}, a
7321   compilation technique that resolves function references at compile
7322   time.  Block compilation speeds function call, but increases
7323   compilation times and prevents function redefinition.
7324 \end{itemize}
7325
7326
7327 \begin{comment}
7328 * Self-Recursive Calls::
7329 * Let Calls::
7330 * Closures::
7331 * Local Tail Recursion::
7332 * Return Values::
7333 \end{comment}
7334
7335 %%\node Self-Recursive Calls, Let Calls, Local Call, Local Call
7336 \subsection{Self-Recursive Calls}
7337 \cpsubindex{recursion}{self}
7338
7339 Local call is used when a function defined by \code{defun} calls itself.  For
7340 example:
7341 \begin{lisp}
7342 (defun fact (n)
7343   (if (zerop n)
7344       1
7345       (* n (fact (1- n)))))
7346 \end{lisp}
7347 This use of local call speeds recursion, but can also complicate
7348 debugging, since \findexed{trace} will only show the first call to
7349 \code{fact}, and not the recursive calls.  This is because the
7350 recursive calls directly jump to the start of the function, and don't
7351 indirect through the \code{symbol-function}.  Self-recursive local
7352 call is inhibited when the \kwd{block-compile} argument to
7353 \code{compile-file} is \false{} (\pxlref{compile-file-block}.)
7354
7355 %%\node Let Calls, Closures, Self-Recursive Calls, Local Call
7356 \subsection{Let Calls}
7357 \label{let-calls}
7358 Because local call avoids unnecessary call overheads, the compiler
7359 internally uses local call to implement some macros and special forms
7360 that are not normally thought of as involving a function call.  For
7361 example, this \code{let}:
7362 \begin{example}
7363 (let ((a (foo))
7364       (b (bar)))
7365   ...)
7366 \end{example}
7367 is internally represented as though it was macroexpanded into:
7368 \begin{example}
7369 (funcall #'(lambda (a b)
7370              ...)
7371          (foo)
7372          (bar))
7373 \end{example}
7374 This implementation is acceptable because the simple cases of local
7375 call (equivalent to a \code{let}) result in good code.  This doesn't
7376 make \code{let} any more efficient, but does make local calls that are
7377 semantically the same as \code{let} much more efficient than full
7378 calls.  For example, these definitions are all the same as far as the
7379 compiler is concerned:
7380 \begin{example}
7381 (defun foo ()
7382   ...some other stuff...
7383   (let ((a something))
7384     ...some stuff...))
7385
7386 (defun foo ()
7387   (flet ((localfun (a)
7388            ...some stuff...))
7389     ...some other stuff...
7390     (localfun something)))
7391
7392 (defun foo ()
7393   (let ((funvar #'(lambda (a)
7394                     ...some stuff...)))
7395     ...some other stuff...
7396     (funcall funvar something)))
7397 \end{example}
7398
7399 Although local call is most efficient when the function is called only
7400 once, a call doesn't have to be equivalent to a \code{let} to be more
7401 efficient than full call.  All local calls avoid the overhead of
7402 argument count checking and keyword argument parsing, and there are a
7403 number of other advantages that apply in many common situations.
7404 \xlref{let-optimization} for a discussion of the optimizations done on
7405 let calls.
7406
7407 %%\node Closures, Local Tail Recursion, Let Calls, Local Call
7408 \subsection{Closures}
7409 \cindex{closures}
7410
7411 Local call allows for much more efficient use of closures, since the
7412 closure environment doesn't need to be allocated on the heap, or even
7413 stored in memory at all.  In this example, there is no penalty for
7414 \code{localfun} referencing \code{a} and \code{b}:
7415 \begin{lisp}
7416 (defun foo (a b x)
7417   (flet ((localfun (x)
7418            (1+ (* a b x))))
7419     (if (= a b)
7420         (localfun (- x))
7421         (localfun x))))
7422 \end{lisp}
7423 In local call, the compiler effectively passes closed-over values as
7424 extra arguments, so there is no need for you to ``optimize'' local
7425 function use by explicitly passing in lexically visible values.
7426 Closures may also be subject to let optimization
7427 (\pxlref{let-optimization}.)
7428
7429 Note: indirect value cells are currently always allocated on the heap
7430 when a variable is both assigned to (with \code{setq} or \code{setf})
7431 and closed over, regardless of whether the closure is a local function
7432 or not.  This is another reason to avoid setting variables when you
7433 don't have to.
7434
7435 %%\node Local Tail Recursion, Return Values, Closures, Local Call
7436 \subsection{Local Tail Recursion}
7437 \label{local-tail-recursion}
7438 \cindex{tail recursion}
7439 \cpsubindex{recursion}{tail}
7440
7441 Tail-recursive local calls are particularly efficient, since they are
7442 in effect an assignment plus a control transfer.  Scheme programmers
7443 write loops with tail-recursive local calls, instead of using the
7444 imperative \code{go} and \code{setq}.  This has not caught on in the
7445 \clisp{} community, since conventional \llisp{} compilers don't
7446 implement local call.  In \python, users can choose to write loops
7447 such as:
7448 \begin{lisp}
7449 (defun ! (n)
7450   (labels ((loop (n total)
7451              (if (zerop n)
7452                  total
7453                  (loop (1- n) (* n total)))))
7454     (loop n 1)))
7455 \end{lisp}
7456
7457 \begin{defmac}{extensions:}{iterate}{%
7458     \args{\var{name} (\mstar{(\var{var} \var{initial-value})})
7459       \mstar{\var{declaration}} \mstar{\var{form}}}}
7460
7461   This macro provides syntactic sugar for using \findexed{labels} to
7462   do iteration.  It creates a local function \var{name} with the
7463   specified \var{var}s as its arguments and the \var{declaration}s and
7464   \var{form}s as its body.  This function is then called with the
7465   \var{initial-values}, and the result of the call is returned from the
7466   macro.
7467
7468   Here is our factorial example rewritten using \code{iterate}:
7469
7470   \begin{lisp}
7471     (defun ! (n)
7472       (iterate loop
7473                ((n n)
7474                (total 1))
7475         (if (zerop n)
7476           total
7477           (loop (1- n) (* n total)))))
7478   \end{lisp}
7479
7480   The main advantage of using \code{iterate} over \code{do} is that
7481   \code{iterate} naturally allows stepping to be done differently
7482   depending on conditionals in the body of the loop.  \code{iterate}
7483   can also be used to implement algorithms that aren't really
7484   iterative by simply doing a non-tail call.  For example, the
7485   standard recursive definition of factorial can be written like this:
7486 \begin{lisp}
7487 (iterate fact
7488          ((n n))
7489   (if (zerop n)
7490       1
7491       (* n (fact (1- n)))))
7492 \end{lisp}
7493 \end{defmac}
7494
7495 %%\node Return Values,  , Local Tail Recursion, Local Call
7496 \subsection{Return Values}
7497 \label{local-call-return}
7498 \cpsubindex{return values}{local call}
7499 \cpsubindex{local call}{return values}
7500
7501 One of the more subtle costs of full call comes from allowing
7502 arbitrary numbers of return values.  This overhead can be avoided in
7503 local calls to functions that always return the same number of values.
7504 For efficiency reasons (as well as stylistic ones), you should write
7505 functions so that they always return the same number of values.  This
7506 may require passing extra \false{} arguments to \code{values} in some
7507 cases, but the result is more efficient, not less so.
7508
7509 When efficiency notes are enabled (\pxlref{efficiency-notes}), and the
7510 compiler wants to use known values return, but can't prove that the
7511 function always returns the same number of values, then it will print
7512 a note like this:
7513 \begin{example}
7514 In: DEFUN GRUE
7515   (DEFUN GRUE (X) (DECLARE (FIXNUM X)) (COND (# #) (# NIL) (T #)))
7516 Note: Return type not fixed values, so can't use known return convention:
7517   (VALUES (OR (INTEGER -536870912 -1) NULL) &REST T)
7518 \end{example}
7519
7520 In order to implement proper tail recursion in the presence of known
7521 values return (\pxlref{tail-recursion}), the compiler sometimes must
7522 prove that multiple functions all return the same number of values.
7523 When this can't be proven, the compiler will print a note like this:
7524 \begin{example}
7525 In: DEFUN BLUE
7526   (DEFUN BLUE (X) (DECLARE (FIXNUM X)) (COND (# #) (# #) (# #) (T #)))
7527 Note: Return value count mismatch prevents known return from
7528       these functions:
7529   BLUE
7530   SNOO
7531 \end{example}
7532 \xlref{number-local-call} for the interaction between local call
7533 and the representation of numeric types.
7534
7535 %%\f
7536 %%\node Block Compilation, Inline Expansion, Local Call, Advanced Compiler Use and Efficiency Hints
7537 \section{Block Compilation}
7538 \label{block-compilation}
7539 \cindex{block compilation}
7540 \cpsubindex{compilation}{block}
7541
7542 Block compilation allows calls to global functions defined by
7543 \findexed{defun} to be compiled as local calls.  The function call
7544 can be in a different top-level form than the \code{defun}, or even in a
7545 different file.
7546
7547 In addition, block compilation allows the declaration of the \i{entry points}
7548 to the block compiled portion.  An entry point is any function that may be
7549 called from outside of the block compilation.  If a function is not an entry
7550 point, then it can be compiled more efficiently, since all calls are known at
7551 compile time.  In particular, if a function is only called in one place, then
7552 it will be let converted.  This effectively inline expands the function, but
7553 without the code duplication that results from defining the function normally
7554 and then declaring it inline.
7555
7556 The main advantage of block compilation is that it preserves efficiency in
7557 programs even when (for readability and syntactic convenience) they are broken
7558 up into many small functions.  There is absolutely no overhead for calling a
7559 non-entry point function that is defined purely for modularity (i.e. called
7560 only in one place.)
7561
7562 Block compilation also allows the use of non-descriptor arguments and return
7563 values in non-trivial programs (\pxlref{number-local-call}).
7564
7565 \begin{comment}
7566 * Block Compilation Semantics::
7567 * Block Compilation Declarations::
7568 * Compiler Arguments::
7569 * Practical Difficulties::
7570 * Context Declarations::
7571 * Context Declaration Example::
7572 \end{comment}
7573
7574 %%\node Block Compilation Semantics, Block Compilation Declarations, Block Compilation, Block Compilation
7575 \subsection{Block Compilation Semantics}
7576
7577 The effect of block compilation can be envisioned as the compiler turning all
7578 the \code{defun}s in the block compilation into a single \code{labels} form:
7579 \begin{example}
7580 (declaim (start-block fun1 fun3))
7581
7582 (defun fun1 ()
7583   ...)
7584
7585 (defun fun2 ()
7586   ...
7587   (fun1)
7588   ...)
7589
7590 (defun fun3 (x)
7591   (if x
7592       (fun1)
7593       (fun2)))
7594
7595 (declaim (end-block))
7596 \end{example}
7597 becomes:
7598 \begin{example}
7599 (labels ((fun1 ()
7600            ...)
7601          (fun2 ()
7602            ...
7603            (fun1)
7604            ...)
7605          (fun3 (x)
7606            (if x
7607                (fun1)
7608                (fun2))))
7609   (setf (fdefinition 'fun1) #'fun1)
7610   (setf (fdefinition 'fun3) #'fun3))
7611 \end{example}
7612 Calls between the block compiled functions are local calls, so changing the
7613 global definition of \code{fun1} will have no effect on what \code{fun2} does;
7614 \code{fun2} will keep calling the old \code{fun1}.
7615
7616 The entry points \code{fun1} and \code{fun3} are still installed in
7617 the \code{symbol-function} as the global definitions of the functions,
7618 so a full call to an entry point works just as before.  However,
7619 \code{fun2} is not an entry point, so it is not globally defined.  In
7620 addition, \code{fun2} is only called in one place, so it will be let
7621 converted.
7622
7623
7624 %%\node Block Compilation Declarations, Compiler Arguments, Block Compilation Semantics, Block Compilation
7625 \subsection{Block Compilation Declarations}
7626 \cpsubindex{declarations}{block compilation}
7627 \cindex{start-block declaration}
7628 \cindex{end-block declaration}
7629
7630 The \code{extensions:start-block} and \code{extensions:end-block}
7631 declarations allow fine-grained control of block compilation.  These
7632 declarations are only legal as global declarations (\code{declaim}
7633 or \code{proclaim}).
7634
7635 \noindent
7636 \vspace{1 em}
7637 The \code{start-block} declaration has this syntax:
7638 \begin{example}
7639 (start-block \mstar{\var{entry-point-name}})
7640 \end{example}
7641 When processed by the compiler, this declaration marks the start of
7642 block compilation, and specifies the entry points to that block.  If
7643 no entry points are specified, then \var{all} functions are made into
7644 entry points.  If already block compiling, then the compiler ends the
7645 current block and starts a new one.
7646
7647 \noindent
7648 \vspace{1 em}
7649 The \code{end-block} declaration has no arguments:
7650 \begin{lisp}
7651 (end-block)
7652 \end{lisp}
7653 The \code{end-block} declaration ends a block compilation unit without
7654 starting a new one.  This is useful mainly when only a portion of a file
7655 is worth block compiling.
7656
7657 %%\node Compiler Arguments, Practical Difficulties, Block Compilation Declarations, Block Compilation
7658 \subsection{Compiler Arguments}
7659 \label{compile-file-block}
7660 \cpsubindex{compile-file}{block compilation arguments}
7661
7662 The \kwd{block-compile} and \kwd{entry-points} arguments to
7663 \code{extensions:compile-from-stream} and \funref{compile-file} provide overall
7664 control of block compilation, and allow block compilation without requiring
7665 modification of the program source.
7666
7667 There are three possible values of the \kwd{block-compile} argument:
7668 \begin{Lentry}
7669
7670 \item[\false{}] Do no compile-time resolution of global function
7671   names, not even for self-recursive calls.  This inhibits any
7672   \code{start-block} declarations appearing in the file, allowing all
7673   functions to be incrementally redefined.
7674
7675 \item[\true{}] Start compiling in block compilation mode.  This is
7676   mainly useful for block compiling small files that contain no
7677   \code{start-block} declarations.  See also the \kwd{entry-points}
7678   argument.
7679
7680 \item[\kwd{specified}] Start compiling in form-at-a-time mode, but
7681   exploit \code{start-block} declarations and compile self-recursive
7682   calls as local calls.  Normally \kwd{specified} is the default for
7683   this argument (see \varref{block-compile-default}.)
7684 \end{Lentry}
7685
7686 The \kwd{entry-points} argument can be used in conjunction with
7687 \w{\kwd{block-compile} \true{}} to specify the entry-points to a
7688 block-compiled file.  If not specified or \nil, all global functions
7689 will be compiled as entry points.  When \kwd{block-compile} is not
7690 \true, this argument is ignored.
7691
7692 \begin{defvar}{}{block-compile-default}
7693
7694   This variable determines the default value for the
7695   \kwd{block-compile} argument to \code{compile-file} and
7696   \code{compile-from-stream}.  The initial value of this variable is
7697   \kwd{specified}, but \false{} is sometimes useful for totally
7698   inhibiting block compilation.
7699 \end{defvar}
7700
7701 %%\node Practical Difficulties, Context Declarations, Compiler Arguments, Block Compilation
7702 \subsection{Practical Difficulties}
7703
7704 The main problem with block compilation is that the compiler uses
7705 large amounts of memory when it is block compiling.  This places an
7706 upper limit on the amount of code that can be block compiled as a
7707 unit.  To make best use of block compilation, it is necessary to
7708 locate the parts of the program containing many internal calls, and
7709 then add the appropriate \code{start-block} declarations.  When writing
7710 new code, it is a good idea to put in block compilation declarations
7711 from the very beginning, since writing block declarations correctly
7712 requires accurate knowledge of the program's function call structure.
7713 If you want to initially develop code with full incremental
7714 redefinition, you can compile with \varref{block-compile-default} set to
7715 \false.
7716
7717 Note that if a \code{defun} appears in a non-null lexical environment, then
7718 calls to it cannot be block compiled.
7719
7720 Unless files are very small, it is probably impractical to block compile
7721 multiple files as a unit by specifying a list of files to \code{compile-file}.
7722 Semi-inline expansion (\pxlref{semi-inline}) provides another way to
7723 extend block compilation across file boundaries.
7724 %%\f
7725 %%\node Context Declarations, Context Declaration Example, Practical Difficulties, Block Compilation
7726 \subsection{Context Declarations}
7727 \label{context-declarations}
7728 \cindex{context sensitive declarations}
7729 \cpsubindex{declarations}{context-sensitive}
7730
7731 \cmucl{} has a context-sensitive declaration mechanism which is useful
7732 because it allows flexible control of the compilation policy in large
7733 systems without requiring changes to the source files.  The primary
7734 use of this feature is to allow the exported interfaces of a system to
7735 be compiled more safely than the system internals.  The context used
7736 is the name being defined and the kind of definition (function, macro,
7737 etc.)
7738
7739 The \kwd{context-declarations} option to \macref{with-compilation-unit} has
7740 dynamic scope, affecting all compilation done during the evaluation of the
7741 body.  The argument to this option should evaluate to a list of lists of the
7742 form:
7743 \begin{example}
7744 (\var{context-spec} \mplus{\var{declare-form}})
7745 \end{example}
7746 In the indicated context, the specified declare forms are inserted at
7747 the head of each definition.  The declare forms for all contexts that
7748 match are appended together, with earlier declarations getting
7749 precedence over later ones.  A simple example:
7750 \begin{example}
7751     :context-declarations
7752     '((:external (declare (optimize (safety 2)))))
7753 \end{example}
7754 This will cause all functions that are named by external symbols to be
7755 compiled with \code{safety 2}.
7756
7757 The full syntax of context specs is:
7758 \begin{Lentry}
7759
7760 \item[\kwd{internal}, \kwd{external}] True if the symbol is internal
7761   (external) in its home package.
7762
7763 \item[\kwd{uninterned}] True if the symbol has no home package.
7764
7765 \item[\code{\w{(:package \mstar{\var{package-name}})}}] True if the
7766   symbol's home package is in any of the named packages (false if
7767   uninterned.)
7768
7769 \item[\kwd{anonymous}] True if the function doesn't have any
7770   interesting name (not \code{defmacro}, \code{defun}, \code{labels}
7771   or \code{flet}).
7772
7773 \item[\kwd{macro}, \kwd{function}] \kwd{macro} is a global
7774   (\code{defmacro}) macro.  \kwd{function} is anything else.
7775
7776 \item[\kwd{local}, \kwd{global}] \kwd{local} is a \code{labels} or
7777   \code{flet}.  \kwd{global} is anything else.
7778
7779 \item[\code{\w{(:or \mstar{\var{context-spec}})}}] True when any
7780   supplied \var{context-spec} is true.
7781
7782 \item[\code{\w{(:and \mstar{\var{context-spec}})}}] True only when all
7783   supplied \var{context-spec}s are true.
7784
7785 \item[\code{\w{(:not \mstar{\var{context-spec}})}}] True when
7786   \var{context-spec} is false.
7787
7788 \item[\code{\w{(:member \mstar{\var{name}})}}] True when the defined
7789   name is one of these names (\code{equal} test.)
7790
7791 \item[\code{\w{(:match \mstar{\var{pattern}})}}] True when any of the
7792   patterns is a substring of the name.  The name is wrapped with
7793   \code{\$}'s, so ``\code{\$FOO}'' matches names beginning with
7794   ``\code{FOO}'', etc.
7795 \end{Lentry}
7796
7797 %%\node Context Declaration Example,  , Context Declarations, Block Compilation
7798 \subsection{Context Declaration Example}
7799
7800 Here is a more complex example of \code{with-compilation-unit} options:
7801 \begin{example}
7802 :optimize '(optimize (speed 2) (space 2) (inhibit-warnings 2)
7803                      (debug 1) (safety 0))
7804 :optimize-interface '(optimize-interface (safety 1) (debug 1))
7805 :context-declarations
7806 '(((:or :external (:and (:match "\%") (:match "SET")))
7807    (declare (optimize-interface (safety 2))))
7808   ((:or (:and :external :macro)
7809         (:match "\$PARSE-"))
7810    (declare (optimize (safety 2)))))
7811 \end{example}
7812 The \code{optimize} and \code{extensions:optimize-interface}
7813 declarations (\pxlref{optimize-declaration}) set up the global
7814 compilation policy.  The bodies of functions are to be compiled
7815 completely unsafe (\code{safety 0}), but argument count and weakened
7816 argument type checking is to be done when a function is called
7817 (\code{speed 2 safety 1}).
7818
7819 The first declaration specifies that all functions that are external
7820 or whose names contain both ``\code{\%}'' and ``\code{SET}'' are to be
7821 compiled with completely safe interfaces (\code{safety 2}).
7822 The reason for this particular \kwd{match} rule is that \code{setf}
7823 inverse functions in this system tend to have both strings in their
7824 name somewhere.  We want \code{setf} inverses to be safe because they
7825 are implicitly called by users even though their name is not exported.
7826
7827 The second declaration makes external macros or functions whose names
7828 start with ``\code{PARSE-}'' have safe bodies (as well as interfaces).
7829 This is desirable because a syntax error in a macro may cause a type
7830 error inside the body.  The \kwd{match} rule is used because macros
7831 often have auxiliary functions whose names begin with this string.
7832
7833 This particular example is used to build part of the standard \cmucl{}
7834 system.  Note however, that context declarations must be set up
7835 according to the needs and coding conventions of a particular system;
7836 different parts of \cmucl{} are compiled with different context
7837 declarations, and your system will probably need its own declarations.
7838 In particular, any use of the \kwd{match} option depends on naming
7839 conventions used in coding.
7840
7841 %%\f
7842 %%\node Inline Expansion, Byte Coded Compilation, Block Compilation, Advanced Compiler Use and Efficiency Hints
7843 \section{Inline Expansion}
7844 \label{inline-expansion}
7845 \cindex{inline expansion}
7846 \cpsubindex{expansion}{inline}
7847 \cpsubindex{call}{inline}
7848 \cpsubindex{function call}{inline}
7849 \cpsubindex{optimization}{function call}
7850
7851 Python can expand almost any function inline, including functions
7852 with keyword arguments.  The only restrictions are that keyword
7853 argument keywords in the call must be constant, and that global
7854 function definitions (\code{defun}) must be done in a null lexical
7855 environment (not nested in a \code{let} or other binding form.)  Local
7856 functions (\code{flet}) can be inline expanded in any environment.
7857 Combined with \python{}'s source-level optimization, inline expansion
7858 can be used for things that formerly required macros for efficient
7859 implementation.  In \python, macros don't have any efficiency
7860 advantage, so they need only be used where a macro's syntactic
7861 flexibility is required.
7862
7863 Inline expansion is a compiler optimization technique that reduces
7864 the overhead of a function call by simply not doing the call:
7865 instead, the compiler effectively rewrites the program to appear as
7866 though the definition of the called function was inserted at each
7867 call site.  In \llisp, this is straightforwardly expressed by
7868 inserting the \code{lambda} corresponding to the original definition:
7869 \begin{lisp}
7870 (proclaim '(inline my-1+))
7871 (defun my-1+ (x) (+ x 1))
7872
7873 (my-1+ someval) \result{} ((lambda (x) (+ x 1)) someval)
7874 \end{lisp}
7875
7876 When the function expanded inline is large, the program after inline
7877 expansion may be substantially larger than the original program.  If
7878 the program becomes too large, inline expansion hurts speed rather
7879 than helping it, since hardware resources such as physical memory and
7880 cache will be exhausted.  Inline expansion is called for:
7881 \begin{itemize}
7882
7883 \item When profiling has shown that a relatively simple function is
7884   called so often that a large amount of time is being wasted in the
7885   calling of that function (as opposed to running in that function.)
7886   If a function is complex, it will take a long time to run relative
7887   to the time spent in call, so the speed advantage of inline expansion
7888   is diminished at the same time the space cost of inline expansion is
7889   increased.  Of course, if a function is rarely called, then the
7890   overhead of calling it is also insignificant.
7891
7892 \item With functions so simple that they take less space to inline
7893   expand than would be taken to call the function (such as
7894   \code{my-1+} above.)  It would require intimate knowledge of the
7895   compiler to be certain when inline expansion would reduce space, but
7896   it is generally safe to inline expand functions whose definition is
7897   a single function call, or a few calls to simple \clisp{} functions.
7898 \end{itemize}
7899
7900
7901 In addition to this speed/space tradeoff from inline expansion's
7902 avoidance of the call, inline expansion can also reveal opportunities
7903 for optimization.  \python{}'s extensive source-level optimization can
7904 make use of context information from the caller to tremendously
7905 simplify the code resulting from the inline expansion of a function.
7906
7907 The main form of caller context is local information about the actual
7908 argument values: what the argument types are and whether the arguments
7909 are constant.  Knowledge about argument types can eliminate run-time
7910 type tests (e.g., for generic arithmetic.)  Constant arguments in a
7911 call provide opportunities for constant folding optimization after
7912 inline expansion.
7913
7914 A hidden way that constant arguments are often supplied to functions
7915 is through the defaulting of unsupplied optional or keyword arguments.
7916 There can be a huge efficiency advantage to inline expanding functions
7917 that have complex keyword-based interfaces, such as this definition of
7918 the \code{member} function:
7919 \begin{lisp}
7920 (proclaim '(inline member))
7921 (defun member (item list &key
7922                     (key #'identity)
7923                     (test #'eql testp)
7924                     (test-not nil notp))
7925   (do ((list list (cdr list)))
7926       ((null list) nil)
7927     (let ((car (car list)))
7928       (if (cond (testp
7929                  (funcall test item (funcall key car)))
7930                 (notp
7931                  (not (funcall test-not item (funcall key car))))
7932                 (t
7933                  (funcall test item (funcall key car))))
7934           (return list)))))
7935
7936 \end{lisp}
7937 After inline expansion, this call is simplified to the obvious code:
7938 \begin{lisp}
7939 (member a l :key #'foo-a :test #'char=) \result{}
7940
7941 (do ((list list (cdr list)))
7942     ((null list) nil)
7943   (let ((car (car list)))
7944     (if (char= item (foo-a car))
7945         (return list))))
7946 \end{lisp}
7947 In this example, there could easily be more than an order of magnitude
7948 improvement in speed.  In addition to eliminating the original call to
7949 \code{member}, inline expansion also allows the calls to \code{char=}
7950 and \code{foo-a} to be open-coded.  We go from a loop with three tests
7951 and two calls to a loop with one test and no calls.
7952
7953 \xlref{source-optimization} for more discussion of source level
7954 optimization.
7955
7956 \begin{comment}
7957 * Inline Expansion Recording::
7958 * Semi-Inline Expansion::
7959 * The Maybe-Inline Declaration::
7960 \end{comment}
7961
7962 %%\node Inline Expansion Recording, Semi-Inline Expansion, Inline Expansion, Inline Expansion
7963 \subsection{Inline Expansion Recording}
7964 \cindex{recording of inline expansions}
7965
7966 Inline expansion requires the source for the inline expanded function to
7967 be available when calls to the function are compiled.  The compiler doesn't
7968 remember the inline expansion for every function, since that would take an
7969 excessive amount of space.  Instead, the programmer must tell the compiler to
7970 record the inline expansion before the definition of the inline expanded
7971 function is compiled.  This is done by globally declaring the function inline
7972 before the function is defined, by using the \code{inline} and
7973 \code{extensions:maybe-inline} (\pxlref{maybe-inline-declaration})
7974 declarations.
7975
7976 In addition to recording the inline expansion of inline functions at the time
7977 the function is compiled, \code{compile-file} also puts the inline expansion in
7978 the output file.  When the output file is loaded, the inline expansion is made
7979 available for subsequent compilations; there is no need to compile the
7980 definition again to record the inline expansion.
7981
7982 If a function is declared inline, but no expansion is recorded, then the
7983 compiler will give an efficiency note like:
7984 \begin{example}
7985 Note: MYFUN is declared inline, but has no expansion.
7986 \end{example}
7987 When you get this note, check that the \code{inline} declaration and the
7988 definition appear before the calls that are to be inline expanded.  This note
7989 will also be given if the inline expansion for a \code{defun} could not be
7990 recorded because the \code{defun} was in a non-null lexical environment.
7991
7992 %%\node Semi-Inline Expansion, The Maybe-Inline Declaration, Inline Expansion Recording, Inline Expansion
7993 \subsection{Semi-Inline Expansion}
7994 \label{semi-inline}
7995
7996 Python supports \var{semi-inline} functions.  Semi-inline expansion
7997 shares a single copy of a function across all the calls in a component
7998 by converting the inline expansion into a local function
7999 (\pxlref{local-call}.)  This takes up less space when there are
8000 multiple calls, but also provides less opportunity for context
8001 dependent optimization.  When there is only one call, the result is
8002 identical to normal inline expansion.  Semi-inline expansion is done
8003 when the \code{space} optimization quality is \code{0}, and the
8004 function has been declared \code{extensions:maybe-inline}.
8005
8006 This mechanism of inline expansion combined with local call also
8007 allows recursive functions to be inline expanded.  If a recursive
8008 function is declared \code{inline}, calls will actually be compiled
8009 semi-inline.  Although recursive functions are often so complex that
8010 there is little advantage to semi-inline expansion, it can still be
8011 useful in the same sort of cases where normal inline expansion is
8012 especially advantageous, i.e. functions where the calling context can
8013 help a lot.
8014
8015 %%\node The Maybe-Inline Declaration,  , Semi-Inline Expansion, Inline Expansion
8016 \subsection{The Maybe-Inline Declaration}
8017 \label{maybe-inline-declaration}
8018 \cindex{maybe-inline declaration}
8019
8020 The \code{extensions:maybe-inline} declaration is a \cmucl{}
8021 extension.  It is similar to \code{inline}, but indicates that inline
8022 expansion may sometimes be desirable, rather than saying that inline
8023 expansion should almost always be done.  When used in a global
8024 declaration, \code{extensions:maybe-inline} causes the expansion for
8025 the named functions to be recorded, but the functions aren't actually
8026 inline expanded unless \code{space} is \code{0} or the function is
8027 eventually (perhaps locally) declared \code{inline}.
8028
8029 Use of the \code{extensions:maybe-inline} declaration followed by the
8030 \code{defun} is preferable to the standard idiom of:
8031 \begin{lisp}
8032 (proclaim '(inline myfun))
8033 (defun myfun () ...)
8034 (proclaim '(notinline myfun))
8035
8036 ;;; \i{Any calls to \code{myfun} here are not inline expanded.}
8037
8038 (defun somefun ()
8039   (declare (inline myfun))
8040   ;;
8041   ;; \i{Calls to \code{myfun} here are inline expanded.}
8042   ...)
8043 \end{lisp}
8044 The problem with using \code{notinline} in this way is that in
8045 \clisp{} it does more than just suppress inline expansion, it also
8046 forbids the compiler to use any knowledge of \code{myfun} until a
8047 later \code{inline} declaration overrides the \code{notinline}.  This
8048 prevents compiler warnings about incorrect calls to the function, and
8049 also prevents block compilation.
8050
8051 The \code{extensions:maybe-inline} declaration is used like this:
8052 \begin{lisp}
8053 (proclaim '(extensions:maybe-inline myfun))
8054 (defun myfun () ...)
8055
8056 ;;; \i{Any calls to \code{myfun} here are not inline expanded.}
8057
8058 (defun somefun ()
8059   (declare (inline myfun))
8060   ;;
8061   ;; \i{Calls to \code{myfun} here are inline expanded.}
8062   ...)
8063
8064 (defun someotherfun ()
8065   (declare (optimize (space 0)))
8066   ;;
8067   ;; \i{Calls to \code{myfun} here are expanded semi-inline.}
8068   ...)
8069 \end{lisp}
8070 In this example, the use of \code{extensions:maybe-inline} causes the
8071 expansion to be recorded when the \code{defun} for \code{myfun} is
8072 compiled, and doesn't waste space through doing inline expansion by
8073 default.  Unlike \code{notinline}, this declaration still allows the
8074 compiler to assume that the known definition really is the one that
8075 will be called when giving compiler warnings, and also allows the
8076 compiler to do semi-inline expansion when the policy is appropriate.
8077
8078 When the goal is merely to control whether inline expansion is done by
8079 default, it is preferable to use \code{extensions:maybe-inline} rather
8080 than \code{notinline}.  The \code{notinline} declaration should be
8081 reserved for those special occasions when a function may be redefined
8082 at run-time, so the compiler must be told that the obvious definition
8083 of a function is not necessarily the one that will be in effect at the
8084 time of the call.
8085
8086 %%\f
8087 %%\node Byte Coded Compilation, Object Representation, Inline Expansion, Advanced Compiler Use and Efficiency Hints
8088 \section{Byte Coded Compilation}
8089 \label{byte-compile}
8090 \cindex{byte coded compilation}
8091 \cindex{space optimization}
8092
8093 \Python{} supports byte compilation to reduce the size of Lisp
8094 programs by allowing functions to be compiled more compactly.  Byte
8095 compilation provides an extreme speed/space tradeoff: byte code is
8096 typically six times more compact than native code, but runs fifty
8097 times (or more) slower.  This is about ten times faster than the
8098 standard interpreter, which is itself considered fast in comparison to
8099 other \clisp{} interpreters.
8100
8101 Large Lisp systems (such as \cmucl{} itself) often have large amounts
8102 of user-interface code, compile-time (macro) code, debugging code, or
8103 rarely executed special-case code.  This code is a good target for
8104 byte compilation: very little time is spent running in it, but it can
8105 take up quite a bit of space.  Straight-line code with many function
8106 calls is much more suitable than inner loops.
8107
8108 When byte-compiling, the compiler compiles about twice as fast, and
8109 can produce a hardware independent object file (\file{.bytef} type.)
8110 This file can be loaded like a normal fasl file on any implementation
8111 of CMU CL with the same byte-ordering (DEC PMAX has \file{.lbytef}
8112 type.)
8113
8114 The decision to byte compile or native compile can be done on a
8115 per-file or per-code-object basis.  The \kwd{byte-compile} argument to
8116 \funref{compile-file} has these possible values:
8117 \begin{Lentry}
8118 \item[\false{}] Don't byte compile anything in this file.
8119
8120 \item[\true{}] Byte compile everything in this file and produce a
8121   processor-independent \file{.bytef} file.
8122
8123 \item[\kwd{maybe}] Produce a normal fasl file, but byte compile any
8124   functions for which the \code{speed} optimization quality is
8125   \code{0} and the \code{debug} quality is not greater than \code{1}.
8126 \end{Lentry}
8127
8128 \begin{defvar}{extensions:}{byte-compile-top-level}
8129
8130   If this variable is true (the default) and the \kwd{byte-compile}
8131   argument to \code{compile-file} is \kwd{maybe}, then byte compile
8132   top-level code (code outside of any \code{defun}, \code{defmethod},
8133   etc.)
8134 \end{defvar}
8135
8136 \begin{defvar}{extensions:}{byte-compile-default}
8137
8138   This variable determines the default value for the
8139   \kwd{byte-compile} argument to \code{compile-file}, initially
8140   \kwd{maybe}.
8141 \end{defvar}
8142
8143 %%\f
8144 %%\node Object Representation, Numbers, Byte Coded Compilation, Advanced Compiler Use and Efficiency Hints
8145 \section{Object Representation}
8146 \label{object-representation}
8147 \cindex{object representation}
8148 \cpsubindex{representation}{object}
8149 \cpsubindex{efficiency}{of objects}
8150
8151 A somewhat subtle aspect of writing efficient \clisp{} programs is
8152 choosing the correct data structures so that the underlying objects
8153 can be implemented efficiently.  This is partly because of the need
8154 for multiple representations for a given value
8155 (\pxlref{non-descriptor}), but is also due to the sheer number of
8156 object types that \clisp{} has built in.  The number of possible
8157 representations complicates the choice of a good representation
8158 because semantically similar objects may vary in their efficiency
8159 depending on how the program operates on them.
8160
8161 \begin{comment}
8162 * Think Before You Use a List::
8163 * Structure Representation::
8164 * Arrays::
8165 * Vectors::
8166 * Bit-Vectors::
8167 * Hashtables::
8168 \end{comment}
8169
8170 %%\node Think Before You Use a List, Structure Representation, Object Representation, Object Representation
8171 \subsection{Think Before You Use a List}
8172 \cpsubindex{lists}{efficiency of}
8173
8174 Although Lisp's creator seemed to think that it was for LISt Processing, the
8175 astute observer may have noticed that the chapter on list manipulation makes up
8176 less than three percent of \i{Common Lisp: the Language II}.  The language has
8177 grown since Lisp 1.5\dash{}new data types supersede lists for many purposes.
8178
8179 %%\node Structure Representation, Arrays, Think Before You Use a List, Object Representation
8180 \subsection{Structure Representation}
8181 \cpsubindex{structure types}{efficiency of} One of the best ways of
8182 building complex data structures is to define appropriate structure
8183 types using \findexed{defstruct}.  In \python, access of structure
8184 slots is always at least as fast as list or vector access, and is
8185 usually faster.  In comparison to a list representation of a tuple,
8186 structures also have a space advantage.
8187
8188 Even if structures weren't more efficient than other representations, structure
8189 use would still be attractive because programs that use structures in
8190 appropriate ways are much more maintainable and robust than programs written
8191 using only lists.  For example:
8192 \begin{lisp}
8193 (rplaca (caddr (cadddr x)) (caddr y))
8194 \end{lisp}
8195 could have been written using structures in this way:
8196 \begin{lisp}
8197 (setf (beverage-flavor (astronaut-beverage x)) (beverage-flavor y))
8198 \end{lisp}
8199 The second version is more maintainable because it is easier to
8200 understand what it is doing.  It is more robust because structure
8201 accesses are type checked.  An \code{astronaut} will never be confused
8202 with a \code{beverage}, and the result of \code{beverage-flavor} is
8203 always a flavor.  See sections \ref{structure-types} and
8204 \ref{freeze-type} for more information about structure types.
8205 \xlref{type-inference} for a number of examples that make clear the
8206 advantages of structure typing.
8207
8208 Note that the structure definition should be compiled before any uses
8209 of its accessors or type predicate so that these function calls can be
8210 efficiently open-coded.
8211
8212 %%\node Arrays, Vectors, Structure Representation, Object Representation
8213 \subsection{Arrays}
8214 \label{array-types}
8215 \cpsubindex{arrays}{efficiency of}
8216
8217 Arrays are often the most efficient representation for collections of objects
8218 because:
8219 \begin{itemize}
8220
8221 \item Array representations are often the most compact.  An array is
8222   always more compact than a list containing the same number of
8223   elements.
8224
8225 \item Arrays allow fast constant-time access.
8226
8227 \item Arrays are easily destructively modified, which can reduce
8228   consing.
8229
8230 \item Array element types can be specialized, which reduces both
8231   overall size and consing (\pxlref{specialized-array-types}.)
8232 \end{itemize}
8233
8234
8235 Access of arrays that are not of type \code{simple-array} is less
8236 efficient, so declarations are appropriate when an array is of a
8237 simple type like \code{simple-string} or \code{simple-bit-vector}.
8238 Arrays are almost always simple, but the compiler may not be able to
8239 prove simpleness at every use.  The only way to get a non-simple array
8240 is to use the \kwd{displaced-to}, \kwd{fill-pointer} or
8241 \kwd{adjustable} arguments to \code{make-array}.  If you don't use
8242 these hairy options, then arrays can always be declared to be simple.
8243
8244 Because of the many specialized array types and the possibility of
8245 non-simple arrays, array access is much like generic arithmetic
8246 (\pxlref{generic-arithmetic}).  In order for array accesses to be
8247 efficiently compiled, the element type and simpleness of the array
8248 must be known at compile time.  If there is inadequate information,
8249 the compiler is forced to call a generic array access routine.  You
8250 can detect inefficient array accesses by enabling efficiency notes,
8251 \pxlref{efficiency-notes}.
8252
8253 %%\node Vectors, Bit-Vectors, Arrays, Object Representation
8254 \subsection{Vectors}
8255 \cpsubindex{vectors}{efficiency of}
8256
8257 Vectors (one dimensional arrays) are particularly useful, since in
8258 addition to their obvious array-like applications, they are also well
8259 suited to representing sequences.  In comparison to a list
8260 representation, vectors are faster to access and take up between two
8261 and sixty-four times less space (depending on the element type.)  As
8262 with arbitrary arrays, the compiler needs to know that vectors are not
8263 complex, so you should use \code{simple-string} in preference to
8264 \code{string}, etc.
8265
8266 The only advantage that lists have over vectors for representing
8267 sequences is that it is easy to change the length of a list, add to it
8268 and remove items from it.  Likely signs of archaic, slow lisp code are
8269 \code{nth} and \code{nthcdr}.  If you are using these functions you
8270 should probably be using a vector.
8271
8272 %%\node Bit-Vectors, Hashtables, Vectors, Object Representation
8273 \subsection{Bit-Vectors}
8274 \cpsubindex{bit-vectors}{efficiency of}
8275
8276 Another thing that lists have been used for is set manipulation.  In
8277 applications where there is a known, reasonably small universe of
8278 items, bit-vectors can be used to improve performance.  This is much
8279 less convenient than using lists, because instead of symbols, each
8280 element in the universe must be assigned a numeric index into the bit
8281 vector.  Using a bit-vector will nearly always be faster, and can be
8282 tremendously faster if the number of elements in the set is not small.
8283 The logical operations on \code{simple-bit-vector}s are efficient,
8284 since they operate on a word at a time.
8285
8286
8287 %%\node Hashtables,  , Bit-Vectors, Object Representation
8288 \subsection{Hashtables}
8289 \cpsubindex{hash-tables}{efficiency of}
8290
8291 Hashtables are an efficient and general mechanism for maintaining associations
8292 such as the association between an object and its name.  Although hashtables
8293 are usually the best way to maintain associations, efficiency and style
8294 considerations sometimes favor the use of an association list (a-list).
8295
8296 \code{assoc} is fairly fast when the \var{test} argument is \code{eq}
8297 or \code{eql} and there are only a few elements, but the time goes up
8298 in proportion to the number of elements.  In contrast, the
8299 hash-table lookup has a somewhat higher overhead, but the speed is
8300 largely unaffected by the number of entries in the table.  For an
8301 \code{equal} hash-table or alist, hash-tables have an even greater
8302 advantage, since the test is more expensive.  Whatever you do, be sure
8303 to use the most restrictive test function possible.
8304
8305 The style argument observes that although hash-tables and alists
8306 overlap in function, they do not do all things equally well.
8307 \begin{itemize}
8308
8309 \item Alists are good for maintaining scoped environments.  They were
8310   originally invented to implement scoping in the Lisp interpreter,
8311   and are still used for this in \python.  With an alist one can
8312   non-destructively change an association simply by consing a new
8313   element on the front.  This is something that cannot be done with
8314   hash-tables.
8315
8316 \item Hashtables are good for maintaining a global association.  The
8317   value associated with an entry can easily be changed with
8318   \code{setf}.  With an alist, one has to go through contortions,
8319   either \code{rplacd}'ing the cons if the entry exists, or pushing a
8320   new one if it doesn't.  The side-effecting nature of hash-table
8321   operations is an advantage here.
8322 \end{itemize}
8323
8324
8325 Historically, symbol property lists were often used for global name
8326 associations.  Property lists provide an awkward and error-prone
8327 combination of name association and record structure.  If you must use
8328 the property list, please store all the related values in a single
8329 structure under a single property, rather than using many properties.
8330 This makes access more efficient, and also adds a modicum of typing
8331 and abstraction.  \xlref{advanced-type-stuff} for information on types
8332 in \cmucl.
8333
8334 %%\f
8335 %%\node Numbers, General Efficiency Hints, Object Representation, Advanced Compiler Use and Efficiency Hints
8336 \section{Numbers}
8337 \label{numeric-types}
8338 \cpsubindex{numeric}{types}
8339 \cpsubindex{types}{numeric}
8340
8341 Numbers are interesting because numbers are one of the few \llisp{} data types
8342 that have direct support in conventional hardware.  If a number can be
8343 represented in the way that the hardware expects it, then there is a big
8344 efficiency advantage.
8345
8346 Using hardware representations is problematical in \llisp{} due to
8347 dynamic typing (where the type of a value may be unknown at compile
8348 time.)  It is possible to compile code for statically typed portions
8349 of a \llisp{} program with efficiency comparable to that obtained in
8350 statically typed languages such as C, but not all \llisp{}
8351 implementations succeed.  There are two main barriers to efficient
8352 numerical code in \llisp{}:
8353 \begin{itemize}
8354
8355 \item The compiler must prove that the numerical expression is in fact
8356   statically typed, and
8357
8358 \item The compiler must be able to somehow reconcile the conflicting
8359   demands of the hardware mandated number representation with the
8360   \llisp{} requirements of dynamic typing and garbage-collecting
8361   dynamic storage allocation.
8362 \end{itemize}
8363
8364 Because of its type inference (\pxlref{type-inference}) and efficiency
8365 notes (\pxlref{efficiency-notes}), \python{} is better than
8366 conventional \llisp{} compilers at ensuring that numerical expressions
8367 are statically typed.  \Python{} also goes somewhat farther than existing
8368 compilers in the area of allowing native machine number
8369 representations in the presence of garbage collection.
8370
8371 \begin{comment}
8372 * Descriptors::
8373 * Non-Descriptor Representations::
8374 * Variables::
8375 * Generic Arithmetic::
8376 * Fixnums::
8377 * Word Integers::
8378 * Floating Point Efficiency::
8379 * Specialized Arrays::
8380 * Specialized Structure Slots::
8381 * Interactions With Local Call::
8382 * Representation of Characters::
8383 \end{comment}
8384
8385 %%\node Descriptors, Non-Descriptor Representations, Numbers, Numbers
8386 \subsection{Descriptors}
8387 \cpsubindex{descriptors}{object}
8388 \cindex{object representation}
8389 \cpsubindex{representation}{object}
8390 \cpsubindex{consing}{overhead of}
8391
8392 \llisp{}'s dynamic typing requires that it be possible to represent
8393 any value with a fixed length object, known as a \var{descriptor}.
8394 This fixed-length requirement is implicit in features such as:
8395 \begin{itemize}
8396
8397 \item Data types (like \code{simple-vector}) that can contain any type
8398   of object, and that can be destructively modified to contain
8399   different objects (of possibly different types.)
8400
8401 \item Functions that can be called with any type of argument, and that
8402   can be redefined at run time.
8403 \end{itemize}
8404
8405 In order to save space, a descriptor is invariably represented as a
8406 single word.  Objects that can be directly represented in the
8407 descriptor itself are said to be \var{immediate}.  Descriptors for
8408 objects larger than one word are in reality pointers to the memory
8409 actually containing the object.
8410
8411 Representing objects using pointers has two major disadvantages:
8412 \begin{itemize}
8413
8414 \item The memory pointed to must be allocated on the heap, so it must
8415   eventually be freed by the garbage collector.  Excessive heap
8416   allocation of objects (or ``consing'') is inefficient in several
8417   ways.  \xlref{consing}.
8418
8419 \item Representing an object in memory requires the compiler to emit
8420   additional instructions to read the actual value in from memory, and
8421   then to write the value back after operating on it.
8422 \end{itemize}
8423
8424 The introduction of garbage collection makes things even worse, since
8425 the garbage collector must be able to determine whether a descriptor
8426 is an immediate object or a pointer.  This requires that a few bits in
8427 each descriptor be dedicated to the garbage collector.  The loss of a
8428 few bits doesn't seem like much, but it has a major efficiency
8429 implication\dash{}objects whose natural machine representation is a
8430 full word (integers and single-floats) cannot have an immediate
8431 representation.  So the compiler is forced to use an unnatural
8432 immediate representation (such as \code{fixnum}) or a natural pointer
8433 representation (with the attendant consing overhead.)
8434
8435
8436 %%\node Non-Descriptor Representations, Variables, Descriptors, Numbers
8437 \subsection{Non-Descriptor Representations}
8438 \label{non-descriptor}
8439 \cindex{non-descriptor representations}
8440 \cindex{stack numbers}
8441
8442 From the discussion above, we can see that the standard descriptor
8443 representation has many problems, the worst being number consing.
8444 \llisp{} compilers try to avoid these descriptor efficiency problems by using
8445 \var{non-descriptor} representations.  A compiler that uses non-descriptor
8446 representations can compile this function so that it does no number consing:
8447 \begin{lisp}
8448 (defun multby (vec n)
8449   (declare (type (simple-array single-float (*)) vec)
8450            (single-float n))
8451   (dotimes (i (length vec))
8452     (setf (aref vec i)
8453           (* n (aref vec i)))))
8454 \end{lisp}
8455 If a descriptor representation were used, each iteration of the loop might
8456 cons two floats and do three times as many memory references.
8457
8458 As its negative definition suggests, the range of possible non-descriptor
8459 representations is large.  The performance improvement from non-descriptor
8460 representation depends upon both the number of types that have non-descriptor
8461 representations and the number of contexts in which the compiler is forced to
8462 use a descriptor representation.
8463
8464 Many \llisp{} compilers support non-descriptor representations for
8465 float types such as \code{single-float} and \code{double-float}
8466 (section \ref{float-efficiency}.)  \python{} adds support for full
8467 word integers (\pxlref{word-integers}), characters
8468 (\pxlref{characters}) and system-area pointers (unconstrained
8469 pointers, \pxlref{system-area-pointers}.)  Many \llisp{} compilers
8470 support non-descriptor representations for variables (section
8471 \ref{ND-variables}) and array elements (section
8472 \ref{specialized-array-types}.)  \python{} adds support for
8473 non-descriptor arguments and return values in local call
8474 (\pxlref{number-local-call}) and structure slots (\pxlref{raw-slots}).
8475
8476 %%\node Variables, Generic Arithmetic, Non-Descriptor Representations, Numbers
8477 \subsection{Variables}
8478 \label{ND-variables}
8479 \cpsubindex{variables}{non-descriptor}
8480 \cpsubindex{type declarations}{variable}
8481 \cpsubindex{efficiency}{of numeric variables}
8482
8483 In order to use a non-descriptor representation for a variable or
8484 expression intermediate value, the compiler must be able to prove that
8485 the value is always of a particular type having a non-descriptor
8486 representation.  Type inference (\pxlref{type-inference}) often needs
8487 some help from user-supplied declarations.  The best kind of type
8488 declaration is a variable type declaration placed at the binding
8489 point:
8490 \begin{lisp}
8491 (let ((x (car l)))
8492   (declare (single-float x))
8493   ...)
8494 \end{lisp}
8495 Use of \code{the}, or of variable declarations not at the binding form
8496 is insufficient to allow non-descriptor representation of the
8497 variable\dash{}with these declarations it is not certain that all
8498 values of the variable are of the right type.  It is sometimes useful
8499 to introduce a gratuitous binding that allows the compiler to change
8500 to a non-descriptor representation, like:
8501 \begin{lisp}
8502 (etypecase x
8503   ((signed-byte 32)
8504    (let ((x x))
8505      (declare (type (signed-byte 32) x))
8506      ...))
8507   ...)
8508 \end{lisp}
8509 The declaration on the inner \code{x} is necessary here due to a phase
8510 ordering problem.  Although the compiler will eventually prove that
8511 the outer \code{x} is a \w{\code{(signed-byte 32)}} within that
8512 \code{etypecase} branch, the inner \code{x} would have been optimized
8513 away by that time.  Declaring the type makes let optimization more
8514 cautious.
8515
8516 Note that storing a value into a global (or \code{special}) variable
8517 always forces a descriptor representation.  Wherever possible, you
8518 should operate only on local variables, binding any referenced globals
8519 to local variables at the beginning of the function, and doing any
8520 global assignments at the end.
8521
8522 Efficiency notes signal use of inefficient representations, so
8523 programmers needn't continuously worry about the details of
8524 representation selection (\pxlref{representation-eff-note}.)
8525
8526 %%\node Generic Arithmetic, Fixnums, Variables, Numbers
8527 \subsection{Generic Arithmetic}
8528 \label{generic-arithmetic}
8529 \cindex{generic arithmetic}
8530 \cpsubindex{arithmetic}{generic}
8531 \cpsubindex{numeric}{operation efficiency}
8532
8533 In \clisp, arithmetic operations are \var{generic}.\footnote{As Steele
8534   notes in CLTL II, this is a generic conception of generic, and is
8535   not to be confused with the CLOS concept of a generic function.}
8536 The \code{+} function can be passed \code{fixnum}s, \code{bignum}s,
8537 \code{ratio}s, and various kinds of \code{float}s and
8538 \code{complex}es, in any combination.  In addition to the inherent
8539 complexity of \code{bignum} and \code{ratio} operations, there is also
8540 a lot of overhead in just figuring out which operation to do and what
8541 contagion and canonicalization rules apply.  The complexity of generic
8542 arithmetic is so great that it is inconceivable to open code it.
8543 Instead, the compiler does a function call to a generic arithmetic
8544 routine, consuming many instructions before the actual computation
8545 even starts.
8546
8547 This is ridiculous, since even \llisp{} programs do a lot of
8548 arithmetic, and the hardware is capable of doing operations on small
8549 integers and floats with a single instruction.  To get acceptable
8550 efficiency, the compiler special-cases uses of generic arithmetic that
8551 are directly implemented in the hardware.  In order to open code
8552 arithmetic, several constraints must be met:
8553 \begin{itemize}
8554
8555 \item All the arguments must be known to be a good type of number.
8556
8557 \item The result must be known to be a good type of number.
8558
8559 \item Any intermediate values such as the result of \w{\code{(+ a b)}}
8560   in the call \w{\code{(+ a b c)}} must be known to be a good type of
8561   number.
8562
8563 \item All the above numbers with good types must be of the \var{same}
8564   good type.  Don't try to mix integers and floats or different float
8565   formats.
8566 \end{itemize}
8567
8568 The ``good types'' are \w{\code{(signed-byte 32)}},
8569 \w{\code{(unsigned-byte 32)}}, \code{single-float} and
8570 \code{double-float}.  See sections \ref{fixnums}, \ref{word-integers}
8571 and \ref{float-efficiency} for more discussion of good numeric types.
8572
8573 \code{float} is not a good type, since it might mean either
8574 \code{single-float} or \code{double-float}.  \code{integer} is not a
8575 good type, since it might mean \code{bignum}.  \code{rational} is not
8576 a good type, since it might mean \code{ratio}.  Note however that
8577 these types are still useful in declarations, since type inference may
8578 be able to strengthen a weak declaration into a good one, when it
8579 would be at a loss if there was no declaration at all
8580 (\pxlref{type-inference}).  The \code{integer} and
8581 \code{unsigned-byte} (or non-negative integer) types are especially
8582 useful in this regard, since they can often be strengthened to a good
8583 integer type.
8584
8585 Arithmetic with \code{complex} numbers is inefficient in comparison to
8586 float and integer arithmetic.  Complex numbers are always represented
8587 with a pointer descriptor (causing consing overhead), and complex
8588 arithmetic is always closed coded using the general generic arithmetic
8589 functions.  But arithmetic with complex types such as:
8590 \begin{lisp}
8591 (complex float)
8592 (complex fixnum)
8593 \end{lisp}
8594 is still faster than \code{bignum} or \code{ratio} arithmetic, since the
8595 implementation is much simpler.
8596
8597 Note: don't use \code{/} to divide integers unless you want the
8598 overhead of rational arithmetic.  Use \code{truncate} even when you
8599 know that the arguments divide evenly.
8600
8601 You don't need to remember all the rules for how to get open-coded
8602 arithmetic, since efficiency notes will tell you when and where there
8603 is a problem\dash{}\pxlref{efficiency-notes}.
8604
8605
8606 %%\node Fixnums, Word Integers, Generic Arithmetic, Numbers
8607 \subsection{Fixnums}
8608 \label{fixnums}
8609 \cindex{fixnums}
8610 \cindex{bignums}
8611
8612 A fixnum is a ``FIXed precision NUMber''.  In modern \llisp{}
8613 implementations, fixnums can be represented with an immediate
8614 descriptor, so operating on fixnums requires no consing or memory
8615 references.  Clever choice of representations also allows some
8616 arithmetic operations to be done on fixnums using hardware supported
8617 word-integer instructions, somewhat reducing the speed penalty for
8618 using an unnatural integer representation.
8619
8620 It is useful to distinguish the \code{fixnum} type from the fixnum
8621 representation of integers.  In \python, there is absolutely nothing
8622 magical about the \code{fixnum} type in comparison to other finite
8623 integer types.  \code{fixnum} is equivalent to (is defined with
8624 \code{deftype} to be) \w{\code{(signed-byte 30)}}.  \code{fixnum} is
8625 simply the largest subset of integers that \i{can be represented}
8626 using an immediate fixnum descriptor.
8627
8628 Unlike in other \clisp{} compilers, it is in no way desirable to use
8629 the \code{fixnum} type in declarations in preference to more
8630 restrictive integer types such as \code{bit}, \w{\code{(integer -43
8631     7)}} and \w{\code{(unsigned-byte 8)}}.  Since \python{} does
8632 understand these integer types, it is preferable to use the more
8633 restrictive type, as it allows better type inference
8634 (\pxlref{operation-type-inference}.)
8635
8636 The small, efficient fixnum is contrasted with bignum, or ``BIG
8637 NUMber''.  This is another descriptor representation for integers, but
8638 this time a pointer representation that allows for arbitrarily large
8639 integers.  Bignum operations are less efficient than fixnum
8640 operations, both because of the consing and memory reference overheads
8641 of a pointer descriptor, and also because of the inherent complexity
8642 of extended precision arithmetic.  While fixnum operations can often
8643 be done with a single instruction, bignum operations are so complex
8644 that they are always done using generic arithmetic.
8645
8646 A crucial point is that the compiler will use generic arithmetic if it
8647 can't \var{prove} that all the arguments, intermediate values, and
8648 results are fixnums.  With bounded integer types such as
8649 \code{fixnum}, the result type proves to be especially problematical,
8650 since these types are not closed under common arithmetic operations
8651 such as \code{+}, \code{-}, \code{*} and \code{/}.  For example,
8652 \w{\code{(1+ (the fixnum x))}} does not necessarily evaluate to a
8653 \code{fixnum}.  Bignums were added to \llisp{} to get around this
8654 problem, but they really just transform the correctness problem ``if
8655 this add overflows, you will get the wrong answer'' to the efficiency
8656 problem ``if this add \var{might} overflow then your program will run
8657 slowly (because of generic arithmetic.)''
8658
8659 There is just no getting around the fact that the hardware only
8660 directly supports short integers.  To get the most efficient open
8661 coding, the compiler must be able to prove that the result is a good
8662 integer type.  This is an argument in favor of using more restrictive
8663 integer types: \w{\code{(1+ (the fixnum x))}} may not always be a
8664 \code{fixnum}, but \w{\code{(1+ (the (unsigned-byte 8) x))}} always
8665 is.  Of course, you can also assert the result type by putting in lots
8666 of \code{the} declarations and then compiling with \code{safety}
8667 \code{0}.
8668
8669 %%\node Word Integers, Floating Point Efficiency, Fixnums, Numbers
8670 \subsection{Word Integers}
8671 \label{word-integers}
8672 \cindex{word integers}
8673
8674 \Python{} is unique in its efficient implementation of arithmetic
8675 on full-word integers through non-descriptor representations and open coding.
8676 Arithmetic on any subtype of these types:
8677 \begin{lisp}
8678 (signed-byte 32)
8679 (unsigned-byte 32)
8680 \end{lisp}
8681 is reasonably efficient, although subtypes of \code{fixnum} remain
8682 somewhat more efficient.
8683
8684 If a word integer must be represented as a descriptor, then the
8685 \code{bignum} representation is used, with its associated consing
8686 overhead.  The support for word integers in no way changes the
8687 language semantics; it just makes arithmetic on small bignums vastly
8688 more efficient.  It is fine to do arithmetic operations with mixed
8689 \code{fixnum} and word integer operands; just declare the most
8690 specific integer type you can, and let the compiler decide what
8691 representation to use.
8692
8693 In fact, to most users, the greatest advantage of word integer
8694 arithmetic is that it effectively provides a few guard bits on the
8695 fixnum representation.  If there are missing assertions on
8696 intermediate values in a fixnum expression, the intermediate results
8697 can usually be proved to fit in a word.  After the whole expression is
8698 evaluated, there will often be a fixnum assertion on the final result,
8699 allowing creation of a fixnum result without even checking for
8700 overflow.
8701
8702 The remarks in section \ref{fixnums} about fixnum result type also
8703 apply to word integers; you must be careful to give the compiler
8704 enough information to prove that the result is still a word integer.
8705 This time, though, when we blow out of word integers we land in
8706 generic bignum arithmetic, which is much worse than sleazing from
8707 \code{fixnum}s to word integers.  Note that mixing
8708 \w{\code{(unsigned-byte 32)}} arguments with arguments of any signed
8709 type (such as \code{fixnum}) is a no-no, since the result might not be
8710 unsigned.
8711
8712 %%\node Floating Point Efficiency, Specialized Arrays, Word Integers, Numbers
8713 \subsection{Floating Point Efficiency}
8714 \label{float-efficiency}
8715 \cindex{floating point efficiency}
8716
8717 Arithmetic on objects of type \code{single-float} and \code{double-float} is
8718 efficiently implemented using non-descriptor representations and open coding.
8719 As for integer arithmetic, the arguments must be known to be of the same float
8720 type.  Unlike for integer arithmetic, the results and intermediate values
8721 usually take care of themselves due to the rules of float contagion, i.e.
8722 \w{\code{(1+ (the single-float x))}} is always a \code{single-float}.
8723
8724 Although they are not specially implemented, \code{short-float} and
8725 \code{long-float} are also acceptable in declarations, since they are
8726 synonyms for the \code{single-float} and \code{double-float} types,
8727 respectively.
8728
8729 \begin{changebar}
8730   Some versions of CMU Common Lisp include extra support for floating
8731   point arithmetic.  In particular, if \code{*features*} includes
8732   \kwd{propagate-float-type}, list-style float type specifiers such as
8733   \w{\code{(single-float 0.0 1.0)}} will be used to good effect.
8734
8735   For example, in this function,
8736   \begin{example}
8737     (defun square (x)
8738       (declare (type (single-float 0f0 10f0) x))
8739       (* x x))
8740   \end{example}
8741   \Python{} can deduce that the
8742   return type of the function \code{square} is \w{\code{(single-float
8743       0f0 100f0)}}.
8744
8745   Many union types are also supported so that
8746   \begin{example}
8747     (+ (the (or (integer 1 1) (integer 5 5)) x)
8748        (the (or (integer 10 10) (integer 20 20)) y))
8749   \end{example}
8750   has the inferred type \code{(or (integer 11 11) (integer 15 15)
8751     (integer 21 21) (integer 25 25))}.  This also works for
8752   floating-point numbers.  Member types, however, are not supported
8753   because in general the member elements do not have to be numbers.  Thus,
8754   instead of \code{(member 1 4)}, you should write \code{(or (integer
8755     1 1) (integer 4 4))}.
8756
8757   In addition, if \kwd{propagate-fun-type} is in \code{*features*},
8758   \Python{} knows how to infer types for many mathematical functions
8759   including square root, exponential and logarithmic functions,
8760   trigonometric functions and their inverses, and hyperbolic functions
8761   and their inverses.  For numeric code, this can greatly enhance
8762   efficiency by allowing the compiler to use specialized versions of
8763   the functions instead of the generic versions.  The greatest benefit
8764   of this type inference is determining that the result of the
8765   function is a real-valued number instead of possibly being
8766   a complex-valued number.
8767
8768   For example, consider the function
8769   \begin{example}
8770     (defun fun (x)
8771       (declare (type (single-float 0f0 100f0) x))
8772       (values (sqrt x) (log x 10f0)))
8773   \end{example}
8774   With this declaration, the compiler can determine that the arguments
8775   to \code{sqrt} and \code{log} are always non-negative so that the result
8776   is always a \code{single-float}.  In fact, the return type for this
8777   function is derived to be \code{(values (single-float 0f0 10f0)
8778       (single-float * 2f0))}.
8779
8780   If the declaration were reduced to just \w{\code{(declare
8781       (single-float x))}}, the argument to \code{sqrt} and \code{log}
8782   could be negative.  This forces the use of the generic versions of
8783   these functions because the result could be a complex number.
8784
8785   Union types are not yet supported for functions.
8786
8787   We note, however, that proper interval arithmetic is not fully
8788   implemented in the compiler so the inferred types may be slightly in
8789   error due to round-off errors.  This round-off error could
8790   accumulate to cause the compiler to erroneously deduce the result
8791   type and cause code to be removed as being
8792   unreachable.\footnote{This, however, has not actually happened, but
8793     it is a possibility.}%
8794   Thus, the declarations should only be precise enough for the
8795   compiler to deduce that a real-valued argument to a function would
8796   produce a real-valued result.  The efficiency notes
8797   (\pxlref{representation-eff-note}) from the compiler will guide you
8798   on what declarations might be useful.
8799 \end{changebar}
8800
8801 When a float must be represented as a descriptor, a pointer representation is
8802 used, creating consing overhead.  For this reason, you should try to avoid
8803 situations (such as full call and non-specialized data structures) that force a
8804 descriptor representation.  See sections \ref{specialized-array-types},
8805 \ref{raw-slots} and \ref{number-local-call}.
8806
8807 \xlref{ieee-float} for information on the extensions to support IEEE
8808 floating point.
8809
8810 %%\node Specialized Arrays, Specialized Structure Slots, Floating Point Efficiency, Numbers
8811 \subsection{Specialized Arrays}
8812 \label{specialized-array-types}
8813 \cindex{specialized array types}
8814 \cpsubindex{array types}{specialized}
8815 \cpsubindex{types}{specialized array}
8816
8817 \clisp{} supports specialized array element types through the
8818 \kwd{element-type} argument to \code{make-array}.  When an array has a
8819 specialized element type, only elements of that type can be stored in
8820 the array.  From this restriction come two major efficiency
8821 advantages:
8822 \begin{itemize}
8823
8824 \item A specialized array can save space by packing multiple elements
8825   into a single word.  For example, a \code{base-char} array can have
8826   4 elements per word, and a \code{bit} array can have 32.  This
8827   space-efficient representation is possible because it is not
8828   necessary to separately indicate the type of each element.
8829
8830 \item The elements in a specialized array can be given the same
8831   non-descriptor representation as the one used in registers and on
8832   the stack, eliminating the need for representation conversions when
8833   reading and writing array elements.  For objects with pointer
8834   descriptor representations (such as floats and word integers) there
8835   is also a substantial consing reduction because it is not necessary
8836   to allocate a new object every time an array element is modified.
8837 \end{itemize}
8838
8839
8840 These are the specialized element types currently supported:
8841 \begin{lisp}
8842 bit
8843 (unsigned-byte 2)
8844 (unsigned-byte 4)
8845 (unsigned-byte 8)
8846 (unsigned-byte 16)
8847 (unsigned-byte 32)
8848 base-character
8849 single-float
8850 double-float
8851 \end{lisp}
8852 \begin{changebar}
8853 %% New stuff
8854 Some versions of \cmucl{}\footnote{Currently, this includes the X86
8855   and Sparc versions which are compiled with the \kwd{signed-array}
8856   feature.} also support the following specialized element types:
8857 \begin{lisp}
8858 (signed-byte 8)
8859 (signed-byte 16)
8860 (signed-byte 30)
8861 (signed-byte 32)
8862 \end{lisp}
8863 \end{changebar}
8864 Although a \code{simple-vector} can hold any type of object, \true{}
8865 should still be considered a specialized array type, since arrays with
8866 element type \true{} are specialized to hold descriptors.
8867
8868
8869
8870 When using non-descriptor representations, it is particularly
8871 important to make sure that array accesses are open-coded, since in
8872 addition to the generic operation overhead, efficiency is lost when
8873 the array element is converted to a descriptor so that it can be
8874 passed to (or from) the generic access routine.  You can detect
8875 inefficient array accesses by enabling efficiency notes,
8876 \pxlref{efficiency-notes}.  \xlref{array-types}.
8877
8878 %%\node Specialized Structure Slots, Interactions With Local Call, Specialized Arrays, Numbers
8879 \subsection{Specialized Structure Slots}
8880 \label{raw-slots}
8881 \cpsubindex{structure types}{numeric slots}
8882 \cindex{specialized structure slots}
8883
8884 Structure slots declared by the \kwd{type} \code{defstruct} slot option
8885 to have certain known numeric types are also given non-descriptor
8886 representations.  These types (and subtypes of these types) are supported:
8887 \begin{lisp}
8888 (unsigned-byte 32)
8889 single-float
8890 double-float
8891 \end{lisp}
8892
8893 The primary advantage of specialized slot representations is a large
8894 reduction in spurious memory allocation and access overhead of programs
8895 that intensively use these types.
8896
8897 %%\node Interactions With Local Call, Representation of Characters, Specialized Structure Slots, Numbers
8898 \subsection{Interactions With Local Call}
8899 \label{number-local-call}
8900 \cpsubindex{local call}{numeric operands}
8901 \cpsubindex{call}{numeric operands}
8902 \cindex{numbers in local call}
8903
8904 Local call has many advantages (\pxlref{local-call}); one relevant to
8905 our discussion here is that local call extends the usefulness of
8906 non-descriptor representations.  If the compiler knows from the
8907 argument type that an argument has a non-descriptor representation,
8908 then the argument will be passed in that representation.  The easiest
8909 way to ensure that the argument type is known at compile time is to
8910 always declare the argument type in the called function, like:
8911 \begin{lisp}
8912 (defun 2+f (x)
8913   (declare (single-float x))
8914   (+ x 2.0))
8915 \end{lisp}
8916 The advantages of passing arguments and return values in a non-descriptor
8917 representation are the same as for non-descriptor representations in general:
8918 reduced consing and memory access (\pxlref{non-descriptor}.)  This
8919 extends the applicative programming styles discussed in section
8920 \ref{local-call} to numeric code.  Also, if source files are kept reasonably
8921 small, block compilation can be used to reduce number consing to a minimum.
8922
8923 Note that non-descriptor return values can only be used with the known return
8924 convention (section \ref{local-call-return}.)  If the compiler can't prove that
8925 a function always returns the same number of values, then it must use the
8926 unknown values return convention, which requires a descriptor representation.
8927 Pay attention to the known return efficiency notes to avoid number consing.
8928
8929 %%\node Representation of Characters,  , Interactions With Local Call, Numbers
8930 \subsection{Representation of Characters}
8931 \label{characters}
8932 \cindex{characters}
8933 \cindex{strings}
8934
8935 \python{} also uses a non-descriptor representation for characters when
8936 convenient.  This improves the efficiency of string manipulation, but is
8937 otherwise pretty invisible; characters have an immediate descriptor
8938 representation, so there is not a great penalty for converting a character to a
8939 descriptor.  Nonetheless, it may sometimes be helpful to declare
8940 character-valued variables as \code{base-character}.
8941
8942 %%\f
8943 %%\node General Efficiency Hints, Efficiency Notes, Numbers, Advanced Compiler Use and Efficiency Hints
8944 \section{General Efficiency Hints}
8945 \label{general-efficiency}
8946 \cpsubindex{efficiency}{general hints}
8947
8948 This section is a summary of various implementation costs and ways to get
8949 around them.  These hints are relatively unrelated to the use of the \python{}
8950 compiler, and probably also apply to most other \llisp{} implementations.  In
8951 each section, there are references to related in-depth discussion.
8952
8953 \begin{comment}
8954 * Compile Your Code::
8955 * Avoid Unnecessary Consing::
8956 * Complex Argument Syntax::
8957 * Mapping and Iteration::
8958 * Trace Files and Disassembly::
8959 \end{comment}
8960
8961 %%\node Compile Your Code, Avoid Unnecessary Consing, General Efficiency Hints, General Efficiency Hints
8962 \subsection{Compile Your Code}
8963 \cpsubindex{compilation}{why to}
8964
8965 At this point, the advantages of compiling code relative to running it
8966 interpreted probably need not be emphasized too much, but remember that
8967 in \cmucl, compiled code typically runs hundreds of times faster than
8968 interpreted code.  Also, compiled (\code{fasl}) files load significantly faster
8969 than source files, so it is worthwhile compiling files which are loaded many
8970 times, even if the speed of the functions in the file is unimportant.
8971
8972 Even disregarding the efficiency advantages, compiled code is as good as or better
8973 than interpreted code.  Compiled code can be debugged at the source level (see
8974 chapter \ref{debugger}), and compiled code does more error checking.  For these
8975 reasons, the interpreter should be regarded mainly as an interactive command
8976 interpreter, rather than as a programming language implementation.
8977
8978 \b{Do not} be concerned about the performance of your program until you
8979 see its speed compiled.  Some techniques that make compiled code run
8980 faster make interpreted code run slower.
8981
8982 %%\node Avoid Unnecessary Consing, Complex Argument Syntax, Compile Your Code, General Efficiency Hints
8983 \subsection{Avoid Unnecessary Consing}
8984 \label{consing}
8985 \cindex{consing}
8986 \cindex{garbage collection}
8987 \cindex{memory allocation}
8988 \cpsubindex{efficiency}{of memory use}
8989
8990
8991 Consing is another name for allocation of storage, as done by the
8992 \code{cons} function (hence its name.)  \code{cons} is by no means the
8993 only function which conses\dash{}so do \code{make-array} and many
8994 other functions.  Arithmetic and function call can also have hidden
8995 consing overheads.  Consing hurts performance in the following ways:
8996 \begin{itemize}
8997
8998 \item Consing reduces memory access locality, increasing paging
8999   activity.
9000
9001 \item Consing takes time just like anything else.
9002
9003 \item Any space allocated eventually needs to be reclaimed, either by
9004   garbage collection or by starting a new \code{lisp} process.
9005 \end{itemize}
9006
9007
9008 Consing is not undiluted evil, since programs do things other than
9009 consing, and appropriate consing can speed up the real work.  It would
9010 certainly save time to allocate a vector of intermediate results that
9011 are reused hundreds of times.  Also, if it is necessary to copy a
9012 large data structure many times, it may be more efficient to update
9013 the data structure non-destructively; this somewhat increases update
9014 overhead, but makes copying trivial.
9015
9016 Note that the remarks in section \ref{efficiency-overview} about the
9017 importance of separating tuning from coding also apply to consing
9018 overhead.  The majority of consing will be done by a small portion of
9019 the program.  The consing hot spots are even less predictable than the
9020 CPU hot spots, so don't waste time and create bugs by doing
9021 unnecessary consing optimization.  During initial coding, avoid
9022 unnecessary side-effects and cons where it is convenient.  If
9023 profiling reveals a consing problem, \var{then} go back and fix the
9024 hot spots.
9025
9026 \xlref{non-descriptor} for a discussion of how to avoid number consing
9027 in \python.
9028
9029
9030 %%\node Complex Argument Syntax, Mapping and Iteration, Avoid Unnecessary Consing, General Efficiency Hints
9031 \subsection{Complex Argument Syntax}
9032 \cpsubindex{argument syntax}{efficiency}
9033 \cpsubindex{efficiency}{of argument syntax}
9034 \cindex{keyword argument efficiency}
9035 \cindex{rest argument efficiency}
9036
9037 Common Lisp has very powerful argument passing mechanisms.  Unfortunately, two
9038 of the most powerful mechanisms, rest arguments and keyword arguments, have a
9039 significant performance penalty:
9040 \begin{itemize}
9041
9042 \item
9043 With keyword arguments, the called function has to parse the supplied keywords
9044 by iterating over them and checking them against the desired keywords.
9045
9046 \item
9047 With rest arguments, the function must cons a list to hold the arguments.  If a
9048 function is called many times or with many arguments, large amounts of memory
9049 will be allocated.
9050 \end{itemize}
9051
9052 Although rest argument consing is worse than keyword parsing, neither problem
9053 is serious unless thousands of calls are made to such a function.  The use of
9054 keyword arguments is strongly encouraged in functions with many arguments or
9055 with interfaces that are likely to be extended, and rest arguments are often
9056 natural in user interface functions.
9057
9058 Optional arguments have some efficiency advantage over keyword
9059 arguments, but their syntactic clumsiness and lack of extensibility
9060 have caused many \clisp{} programmers to abandon use of optionals
9061 except in functions that have obviously simple and immutable
9062 interfaces (such as \code{subseq}), or in functions that are only
9063 called in a few places.  When defining an interface function to be
9064 used by other programmers or users, use of only required and keyword
9065 arguments is recommended.
9066
9067 Parsing of \code{defmacro} keyword and rest arguments is done at
9068 compile time, so a macro can be used to provide a convenient syntax
9069 with an efficient implementation.  If the macro-expanded form contains
9070 no keyword or rest arguments, then it is perfectly acceptable in inner
9071 loops.
9072
9073 Keyword argument parsing overhead can also be avoided by use of inline
9074 expansion (\pxlref{inline-expansion}) and block compilation (section
9075 \ref{block-compilation}.)
9076
9077 Note: the compiler open-codes most heavily used system functions which have
9078 keyword or rest arguments, so that no run-time overhead is involved.
9079
9080 %%\node Mapping and Iteration, Trace Files and Disassembly, Complex Argument Syntax, General Efficiency Hints
9081 \subsection{Mapping and Iteration}
9082 \cpsubindex{mapping}{efficiency of}
9083
9084 One of the traditional \llisp{} programming styles is a highly applicative one,
9085 involving the use of mapping functions and many lists to store intermediate
9086 results.  To compute the sum of the square-roots of a list of numbers, one
9087 might say:
9088 \begin{lisp}
9089 (apply #'+ (mapcar #'sqrt list-of-numbers))
9090 \end{lisp}
9091
9092 This programming style is clear and elegant, but unfortunately results
9093 in slow code.  There are two reasons why:
9094 \begin{itemize}
9095
9096 \item The creation of lists of intermediate results causes much
9097   consing (see section \ref{consing}).
9098
9099 \item Each level of application requires another scan down the list.
9100   Thus, disregarding other effects, the above code would probably take
9101   twice as long as a straightforward iterative version.
9102 \end{itemize}
9103
9104
9105 An example of an iterative version of the same code:
9106 \begin{lisp}
9107 (do ((num list-of-numbers (cdr num))
9108      (sum 0 (+ (sqrt (car num)) sum)))
9109     ((null num) sum))
9110 \end{lisp}
9111
9112 See sections \ref{variable-type-inference} and \ref{let-optimization}
9113 for a discussion of the interactions of iteration constructs with type
9114 inference and variable optimization.  Also, section
9115 \ref{local-tail-recursion} discusses an applicative style of
9116 iteration.
9117
9118 %%\node Trace Files and Disassembly,  , Mapping and Iteration, General Efficiency Hints
9119 \subsection{Trace Files and Disassembly}
9120 \label{trace-files}
9121 \cindex{trace files}
9122 \cindex{assembly listing}
9123 \cpsubindex{listing files}{trace}
9124 \cindex{Virtual Machine (VM, or IR2) representation}
9125 \cindex{implicit continuation representation (IR1)}
9126 \cpsubindex{continuations}{implicit representation}
9127
9128 In order to write efficient code, you need to know the relative costs
9129 of different operations.  The main reason why writing efficient
9130 \llisp{} code is difficult is that there are so many operations, and
9131 the costs of these operations vary in obscure context-dependent ways.
9132 Although efficiency notes point out some problem areas, the only way
9133 to ensure generation of the best code is to look at the assembly code
9134 output.
9135
9136 The \code{disassemble} function is a convenient way to get the assembly code for a
9137 function, but it can be very difficult to interpret, since the correspondence
9138 with the original source code is weak.  A better (but more awkward) option is
9139 to use the \kwd{trace-file} argument to \code{compile-file} to generate a trace
9140 file.
9141
9142 A trace file is a dump of the compiler's internal representations,
9143 including annotated assembly code.  Each component in the program gets
9144 four pages in the trace file (separated by ``\code{$\hat{ }L$}''):
9145 \begin{itemize}
9146
9147 \item The implicit-continuation (or IR1) representation of the
9148   optimized source.  This is a dump of the flow graph representation
9149   used for ``source level'' optimizations.  As you will quickly
9150   notice, it is not really very close to the source.  This
9151   representation is not very useful to even sophisticated users.
9152
9153 \item The Virtual Machine (VM, or IR2) representation of the program.
9154   This dump represents the generated code as sequences of ``Virtual
9155   OPerations'' (VOPs.)  This representation is intermediate between
9156   the source and the assembly code\dash{}each VOP corresponds fairly
9157   directly to some primitive function or construct, but a given VOP
9158   also has a fairly predictable instruction sequence.  An operation
9159   (such as \code{+}) may have multiple implementations with different
9160   cost and applicability.  The choice of a particular VOP such as
9161   \code{+/fixnum} or \code{+/single-float} represents this choice of
9162   implementation.  Once you are familiar with it, the VM
9163   representation is probably the most useful for determining what
9164   implementation has been used.
9165
9166 \item An assembly listing, annotated with the VOP responsible for
9167   generating the instructions.  This listing is useful for figuring
9168   out what a VOP does and how it is implemented in a particular
9169   context, but its large size makes it more difficult to read.
9170
9171 \item A disassembly of the generated code, which has all
9172   pseudo-operations expanded out, but is not annotated with VOPs.
9173 \end{itemize}
9174
9175
9176 Note that trace file generation takes much space and time, since the trace file
9177 is tens of times larger than the source file.  To avoid huge confusing trace
9178 files and much wasted time, it is best to separate the critical program portion
9179 into its own file and then generate the trace file from this small file.
9180
9181 %%\f
9182 %%\node Efficiency Notes, Profiling, General Efficiency Hints, Advanced Compiler Use and Efficiency Hints
9183 \section{Efficiency Notes}
9184 \label{efficiency-notes}
9185 \cindex{efficiency notes}
9186 \cpsubindex{notes}{efficiency}
9187 \cindex{tuning}
9188
9189 Efficiency notes are messages that warn the user that the compiler has
9190 chosen a relatively inefficient implementation for some operation.
9191 Usually an efficiency note reflects the compiler's desire for more
9192 type information.  If the type of the values concerned is known to the
9193 programmer, then additional declarations can be used to get a more
9194 efficient implementation.
9195
9196 Efficiency notes are controlled by the
9197 \code{extensions:inhibit-warnings} (\pxlref{optimize-declaration})
9198 optimization quality. When \code{speed} is greater than
9199 \code{extensions:inhibit-warnings}, efficiency notes are enabled.
9200 Note that this implicitly enables efficiency notes whenever
9201 \code{speed} is increased from its default of \code{1}.
9202
9203 Consider this program with an obscure missing declaration:
9204 \begin{lisp}
9205 (defun eff-note (x y z)
9206   (declare (fixnum x y z))
9207   (the fixnum (+ x y z)))
9208 \end{lisp}
9209 If compiled with \code{\w{(speed 3) (safety 0)}}, this note is given:
9210 \begin{example}
9211 In: DEFUN EFF-NOTE
9212   (+ X Y Z)
9213 ==>
9214   (+ (+ X Y) Z)
9215 Note: Forced to do inline (signed-byte 32) arithmetic (cost 3).
9216       Unable to do inline fixnum arithmetic (cost 2) because:
9217       The first argument is a (INTEGER -1073741824 1073741822),
9218       not a FIXNUM.
9219 \end{example}
9220 This efficiency note tells us that the result of the intermediate
9221 computation \code{\w{(+ x y)}} is not known to be a \code{fixnum}, so
9222 the addition of the intermediate sum to \code{z} must be done less
9223 efficiently.  This can be fixed by changing the definition of
9224 \code{eff-note}:
9225 \begin{lisp}
9226 (defun eff-note (x y z)
9227   (declare (fixnum x y z))
9228   (the fixnum (+ (the fixnum (+ x y)) z)))
9229 \end{lisp}
9230
9231 \begin{comment}
9232 * Type Uncertainty::
9233 * Efficiency Notes and Type Checking::
9234 * Representation Efficiency Notes::
9235 * Verbosity Control::
9236 \end{comment}
9237
9238 %%\node Type Uncertainty, Efficiency Notes and Type Checking, Efficiency Notes, Efficiency Notes
9239 \subsection{Type Uncertainty}
9240 \cpsubindex{types}{uncertainty}
9241 \cindex{uncertainty of types}
9242
9243 The main cause of inefficiency is the compiler's lack of adequate
9244 information about the types of function argument and result values.
9245 Many important operations (such as arithmetic) have an inefficient
9246 general (generic) case, but have efficient implementations that can
9247 usually be used if there is sufficient argument type information.
9248
9249 Type efficiency notes are given when a value's type is uncertain.
9250 There is an important distinction between values that are \i{not
9251   known} to be of a good type (uncertain) and values that are \i{known
9252   not} to be of a good type.  Efficiency notes are given mainly for
9253 the first case (uncertain types.)  If it is clear to the compiler that
9254 there is not an efficient implementation for a particular
9255 function call, then an efficiency note will only be given if the
9256 \code{extensions:inhibit-warnings} optimization quality is \code{0}
9257 (\pxlref{optimize-declaration}.)
9258
9259 In other words, the default efficiency notes only suggest that you add
9260 declarations, not that you change the semantics of your program so
9261 that an efficient implementation will apply.  For example, compilation
9262 of this form will not give an efficiency note:
9263 \begin{lisp}
9264 (elt (the list l) i)
9265 \end{lisp}
9266 even though a vector access is more efficient than indexing a list.
9267
9268 %%\node Efficiency Notes and Type Checking, Representation Efficiency Notes, Type Uncertainty, Efficiency Notes
9269 \subsection{Efficiency Notes and Type Checking}
9270 \cpsubindex{type checking}{efficiency of}
9271 \cpsubindex{efficiency}{of type checking}
9272 \cpsubindex{optimization}{type check}
9273
9274 It is important that the \code{eff-note} example above used
9275 \w{\code{(safety 0)}}.  When type checking is enabled, you may get apparently
9276 spurious efficiency notes.  With \w{\code{(safety 1)}}, the note has this extra
9277 line on the end:
9278 \begin{example}
9279 The result is a (INTEGER -1610612736 1610612733), not a FIXNUM.
9280 \end{example}
9281 This seems strange, since there is a \code{the} declaration on the result of that
9282 second addition.
9283
9284 In fact, the inefficiency is real, and is a consequence of \python{}'s
9285 treating declarations as assertions to be verified.  The compiler
9286 can't assume that the result type declaration is true\dash{}it must
9287 generate the result and then test whether it is of the appropriate
9288 type.
9289
9290 In practice, this means that when you are tuning a program to run
9291 without type checks, you should work from the efficiency notes
9292 generated by unsafe compilation.  If you want code to run efficiently
9293 with type checking, then you should pay attention to all the
9294 efficiency notes that you get during safe compilation.  Since user
9295 supplied output type assertions (e.g., from \code{the}) are
9296 disregarded when selecting operation implementations for safe code,
9297 you must somehow give the compiler information that allows it to prove
9298 that the result truly must be of a good type.  In our example, it
9299 could be done by constraining the argument types more:
9300 \begin{lisp}
9301 (defun eff-note (x y z)
9302   (declare (type (unsigned-byte 18) x y z))
9303   (+ x y z))
9304 \end{lisp}
9305 Of course, this declaration is acceptable only if the arguments to \code{eff-note}
9306 always \var{are} \w{\code{(unsigned-byte 18)}} integers.
9307
9308 %%\node Representation Efficiency Notes, Verbosity Control, Efficiency Notes and Type Checking, Efficiency Notes
9309 \subsection{Representation Efficiency Notes}
9310 \label{representation-eff-note}
9311 \cindex{representation efficiency notes}
9312 \cpsubindex{efficiency notes}{for representation}
9313 \cindex{object representation efficiency notes}
9314 \cindex{stack numbers}
9315 \cindex{non-descriptor representations}
9316 \cpsubindex{descriptor representations}{forcing of}
9317
9318 When operating on values that have non-descriptor representations
9319 (\pxlref{non-descriptor}), there can be a substantial time and consing
9320 penalty for converting to and from descriptor representations.  For
9321 this reason, the compiler gives an efficiency note whenever it is
9322 forced to do a representation coercion more expensive than
9323 \varref{efficiency-note-cost-threshold}.
9324
9325 Inefficient representation coercions may be due to type uncertainty,
9326 as in this example:
9327 \begin{lisp}
9328 (defun set-flo (x)
9329   (declare (single-float x))
9330   (prog ((var 0.0))
9331     (setq var (gorp))
9332     (setq var x)
9333     (return var)))
9334 \end{lisp}
9335 which produces this efficiency note:
9336 \begin{example}
9337 In: DEFUN SET-FLO
9338   (SETQ VAR X)
9339 Note: Doing float to pointer coercion (cost 13) from X to VAR.
9340 \end{example}
9341 The variable \code{var} is not known to always hold values of type
9342 \code{single-float}, so a descriptor representation must be used for its value.
9343 In this sort of situation, adding a declaration will eliminate the inefficiency.
9344
9345 Often inefficient representation conversions are not due to type
9346 uncertainty\dash{}instead, they result from evaluating a
9347 non-descriptor expression in a context that requires a descriptor
9348 result:
9349 \begin{itemize}
9350
9351 \item Assignment to or initialization of any data structure other than
9352   a specialized array (\pxlref{specialized-array-types}), or
9353
9354 \item Assignment to a \code{special} variable, or
9355
9356 \item Passing as an argument or returning as a value in any function
9357   call that is not a local call (\pxlref{number-local-call}.)
9358 \end{itemize}
9359
9360 If such inefficient coercions appear in a ``hot spot'' in the program, data
9361 structure redesign or program reorganization may be necessary to improve
9362 efficiency.  See sections \ref{block-compilation}, \ref{numeric-types} and
9363 \ref{profiling}.
9364
9365 Because representation selection is done rather late in compilation,
9366 the source context in these efficiency notes is somewhat vague, making
9367 interpretation more difficult.  This is a fairly straightforward
9368 example:
9369 \begin{lisp}
9370 (defun cf+ (x y)
9371   (declare (single-float x y))
9372   (cons (+ x y) t))
9373 \end{lisp}
9374 which gives this efficiency note:
9375 \begin{example}
9376 In: DEFUN CF+
9377   (CONS (+ X Y) T)
9378 Note: Doing float to pointer coercion (cost 13), for:
9379       The first argument of CONS.
9380 \end{example}
9381 The source context form is almost always the form that receives the value being
9382 coerced (as it is in the preceding example), but can also be the source form
9383 which generates the coerced value.  Compiling this example:
9384 \begin{lisp}
9385 (defun if-cf+ (x y)
9386   (declare (single-float x y))
9387   (cons (if (grue) (+ x y) (snoc)) t))
9388 \end{lisp}
9389 produces this note:
9390 \begin{example}
9391 In: DEFUN IF-CF+
9392   (+ X Y)
9393 Note: Doing float to pointer coercion (cost 13).
9394 \end{example}
9395
9396 In either case, the note's text explanation attempts to include
9397 additional information about what locations are the source and
9398 destination of the coercion.  Here are some example notes:
9399 \begin{example}
9400   (IF (GRUE) X (SNOC))
9401 Note: Doing float to pointer coercion (cost 13) from X.
9402
9403   (SETQ VAR X)
9404 Note: Doing float to pointer coercion (cost 13) from X to VAR.
9405 \end{example}
9406 Note that the return value of a function is also a place to which coercions may
9407 have to be done:
9408 \begin{example}
9409   (DEFUN F+ (X Y) (DECLARE (SINGLE-FLOAT X Y)) (+ X Y))
9410 Note: Doing float to pointer coercion (cost 13) to "<return value>".
9411 \end{example}
9412 Sometimes the compiler is unable to determine a name for the source or
9413 destination, in which case the source context is the only clue.
9414
9415
9416 %%\node Verbosity Control,  , Representation Efficiency Notes, Efficiency Notes
9417 \subsection{Verbosity Control}
9418 \cpsubindex{verbosity}{of efficiency notes}
9419 \cpsubindex{efficiency notes}{verbosity}
9420
9421 These variables control the verbosity of efficiency notes:
9422
9423 \begin{defvar}{}{efficiency-note-cost-threshold}
9424
9425   Before printing some efficiency notes, the compiler compares the
9426   value of this variable to the difference in cost between the chosen
9427   implementation and the best potential implementation.  If the
9428   difference is not greater than this limit, then no note is printed.
9429   The units are implementation dependent; the initial value suppresses
9430   notes about ``trivial'' inefficiencies.  A value of \code{1} will
9431   note any inefficiency.
9432 \end{defvar}
9433
9434 \begin{defvar}{}{efficiency-note-limit}
9435
9436   When printing some efficiency notes, the compiler reports possible
9437   efficient implementations.  The initial value of \code{2} prevents
9438   excessively long efficiency notes in the common case where there is
9439   no type information, so all implementations are possible.
9440 \end{defvar}
9441
9442 %%\f
9443 %%\node Profiling,  , Efficiency Notes, Advanced Compiler Use and Efficiency Hints
9444 \section{Profiling}
9445
9446 \cindex{profiling}
9447 \cindex{timing}
9448 \cindex{consing}
9449 \cindex{tuning}
9450 \label{profiling}
9451
9452 The first step in improving a program's performance is to profile the
9453 activity of the program to find where it spends its time.  The best
9454 way to do this is to use the profiling utility found in the
9455 \code{profile} package.  This package provides a macro \code{profile}
9456 that encapsulates functions with statistics gathering code.
9457
9458 \begin{comment}
9459 * Profile Interface::
9460 * Profiling Techniques::
9461 * Nested or Recursive Calls::
9462 * Clock resolution::
9463 * Profiling overhead::
9464 * Additional Timing Utilities::
9465 * A Note on Timing::
9466 * Benchmarking Techniques::
9467 \end{comment}
9468
9469 %%\node Profile Interface, Profiling Techniques, Profiling, Profiling
9470 \subsection{Profile Interface}
9471
9472 \begin{defvar}{profile:}{timed-functions}
9473
9474   This variable holds a list of all functions that are currently being
9475   profiled.
9476 \end{defvar}
9477
9478 \begin{defmac}{profile:}{profile}{%
9479     \args{\mstar{\var{name} \mor \kwd{callers} \code{t}}}}
9480
9481   This macro wraps profiling code around the named functions.  As in
9482   \code{trace}, the \var{name}s are not evaluated.  If a function is
9483   already profiled, then the function is unprofiled and reprofiled
9484   (useful to notice function redefinition.)  A warning is printed for
9485   each name that is not a defined function.
9486
9487   If \kwd{callers} \code{t} is specified, then each function that calls
9488   this function is recorded along with the number of calls made.
9489 \end{defmac}
9490
9491 \begin{defmac}{profile:}{unprofile}{%
9492     \args{\mstar{\var{name}}}}
9493
9494   This macro removes profiling code from the named functions.  If no
9495   \var{name}s are supplied, all currently profiled functions are
9496   unprofiled.
9497 \end{defmac}
9498
9499 \begin{changebar}
9500   \begin{defmac}{profile:}{profile-all}{%
9501       \args{\keys{\kwd{package} \kwd{callers-p}}}}
9502
9503     This macro in effect calls \code{profile:profile} for each
9504     function in the specified package which defaults to
9505     \code{*package*}.  \kwd{callers-p} has the same meaning as in
9506     \code{profile:profile}.
9507   \end{defmac}
9508 \end{changebar}
9509
9510 \begin{defmac}{profile:}{report-time}{\args{\mstar{\var{name}}}}
9511
9512   This macro prints a report for each \var{name}d function of the
9513   following information:
9514   \begin{itemize}
9515   \item The total CPU time used in that function for all calls,
9516
9517   \item the total number of bytes consed in that function for all
9518     calls,
9519
9520   \item the total number of calls,
9521
9522   \item the average amount of CPU time per call.
9523   \end{itemize}
9524   Summary totals of the CPU time, consing and calls columns are
9525   printed.  An estimate of the profiling overhead is also printed (see
9526   below).  If no \var{name}s are supplied, then the times for all
9527   currently profiled functions are printed.
9528 \end{defmac}
9529
9530 \begin{defmac}{profile:}{reset-time}{\args{\mstar{\var{name}}}}
9531
9532   This macro resets the profiling counters associated with the
9533   \var{name}d functions.  If no \var{name}s are supplied, then all
9534   currently profiled functions are reset.
9535 \end{defmac}
9536
9537
9538 %%\node Profiling Techniques, Nested or Recursive Calls, Profile Interface, Profiling
9539 \subsection{Profiling Techniques}
9540
9541 Start by profiling big pieces of a program, then carefully choose which
9542 functions close to, but not in, the inner loop are to be profiled next.
9543 Avoid profiling functions that are called by other profiled functions, since
9544 this opens the possibility of profiling overhead being included in the reported
9545 times.
9546
9547 If the per-call time reported is less than 1/10 second, then consider the clock
9548 resolution and profiling overhead before you believe the time.  It may be that
9549 you will need to run your program many times in order to average out to a
9550 higher resolution.
9551
9552
9553 %%\node Nested or Recursive Calls, Clock resolution, Profiling Techniques, Profiling
9554 \subsection{Nested or Recursive Calls}
9555
9556 The profiler attempts to compensate for nested or recursive calls.  Time and
9557 consing overhead will be charged to the dynamically innermost (most recent)
9558 call to a profiled function.  So profiling a subfunction of a profiled function
9559 will cause the reported time for the outer function to decrease.  However if an
9560 inner function has a large number of calls, some of the profiling overhead may
9561 ``leak'' into the reported time for the outer function.  In general, be wary of
9562 profiling short functions that are called many times.
9563
9564 %%\node Clock resolution, Profiling overhead, Nested or Recursive Calls, Profiling
9565 \subsection{Clock resolution}
9566
9567 Unless you are very lucky, the length of your machine's clock ``tick'' is
9568 probably much longer than the time it takes a simple function to run.  For
9569 example, on the IBM RT, the clock resolution is 1/50 second.  This means that
9570 if a function is only called a few times, then only the first couple of decimal
9571 places are really meaningful.
9572
9573 Note however, that if a function is called many times, then the statistical
9574 averaging across all calls should result in increased resolution.  For example,
9575 on the IBM RT, if a function is called a thousand times, then a resolution of
9576 tens of microseconds can be expected.
9577
9578 %%\node Profiling overhead, Additional Timing Utilities, Clock resolution, Profiling
9579 \subsection{Profiling overhead}
9580
9581 The added profiling code takes time to run every time that the profiled
9582 function is called, which can disrupt the attempt to collect timing
9583 information.  In order to avoid serious inflation of the times for functions
9584 that take little time to run, an estimate of the overhead due to profiling is
9585 subtracted from the times reported for each function.
9586
9587 Although this correction works fairly well, it is not totally accurate,
9588 resulting in times that become increasingly meaningless for functions with
9589 short runtimes.  This is only a concern when the estimated profiling overhead
9590 is many times larger than reported total CPU time.
9591
9592 The estimated profiling overhead is not represented in the reported total CPU
9593 time.  The sum of total CPU time and the estimated profiling overhead should be
9594 close to the total CPU time for the entire profiling run (as determined by the
9595 \code{time} macro.)  Time unaccounted for is probably being used by functions that
9596 you forgot to profile.
9597
9598 %%\node Additional Timing Utilities, A Note on Timing, Profiling overhead, Profiling
9599 \subsection{Additional Timing Utilities}
9600
9601 \begin{defmac}{}{time}{ \args{\var{form}}}
9602
9603   This macro evaluates \var{form}, prints some timing and memory
9604   allocation information to \code{*trace-output*}, and returns any
9605   values that \var{form} returns.  The timing information includes
9606   real time, user run time, and system run time.  This macro executes
9607   a form and reports the time and consing overhead.  If the
9608   \code{time} form is not compiled (e.g. it was typed at top-level),
9609   then \code{compile} will be called on the form to give more accurate
9610   timing information.  If you really want to time interpreted speed,
9611   you can say:
9612 \begin{lisp}
9613 (time (eval '\var{form}))
9614 \end{lisp}
9615 Things that execute fairly quickly should be timed more than once,
9616 since there may be more paging overhead in the first timing.  To
9617 increase the accuracy of very short times, you can time multiple
9618 evaluations:
9619 \begin{lisp}
9620 (time (dotimes (i 100) \var{form}))
9621 \end{lisp}
9622 \end{defmac}
9623
9624 \begin{defun}{extensions:}{get-bytes-consed}{}
9625
9626   This function returns the number of bytes allocated since the first
9627   time you called it.  The first time it is called it returns zero.
9628   The above profiling routines use this to report consing information.
9629 \end{defun}
9630
9631 \begin{defvar}{extensions:}{gc-run-time}
9632
9633   This variable accumulates the run-time consumed by garbage
9634   collection, in the units returned by
9635   \findexed{get-internal-run-time}.
9636 \end{defvar}
9637
9638 \begin{defconst}{}{internal-time-units-per-second}
9639 The value of internal-time-units-per-second is 100.
9640 \end{defconst}
9641
9642 %%\node A Note on Timing, Benchmarking Techniques, Additional Timing Utilities, Profiling
9643 \subsection{A Note on Timing}
9644 \cpsubindex{CPU time}{interpretation of}
9645 \cpsubindex{run time}{interpretation of}
9646 \cindex{interpretation of run time}
9647
9648 There are two general kinds of timing information provided by the
9649 \code{time} macro and other profiling utilities: real time and run
9650 time.  Real time is elapsed, wall clock time.  It will be affected in
9651 a fairly obvious way by any other activity on the machine.  The more
9652 other processes contending for CPU and memory, the more real time will
9653 increase.  This means that real time measurements are difficult to
9654 replicate, though this is less true on a dedicated workstation.  The
9655 advantage of real time is that it is real.  It tells you really how
9656 long the program took to run under the benchmarking conditions.  The
9657 problem is that you don't know exactly what those conditions were.
9658
9659 Run time is the amount of time that the processor supposedly spent
9660 running the program, as opposed to waiting for I/O or running other
9661 processes.  ``User run time'' and ``system run time'' are numbers
9662 reported by the Unix kernel.  They are supposed to be a measure of how
9663 much time the processor spent running your ``user'' program (which
9664 will include GC overhead, etc.), and the amount of time that the
9665 kernel spent running ``on your behalf.''
9666
9667 Ideally, user time should be totally unaffected by benchmarking
9668 conditions; in reality user time does depend on other system activity,
9669 though in rather non-obvious ways.
9670
9671 System time will clearly depend on benchmarking conditions.  In Lisp
9672 benchmarking, paging activity increases system run time (but not by as much
9673 as it increases real time, since the kernel spends some time waiting for
9674 the disk, and this is not run time, kernel or otherwise.)
9675
9676 In my experience, the biggest trap in interpreting kernel/user run time is
9677 to look only at user time.  In reality, it seems that the \var{sum} of kernel
9678 and user time is more reproducible.  The problem is that as system activity
9679 increases, there is a spurious \var{decrease} in user run time.  In effect, as
9680 paging, etc., increases, user time leaks into system time.
9681
9682 So, in practice, the only way to get truly reproducible results is to run
9683 with the same competing activity on the system.  Try to run on a machine
9684 with nobody else logged in, and check with ``ps aux'' to see if there are any
9685 system processes munching large amounts of CPU or memory.  If the ratio
9686 between real time and the sum of user and system time varies much between
9687 runs, then you have a problem.
9688
9689 %%\node Benchmarking Techniques,  , A Note on Timing, Profiling
9690 \subsection{Benchmarking Techniques}
9691 \cindex{benchmarking techniques}
9692
9693 Given these imperfect timing tools, how should you do benchmarking?  The
9694 answer depends on whether you are trying to measure improvements in the
9695 performance of a single program on the same hardware, or if you are trying to
9696 compare the performance of different programs and/or different hardware.
9697
9698 For the first use (measuring the effect of program modifications with
9699 constant hardware), you should look at \var{both} system+user and real time to
9700 understand what effect the change had on CPU use, and on I/O (including
9701 paging.)  If you are working on a CPU intensive program, the change in
9702 system+user time will give you a moderately reproducible measure of
9703 performance across a fairly wide range of system conditions.  For a CPU
9704 intensive program, you can think of system+user as ``how long it would have
9705 taken to run if I had my own machine.''  So in the case of comparing CPU
9706 intensive programs, system+user time is relatively real, and reasonable to
9707 use.
9708
9709 For programs that spend a substantial amount of their time paging, you
9710 really can't predict elapsed time under a given operating condition without
9711 benchmarking in that condition.  User or system+user time may be fairly
9712 reproducible, but it is also relatively meaningless, since in a paging or
9713 I/O intensive program, the program is spending its time waiting, not
9714 running, and system time and user time are both measures of run time.
9715 A change that reduces run time might increase real time by increasing
9716 paging.
9717
9718 Another common use for benchmarking is comparing the performance of
9719 the same program on different hardware.  You want to know which
9720 machine to run your program on.  For comparing different machines
9721 (operating systems, etc.), the only way to compare that makes sense is
9722 to set up the machines in \var{exactly} the way that they will
9723 \var{normally} be run, and then measure \var{real} time.  If the
9724 program will normally be run along with X, then run X.  If the program
9725 will normally be run on a dedicated workstation, then be sure nobody
9726 else is on the benchmarking machine.  If the program will normally be
9727 run on a machine with three other Lisp jobs, then run three other Lisp
9728 jobs.  If the program will normally be run on a machine with 8meg of
9729 memory, then run with 8meg.  Here, ``normal'' means ``normal for that
9730 machine''.  If you have the choice of an unloaded RT or a heavily loaded
9731 PMAX, do your benchmarking on an unloaded RT and a heavily loaded
9732 PMAX.
9733
9734 If you have a program you believe to be CPU intensive, then you might be
9735 tempted to compare ``run'' times across systems, hoping to get a meaningful
9736 result even if the benchmarking isn't done under the expected running
9737 condition.  Don't do this, for two reasons:
9738 \begin{itemize}
9739
9740 \item The operating systems might not compute run time in the same
9741   way.
9742
9743 \item Under the real running condition, the program might not be CPU
9744   intensive after all.
9745 \end{itemize}
9746
9747
9748 In the end, only real time means anything\dash{}it is the amount of time you
9749 have to wait for the result.  The only valid uses for run time are:
9750 \begin{itemize}
9751
9752 \item To develop insight into the program.  For example, if run time
9753   is much less than elapsed time, then you are probably spending lots
9754   of time paging.
9755
9756 \item To evaluate the relative performance of CPU intensive programs
9757   in the same environment.
9758 \end{itemize}
9759
9760
9761 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/Unix.ms}
9762
9763
9764
9765 %%\node UNIX Interface, Event Dispatching with SERVE-EVENT, Advanced Compiler Use and Efficiency Hints, Top
9766 \chapter{UNIX Interface}
9767 \label{unix-interface}
9768 \begin{center}
9769 \b{By Robert MacLachlan, Skef Wholey,}
9770 \end{center}
9771 \begin{center}
9772 \b{Bill Chiles, and William Lott}
9773 \end{center}
9774
9775 CMU Common Lisp attempts to make the full power of the underlying
9776 environment available to the Lisp programmer.  This is done using a
9777 combination of hand-coded interfaces and foreign function calls to C
9778 libraries.  Although the techniques differ, the style of interface is
9779 similar.  This chapter provides an overview of the facilities
9780 available and general rules for using them, as well as describing
9781 specific features in detail.  It is assumed that the reader has a
9782 working familiarity with Mach, Unix and X, as well as access to the
9783 standard system documentation.
9784
9785 \begin{comment}
9786 * Reading the Command Line::
9787 * Lisp Equivalents for C Routines::
9788 * Type Translations::
9789 * System Area Pointers::
9790 * Unix System Calls::
9791 * File Descriptor Streams::
9792 * Making Sense of Mach Return Codes::
9793 * Unix Interrupts::
9794 \end{comment}
9795
9796
9797 %%\node Reading the Command Line, Useful Variables, UNIX Interface, UNIX Interface
9798 \section{Reading the Command Line}
9799
9800 The shell parses the command line with which Lisp is invoked, and
9801 passes a data structure containing the parsed information to Lisp.
9802 This information is then extracted from that data structure and put
9803 into a set of Lisp data structures.
9804
9805 \begin{defvar}{extensions:}{command-line-strings}
9806   \defvarx[extensions:]{command-line-utility-name}
9807   \defvarx[extensions:]{command-line-words}
9808   \defvarx[extensions:]{command-line-switches}
9809
9810   The value of \code{*command-line-strings*} is a list of strings that
9811   make up the command line, one word per string.  The first word on
9812   the command line, i.e.  the name of the program invoked (usually
9813   \code{lisp}) is stored in \code{*command-line-utility-name*}.  The
9814   value of \code{*command-line-switches*} is a list of
9815   \code{command-line-switch} structures, with a structure for each
9816   word on the command line starting with a hyphen.  All the command
9817   line words between the program name and the first switch are stored
9818   in \code{*command-line-words*}.
9819 \end{defvar}
9820
9821 The following functions may be used to examine \code{command-line-switch}
9822 structures.
9823 \begin{defun}{extensions:}{cmd-switch-name}{\args{\var{switch}}}
9824
9825   Returns the name of the switch, less the preceding hyphen and
9826   trailing equal sign (if any).
9827 \end{defun}
9828 \begin{defun}{extensions:}{cmd-switch-value}{\args{\var{switch}}}
9829
9830   Returns the value designated using an embedded equal sign, if any.
9831   If the switch has no equal sign, then this is null.
9832 \end{defun}
9833 \begin{defun}{extensions:}{cmd-switch-words}{\args{\var{switch}}}
9834
9835   Returns a list of the words between this switch and the next switch
9836   or the end of the command line.
9837 \end{defun}
9838 \begin{defun}{extensions:}{cmd-switch-arg}{\args{\var{switch}}}
9839
9840   Returns the first non-null value from \code{cmd-switch-value}, the
9841   first element in \code{cmd-switch-words}, or the first word in
9842   \code{*command-line-words*}.
9843 \end{defun}
9844
9845 \begin{defun}{extensions:}{get-command-line-switch}{\args{\var{sname}}}
9846
9847   This function takes the name of a switch as a string and returns the
9848   value of the switch given on the command line.  If no value was
9849   specified, then any following words are returned.  If there are no
9850   following words, then \true{} is returned.  If the switch was not
9851   specified, then \false{} is returned.
9852 \end{defun}
9853
9854 \begin{defmac}{extensions:}{defswitch}{%
9855     \args{\var{name} \ampoptional{} \var{function}}}
9856
9857   This macro causes \var{function} to be called when the switch
9858   \var{name} appears in the command line.  Name is a simple-string
9859   that does not begin with a hyphen (unless the switch name really
9860   does begin with one.)
9861
9862   If \var{function} is not supplied, then the switch is parsed into
9863   \code{*command-line-switches*}, but otherwise ignored.  This suppresses
9864   the undefined switch warning which would otherwise take place.  The
9865   warning can also be globally suppressed by
9866   \code{*complain-about-illegal-switches*}.
9867 \end{defmac}
9868
9869 %%\node Useful Variables, Lisp Equivalents for C Routines, Reading the Command Line, UNIX Interface
9870
9871 \section{Useful Variables}
9872
9873 \begin{defvar}{system:}{stdin}
9874   \defvarx[system:]{stdout} \defvarx[system:]{stderr}
9875
9876   Streams connected to the standard input, output and error file
9877   descriptors.
9878 \end{defvar}
9879
9880 \begin{defvar}{system:}{tty}
9881
9882   A stream connected to \file{/dev/tty}.
9883 \end{defvar}
9884
9885 %%\node Lisp Equivalents for C Routines, Type Translations, Useful Variables, UNIX Interface
9886 \section{Lisp Equivalents for C Routines}
9887
9888 The UNIX documentation describes the system interface in terms of C
9889 procedure headers.  The corresponding Lisp function will have a somewhat
9890 different interface, since Lisp argument passing conventions and
9891 datatypes are different.
9892
9893 The main difference in the argument passing conventions is that Lisp does not
9894 support passing values by reference.  In Lisp, all arguments and results are
9895 passed by value.  Interface functions take some fixed number of arguments and
9896 return some fixed number of values.  A given ``parameter'' in the C
9897 specification will appear as an argument, return value, or both, depending on
9898 whether it is an In parameter, Out parameter, or In/Out parameter.  The basic
9899 transformation one makes to come up with the Lisp equivalent of a C routine is
9900 to remove the Out parameters from the call, and treat them as extra return
9901 values.  In/Out parameters appear both as arguments and return values.  Since
9902 Out and In/Out parameters are only conventions in C, you must determine the
9903 usage from the documentation.
9904
9905
9906 Thus, the C routine declared as
9907 \begin{example}
9908 kern_return_t lookup(servport, portsname, portsid)
9909         port        servport;
9910         char        *portsname;
9911         int        *portsid;        /* out */
9912  {
9913   ...
9914   *portsid = <expression to compute portsid field>
9915   return(KERN_SUCCESS);
9916  }
9917 \end{example}
9918 has as its Lisp equivalent something like
9919 \begin{lisp}
9920 (defun lookup (ServPort PortsName)
9921   ...
9922   (values
9923    success
9924    <expression to compute portsid field>))
9925 \end{lisp}
9926 If there are multiple out or in-out arguments, then there are multiple
9927 additional return values.
9928
9929 Fortunately, CMU Common Lisp programmers rarely have to worry about the
9930 nuances of this translation process, since the names of the arguments and
9931 return values are documented in a way so that the \code{describe} function
9932 (and the \Hemlock{} \code{Describe Function Call} command, invoked with
9933 \b{C-M-Shift-A}) will list this information.  Since the names of arguments
9934 and return values are usually descriptive, the information that
9935 \code{describe} prints is usually all one needs to write a
9936 call. Most programmers use this on-line documentation nearly
9937 all of the time, and thereby avoid the need to handle bulky
9938 manuals and perform the translation from barbarous tongues.
9939
9940 %%\node Type Translations, System Area Pointers, Lisp Equivalents for C Routines, UNIX Interface
9941 \section{Type Translations}
9942 \cindex{aliens}
9943 \cpsubindex{types}{alien}
9944 \cpsubindex{types}{foreign language}
9945
9946 Lisp data types have very different representations from those used by
9947 conventional languages such as C.  Since the system interfaces are
9948 designed for conventional languages, Lisp must translate objects to and
9949 from the Lisp representations.  Many simple objects have a direct
9950 translation: integers, characters, strings and floating point numbers
9951 are translated to the corresponding Lisp object.  A number of types,
9952 however, are implemented differently in Lisp for reasons of clarity and
9953 efficiency.
9954
9955 Instances of enumerated types are expressed as keywords in Lisp.
9956 Records, arrays, and pointer types are implemented with the \Alien{}
9957 facility (see page \pageref{aliens}.)  Access functions are defined
9958 for these types which convert fields of records, elements of arrays,
9959 or data referenced by pointers into Lisp objects (possibly another
9960 object to be referenced with another access function).
9961
9962 One should dispose of \Alien{} objects created by constructor
9963 functions or returned from remote procedure calls when they are no
9964 longer of any use, freeing the virtual memory associated with that
9965 object.  Since \alien{}s contain pointers to non-Lisp data, the
9966 garbage collector cannot do this itself.  If the memory
9967 was obtained from \funref{make-alien} or from a foreign function call
9968 to a routine that used \code{malloc}, then \funref{free-alien} should
9969 be used.    If the \alien{} was created
9970 using MACH memory allocation (e.g.  \code{vm\_allocate}), then the
9971 storage should be freed using \code{vm\_deallocate}.
9972
9973 %%\node System Area Pointers, Unix System Calls, Type Translations, UNIX Interface
9974 \section{System Area Pointers}
9975 \label{system-area-pointers}
9976
9977 \cindex{pointers}\cpsubindex{malloc}{C function}\cpsubindex{free}{C function}
9978 Note that in some cases an address is represented by a Lisp integer, and in
9979 other cases it is represented by a real pointer.  Pointers are usually used
9980 when an object in the current address space is being referred to.  The MACH
9981 virtual memory manipulation calls must use integers, since in principle the
9982 address could be in any process, and Lisp cannot abide random pointers.
9983 Because these types are represented differently in Lisp, one must explicitly
9984 coerce between these representations.
9985
9986 System Area Pointers (SAPs) provide a mechanism that bypasses the
9987 \Alien{} type system and accesses virtual memory directly.  A SAP is a
9988 raw byte pointer into the \code{lisp} process address space.  SAPs are
9989 represented with a pointer descriptor, so SAP creation can cause
9990 consing.  However, the compiler uses a non-descriptor representation
9991 for SAPs when possible, so the consing overhead is generally minimal.
9992 \xlref{non-descriptor}.
9993
9994 \begin{defun}{system:}{sap-int}{\args{\var{sap}}}
9995   \defunx[system:]{int-sap}{\args{\var{int}}}
9996
9997   The function \code{sap-int} is used to generate an integer
9998   corresponding to the system area pointer, suitable for passing to
9999   the kernel interfaces (which want all addresses specified as
10000   integers).  The function \code{int-sap} is used to do the opposite
10001   conversion.  The integer representation of a SAP is the byte offset
10002   of the SAP from the start of the address space.
10003 \end{defun}
10004
10005 \begin{defun}{system:}{sap+}{\args{\var{sap} \var{offset}}}
10006
10007   This function adds a byte \var{offset} to \var{sap}, returning a new
10008   SAP.
10009 \end{defun}
10010
10011 \begin{defun}{system:}{sap-ref-8}{\args{\var{sap} \var{offset}}}
10012   \defunx[system:]{sap-ref-16}{\args{\var{sap} \var{offset}}}
10013   \defunx[system:]{sap-ref-32}{\args{\var{sap} \var{offset}}}
10014
10015   These functions return the 8, 16 or 32 bit unsigned integer at
10016   \var{offset} from \var{sap}.  The \var{offset} is always a byte
10017   offset, regardless of the number of bits accessed.  \code{setf} may
10018   be used with these functions to deposit values into virtual
10019   memory.
10020 \end{defun}
10021
10022 \begin{defun}{system:}{signed-sap-ref-8}{\args{\var{sap} \var{offset}}}
10023   \defunx[system:]{signed-sap-ref-16}{\args{\var{sap} \var{offset}}}
10024   \defunx[system:]{signed-sap-ref-32}{\args{\var{sap} \var{offset}}}
10025
10026   These functions are the same as the above unsigned operations,
10027   except that they sign-extend, returning a negative number if the
10028   high bit is set.
10029 \end{defun}
10030
10031 %%\node Unix System Calls, File Descriptor Streams, System Area Pointers, UNIX Interface
10032 \section{Unix System Calls}
10033
10034 You probably won't have much cause to use them, but all the Unix system
10035 calls are available.  The Unix system call functions are in the
10036 \code{Unix} package.  The name of the interface for a particular system
10037 call is the name of the system call prepended with \code{unix-}.  The
10038 system usually defines the associated constants without any prefix name.
10039 To find out how to use a particular system call, try using
10040 \code{describe} on it.  If that is unhelpful, look at the source in
10041 \file{syscall.lisp} or consult your system maintainer.
10042
10043 The Unix system calls indicate an error by returning \false{} as the
10044 first value and the Unix error number as the second value.  If the call
10045 succeeds, then the first value will always be non-\nil, often \code{t}.
10046
10047 \begin{defun}{Unix:}{get-unix-error-msg}{\args{\var{error}}}
10048
10049   This function returns a string describing the Unix error number
10050   \var{error}.
10051 \end{defun}
10052
10053 %%\node File Descriptor Streams, Making Sense of Mach Return Codes, Unix System Calls, UNIX Interface
10054 \section{File Descriptor Streams}
10055
10056 Many of the UNIX system calls return file descriptors.  Instead of using other
10057 UNIX system calls to perform I/O on them, you can create a stream around them.
10058 For this purpose, fd-streams exist.  See also \funref{read-n-bytes}.
10059
10060 \begin{defun}{system:}{make-fd-stream}{%
10061     \args{\var{descriptor}} \keys{\kwd{input} \kwd{output}
10062       \kwd{element-type}} \morekeys{\kwd{buffering} \kwd{name}
10063       \kwd{file} \kwd{original}} \yetmorekeys{\kwd{delete-original}
10064       \kwd{auto-close}} \yetmorekeys{\kwd{timeout} \kwd{pathname}}}
10065
10066   This function creates a file descriptor stream using
10067   \var{descriptor}.  If \kwd{input} is non-\nil, input operations are
10068   allowed.  If \kwd{output} is non-\nil, output operations are
10069   allowed.  The default is input only.  These keywords are defined:
10070   \begin{Lentry}
10071   \item[\kwd{element-type}] is the type of the unit of transaction for
10072     the stream, which defaults to \code{string-char}.  See the Common
10073     Lisp description of \code{open} for valid values.
10074
10075   \item[\kwd{buffering}] is the kind of output buffering desired for
10076     the stream.  Legal values are \kwd{none} for no buffering,
10077     \kwd{line} for buffering up to each newline, and \kwd{full} for
10078     full buffering.
10079
10080   \item[\kwd{name}] is a simple-string name to use for descriptive
10081     purposes when the system prints an fd-stream.  When printing
10082     fd-streams, the system prepends the stream's name with \code{Stream
10083       for }.  If \var{name} is unspecified, it defaults to a string
10084     containing \var{file} or \var{descriptor}, in order of preference.
10085
10086   \item[\kwd{file}, \kwd{original}] \var{file} specifies the defaulted
10087     namestring of the associated file when creating a file stream
10088     (must be a \code{simple-string}). \var{original} is the
10089     \code{simple-string} name of a backup file containing the original
10090     contents of \var{file} while writing \var{file}.
10091
10092     When you abort the stream by passing \true{} to \code{close} as
10093     the second argument, if you supplied both \var{file} and
10094     \var{original}, \code{close} will rename the \var{original} name
10095     to the \var{file} name.  When you \code{close} the stream
10096     normally, if you supplied \var{original}, and
10097     \var{delete-original} is non-\nil, \code{close} deletes
10098     \var{original}.  If \var{auto-close} is true (the default), then
10099     \var{descriptor} will be closed when the stream is garbage
10100     collected.
10101
10102   \item[\kwd{pathname}] is the original pathname passed to \code{open} and
10103     returned by \code{pathname}; not defaulted or translated.
10104
10105   \item[\kwd{timeout}] if non-null, then \var{timeout} is an integer
10106     number of seconds after which an input wait should time out.  If a
10107     read does time out, then the \code{system:io-timeout} condition is
10108     signalled.
10109   \end{Lentry}
10110 \end{defun}
10111
10112 \begin{defun}{system:}{fd-stream-p}{\args{\var{object}}}
10113
10114   This function returns \true{} if \var{object} is an fd-stream, and
10115   \nil{} if not.  Obsolete: use the portable \code{(typep x
10116     'file-stream)}.
10117 \end{defun}
10118
10119 \begin{defun}{system:}{fd-stream-fd}{\args{\var{stream}}}
10120
10121   This returns the file descriptor associated with \var{stream}.
10122 \end{defun}
10123
10124
10125 %%\node Making Sense of Mach Return Codes, Unix Interrupts, File Descriptor Streams, UNIX Interface
10126 \section{Making Sense of Mach Return Codes}
10127
10128 Whenever a remote procedure call returns a Unix error code (such as
10129 \code{kern\_return\_t}), it is usually prudent to check that code to
10130 see if the call was successful.  To relieve the programmer of the
10131 hassle of testing this value himself, and to centralize the
10132 information about the meaning of non-success return codes, CMU Common
10133 Lisp provides a number of macros and functions.  See also
10134 \funref{get-unix-error-msg}.
10135
10136 \begin{defun}{system:}{gr-error}{%
10137     \args{\var{function} \var{gr} \ampoptional{} \var{context}}}
10138
10139   Signals a Lisp error, printing a message indicating that the call to
10140   the specified \var{function} failed, with the return code \var{gr}.
10141   If supplied, the \var{context} string is printed after the
10142   \var{function} name and before the string associated with the
10143   \var{gr}.  For example:
10144 \begin{example}
10145 * (gr-error 'nukegarbage 3 "lost big")
10146
10147 Error in function GR-ERROR:
10148 NUKEGARBAGE lost big, no space.
10149 Proceed cases:
10150 0: Return to Top-Level.
10151 Debug  (type H for help)
10152 (Signal #<Conditions:Simple-Error.5FDE0>)
10153 0]
10154 \end{example}
10155 \end{defun}
10156
10157 \begin{defmac}{system:}{gr-call}{\args{\var{function} \amprest{} \var{args}}}
10158   \defmacx[system:]{gr-call*}{\args{\var{function} \amprest{} \var{args}}}
10159
10160   These macros can be used to call a function and automatically check
10161   the GeneralReturn code and signal an appropriate error in case of
10162   non-successful return.  \code{gr-call} returns \false{} if no error
10163   occurs, while \code{gr-call*} returns the second value of the
10164   function called.
10165 \begin{example}
10166 * (gr-call mach:port_allocate *task-self*)
10167 NIL
10168 *
10169 \end{example}
10170 \end{defmac}
10171
10172 \begin{defmac}{system:}{gr-bind}{
10173     \args{\code{(}\mstar{\var{var}}\code{)}
10174       \code{(}\var{function} \mstar{\var{arg}}\code{)}
10175       \mstar{\var{form}}}}
10176
10177   This macro can be used much like \code{multiple-value-bind} to bind
10178   the \var{var}s to return values resulting from calling the
10179   \var{function} with the given \var{arg}s.  The first return value is
10180   not bound to a variable, but is checked as a GeneralReturn code, as
10181   in \code{gr-call}.
10182 \begin{example}
10183 * (gr-bind (port_list port_list_cnt)
10184            (mach:port_select *task-self*)
10185     (format t "The port count is ~S." port_list_cnt)
10186     port_list)
10187 The port count is 0.
10188 #<Alien value>
10189 *
10190 \end{example}
10191 \end{defmac}
10192
10193 %%\node Unix Interrupts,  , Making Sense of Mach Return Codes, UNIX Interface
10194 \section{Unix Interrupts}
10195
10196 \cindex{unix interrupts} \cindex{interrupts}
10197 CMU Common Lisp allows access to all the Unix signals that can be generated
10198 under Unix.  It should be noted that if this capability is abused, it is
10199 possible to completely destroy the running Lisp.  The following macros and
10200 functions allow access to the Unix interrupt system.  The signal names as
10201 specified in section 2 of the \i{Unix Programmer's Manual} are exported
10202 from the Unix package.
10203
10204 \begin{comment}
10205 * Changing Interrupt Handlers::
10206 * Examples of Signal Handlers::
10207 \end{comment}
10208
10209 %%\node Changing Interrupt Handlers, Examples of Signal Handlers, Unix Interrupts, Unix Interrupts
10210 \subsection{Changing Interrupt Handlers}
10211 \label{signal-handlers}
10212
10213 \begin{defmac}{system:}{with-enabled-interrupts}{
10214     \args{\var{specs} \amprest{} \var{body}}}
10215
10216   This macro should be called with a list of signal specifications,
10217   \var{specs}.  Each element of \var{specs} should be a list of
10218   two\hide{ or three} elements: the first should be the Unix signal
10219   for which a handler should be established, the second should be a
10220   function to be called when the signal is received\hide{, and the
10221     third should be an optional character used to generate the signal
10222     from the keyboard.  This last item is only useful for the SIGINT,
10223     SIGQUIT, and SIGTSTP signals.}  One or more signal handlers can be
10224   established in this way.  \code{with-enabled-interrupts} establishes
10225   the correct signal handlers and then executes the forms in
10226   \var{body}.  The forms are executed in an unwind-protect so that the
10227   state of the signal handlers will be restored to what it was before
10228   the \code{with-enabled-interrupts} was entered.  A signal handler
10229   function specified as NIL will set the Unix signal handler to the
10230   default which is normally either to ignore the signal or to cause a
10231   core dump depending on the particular signal.
10232 \end{defmac}
10233
10234 \begin{defmac}{system:}{without-interrupts}{\args{\amprest{} \var{body}}}
10235
10236   It is sometimes necessary to execute a piece of code that cannot be
10237   interrupted.  This macro executes the forms in \var{body} with interrupts
10238   disabled.  Note that the Unix interrupts are not actually disabled,
10239   rather they are queued until after \var{body} has finished
10240   executing.
10241 \end{defmac}
10242
10243 \begin{defmac}{system:}{with-interrupts}{\args{\amprest{} \var{body}}}
10244
10245   When executing an interrupt handler, the system disables interrupts,
10246   as if the handler was wrapped in a \code{without-interrupts}.
10247   The macro \code{with-interrupts} can be used to enable interrupts
10248   while the forms in \var{body} are evaluated.  This is useful if
10249   \var{body} is going to enter a break loop or do some long
10250   computation that might need to be interrupted.
10251 \end{defmac}
10252
10253 \begin{defmac}{system:}{without-hemlock}{\args{\amprest{} \var{body}}}
10254
10255   For some interrupts, such as SIGTSTP (suspend the Lisp process and
10256   return to the Unix shell) it is necessary to leave Hemlock and then
10257   return to it.  This macro executes the forms in \var{body} after
10258   exiting Hemlock.  When \var{body} has been executed, control is
10259   returned to Hemlock.
10260 \end{defmac}
10261
10262 \begin{defun}{system:}{enable-interrupt}{%
10263     \args{\var{signal} \var{function}\hide{ \ampoptional{}
10264         \var{character}}}}
10265
10266   This function establishes \var{function} as the handler for
10267   \var{signal}.
10268   \hide{The optional \var{character} can be specified
10269     for the SIGINT, SIGQUIT, and SIGTSTP signals and causes that
10270     character to generate the appropriate signal from the keyboard.}
10271   Unless you want to establish a global signal handler, you should use
10272   the macro \code{with-enabled-interrupts} to temporarily establish a
10273   signal handler.  \hide{Without \var{character},}
10274   \code{enable-interrupt} returns the old function associated with the
10275   signal.  \hide{When \var{character} is specified for SIGINT,
10276     SIGQUIT, or SIGTSTP, it returns the old character code.}
10277 \end{defun}
10278
10279 \begin{defun}{system:}{ignore-interrupt}{\args{\var{signal}}}
10280
10281   Ignore-interrupt sets the Unix signal mechanism to ignore
10282   \var{signal} which means that the Lisp process will never see the
10283   signal.  Ignore-interrupt returns the old function associated with
10284   the signal or \false{} if none is currently defined.
10285 \end{defun}
10286
10287 \begin{defun}{system:}{default-interrupt}{\args{\var{signal}}}
10288
10289   Default-interrupt can be used to tell the Unix signal mechanism to
10290   perform the default action for \var{signal}.  For details on what
10291   the default action for a signal is, see section 2 of the \i{Unix
10292     Programmer's Manual}.  In general, it is likely to ignore the
10293   signal or to cause a core dump.
10294 \end{defun}
10295
10296 %%\node Examples of Signal Handlers,  , Changing Interrupt Handlers, Unix Interrupts
10297 \subsection{Examples of Signal Handlers}
10298
10299 The following code is the signal handler used by the Lisp system for the
10300 SIGINT signal.
10301 \begin{lisp}
10302 (defun ih-sigint (signal code scp)
10303   (declare (ignore signal code scp))
10304   (without-hemlock
10305    (with-interrupts
10306     (break "Software Interrupt" t))))
10307 \end{lisp}
10308 The \code{without-hemlock} form is used to make sure that Hemlock is exited before
10309 a break loop is entered.  The \code{with-interrupts} form is used to enable
10310 interrupts because the user may want to generate an interrupt while in the
10311 break loop.  Finally, break is called to enter a break loop, so the user
10312 can look at the current state of the computation.  If the user proceeds
10313 from the break loop, the computation will be restarted from where it was
10314 interrupted.
10315
10316 The following function is the Lisp signal handler for the SIGTSTP signal
10317 which suspends a process and returns to the Unix shell.
10318 \begin{lisp}
10319 (defun ih-sigtstp (signal code scp)
10320   (declare (ignore signal code scp))
10321   (without-hemlock
10322    (Unix:unix-kill (Unix:unix-getpid) Unix:sigstop)))
10323 \end{lisp}
10324 Lisp uses this interrupt handler to catch the SIGTSTP signal because it is
10325 necessary to get out of Hemlock in a clean way before returning to the shell.
10326
10327 To set up these interrupt handlers, the following is recommended:
10328 \begin{lisp}
10329 (with-enabled-interrupts ((Unix:SIGINT #'ih-sigint)
10330                           (Unix:SIGTSTP #'ih-sigtstp))
10331   <user code to execute with the above signal handlers enabled.>
10332 )
10333 \end{lisp}
10334
10335
10336 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/server.ms}
10337
10338 %%\node Event Dispatching with SERVE-EVENT, Alien Objects, UNIX Interface, Top
10339 \chapter{Event Dispatching with SERVE-EVENT}
10340 \begin{center}
10341 \b{By Bill Chiles and Robert MacLachlan}
10342 \end{center}
10343
10344 It is common to have multiple activities simultaneously operating in the same
10345 Lisp process.  Furthermore, Lisp programmers tend to expect a flexible
10346 development environment.  It must be possible to load and modify application
10347 programs without requiring modifications to other running programs.  CMU Common
10348 Lisp achieves this by having a central scheduling mechanism based on an
10349 event-driven, object-oriented paradigm.
10350
10351 An \var{event} is some interesting happening that should cause the Lisp process
10352 to wake up and do something.  These events include X events and activity on
10353 Unix file descriptors.  The object-oriented mechanism is only available with
10354 X events, and even there it is optional, as described later in this
10355 chapter.  In an X event, the window ID is the object capability and the X event
10356 type is the operation code.  The Unix file descriptor input mechanism simply
10357 consists of an association list of a handler to call when input shows up on a
10358 particular file descriptor.
10359
10360
10361 \begin{comment}
10362 * Object Sets::
10363 * The SERVE-EVENT Function::
10364 * Using SERVE-EVENT with Unix File Descriptors::
10365 * Using SERVE-EVENT with the CLX Interface to X::
10366 * A SERVE-EVENT Example::
10367 \end{comment}
10368
10369 %%\node Object Sets, The SERVE-EVENT Function, Event Dispatching with SERVE-EVENT, Event Dispatching with SERVE-EVENT
10370 \section{Object Sets}
10371 \label{object-sets}
10372 \cindex{object sets}
10373 An \i{object set} is a collection of objects that have the same implementation
10374 for each operation.  Externally the object is represented by the object
10375 capability and the operation is represented by the operation code.  Within
10376 Lisp, the object is represented by an arbitrary Lisp object, and the
10377 implementation for the operation is represented by an arbitrary Lisp function.
10378 The object set mechanism maintains this translation from the external to the
10379 internal representation.
10380
10381 \begin{defun}{system:}{make-object-set}{%
10382     \args{\var{name} \ampoptional{} \var{default-handler}}}
10383
10384   This function makes a new object set.  \var{Name} is a string used
10385   only for purposes of identifying the object set when it is printed.
10386   \var{Default-handler} is the function used as a handler when an
10387   undefined operation occurs on an object in the set.  You can define
10388   operations with the \code{serve-}\var{operation} functions exported
10389   from the \code{extensions} package for X events
10390   (\pxlref{x-serve-mumbles}).  Objects are added with
10391   \code{system:add-xwindow-object}.  Initially the object set has no
10392   objects and no defined operations.
10393 \end{defun}
10394
10395 \begin{defun}{system:}{object-set-operation}{%
10396     \args{\var{object-set} \var{operation-code}}}
10397
10398   This function returns the handler function that is the
10399   implementation of the operation corresponding to
10400   \var{operation-code} in \var{object-set}.  When set with
10401   \code{setf}, the setter function establishes the new handler.  The
10402   \code{serve-}\var{operation} functions exported from the
10403   \code{extensions} package for X events (\pxlref{x-serve-mumbles})
10404   call this on behalf of the user when announcing a new operation for
10405   an object set.
10406 \end{defun}
10407
10408 \begin{defun}{system:}{add-xwindow-object}{%
10409     \args{\var{window} \var{object} \var{object-set}}}
10410
10411   This function adds \var{window} to \var{object-set}.
10412   \var{Object} is an arbitrary Lisp object that is associated with the
10413   \var{window} capability.  \var{Window} is a CLX
10414   window.  When an event occurs, \code{system:serve-event} passes
10415   \var{object} as an argument to the handler function.
10416 \end{defun}
10417
10418
10419 %%\node The SERVE-EVENT Function, Using SERVE-EVENT with Unix File Descriptors, Object Sets, Event Dispatching with SERVE-EVENT
10420 \section{The SERVE-EVENT Function}
10421
10422 The \code{system:serve-event} function is the standard way for an application
10423 to wait for something to happen.  For example, the Lisp system calls
10424 \code{system:serve-event} when it wants input from X or a terminal stream.
10425 The idea behind \code{system:serve-event} is that it knows the appropriate
10426 action to take when any interesting event happens.  If an application calls
10427 \code{system:serve-event} when it is idle, then any other applications with
10428 pending events can run.  This allows several applications to run ``at the
10429 same time'' without interference, even though there is only one thread of
10430 control.  Note that if an application is waiting for input of any kind,
10431 then other applications will get events.
10432
10433 \begin{defun}{system:}{serve-event}{\args{\ampoptional{} \var{timeout}}}
10434
10435   This function waits for an event to happen and then dispatches to
10436   the correct handler function.  If specified, \var{timeout} is the
10437   number of seconds to wait before timing out.  A time out of zero
10438   seconds is legal and causes \code{system:serve-event} to poll for
10439   any events immediately available for processing.
10440   \code{system:serve-event} returns \true{} if it serviced at least
10441   one event, and \nil{} otherwise.  Depending on the application, when
10442   \code{system:serve-event} returns \true, you might want to call it
10443   repeatedly with a timeout of zero until it returns \nil.
10444
10445   If input is available on any designated file descriptor, then this
10446   calls the appropriate handler function supplied by
10447   \code{system:add-fd-handler}.
10448
10449   Since events for many different applications may arrive
10450   simultaneously, an application waiting for a specific event must
10451   loop on \code{system:serve-event} until the desired event happens.
10452   Since programs such as \hemlock{} call \code{system:serve-event} for
10453   input, applications usually do not need to call
10454   \code{system:serve-event} at all; \hemlock{} allows other
10455   applications' handlers to run when it goes into an input wait.
10456 \end{defun}
10457
10458 \begin{defun}{system:}{serve-all-events}{\args{\ampoptional{} \var{timeout}}}
10459
10460   This function is similar to \code{system:serve-event}, except it
10461   serves all the pending events rather than just one.  It returns
10462   \true{} if it serviced at least one event, and \nil{} otherwise.
10463 \end{defun}
10464
10465
10466 %%\node Using SERVE-EVENT with Unix File Descriptors, Using SERVE-EVENT with the CLX Interface to X, The SERVE-EVENT Function, Event Dispatching with SERVE-EVENT
10467 \section{Using SERVE-EVENT with Unix File Descriptors}
10468 Object sets are not available for use with file descriptors, as there are
10469 only two operations possible on file descriptors: input and output.
10470 Instead, a handler for either input or output can be registered with
10471 \code{system:serve-event} for a specific file descriptor.  Whenever any input
10472 shows up, or output is possible on this file descriptor, the function
10473 associated with the handler for that descriptor is funcalled with the
10474 descriptor as its single argument.
10475
10476 \begin{defun}{system:}{add-fd-handler}{%
10477     \args{\var{fd} \var{direction} \var{function}}}
10478
10479   This function installs and returns a new handler for the file
10480   descriptor \var{fd}.  \var{direction} can be either \kwd{input} if
10481   the system should invoke the handler when input is available or
10482   \kwd{output} if the system should invoke the handler when output is
10483   possible.  This returns a unique object representing the handler,
10484   and this is a suitable argument for \code{system:remove-fd-handler}.
10485   \var{function} must take one argument, the file descriptor.
10486 \end{defun}
10487
10488 \begin{defun}{system:}{remove-fd-handler}{\args{\var{handler}}}
10489
10490   This function removes \var{handler}, which \code{add-fd-handler} must
10491   have previously returned.
10492 \end{defun}
10493
10494 \begin{defmac}{system:}{with-fd-handler}{%
10495     \args{(\var{direction} \var{fd} \var{function})
10496       \mstar{\var{form}}}}
10497
10498   This macro executes the supplied forms with a handler installed
10499   using \var{fd}, \var{direction}, and \var{function}.  See
10500   \code{system:add-fd-handler}.
10501 \end{defmac}
10502
10503 \begin{defun}{system:}{wait-until-fd-usable}{%
10504     \args{\var{direction} \var{fd} \ampoptional{} \var{timeout}}}
10505
10506   This function waits for up to \var{timeout} seconds for \var{fd} to
10507   become usable for \var{direction} (either \kwd{input} or
10508   \kwd{output}).  If \var{timeout} is \nil{} or unspecified, this
10509   waits forever.
10510 \end{defun}
10511
10512 \begin{defun}{system:}{invalidate-descriptor}{\args{\var{fd}}}
10513
10514   This function removes all handlers associated with \var{fd}.  This
10515   should only be used in drastic cases (such as I/O errors, but not
10516   necessarily EOF).  Normally, you should use \code{remove-fd-handler}
10517   to remove the specific handler.
10518 \end{defun}
10519
10520 \begin{comment}
10521
10522 section{Using SERVE-EVENT with Matchmaker Interfaces}
10523 \label{ipc-serve-mumbles}
10524 Remember from section \ref{object-sets}, an object set is a collection of
10525 objects, ports in this case, with some set of operations, message ID's, with
10526 corresponding implementations, the same handler functions.
10527
10528 Matchmaker uses the object set operations to implement servers.  For
10529 each server interface \i{XXX}, Matchmaker defines a function,
10530 \code{serve-}\i{XXX}, of two arguments, an object set and a function.
10531 The \code{serve-}\i{XXX} function establishes the function as the
10532 implementation of the \i{XXX} operation in the object set.  Recall
10533 from section \ref{object-sets}, \code{system:add-port-object}
10534 associates some Lisp object with a port in an object set.  When
10535 \code{system:serve-event} notices activity on a port, it calls the
10536 function given to \code{serve-}\i{XXX} with the object given to
10537 \code{system:add-port-object} and the input parameters specified in
10538 the message definition.  The return values from the function are used
10539 as the output parameters for the message, if any.
10540 \code{serve-}\i{XXX} functions are also generated for each \i{server
10541   message} and asynchronous user interface.
10542
10543 To use a Lisp server:
10544 \begin{itemize}
10545
10546 \item Create an object set.
10547
10548 \item Define some operations on it using the \code{serve-}\i{XXX}
10549   functions.
10550
10551 \item Create an object for every port on which you receive requests.
10552
10553 \item Call \code{system:serve-event} to service an RPC request.
10554 \end{itemize}
10555
10556
10557 Object sets allow many servers in the same Lisp to operate without knowing
10558 about each other.  There can be multiple implementations of the same interface
10559 with different operation handlers established in distinct object sets.  This
10560 property is especially useful when handling emergency messages.
10561
10562 \end{comment}
10563
10564 %%\node Using SERVE-EVENT with the CLX Interface to X, A SERVE-EVENT Example, Using SERVE-EVENT with Unix File Descriptors, Event Dispatching with SERVE-EVENT
10565 \section{Using SERVE-EVENT with the CLX Interface to X}
10566 \label{x-serve-mumbles}
10567 Remember from section \ref{object-sets}, an object set is a collection of
10568 objects, CLX windows in this case, with some set of operations, event keywords,
10569 with corresponding implementations, the same handler functions.  Since X allows
10570 multiple display connections from a given process, you can avoid using object
10571 sets if every window in an application or display connection behaves the same.
10572 If a particular X application on a single display connection has windows that
10573 want to handle certain events differently, then using object sets is a
10574 convenient way to organize this since you need some way to map the window/event
10575 combination to the appropriate functionality.
10576
10577 The following is a discussion of functions exported from the \code{extensions}
10578 package that facilitate handling CLX events through \code{system:serve-event}.
10579 The first two routines are useful regardless of whether you use
10580 \code{system:serve-event}:
10581 \begin{defun}{ext:}{open-clx-display}{%
10582     \args{\ampoptional{} \var{string}}}
10583
10584   This function parses \var{string} for an X display specification
10585   including display and screen numbers.  \var{String} defaults to the
10586   following:
10587   \begin{example}
10588     (cdr (assoc :display ext:*environment-list* :test #'eq))
10589   \end{example}
10590   If any field in the display specification is missing, this signals
10591   an error.  \code{ext:open-clx-display} returns the CLX display and
10592   screen.
10593 \end{defun}
10594
10595 \begin{defun}{ext:}{flush-display-events}{\args{\var{display}}}
10596
10597   This function flushes all the events in \var{display}'s event queue
10598   including the current event, in case the user calls this from within
10599   an event handler.
10600 \end{defun}
10601
10602
10603 \begin{comment}
10604 * Without Object Sets::
10605 * With Object Sets::
10606 \end{comment}
10607
10608 %%\node Without Object Sets, With Object Sets, Using SERVE-EVENT with the CLX Interface to X, Using SERVE-EVENT with the CLX Interface to X
10609 \subsection{Without Object Sets}
10610 Since most applications that use CLX can avoid the complexity of object sets,
10611 these routines are described in a separate section.  The routines described in
10612 the next section that use the object set mechanism are based on these
10613 interfaces.
10614
10615 \begin{defun}{ext:}{enable-clx-event-handling}{%
10616     \args{\var{display} \var{handler}}}
10617
10618   This function causes \code{system:serve-event} to notice when there
10619   is input on \var{display}'s connection to the X11 server.  When this
10620   happens, \code{system:serve-event} invokes \var{handler} on
10621   \var{display} in a dynamic context with an error handler bound that
10622   flushes all events from \var{display} and returns.  By returning,
10623   the error handler declines to handle the error, but it will have
10624   cleared all events; thus, entering the debugger will not result in
10625   infinite errors due to streams that wait via
10626   \code{system:serve-event} for input.  Calling this repeatedly on the
10627   same \var{display} establishes \var{handler} as a new handler,
10628   replacing any previous one for \var{display}.
10629 \end{defun}
10630
10631 \begin{defun}{ext:}{disable-clx-event-handling}{\args{\var{display}}}
10632
10633   This function undoes the effect of
10634   \code{ext:enable-clx-event-handling}.
10635 \end{defun}
10636
10637 \begin{defmac}{ext:}{with-clx-event-handling}{%
10638     \args{(\var{display} \var{handler}) \mstar{\var{form}}}}
10639
10640   This macro evaluates each \var{form} in a context where
10641   \code{system:serve-event} invokes \var{handler} on \var{display}
10642   whenever there is input on \var{display}'s connection to the X
10643   server.  This destroys any previously established handler for
10644   \var{display}.
10645 \end{defmac}
10646
10647
10648 %%\node With Object Sets,  , Without Object Sets, Using SERVE-EVENT with the CLX Interface to X
10649 \subsection{With Object Sets}
10650 This section discusses the use of object sets and
10651 \code{system:serve-event} to handle CLX events.  This is necessary
10652 when a single X application has distinct windows that want to handle
10653 the same events in different ways.  Basically, you need some way of
10654 asking for a given window which way you want to handle some event
10655 because this event is handled differently depending on the window.
10656 Object sets provide this feature.
10657
10658 For each CLX event-key symbol-name \i{XXX} (for example,
10659 \var{key-press}), there is a function \code{serve-}\i{XXX} of two
10660 arguments, an object set and a function.  The \code{serve-}\i{XXX}
10661 function establishes the function as the handler for the \kwd{XXX}
10662 event in the object set.  Recall from section \ref{object-sets},
10663 \code{system:add-xwindow-object} associates some Lisp object with a
10664 CLX window in an object set.  When \code{system:serve-event} notices
10665 activity on a window, it calls the function given to
10666 \code{ext:enable-clx-event-handling}.  If this function is
10667 \code{ext:object-set-event-handler}, it calls the function given to
10668 \code{serve-}\i{XXX}, passing the object given to
10669 \code{system:add-xwindow-object} and the event's slots as well as a
10670 couple of other arguments described below.
10671
10672 To use object sets in this way:
10673 \begin{itemize}
10674
10675 \item Create an object set.
10676
10677 \item Define some operations on it using the \code{serve-}\i{XXX}
10678   functions.
10679
10680 \item Add an object for every window on which you receive requests.
10681   This can be the CLX window itself or some structure more meaningful
10682   to your application.
10683
10684 \item Call \code{system:serve-event} to service an X event.
10685 \end{itemize}
10686
10687
10688 \begin{defun}{ext:}{object-set-event-handler}{%
10689     \args{\var{display}}}
10690
10691   This function is a suitable argument to
10692   \code{ext:enable-clx-event-handling}.  The actual event handlers
10693   defined for particular events within a given object set must take an
10694   argument for every slot in the appropriate event.  In addition to
10695   the event slots, \code{ext:object-set-event-handler} passes the
10696   following arguments:
10697   \begin{itemize}
10698   \item The object, as established by
10699     \code{system:add-xwindow-object}, on which the event occurred.
10700   \item event-key, see \code{xlib:event-case}.
10701   \item send-event-p, see \code{xlib:event-case}.
10702   \end{itemize}
10703
10704   Describing any \code{ext:serve-}\var{event-key-name} function, where
10705   \var{event-key-name} is an event-key symbol-name (for example,
10706   \code{ext:serve-key-press}), indicates exactly what all the
10707   arguments are in their correct order.
10708
10709 %%  \begin{comment}
10710 %%    \code{ext:object-set-event-handler} ignores \kwd{no-exposure}
10711 %%    events on pixmaps, issuing a warning if one occurs.  It is only
10712 %%    prepared to dispatch events for windows.
10713 %%  \end{comment}
10714
10715   When creating an object set for use with
10716   \code{ext:object-set-event-handler}, specify
10717   \code{ext:default-clx-event-handler} as the default handler for
10718   events in that object set.  If no default handler is specified and
10719   the system falls back on its built-in default handler, an error
10720   results, since that function takes arguments suitable for handling
10721   port messages.
10722 \end{defun}
10723
10724
10725 %%\node A SERVE-EVENT Example,  , Using SERVE-EVENT with the CLX Interface to X, Event Dispatching with SERVE-EVENT
10726 \section{A SERVE-EVENT Example}
10727 This section contains two examples using \code{system:serve-event}.  The first
10728 one does not use object sets, and the second, slightly more complicated one
10729 does.
10730
10731
10732 \begin{comment}
10733 * Without Object Sets Example::
10734 * With Object Sets Example::
10735 \end{comment}
10736
10737 %%\node Without Object Sets Example, With Object Sets Example, A SERVE-EVENT Example, A SERVE-EVENT Example
10738 \subsection{Without Object Sets Example}
10739 This example defines an input handler for a CLX display connection.  It only
10740 recognizes \kwd{key-press} events.  The body of the example loops over
10741 \code{system:serve-event} to get input.
10742
10743 \begin{lisp}
10744 (in-package "SERVER-EXAMPLE")
10745
10746 (defun my-input-handler (display)
10747   "Print the next queued :key-press event on DISPLAY, if there is one."
10748   (xlib:event-case (display :timeout 0)
10749     (:key-press (event-window code state)
10750      (let ((window-id (xlib:window-id event-window))
10751            ;; Hemlock's Command Implementor's Manual covers this mapping.
10752            (key (ext:translate-character display code state)))
10753        (format t "KEY-PRESSED (Window = ~D) = ~S.~%" window-id key))
10754      ;; A true clause value makes XLIB:EVENT-CASE discard the event.
10755      t)))
10756 \end{lisp}
10757 \begin{lisp}
10758 (defun server-example ()
10759   "An example of using the SYSTEM:SERVE-EVENT function, without object
10760    sets, to handle CLX events."
10761   (let* ((display (ext:open-clx-display))
10762          (screen (display-default-screen display))
10763          (black (screen-black-pixel screen))
10764          (white (screen-white-pixel screen))
10765          (window (create-window :parent (screen-root screen)
10766                                 :x 0 :y 0 :width 200 :height 200
10767                                 :background white :border black
10768                                 :border-width 2
10769                                 :event-mask
10770                                 (xlib:make-event-mask :key-press))))
10771     ;; Wrap code in UNWIND-PROTECT, so we clean up after ourselves.
10772     (unwind-protect
10773         (progn
10774           ;; Enable event handling on the display.
10775           (ext:enable-clx-event-handling display #'my-input-handler)
10776           ;; Map the window to the screen.
10777           (map-window window)
10778           ;; Make sure we send all our requests.
10779           (display-force-output display)
10780           ;; Call serve-event for 100,000 events or immediate timeouts.
10781           (dotimes (i 100000) (system:serve-event)))
10782       ;; Disable event handling on this display.
10783       (ext:disable-clx-event-handling display)
10784       ;; Get rid of the window.
10785       (destroy-window window)
10786       ;; Pick off any events the X server has already queued for our
10787       ;; window, so we don't choke since SYSTEM:SERVE-EVENT is no longer
10788       ;; prepared to handle events for us.
10789       (loop
10790        (unless (deleting-window-drop-event display window)
10791         (return)))
10792       ;; Close the display.
10793       (xlib:close-display display))))
10794
10795 (defun deleting-window-drop-event (display win)
10796   "Flush DISPLAY, then poll once for a queued event whose window is WIN.
10797    Returns t after consuming such an event, and nil otherwise."
10798   (xlib:display-finish-output display)
10799   (let (dropped-p)
10800     (flet ((drop-if-ours (&key event-window &allow-other-keys)
10801              ;; A true value makes XLIB:PROCESS-EVENT consume the event.
10802              (when (eq event-window win)
10803                (setf dropped-p t))))
10804       (xlib:process-event display :timeout 0
10805                           :handler #'drop-if-ours))
10806     dropped-p))
10807 \end{lisp}
10808
10809
10810 %%\node With Object Sets Example,  , Without Object Sets Example, A SERVE-EVENT Example
10811 \subsection{With Object Sets Example}
10812 This example involves more work, but you get a little more for your effort.  It
10813 defines two objects, \code{input-box} and \code{slider}, and establishes a
10814 \kwd{key-press} handler for each object, \code{key-pressed} and
10815 \code{slider-pressed}.  We have two object sets because we handle events on the
10816 windows manifesting these objects differently, but the events come over the
10817 same display connection.
10818
10819 \begin{lisp}
10820 (in-package "SERVER-EXAMPLE")
10821
10822 (defstruct (input-box (:constructor make-input-box (display window))
10823                       (:print-function print-input-box))
10824   "An input-box pairs a CLX window with its display; the rest of the
10825    program need not know anything more about its implementation."
10826   display        ; CLX display connection showing this input-box.
10827   window)        ; CLX window that receives the user's typing.
10828 ;;;
10829 (defun print-input-box (object stream depth)
10830   (declare (ignore depth))
10831   (format stream "#<Input-Box ~S>" (input-box-display object)))
10832
10833 (defvar *input-box-windows*
10834   (system:make-object-set "Input Box Windows"
10835                           #'ext:default-clx-event-handler))
10836
10837 (defun key-pressed (input-box event-key event-window root child
10838                     same-screen-p x y root-x root-y modifiers time
10839                     key-code send-event-p)
10840   "The :key-press handler for windows in *INPUT-BOX-WINDOWS*."
10841   (declare (ignore event-key root child same-screen-p x y
10842                    root-x root-y time send-event-p))
10843   (let ((window-id (xlib:window-id event-window))
10844         ;; The Hemlock Command Implementor's Manual covers this
10845         ;; convenient input mapping function.
10846         (key (ext:translate-character (input-box-display input-box)
10847                                       key-code modifiers)))
10848     (format t "KEY-PRESSED (Window = ~D) = ~S.~%" window-id key)))
10849 ;;;
10850 (ext:serve-key-press *input-box-windows* #'key-pressed)
10851 \end{lisp}
10852 \begin{lisp}
10853 (defstruct (slider (:print-function print-slider)
10854                    (:include input-box)
10855                    (:constructor %make-slider
10856                                     (display window bits-per-value max)))
10857   "Our program knows about sliders too, and these provide input values
10858    zero to max."
10859   bits-per-value  ; Window width per discrete value; see MAKE-SLIDER.
10860   max)            ; End value for slider.
10861 ;;;
10862 (defun print-slider (object stream n)
10863   (declare (ignore n))
10864   (format stream "#<Slider ~S  0..~D>"
10865           (input-box-display object)
10866           (1- (slider-max object))))
10867 ;;;
10868 (defun make-slider (display window max)
10869   ;; Each of the MAX values gets an equal share of the window's width.
10870   (%make-slider display window
10871                 (truncate (xlib:drawable-width window) max) max))
10872
10873 (defvar *slider-windows*
10874         (system:make-object-set "Slider Windows"
10875                                 #'ext:default-clx-event-handler))
10876
10877 (defun slider-pressed (slider event-key event-window root child
10878                        same-screen-p x y root-x root-y modifiers time
10879                        key-code send-event-p)
10880   "This is our :key-press event handler for sliders.  Probably this is
10881    a mouse thing, but for simplicity here we take a character typed."
10882   (declare (ignore event-key root child same-screen-p y
10883                    root-x root-y time send-event-p))
10884   (format t "KEY-PRESSED (Window = ~D) = ~S  -->  ~D.~%"
10885           (xlib:window-id event-window)
10886           ;; See Hemlock Command Implementor's Manual for convenient
10887           ;; input mapping function.
10888           (ext:translate-character (input-box-display slider)
10889                                    key-code modifiers)
10890           (truncate x (slider-bits-per-value slider))))
10891 ;;;
10892 (ext:serve-key-press *slider-windows* #'slider-pressed)
10893 \end{lisp}
10894 \begin{lisp}
10895 (defun server-example ()
10896   "An example of using the SYSTEM:SERVE-EVENT function and object sets to
10897    handle CLX events."
10898   (let* ((display (ext:open-clx-display))
10899          (screen (display-default-screen display))
10900          (black (screen-black-pixel screen))
10901          (white (screen-white-pixel screen))
10902          (iwindow (create-window :parent (screen-root screen)
10903                                  :x 0 :y 0 :width 200 :height 200
10904                                  :background white :border black
10905                                  :border-width 2
10906                                  :event-mask
10907                                  (xlib:make-event-mask :key-press)))
10908          (swindow (create-window :parent (screen-root screen)
10909                                  :x 0 :y 300 :width 200 :height 50
10910                                  :background white :border black
10911                                  :border-width 2
10912                                  :event-mask
10913                                  (xlib:make-event-mask :key-press)))
10914          (input-box (make-input-box display iwindow))
10915          (slider (make-slider display swindow 15)))
10916     ;; Wrap code in UNWIND-PROTECT, so the cleanup forms always run.
10917     (unwind-protect
10918         (progn
10919           ;; Route events arriving on the display through object sets.
10920           (ext:enable-clx-event-handling display
10921                                          #'ext:object-set-event-handler)
10922           ;; Register each window, with its object, in its object set.
10923           (system:add-xwindow-object iwindow input-box
10924                                        *input-box-windows*)
10925           (system:add-xwindow-object swindow slider
10926                                        *slider-windows*)
10927           ;; Map both windows to the screen.
10928           (map-window iwindow)
10929           (map-window swindow)
10930           ;; Make sure we send all our requests.
10931           (display-force-output display)
10932           ;; Call serve-event for 100,000 events or immediate timeouts.
10933           (dotimes (i 100000) (system:serve-event)))
10934       ;; Cleanup: disable event handling, then delete both windows.
10935       (ext:disable-clx-event-handling display)
10936       (delete-window iwindow display)
10937       (delete-window swindow display)
10938       ;; Close the display.
10939       (xlib:close-display display))))
10940 \end{lisp}
10941 \begin{lisp}
10942 (defun delete-window (window display)
10943   "Detach WINDOW from its object set, destroy it, and discard any
10944    events already queued for it on DISPLAY."
10945   ;; The window leaves its object set before it is destroyed.
10946   (system:remove-xwindow-object window)
10947   (destroy-window window)
10948   ;; SYSTEM:SERVE-EVENT is no longer prepared to handle events for
10949   ;; us, so swallow whatever the X server had already queued for the
10950   ;; window rather than choke on it later.
10951   (do ()
10952       ((not (deleting-window-drop-event display window)))))
10953
10954 (defun deleting-window-drop-event (display win)
10955   "Flush DISPLAY, then poll once for a queued event whose window is WIN.
10956    Returns t after consuming such an event, and nil otherwise."
10957   (xlib:display-finish-output display)
10958   (let (dropped-p)
10959     (flet ((drop-if-ours (&key event-window &allow-other-keys)
10960              ;; A true value makes XLIB:PROCESS-EVENT consume the event.
10961              (when (eq event-window win)
10962                (setf dropped-p t))))
10963       (xlib:process-event display :timeout 0
10964                           :handler #'drop-if-ours))
10965     dropped-p))
10966 \end{lisp}
10967
10968 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/alien.ms}
10969
10970 %%\node Alien Objects, Interprocess Communication under LISP, Event Dispatching with SERVE-EVENT, Top
10971 \chapter{Alien Objects}
10972 \label{aliens}
10973 \begin{center}
10974 \b{By Robert MacLachlan and William Lott}
10975 \end{center}
10976 \vspace{1 cm}
10977
10978 \begin{comment}
10979 * Introduction to Aliens::
10980 * Alien Types::
10981 * Alien Operations::
10982 * Alien Variables::
10983 * Alien Data Structure Example::
10984 * Loading Unix Object Files::
10985 * Alien Function Calls::
10986 * Step-by-Step Alien Example::
10987 \end{comment}
10988
10989 %%\node Introduction to Aliens, Alien Types, Alien Objects, Alien Objects
10990 \section{Introduction to Aliens}
10991
10992 Because of Lisp's emphasis on dynamic memory allocation and garbage
10993 collection, Lisp implementations use unconventional memory representations
10994 for objects.  This representation mismatch creates problems when a Lisp
10995 program must share objects with programs written in another language.  There
10996 are three different approaches to establishing communication:
10997 \begin{itemize}
10998 \item The burden can be placed on the foreign program (and programmer) by
10999 requiring the use of Lisp object representations.  The main difficulty with
11000 this approach is that either the foreign program must be written with Lisp
11001 interaction in mind, or a substantial amount of foreign ``glue'' code must be
11002 written to perform the translation.
11003
11004 \item The Lisp system can automatically convert objects back and forth
11005 between the Lisp and foreign representations.  This is convenient, but
11006 translation becomes prohibitively slow when large or complex data structures
11007 must be shared.
11008
11009 \item The Lisp program can directly manipulate foreign objects through the
11010 use of extensions to the Lisp language.  Most Lisp systems make use of
11011 this approach, but the language for describing types and expressing
11012 accesses is often not powerful enough for complex objects to be easily
11013 manipulated.
11014 \end{itemize}
11015 \cmucl{} relies primarily on the automatic conversion and direct manipulation
11016 approaches: Aliens of simple scalar types are automatically converted,
11017 while complex types are directly manipulated in their foreign
11018 representation.  Any foreign objects that can't automatically be
11019 converted into Lisp values are represented by objects of type
11020 \code{alien-value}.  Since Lisp is a dynamically typed language, even
11021 foreign objects must have a run-time type; this type information is
11022 provided by encapsulating the raw pointer to the foreign data within an
11023 \code{alien-value} object.
11024
11025 The Alien type language and operations are most similar to those of the
11026 C language, but Aliens can also be used when communicating with most
11027 other languages that can be linked with C.
11028
11029 %%\f
11030 %%\node Alien Types, Alien Operations, Introduction to Aliens, Alien Objects
11031 \section{Alien Types}
11032
11033 Alien types have a description language based on nested list structure.  For
11034 example:
11035 \begin{example}
11036 struct foo \{
11037     int a;
11038     struct foo *b[100];
11039 \};
11040 \end{example}
11041 has the corresponding Alien type:
11042 \begin{lisp}
11043 (struct foo
11044   (a int)
11045   (b (array (* (struct foo)) 100)))
11046 \end{lisp}
11047
11048
11049 \begin{comment}
11050 * Defining Alien Types::
11051 * Alien Types and Lisp Types::
11052 * Alien Type Specifiers::
11053 * The C-Call Package::
11054 \end{comment}
11055
11056 %%\node Defining Alien Types, Alien Types and Lisp Types, Alien Types, Alien Types
11057 \subsection{Defining Alien Types}
11058
11059 Types may be either named or anonymous.  With structure and union
11060 types, the name is part of the type specifier, allowing recursively
11061 defined types such as:
11062 \begin{lisp}
11063 (struct foo (a (* (struct foo))))
11064 \end{lisp}
11065 An anonymous structure or union type is specified by using the name
11066 \nil.  The \funref{with-alien} macro defines a local scope which
11067 ``captures'' any named type definitions.  Other types are not
11068 inherently named, but can be given named abbreviations using
11069 \code{def-alien-type}.
11070
11071 \begin{defmac}{alien:}{def-alien-type}{name type}
11072
11073   This macro globally defines \var{name} as a shorthand for the Alien
11074   type \var{type}.  When introducing global structure and union type
11075   definitions, \var{name} may be \nil, in which case the name to
11076   define is taken from the type's name.
11077 \end{defmac}
11078
11079
11080 %%\node Alien Types and Lisp Types, Alien Type Specifiers, Defining Alien Types, Alien Types
11081 \subsection{Alien Types and Lisp Types}
11082
11083 The Alien types form a subsystem of the \cmucl{} type system.  An
11084 \code{alien} type specifier provides a way to use any Alien type as a
11085 Lisp type specifier.  For example
11086 \begin{lisp}
11087 (typep foo '(alien (* int)))
11088 \end{lisp}
11089 can be used to determine whether \code{foo} is a pointer to an
11090 \code{int}.  \code{alien} type specifiers can be used in the same ways
11091 as ordinary type specifiers (like \code{string}.)  Alien type
11092 declarations are subject to the same precise type checking as any
11093 other declaration (section \xlref{precise-type-checks}.)
11094
11095 Note that the Alien type system overlaps with normal Lisp type
11096 specifiers in some cases.  For example, the type specifier
11097 \code{(alien single-float)} is identical to \code{single-float}, since
11098 Alien floats are automatically converted to Lisp floats.  When
11099 \code{type-of} is called on an Alien value that is not automatically
11100 converted to a Lisp value, then it will return an \code{alien} type
11101 specifier.
11102
11103 %%\node Alien Type Specifiers, The C-Call Package, Alien Types and Lisp Types, Alien Types
11104 \subsection{Alien Type Specifiers}
11105
11106 Some Alien type names are \clisp symbols, but the names are
11107 still exported from the \code{alien} package, so it is legal to say
11108 \code{alien:single-float}.  These are the basic Alien type specifiers:
11109
11110 \begin{deftp}{Alien type}{*}{%
11111     \args{\var{type}}}
11112
11113   A pointer to an object of the specified \var{type}.  If \var{type}
11114   is \true, then it means a pointer to anything, similar to
11115   ``\code{void *}'' in ANSI C.  Currently, the only way to detect a
11116   null pointer is:
11117 \begin{lisp}
11118   (zerop (sap-int (alien-sap \var{ptr})))
11119 \end{lisp}
11120 \xlref{system-area-pointers}
11121 \end{deftp}
11122
11123 \begin{deftp}{Alien type}{array}{\var{type} \mstar{\var{dimension}}}
11124
11125   An array of the specified \var{dimensions}, holding elements of type
11126   \var{type}.  Note that \code{(* int)} and \code{(array int)} are
11127   considered to be different types when type checking is done; pointer
11128   and array types must be explicitly coerced using \code{cast}.
11129
11130   Arrays are accessed using \code{deref}, passing the indices as
11131   additional arguments.  Elements are stored in row-major order (as
11132   in C), so the first dimension determines only the size of the memory
11133   block, and not the layout of the higher dimensions.  An array whose
11134   first dimension is variable may be specified by using \nil{} as the
11135   first dimension.  Fixed-size arrays can be allocated as array
11136   elements, structure slots or \code{with-alien} variables.  Dynamic
11137   arrays can only be allocated using \funref{make-alien}.
11138 \end{deftp}
11139
11140 \begin{deftp}{Alien type}{struct}{\var{name}
11141     \mstar{(\var{field} \var{type} \mopt{\var{bits}})}}
11142
11143   A structure type with the specified \var{name} and \var{fields}.
11144   Fields are allocated at the same positions used by the
11145   implementation's C compiler.  \var{bits} is intended for C-like bit
11146   field support, but is currently unused.  If \var{name} is \false,
11147   then the type is anonymous.
11148
11149   If a named Alien \code{struct} specifier is passed to
11150   \funref{def-alien-type} or \funref{with-alien}, then this defines,
11151   respectively, a new global or local Alien structure type.  If no
11152   \var{fields} are specified, then the fields are taken from the
11153   current (local or global) Alien structure type definition of
11154   \var{name}.
11155 \end{deftp}
11156
11157 \begin{deftp}{Alien type}{union}{\var{name}
11158     \mstar{(\var{field} \var{type} \mopt{\var{bits}})}}
11159
11160   Similar to \code{struct}, but defines a union type.  All fields are
11161   allocated at the same offset, and the size of the union is the size
11162   of the largest field.  The programmer must determine which field is
11163   active from context.
11164 \end{deftp}
11165
11166 \begin{deftp}{Alien type}{enum}{\var{name} \mstar{\var{spec}}}
11167
11168   An enumeration type that maps between integer values and keywords.
11169   If \var{name} is \false, then the type is anonymous.  Each
11170   \var{spec} is either a keyword, or a list \code{(\var{keyword}
11171     \var{value})}.  If \var{value} is not supplied, then it defaults
11172   to one greater than the value for the preceding spec (or to zero if
11173   it is the first spec.)
11174 \end{deftp}
11175
11176 \begin{deftp}{Alien type}{signed}{\mopt{\var{bits}}}
11177   A signed integer with the specified number of bits precision.  The
11178   upper limit on integer precision is determined by the machine's word
11179   size.  If no size is specified, the maximum size will be used.
11180 \end{deftp}
11181
11182 \begin{deftp}{Alien type}{integer}{\mopt{\var{bits}}}
11183   Identical to \code{signed}---the distinction between \code{signed}
11184   and \code{integer} is purely stylistic.
11185 \end{deftp}
11186
11187 \begin{deftp}{Alien type}{unsigned}{\mopt{\var{bits}}}
11188   Like \code{signed}, but specifies an unsigned integer.
11189 \end{deftp}
11190
11191 \begin{deftp}{Alien type}{boolean}{\mopt{\var{bits}}}
11192   Similar to an enumeration type that maps \code{0} to \false{} and
11193   all other values to \true.  \var{bits} determines the amount of
11194   storage allocated to hold the truth value.
11195 \end{deftp}
11196
11197 \begin{deftp}{Alien type}{single-float}{}
11198   A floating-point number in IEEE single format.
11199 \end{deftp}
11200
11201 \begin{deftp}{Alien type}{double-float}{}
11202   A floating-point number in IEEE double format.
11203 \end{deftp}
11204
11205 \begin{deftp}{Alien type}{function}{\var{result-type} \mstar{\var{arg-type}}}
11206   \label{alien-function-types}
11207   An Alien function that takes arguments of the specified
11208   \var{arg-types} and returns a result of type \var{result-type}.
11209   Note that the only context where a \code{function} type is directly
11210   specified is in the argument to \code{alien-funcall} (see section
11211   \funref{alien-funcall}.)  In all other contexts, functions are
11212   represented by function pointer types: \code{(* (function ...))}.
11213 \end{deftp}
11214
11215 \begin{deftp}{Alien type}{system-area-pointer}{}
11216   A pointer which is represented in Lisp as a
11217   \code{system-area-pointer} object (\pxlref{system-area-pointers}.)
11218 \end{deftp}
11219
11220 %%\node The C-Call Package,  , Alien Type Specifiers, Alien Types
11221 \subsection{The C-Call Package}
11222
11223 The \code{c-call} package exports these type-equivalents to the C type
11224 of the same name: \code{char}, \code{short}, \code{int}, \code{long},
11225 \code{unsigned-char}, \code{unsigned-short}, \code{unsigned-int},
11226 \code{unsigned-long}, \code{float}, \code{double}.  \code{c-call} also
11227 exports these types:
11228
11229 \begin{deftp}{Alien type}{void}{}
11230   This type is used in function types to declare that no useful value
11231   is returned.  Evaluation of an \code{alien-funcall} form will return
11232   zero values.
11233 \end{deftp}
11234
11235 \begin{deftp}{Alien type}{c-string}{}
11236   This type is similar to \code{(* char)}, but is interpreted as a
11237   null-terminated string, and is automatically converted into a Lisp
11238   string when accessed.  If the pointer is C \code{NULL} (or 0), then
11239   accessing gives Lisp \false.
11240
11241   Assigning a Lisp string to a \code{c-string} structure field or
11242   variable stores the contents of the string to the memory already
11243   pointed to by that variable.  When an Alien of type \code{(* char)}
11244   is assigned to a \code{c-string}, then the \code{c-string} pointer
11245   is assigned to.  This allows \code{c-string} pointers to be
11246   initialized.  For example:
11247 \begin{lisp}
11248   (def-alien-type nil (struct foo (str c-string)))
11249   (defun make-foo (str)    ; allocate length+1 chars: room for the null
11250     (let ((my-foo (make-alien (struct foo))))
11251       (setf (slot my-foo 'str) (make-alien char (1+ (length str))))
11252       (setf (slot my-foo 'str) str) my-foo))
11253 \end{lisp}
11254 Storing Lisp \false{} writes C \code{NULL} to the \code{c-string}
11255 pointer.
11256 \end{deftp}
11257
11258 %%\f
11259 %%\node Alien Operations, Alien Variables, Alien Types, Alien Objects
11260 \section{Alien Operations}
11261
11262 This section describes the basic operations on Alien values.
11263
11264 \begin{comment}
11265 * Alien Access Operations::
11266 * Alien Coercion Operations::
11267 * Alien Dynamic Allocation::
11268 \end{comment}
11269
11270 %%\node Alien Access Operations, Alien Coercion Operations, Alien Operations, Alien Operations
11271 \subsection{Alien Access Operations}
11272
11273 \begin{defun}{alien:}{deref}{\args{\var{pointer-or-array} \amprest \var{indices}}}
11274
11275   This function returns the value pointed to by an Alien pointer or
11276   the value of an Alien array element.  If a pointer, an optional
11277   single index can be specified to give the equivalent of C pointer
11278   arithmetic; this index is scaled by the size of the type pointed to.
11279   If an array, the number of indices must be the same as the number of
11280   dimensions in the array type.  \code{deref} can be set with
11281   \code{setf} to assign a new value.
11282 \end{defun}
11283
11284 \begin{defun}{alien:}{slot}{\args{\var{struct-or-union} \var{slot-name}}}
11285
11286   This function extracts the value of slot \var{slot-name} from an
11287   Alien \code{struct} or \code{union}.  If \var{struct-or-union} is a
11288   pointer to a structure or union, then it is automatically
11289   dereferenced.  This can be set with \code{setf} to assign a new
11290   value.  Note that \var{slot-name} is evaluated, and need not be a
11291   compile-time constant (but only constant slot accesses are
11292   efficiently compiled.)
11293 \end{defun}
11294
11295 %%\node Alien Coercion Operations, Alien Dynamic Allocation, Alien Access Operations, Alien Operations
11296 \subsection{Alien Coercion Operations}
11297
11298 \begin{defmac}{alien:}{addr}{\var{alien-expr}}
11299
11300   This macro returns a pointer to the location specified by
11301   \var{alien-expr}, which must be either an Alien variable, a use of
11302   \code{deref}, a use of \code{slot}, or a use of
11303   \funref{extern-alien}.
11304 \end{defmac}
11305
11306 \begin{defmac}{alien:}{cast}{\var{alien} \var{new-type}}
11307
11308   This macro converts \var{alien} to a new Alien with the specified
11309   \var{new-type}.  Both types must be an Alien pointer, array or
11310   function type.  Note that the result is not \code{eq} to the
11311   argument, but does refer to the same data bits.
11312 \end{defmac}
11313
11314 \begin{defmac}{alien:}{sap-alien}{\var{sap} \var{type}}
11315   \defunx[alien:]{alien-sap}{\var{alien-value}}
11316
11317   \code{sap-alien} converts \var{sap} (a system area pointer
11318   \pxlref{system-area-pointers}) to an Alien value with the specified
11319   \var{type}.  \var{type} is not evaluated.
11320
11321 \code{alien-sap} returns the SAP which points to \var{alien-value}'s
11322 data.
11323
11324 The \var{type} to \code{sap-alien} and the type of the \var{alien-value} to
11325 \code{alien-sap} must be some Alien pointer, array or record type.
11326 \end{defmac}
11327
11328 %%\node Alien Dynamic Allocation,  , Alien Coercion Operations, Alien Operations
11329 \subsection{Alien Dynamic Allocation}
11330
11331 Dynamic Aliens are allocated using the \code{malloc} library, so foreign code
11332 can call \code{free} on the result of \code{make-alien}, and Lisp code can
11333 call \code{free-alien} on objects allocated by foreign code.
11334
11335 \begin{defmac}{alien:}{make-alien}{\var{type} \mopt{\var{size}}}
11336
11337   This macro returns a dynamically allocated Alien of the specified
11338   \var{type} (which is not evaluated.)  The allocated memory is not
11339   initialized, and may contain arbitrary junk.  If supplied,
11340   \var{size} is an expression to evaluate to compute the size of the
11341   allocated object.  There are two major cases:
11342   \begin{itemize}
11343   \item When \var{type} is an array type, an array of that type is
11344     allocated and a \var{pointer} to it is returned.  Note that you
11345     must use \code{deref} to change the result to an array before you
11346     can use \code{deref} to read or write elements:
11347     \begin{lisp}
11348       (defvar *foo* (make-alien (array char 10)))
11349
11350       (type-of *foo*) \result{} (alien (* (array (signed 8) 10)))
11351
11352       (setf (deref (deref *foo*) 0) 10) \result{} 10
11353     \end{lisp}
11354     If supplied, \var{size} is used as the first dimension for the
11355     array.
11356
11357   \item When \var{type} is any other type, then an object for
11358     that type is allocated, and a \var{pointer} to it is returned.  So
11359     \code{(make-alien int)} returns a \code{(* int)}.  If \var{size}
11360     is specified, then a block of that many objects is allocated, with
11361     the result pointing to the first one.
11362   \end{itemize}
11363 \end{defmac}
11364
11365 \begin{defun}{alien:}{free-alien}{\var{alien}}
11366
11367   This function frees the storage for \var{alien} (which must have
11368   been allocated with \code{make-alien} or \code{malloc}.)
11369 \end{defun}
11370
11371 See also \funref{with-alien}, which stack-allocates Aliens.
11372
11373 %%\f
11374 %%\node Alien Variables, Alien Data Structure Example, Alien Operations, Alien Objects
11375 \section{Alien Variables}
11376
11377 Both local (stack allocated) and external (C global) Alien variables are
11378 supported.
11379
11380 \begin{comment}
11381 * Local Alien Variables::
11382 * External Alien Variables::
11383 \end{comment}
11384
11385 %%\node Local Alien Variables, External Alien Variables, Alien Variables, Alien Variables
11386 \subsection{Local Alien Variables}
11387
11388 \begin{defmac}{alien:}{with-alien}{\mstar{(\var{name} \var{type}
11389       \mopt{\var{initial-value}})} \mstar{form}}
11390
11391   This macro establishes local alien variables with the specified
11392   Alien types and names for the dynamic extent of the body.  The variable
11393   \var{names} are established as symbol-macros; the bindings have
11394   lexical scope, and may be assigned with \code{setq} or \code{setf}.
11395   This form is analogous to defining a local variable in C: additional
11396   storage is allocated, and the initial value is copied.
11397
11398   \code{with-alien} also establishes a new scope for named structures
11399   and unions.  Any \var{type} specified for a variable may contain
11400   named structure or union types with the slots specified.  Within the
11401   lexical scope of the binding specifiers and body, a locally defined
11402   structure type \var{foo} can be referenced by its name using:
11403 \begin{lisp}
11404   (struct foo)
11405 \end{lisp}
11406 \end{defmac}
11407
11408 %%\node External Alien Variables,  , Local Alien Variables, Alien Variables
11409 \subsection{External Alien Variables}
11410 \label{external-aliens}
11411
11412 External Alien names are strings, and Lisp names are symbols.  When an
11413 external Alien is represented using a Lisp variable, there must be a
11414 way to convert from one name syntax into the other.  The macros
11415 \code{extern-alien}, \code{def-alien-variable} and
11416 \funref{def-alien-routine} use this conversion heuristic:
11417 \begin{itemize}
11418 \item Alien names are converted to Lisp names by uppercasing and
11419   replacing underscores with hyphens.
11420
11421 \item Conversely, Lisp names are converted to Alien names by
11422   lowercasing and replacing hyphens with underscores.
11423
11424 \item Both the Lisp symbol and Alien string names may be separately
11425   specified by using a list of the form:
11426 \begin{lisp}
11427   (\var{alien-string} \var{lisp-symbol})
11428 \end{lisp}
11429 \end{itemize}
11430
11431 \begin{defmac}{alien:}{def-alien-variable}{\var{name} \var{type}}
11432
11433   This macro defines \var{name} as an external Alien variable of the
11434   specified Alien \var{type}.  \var{name} and \var{type} are not
11435   evaluated.  The Lisp name of the variable (see above) becomes a
11436   global Alien variable in the Lisp namespace.  Global Alien variables
11437   are effectively ``global symbol macros''; a reference to the
11438   variable fetches the contents of the external variable.  Similarly,
11439   setting the variable stores new contents---the new contents must be
11440   of the declared \var{type}.
11441
11442   For example, it is often necessary to read the global C variable
11443   \code{errno} to determine why a particular function call failed.  It
11444   is possible to define errno and make it accessible from Lisp by the
11445   following:
11446 \begin{lisp}
11447 (def-alien-variable "errno" int)
11448
11449 ;; Now it is possible to get the value of the C variable errno simply by
11450 ;; referencing that Lisp variable:
11451 ;;
11452 (print errno)
11453 \end{lisp}
11454 \end{defmac}
11455
11456 \begin{defmac}{alien:}{extern-alien}{\var{name} \var{type}}
11457
11458   This macro returns an Alien with the specified \var{type} which
11459   points to an externally defined value.  \var{name} is not evaluated,
11460   and may be specified either as a string or a symbol.  \var{type} is
11461   an unevaluated Alien type specifier.
11462 \end{defmac}
11463
11464 %%\f
11465 %%\node Alien Data Structure Example, Loading Unix Object Files, Alien Variables, Alien Objects
11466 \section{Alien Data Structure Example}
11467
11468 Now that we have Alien types, operations and variables, we can manipulate
11469 foreign data structures.  This C declaration can be translated into the
11470 following Alien type:
11471 \begin{lisp}
11472 struct foo \{
11473     int a;
11474     struct foo *b[100];
11475 \};
11476
11477  \myequiv
11478
11479 (def-alien-type nil
11480   (struct foo
11481     (a int)
11482     (b (array (* (struct foo)) 100))))
11483 \end{lisp}
11484
11485 With this definition, the following C expression can be translated in this way:
11486 \begin{example}
11487 struct foo f;
11488 f.b[7]->a
11489
11490  \myequiv
11491
11492 (with-alien ((f (struct foo)))
11493   (slot (deref (slot f 'b) 7) 'a)
11494   ;;
11495   ;; Do something with f...
11496   )
11497 \end{example}
11498
11499
11500 Or consider this example of an external C variable and some accesses:
11501 \begin{example}
11502 struct c_struct \{
11503         short x, y;
11504         char a, b;
11505         int z;
11506         struct c_struct *n;
11507 \};
11508
11509 extern struct c_struct *my_struct;
11510
11511 my_struct->x++;
11512 my_struct->a = 5;
11513 my_struct = my_struct->n;
11514 \end{example}
11515 which can be manipulated in Lisp like this:
11516 \begin{lisp}
11517 (def-alien-type nil
11518   (struct c-struct
11519           (x short)
11520           (y short)
11521           (a char)
11522           (b char)
11523           (z int)
11524           (n (* c-struct))))
11525
11526 (def-alien-variable "my_struct" (* c-struct))
11527
11528 (incf (slot my-struct 'x))
11529 (setf (slot my-struct 'a) 5)
11530 (setq my-struct (slot my-struct 'n))
11531 \end{lisp}
11532
11533
11534 %%\f
11535 %%\node Loading Unix Object Files, Alien Function Calls, Alien Data Structure Example, Alien Objects
11536 \section{Loading Unix Object Files}
11537
11538 Foreign object files are loaded into the running Lisp process by
11539 \code{load-foreign}.  First, it runs the linker on the files and
11540 libraries, creating an absolute Unix object file.  This object file is
11541 then loaded into the currently running Lisp.  The external
11542 symbols defining routines and variables are made available for future
11543 external references (e.g.  by \code{extern-alien}.)
11544 \code{load-foreign} must be run before any of the defined symbols are
11545 referenced.
11546
11547 Note that if a Lisp core image is saved (using \funref{save-lisp}), all
11548 loaded foreign code is lost when the image is restarted.
11549
11550 \begin{defun}{alien:}{load-foreign}{%
11551     \args{\var{files} \keys{\kwd{libraries} \kwd{base-file} \kwd{env}}}}
11552
11553   \var{files} is a \code{simple-string} or list of
11554   \code{simple-string}s specifying the names of the object files.
11555   \var{libraries} is a list of \code{simple-string}s specifying
11556   libraries in a format that \code{ld}, the Unix linker, expects.  The
11557   default value for \var{libraries} is \code{("-lc")} (i.e., the
11558   standard C library).  \var{base-file} is the file to use for the
11559   initial symbol table information.  The default is the Lisp start up
11560   code: \file{path:lisp}.  \var{env} should be a list of simple
11561   strings in the format of Unix environment variables (i.e.,
11562   \code{\var{A}=\var{B}}, where \var{A} is an environment variable and
11563   \var{B} is its value).  The default value for \var{env} is the
11564   environment information available at the time Lisp was invoked.
11565   Unless you are certain that you want to change this, you should just
11566   use the default.
11567 \end{defun}
11568
11569 %%\f
11570 %%\node Alien Function Calls, Step-by-Step Alien Example, Loading Unix Object Files, Alien Objects
11571 \section{Alien Function Calls}
11572
11573 The foreign function call interface allows a Lisp program to call functions
11574 written in other languages.  The current implementation of the foreign
11575 function call interface assumes a C calling convention and thus routines
11576 written in any language that adheres to this convention may be called from
11577 Lisp.
11578
11579 Lisp sets up various interrupt handling routines and other environment
11580 information when it first starts up, and expects these to be in place at all
11581 times.  The C functions called by Lisp should either not change the
11582 environment, especially the interrupt entry points, or should make sure
11583 that these entry points are restored when the C function returns to Lisp.
11584 If a C function makes changes without restoring things to the way they were
11585 when the C function was entered, there is no telling what will happen.
11586
11587 \begin{comment}
11588 * alien-funcall::               The alien-funcall Primitive
11589 * def-alien-routine::           The def-alien-routine Macro
11590 * def-alien-routine Example::
11591 * Calling Lisp from C::
11592 \end{comment}
11593
11594 %%\node alien-funcall, def-alien-routine, Alien Function Calls, Alien Function Calls
11595 \subsection{The alien-funcall Primitive}
11596
11597 \begin{defun}{alien:}{alien-funcall}{%
11598     \args{\var{alien-function} \amprest{} \var{arguments}}}
11599
11600   This function is the foreign function call primitive:
11601   \var{alien-function} is called with the supplied \var{arguments} and
11602   its value is returned.  The \var{alien-function} is an arbitrary
11603   run-time expression; to call a constant function, use
11604   \funref{extern-alien} or \code{def-alien-routine}.
11605
11606   The type of \var{alien-function} must be \code{(alien (function
11607     ...))} or \code{(alien (* (function ...)))},
11608   \xlref{alien-function-types}.  The function type is used to
11609   determine how to call the function (as though it was declared with
11610   a prototype.)  The type need not be known at compile time, but only
11611   known-type calls are efficiently compiled.  Limitations:
11612   \begin{itemize}
11613   \item Structure type return values are not implemented.
11614   \item Passing of structures by value is not implemented.
11615   \end{itemize}
11616 \end{defun}
11617
11618 Here is an example which allocates a \code{(struct foo)}, calls a foreign
11619 function to initialize it, then returns a Lisp vector of all the
11620 \code{(* (struct foo))} objects filled in by the foreign call:
11621 \begin{lisp}
11622 ;;
11623 ;; Allocate a foo on the stack.
11624 (with-alien ((f (struct foo)))
11625   ;;
11626   ;; Call some C function to fill in foo fields.
11627   (alien-funcall (extern-alien "mangle_foo" (function void (* foo)))
11628                  (addr f))
11629   ;;
11630   ;; Find how many foos to use by getting the A field.
11631   (let* ((num (slot f 'a))
11632          (result (make-array num)))
11633     ;;
11634     ;; Get a pointer to the array so that we don't have to keep extracting it:
11635     (with-alien ((a (* (array (* (struct foo)) 100)) (addr (slot f 'b))))
11636       ;;
11637       ;; Loop over the first N elements and stash them in the result vector.
11638       (dotimes (i num)
11639         (setf (svref result i) (deref (deref a) i)))
11640       result)))
11641 \end{lisp}
11642
11643 %%\node def-alien-routine, def-alien-routine Example, alien-funcall, Alien Function Calls
11644 \subsection{The def-alien-routine Macro}
11645
11646
11647 \begin{defmac}{alien:}{def-alien-routine}{\var{name} \var{result-type}
11648     \mstar{(\var{aname} \var{atype} \mopt{style})}}
11649
11650   This macro is a convenience for automatically generating Lisp
11651   interfaces to simple foreign functions.  The primary feature is the
11652   parameter style specification, which translates the C
11653   pass-by-reference idiom into additional return values.
11654
11655   \var{name} is usually a string external symbol, but may also be a
11656   symbol Lisp name or a list of the foreign name and the Lisp name.
11657   If only one name is specified, the other is automatically derived,
11658   (\pxlref{external-aliens}.)
11659
11660   \var{result-type} is the Alien type of the return value.  Each
11661   remaining subform specifies an argument to the foreign function.
11662   \var{aname} is the symbol name of the argument to the constructed
11663   function (for documentation) and \var{atype} is the Alien type of
11664   the corresponding foreign argument.  The semantics of the actual call
11665   are the same as for \funref{alien-funcall}.  \var{style} should be
11666   one of the following:
11667   \begin{Lentry}
11668   \item[\kwd{in}] specifies that the argument is passed by value.
11669     This is the default.  \kwd{in} arguments have no corresponding
11670     return value from the Lisp function.
11671
11672   \item[\kwd{out}] specifies a pass-by-reference output value.  The
11673     type of the argument must be a pointer to a fixed sized object
11674     (such as an integer or pointer).  \kwd{out} and \kwd{in-out}
11675     cannot be used with pointers to arrays, records or functions.  An
11676     object of the correct size is allocated, and its address is passed
11677     to the foreign function.  When the function returns, the contents
11678     of this location are returned as one of the values of the Lisp
11679     function.
11680
11681   \item[\kwd{copy}] is similar to \kwd{in}, but the argument is copied
11682     to a pre-allocated object and a pointer to this object is passed
11683     to the foreign routine.
11684
11685   \item[\kwd{in-out}] is a combination of \kwd{copy} and \kwd{out}.
11686     The argument is copied to a pre-allocated object and a pointer to
11687     this object is passed to the foreign routine.  On return, the
11688     contents of this location are returned as an additional value.
11689   \end{Lentry}
11690   Any efficiency-critical foreign interface function should be inline
11691   expanded by preceding \code{def-alien-routine} with:
11692   \begin{lisp}
11693     (declaim (inline \var{lisp-name}))
11694   \end{lisp}
11695   In addition to avoiding the Lisp call overhead, this allows
11696   pointers, word-integers and floats to be passed using non-descriptor
11697   representations, avoiding consing (\pxlref{non-descriptor}.)
11698 \end{defmac}
11699
11700 %%\node def-alien-routine Example, Calling Lisp from C, def-alien-routine, Alien Function Calls
11701 \subsection{def-alien-routine Example}
11702
11703 Consider the C function \code{cfoo} with the following calling convention:
11704 \begin{example}
11705 cfoo (str, a, i)
11706     char *str;
11707     char *a; /* update */
11708     int *i; /* out */
11709 \{
11710 /* Body of cfoo. */
11711 \}
11712 \end{example}
11713 which can be described by the following call to \code{def-alien-routine}:
11714 \begin{lisp}
11715 (def-alien-routine "cfoo" void
11716   (str c-string)
11717   (a char :in-out)
11718   (i int :out))
11719 \end{lisp}
11720 The Lisp function \code{cfoo} will have two arguments (\var{str} and \var{a})
11721 and two return values (\var{a} and \var{i}).
11722
11723 %%\node Calling Lisp from C,  , def-alien-routine Example, Alien Function Calls
11724 \subsection{Calling Lisp from C}
11725
11726 Calling Lisp functions from C is sometimes possible, but is rather hackish.
11727 See \code{funcall0} ... \code{funcall3} in \file{lisp/arch.h}.  The
11728 arguments must be valid CMU CL object descriptors (e.g.  fixnums must be
11729 left-shifted by 2.)  See \file{compiler/generic/objdef.lisp} or the derived
11730 file \file{lisp/internals.h} for details of the object representation.
11731 \file{lisp/internals.h} is mechanically generated, and is not part of the
11732 source distribution.  It is distributed in the \file{docs/} directory of the
11733 binary distribution.
11734
11735 Note that the garbage collector moves objects, and won't be able to fix up any
11736 references in C variables, so either turn GC off or don't keep Lisp pointers
11737 in C data unless they are to statically allocated objects.  You can use
11738 \funref{purify} to place live data structures in static space so that they
11739 won't move during GC.
11740
11741 \begin{changebar}
11742 \subsection{Accessing Lisp Arrays}
11743
11744 Due to the way \cmucl{} manages memory, the amount of memory that can
11745 be dynamically allocated by \code{malloc} or \funref{make-alien} is
11746 limited\footnote{\cmucl{} mmaps a large piece of memory for its own
11747   use and this memory is typically about 8 MB above the start of the C
11748   heap.  Thus, only about 8 MB of memory can be dynamically
11749   allocated.}.
11750
11751 To overcome this limitation, it is possible to access the content of
11752 Lisp arrays which are limited only by the amount of physical memory
11753 and swap space available.  However, this technique is only useful if
11754 the foreign function takes pointers to memory instead of allocating
11755 memory for itself.  In the latter case, you will have to modify the
11756 foreign functions.
11757
11758 This technique takes advantage of the fact that \cmucl{} has
11759 specialized array types (\pxlref{specialized-array-types}) that match
11760 a typical C array.  For example, a \code{(simple-array double-float
11761   (100))} is stored in memory in essentially the same way as the C
11762 array \code{double x[100]} would be.  The following function allows us
11763 to get the physical address of such a Lisp array:
11764 \begin{example}
11765 (defun array-data-address (array)
11766   "Return the physical address of where the actual data of an array is
11767 stored.
11768
11769 ARRAY must be a specialized array type in CMU Lisp.  This means ARRAY
11770 must be an array of one of the following types:
11771
11772                   double-float
11773                   single-float
11774                   (unsigned-byte 32)
11775                   (unsigned-byte 16)
11776                   (unsigned-byte  8)
11777                   (signed-byte 32)
11778                   (signed-byte 16)
11779                   (signed-byte  8)
11780 "
11781   (declare (type (or #+signed-array (array (signed-byte 8))
11782                      #+signed-array (array (signed-byte 16))
11783                      #+signed-array (array (signed-byte 32))
11784                      (array (unsigned-byte 8))
11785                      (array (unsigned-byte 16))
11786                      (array (unsigned-byte 32))
11787                      (array single-float)
11788                      (array double-float))
11789                  array)
11790            (optimize (speed 3) (safety 0))
11791            (ext:optimize-interface (safety 3)))
11792   ;; with-array-data will get us to the actual data.  However, because
11793   ;; the array could have been displaced, we need to know where the
11794   ;; data starts.
11795   (lisp::with-array-data ((data array)
11796                           (start)
11797                           (end))
11798     (declare (ignore end))
11799     ;; DATA is a specialized simple-array.  Memory is laid out like this:
11800     ;;
11801     ;;   byte offset    Value
11802     ;;        0         type code (should be 70 for double-float vector)
11803     ;;        4         4 * number of elements in vector
11804     ;;        8         1st element of vector
11805     ;;      ...         ...
11806     ;;
11807     (let ((addr (+ 8 (logandc1 7 (kernel:get-lisp-obj-address data))))
11808           (type-size (let ((type (array-element-type data)))
11809                        (cond ((or (equal type '(signed-byte 8))
11810                                   (equal type '(unsigned-byte 8)))
11811                               1)
11812                              ((or (equal type '(signed-byte 16))
11813                                   (equal type '(unsigned-byte 16)))
11814                               2)
11815                              ((or (equal type '(signed-byte 32))
11816                                   (equal type '(unsigned-byte 32)))
11817                               4)
11818                              ((equal type 'single-float)
11819                               4)
11820                              ((equal type 'double-float)
11821                               8)
11822                              (t
11823                               (error "Unknown specialized array element type"))))))
11824       (declare (type (unsigned-byte 32) addr)
11825                (optimize (speed 3) (safety 0) (ext:inhibit-warnings 3)))
11826       (system:int-sap (the (unsigned-byte 32)
11827                         (+ addr (* type-size start)))))))
11828 \end{example}
11829
11830 Assume we have the C function below that we wish to use:
11831 \begin{example}
11832   double dotprod(double* x, double* y, int n)
11833   \{
11834     int k;
11835     double sum = 0;
11836     for (k = 0; k < n; ++k) \{
11837       sum += x[k] * y[k];
11838     \}
11839     return sum;
11840   \}
11841 \end{example}
11842 The following example generates two large arrays in Lisp, and calls the C
11843 function to do the desired computation.  This would not have been
11844 possible using \code{malloc} or \code{make-alien} since we need about
11845 16 MB of memory to hold the two arrays.
11846 \begin{example}
11847   (def-alien-routine "dotprod" double
11848     (x (* double-float) :in)
11849     (y (* double-float) :in)
11850     (n int :in))
11851
11852   (let ((x (make-array 1000000 :element-type 'double-float))
11853         (y (make-array 1000000 :element-type 'double-float)))
11854     ;; Initialize X and Y somehow
11855     (let ((x-addr (array-data-address x))
11856           (y-addr (array-data-address y)))
11857       (dotprod x-addr y-addr 1000000)))
11858 \end{example}
11859 In this example, it may be useful to wrap the inner \code{let}
11860 expression in an \code{unwind-protect} that first turns off garbage
11861 collection and then turns garbage collection on afterwards.  This will
11862 prevent garbage collection from moving \code{x} and \code{y} after we
11863 have obtained the (now erroneous) addresses but before the call to
11864 \code{dotprod} is made.
11865
11866 \end{changebar}
11867 %%\f
11868 %%\node Step-by-Step Alien Example,  , Alien Function Calls, Alien Objects
11869 \section{Step-by-Step Alien Example}
11870
11871 This section presents a complete example of an interface to a somewhat
11872 complicated C function.  This example should give a fairly good idea
11873 of how to get the effect you want for almost any kind of C function.
11874 Suppose you have the following C function which you want to be able to
11875 call from Lisp in the file \file{test.c}:
11876 \begin{verbatim}
11877 struct c_struct
11878 {
11879   int x;
11880   char *s;
11881 };
11882
11883 struct c_struct *c_function (i, s, r, a)
11884     int i;
11885     char *s;
11886     struct c_struct *r;
11887     int a[10];
11888 {
11889   int j;
11890   struct c_struct *r2;
11891
11892   printf("i = %d\n", i);
11893   printf("s = %s\n", s);
11894   printf("r->x = %d\n", r->x);
11895   printf("r->s = %s\n", r->s);
11896   for (j = 0; j < 10; j++) printf("a[%d] = %d.\n", j, a[j]);
11897   r2 = (struct c_struct *) malloc (sizeof(struct c_struct));
11898   r2->x = i + 5;
11899   r2->s = "A C string";
11900   return(r2);
11901 };
11902 \end{verbatim}
11903 It is possible to call this function from Lisp using the file \file{test.lisp}
11904 whose contents are:
11905 \begin{lisp}
11906 ;;; -*- Package: test-c-call -*-
11907 (in-package "TEST-C-CALL")
11908 (use-package "ALIEN")
11909 (use-package "C-CALL")
11910
11911 ;;; Define the record c-struct in Lisp.
11912 (def-alien-type nil
11913     (struct c-struct
11914             (x int)
11915             (s c-string)))
11916
11917 ;;; Define the Lisp function interface to the C routine.  It returns a
11918 ;;; pointer to a record of type c-struct.  It accepts four parameters:
11919 ;;; i, an int; s, a pointer to a string; r, a pointer to a c-struct
11920 ;;; record; and a, a pointer to the array of 10 ints.
11921 ;;;
11922 ;;; The INLINE declaration eliminates some efficiency notes about heap
11923 ;;; allocation of Alien values.
11924 (declaim (inline c-function))
11925 (def-alien-routine c-function
11926     (* (struct c-struct))
11927   (i int)
11928   (s c-string)
11929   (r (* (struct c-struct)))
11930   (a (array int 10)))
11931
11932 ;;; A function which sets up the parameters to the C function and
11933 ;;; actually calls it.
11934 (defun call-cfun ()
11935   (with-alien ((ar (array int 10))
11936                (c-struct (struct c-struct)))
11937     (dotimes (i 10)                     ; Fill array.
11938       (setf (deref ar i) i))
11939     (setf (slot c-struct 'x) 20)
11940     (setf (slot c-struct 's) "A Lisp String")
11941
11942     (with-alien ((res (* (struct c-struct))
11943                       (c-function 5 "Another Lisp String" (addr c-struct) ar)))
11944       (format t "Returned from C function.~%")
11945       (multiple-value-prog1
11946           (values (slot res 'x)
11947                   (slot res 's))
11948         ;;
11949         ;; Deallocate result \i{after} we are done using it.
11950         (free-alien res)))))
11951 \end{lisp}
11952 To execute the above example, it is necessary to compile the C routine as
11953 follows:
11954 \begin{example}
11955 cc -c test.c
11956 \end{example}
11957 In order to enable incremental loading with some linkers, you may need to say:
11958 \begin{example}
11959 cc -G 0 -c test.c
11960 \end{example}
11961 Once the C code has been compiled, you can start up Lisp and load it in:
11962 \begin{example}
11963 %lisp
11964 ;;; Lisp should start up with its normal prompt.
11965
11966 ;;; Compile the Lisp file.  This step can be done separately.  You don't have
11967 ;;; to recompile every time.
11968 * (compile-file "test.lisp")
11969
11970 ;;; Load the foreign object file to define the necessary symbols.  This must
11971 ;;; be done before loading any code that refers to these symbols.  The next
11972 ;;; block of comments is actually the output of LOAD-FOREIGN.  Different linkers
11973 ;;; will give different warnings, but some warning about redefining the code
11974 ;;; size is typical.
11975 * (load-foreign "test.o")
11976
11977 ;;; Running library:load-foreign.csh...
11978 ;;; Loading object file...
11979 ;;; Parsing symbol table...
11980 Warning:  "_gp" moved from #x00C082C0 to #x00C08460.
11981
11982 Warning:  "end" moved from #x00C00340 to #x00C004E0.
11983
11984 ;;; o.k. now load the compiled Lisp object file.
11985 * (load "test")
11986
11987 ;;; Now we can call the routine that sets up the parameters and calls the C
11988 ;;; function.
11989 * (test-c-call::call-cfun)
11990
11991 ;;; The C routine prints the following information to standard output.
11992 i = 5
11993 s = Another Lisp String
11994 r->x = 20
11995 r->s = A Lisp String
11996 a[0] = 0.
11997 a[1] = 1.
11998 a[2] = 2.
11999 a[3] = 3.
12000 a[4] = 4.
12001 a[5] = 5.
12002 a[6] = 6.
12003 a[7] = 7.
12004 a[8] = 8.
12005 a[9] = 9.
12006 ;;; Lisp prints out the following information.
12007 Returned from C function.
12008 ;;; Return values from the call to test-c-call::call-cfun.
12009 10
12010 "A C string"
12011 *
12012 \end{example}
12013
12014 If any of the foreign functions do output, they should not be called from
12015 within Hemlock.  Depending on the situation, various strange behavior occurs.
12016 Under X, the output goes to the window in which Lisp was started; on a
12017 terminal, the output will overwrite the Hemlock screen image; in a Hemlock
12018 slave, standard output is \file{/dev/null} by default, so any output is
12019 discarded.
12020
12021 \hide{File:/afs/cs.cmu.edu/project/clisp/hackers/ram/docs/cmu-user/ipc.ms}
12022
12023 %%\node Interprocess Communication under LISP, Debugger Programmer's Interface, Alien Objects, Top
12024 \chapter{Interprocess Communication under LISP}
12025 \begin{center}
12026 \b{Written by William Lott and Bill Chiles}
12027 \end{center}
12028 \label{remote}
12029
12030 CMU Common Lisp offers a facility for interprocess communication (IPC)
12031 on top of using Unix system calls and the complications of that level
12032 of IPC.  There is a simple remote-procedure-call (RPC) package built
12033 on top of TCP/IP sockets.
12034
12035
12036 \begin{comment}
12037 * The REMOTE Package::
12038 * The WIRE Package::
12039 * Out-Of-Band Data::
12040 \end{comment}
12041
12042 %%\node The REMOTE Package, The WIRE Package, Interprocess Communication under LISP, Interprocess Communication under LISP
12043 \section{The REMOTE Package}
12044 The \code{remote} package provides a simple RPC facility including
12045 interfaces for creating servers, connecting to already existing
12046 servers, and calling functions in other Lisp processes.  The routines
12047 for establishing a connection between two processes,
12048 \code{create-request-server} and \code{connect-to-remote-server},
12049 return \var{wire} structures.  A wire maintains the current state of
12050 a connection, and all the RPC forms require a wire to indicate where
12051 to send requests.
12052
12053
12054 \begin{comment}
12055 * Connecting Servers and Clients::
12056 * Remote Evaluations::
12057 * Remote Objects::
12058 * Host Addresses::
12059 \end{comment}
12060
12061 %%\node Connecting Servers and Clients, Remote Evaluations, The REMOTE Package, The REMOTE Package
12062 \subsection{Connecting Servers and Clients}
12063
12064 Before a client can connect to a server, it must know the network address on
12065 which the server accepts connections.  Network addresses consist of a host
12066 address or name, and a port number.  Host addresses are either a string of the
12067 form \code{VANCOUVER.SLISP.CS.CMU.EDU} or a 32 bit unsigned integer.  Port
12068 numbers are 16 bit unsigned integers.  Note: \var{port} in this context has
12069 nothing to do with Mach ports and message passing.
12070
12071 When a process wants to receive connection requests (that is, become a
12072 server), it first picks an integer to use as the port.  Only one server
12073 (Lisp or otherwise) can use a given port number on a given machine at
12074 any particular time.  This can be an iterative process to find a free
12075 port: picking an integer and calling \code{create-request-server}.  This
12076 function signals an error if the chosen port is unusable.  You will
12077 probably want to write a loop using \code{handler-case}, catching
12078 conditions of type error, since this function does not signal more
12079 specific conditions.
12080
12081 \begin{defun}{wire:}{create-request-server}{%
12082     \args{\var{port} \ampoptional{} \var{on-connect}}}
12083
12084   \code{create-request-server} sets up the current Lisp to accept
12085   connections on the given port.  If port is unavailable for any
12086   reason, this signals an error.  When a client connects to this port,
12087   the acceptance mechanism makes a wire structure and invokes the
12088   \var{on-connect} function.  Invoking this function has a couple of
12089   purposes, and \var{on-connect} may be \nil{}, in which case the
12090   system forgoes invoking any function at connect time.
12091
12092   The \var{on-connect} function is both a hook that allows you access
12093   to the wire created by the acceptance mechanism, and it confirms the
12094   connection.  This function takes two arguments, the wire and the
12095   host address of the connecting process.  See the section on host
12096   addresses below.  When \var{on-connect} is \nil, the request server
12097   allows all connections.  When it is non-\nil, the function returns
12098   two values, whether to accept the connection and a function the
12099   system should call when the connection terminates.  Either value may
12100   be \nil, but when the first value is \nil, the acceptance mechanism
12101   destroys the wire.
12102
12103   \code{create-request-server} returns an object that
12104   \code{destroy-request-server} uses to terminate the server.
12105 \end{defun}
12106
12107 \begin{defun}{wire:}{destroy-request-server}{\args{\var{server}}}
12108
12109   \code{destroy-request-server} takes the result of
12110   \code{create-request-server} and terminates that server.  Any
12111   existing connections remain intact, but all additional connection
12112   attempts will fail.
12113 \end{defun}
12114
12115 \begin{defun}{wire:}{connect-to-remote-server}{%
12116     \args{\var{host} \var{port} \ampoptional{} \var{on-death}}}
12117
12118   \code{connect-to-remote-server} attempts to connect to a remote
12119   server at the given \var{port} on \var{host} and returns a wire
12120   structure if it is successful.  If \var{on-death} is non-\nil, it is
12121   a function the system invokes when this connection terminates.
12122 \end{defun}
12123
12124
12125 %%\node Remote Evaluations, Remote Objects, Connecting Servers and Clients, The REMOTE Package
12126 \subsection{Remote Evaluations}
12127 After the server and client have connected, they each have a wire
12128 allowing function evaluation in the other process.  This RPC mechanism
12129 has three flavors: for side-effect only, for a single value, and for
12130 multiple values.
12131
12132 Only a limited number of data types can be sent across wires as
12133 arguments for remote function calls and as return values: integers
12134 inclusively less than 32 bits in length, symbols, lists, and
12135 \var{remote-objects} (\pxlref{remote-objs}).  The system sends symbols
12136 as two strings, the package name and the symbol name, and if the
12137 package doesn't exist remotely, the remote process signals an error.
12138 The system ignores other slots of symbols.  Lists may be any tree of
12139 the above valid data types.  To send other data types you must
12140 represent them in terms of these supported types.  For example, you
12141 could use \code{prin1-to-string} locally, send the string, and use
12142 \code{read-from-string} remotely.
12143
12144 \begin{defmac}{wire:}{remote}{%
12145     \args{\var{wire} \mstar{call-specs}}}
12146
12147   The \code{remote} macro arranges for the process at the other end of
12148   \var{wire} to invoke each of the functions in the \var{call-specs}.
12149   To make sure the system sends the remote evaluation requests over
12150   the wire, you must call \code{wire-force-output}.
12151
12152   Each of \var{call-specs} looks like a function call textually, but
12153   it has some odd constraints and semantics.  The function position of
12154   the form must be the symbolic name of a function.  \code{remote}
12155   evaluates each of the argument subforms for each of the
12156   \var{call-specs} locally in the current context, sending these
12157   values as the arguments for the functions.
12158
12159   Consider the following example:
12160 \begin{verbatim}
12161 (defun write-remote-string (str)
12162   (declare (simple-string str))
12163   (wire:remote wire
12164     (write-string str)))
12165 \end{verbatim}
12166   The value of \code{str} in the local process is passed over the wire
12167   with a request to invoke \code{write-string} on the value.  The
12168   system does not expect to remotely evaluate \code{str} for a value
12169   in the remote process.
12170 \end{defmac}
12171
12172 \begin{defun}{wire:}{wire-force-output}{\args{\var{wire}}}
12173
12174   \code{wire-force-output} flushes all internal buffers associated
12175   with \var{wire}, sending the remote requests.  This is necessary
12176   after a call to \code{remote}.
12177 \end{defun}
12178
12179 \begin{defmac}{wire:}{remote-value}{\args{\var{wire} \var{call-spec}}}
12180
12181   The \code{remote-value} macro is similar to the \code{remote} macro.
12182   \code{remote-value} only takes one \var{call-spec}, and it returns
12183   the value returned by the function call in the remote process.  The
12184   value must be a valid type the system can send over a wire, and
12185   there is no need to call \code{wire-force-output} in conjunction
12186   with this interface.
12187
12188   If the client unwinds past the call to \code{remote-value}, the server
12189   continues running, but the system ignores the value the server sends
12190   back.
12191
12192   If the server unwinds past the remotely requested call, instead of
12193   returning normally, \code{remote-value} returns two values, \nil{}
12194   and \true.  Otherwise this returns the result of the remote
12195   evaluation and \nil.
12196 \end{defmac}
12197
12198 \begin{defmac}{wire:}{remote-value-bind}{%
12199     \args{\var{wire} (\mstar{variable}) remote-form
12200       \mstar{local-forms}}}
12201
12202   \code{remote-value-bind} is similar to \code{multiple-value-bind}
12203   except the values bound come from \var{remote-form}'s evaluation in
12204   the remote process.  The \var{local-forms} execute in an implicit
12205   \code{progn}.
12206
12207   If the client unwinds past the call to \code{remote-value-bind}, the
12208   server continues running, but the system ignores the values the
12209   server sends back.
12210
12211   If the server unwinds past the remotely requested call, instead of
12212   returning normally, the \var{local-forms} never execute, and
12213   \code{remote-value-bind} returns \nil.
12214 \end{defmac}
12215
12216
12217 %%\node Remote Objects, Host Addresses, Remote Evaluations, The REMOTE Package
12218 \subsection{Remote Objects}
12219 \label{remote-objs}
12220
12221 The wire mechanism only directly supports a limited number of data
12222 types for transmission as arguments for remote function calls and as
12223 return values: integers inclusively less than 32 bits in length,
12224 symbols, and lists.  Sometimes it is useful to allow remote processes to
12225 refer to local data structures without allowing the remote process
12226 to operate on the data.  We have \var{remote-objects} to support
12227 this without the need to represent the data structure in terms of
12228 the above data types, to send the representation to the remote
12229 process, to decode the representation, to later encode it again, and
12230 to send it back along the wire.
12231
12232 You can convert any Lisp object into a remote-object.  When you send
12233 a remote-object along a wire, the system simply sends a unique token
12234 for it.  In the remote process, the system looks up the token and
12235 returns a remote-object for the token.  When the remote process
12236 needs to refer to the original Lisp object as an argument to a
12237 remote call back or as a return value, it uses the remote-object it
12238 has which the system converts to the unique token, sending that
12239 along the wire to the originating process.  Upon receipt in the
12240 first process, the system converts the token back to the same
12241 (\code{eq}) remote-object.
12242
12243 \begin{defun}{wire:}{make-remote-object}{\args{\var{object}}}
12244
12245   \code{make-remote-object} returns a remote-object that has
12246   \var{object} as its value.  The remote-object can be passed across
12247   wires just like the directly supported wire data types.
12248 \end{defun}
12249
12250 \begin{defun}{wire:}{remote-object-p}{\args{\var{object}}}
12251
12252   The function \code{remote-object-p} returns \true{} if \var{object}
12253   is a remote object and \nil{} otherwise.
12254 \end{defun}
12255
12256 \begin{defun}{wire:}{remote-object-local-p}{\args{\var{remote}}}
12257
12258   The function \code{remote-object-local-p} returns \true{} if
12259   \var{remote} refers to an object in the local process.  This can
12260   only occur if the local process created \var{remote} with
12261   \code{make-remote-object}.
12262 \end{defun}
12263
12264 \begin{defun}{wire:}{remote-object-eq}{\args{\var{obj1} \var{obj2}}}
12265
12266   The function \code{remote-object-eq} returns \true{} if \var{obj1} and
12267   \var{obj2} refer to the same (\code{eq}) lisp object, regardless of
12268   which process created the remote-objects.
12269 \end{defun}
12270
12271 \begin{defun}{wire:}{remote-object-value}{\args{\var{remote}}}
12272
12273   This function returns the original object used to create the given
12274   remote object.  It is an error if some other process originally
12275   created the remote-object.
12276 \end{defun}
12277
12278 \begin{defun}{wire:}{forget-remote-translation}{\args{\var{object}}}
12279
12280   This function removes the information and storage necessary to
12281   translate remote-objects back into \var{object}, so the next
12282   \code{gc} can reclaim the memory.  You should use this when you no
12283   longer expect to receive references to \var{object}.  If some remote
12284   process does send a reference to \var{object},
12285   \code{remote-object-value} signals an error.
12286 \end{defun}
12287
12288
12289 %%\node Host Addresses,  , Remote Objects, The REMOTE Package
12290 \subsection{Host Addresses}
12291 The operating system maintains a database of all the valid host
12292 addresses.  You can use this database to convert between host names
12293 and addresses.
12294
12295 \begin{defun}{ext:}{lookup-host-entry}{\args{\var{host}}}
12296
12297   \code{lookup-host-entry} searches the database for the given
12298   \var{host} and returns a host-entry structure for it.  If it fails
12299   to find \var{host} in the database, it returns \nil.  \var{Host} is
12300   either the address (as an integer) or the name (as a string) of the
12301   desired host.
12302 \end{defun}
12303
12304 \begin{defun}{ext:}{host-entry-name}{\args{\var{host-entry}}}
12305   \defunx[ext:]{host-entry-aliases}{\args{\var{host-entry}}}
12306   \defunx[ext:]{host-entry-addr-list}{\args{\var{host-entry}}}
12307   \defunx[ext:]{host-entry-addr}{\args{\var{host-entry}}}
12308
12309   \code{host-entry-name}, \code{host-entry-aliases}, and
12310   \code{host-entry-addr-list} each return the indicated slot from the
12311   host-entry structure.  \code{host-entry-addr} returns the primary
12312   (first) address from the list returned by
12313   \code{host-entry-addr-list}.
12314 \end{defun}
12315
12316
12317 %%\node The WIRE Package, Out-Of-Band Data, The REMOTE Package, Interprocess Communication under LISP
12318 \section{The WIRE Package}
12319
12320 The \code{wire} package provides for sending data along wires.  The
12321 \code{remote} package sits on top of this package.  All data sent
12322 with a given output routine must be read in the remote process with
12323 the complementary fetching routine.  For example, if you send a
12324 string with \code{wire-output-string}, the remote process must know
12325 to use \code{wire-get-string}.  To avoid rigid data transfers and
12326 complicated code, the interface supports sending
12327 \var{tagged} data.  With tagged data, the system sends a tag
12328 announcing the type of the next data, and the remote system takes
12329 care of fetching the appropriate type.
12330
12331 When using interfaces at the wire level instead of the RPC level,
12332 the remote process must read everything sent by these routines.  If
12333 the remote process leaves any input on the wire, it will later
12334 mistake the data for an RPC request causing unknown lossage.
12335
12336 \begin{comment}
12337 * Untagged Data::
12338 * Tagged Data::
12339 * Making Your Own Wires::
12340 \end{comment}
12341
12342 %%\node Untagged Data, Tagged Data, The WIRE Package, The WIRE Package
12343 \subsection{Untagged Data}
12344 When using these routines both ends of the wire know exactly what types are
12345 coming and going and in what order. This data is restricted to the following
12346 types:
12347 \begin{itemize}
12348
12349 \item
12350 8 bit unsigned bytes.
12351
12352 \item
12353 32 bit unsigned bytes.
12354
12355 \item
12356 32 bit integers.
12357
12358 \item
12359 simple-strings less than 65535 in length.
12360 \end{itemize}
12361
12362
12363 \begin{defun}{wire:}{wire-output-byte}{\args{\var{wire} \var{byte}}}
12364   \defunx[wire:]{wire-get-byte}{\args{\var{wire}}}
12365   \defunx[wire:]{wire-output-number}{\args{\var{wire} \var{number}}}
12366   \defunx[wire:]{wire-get-number}{\args{\var{wire} \ampoptional{}
12367       \var{signed}}}
12368   \defunx[wire:]{wire-output-string}{\args{\var{wire} \var{string}}}
12369   \defunx[wire:]{wire-get-string}{\args{\var{wire}}}
12370
12371   These functions either output or input an object of the specified
12372   data type.  When you use any of these output routines to send data
12373   across the wire, you must use the corresponding input routine
12374   to interpret the data.
12375 \end{defun}
12376
12377
12378 %%\node Tagged Data, Making Your Own Wires, Untagged Data, The WIRE Package
12379 \subsection{Tagged Data}
12380 When using these routines, the system automatically transmits and interprets
12381 the tags for you, so both ends can figure out what kind of data transfers
12382 occur.  Sending tagged data allows a greater variety of data types: integers
12383 inclusively less than 32 bits in length, symbols, lists, and \var{remote-objects}
12384 (\pxlref{remote-objs}).  The system sends symbols as two strings, the
12385 package name and the symbol name, and if the package doesn't exist remotely,
12386 the remote process signals an error.  The system ignores other slots of
12387 symbols.  Lists may be any tree of the above valid data types.  To send other
12388 data types you must represent them in terms of these supported types.  For
12389 example, you could use \code{prin1-to-string} locally, send the string, and use
12390 \code{read-from-string} remotely.
12391
12392 \begin{defun}{wire:}{wire-output-object}{%
12393     \args{\var{wire} \var{object} \ampoptional{} \var{cache-it}}}
12394   \defunx[wire:]{wire-get-object}{\args{\var{wire}}}
12395
12396   The function \code{wire-output-object} sends \var{object} over
12397   \var{wire} preceded by a tag indicating its type.
12398
12399   If \var{cache-it} is non-\nil, this function only sends \var{object}
12400   the first time it gets \var{object}.  Each end of the wire
12401   associates a token with \var{object}, similar to remote-objects,
12402   allowing you to send the object more efficiently on successive
12403   transmissions.  \var{cache-it} defaults to \true{} for symbols and
12404   \nil{} for other types.  Since the RPC level requires function
12405   names, a high-level protocol based on a set of function calls saves
12406   time in sending the functions' names repeatedly.
12407
12408   The function \code{wire-get-object} reads the results of
12409   \code{wire-output-object} and returns that object.
12410 \end{defun}
12411
12412
12413 %%\node Making Your Own Wires,  , Tagged Data, The WIRE Package
12414 \subsection{Making Your Own Wires}
12415 You can create wires manually in addition to the \code{remote} package's
12416 interface creating them for you.  To create a wire, you need a Unix \i{file
12417 descriptor}.  If you are unfamiliar with Unix file descriptors, see section 2 of
12418 the Unix manual pages.
12419
12420 \begin{defun}{wire:}{make-wire}{\args{\var{descriptor}}}
12421
12422   The function \code{make-wire} creates a new wire when supplied with
12423   the file descriptor to use for the underlying I/O operations.
12424 \end{defun}
12425
12426 \begin{defun}{wire:}{wire-p}{\args{\var{object}}}
12427
12428   This function returns \true{} if \var{object} is indeed a wire,
12429   \nil{} otherwise.
12430 \end{defun}
12431
12432 \begin{defun}{wire:}{wire-fd}{\args{\var{wire}}}
12433
12434   This function returns the file descriptor used by the \var{wire}.
12435 \end{defun}
12436
12437
12438 %%\node Out-Of-Band Data,  , The WIRE Package, Interprocess Communication under LISP
12439 \section{Out-Of-Band Data}
12440
12441 The TCP/IP protocol allows users to send data asynchronously, otherwise
12442 known as \var{out-of-band} data.  When using this feature, the operating
12443 system interrupts the receiving process if this process has chosen to be
12444 notified about out-of-band data.  The receiver can grab this input
12445 without affecting any information currently queued on the socket.
12446 Therefore, you can use this without interfering with any current
12447 activity due to other wire and remote interfaces.
12448
12449 Unfortunately, most implementations of TCP/IP are broken, so use of
12450 out-of-band data is limited for safety reasons.  You can only reliably
12451 send one character at a time.
12452
12453 The routines in this section provide a mechanism for establishing
12454 handlers for out-of-band characters and for sending them out-of-band.
12455 These all take a Unix file descriptor instead of a wire, but you can
12456 fetch a wire's file descriptor with \code{wire-fd}.
12457
12458 \begin{defun}{wire:}{add-oob-handler}{\args{\var{fd} \var{char} \var{handler}}}
12459
12460   The function \code{add-oob-handler} arranges for \var{handler} to be
12461   called whenever \var{char} shows up as out-of-band data on the file
12462   descriptor \var{fd}.
12463 \end{defun}
12464
12465 \begin{defun}{wire:}{remove-oob-handler}{\args{\var{fd} \var{char}}}
12466
12467   This function removes the handler for the character \var{char} on
12468   the file descriptor \var{fd}.
12469 \end{defun}
12470
12471 \begin{defun}{wire:}{remove-all-oob-handlers}{\args{\var{fd}}}
12472
12473   This function removes all handlers for the file descriptor \var{fd}.
12474 \end{defun}
12475
12476 \begin{defun}{wire:}{send-character-out-of-band}{\args{\var{fd} \var{char}}}
12477
12478   This function sends the character \var{char} down the file
12479   descriptor \var{fd} out-of-band.
12480 \end{defun}
12481
12482 %%\f
12483 \hide{File:debug-int.tex}
12484 %%\node Debugger Programmer's Interface, Function Index, Interprocess Communication under LISP, Top
12485 \chapter{Debugger Programmer's Interface}
12486 \label{debug-internals}
12487
12488 The debugger programmer's interface is exported from the
12489 \code{"DEBUG-INTERNALS"} or \code{"DI"} package.  This is a CMU
12490 extension that allows debugging tools to be written without detailed
12491 knowledge of the compiler or run-time system.
12492
12493 Some of the interface routines take a code-location as an argument.  As
12494 described in the section on code-locations, some code-locations are
12495 unknown.  When a function calls for a \var{basic-code-location}, it
12496 takes either type, but when it specifically names the argument
12497 \var{code-location}, the routine will signal an error if you give it an
12498 unknown code-location.
12499
12500 \begin{comment}
12501 * DI Exceptional Conditions::
12502 * Debug-variables::
12503 * Frames::
12504 * Debug-functions::
12505 * Debug-blocks::
12506 * Breakpoints::
12507 * Code-locations::
12508 * Debug-sources::
12509 * Source Translation Utilities::
12510 \end{comment}
12511
12512 %%\f
12513 %%\node DI Exceptional Conditions, Debug-variables, Debugger Programmer's Interface, Debugger Programmer's Interface
12514 \section{DI Exceptional Conditions}
12515
12516 Some of these operations fail depending on the availability of debugging
12517 information.  In the most severe case, when someone saved a Lisp image
12518 stripping all debugging data structures, no operations are valid.  In
12519 this case, even backtracing and finding frames is impossible.  Some
12520 interfaces can simply return values indicating the lack of information,
12521 or their return values are naturally meaningful in light of missing data.
12522 Other routines, as documented below, will signal
12523 \code{serious-condition}s when they discover awkward situations.  This
12524 interface does not provide for programs to detect these situations other
12525 than by calling a routine that detects them and signals a condition.
12526 These are serious-conditions because the program using the interface
12527 must handle them before it can correctly continue execution.  These
12528 debugging conditions are not errors since it is no fault of the
12529 programmers that the conditions occur.
12530
12531 \begin{comment}
12532 * Debug-conditions::
12533 * Debug-errors::
12534 \end{comment}
12535
12536 %%\node Debug-conditions, Debug-errors, DI Exceptional Conditions, DI Exceptional Conditions
12537 \subsection{Debug-conditions}
12538
12539 The debug internals interface signals conditions when it can't adhere
12540 to its contract.  These are serious-conditions because the program
12541 using the interface must handle them before it can correctly continue
12542 execution.  These debugging conditions are not errors since it is no
12543 fault of the programmers that the conditions occur.  The interface
12544 does not provide for programs to detect these situations other than
12545 calling a routine that detects them and signals a condition.
12546
12547
12548 \begin{deftp}{Condition}{debug-condition}{}
12549
12550 This condition inherits from serious-condition, and all debug-conditions
12551 inherit from this.  These must be handled, but they are not programmer errors.
12552 \end{deftp}
12553
12554
12555 \begin{deftp}{Condition}{no-debug-info}{}
12556
12557 This condition indicates there is absolutely no debugging information
12558 available.
12559 \end{deftp}
12560
12561
12562 \begin{deftp}{Condition}{no-debug-function-returns}{}
12563
12564 This condition indicates the system cannot return values from a frame since
12565 its debug-function lacks debug information details about returning values.
12566 \end{deftp}
12567
12568
12569 \begin{deftp}{Condition}{no-debug-blocks}{}
12570 This condition indicates that a function was not compiled with debug-block
12571 information, but this information is necessary for some requested
12572 operation.
12573 \end{deftp}
12574
12575 \begin{deftp}{Condition}{no-debug-variables}{}
12576 Similar to \code{no-debug-blocks}, except that variable information was
12577 requested.
12578 \end{deftp}
12579
12580 \begin{deftp}{Condition}{lambda-list-unavailable}{}
12581 Similar to \code{no-debug-blocks}, except that lambda list information was
12582 requested.
12583 \end{deftp}
12584
12585 \begin{deftp}{Condition}{invalid-value}{}
12586
12587 This condition indicates a debug-variable has an \kwd{invalid} or \kwd{unknown}
12588 value in a particular frame.
12589 \end{deftp}
12590
12591
12592 \begin{deftp}{Condition}{ambiguous-variable-name}{}
12593
12594 This condition indicates a user supplied debug-variable name identifies more
12595 than one valid variable in a particular frame.
12596 \end{deftp}
12597
12598
12599 %%\node Debug-errors,  , Debug-conditions, DI Exceptional Conditions
12600 \subsection{Debug-errors}
12601
12602 These are programmer errors resulting from misuse of the debugging tools'
12603 programmers' interface.  You could have avoided an occurrence of one of these
12604 by using some routine to check the use of the routine generating the error.
12605
12606
12607 \begin{deftp}{Condition}{debug-error}{}
12608 This condition inherits from error, and all user programming errors inherit
12609 from this condition.
12610 \end{deftp}
12611
12612
12613 \begin{deftp}{Condition}{unhandled-debug-condition}{}
12614 This error results from a signalled \code{debug-condition} occurring
12615 without anyone handling it.
12616 \end{deftp}
12617
12618
12619 \begin{deftp}{Condition}{unknown-code-location}{}
12620 This error indicates the invalid use of an unknown-code-location.
12621 \end{deftp}
12622
12623
12624 \begin{deftp}{Condition}{unknown-debug-variable}{}
12625
12626 This error indicates an attempt to use a debug-variable in conjunction with an
12627 inappropriate debug-function; for example, checking the variable's validity
12628 using a code-location in the wrong debug-function will signal this error.
12629 \end{deftp}
12630
12631
12632 \begin{deftp}{Condition}{frame-function-mismatch}{}
12633
12634 This error indicates you called a function returned by
12635 \code{preprocess-for-eval}
12636 on a frame other than the one for which the function had been prepared.
12637 \end{deftp}
12638
12639
12640 %%\f
12641 %%\node Debug-variables, Frames, DI Exceptional Conditions, Debugger Programmer's Interface
12642 \section{Debug-variables}
12643
12644 Debug-variables represent the constant information about where the system
12645 stores argument and local variable values.  The system uniquely identifies with
12646 an integer every instance of a variable with a particular name and package.  To
12647 access a value, you must supply the frame along with the debug-variable since
12648 these are particular to a function, not every instance of a variable on the
12649 stack.
12650
12651 \begin{defun}{}{debug-variable-name}{\args{\var{debug-variable}}}
12652
12653   This function returns the name of the \var{debug-variable}.  The
12654   name is the name of the symbol used as an identifier when writing
12655   the code.
12656 \end{defun}
12657
12658
12659 \begin{defun}{}{debug-variable-package}{\args{\var{debug-variable}}}
12660
12661   This function returns the package name of the \var{debug-variable}.
12662   This is the package name of the symbol used as an identifier when
12663   writing the code.
12664 \end{defun}
12665
12666
12667 \begin{defun}{}{debug-variable-symbol}{\args{\var{debug-variable}}}
12668
12669   This function returns the symbol from interning
12670   \code{debug-variable-name} in the package named by
12671   \code{debug-variable-package}.
12672 \end{defun}
12673
12674
12675 \begin{defun}{}{debug-variable-id}{\args{\var{debug-variable}}}
12676
12677   This function returns the integer that makes \var{debug-variable}'s
12678   name and package name unique with respect to other
12679   debug-variables in the same function.
12680 \end{defun}
12681
12682
12683 \begin{defun}{}{debug-variable-validity}{%
12684     \args{\var{debug-variable} \var{basic-code-location}}}
12685
12686   This function returns one of three values reflecting the validity of
12687   \var{debug-variable}'s value at \var{basic-code-location}:
12688   \begin{Lentry}
12689   \item[\kwd{valid}] The value is known to be available.
12690   \item[\kwd{invalid}] The value is known to be unavailable.
12691   \item[\kwd{unknown}] The value's availability is unknown.
12692   \end{Lentry}
12693 \end{defun}
12694
12695
12696 \begin{defun}{}{debug-variable-value}{\args{\var{debug-variable}
12697       \var{frame}}}
12698
12699   This function returns the value stored for \var{debug-variable} in
12700   \var{frame}.  The value may be invalid.  This is \code{SETF}'able.
12701 \end{defun}
12702
12703
12704 \begin{defun}{}{debug-variable-valid-value}{%
12705     \args{\var{debug-variable} \var{frame}}}
12706
12707   This function returns the value stored for \var{debug-variable} in
12708   \var{frame}.  If the value is not \kwd{valid}, then this signals an
12709   \code{invalid-value} error.
12710 \end{defun}
12711
12712
12713 %%\f
12714 %%\node Frames, Debug-functions, Debug-variables, Debugger Programmer's Interface
12715 \section{Frames}
12716
12717 Frames describe a particular call on the stack for a particular thread.  This
12718 is the environment for name resolution, getting arguments and locals, and
12719 returning values.  The stack conceptually grows up, so the top of the stack is
12720 the most recently called function.
12721
12722 \code{top-frame}, \code{frame-down}, \code{frame-up}, and
12723 \code{frame-debug-function} can only fail when there is absolutely no
12724 debug information available.  This can only happen when someone saved a
12725 Lisp image specifying that the system dump all debugging data.
12726
12727
12728 \begin{defun}{}{top-frame}{}
12729
12730   This function never returns the frame for itself, always the frame
12731   before calling \code{top-frame}.
12732 \end{defun}
12733
12734
12735 \begin{defun}{}{frame-down}{\args{\var{frame}}}
12736
12737   This returns the frame immediately below \var{frame} on the stack.
12738   When \var{frame} is the bottom of the stack, this returns \nil.
12739 \end{defun}
12740
12741
12742 \begin{defun}{}{frame-up}{\args{\var{frame}}}
12743
12744   This returns the frame immediately above \var{frame} on the stack.
12745   When \var{frame} is the top of the stack, this returns \nil.
12746 \end{defun}
12747
12748
12749 \begin{defun}{}{frame-debug-function}{\args{\var{frame}}}
12750
12751   This function returns the debug-function for the function whose call
12752   \var{frame} represents.
12753 \end{defun}
12754
12755
12756 \begin{defun}{}{frame-code-location}{\args{\var{frame}}}
12757
12758   This function returns the code-location where \var{frame}'s
12759   debug-function will continue running when program execution returns
12760   to \var{frame}.  If someone interrupted this frame, the result could
12761   be an unknown code-location.
12762 \end{defun}
12763
12764
12765 \begin{defun}{}{frame-catches}{\args{\var{frame}}}
12766
12767   This function returns an a-list for all active catches in
12768   \var{frame} mapping catch tags to the code-locations at which the
12769   catch re-enters.
12770 \end{defun}
12771
12772
12773 \begin{defun}{}{eval-in-frame}{\args{\var{frame} \var{form}}}
12774
12775   This evaluates \var{form} in \var{frame}'s environment.  This can
12776   signal several different debug-conditions since its success relies
12777   on a variety of inexact debug information: \code{invalid-value},
12778   \code{ambiguous-variable-name}, \code{frame-function-mismatch}.  See
12779   also \funref{preprocess-for-eval}.
12780 \end{defun}
12781
12782 \begin{comment}
12783   \begin{defun}{}{return-from-frame}{\args{\var{frame} \var{values}}}
12784
12785     This returns the elements in the list \var{values} as multiple
12786     values from \var{frame} as if the function \var{frame} represents
12787     returned these values.  This signals a
12788     \code{no-debug-function-returns} condition when \var{frame}'s
12789     debug-function lacks information on returning values.
12790
12791     \i{Not Yet Implemented}
12792   \end{defun}
12793 \end{comment}
12794
12795 %%\f
12796 %%\node Debug-functions, Debug-blocks, Frames, Debugger Programmer's Interface
12797 \section {Debug-functions}
12798
12799 Debug-functions represent the static information about a function determined at
12800 compile time---argument and variable storage, their lifetime information,
12801 etc.  The debug-function also contains all the debug-blocks representing
12802 basic-blocks of code, and these contain information about specific
12803 code-locations in a debug-function.
12804
12805 \begin{defmac}{}{do-debug-function-blocks}{%
12806     \args{(\var{block-var} \var{debug-function} \mopt{result-form})
12807       \mstar{form}}}
12808
12809   This executes the forms in a context with \var{block-var} bound to
12810   each debug-block in \var{debug-function} successively.
12811   \var{Result-form} is an optional form to execute for a return value,
12812   and \code{do-debug-function-blocks} returns \nil{} if there is no
12813   \var{result-form}.  This signals a \code{no-debug-blocks} condition
12814   when the \var{debug-function} lacks debug-block information.
12815 \end{defmac}
12816
12817
12818 \begin{defun}{}{debug-function-lambda-list}{\args{\var{debug-function}}}
12819
12820   This function returns a list representing the lambda-list for
12821   \var{debug-function}.  The list has the following structure:
12822   \begin{example}
12823     (required-var1 required-var2
12824     ...
12825     (:optional var3 suppliedp-var4)
12826     (:optional var5)
12827     ...
12828     (:rest var6) (:rest var7)
12829     ...
12830     (:keyword keyword-symbol var8 suppliedp-var9)
12831     (:keyword keyword-symbol var10)
12832     ...
12833     )
12834   \end{example}
12835   Each \code{var}\var{n} is a debug-variable; however, the symbol
12836   \kwd{deleted} appears instead whenever the argument remains
12837   unreferenced throughout \var{debug-function}.
12838
12839   If there is no lambda-list information, this signals a
12840   \code{lambda-list-unavailable} condition.
12841 \end{defun}
12842
12843
12844 \begin{defmac}{}{do-debug-function-variables}{%
12845     \args{(\var{var} \var{debug-function} \mopt{result})
12846       \mstar{form}}}
12847
12848   This macro executes each \var{form} in a context with \var{var}
12849   bound to each debug-variable in \var{debug-function}.  This returns
12850   the value of executing \var{result} (defaults to \nil).  This may
12851   iterate over only some of \var{debug-function}'s variables or none
12852   depending on debug policy; for example, possibly the compilation
12853   only preserved argument information.
12854 \end{defmac}
12855
12856
12857 \begin{defun}{}{debug-variable-info-available}{\args{\var{debug-function}}}
12858
12859   This function returns whether there is any variable information for
12860   \var{debug-function}.  This is useful for distinguishing whether
12861   there were no locals in a function or whether there was no variable
12862   information.  For example, if \code{do-debug-function-variables}
12863   executes its forms zero times, then you can use this function to
12864   determine the reason.
12865 \end{defun}
12866
12867
12868 \begin{defun}{}{debug-function-symbol-variables}{%
12869     \args{\var{debug-function} \var{symbol}}}
12870
12871   This function returns a list of debug-variables in
12872   \var{debug-function} having the same name and package as
12873   \var{symbol}.  If \var{symbol} is uninterned, then this returns a
12874   list of debug-variables without package names and with the same name
12875   as \var{symbol}.  The result of this function is limited to the
12876   availability of variable information in \var{debug-function}; for
12877   example, possibly \var{debug-function} only knows about its
12878   arguments.
12879 \end{defun}
12880
12881
12882 \begin{defun}{}{ambiguous-debug-variables}{%
12883     \args{\var{debug-function} \var{name-prefix-string}}}
12884
12885   This function returns a list of debug-variables in
12886   \var{debug-function} whose names contain \var{name-prefix-string} as
12887   an initial substring.  The result of this function is limited to the
12888   availability of variable information in \var{debug-function}; for
12889   example, possibly \var{debug-function} only knows about its
12890   arguments.
12891 \end{defun}
12892
12893
12894 \begin{defun}{}{preprocess-for-eval}{%
12895     \args{\var{form} \var{basic-code-location}}}
12896
12897   This function returns a function of one argument that evaluates
12898   \var{form} in the lexical context of \var{basic-code-location}.
12899   This allows efficient repeated evaluation of \var{form} at a certain
12900   place in a function which could be useful for conditional breaking.
12901   This signals a \code{no-debug-variables} condition when the
12902   code-location's debug-function has no debug-variable information
12903   available.  The returned function takes a frame as an argument.  See
12904   also \funref{eval-in-frame}.
12905 \end{defun}
12906
12907
12908 \begin{defun}{}{function-debug-function}{\args{\var{function}}}
12909
12910   This function returns a debug-function that represents debug
12911   information for \var{function}.
12912 \end{defun}
12913
12914
12915 \begin{defun}{}{debug-function-kind}{\args{\var{debug-function}}}
12916
12917   This function returns the kind of function \var{debug-function}
12918   represents.  The value is one of the following:
12919   \begin{Lentry}
12920   \item[\kwd{optional}] This kind of function is an entry point to an
12921     ordinary function.  It handles optional defaulting, parsing
12922     keywords, etc.
12923   \item[\kwd{external}] This kind of function is an entry point to an
12924     ordinary function.  It checks argument values and count and calls
12925     the defined function.
12926   \item[\kwd{top-level}] This kind of function executes one or more
12927     random top-level forms from a file.
12928   \item[\kwd{cleanup}] This kind of function represents the cleanup
12929     forms in an \code{unwind-protect}.
12930   \item[\nil] This kind of function is not one of the above; that is,
12931     it is not specially marked in any way.
12932   \end{Lentry}
12933 \end{defun}
12934
12935
12936 \begin{defun}{}{debug-function-function}{\args{\var{debug-function}}}
12937
12938   This function returns the Common Lisp function associated with the
12939   \var{debug-function}.  This returns \nil{} if the function is
12940   unavailable or is non-existent as a user callable function object.
12941 \end{defun}
12942
12943
12944 \begin{defun}{}{debug-function-name}{\args{\var{debug-function}}}
12945
12946   This function returns the name of the function represented by
12947   \var{debug-function}.  This may be a string or a cons; do not assume
12948   it is a symbol.
12949 \end{defun}
12950
12951
12952 %%\f
12953 %%\node Debug-blocks, Breakpoints, Debug-functions, Debugger Programmer's Interface
12954 \section{Debug-blocks}
12955
12956 Debug-blocks contain information pertinent to a specific range of code in a
12957 debug-function.
12958
12959 \begin{defmac}{}{do-debug-block-locations}{%
12960     \args{(\var{code-var} \var{debug-block} \mopt{result})
12961       \mstar{form}}}
12962
12963   This macro executes each \var{form} in a context with \var{code-var}
12964   bound to each code-location in \var{debug-block}.  This returns the
12965   value of executing \var{result} (defaults to \nil).
12966 \end{defmac}
12967
12968
12969 \begin{defun}{}{debug-block-successors}{\args{\var{debug-block}}}
12970
12971   This function returns the list of possible code-locations where
12972   execution may continue when the basic-block represented by
12973   \var{debug-block} completes its execution.
12974 \end{defun}
12975
12976
12977 \begin{defun}{}{debug-block-elsewhere-p}{\args{\var{debug-block}}}
12978
12979   This function returns whether \var{debug-block} represents elsewhere
12980   code.  This is code the compiler has moved out of a function's code
12981   sequence for optimization reasons.  Code-locations in these blocks
12982   are unsuitable for stepping tools, and the first code-location has
12983   nothing to do with a normal starting location for the block.
12984 \end{defun}
12985
12986
12987 %%\f
12988 %%\node Breakpoints, Code-locations, Debug-blocks, Debugger Programmer's Interface
12989 \section{Breakpoints}
12990
12991 A breakpoint represents a function the system calls with the current frame when
12992 execution passes a certain code-location.  A breakpoint is active or inactive
12993 independent of its existence.  Each breakpoint also has an extra slot that
12994 users can use to tag it with information.
12995
12996 \begin{defun}{}{make-breakpoint}{%
12997     \args{\var{hook-function} \var{what} \keys{\kwd{kind} \kwd{info}
12998         \kwd{function-end-cookie}}}}
12999
13000   This function creates and returns a breakpoint.  When program
13001   execution encounters the breakpoint, the system calls
13002   \var{hook-function}.  \var{hook-function} takes the current frame
13003   for the function in which the program is running and the breakpoint
13004   object.
13005
13006   \var{what} and \var{kind} determine where in a function the system
13007   invokes \var{hook-function}.  \var{what} is either a code-location
13008   or a debug-function.  \var{kind} is one of \kwd{code-location},
13009   \kwd{function-start}, or \kwd{function-end}.  Since the starts and
13010   ends of functions may not have code-locations representing them,
13011   designate these places by supplying \var{what} as a debug-function
13012   and \var{kind} indicating the \kwd{function-start} or
13013   \kwd{function-end}.  When \var{what} is a debug-function and
13014   \var{kind} is \kwd{function-end}, then \var{hook-function} must take two
13015   additional arguments: a list of values returned by the function and
13016   a function-end-cookie.
13017
13018   \var{info} is information supplied by and used by the user.
13019
13020   \var{function-end-cookie} is a function.  To implement function-end
13021   breakpoints, the system uses starter breakpoints to establish the
13022   function-end breakpoint for each invocation of the function.  Upon
13023   each entry, the system creates a unique cookie to identify the
13024   invocation, and when the user supplies a function for this argument,
13025   the system invokes it on the cookie.  The system later invokes the
13026   function-end breakpoint hook on the same cookie.  The user may save
13027   the cookie when passed to the function-end-cookie function for later
13028   comparison in the hook function.
13029
13030   This signals an error if \var{what} is an unknown code-location.
13031
13032   \i{Note: Breakpoints in interpreted code or byte-compiled code are
13033     not implemented.  Function-end breakpoints are not implemented for
13034     compiled functions that use the known local return convention
13035     (e.g., for block-compiled or self-recursive functions).}
13036
13037 \end{defun}
13038
13039
13040 \begin{defun}{}{activate-breakpoint}{\args{\var{breakpoint}}}
13041
13042   This function causes the system to invoke the \var{breakpoint}'s
13043   hook-function until the next call to \code{deactivate-breakpoint} or
13044   \code{delete-breakpoint}.  The system invokes breakpoint hook
13045   functions in the reverse of the order in which you activate them.
13046 \end{defun}
13047
13048
13049 \begin{defun}{}{deactivate-breakpoint}{\args{\var{breakpoint}}}
13050
13051   This function stops the system from invoking the \var{breakpoint}'s
13052   hook-function.
13053 \end{defun}
13054
13055
13056 \begin{defun}{}{breakpoint-active-p}{\args{\var{breakpoint}}}
13057
13058   This returns whether \var{breakpoint} is currently active.
13059 \end{defun}
13060
13061
13062 \begin{defun}{}{breakpoint-hook-function}{\args{\var{breakpoint}}}
13063
13064   This function returns the hook function that the system calls
13065   when execution encounters \var{breakpoint} while it is active.
13066   This is \code{SETF}'able.
13067 \end{defun}
13068
13069
13070 \begin{defun}{}{breakpoint-info}{\args{\var{breakpoint}}}
13071
13072   This function returns \var{breakpoint}'s information supplied by the
13073   user.  This is \code{SETF}'able.
13074 \end{defun}
13075
13076
13077 \begin{defun}{}{breakpoint-kind}{\args{\var{breakpoint}}}
13078
13079   This function returns the \var{breakpoint}'s kind specification.
13080 \end{defun}
13081
13082
13083 \begin{defun}{}{breakpoint-what}{\args{\var{breakpoint}}}
13084
13085   This function returns the \var{breakpoint}'s what specification.
13086 \end{defun}
13087
13088
13089 \begin{defun}{}{delete-breakpoint}{\args{\var{breakpoint}}}
13090
13091   This function frees system storage and removes computational
13092   overhead associated with \var{breakpoint}.  After calling this,
13093   \var{breakpoint} is useless and can never become active again.
13094 \end{defun}
13095
13096
13097 %%\f
13098 %%\node Code-locations, Debug-sources, Breakpoints, Debugger Programmer's Interface
13099 \section{Code-locations}
13100
13101 Code-locations represent places in functions where the system has correct
13102 information about the function's environment and where interesting operations
13103 can occur---asking for a local variable's value, setting breakpoints,
13104 evaluating forms within the function's environment, etc.
13105
13106 Sometimes the interface returns unknown code-locations.  These
13107 represent places in functions, but there is no debug information
13108 associated with them.  Some operations accept these since they may
13109 succeed even with missing debug data.  These operations' argument is
13110 named \var{basic-code-location} indicating they take known and unknown
13111 code-locations.  If an operation names its argument
13112 \var{code-location}, and you supply an unknown one, it will signal an
13113 error.  For example, \code{frame-code-location} may return an unknown
13114 code-location if someone interrupted Lisp in the given frame.  The
13115 system knows where execution will continue, but this place in the code
13116 may not be a place for which the compiler dumped debug information.
13117
13118 \begin{defun}{}{code-location-debug-function}{\args{\var{basic-code-location}}}
13119
13120   This function returns the debug-function representing information
13121   about the function corresponding to the code-location.
13122 \end{defun}
13123
13124
13125 \begin{defun}{}{code-location-debug-block}{\args{\var{basic-code-location}}}
13126
13127   This function returns the debug-block containing
13128   \var{basic-code-location} if it is available.  Some debug policies
13129   inhibit debug-block information, and if none is available, then this
13130   signals a \code{no-debug-blocks} condition.
13131 \end{defun}
13132
13133
13134 \begin{defun}{}{code-location-top-level-form-offset}{%
13135     \args{\var{code-location}}}
13136
13137   This function returns the number of top-level forms before the one
13138   containing \var{code-location} as seen by the compiler in some
13139   compilation unit.  A compilation unit is not necessarily a single
13140   file; see the section on debug-sources.
13141 \end{defun}
13142
13143
13144 \begin{defun}{}{code-location-form-number}{\args{\var{code-location}}}
13145
13146   This function returns the number of the form corresponding to
13147   \var{code-location}.  The form number is derived by walking the
13148   subforms of a top-level form in depth-first order.  While walking
13149   the top-level form, count one in depth-first order for each subform
13150   that is a cons.  See \funref{form-number-translations}.
13151 \end{defun}
13152
13153
13154 \begin{defun}{}{code-location-debug-source}{\args{\var{code-location}}}
13155
13156   This function returns \var{code-location}'s debug-source.
13157 \end{defun}
13158
13159
13160 \begin{defun}{}{code-location-unknown-p}{\args{\var{basic-code-location}}}
13161
13162   This function returns whether \var{basic-code-location} is unknown.
13163   It returns \nil when the code-location is known.
13164 \end{defun}
13165
13166
13167 \begin{defun}{}{code-location=}{\args{\var{code-location1}
13168       \var{code-location2}}}
13169
13170   This function returns whether the two code-locations are the same.
13171 \end{defun}
13172
13173
13174 %%\f
13175 %%\node Debug-sources, Source Translation Utilities, Code-locations, Debugger Programmer's Interface
13176 \section{Debug-sources}
13177
13178 Debug-sources represent how to get back the source for some code.  The
13179 source is either a file (\code{compile-file} or \code{load}), a
13180 lambda-expression (\code{compile}, \code{defun}, \code{defmacro}), or
13181 a stream (something particular to CMU Common Lisp,
13182 \code{compile-from-stream}).
13183
13184 When compiling a source, the compiler counts each top-level form it
13185 processes, but when the compiler handles multiple files as one block
13186 compilation, the top-level form count continues past file boundaries.
13187 Therefore \code{code-location-top-level-form-offset} returns an offset
13188 that does not always start at zero for the code-location's
13189 debug-source.  The offset into a particular source is
13190 \code{code-location-top-level-form-offset} minus
13191 \code{debug-source-root-number}.
13192
13193 Inside a top-level form, a code-location's form number indicates the
13194 subform corresponding to the code-location.
13195
13196 \begin{defun}{}{debug-source-from}{\args{\var{debug-source}}}
13197
13198   This function returns an indication of the type of source.  The
13199   following are the possible values:
13200   \begin{Lentry}
13201   \item[\kwd{file}] from a file (obtained by \code{compile-file} if
13202     compiled).
13203   \item[\kwd{lisp}] from Lisp (obtained by \code{compile} if
13204     compiled).
13205   \item[\kwd{stream}] from a non-file stream (CMU Common Lisp supports
13206     \code{compile-from-stream}).
13207   \end{Lentry}
13208 \end{defun}
13209
13210
13211 \begin{defun}{}{debug-source-name}{\args{\var{debug-source}}}
13212
13213   This function returns the actual source in some sense represented by
13214   \var{debug-source}; its form depends on the value of \code{debug-source-from}:
13215   \begin{Lentry}
13216   \item[\kwd{file}] the pathname of the file.
13217   \item[\kwd{lisp}] a lambda-expression.
13218   \item[\kwd{stream}] some descriptive string that's otherwise
13219     useless.
13220 \end{Lentry}
13221 \end{defun}
13222
13223
13224 \begin{defun}{}{debug-source-created}{\args{\var{debug-source}}}
13225
13226   This function returns the universal time someone created the source.
13227   This may be \nil{} if it is unavailable.
13228 \end{defun}
13229
13230
13231 \begin{defun}{}{debug-source-compiled}{\args{\var{debug-source}}}
13232
13233   This function returns the time someone compiled the source.  This is
13234   \nil if the source is uncompiled.
13235 \end{defun}
13236
13237
13238 \begin{defun}{}{debug-source-root-number}{\args{\var{debug-source}}}
13239
13240   This returns the number of top-level forms processed by the compiler
13241   before compiling this source.  If this source is uncompiled, this is
13242   zero.  This may be zero even if the source is compiled since the
13243   first form in the first file compiled in one compilation, for
13244   example, must have a root number of zero---the compiler saw no other
13245   top-level forms before it.
13246 \end{defun}
13247
13248
13249 %%\node Source Translation Utilities,  , Debug-sources, Debugger Programmer's Interface
13250 \section{Source Translation Utilities}
13251
13252 These functions provide a mechanism for converting the rather
13253 obscure (but highly compact) representation of source locations into an
13254 actual source form:
13255
13256 \begin{defun}{}{debug-source-start-positions}{\args{\var{debug-source}}}
13257
13258   This function returns the file position of each top-level form as a
13259   vector if \var{debug-source} is from a \kwd{file}.  If
13260   \code{debug-source-from} is \kwd{lisp} or \kwd{stream}, or the file
13261   is byte-compiled, then the result is \false.
13262 \end{defun}
13263
13264
13265 \begin{defun}{}{form-number-translations}{\args{\var{form}
13266       \var{tlf-number}}}
13267
13268   This function returns a table mapping form numbers (see
13269   \code{code-location-form-number}) to source-paths.  A source-path
13270   indicates a descent into the top-level-form \var{form}, going
13271   directly to the subform corresponding to a form number.
13272   \var{tlf-number} is the top-level-form number of \var{form}.
13273 \end{defun}
13274
13275
13276 \begin{defun}{}{source-path-context}{%
13277     \args{\var{form} \var{path} \var{context}}}
13278
13279   This function returns the subform of \var{form} indicated by the
13280   source-path.  \var{form} is a top-level form, and \var{path} is a
13281   source-path into it.  \var{context} is the number of enclosing forms
13282   to return instead of directly returning the source-path form.  When
13283   \var{context} is non-zero, the form returned contains a marker,
13284   \code{\#:****HERE****}, immediately before the form indicated by
13285   \var{path}.
13286 \end{defun}
13287
13288
13289 %%\f
13290 \twocolumn
13291 %%\node Function Index, Variable Index, Debugger Programmer's Interface, Top
13292 %%\unnumbered{Function Index}
13293 \cindex{Function Index}
13294
13295 %%\printindex{fn}
13296 \printindex[funs]
13297
13298 \twocolumn
13299 %%\node Variable Index, Type Index, Function Index, Top
13300 %%\unnumbered{Variable Index}
13301 \cindex{Variable Index}
13302
13303 %%\printindex{vr}
13304 \printindex[vars]
13305
13306 \twocolumn
13307 %%\node Type Index, Concept Index, Variable Index, Top
13308 %%\unnumbered{Type Index}
13309 \cindex{Type Index}
13310
13311 %%\printindex{tp}
13312 \printindex[types]
13313
13314 %%\node Concept Index,  , Type Index, Top
13315 %%\unnumbered{Concept Index}
13316 \cindex{Concept Index}
13317
13318 %%\printindex{cp}
13319 \onecolumn
13320 \printindex[concept]
13321 \end{document}