From 1e7fc4730aa0cafb0aba5278e8cdbdba566b8725 Mon Sep 17 00:00:00 2001 From: Gabor Melis Date: Fri, 24 Apr 2009 13:32:41 +0000 Subject: [PATCH] 1.0.27.39: SIGCHLD related fixes - WITH-ACTIVE-PROCESSES-LOCK does not allow WITH-INTERRUPTS because that can lead to recursive lock attempts upon receiving a SIGCHLD. - if fork() in RUN-PROGRAM fails, signal the error outside the lock. - the SIGCHLD handler only reaps processes started by RUN-PROGRAM in order not to interfere with SB-POSIX:WAIT, SB-POSIX:WAITPID and their C equivalents (thanks to James Y Knight). - the SIGCHLD handler is installed once at startup, because on Darwin sigaction() seems to do unexpected things to the current sigmask. --- NEWS | 3 + src/code/run-program.lisp | 139 +++++++++++++++++++---------------------- src/code/target-signal.lisp | 6 ++ tests/run-program.impure.lisp | 15 +++++ version.lisp-expr | 2 +- 5 files changed, 91 insertions(+), 74 deletions(-) diff --git a/NEWS b/NEWS index 35b4f14..bd23c1b 100644 --- a/NEWS +++ b/NEWS @@ -28,6 +28,9 @@ changes in sbcl-1.0.28 relative to 1.0.27: potentially-invalid effective methods in its cache. * bug fix: SB-INTROSPECT:FIND-DEFINITION-SOURCE now works with funcallable instances as well (thanks to Paul Khuong) + * bug fix: using RUN-PROGRAM does not interfere with SB-POSIX:WAIT, + SB-POSIX:WAITPID and their C equivalents. + * bug fix: RUN-PROGRAM does not crash on Darwin when stressed. changes in sbcl-1.0.27 relative to 1.0.26: * new port: support added for x86-64 OpenBSD. (thanks to Josh Elsasser) diff --git a/src/code/run-program.lisp b/src/code/run-program.lisp index f8a36a1..04b5d6a 100644 --- a/src/code/run-program.lisp +++ b/src/code/run-program.lisp @@ -97,23 +97,23 @@ ;;;; Import wait3(2) from Unix. #-win32 -(define-alien-routine ("wait3" c-wait3) sb-alien:int +(define-alien-routine ("waitpid" c-waitpid) sb-alien:int + (pid sb-alien:int) (status sb-alien:int :out) - (options sb-alien:int) - (rusage sb-alien:int)) + (options sb-alien:int)) #-win32 -(defun wait3 (&optional do-not-hang check-for-stopped) +(defun waitpid (pid &optional do-not-hang check-for-stopped) #+sb-doc - "Return any available status information on child process. " + "Return any available status information on child process with PID." (multiple-value-bind (pid status) - (c-wait3 (logior (if do-not-hang - sb-unix:wnohang - 0) - (if check-for-stopped - sb-unix:wuntraced - 0)) - 0) + (c-waitpid pid + (logior (if do-not-hang + sb-unix:wnohang + 0) + (if check-for-stopped + sb-unix:wuntraced + 0))) (cond ((or (minusp pid) (zerop pid)) nil) @@ -154,7 +154,7 @@ ;;; accesses it, that's why we need without-interrupts. (defmacro with-active-processes-lock (() &body body) #-win32 - `(sb-thread::with-system-mutex (*active-processes-lock* :allow-with-interrupts t) + `(sb-thread::with-system-mutex (*active-processes-lock*) ,@body) #+win32 `(progn ,@body)) @@ -320,36 +320,28 @@ status slot." (setf *active-processes* (delete process *active-processes*))) process) -;;; the handler for SIGCHLD signals that RUN-PROGRAM establishes -#-win32 -(defun sigchld-handler (ignore1 ignore2 ignore3) - (declare (ignore ignore1 ignore2 ignore3)) - (get-processes-status-changes)) - (defun get-processes-status-changes () - #-win32 - (loop - (multiple-value-bind (pid what code core) - (wait3 t t) - (unless pid - (return)) - (let ((proc (with-active-processes-lock () - (find pid *active-processes* :key #'process-pid)))) - (when proc - (setf (process-%status proc) what) - (setf (process-exit-code proc) code) - (setf (process-core-dumped proc) core) - (when (process-status-hook proc) - (funcall (process-status-hook proc) proc)) - (when (position what #(:exited :signaled)) - (with-active-processes-lock () - (setf *active-processes* - (delete proc *active-processes*)))))))) - #+win32 (let (exited) (with-active-processes-lock () (setf *active-processes* - (delete-if (lambda (proc) + (delete-if #-win32 + (lambda (proc) + ;; Wait only on pids belonging to processes + ;; started by RUN-PROGRAM. There used to be a + ;; WAIT3 call here, but that makes direct + ;; WAIT, WAITPID usage impossible due to the + ;; race with the SIGCHLD signal handler. + (multiple-value-bind (pid what code core) + (waitpid (process-pid proc) t t) + (when pid + (setf (process-%status proc) what) + (setf (process-exit-code proc) code) + (setf (process-core-dumped proc) core) + (when (process-status-hook proc) + (push proc exited)) + t))) + #+win32 + (lambda (proc) (multiple-value-bind (ok code) (get-exit-code-process (process-pid proc)) (when (and (plusp ok) (/= code 259)) @@ -361,8 +353,8 @@ status slot." *active-processes*))) ;; Can't call the hooks before all the processes have been deal ;; with, as calling a hook may cause re-entry to - ;; GET-PROCESS-STATUS-CHANGES. That may be OK when using wait3, - ;; but in the Windows implementation is would be deeply bad. + ;; GET-PROCESS-STATUS-CHANGES. That may be OK when using waitpid, + ;; but in the Windows implementation it would be deeply bad. (dolist (proc exited) (let ((hook (process-status-hook proc))) (when hook @@ -673,9 +665,6 @@ Users Manual for details about the PROCESS structure."#-win32" #-win32 (when (and env-p environment-p) (error "can't specify :ENV and :ENVIRONMENT simultaneously")) - ;; Make sure that the interrupt handler is installed. - #-win32 - (sb-sys:enable-interrupt sb-unix:sigchld #'sigchld-handler) ;; Prepend the program to the argument list. (push (namestring program) args) (labels (;; It's friendly to allow the caller to pass any string @@ -727,7 +716,8 @@ Users Manual for details about the PROCESS structure."#-win32" (values stdout output-stream) (get-descriptor-for ,@args)))) ,@body)) - (with-open-pty (((pty-name pty-stream) (pty cookie)) &body body) + (with-open-pty (((pty-name pty-stream) (pty cookie)) + &body body) #+win32 `(declare (ignore ,pty ,cookie)) #+win32 `(let (,pty-name ,pty-stream) ,@body) #-win32 `(multiple-value-bind (,pty-name ,pty-stream) @@ -759,34 +749,37 @@ Users Manual for details about the PROCESS structure."#-win32" ;; Make sure we are not notified about the child ;; death before we have installed the PROCESS ;; structure in *ACTIVE-PROCESSES*. - (with-active-processes-lock () - (with-args-vec (args-vec simple-args) - (with-environment-vec (environment-vec environment) - (let ((child - (without-gcing - (spawn progname args-vec - stdin stdout stderr - (if search 1 0) - environment-vec pty-name - (if wait 1 0))))) - (when (= child -1) - (error "couldn't fork child process: ~A" - (strerror))) - (setf proc (apply - #'make-process - :pid child - :input input-stream - :output output-stream - :error error-stream - :status-hook status-hook - :cookie cookie - #-win32 (list :pty pty-stream - :%status :running) - #+win32 (if wait - (list :%status :exited - :exit-code child) - (list :%status :running)))) - (push proc *active-processes*)))))))))) + (let (child) + (with-active-processes-lock () + (with-args-vec (args-vec simple-args) + (with-environment-vec (environment-vec environment) + (setq child (without-gcing + (spawn progname args-vec + stdin stdout stderr + (if search 1 0) + environment-vec pty-name + (if wait 1 0)))) + (when (plusp child) + (setf proc + (apply + #'make-process + :pid child + :input input-stream + :output output-stream + :error error-stream + :status-hook status-hook + :cookie cookie + #-win32 (list :pty pty-stream + :%status :running) + #+win32 (if wait + (list :%status :exited + :exit-code child) + (list :%status :running)))) + (push proc *active-processes*))))) + ;; Report the error outside the lock. + (when (= child -1) + (error "couldn't fork child process: ~A" + (strerror))))))))) (dolist (fd *close-in-parent*) (sb-unix:unix-close fd)) (unless proc diff --git a/src/code/target-signal.lisp b/src/code/target-signal.lisp index b660735..e08e38d 100644 --- a/src/code/target-signal.lisp +++ b/src/code/target-signal.lisp @@ -210,6 +210,11 @@ (declare (ignore signal code context)) (sb!thread::run-interruption)) +;;; the handler for SIGCHLD signals for RUN-PROGRAM +(defun sigchld-handler (signal code context) + (declare (ignore signal code context)) + (sb!impl::get-processes-status-changes)) + (defun sb!kernel:signal-cold-init-or-reinit () #!+sb-doc "Enable all the default signals that Lisp knows how to deal with." @@ -224,6 +229,7 @@ (enable-interrupt sigsys #'sigsys-handler) (enable-interrupt sigalrm #'sigalrm-handler) (enable-interrupt sigpipe #'sigpipe-handler) + (enable-interrupt sigchld #'sigchld-handler) #!+hpux (ignore-interrupt sigxcpu) (unblock-gc-signals) (unblock-deferrable-signals) diff --git a/tests/run-program.impure.lisp b/tests/run-program.impure.lisp index 0200e14..9ef07e5 100644 --- a/tests/run-program.impure.lisp +++ b/tests/run-program.impure.lisp @@ -120,3 +120,18 @@ :search t :wait t))) (when file (delete-file file)))) + +;;; This used to crash on Darwin and trigger recursive lock errors on +;;; every platform. +(with-test (:name (:run-program :stress)) + ;; Do it a hundred times in batches of 10 so that with a low limit + ;; of the number of processes the test can have a chance to pass. + (loop + repeat 10 do + (map nil + #'sb-ext:process-wait + (loop repeat 10 + collect + (sb-ext:run-program "/bin/echo" ' + ("It would be nice if this didn't crash.") + :wait nil :output nil))))) diff --git a/version.lisp-expr b/version.lisp-expr index 820ed83..e75c497 100644 --- a/version.lisp-expr +++ b/version.lisp-expr @@ -17,4 +17,4 @@ ;;; checkins which aren't released. (And occasionally for internal ;;; versions, especially for internal versions off the main CVS ;;; branch, it gets hairier, e.g. "0.pre7.14.flaky4.13".) -"1.0.27.38" +"1.0.27.39" -- 1.7.10.4