Avoid zombie processes on signal races

The system curses library can handle terminal size changes with
SIGWINCH without asking system calls to restart, which effectively
stops system calls with -1 and EINTR. An example is ncurses on
Linux systems.

One of these system calls is waitpid. While waiting for the lsof child
to complete, a badly timed SIGWINCH can interrupt the waitpid call,
effectively never clearing the state of the child, keeping the zombie
until htop exits.

Proof of Concept:

 #include <unistd.h>
 int main(void) {
   close(1); close(2);
   sleep(5);
   return 0;
 }

Compile this as a replacement "lsof" and put it into your path. Make
sure that it's called instead of the real lsof.

Press "l" to list open files and resize your terminal within the next
5 seconds. You will see that a zombie process is kept by htop when the
timeout finishes.
This commit is contained in:
Tobias Stoeckmann 2022-01-11 22:25:29 +01:00 committed by BenBE
parent a0ad0697a8
commit d0d9f202c5
2 changed files with 10 additions and 5 deletions

View File

@ -9,6 +9,7 @@ in the source distribution for its full text.
#include "OpenFilesScreen.h"
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
@ -197,10 +198,11 @@ static OpenFiles_ProcessData* OpenFilesScreen_getProcessData(pid_t pid) {
fclose(fd);
int wstatus;
if (waitpid(child, &wstatus, 0) == -1) {
pdata->error = 1;
return pdata;
}
while (waitpid(child, &wstatus, 0) == -1)
if (errno != EINTR) {
pdata->error = 1;
return pdata;
}
if (!WIFEXITED(wstatus)) {
pdata->error = 1;

View File

@ -10,6 +10,7 @@ in the source distribution for its full text.
#include "TraceScreen.h"
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdbool.h>
@ -47,7 +48,9 @@ void TraceScreen_delete(Object* cast) {
TraceScreen* this = (TraceScreen*) cast;
if (this->child > 0) {
kill(this->child, SIGTERM);
waitpid(this->child, NULL, 0);
while (waitpid(this->child, NULL, 0) == -1)
if (errno != EINTR)
break;
}
if (this->strace) {