From 3a82f6a1170678e4af34c827c78270fd4a458f03 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 14 Mar 2023 18:58:12 +0100 Subject: [PATCH 1/4] Simplify commonChildInit() (cherry picked from commit 19326ac2979f0d989835105a5d816a943a6bc7f2) --- src/libstore/build.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 6910e52c5..674e240d8 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -442,7 +442,7 @@ void Goal::trace(const FormatOrString & fs) /* Common initialisation performed in child processes. */ -static void commonChildInit(Pipe & logPipe) +static void commonChildInit(int stderrFd) { restoreSignals(); @@ -454,7 +454,7 @@ static void commonChildInit(Pipe & logPipe) throw SysError(format("creating a new session")); /* Dup the write side of the logger pipe into stderr. */ - if (dup2(logPipe.writeSide.get(), STDERR_FILENO) == -1) + if (dup2(stderrFd, STDERR_FILENO) == -1) throw SysError("cannot pipe standard error into log file"); /* Dup stderr to stdout. */ @@ -678,7 +678,7 @@ HookInstance::HookInstance() /* Fork the hook. */ pid = startProcess([&]() { - commonChildInit(fromHook); + commonChildInit(fromHook.writeSide.get()); if (chdir("/") == -1) throw SysError("changing into /"); @@ -2736,7 +2736,7 @@ void DerivationGoal::runChild() try { /* child */ - commonChildInit(builderOut); + commonChildInit(builderOut.writeSide.get()); try { setupSeccomp(); From 6613e7ebfb04dd014332ed5f2d274627fc1efe7a Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Tue, 14 Mar 2023 19:10:59 +0100 Subject: [PATCH 2/4] Move pseudoterminal slave open to child Hopefully this fixes "unexpected EOF" failures on macOS (#3137, #3605, #7242, #7702). The problem appears to be that under some circumstances, macOS discards the output written to the slave side of the pseudoterminal. Hence the parent never sees the "sandbox initialized" message from the child, even though it succeeded. The conditions are: * The child finishes very quickly. That's why this bug is likely to trigger in nix-env tests, since that uses a builtin builder. Adding a short sleep before the child exits makes the problem go away. * The parent has closed its duplicate of the slave file descriptor. This shouldn't matter, since the child has a duplicate as well, but it does. E.g. moving the close to the bottom of startBuilder() makes the problem go away. However, that's not a solution because it would make Nix hang if the child dies before sending the "sandbox initialized" message. * The system is under high load. E.g. "make installcheck -j16" makes the issue pretty reproducible, while it's very rare under "make installcheck -j1". As a fix/workaround, we now open the pseudoterminal slave in the child, rather than the parent. This removes the second condition (i.e. the parent no longer needs to close the slave fd) and I haven't been able to reproduce the "unexpected EOF" with this. (cherry picked from commit c536e00c9deeac58bc4b3299dbc702604c32adbe) --- src/libstore/build.cc | 56 ++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 674e240d8..55ae3fbf9 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -827,6 +827,10 @@ private: /* Pipe for the builder's standard output/error. */ Pipe builderOut; + /* Slave side of the pseudoterminal used for the builder's + standard output/error. */ + Path slaveName; + /* Pipe for synchronising updates to the builder user namespace. */ Pipe userNamespaceSync; @@ -2238,14 +2242,12 @@ void DerivationGoal::startBuilder() /* Create the log file. */ Path logFile = openLogFile(); - /* Create a pipe to get the output of the builder. */ - //builderOut.create(); - + /* Create a pseudoterminal to get the output of the builder. */ builderOut.readSide = posix_openpt(O_RDWR | O_NOCTTY); if (!builderOut.readSide) throw SysError("opening pseudoterminal master"); - std::string slaveName(ptsname(builderOut.readSide.get())); + slaveName = ptsname(builderOut.readSide.get()); if (buildUser) { if (chmod(slaveName.c_str(), 0600)) @@ -2258,30 +2260,9 @@ void DerivationGoal::startBuilder() throw SysError("granting access to pseudoterminal slave"); } - #if 0 - // Mount the pt in the sandbox so that the "tty" command works. - // FIXME: this doesn't work with the new devpts in the sandbox. - if (useChroot) - dirsInChroot[slaveName] = {slaveName, false}; - #endif - if (unlockpt(builderOut.readSide.get())) throw SysError("unlocking pseudoterminal"); - builderOut.writeSide = open(slaveName.c_str(), O_RDWR | O_NOCTTY); - if (!builderOut.writeSide) - throw SysError("opening pseudoterminal slave"); - - // Put the pt into raw mode to prevent \n -> \r\n translation. - struct termios term; - if (tcgetattr(builderOut.writeSide.get(), &term)) - throw SysError("getting pseudoterminal attributes"); - - cfmakeraw(&term); - - if (tcsetattr(builderOut.writeSide.get(), TCSANOW, &term)) - throw SysError("putting pseudoterminal into raw mode"); - result.startTime = time(0); /* Fork a child to build the package. */ @@ -2330,7 +2311,11 @@ void DerivationGoal::startBuilder() options.allowVfork = false; + Pipe sendPid; + sendPid.create(); + Pid helper = startProcess([&]() { + sendPid.readSide.close(); /* Drop additional groups here because we can't do it after we've created the new user namespace. FIXME: @@ -2374,10 +2359,12 @@ void DerivationGoal::startBuilder() _exit(1); if (child == -1) throw SysError("cloning builder process"); - writeFull(builderOut.writeSide.get(), std::to_string(child) + "\n"); + writeFull(sendPid.writeSide.get(), std::to_string(child) + "\n"); _exit(0); }, options); + sendPid.writeSide.close(); + int res = helper.wait(); if (res != 0 && settings.sandboxFallback) { useChroot = false; @@ -2395,7 +2382,7 @@ void DerivationGoal::startBuilder() }); pid_t tmp; - if (!string2Int(readLine(builderOut.readSide.get()), tmp)) abort(); + if (!string2Int(readLine(sendPid.readSide.get()), tmp)) abort(); pid = tmp; /* Set the UID/GID mapping of the builder's user namespace @@ -2736,6 +2723,21 @@ void DerivationGoal::runChild() try { /* child */ + /* Open the slave side of the pseudoterminal. */ + builderOut.writeSide = open(slaveName.c_str(), O_RDWR | O_NOCTTY); + if (!builderOut.writeSide) + throw SysError("opening pseudoterminal slave"); + + // Put the pt into raw mode to prevent \n -> \r\n translation. + struct termios term; + if (tcgetattr(builderOut.writeSide.get(), &term)) + throw SysError("getting pseudoterminal attributes"); + + cfmakeraw(&term); + + if (tcsetattr(builderOut.writeSide.get(), TCSANOW, &term)) + throw SysError("putting pseudoterminal into raw mode"); + commonChildInit(builderOut.writeSide.get()); try { From 94eba8a85a15ab099e191ad869ee00b92a3ab921 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 15 Mar 2023 10:37:39 +0100 Subject: [PATCH 3/4] Change builderOut from Pipe to AutoCloseFD (cherry picked from commit 6029c763c2c5998dc3265152425c8ff0ce01b1a0) --- src/libstore/build.cc | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 55ae3fbf9..030918318 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -824,8 +824,9 @@ private: std::string currentHookLine; - /* Pipe for the builder's standard output/error. */ - Pipe builderOut; + /* Master side of the pseudoterminal used for the builder's + standard output/error. */ + AutoCloseFD builderOut; /* Slave side of the pseudoterminal used for the builder's standard output/error. */ @@ -1606,7 +1607,7 @@ void DerivationGoal::buildDone() hook->builderOut.readSide = -1; hook->fromHook.readSide = -1; } else - builderOut.readSide = -1; + builderOut.close(); /* Close the log file. */ closeLogFile(); @@ -2243,11 +2244,11 @@ void DerivationGoal::startBuilder() Path logFile = openLogFile(); /* Create a pseudoterminal to get the output of the builder. */ - builderOut.readSide = posix_openpt(O_RDWR | O_NOCTTY); - if (!builderOut.readSide) + builderOut = posix_openpt(O_RDWR | O_NOCTTY); + if (!builderOut) throw SysError("opening pseudoterminal master"); - slaveName = ptsname(builderOut.readSide.get()); + slaveName = ptsname(builderOut.get()); if (buildUser) { if (chmod(slaveName.c_str(), 0600)) @@ -2256,11 +2257,11 @@ void DerivationGoal::startBuilder() if (chown(slaveName.c_str(), buildUser->getUID(), 0)) throw SysError("changing owner of pseudoterminal slave"); } else { - if (grantpt(builderOut.readSide.get())) + if (grantpt(builderOut.get())) throw SysError("granting access to pseudoterminal slave"); } - if (unlockpt(builderOut.readSide.get())) + if (unlockpt(builderOut.get())) throw SysError("unlocking pseudoterminal"); result.startTime = time(0); @@ -2415,12 +2416,11 @@ void DerivationGoal::startBuilder() /* parent */ pid.setSeparatePG(true); - builderOut.writeSide = -1; - worker.childStarted(shared_from_this(), {builderOut.readSide.get()}, true, true); + worker.childStarted(shared_from_this(), {builderOut.get()}, true, true); /* Check if setting up the build environment failed. */ while (true) { - string msg = readLine(builderOut.readSide.get()); + string msg = readLine(builderOut.get()); if (string(msg, 0, 1) == "\1") { if (msg.size() == 1) break; throw Error(string(msg, 1)); @@ -2724,21 +2724,21 @@ void DerivationGoal::runChild() try { /* child */ /* Open the slave side of the pseudoterminal. */ - builderOut.writeSide = open(slaveName.c_str(), O_RDWR | O_NOCTTY); - if (!builderOut.writeSide) + AutoCloseFD builderOut = open(slaveName.c_str(), O_RDWR | O_NOCTTY); + if (!builderOut) throw SysError("opening pseudoterminal slave"); // Put the pt into raw mode to prevent \n -> \r\n translation. struct termios term; - if (tcgetattr(builderOut.writeSide.get(), &term)) + if (tcgetattr(builderOut.get(), &term)) throw SysError("getting pseudoterminal attributes"); cfmakeraw(&term); - if (tcsetattr(builderOut.writeSide.get(), TCSANOW, &term)) + if (tcsetattr(builderOut.get(), TCSANOW, &term)) throw SysError("putting pseudoterminal into raw mode"); - commonChildInit(builderOut.writeSide.get()); + commonChildInit(builderOut.get()); try { setupSeccomp(); @@ -3730,7 +3730,7 @@ void DerivationGoal::deleteTmpDir(bool force) void DerivationGoal::handleChildOutput(int fd, const string & data) { if ((hook && fd == hook->builderOut.readSide.get()) || - (!hook && fd == builderOut.readSide.get())) + (!hook && fd == builderOut.get())) { logSize += data.size(); if (settings.maxLogSize && logSize > settings.maxLogSize) { From 5fca88973f54964e89202f8bd80a98d9c1c0a282 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Mon, 20 Mar 2023 17:04:57 +0100 Subject: [PATCH 4/4] Open slave pseudoterminal before CLONE_NEWUSER Otherwise, when running as root and user namespaces are enabled, opening the slave fails with EPERM. Fixes "opening pseudoterminal slave: Permission denied" followed by a hang (https://hydra.nixos.org/build/213104244), and "error: getting sandbox mount namespace: No such file or directory" (#8072), which happens when the child fails very quickly and consequently reading /proc//ns fails. (cherry picked from commit 16db8dc96f64a0facbb620907e571f2dfc8e802e) --- src/libstore/build.cc | 46 +++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 030918318..1bf6580f4 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -454,7 +454,7 @@ static void commonChildInit(int stderrFd) throw SysError(format("creating a new session")); /* Dup the write side of the logger pipe into stderr. */ - if (dup2(stderrFd, STDERR_FILENO) == -1) + if (stderrFd != -1 && dup2(stderrFd, STDERR_FILENO) == -1) throw SysError("cannot pipe standard error into log file"); /* Dup stderr to stdout. */ @@ -2264,6 +2264,27 @@ void DerivationGoal::startBuilder() if (unlockpt(builderOut.get())) throw SysError("unlocking pseudoterminal"); + /* Open the slave side of the pseudoterminal and use it as stderr. */ + auto openSlave = [&]() + { + AutoCloseFD builderOut = open(slaveName.c_str(), O_RDWR | O_NOCTTY); + if (!builderOut) + throw SysError("opening pseudoterminal slave"); + + // Put the pt into raw mode to prevent \n -> \r\n translation. + struct termios term; + if (tcgetattr(builderOut.get(), &term)) + throw SysError("getting pseudoterminal attributes"); + + cfmakeraw(&term); + + if (tcsetattr(builderOut.get(), TCSANOW, &term)) + throw SysError("putting pseudoterminal into raw mode"); + + if (dup2(builderOut.get(), STDERR_FILENO) == -1) + throw SysError("cannot pipe standard error into log file"); + }; + result.startTime = time(0); /* Fork a child to build the package. */ @@ -2318,6 +2339,11 @@ void DerivationGoal::startBuilder() Pid helper = startProcess([&]() { sendPid.readSide.close(); + /* We need to open the slave early, before + CLONE_NEWUSER. Otherwise we get EPERM when running as + root. */ + openSlave(); + /* Drop additional groups here because we can't do it after we've created the new user namespace. FIXME: this means that if we're not root in the parent @@ -2410,6 +2436,7 @@ void DerivationGoal::startBuilder() fallback: options.allowVfork = !buildUser && !drv->isBuiltin(); pid = startProcess([&]() { + openSlave(); runChild(); }, options); } @@ -2723,22 +2750,7 @@ void DerivationGoal::runChild() try { /* child */ - /* Open the slave side of the pseudoterminal. */ - AutoCloseFD builderOut = open(slaveName.c_str(), O_RDWR | O_NOCTTY); - if (!builderOut) - throw SysError("opening pseudoterminal slave"); - - // Put the pt into raw mode to prevent \n -> \r\n translation. - struct termios term; - if (tcgetattr(builderOut.get(), &term)) - throw SysError("getting pseudoterminal attributes"); - - cfmakeraw(&term); - - if (tcsetattr(builderOut.get(), TCSANOW, &term)) - throw SysError("putting pseudoterminal into raw mode"); - - commonChildInit(builderOut.get()); + commonChildInit(-1); try { setupSeccomp();