From d2483b1addd97fa93f627b483c664f7348c6ba48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Wed, 3 Sep 2025 11:07:16 +0200 Subject: [PATCH] Fix macOS HUP detection using kqueue instead of poll On macOS, poll() is fundamentally broken for HUP detection. It loses event subscriptions when EVFILT_READ fires without matching the requested events in the pollfd. This causes daemon processes to linger after client disconnect. This commit replaces poll() with kqueue on macOS, which is what poll() uses internally but without the bugs. The kqueue implementation uses EVFILT_READ which works for both sockets and pipes, avoiding EVFILT_SOCK which only works for sockets. On Linux and other platforms, we continue using poll() with the standard POSIX behavior where POLLHUP is always reported regardless of requested events. Based on work from the Lix project (https://git.lix.systems/lix-project/lix) commit 69ba3c92db3ecca468bcd5ff7849fa8e8e0fc6c0 Fixes: https://github.com/NixOS/nix/issues/13847 Related: https://git.lix.systems/lix-project/lix/issues/729 Apple bugs: rdar://37537852 (poll), FB17447257 (poll) Co-authored-by: Jade Lovelace (cherry picked from commit 1286d5db78701a5c0a83ae6b5f838b9ac60a61c1) --- .../unix/include/nix/util/monitor-fd.hh | 211 +++++++++--------- 1 file changed, 108 insertions(+), 103 deletions(-) diff --git a/src/libutil/unix/include/nix/util/monitor-fd.hh b/src/libutil/unix/include/nix/util/monitor-fd.hh index 5c1e5f195..b87bf5ca4 100644 --- a/src/libutil/unix/include/nix/util/monitor-fd.hh +++ b/src/libutil/unix/include/nix/util/monitor-fd.hh @@ -2,15 +2,18 @@ ///@file #include -#include +#include -#include #include -#include -#include -#include +#include + +#ifdef __APPLE__ +# include +# include +#endif #include "nix/util/signals.hh" +#include "nix/util/file-descriptor.hh" namespace nix { @@ -20,111 +23,113 @@ private: std::thread thread; Pipe notifyPipe; + void runThread(int watchFd, int notifyFd); + public: - MonitorFdHup(int fd) - { - notifyPipe.create(); - thread = std::thread([this, fd]() { - while (true) { - // There is a POSIX violation on macOS: you have to listen for - // at least POLLHUP to receive HUP events for a FD. POSIX says - // this is not so, and you should just receive them regardless. - // However, as of our testing on macOS 14.5, the events do not - // get delivered if in the all-bits-unset case, but do get - // delivered if `POLLHUP` is set. - // - // This bug filed as rdar://37537852 - // (https://openradar.appspot.com/37537852). - // - // macOS's own man page - // (https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/poll.2.html) - // additionally says that `POLLHUP` is ignored as an input. It - // seems the likely order of events here was - // - // 1. macOS did not follow the POSIX spec - // - // 2. Somebody ninja-fixed this other spec violation to make - // sure `POLLHUP` was not forgotten about, even though they - // "fixed" this issue in a spec-non-compliant way. Whatever, - // we'll use the fix. - // - // Relevant code, current version, which shows the : - // https://github.com/apple-oss-distributions/xnu/blob/94d3b452840153a99b38a3a9659680b2a006908e/bsd/kern/sys_generic.c#L1751-L1758 - // - // The `POLLHUP` detection was added in - // https://github.com/apple-oss-distributions/xnu/commit/e13b1fa57645afc8a7b2e7d868fe9845c6b08c40#diff-a5aa0b0e7f4d866ca417f60702689fc797e9cdfe33b601b05ccf43086c35d395R1468 - // That means added in 2007 or earlier. Should be good enough - // for us. - short hangup_events = -#ifdef __APPLE__ - POLLHUP -#else - 0 -#endif - ; - - /* Wait indefinitely until a POLLHUP occurs. */ - constexpr size_t num_fds = 2; - struct pollfd fds[num_fds] = { - { - .fd = fd, - .events = hangup_events, - }, - { - .fd = notifyPipe.readSide.get(), - .events = hangup_events, - }, - }; - - auto count = poll(fds, num_fds, -1); - if (count == -1) { - if (errno == EINTR || errno == EAGAIN) - continue; - throw SysError("failed to poll() in MonitorFdHup"); - } - /* This shouldn't happen, but can on macOS due to a bug. - See rdar://37550628. - - This may eventually need a delay or further - coordination with the main thread if spinning proves - too harmful. - */ - if (count == 0) - continue; - if (fds[0].revents & POLLHUP) { - unix::triggerInterrupt(); - break; - } - if (fds[1].revents & POLLHUP) { - break; - } - // On macOS, (jade thinks that) it is possible (although not - // observed on macOS 14.5) that in some limited cases on buggy - // kernel versions, all the non-POLLHUP events for the socket - // get delivered. - // - // We could sleep to avoid pointlessly spinning a thread on - // those, but this opens up a different problem, which is that - // if do sleep, it will be longer before the daemon fork for a - // client exits. Imagine a sequential shell script, running Nix - // commands, each of which talk to the daemon. If the previous - // command registered a temp root, exits, and then the next - // command issues a delete request before the temp root is - // cleaned up, that delete request might fail. - // - // Not sleeping doesn't actually fix the race condition --- we - // would need to block on the old connections' tempt roots being - // cleaned up in in the new connection --- but it does make it - // much less likely. - } - }); - }; + MonitorFdHup(int fd); ~MonitorFdHup() { + // Close the write side to signal termination via POLLHUP notifyPipe.writeSide.close(); thread.join(); } }; +#ifdef __APPLE__ +/* This custom kqueue usage exists because Apple's poll implementation is + * broken and loses event subscriptions if EVFILT_READ fires without matching + * the requested `events` in the pollfd. + * + * We use EVFILT_READ, which causes some spurious wakeups (at most one per write + * from the client, in addition to the socket lifecycle events), because the + * alternate API, EVFILT_SOCK, doesn't work on pipes, which this is also used + * to monitor in certain situations. + * + * See (EVFILT_SOCK): + * https://github.com/netty/netty/blob/64bd2f4eb62c2fb906bc443a2aabf894c8b7dce9/transport-classes-kqueue/src/main/java/io/netty/channel/kqueue/AbstractKQueueChannel.java#L434 + * + * See: https://git.lix.systems/lix-project/lix/issues/729 + * Apple bug in poll(2): FB17447257, available at https://openradar.appspot.com/FB17447257 + */ +inline void MonitorFdHup::runThread(int watchFd, int notifyFd) +{ + int kqResult = kqueue(); + if (kqResult < 0) { + throw SysError("MonitorFdHup kqueue"); + } + AutoCloseFD kq{kqResult}; + + std::array kevs; + + // kj uses EVFILT_WRITE for this, but it seems that it causes more spurious + // wakeups in our case of doing blocking IO from another thread compared to + // EVFILT_READ. + // + // EVFILT_WRITE and EVFILT_READ (for sockets at least, where I am familiar + // with the internals) both go through a common filter which catches EOFs + // and generates spurious wakeups for either readable/writable events. + EV_SET(&kevs[0], watchFd, EVFILT_READ, EV_ADD | EV_ENABLE | EV_CLEAR, 0, 0, nullptr); + EV_SET(&kevs[1], notifyFd, EVFILT_READ, EV_ADD | EV_ENABLE | EV_CLEAR, 0, 0, nullptr); + + int result = kevent(kq.get(), kevs.data(), kevs.size(), nullptr, 0, nullptr); + if (result < 0) { + throw SysError("MonitorFdHup kevent add"); + } + + while (true) { + struct kevent event; + int numEvents = kevent(kq.get(), nullptr, 0, &event, 1, nullptr); + if (numEvents < 0) { + throw SysError("MonitorFdHup kevent watch"); + } + + if (numEvents > 0 && (event.flags & EV_EOF)) { + if (event.ident == uintptr_t(watchFd)) { + unix::triggerInterrupt(); + } + // Either watched fd or notify fd closed, exit + return; + } + } +} +#else +inline void MonitorFdHup::runThread(int watchFd, int notifyFd) +{ + while (true) { + struct pollfd fds[2]; + fds[0].fd = watchFd; + fds[0].events = 0; // POSIX: POLLHUP is always reported + fds[1].fd = notifyFd; + fds[1].events = 0; + + auto count = poll(fds, 2, -1); + if (count == -1) { + if (errno == EINTR || errno == EAGAIN) { + continue; + } else { + throw SysError("in MonitorFdHup poll()"); + } + } + + if (fds[0].revents & POLLHUP) { + unix::triggerInterrupt(); + break; + } + + if (fds[1].revents & POLLHUP) { + // Notify pipe closed, exit thread + break; + } + } +} +#endif + +inline MonitorFdHup::MonitorFdHup(int fd) +{ + notifyPipe.create(); + int notifyFd = notifyPipe.readSide.get(); + thread = std::thread([this, fd, notifyFd]() { this->runThread(fd, notifyFd); }); +}; + } // namespace nix