From 22635279aa309c022f4805b4e2c1117a2bf09752 Mon Sep 17 00:00:00 2001 From: Ben Siraphob Date: Fri, 22 Aug 2025 01:29:37 -0700 Subject: [PATCH] add nix store break-lock command --- src/nix/meson.build | 1 + src/nix/store-break-lock.cc | 202 ++++++++++++++++++++++++++++++++++++ src/nix/store-break-lock.md | 48 +++++++++ 3 files changed, 251 insertions(+) create mode 100644 src/nix/store-break-lock.cc create mode 100644 src/nix/store-break-lock.md diff --git a/src/nix/meson.build b/src/nix/meson.build index f67a2948f..c4a00edf0 100644 --- a/src/nix/meson.build +++ b/src/nix/meson.build @@ -101,6 +101,7 @@ nix_sources = [ config_priv_h ] + files( 'search.cc', 'self-exe.cc', 'sigs.cc', + 'store-break-lock.cc', 'store-copy-log.cc', 'store-delete.cc', 'store-gc.cc', diff --git a/src/nix/store-break-lock.cc b/src/nix/store-break-lock.cc new file mode 100644 index 000000000..d626ec878 --- /dev/null +++ b/src/nix/store-break-lock.cc @@ -0,0 +1,202 @@ +#include "nix/cmd/command.hh" +#include "nix/main/common-args.hh" +#include "nix/main/shared.hh" +#include "nix/store/store-api.hh" +#include "nix/util/file-system.hh" +#include "nix/util/environment-variables.hh" +#include "nix/util/signals.hh" + +#if !defined(__linux__) +# include "nix/util/processes.hh" +#endif + +#include +#include +#include +#include + +using namespace nix; + +struct CmdStoreBreakLock : StorePathsCommand, MixDryRun +{ + CmdStoreBreakLock() {} + + std::string description() override + { + return "break stale locks on store paths"; + } + + std::string doc() override + { + return +#include "store-break-lock.md" + ; + } + + void run(ref store, StorePaths && storePaths) override + { + for (auto & storePath : storePaths) { + auto pathStr = store->printStorePath(storePath); + auto lockPath = pathStr + ".lock"; + + if (!pathExists(lockPath)) { + warn("lock file '%s' does not exist", lockPath); + continue; + } + + std::set lockingPids; + findLockingProcesses(lockPath, lockingPids); + + if (!lockingPids.empty()) { + printInfo("found %d process(es) holding lock on '%s':", lockingPids.size(), lockPath); + for (pid_t pid : lockingPids) { + printInfo(" PID %d", pid); + } + + if (dryRun) { + printInfo("would kill these processes and remove lock file '%s'", lockPath); + continue; + } + + killProcesses(lockingPids); + } else { + printInfo("no processes found holding lock on '%s'", lockPath); + if (dryRun) { + printInfo("would remove stale lock file '%s'", lockPath); + continue; + } + } + + if (!dryRun) { + removeLockFile(lockPath); + } + } + + if (dryRun) { + printInfo("dry run complete, no locks were broken"); + } else { + printInfo("lock breaking complete"); + } + } + +private: + void findLockingProcesses(const std::string & lockPath, std::set & lockingPids) + { +#ifdef __linux__ + findLockingProcessesLinux(lockPath, lockingPids); +#else + findLockingProcessesLsof(lockPath, lockingPids); +#endif + } + +#ifdef __linux__ + void findLockingProcessesLinux(const std::string & lockPath, std::set & lockingPids) + { + try { + static const std::regex digitsRegex(R"(^\d+$)"); + + for (auto & entry : DirectoryIterator{"/proc"}) { + checkInterrupt(); + + auto name = entry.path().filename().string(); + /* Check if the directory name is a PID */ + if (!std::regex_match(name, digitsRegex)) + continue; + + try { + pid_t pid = std::stoi(name); + auto fdDir = fmt("/proc/%d/fd", pid); + + for (auto & fdEntry : DirectoryIterator{fdDir}) { + try { + auto target = readLink(fdEntry.path().string()); + if (target == lockPath) { + lockingPids.insert(pid); + break; + } + } catch (SysError & e) { + /* Ignore permission errors or missing links */ + if (e.errNo != ENOENT && e.errNo != EACCES) + throw; + } + } + } catch (SysError & e) { + /* Process likely exited or we lack permission */ + if (e.errNo != ENOENT && e.errNo != EACCES && e.errNo != ESRCH) + throw; + } + } + } catch (SysError & e) { + /* /proc might not be mounted or accessible */ + if (e.errNo != ENOENT && e.errNo != EACCES) + throw; + } + } +#endif + +#if !defined(__linux__) + void findLockingProcessesLsof(const std::string & lockPath, std::set & lockingPids) + { + /* lsof can be slow, but it's the portable way to find open files */ + if (getEnv("_NIX_TEST_NO_LSOF") == "1") { + return; + } + + try { + /* Run lsof to find processes with the lock file open + -t: terse output (PIDs only) */ + auto pidsStr = runProgram("lsof", true, {"-t", lockPath}); + auto pids = tokenizeString>(pidsStr, "\n"); + for (const auto & pidStr : pids) { + if (!pidStr.empty()) { + try { + lockingPids.insert(std::stoi(pidStr)); + } catch (const std::invalid_argument &) { + /* Ignore malformed PIDs */ + } catch (const std::out_of_range &) { + /* Ignore out-of-range PIDs */ + } + } + } + } catch (ExecError & e) { + /* lsof returns non-zero if no files found, which is fine */ + } + } +#endif + + void killProcesses(const std::set & pids) + { + for (pid_t pid : pids) { + printInfo("killing process %d", pid); + if (kill(pid, SIGTERM) != 0) { + if (errno == ESRCH) { + warn("process %d no longer exists", pid); + } else { + warn("failed to kill process %d: %s", pid, strerror(errno)); + /* Try SIGKILL as last resort */ + if (kill(pid, SIGKILL) != 0 && errno != ESRCH) { + warn("failed to forcefully kill process %d: %s", pid, strerror(errno)); + } + } + } + } + + /* Give processes time to terminate */ + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + + void removeLockFile(const std::string & lockPath) + { + if (unlink(lockPath.c_str()) == 0) { + printInfo("successfully removed lock file '%s'", lockPath); + } else { + if (errno == ENOENT) { + printInfo("lock file '%s' was already removed", lockPath); + } else { + throw SysError("removing lock file '%s'", lockPath); + } + } + } +}; + +static auto rCmdStoreBreakLock = registerCommand2({"store", "break-lock"}); diff --git a/src/nix/store-break-lock.md b/src/nix/store-break-lock.md new file mode 100644 index 000000000..688638770 --- /dev/null +++ b/src/nix/store-break-lock.md @@ -0,0 +1,48 @@ +R""( +# Examples + +* Break a lock on a specific store path: + + ```console + # nix store break-lock /nix/store/abc123-package + ``` + +* Preview what locks would be broken without actually breaking them: + + ```console + # nix store break-lock --dry-run /nix/store/abc123-package + ``` + +* Break locks on multiple paths: + + ```console + # nix store break-lock /nix/store/abc123-package /nix/store/def456-other + ``` + +# Description + +This command breaks locks on Nix store paths that may be stuck due to +interrupted builds or crashed processes. It attempts to: + +1. Find processes holding locks on the specified store paths +2. Kill those processes (unless `--dry-run` is specified) +3. Remove the lock files + +This is useful when you encounter messages like "waiting for locks or +build slots" and the build process appears stuck indefinitely. + +# Options + +* `--dry-run`: Show what locks would be broken without actually breaking + them. This is useful for seeing which processes would be affected. + +# Notes + +* This command only works on local Nix stores +* On Linux, it uses `/proc` to identify processes holding locks +* On macOS and other Unix systems, it uses `lsof` to identify processes +* Always prefer letting builds complete naturally or using `Ctrl-C` to + cleanly interrupt them before resorting to this command +* Breaking locks on paths that are genuinely being built may lead to + inconsistent store state +)""