mirror of
https://github.com/NixOS/nix.git
synced 2025-11-08 19:46:02 +01:00
add nix store break-lock command
This commit is contained in:
parent
090f7fb05e
commit
22635279aa
3 changed files with 251 additions and 0 deletions
|
|
@ -101,6 +101,7 @@ nix_sources = [ config_priv_h ] + files(
|
|||
'search.cc',
|
||||
'self-exe.cc',
|
||||
'sigs.cc',
|
||||
'store-break-lock.cc',
|
||||
'store-copy-log.cc',
|
||||
'store-delete.cc',
|
||||
'store-gc.cc',
|
||||
|
|
|
|||
202
src/nix/store-break-lock.cc
Normal file
202
src/nix/store-break-lock.cc
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
#include "nix/cmd/command.hh"
|
||||
#include "nix/main/common-args.hh"
|
||||
#include "nix/main/shared.hh"
|
||||
#include "nix/store/store-api.hh"
|
||||
#include "nix/util/file-system.hh"
|
||||
#include "nix/util/environment-variables.hh"
|
||||
#include "nix/util/signals.hh"
|
||||
|
||||
#if !defined(__linux__)
|
||||
# include "nix/util/processes.hh"
|
||||
#endif
|
||||
|
||||
#include <signal.h>
|
||||
#include <thread>
|
||||
#include <chrono>
|
||||
#include <regex>
|
||||
|
||||
using namespace nix;
|
||||
|
||||
/**
 * Implementation of `nix store break-lock`.
 *
 * For every store path given on the command line, this looks at the
 * sibling `<path>.lock` file, finds the processes that currently have
 * that file open, terminates them, and finally unlinks the lock file.
 * With `--dry-run` (the `dryRun` flag, presumably provided by
 * `MixDryRun`) it only reports what it would do.
 */
struct CmdStoreBreakLock : StorePathsCommand, MixDryRun
{
    CmdStoreBreakLock() {}

    std::string description() override
    {
        return "break stale locks on store paths";
    }

    std::string doc() override
    {
        return
#include "store-break-lock.md"
            ;
    }

    /**
     * Break the lock of each path in `storePaths`.
     *
     * @param store Store used to render each store path as a string.
     * @param storePaths Paths whose `.lock` files should be broken.
     *
     * Paths without a lock file only produce a warning. Errors raised
     * while removing a lock file (see `removeLockFile`) propagate.
     */
    void run(ref<Store> store, StorePaths && storePaths) override
    {
        for (const auto & storePath : storePaths) {
            const auto pathStr = store->printStorePath(storePath);
            const auto lockPath = pathStr + ".lock";

            if (!pathExists(lockPath)) {
                warn("lock file '%s' does not exist", lockPath);
                continue;
            }

            std::set<pid_t> lockingPids;
            findLockingProcesses(lockPath, lockingPids);

            if (lockingPids.empty()) {
                printInfo("no processes found holding lock on '%s'", lockPath);
                if (dryRun) {
                    printInfo("would remove stale lock file '%s'", lockPath);
                    continue;
                }
            } else {
                printInfo("found %d process(es) holding lock on '%s':", lockingPids.size(), lockPath);
                for (pid_t pid : lockingPids) {
                    printInfo("  PID %d", pid);
                }

                if (dryRun) {
                    printInfo("would kill these processes and remove lock file '%s'", lockPath);
                    continue;
                }

                killProcesses(lockingPids);
            }

            /* Every dry-run branch above has already `continue`d, so
               reaching this point means we really want to remove it. */
            removeLockFile(lockPath);
        }

        if (dryRun) {
            printInfo("dry run complete, no locks were broken");
        } else {
            printInfo("lock breaking complete");
        }
    }

private:
    /**
     * Collect into `lockingPids` the PIDs of processes that have
     * `lockPath` open, using the platform-specific strategy selected
     * at compile time.
     */
    void findLockingProcesses(const std::string & lockPath, std::set<pid_t> & lockingPids)
    {
#ifdef __linux__
        findLockingProcessesLinux(lockPath, lockingPids);
#else
        findLockingProcessesLsof(lockPath, lockingPids);
#endif
    }

#ifdef __linux__
    /**
     * Linux strategy: walk `/proc/<pid>/fd` for every numeric entry of
     * `/proc` and record the PID when one of its file descriptor
     * symlinks resolves to exactly `lockPath`.
     *
     * ENOENT/EACCES (and ESRCH for a whole process) are ignored so that
     * processes which exit mid-scan, or which we may not inspect, do
     * not abort the scan. Any other `SysError` is rethrown.
     */
    void findLockingProcessesLinux(const std::string & lockPath, std::set<pid_t> & lockingPids)
    {
        try {
            static const std::regex digitsRegex(R"(^\d+$)");

            for (auto & entry : DirectoryIterator{"/proc"}) {
                checkInterrupt();

                auto name = entry.path().filename().string();
                /* Check if the directory name is a PID */
                if (!std::regex_match(name, digitsRegex))
                    continue;

                try {
                    pid_t pid = std::stoi(name);
                    auto fdDir = fmt("/proc/%d/fd", pid);

                    for (auto & fdEntry : DirectoryIterator{fdDir}) {
                        try {
                            auto target = readLink(fdEntry.path().string());
                            if (target == lockPath) {
                                lockingPids.insert(pid);
                                /* One matching descriptor is enough for this process. */
                                break;
                            }
                        } catch (SysError & e) {
                            /* Ignore permission errors or missing links */
                            if (e.errNo != ENOENT && e.errNo != EACCES)
                                throw;
                        }
                    }
                } catch (SysError & e) {
                    /* Process likely exited or we lack permission */
                    if (e.errNo != ENOENT && e.errNo != EACCES && e.errNo != ESRCH)
                        throw;
                } catch (const std::out_of_range &) {
                    /* An all-digits name that does not fit in an int
                       cannot be a PID; skip it, as the lsof variant
                       does for out-of-range values. */
                }
            }
        } catch (SysError & e) {
            /* /proc might not be mounted or accessible */
            if (e.errNo != ENOENT && e.errNo != EACCES)
                throw;
        }
    }
#endif

#if !defined(__linux__)
    /**
     * Non-Linux strategy: ask `lsof -t <lockPath>` for the PIDs of the
     * processes that have the file open and parse one PID per line.
     *
     * The lookup is skipped entirely when the environment variable
     * `_NIX_TEST_NO_LSOF` is set to `1`. A failing `lsof` invocation is
     * treated as "no processes found".
     */
    void findLockingProcessesLsof(const std::string & lockPath, std::set<pid_t> & lockingPids)
    {
        /* lsof can be slow, but it's the portable way to find open files */
        if (getEnv("_NIX_TEST_NO_LSOF") == "1") {
            return;
        }

        try {
            /* Run lsof to find processes with the lock file open
               -t: terse output (PIDs only) */
            auto pidsStr = runProgram("lsof", true, {"-t", lockPath});
            auto pids = tokenizeString<std::vector<std::string>>(pidsStr, "\n");

            for (const auto & pidStr : pids) {
                if (pidStr.empty())
                    continue;
                try {
                    lockingPids.insert(std::stoi(pidStr));
                } catch (const std::invalid_argument &) {
                    /* Ignore malformed PIDs */
                } catch (const std::out_of_range &) {
                    /* Ignore out-of-range PIDs */
                }
            }
        } catch (ExecError & e) {
            /* lsof returns non-zero if no files found, which is fine.
               Other failures land here too, so leave a trace at debug
               level instead of discarding them without a word. */
            debug("lsof reported no users of '%s': %s", lockPath, e.what());
        }
    }
#endif

    /**
     * Terminate every process in `pids`.
     *
     * Each process first receives SIGTERM. Processes that accepted the
     * signal are then polled for up to one second; any that are still
     * present afterwards receive SIGKILL. Failures are reported as
     * warnings and never thrown.
     */
    void killProcesses(const std::set<pid_t> & pids)
    {
        /* Processes that accepted SIGTERM and may still be running. */
        std::set<pid_t> pending;

        for (pid_t pid : pids) {
            printInfo("killing process %d", pid);
            if (kill(pid, SIGTERM) == 0) {
                pending.insert(pid);
            } else if (errno == ESRCH) {
                warn("process %d no longer exists", pid);
            } else {
                /* Sending the signal itself was refused (typically
                   EPERM). SIGKILL is subject to the same permission
                   check, so retrying with it cannot succeed. */
                warn("failed to kill process %d: %s", pid, strerror(errno));
            }
        }

        /* Give processes time to terminate, but stop waiting as soon
           as all of them are gone (or immediately if none were
           signalled). */
        const auto pollInterval = std::chrono::milliseconds(100);
        const auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(1);

        while (!pending.empty() && std::chrono::steady_clock::now() < deadline) {
            checkInterrupt();
            std::this_thread::sleep_for(pollInterval);

            for (auto it = pending.begin(); it != pending.end();) {
                /* Signal 0 performs only the existence/permission
                   check; ESRCH means the process has gone away. */
                if (kill(*it, 0) != 0 && errno == ESRCH)
                    it = pending.erase(it);
                else
                    ++it;
            }
        }

        /* Try SIGKILL as last resort for anything that ignored SIGTERM */
        for (pid_t pid : pending) {
            warn("process %d did not exit after SIGTERM, sending SIGKILL", pid);
            if (kill(pid, SIGKILL) != 0 && errno != ESRCH) {
                warn("failed to forcefully kill process %d: %s", pid, strerror(errno));
            }
        }
    }

    /**
     * Unlink `lockPath`.
     *
     * A lock file that has vanished in the meantime (ENOENT) is only
     * reported; any other failure raises `SysError`.
     */
    void removeLockFile(const std::string & lockPath)
    {
        if (unlink(lockPath.c_str()) == 0) {
            printInfo("successfully removed lock file '%s'", lockPath);
            return;
        }

        if (errno == ENOENT) {
            printInfo("lock file '%s' was already removed", lockPath);
            return;
        }

        throw SysError("removing lock file '%s'", lockPath);
    }
};
|
||||
|
||||
/* Register CmdStoreBreakLock under the command path {"store", "break-lock"},
   i.e. the `nix store break-lock` invocation shown in the command's docs. */
static auto rCmdStoreBreakLock = registerCommand2<CmdStoreBreakLock>({"store", "break-lock"});
|
||||
48
src/nix/store-break-lock.md
Normal file
48
src/nix/store-break-lock.md
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
R""(
|
||||
# Examples
|
||||
|
||||
* Break a lock on a specific store path:
|
||||
|
||||
```console
|
||||
# nix store break-lock /nix/store/abc123-package
|
||||
```
|
||||
|
||||
* Preview what locks would be broken without actually breaking them:
|
||||
|
||||
```console
|
||||
# nix store break-lock --dry-run /nix/store/abc123-package
|
||||
```
|
||||
|
||||
* Break locks on multiple paths:
|
||||
|
||||
```console
|
||||
# nix store break-lock /nix/store/abc123-package /nix/store/def456-other
|
||||
```
|
||||
|
||||
# Description
|
||||
|
||||
This command breaks locks on Nix store paths that may be stuck due to
|
||||
interrupted builds or crashed processes. It attempts to:
|
||||
|
||||
1. Find processes holding locks on the specified store paths
|
||||
2. Kill those processes (unless `--dry-run` is specified)
|
||||
3. Remove the lock files
|
||||
|
||||
This is useful when you encounter messages like "waiting for locks or
|
||||
build slots" and the build process appears stuck indefinitely.
|
||||
|
||||
# Options
|
||||
|
||||
* `--dry-run`: Show what locks would be broken without actually breaking
|
||||
them. This is useful for seeing which processes would be affected.
|
||||
|
||||
# Notes
|
||||
|
||||
* This command only works on local Nix stores
|
||||
* On Linux, it scans `/proc/<pid>/fd` to identify processes that have the lock file open
|
||||
* On macOS and other Unix systems, it uses `lsof` to identify processes
|
||||
* Always prefer letting builds complete naturally or using `Ctrl-C` to
|
||||
cleanly interrupt them before resorting to this command
|
||||
* Breaking locks on paths that are genuinely being built may lead to
|
||||
inconsistent store state
|
||||
)""
|
||||
Loading…
Add table
Add a link
Reference in a new issue