1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-09 12:06:01 +01:00

Merge pull request #13905 from obsidiansystems/derivation-building-goal-simplify-0

Derivation building goal simplify -- no `goto`
This commit is contained in:
John Ericson 2025-09-03 17:34:27 -04:00 committed by GitHub
commit b69576e2b3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 272 additions and 260 deletions

View file

@ -127,31 +127,6 @@ static void runPostBuildHook(
produced using a substitute. So we have to build instead. */
Goal::Co DerivationBuildingGoal::gaveUpOnSubstitution()
{
/* Recheck at goal start. In particular, whereas before we were
given this information by the downstream goal, that cannot happen
anymore if the downstream goal only cares about one output, but
we care about all outputs. */
auto outputHashes = staticOutputHashes(worker.evalStore, *drv);
for (auto & [outputName, outputHash] : outputHashes) {
InitialOutput v{.outputHash = outputHash};
/* TODO we might want to also allow randomizing the paths
for regular CA derivations, e.g. for sake of checking
determinism. */
if (drv->type().isImpure()) {
v.known = InitialOutputStatus{
.path = StorePath::random(outputPathName(drv->name, outputName)),
.status = PathStatus::Absent,
};
}
initialOutputs.insert({
outputName,
std::move(v),
});
}
checkPathValidity();
Goals waitees;
std::map<ref<const SingleDerivedPath>, GoalPtr, value_comparison> inputGoals;
@ -334,14 +309,15 @@ Goal::Co DerivationBuildingGoal::gaveUpOnSubstitution()
if (resolvedResult.success()) {
SingleDrvOutputs builtOutputs;
auto outputHashes = staticOutputHashes(worker.evalStore, *drv);
auto resolvedHashes = staticOutputHashes(worker.store, drvResolved);
StorePathSet outputPaths;
for (auto & outputName : drvResolved.outputNames()) {
auto initialOutput = get(initialOutputs, outputName);
auto outputHash = get(outputHashes, outputName);
auto resolvedHash = get(resolvedHashes, outputName);
if ((!initialOutput) || (!resolvedHash))
if ((!outputHash) || (!resolvedHash))
throw Error(
"derivation '%s' doesn't have expected output '%s' (derivation-goal.cc/resolve)",
worker.store.printStorePath(drvPath),
@ -368,7 +344,7 @@ Goal::Co DerivationBuildingGoal::gaveUpOnSubstitution()
if (!drv->type().isImpure()) {
auto newRealisation = realisation;
newRealisation.id = DrvOutput{initialOutput->outputHash, outputName};
newRealisation.id = DrvOutput{*outputHash, outputName};
newRealisation.signatures.clear();
if (!drv->type().isFixed()) {
auto & drvStore = worker.evalStore.isValidPath(drvPath) ? worker.evalStore : worker.store;
@ -439,136 +415,263 @@ Goal::Co DerivationBuildingGoal::gaveUpOnSubstitution()
co_return tryToBuild();
}
void DerivationBuildingGoal::started()
{
auto msg =
fmt(buildMode == bmRepair ? "repairing outputs of '%s'"
: buildMode == bmCheck ? "checking outputs of '%s'"
: "building '%s'",
worker.store.printStorePath(drvPath));
fmt("building '%s'", worker.store.printStorePath(drvPath));
#ifndef _WIN32 // TODO enable build hook on Windows
if (hook)
msg += fmt(" on '%s'", machineName);
#endif
act = std::make_unique<Activity>(
*logger,
lvlInfo,
actBuild,
msg,
Logger::Fields{
worker.store.printStorePath(drvPath),
#ifndef _WIN32 // TODO enable build hook on Windows
hook ? machineName :
#endif
"",
1,
1});
mcRunningBuilds = std::make_unique<MaintainCount<uint64_t>>(worker.runningBuilds);
worker.updateProgress();
}
Goal::Co DerivationBuildingGoal::tryToBuild()
{
trace("trying to build");
std::map<std::string, InitialOutput> initialOutputs;
/* Obtain locks on all output paths, if the paths are known a priori.
/* Recheck at this point. In particular, whereas before we were
given this information by the downstream goal, that cannot happen
anymore if the downstream goal only cares about one output, but
we care about all outputs. */
auto outputHashes = staticOutputHashes(worker.evalStore, *drv);
for (auto & [outputName, outputHash] : outputHashes) {
InitialOutput v{.outputHash = outputHash};
The locks are automatically released when we exit this function or Nix
crashes. If we can't acquire the lock, then continue; hopefully some
other goal can start a build, and if not, the main loop will sleep a few
seconds and then retry this goal. */
PathSet lockFiles;
/* FIXME: Should lock something like the drv itself so we don't build same
CA drv concurrently */
if (dynamic_cast<LocalStore *>(&worker.store)) {
/* If we aren't a local store, we might need to use the local store as
a build remote, but that would cause a deadlock. */
/* FIXME: Make it so we can use ourselves as a build remote even if we
are the local store (separate locking for building vs scheduling? */
/* FIXME: find some way to lock for scheduling for the other stores so
a forking daemon with --store still won't farm out redundant builds.
*/
for (auto & i : drv->outputsAndOptPaths(worker.store)) {
if (i.second.second)
lockFiles.insert(worker.store.Store::toRealPath(*i.second.second));
else
lockFiles.insert(worker.store.Store::toRealPath(drvPath) + "." + i.first);
/* TODO we might want to also allow randomizing the paths
for regular CA derivations, e.g. for sake of checking
determinism. */
if (drv->type().isImpure()) {
v.known = InitialOutputStatus{
.path = StorePath::random(outputPathName(drv->name, outputName)),
.status = PathStatus::Absent,
};
}
initialOutputs.insert({
outputName,
std::move(v),
});
}
checkPathValidity(initialOutputs);
if (!outputLocks.lockPaths(lockFiles, "", false)) {
Activity act(*logger, lvlWarn, actBuildWaiting, fmt("waiting for lock on %s", Magenta(showPaths(lockFiles))));
auto started = [&]() {
auto msg =
fmt(buildMode == bmRepair ? "repairing outputs of '%s'"
: buildMode == bmCheck ? "checking outputs of '%s'"
: "building '%s'",
worker.store.printStorePath(drvPath));
fmt("building '%s'", worker.store.printStorePath(drvPath));
#ifndef _WIN32 // TODO enable build hook on Windows
if (hook)
msg += fmt(" on '%s'", hook->machineName);
#endif
act = std::make_unique<Activity>(
*logger,
lvlInfo,
actBuild,
msg,
Logger::Fields{
worker.store.printStorePath(drvPath),
#ifndef _WIN32 // TODO enable build hook on Windows
hook ? hook->machineName :
#endif
"",
1,
1});
mcRunningBuilds = std::make_unique<MaintainCount<uint64_t>>(worker.runningBuilds);
worker.updateProgress();
};
/* Wait then try locking again, repeat until success (returned
boolean is true). */
do {
co_await waitForAWhile();
} while (!outputLocks.lockPaths(lockFiles, "", false));
}
/**
* Activity that denotes waiting for a lock.
*/
std::unique_ptr<Activity> actLock;
/* Now check again whether the outputs are valid. This is because
another process may have started building in parallel. After
it has finished and released the locks, we can (and should)
reuse its results. (Strictly speaking the first check can be
omitted, but that would be less efficient.) Note that since we
now hold the locks on the output paths, no other process can
build this derivation, so no further checks are necessary. */
auto [allValid, validOutputs] = checkPathValidity();
/**
* Locks on (fixed) output paths.
*/
PathLocks outputLocks;
if (buildMode != bmCheck && allValid) {
debug("skipping build of derivation '%s', someone beat us to it", worker.store.printStorePath(drvPath));
outputLocks.setDeletion(true);
outputLocks.unlock();
co_return doneSuccess(BuildResult::AlreadyValid, std::move(validOutputs));
}
bool useHook;
/* If any of the outputs already exist but are not valid, delete
them. */
for (auto & [_, status] : initialOutputs) {
if (!status.known || status.known->isValid())
continue;
auto storePath = status.known->path;
debug("removing invalid path '%s'", worker.store.printStorePath(status.known->path));
deletePath(worker.store.Store::toRealPath(storePath));
}
while (true) {
trace("trying to build");
/* Don't do a remote build if the derivation has the attribute
`preferLocalBuild' set. Also, check and repair modes are only
supported for local builds. */
bool buildLocally =
(buildMode != bmNormal || drvOptions->willBuildLocally(worker.store, *drv)) && settings.maxBuildJobs.get() != 0;
/* Obtain locks on all output paths, if the paths are known a priori.
if (!buildLocally) {
switch (tryBuildHook()) {
case rpAccept:
/* Yes, it has started doing so. Wait until we get
EOF from the hook. */
actLock.reset();
buildResult.startTime = time(0); // inexact
started();
co_await Suspend{};
co_return hookDone();
case rpPostpone:
/* Not now; wait until at least one child finishes or
the wake-up timeout expires. */
if (!actLock)
actLock = std::make_unique<Activity>(
*logger,
lvlWarn,
actBuildWaiting,
fmt("waiting for a machine to build '%s'", Magenta(worker.store.printStorePath(drvPath))));
The locks are automatically released when we exit this function or Nix
crashes. If we can't acquire the lock, then continue; hopefully some
other goal can start a build, and if not, the main loop will sleep a few
seconds and then retry this goal. */
PathSet lockFiles;
/* FIXME: Should lock something like the drv itself so we don't build same
CA drv concurrently */
if (dynamic_cast<LocalStore *>(&worker.store)) {
/* If we aren't a local store, we might need to use the local store as
a build remote, but that would cause a deadlock. */
/* FIXME: Make it so we can use ourselves as a build remote even if we
are the local store (separate locking for building vs scheduling? */
/* FIXME: find some way to lock for scheduling for the other stores so
a forking daemon with --store still won't farm out redundant builds.
*/
for (auto & i : drv->outputsAndOptPaths(worker.store)) {
if (i.second.second)
lockFiles.insert(worker.store.Store::toRealPath(*i.second.second));
else
lockFiles.insert(worker.store.Store::toRealPath(drvPath) + "." + i.first);
}
}
if (!outputLocks.lockPaths(lockFiles, "", false)) {
Activity act(
*logger, lvlWarn, actBuildWaiting, fmt("waiting for lock on %s", Magenta(showPaths(lockFiles))));
/* Wait then try locking again, repeat until success (returned
boolean is true). */
do {
co_await waitForAWhile();
} while (!outputLocks.lockPaths(lockFiles, "", false));
}
/* Now check again whether the outputs are valid. This is because
another process may have started building in parallel. After
it has finished and released the locks, we can (and should)
reuse its results. (Strictly speaking the first check can be
omitted, but that would be less efficient.) Note that since we
now hold the locks on the output paths, no other process can
build this derivation, so no further checks are necessary. */
auto [allValid, validOutputs] = checkPathValidity(initialOutputs);
if (buildMode != bmCheck && allValid) {
debug("skipping build of derivation '%s', someone beat us to it", worker.store.printStorePath(drvPath));
outputLocks.setDeletion(true);
outputLocks.unlock();
co_await waitForAWhile();
co_return tryToBuild();
case rpDecline:
/* We should do it ourselves. */
break;
co_return doneSuccess(BuildResult::AlreadyValid, std::move(validOutputs));
}
/* If any of the outputs already exist but are not valid, delete
them. */
for (auto & [_, status] : initialOutputs) {
if (!status.known || status.known->isValid())
continue;
auto storePath = status.known->path;
debug("removing invalid path '%s'", worker.store.printStorePath(status.known->path));
deletePath(worker.store.Store::toRealPath(storePath));
}
/* Don't do a remote build if the derivation has the attribute
`preferLocalBuild' set. Also, check and repair modes are only
supported for local builds. */
bool buildLocally = (buildMode != bmNormal || drvOptions->willBuildLocally(worker.store, *drv))
&& settings.maxBuildJobs.get() != 0;
if (buildLocally) {
useHook = false;
} else {
switch (tryBuildHook(initialOutputs)) {
case rpAccept:
/* Yes, it has started doing so. Wait until we get
EOF from the hook. */
useHook = true;
break;
case rpPostpone:
/* Not now; wait until at least one child finishes or
the wake-up timeout expires. */
if (!actLock)
actLock = std::make_unique<Activity>(
*logger,
lvlWarn,
actBuildWaiting,
fmt("waiting for a machine to build '%s'", Magenta(worker.store.printStorePath(drvPath))));
outputLocks.unlock();
co_await waitForAWhile();
continue;
case rpDecline:
/* We should do it ourselves. */
useHook = false;
break;
}
}
break;
}
actLock.reset();
if (useHook) {
buildResult.startTime = time(0); // inexact
started();
co_await Suspend{};
#ifndef _WIN32
assert(hook);
#endif
trace("hook build done");
/* Since we got an EOF on the logger pipe, the builder is presumed
to have terminated. In fact, the builder could also have
simply have closed its end of the pipe, so just to be sure,
kill it. */
int status =
#ifndef _WIN32 // TODO enable build hook on Windows
hook->pid.kill();
#else
0;
#endif
debug("build hook for '%s' finished", worker.store.printStorePath(drvPath));
buildResult.timesBuilt++;
buildResult.stopTime = time(0);
/* So the child is gone now. */
worker.childTerminated(this);
/* Close the read side of the logger pipe. */
#ifndef _WIN32 // TODO enable build hook on Windows
hook->builderOut.readSide.close();
hook->fromHook.readSide.close();
#endif
/* Close the log file. */
closeLogFile();
/* Check the exit status. */
if (!statusOk(status)) {
auto e = fixupBuilderFailureErrorMessage({BuildResult::MiscFailure, status, ""});
outputLocks.unlock();
/* TODO (once again) support fine-grained error codes, see issue #12641. */
co_return doneFailure(std::move(e));
}
/* Compute the FS closure of the outputs and register them as
being valid. */
auto builtOutputs =
/* When using a build hook, the build hook can register the output
as valid (by doing `nix-store --import'). If so we don't have
to do anything here.
We can only early return when the outputs are known a priori. For
floating content-addressing derivations this isn't the case.
Aborts if any output is not valid or corrupt, and otherwise
returns a 'SingleDrvOutputs' structure containing all outputs.
*/
[&] {
auto [allValid, validOutputs] = checkPathValidity(initialOutputs);
if (!allValid)
throw Error("some outputs are unexpectedly invalid");
return validOutputs;
}();
StorePathSet outputPaths;
for (auto & [_, output] : builtOutputs)
outputPaths.insert(output.outPath);
runPostBuildHook(worker.store, *logger, drvPath, outputPaths);
/* It is now safe to delete the lock files, since all future
lockers will see that the output paths are valid; they will
not create new lock files with the same names as the old
(unlinked) lock files. */
outputLocks.setDeletion(true);
outputLocks.unlock();
co_return doneSuccess(BuildResult::Built, std::move(builtOutputs));
}
co_await yield();
if (!dynamic_cast<LocalStore *>(&worker.store)) {
@ -584,12 +687,11 @@ Goal::Co DerivationBuildingGoal::tryToBuild()
#ifdef _WIN32 // TODO enable `DerivationBuilder` on Windows
throw UnimplementedError("building derivations is not yet implemented on Windows");
#else
assert(!hook);
// Will continue here while waiting for a build user below
while (true) {
assert(!hook);
unsigned int curBuilds = worker.getNrLocalBuilds();
if (curBuilds >= settings.maxBuildJobs) {
outputLocks.unlock();
@ -868,81 +970,7 @@ BuildError DerivationBuildingGoal::fixupBuilderFailureErrorMessage(BuilderFailur
return BuildError{e.status, msg};
}
Goal::Co DerivationBuildingGoal::hookDone()
{
#ifndef _WIN32
assert(hook);
#endif
trace("hook build done");
/* Since we got an EOF on the logger pipe, the builder is presumed
to have terminated. In fact, the builder could also have
simply have closed its end of the pipe, so just to be sure,
kill it. */
int status =
#ifndef _WIN32 // TODO enable build hook on Windows
hook->pid.kill();
#else
0;
#endif
debug("build hook for '%s' finished", worker.store.printStorePath(drvPath));
buildResult.timesBuilt++;
buildResult.stopTime = time(0);
/* So the child is gone now. */
worker.childTerminated(this);
/* Close the read side of the logger pipe. */
#ifndef _WIN32 // TODO enable build hook on Windows
hook->builderOut.readSide.close();
hook->fromHook.readSide.close();
#endif
/* Close the log file. */
closeLogFile();
/* Check the exit status. */
if (!statusOk(status)) {
auto e = fixupBuilderFailureErrorMessage({BuildResult::MiscFailure, status, ""});
outputLocks.unlock();
/* TODO (once again) support fine-grained error codes, see issue #12641. */
co_return doneFailure(std::move(e));
}
/* Compute the FS closure of the outputs and register them as
being valid. */
auto builtOutputs =
/* When using a build hook, the build hook can register the output
as valid (by doing `nix-store --import'). If so we don't have
to do anything here.
We can only early return when the outputs are known a priori. For
floating content-addressing derivations this isn't the case.
*/
assertPathValidity();
StorePathSet outputPaths;
for (auto & [_, output] : builtOutputs)
outputPaths.insert(output.outPath);
runPostBuildHook(worker.store, *logger, drvPath, outputPaths);
/* It is now safe to delete the lock files, since all future
lockers will see that the output paths are valid; they will
not create new lock files with the same names as the old
(unlinked) lock files. */
outputLocks.setDeletion(true);
outputLocks.unlock();
co_return doneSuccess(BuildResult::Built, std::move(builtOutputs));
}
HookReply DerivationBuildingGoal::tryBuildHook()
HookReply DerivationBuildingGoal::tryBuildHook(const std::map<std::string, InitialOutput> & initialOutputs)
{
#ifdef _WIN32 // TODO enable build hook on Windows
return rpDecline;
@ -1010,7 +1038,7 @@ HookReply DerivationBuildingGoal::tryBuildHook()
hook = std::move(worker.hook);
try {
machineName = readLine(hook->fromHook.readSide.get());
hook->machineName = readLine(hook->fromHook.readSide.get());
} catch (Error & e) {
e.addTrace({}, "while reading the machine name from the build hook");
throw;
@ -1221,7 +1249,8 @@ std::map<std::string, std::optional<StorePath>> DerivationBuildingGoal::queryPar
return res;
}
std::pair<bool, SingleDrvOutputs> DerivationBuildingGoal::checkPathValidity()
std::pair<bool, SingleDrvOutputs>
DerivationBuildingGoal::checkPathValidity(std::map<std::string, InitialOutput> & initialOutputs)
{
if (drv->type().isImpure())
return {false, {}};
@ -1278,17 +1307,8 @@ std::pair<bool, SingleDrvOutputs> DerivationBuildingGoal::checkPathValidity()
return {allValid, validOutputs};
}
SingleDrvOutputs DerivationBuildingGoal::assertPathValidity()
{
auto [allValid, validOutputs] = checkPathValidity();
if (!allValid)
throw Error("some outputs are unexpectedly invalid");
return validOutputs;
}
Goal::Done DerivationBuildingGoal::doneSuccess(BuildResult::Status status, SingleDrvOutputs builtOutputs)
{
outputLocks.unlock();
buildResult.status = status;
assert(buildResult.success());
@ -1306,7 +1326,6 @@ Goal::Done DerivationBuildingGoal::doneSuccess(BuildResult::Status status, Singl
Goal::Done DerivationBuildingGoal::doneFailure(BuildError ex)
{
outputLocks.unlock();
buildResult.status = ex.status;
buildResult.errorMsg = fmt("%s", Uncolored(ex.info().msg));
if (buildResult.status == BuildResult::TimedOut)

View file

@ -29,6 +29,12 @@ typedef enum { rpAccept, rpDecline, rpPostpone } HookReply;
*/
struct DerivationBuildingGoal : public Goal
{
DerivationBuildingGoal(
const StorePath & drvPath, const Derivation & drv, Worker & worker, BuildMode buildMode = bmNormal);
~DerivationBuildingGoal();
private:
/** The path of the derivation. */
StorePath drvPath;
@ -43,19 +49,12 @@ struct DerivationBuildingGoal : public Goal
* The remainder is state held during the build.
*/
/**
* Locks on (fixed) output paths.
*/
PathLocks outputLocks;
/**
* All input paths (that is, the union of FS closures of the
* immediate input paths).
*/
StorePathSet inputPaths;
std::map<std::string, InitialOutput> initialOutputs;
/**
* File descriptor for the log file.
*/
@ -92,22 +91,8 @@ struct DerivationBuildingGoal : public Goal
std::unique_ptr<Activity> act;
/**
* Activity that denotes waiting for a lock.
*/
std::unique_ptr<Activity> actLock;
std::map<ActivityId, Activity> builderActivities;
/**
* The remote machine on which we're building.
*/
std::string machineName;
DerivationBuildingGoal(
const StorePath & drvPath, const Derivation & drv, Worker & worker, BuildMode buildMode = bmNormal);
~DerivationBuildingGoal();
void timedOut(Error && ex) override;
std::string key() override;
@ -117,12 +102,11 @@ struct DerivationBuildingGoal : public Goal
*/
Co gaveUpOnSubstitution();
Co tryToBuild();
Co hookDone();
/**
* Is the build hook willing to perform the build?
*/
HookReply tryBuildHook();
HookReply tryBuildHook(const std::map<std::string, InitialOutput> & initialOutputs);
/**
* Open a log file and a pipe to it.
@ -156,21 +140,13 @@ struct DerivationBuildingGoal : public Goal
* whether all outputs are valid and non-corrupt, and a
* 'SingleDrvOutputs' structure containing the valid outputs.
*/
std::pair<bool, SingleDrvOutputs> checkPathValidity();
/**
* Aborts if any output is not valid or corrupt, and otherwise
* returns a 'SingleDrvOutputs' structure containing all outputs.
*/
SingleDrvOutputs assertPathValidity();
std::pair<bool, SingleDrvOutputs> checkPathValidity(std::map<std::string, InitialOutput> & initialOutputs);
/**
* Forcibly kill the child process, if any.
*/
void killChild();
void started();
Done doneSuccess(BuildResult::Status status, SingleDrvOutputs builtOutputs);
Done doneFailure(BuildError ex);

View file

@ -7,6 +7,14 @@
namespace nix {
/**
* @note Sometimes this is owned by the `Worker`, and sometimes it is
* owned by a `Goal`. This is for efficiency: rather than starting the
* hook every time we want to ask whether we can run a remote build
* (which can be very often), we reuse a hook process for answering
* those queries until it accepts a build. So if there are N
* derivations to be built, at most N hooks will be started.
*/
struct HookInstance
{
/**
@ -29,6 +37,15 @@ struct HookInstance
*/
Pid pid;
/**
* The remote machine on which we're building.
*
* @Invariant When the hook instance is owned by the `Worker`, this
* is the empty string. When it is owned by a `Goal`, this should be
* set.
*/
std::string machineName;
FdSink sink;
std::map<ActivityId, Activity> activities;