diff --git a/src/libexpr-c/nix_api_value.cc b/src/libexpr-c/nix_api_value.cc index ba0dfea7f..8e4325566 100644 --- a/src/libexpr-c/nix_api_value.cc +++ b/src/libexpr-c/nix_api_value.cc @@ -1,4 +1,5 @@ #include "nix/expr/attr-set.hh" +#include "nix/expr/eval-error.hh" #include "nix/util/configuration.hh" #include "nix/expr/eval.hh" #include "nix/store/globals.hh" @@ -89,8 +90,13 @@ static void nix_c_primop_wrapper( f(userdata, &ctx, (EvalState *) &state, (nix_value **) args, (nix_value *) &vTmp); if (ctx.last_err_code != NIX_OK) { - /* TODO: Throw different errors depending on the error code */ - state.error("Error from custom function: %s", *ctx.last_err).atPos(pos).debugThrow(); + if (ctx.last_err_code == NIX_ERR_RECOVERABLE) { + state.error("Recoverable error from custom function: %s", *ctx.last_err) + .atPos(pos) + .debugThrow(); + } else { + state.error("Error from custom function: %s", *ctx.last_err).atPos(pos).debugThrow(); + } } if (!vTmp.isValid()) { diff --git a/src/libexpr-tests/nix_api_expr.cc b/src/libexpr-tests/nix_api_expr.cc index 5e0868b6e..6d93866a5 100644 --- a/src/libexpr-tests/nix_api_expr.cc +++ b/src/libexpr-tests/nix_api_expr.cc @@ -437,4 +437,105 @@ TEST_F(nix_api_expr_test, nix_value_call_multi_no_args) assert_ctx_ok(); ASSERT_EQ(3, rInt); } + +// The following is a test case for retryable thunks. +// This is a requirement for the current way in which NixOps4 evaluates its deployment expressions. +// An alternative strategy could be implemented, but unwinding the stack may be a more efficient way to deal with many +// suspensions/resumptions, compared to e.g. using a thread or coroutine stack for each suspended dependency. This test +// models the essential bits of a deployment tool that uses such a strategy. + +// State for the retryable primop - simulates deployment resource availability +struct DeploymentResourceState +{ + bool vm_created = false; +}; + +static void primop_load_resource_input( + void * user_data, nix_c_context * context, EvalState * state, nix_value ** args, nix_value * ret) +{ + assert(context); + assert(state); + auto * resource_state = static_cast(user_data); + + // Get the resource input name argument + std::string input_name; + if (nix_get_string(context, args[0], OBSERVE_STRING(input_name)) != NIX_OK) + return; + + // Only handle "vm_id" input - throw for anything else + if (input_name != "vm_id") { + std::string error_msg = "unknown resource input: " + input_name; + nix_set_err_msg(context, NIX_ERR_NIX_ERROR, error_msg.c_str()); + return; + } + + if (resource_state->vm_created) { + // VM has been created, return the ID + nix_init_string(context, ret, "vm-12345"); + } else { + // VM not created yet, fail with dependency error + nix_set_err_msg(context, NIX_ERR_RECOVERABLE, "VM not yet created"); + } +} + +TEST_F(nix_api_expr_test, nix_expr_thunk_re_evaluation_after_deployment) +{ + // This test demonstrates NixOps4's requirement: a thunk calling a primop should be + // re-evaluable when deployment resources become available that were not available initially. + + DeploymentResourceState resource_state; + + PrimOp * primop = nix_alloc_primop( + ctx, + primop_load_resource_input, + 1, + "loadResourceInput", + nullptr, + "load a deployment resource input", + &resource_state); + assert_ctx_ok(); + + nix_value * primopValue = nix_alloc_value(ctx, state); + assert_ctx_ok(); + nix_init_primop(ctx, primopValue, primop); + assert_ctx_ok(); + + nix_value * inputName = nix_alloc_value(ctx, state); + assert_ctx_ok(); + nix_init_string(ctx, inputName, "vm_id"); + assert_ctx_ok(); + + // Create a single thunk by using nix_init_apply instead of nix_value_call + // This creates a lazy application that can be forced multiple times + nix_value * thunk = nix_alloc_value(ctx, state); + assert_ctx_ok(); + nix_init_apply(ctx, thunk, primopValue, inputName); + assert_ctx_ok(); + + // First force: VM not created yet, should fail + nix_value_force(ctx, state, thunk); + ASSERT_EQ(NIX_ERR_NIX_ERROR, nix_err_code(ctx)); + ASSERT_THAT(nix_err_msg(nullptr, ctx, nullptr), testing::HasSubstr("VM not yet created")); + + // Clear the error context for the next attempt + nix_c_context_free(ctx); + ctx = nix_c_context_create(); + + // Simulate deployment process: VM gets created + resource_state.vm_created = true; + + // Second force of the SAME thunk: this is where the "failed" value issue appears + // With failed value caching, this should fail because the thunk is marked as permanently failed + // Without failed value caching (or with retryable failures), this should succeed + nix_value_force(ctx, state, thunk); + + // If we get here without error, the thunk was successfully re-evaluated + assert_ctx_ok(); + + std::string result; + nix_get_string(ctx, thunk, OBSERVE_STRING(result)); + assert_ctx_ok(); + ASSERT_STREQ("vm-12345", result.c_str()); +} + } // namespace nixC diff --git a/src/libexpr/eval-error.cc b/src/libexpr/eval-error.cc index 7f0174715..e884c2295 100644 --- a/src/libexpr/eval-error.cc +++ b/src/libexpr/eval-error.cc @@ -113,5 +113,6 @@ template class EvalErrorBuilder; template class EvalErrorBuilder; template class EvalErrorBuilder; template class EvalErrorBuilder; +template class EvalErrorBuilder; } // namespace nix diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 545bb6cd9..8f12b54b3 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -1,4 +1,5 @@ #include "nix/expr/eval.hh" +#include "nix/expr/eval-error.hh" #include "nix/expr/eval-settings.hh" #include "nix/expr/primops.hh" #include "nix/expr/print-options.hh" @@ -27,6 +28,7 @@ #include "parser-tab.hh" #include +#include #include #include #include @@ -2062,6 +2064,54 @@ void ExprBlackHole::eval(EvalState & state, [[maybe_unused]] Env & env, Value & // always force this to be separate, otherwise forceValue may inline it and take // a massive perf hit [[gnu::noinline]] +void EvalState::handleEvalExceptionForThunk(Env * env, Expr * expr, Value & v, const PosIdx pos) +{ + v.mkThunk(env, expr); + tryFixupBlackHolePos(v, pos); + + auto e = std::current_exception(); + Value * recovery = nullptr; + try { + std::rethrow_exception(e); + } catch (const RecoverableEvalError & e) { + recovery = allocValue(); + } catch (...) { + } + if (recovery) { + recovery->mkThunk(env, expr); + } + v.mkFailed(e, recovery); +} + +[[gnu::noinline]] +void EvalState::handleEvalExceptionForApp(Value & v) +{ + auto e = std::current_exception(); + Value * recovery = nullptr; + try { + std::rethrow_exception(e); + } catch (const RecoverableEvalError & e) { + recovery = allocValue(); + } catch (...) { + } + if (recovery) { + *recovery = v; + } + v.mkFailed(e, recovery); +} + +[[gnu::noinline]] +void EvalState::handleEvalFailed(Value & v, const PosIdx pos) +{ + assert(v.isFailed()); + if (auto recoveryValue = v.failed()->recoveryValue) { + v = *recoveryValue; + forceValue(v, pos); + } else { + std::rethrow_exception(v.failed()->ex); + } +} + void EvalState::tryFixupBlackHolePos(Value & v, PosIdx pos) { if (!v.isBlackhole()) @@ -2070,7 +2120,8 @@ void EvalState::tryFixupBlackHolePos(Value & v, PosIdx pos) try { std::rethrow_exception(e); } catch (InfiniteRecursionError & e) { - e.atPos(positions[pos]); + if (!e.hasPos()) + e.atPos(positions[pos]); } catch (...) { } } diff --git a/src/libexpr/include/nix/expr/eval-error.hh b/src/libexpr/include/nix/expr/eval-error.hh index 38db9b706..bcbe967c7 100644 --- a/src/libexpr/include/nix/expr/eval-error.hh +++ b/src/libexpr/include/nix/expr/eval-error.hh @@ -56,6 +56,14 @@ MakeError(MissingArgumentError, EvalError); MakeError(InfiniteRecursionError, EvalError); MakeError(IFDError, EvalBaseError); +/** + * An evaluation error which should be retried instead of rethrown + * + * A RecoverableEvalError is not an EvalError, because we shouldn't cache it in the eval cache, as it should be retried + * anyway. + */ +MakeError(RecoverableEvalError, EvalBaseError); + struct InvalidPathError : public EvalError { public: diff --git a/src/libexpr/include/nix/expr/eval-inline.hh b/src/libexpr/include/nix/expr/eval-inline.hh index 558fb748c..19735e0c0 100644 --- a/src/libexpr/include/nix/expr/eval-inline.hh +++ b/src/libexpr/include/nix/expr/eval-inline.hh @@ -5,6 +5,7 @@ #include "nix/expr/eval.hh" #include "nix/expr/eval-error.hh" #include "nix/expr/eval-settings.hh" +#include namespace nix { @@ -97,19 +98,19 @@ void EvalState::forceValue(Value & v, const PosIdx pos) else ExprBlackHole::throwInfiniteRecursionError(*this, v); } catch (...) { - tryFixupBlackHolePos(v, pos); - v.mkFailed(); + handleEvalExceptionForThunk(env, expr, v, pos); throw; } } else if (v.isApp()) { try { callFunction(*v.app().left, *v.app().right, v, pos); } catch (...) { - v.mkFailed(); + handleEvalExceptionForApp(v); throw; } - } else if (v.isFailed()) - std::rethrow_exception(v.failed()->ex); + } else if (v.isFailed()) { + handleEvalFailed(v, pos); + } } [[gnu::always_inline]] diff --git a/src/libexpr/include/nix/expr/eval.hh b/src/libexpr/include/nix/expr/eval.hh index 5015a009b..827569cd9 100644 --- a/src/libexpr/include/nix/expr/eval.hh +++ b/src/libexpr/include/nix/expr/eval.hh @@ -610,8 +610,28 @@ public: */ inline void forceValue(Value & v, const PosIdx pos); +private: + + /** + * Internal support function for forceValue + * + * This code is factored out so that it's not in the heavily inlined hot path. + */ + void handleEvalExceptionForThunk(Env * env, Expr * expr, Value & v, const PosIdx pos); + + /** + * Internal support function for forceValue + * + * This code is factored out so that it's not in the heavily inlined hot path. + */ + void handleEvalExceptionForApp(Value & v); + + void handleEvalFailed(Value & v, PosIdx pos); + void tryFixupBlackHolePos(Value & v, PosIdx pos); +public: + /** * Force a value, then recursively force list elements and * attributes. diff --git a/src/libexpr/include/nix/expr/value.hh b/src/libexpr/include/nix/expr/value.hh index 69832deba..059414540 100644 --- a/src/libexpr/include/nix/expr/value.hh +++ b/src/libexpr/include/nix/expr/value.hh @@ -2,6 +2,7 @@ ///@file #include +#include #include #include #include @@ -279,9 +280,15 @@ struct ValueBase { public: std::exception_ptr ex; + /** + * Optional value for recovering `RecoverableEvalError` + * Must be set iff `ex` is an instance of `RecoverableEvalError`. + */ + Value * recoveryValue; - Failed(std::exception_ptr && ex) + Failed(std::exception_ptr ex, Value * recoveryValue) : ex(ex) + , recoveryValue(recoveryValue) { } }; @@ -1086,9 +1093,9 @@ public: setStorage(n); } - inline void mkFailed() noexcept + inline void mkFailed(std::exception_ptr e, Value * recovery) noexcept { - setStorage(new Value::Failed(std::current_exception())); + setStorage(new Value::Failed(e, recovery)); } bool isList() const noexcept diff --git a/src/libutil-c/nix_api_util.h b/src/libutil-c/nix_api_util.h index 5f42641d4..2b80e8c41 100644 --- a/src/libutil-c/nix_api_util.h +++ b/src/libutil-c/nix_api_util.h @@ -98,6 +98,13 @@ enum nix_err { */ NIX_ERR_NIX_ERROR = -4, + /** + * @brief A recoverable error occurred. + * + * This is used primarily by C API *consumers* to communicate that a failed + * primop call should be retried on the next evaluation attempt. + */ + NIX_ERR_RECOVERABLE = -5, }; typedef enum nix_err nix_err; diff --git a/src/libutil/include/nix/util/error.hh b/src/libutil/include/nix/util/error.hh index e564ca5b9..09cc19f07 100644 --- a/src/libutil/include/nix/util/error.hh +++ b/src/libutil/include/nix/util/error.hh @@ -51,6 +51,7 @@ struct LinesOfCode FIXME: Untangle this mess. Should there be AbstractPos as there used to be before 4feb7d9f71? */ struct Pos; +bool isEmpty(const Pos & pos); void printCodeLines(std::ostream & out, const std::string & prefix, const Pos & errPos, const LinesOfCode & loc); @@ -187,6 +188,11 @@ public: err.pos = pos; } + bool hasPos() + { + return err.pos.get() && !isEmpty(*err.pos.get()); + } + void pushTrace(Trace trace) { err.traces.push_front(trace); diff --git a/src/libutil/position.cc b/src/libutil/position.cc index 049c95474..9b93c4076 100644 --- a/src/libutil/position.cc +++ b/src/libutil/position.cc @@ -7,6 +7,11 @@ Pos::operator std::shared_ptr() const return std::make_shared(*this); } +bool isEmpty(const Pos & pos) +{ + return !pos.operator bool(); +} + std::optional Pos::getCodeLines() const { if (line == 0)