From 53b4ea6c85e2d000b8badc923066866ba8de324c Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Mon, 27 Oct 2025 11:26:46 -0700 Subject: [PATCH] Add documentation for NAR spec in kaitai * Add a new flake check * Add unit tests * Add Kaitai spec * Updated documentation --- doc/manual/source/SUMMARY.md.in | 2 +- .../{nix-archive.md => nix-archive/index.md} | 12 ++ .../source/protocols/nix-archive/nar.ksy | 169 ++++++++++++++++++ .../file-system-object/content-address.md | 2 +- flake.nix | 4 + meson.build | 1 + packaging/components.nix | 5 + packaging/dev-shell.nix | 4 +- packaging/hydra.nix | 1 + src/kaitai-struct-checks/.version | 1 + src/kaitai-struct-checks/meson.build | 77 ++++++++ src/kaitai-struct-checks/nar.ksy | 1 + src/kaitai-struct-checks/nars | 1 + .../nix-meson-build-support | 1 + src/kaitai-struct-checks/package.nix | 75 ++++++++ src/kaitai-struct-checks/test-parse-nar.cc | 48 +++++ src/nix/nar.md | 2 +- 17 files changed, 402 insertions(+), 4 deletions(-) rename doc/manual/source/protocols/{nix-archive.md => nix-archive/index.md} (73%) create mode 100644 doc/manual/source/protocols/nix-archive/nar.ksy create mode 120000 src/kaitai-struct-checks/.version create mode 100644 src/kaitai-struct-checks/meson.build create mode 120000 src/kaitai-struct-checks/nar.ksy create mode 120000 src/kaitai-struct-checks/nars create mode 120000 src/kaitai-struct-checks/nix-meson-build-support create mode 100644 src/kaitai-struct-checks/package.nix create mode 100644 src/kaitai-struct-checks/test-parse-nar.cc diff --git a/doc/manual/source/SUMMARY.md.in b/doc/manual/source/SUMMARY.md.in index 7f3b1a103..287dff872 100644 --- a/doc/manual/source/SUMMARY.md.in +++ b/doc/manual/source/SUMMARY.md.in @@ -125,7 +125,7 @@ - [Deriving Path](protocols/json/deriving-path.md) - [Serving Tarball Flakes](protocols/tarball-fetcher.md) - [Store Path Specification](protocols/store-path.md) - - [Nix Archive (NAR) Format](protocols/nix-archive.md) + - [Nix Archive (NAR) 
Format](protocols/nix-archive/index.md) - [Derivation "ATerm" file format](protocols/derivation-aterm.md) - [C API](c-api.md) - [Glossary](glossary.md) diff --git a/doc/manual/source/protocols/nix-archive.md b/doc/manual/source/protocols/nix-archive/index.md similarity index 73% rename from doc/manual/source/protocols/nix-archive.md rename to doc/manual/source/protocols/nix-archive/index.md index 02a8dd464..4d25f63e2 100644 --- a/doc/manual/source/protocols/nix-archive.md +++ b/doc/manual/source/protocols/nix-archive/index.md @@ -41,3 +41,15 @@ The `str` function / parameterized rule is defined as follows: - `int(n)` = the 64-bit little endian representation of the number `n` - `pad(s)` = the byte sequence `s`, padded with 0s to a multiple of 8 byte + +## Kaitai Struct Specification + +The Nix Archive (NAR) format is also formally described using [Kaitai Struct](https://kaitai.io/), an Interface Description Language (IDL) for defining binary data structures. + +> Kaitai Struct provides a language-agnostic, machine-readable specification that can be compiled into parsers for various programming languages (e.g., C++, Python, Java, Rust). + +```yaml +{{#include nar.ksy}} +``` + +The source of the spec can be found [here](https://github.com/nixos/nix/blob/master/doc/manual/source/protocols/nix-archive/nar.ksy). Contributions and improvements to the spec are welcomed. \ No newline at end of file diff --git a/doc/manual/source/protocols/nix-archive/nar.ksy b/doc/manual/source/protocols/nix-archive/nar.ksy new file mode 100644 index 000000000..1cad09097 --- /dev/null +++ b/doc/manual/source/protocols/nix-archive/nar.ksy @@ -0,0 +1,169 @@ +meta: + id: nix_nar + title: Nix Archive (NAR) + file-extension: nar + endian: le +doc: | + Nix Archive (NAR) format. A simple, reproducible binary archive + format used by the Nix package manager to serialize file system objects. 
+doc-ref: 'https://nixos.org/manual/nix/stable/command-ref/nix-store.html#nar-format' + +seq: + - id: magic + type: padded_str + doc: "Magic string, must be 'nix-archive-1'." + valid: + expr: _.body == 'nix-archive-1' + - id: root_node + type: node + doc: "The root of the archive, which is always a single node." + +types: + padded_str: + doc: | + A string, prefixed with its length (u8le) and + padded with null bytes to the next 8-byte boundary. + seq: + - id: len_str + type: u8 + - id: body + type: str + size: len_str + encoding: 'ascii' + - id: padding + size: (8 - (len_str % 8)) % 8 + + node: + doc: "A single filesystem node (file, directory, or symlink)." + seq: + - id: open_paren + type: padded_str + doc: "Must be '(', a token starting the node definition." + valid: + expr: _.body == '(' + - id: type_key + type: padded_str + doc: "Must be 'type'." + valid: + expr: _.body == 'type' + - id: type_val + type: padded_str + doc: "The type of the node: 'regular', 'directory', or 'symlink'." + - id: body + type: + switch-on: type_val.body + cases: + "'directory'": type_directory + "'regular'": type_regular + "'symlink'": type_symlink + - id: close_paren + type: padded_str + valid: + expr: _.body == ')' + if: "type_val.body != 'directory'" + doc: "Must be ')', a token ending the node definition." + + type_directory: + doc: "A directory node, containing a list of entries. Entries must be ordered by their names." + seq: + - id: entries + type: dir_entry + repeat: until + repeat-until: _.kind.body == ')' + types: + dir_entry: + doc: "A single entry within a directory, or a terminator." + seq: + - id: kind + type: padded_str + valid: + expr: _.body == 'entry' or _.body == ')' + doc: "Must be 'entry' (for a child node) or ')' (for terminator)." 
+ - id: open_paren + type: padded_str + valid: + expr: _.body == '(' + if: 'kind.body == "entry"' + - id: name_key + type: padded_str + valid: + expr: _.body == 'name' + if: 'kind.body == "entry"' + - id: name + type: padded_str + if: 'kind.body == "entry"' + - id: node_key + type: padded_str + valid: + expr: _.body == 'node' + if: 'kind.body == "entry"' + - id: node + type: node + if: 'kind.body == "entry"' + doc: "The child node, present only if kind is 'entry'." + - id: close_paren + type: padded_str + valid: + expr: _.body == ')' + if: 'kind.body == "entry"' + instances: + is_terminator: + value: kind.body == ')' + + type_regular: + doc: "A regular file node." + seq: + # Read attributes (like 'executable') until we hit 'contents' + - id: attributes + type: reg_attribute + repeat: until + repeat-until: _.key.body == "contents" + # After the 'contents' token, read the file data + - id: file_data + type: file_content + instances: + is_executable: + value: 'attributes[0].key.body == "executable"' + doc: "True if the file has the 'executable' attribute." + types: + reg_attribute: + doc: "An attribute of the file, e.g., 'executable' or 'contents'." + seq: + - id: key + type: padded_str + doc: "Attribute key, e.g., 'executable' or 'contents'." + valid: + expr: _.body == 'executable' or _.body == 'contents' + - id: value + type: padded_str + if: 'key.body == "executable"' + valid: + expr: _.body == '' + doc: "Must be '' if key is 'executable'." + file_content: + doc: "The raw data of the file, prefixed by length." + seq: + - id: len_contents + type: u8 + # # This relies on the property of instances that they are lazily evaluated and cached. + - size: 0 + if: nar_offset < 0 + - id: contents + size: len_contents + - id: padding + size: (8 - (len_contents % 8)) % 8 + instances: + nar_offset: + value: _io.pos + + type_symlink: + doc: "A symbolic link node." + seq: + - id: target_key + type: padded_str + doc: "Must be 'target'." 
+ valid: + expr: _.body == 'target' + - id: target_val + type: padded_str + doc: "The destination path of the symlink." diff --git a/doc/manual/source/store/file-system-object/content-address.md b/doc/manual/source/store/file-system-object/content-address.md index 04a1021f1..5685de03e 100644 --- a/doc/manual/source/store/file-system-object/content-address.md +++ b/doc/manual/source/store/file-system-object/content-address.md @@ -46,7 +46,7 @@ be many different serialisations. For these reasons, Nix has its very own archive format—the Nix Archive (NAR) format, which is carefully designed to avoid the problems described above. -The exact specification of the Nix Archive format is in [specified here](../../protocols/nix-archive.md). +The exact specification of the Nix Archive format is [specified here](../../protocols/nix-archive/index.md). ## Content addressing File System Objects beyond a single serialisation pass diff --git a/flake.nix b/flake.nix index e25722d46..a70617b74 100644 --- a/flake.nix +++ b/flake.nix @@ -417,6 +417,10 @@ supportsCross = false; }; + "nix-kaitai-struct-checks" = { + supportsCross = false; + }; + "nix-perl-bindings" = { supportsCross = false; }; diff --git a/meson.build b/meson.build index f3158ea6d..c493dfad6 100644 --- a/meson.build +++ b/meson.build @@ -61,3 +61,4 @@ if get_option('unit-tests') endif subproject('nix-functional-tests') subproject('json-schema-checks') +subproject('kaitai-struct-checks') diff --git a/packaging/components.nix b/packaging/components.nix index f9d7b109a..bbd6208b9 100644 --- a/packaging/components.nix +++ b/packaging/components.nix @@ -443,6 +443,11 @@ in */ nix-json-schema-checks = callPackage ../src/json-schema-checks/package.nix { }; + /** + Kaitai struct schema validation checks + */ + nix-kaitai-struct-checks = callPackage ../src/kaitai-struct-checks/package.nix { }; + nix-perl-bindings = callPackage ../src/perl/package.nix { }; /** diff --git a/packaging/dev-shell.nix b/packaging/dev-shell.nix index 
153e7a3eb..ea12e079f 100644 --- a/packaging/dev-shell.nix +++ b/packaging/dev-shell.nix @@ -109,6 +109,7 @@ pkgs.nixComponents2.nix-util.overrideAttrs ( ++ pkgs.nixComponents2.nix-external-api-docs.nativeBuildInputs ++ pkgs.nixComponents2.nix-functional-tests.externalNativeBuildInputs ++ pkgs.nixComponents2.nix-json-schema-checks.externalNativeBuildInputs + ++ pkgs.nixComponents2.nix-kaitai-struct-checks.externalNativeBuildInputs ++ lib.optional ( !buildCanExecuteHost # Hack around https://github.com/nixos/nixpkgs/commit/bf7ad8cfbfa102a90463433e2c5027573b462479 @@ -148,6 +149,7 @@ pkgs.nixComponents2.nix-util.overrideAttrs ( ++ pkgs.nixComponents2.nix-expr.externalPropagatedBuildInputs ++ pkgs.nixComponents2.nix-cmd.buildInputs ++ lib.optionals havePerl pkgs.nixComponents2.nix-perl-bindings.externalBuildInputs - ++ lib.optional havePerl pkgs.perl; + ++ lib.optional havePerl pkgs.perl + ++ pkgs.nixComponents2.nix-kaitai-struct-checks.externalBuildInputs; } ) diff --git a/packaging/hydra.nix b/packaging/hydra.nix index 3bbb6c15b..67e2c0dfd 100644 --- a/packaging/hydra.nix +++ b/packaging/hydra.nix @@ -63,6 +63,7 @@ let "nix-cli" "nix-functional-tests" "nix-json-schema-checks" + "nix-kaitai-struct-checks" ] ++ lib.optionals enableBindings [ "nix-perl-bindings" diff --git a/src/kaitai-struct-checks/.version b/src/kaitai-struct-checks/.version new file mode 120000 index 000000000..b7badcd0c --- /dev/null +++ b/src/kaitai-struct-checks/.version @@ -0,0 +1 @@ +../../.version \ No newline at end of file diff --git a/src/kaitai-struct-checks/meson.build b/src/kaitai-struct-checks/meson.build new file mode 100644 index 000000000..f705a6744 --- /dev/null +++ b/src/kaitai-struct-checks/meson.build @@ -0,0 +1,77 @@ +# Run with: +# meson test --suite kaitai-struct +# Run with: (without shell / configure) +# nix build .#nix-kaitai-struct-checks + +project( + 'nix-kaitai-struct-checks', + 'cpp', + version : files('.version'), + default_options : [ + 'cpp_std=c++23', + # 
TODO(Qyriad): increase the warning level + 'warning_level=1', + 'errorlogs=true', # Please print logs for tests that fail + ], + meson_version : '>= 1.1', + license : 'LGPL-2.1-or-later', +) + +kaitai_runtime_dep = dependency('kaitai-struct-cpp-stl-runtime', required : true) +gtest_dep = dependency('gtest') +gtest_main_dep = dependency('gtest_main', required : true) + +# Find the Kaitai Struct compiler +ksc = find_program('ksc', required : true) + +kaitai_generated_srcs = custom_target( + 'kaitai-generated-sources', + input : [ 'nar.ksy' ], + output : [ 'nix_nar.cpp', 'nix_nar.h' ], + command : [ + ksc, + '@INPUT@', + '--target', 'cpp_stl', + '--outdir', + meson.current_build_dir(), + ], +) + +nar_kaitai_lib = library( + 'nix-nar-kaitai-lib', + kaitai_generated_srcs, + dependencies : [ kaitai_runtime_dep ], + install : true, +) + +nar_kaitai_dep = declare_dependency( + link_with : nar_kaitai_lib, + sources : kaitai_generated_srcs[1], +) + +# The nar directory is a committed symlink to the actual nars location +nars_dir = meson.current_source_dir() / 'nars' + +# Get all example files +nars = [ + 'dot.nar', +] + +test_deps = [ + nar_kaitai_dep, + kaitai_runtime_dep, + gtest_main_dep, +] + +this_exe = executable( + meson.project_name(), + 'test-parse-nar.cc', + dependencies : test_deps, +) + +test( + meson.project_name(), + this_exe, + env : [ 'NIX_NARS_DIR=' + nars_dir ], + protocol : 'gtest', +) diff --git a/src/kaitai-struct-checks/nar.ksy b/src/kaitai-struct-checks/nar.ksy new file mode 120000 index 000000000..c3a79a3b6 --- /dev/null +++ b/src/kaitai-struct-checks/nar.ksy @@ -0,0 +1 @@ +../../doc/manual/source/protocols/nix-archive/nar.ksy \ No newline at end of file diff --git a/src/kaitai-struct-checks/nars b/src/kaitai-struct-checks/nars new file mode 120000 index 000000000..ed0b4ecc7 --- /dev/null +++ b/src/kaitai-struct-checks/nars @@ -0,0 +1 @@ +../libutil-tests/data/nars \ No newline at end of file diff --git 
a/src/kaitai-struct-checks/nix-meson-build-support b/src/kaitai-struct-checks/nix-meson-build-support new file mode 120000 index 000000000..0b140f56b --- /dev/null +++ b/src/kaitai-struct-checks/nix-meson-build-support @@ -0,0 +1 @@ +../../nix-meson-build-support \ No newline at end of file diff --git a/src/kaitai-struct-checks/package.nix b/src/kaitai-struct-checks/package.nix new file mode 100644 index 000000000..263dd6fd1 --- /dev/null +++ b/src/kaitai-struct-checks/package.nix @@ -0,0 +1,75 @@ +# Run with: nix build .#nix-kaitai-struct-checks +{ + lib, + mkMesonDerivation, + gtest, + meson, + ninja, + pkg-config, + kaitai-struct-compiler, + fetchzip, + kaitai-struct-cpp-stl-runtime, + # Configuration Options + version, +}: +let + inherit (lib) fileset; +in +mkMesonDerivation (finalAttrs: { + pname = "nix-kaitai-struct-checks"; + inherit version; + + workDir = ./.; + fileset = lib.fileset.unions [ + ../../nix-meson-build-support + ./nix-meson-build-support + ./.version + ../../.version + ../../doc/manual/source/protocols/nix-archive/nar.ksy + ./nars + ../../src/libutil-tests/data + ./meson.build + ./nar.ksy + (fileset.fileFilter (file: file.hasExt "cc") ./.) + (fileset.fileFilter (file: file.hasExt "hh") ./.) 
+ ]; + + outputs = [ "out" ]; + + passthru.externalNativeBuildInputs = [ + # This can go away when we bump up to 25.11 + (kaitai-struct-compiler.overrideAttrs (finalAttrs: { + version = "0.11"; + src = fetchzip { + url = "https://github.com/kaitai-io/kaitai_struct_compiler/releases/download/${finalAttrs.version}/kaitai-struct-compiler-${finalAttrs.version}.zip"; + sha256 = "sha256-j9TEilijqgIiD0GbJfGKkU1FLio9aTopIi1v8QT1b+A="; + }; + })) + ]; + + passthru.externalBuildInputs = [ + gtest + kaitai-struct-cpp-stl-runtime + ]; + + buildInputs = finalAttrs.passthru.externalBuildInputs; + + nativeBuildInputs = [ + meson + ninja + pkg-config + ] + ++ finalAttrs.passthru.externalNativeBuildInputs; + + doCheck = true; + + mesonCheckFlags = [ "--print-errorlogs" ]; + + postInstall = '' + touch $out + ''; + + meta = { + platforms = lib.platforms.all; + }; +}) diff --git a/src/kaitai-struct-checks/test-parse-nar.cc b/src/kaitai-struct-checks/test-parse-nar.cc new file mode 100644 index 000000000..456ffb127 --- /dev/null +++ b/src/kaitai-struct-checks/test-parse-nar.cc @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include "nix_nar.h" + +static const std::vector NarFiles = { + "empty.nar", + "dot.nar", + "dotdot.nar", + "executable-after-contents.nar", + "invalid-tag-instead-of-contents.nar", + "name-after-node.nar", + "nul-character.nar", + "slash.nar", +}; + +class NarParseTest : public ::testing::TestWithParam +{}; + +TEST_P(NarParseTest, ParseSucceeds) +{ + const auto nar_file = GetParam(); + + const char * nars_dir_env = std::getenv("NIX_NARS_DIR"); + if (nars_dir_env == nullptr) { + FAIL() << "NIX_NARS_DIR environment variable not set."; + } + + const std::filesystem::path nar_file_path = std::filesystem::path(nars_dir_env) / nar_file; /* open the fixture named by the test parameter; NOTE(review): confirm every NarFiles entry is expected to parse, otherwise move the rejected ones to a negative test */ + ASSERT_TRUE(std::filesystem::exists(nar_file_path)) << "Missing test file: " << nar_file_path; + + std::ifstream ifs(nar_file_path, std::ifstream::binary); + ASSERT_TRUE(ifs.is_open()) << 
"Failed to open file: " << nar_file; + kaitai::kstream ks(&ifs); + nix_nar_t nar(&ks); + ASSERT_TRUE(nar.root_node() != nullptr) << "Failed to parse NAR file: " << nar_file; +} + +INSTANTIATE_TEST_SUITE_P(AllNarFiles, NarParseTest, ::testing::ValuesIn(NarFiles)); diff --git a/src/nix/nar.md b/src/nix/nar.md index b0f70ce93..c29c2092a 100644 --- a/src/nix/nar.md +++ b/src/nix/nar.md @@ -8,7 +8,7 @@ R""( # File format For the definition of the Nix Archive file format, see -[within the protocols chapter](@docroot@/protocols/nix-archive.md) +[within the protocols chapter](@docroot@/protocols/nix-archive/index.md) of the manual. [Nix Archive]: @docroot@/store/file-system-object/content-address.md#serial-nix-archive