1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-12-15 05:21:03 +01:00

Implement support for Git hashing with SHA-256

SHA-256 is Git's next hash algorithm. The world is still basically stuck
on SHA-1 with git, but shouldn't be. We can at least do our part to get
ready.

On the C++ implementation side, only a little bit of generalization was
needed, and that was fairly straightforward. The tests (unit and
system) were actually bigger, and care was taken to make sure they
all cover both algorithms equally.
This commit is contained in:
John Ericson 2025-07-24 14:44:05 -04:00
parent 7f4acb9f10
commit d21e3f88ec
20 changed files with 350 additions and 181 deletions

View file

@ -107,8 +107,11 @@ static std::string makeType(const MixStoreDirMethods & store, std::string && typ
StorePath MixStoreDirMethods::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const
{
if (info.method == FileIngestionMethod::Git && info.hash.algo != HashAlgorithm::SHA1)
throw Error("Git file ingestion must use SHA-1 hash");
if (info.method == FileIngestionMethod::Git
&& !(info.hash.algo == HashAlgorithm::SHA1 || info.hash.algo == HashAlgorithm::SHA256)) {
throw Error(
"Git file ingestion must use SHA-1 or SHA-256 hash, but instead using: %s", printHashAlgo(info.hash.algo));
}
if (info.hash.algo == HashAlgorithm::SHA256 && info.method == FileIngestionMethod::NixArchive) {
return makeStorePath(makeType(*this, "source", info.references), info.hash, name);

44
src/libutil-tests/data/git/check-data.sh Normal file → Executable file
View file

@ -2,30 +2,34 @@
set -eu -o pipefail
export TEST_ROOT=$(realpath ${TMPDIR:-/tmp}/nix-test)/git-hashing/check-data
mkdir -p $TEST_ROOT
TEST_ROOT=$(realpath "${TMPDIR:-/tmp}/nix-test")/git-hashing/check-data
export TEST_ROOT
mkdir -p "$TEST_ROOT"
repo="$TEST_ROOT/scratch"
git init "$repo"
for hash in sha1 sha256; do
repo="$TEST_ROOT/scratch-$hash"
git init "$repo" --object-format="$hash"
git -C "$repo" config user.email "you@example.com"
git -C "$repo" config user.name "Your Name"
git -C "$repo" config user.email "you@example.com"
git -C "$repo" config user.name "Your Name"
# `-w` to write for tree test
freshlyAddedHash=$(git -C "$repo" hash-object -w -t blob --stdin < "./hello-world.bin")
encodingHash=$(sha1sum -b < "./hello-world-blob.bin" | head -c 40)
# `-w` to write for tree test
freshlyAddedHash=$(git -C "$repo" hash-object -w -t blob --stdin < "./hello-world.bin")
encodingHash=$("${hash}sum" -b < "./hello-world-blob.bin" | sed 's/ .*//')
# If the hashes match, then `hello-world-blob.bin` must be the encoding
# of `hello-world.bin`.
[[ "$encodingHash" == "$freshlyAddedHash" ]]
# If the hashes match, then `hello-world-blob.bin` must be the encoding
# of `hello-world.bin`.
[[ "$encodingHash" == "$freshlyAddedHash" ]]
# Create empty directory object for tree test
echo -n | git -C "$repo" hash-object -w -t tree --stdin
# Create empty directory object for tree test
echo -n | git -C "$repo" hash-object -w -t tree --stdin
# Relies on both child hashes already existing in the git store
freshlyAddedHash=$(git -C "$repo" mktree < "./tree.txt")
encodingHash=$(sha1sum -b < "./tree.bin" | head -c 40)
# Relies on both child hashes already existing in the git store
tree=tree-${hash}
freshlyAddedHash=$(git -C "$repo" mktree < "${tree}.txt")
encodingHash=$("${hash}sum" -b < "${tree}.bin" | sed 's/ .*//')
# If the hashes match, then `tree.bin` must be the encoding of the
# directory denoted by `tree.txt` interpreted as git directory listing.
[[ "$encodingHash" == "$freshlyAddedHash" ]]
# If the hashes match, then `${tree}.bin` must be the encoding of the
# directory denoted by `${tree}.txt` interpreted as a git directory listing.
[[ "$encodingHash" == "$freshlyAddedHash" ]]
done

Binary file not shown.

View file

@ -0,0 +1,4 @@
100644 blob ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1 Foo
100755 blob ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1 bAr
040000 tree 6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321 baZ
120000 blob ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1 quuX

View file

@ -97,7 +97,7 @@ TEST_F(GitTest, blob_write)
* so that we can check our test data in a small shell script test
* (`src/libutil-tests/data/git/check-data.sh`).
*/
const static Tree tree = {
const static Tree treeSha1 = {
{
"Foo",
{
@ -133,9 +133,48 @@ const static Tree tree = {
},
};
TEST_F(GitTest, tree_read)
/**
 * SHA-256 counterpart of `treeSha1`: the same conceptual directory
 * object, with every child hash given as a 64-hex-digit SHA-256 value.
 * See `treeSha1` for details.
 *
 * Entries (name -> mode):
 *  - `Foo`  -> regular file
 *  - `bAr`  -> executable file
 *  - `baZ/` -> directory (note the trailing slash in the key)
 *  - `quuX` -> symlink
 *
 * `Foo`, `bAr` and `quuX` all use the same blob hash; `baZ/` has its own
 * tree hash.
 *
 * NOTE(review): presumably these hashes have to stay in sync with the
 * `tree-sha256.txt` / `tree-sha256.bin` test data — confirm before
 * changing either side.
 */
const static Tree treeSha256 = {
{
"Foo",
{
.mode = Mode::Regular,
.hash = Hash::parseAny(
"ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1", HashAlgorithm::SHA256),
},
},
{
"bAr",
{
.mode = Mode::Executable,
.hash = Hash::parseAny(
"ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1", HashAlgorithm::SHA256),
},
},
{
"baZ/",
{
.mode = Mode::Directory,
.hash = Hash::parseAny(
"6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321", HashAlgorithm::SHA256),
},
},
{
"quuX",
{
.mode = Mode::Symlink,
.hash = Hash::parseAny(
"ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1", HashAlgorithm::SHA256),
},
},
};
static auto mkTreeReadTest(HashAlgorithm hashAlgo, Tree tree, const ExperimentalFeatureSettings & mockXpSettings)
{
readTest("tree.bin", [&](const auto & encoded) {
return [hashAlgo, tree, mockXpSettings](const auto & encoded) {
StringSource in{encoded};
NullFileSystemObjectSink out;
Tree got;
@ -144,6 +183,7 @@ TEST_F(GitTest, tree_read)
out,
CanonPath::root,
in,
hashAlgo,
[&](auto & name, auto entry) {
auto name2 = std::string{name.rel()};
if (entry.mode == Mode::Directory)
@ -153,14 +193,33 @@ TEST_F(GitTest, tree_read)
mockXpSettings);
ASSERT_EQ(got, tree);
};
}
/**
 * Passes the checker built by `mkTreeReadTest` for `HashAlgorithm::SHA1`
 * and `treeSha1` to `readTest` under the name `tree-sha1.bin`.
 *
 * NOTE(review): presumably `readTest` loads that golden file and feeds
 * its contents to the checker — confirm against the test fixture.
 */
TEST_F(GitTest, tree_sha1_read)
{
readTest("tree-sha1.bin", mkTreeReadTest(HashAlgorithm::SHA1, treeSha1, mockXpSettings));
}
/**
 * Passes the checker built by `mkTreeReadTest` for `HashAlgorithm::SHA256`
 * and `treeSha256` to `readTest` under the name `tree-sha256.bin`.
 *
 * NOTE(review): presumably `readTest` loads that golden file and feeds
 * its contents to the checker — confirm against the test fixture.
 */
TEST_F(GitTest, tree_sha256_read)
{
readTest("tree-sha256.bin", mkTreeReadTest(HashAlgorithm::SHA256, treeSha256, mockXpSettings));
}
/**
 * Serialises `treeSha1` with `dumpTree` and hands the resulting bytes to
 * `writeTest` under the name `tree-sha1.bin`.
 */
TEST_F(GitTest, tree_sha1_write)
{
    writeTest("tree-sha1.bin", [&]() {
        // Collect the encoded tree in memory so it can be returned as a string.
        StringSink encoded;
        dumpTree(treeSha1, encoded, mockXpSettings);
        return encoded.s;
    });
}
TEST_F(GitTest, tree_write)
TEST_F(GitTest, tree_sha256_write)
{
writeTest("tree.bin", [&]() {
writeTest("tree-sha256.bin", [&]() {
StringSink s;
dumpTree(tree, s, mockXpSettings);
dumpTree(treeSha256, s, mockXpSettings);
return s.s;
});
}
@ -202,51 +261,54 @@ TEST_F(GitTest, both_roundrip)
},
};
std::map<Hash, std::string> cas;
for (const auto hashAlgo : {HashAlgorithm::SHA1, HashAlgorithm::SHA256}) {
std::map<Hash, std::string> cas;
std::function<DumpHook> dumpHook;
dumpHook = [&](const SourcePath & path) {
StringSink s;
HashSink hashSink{HashAlgorithm::SHA1};
TeeSink s2{s, hashSink};
auto mode = dump(path, s2, dumpHook, defaultPathFilter, mockXpSettings);
auto hash = hashSink.finish().first;
cas.insert_or_assign(hash, std::move(s.s));
return TreeEntry{
.mode = mode,
.hash = hash,
std::function<DumpHook> dumpHook;
dumpHook = [&](const SourcePath & path) {
StringSink s;
HashSink hashSink{hashAlgo};
TeeSink s2{s, hashSink};
auto mode = dump(path, s2, dumpHook, defaultPathFilter, mockXpSettings);
auto hash = hashSink.finish().first;
cas.insert_or_assign(hash, std::move(s.s));
return TreeEntry{
.mode = mode,
.hash = hash,
};
};
};
auto root = dumpHook({files});
auto root = dumpHook({files});
auto files2 = make_ref<MemorySourceAccessor>();
auto files2 = make_ref<MemorySourceAccessor>();
MemorySink sinkFiles2{*files2};
MemorySink sinkFiles2{*files2};
std::function<void(const CanonPath, const Hash &, BlobMode)> mkSinkHook;
mkSinkHook = [&](auto prefix, auto & hash, auto blobMode) {
StringSource in{cas[hash]};
parse(
sinkFiles2,
prefix,
in,
blobMode,
[&](const CanonPath & name, const auto & entry) {
mkSinkHook(
prefix / name,
entry.hash,
// N.B. this cast would not be acceptable in real
// code, because it would make an assert reachable,
// but it should be harmless in this test.
static_cast<BlobMode>(entry.mode));
},
mockXpSettings);
};
std::function<void(const CanonPath, const Hash &, BlobMode)> mkSinkHook;
mkSinkHook = [&](auto prefix, auto & hash, auto blobMode) {
StringSource in{cas[hash]};
parse(
sinkFiles2,
prefix,
in,
blobMode,
hashAlgo,
[&](const CanonPath & name, const auto & entry) {
mkSinkHook(
prefix / name,
entry.hash,
// N.B. this cast would not be acceptable in real
// code, because it would make an assert reachable,
// but it should be harmless in this test.
static_cast<BlobMode>(entry.mode));
},
mockXpSettings);
};
mkSinkHook(CanonPath::root, root.hash, BlobMode::Regular);
mkSinkHook(CanonPath::root, root.hash, BlobMode::Regular);
ASSERT_EQ(files->root, files2->root);
EXPECT_EQ(files->root, files2->root);
}
}
TEST(GitLsRemote, parseSymrefLineWithReference)

View file

@ -59,7 +59,7 @@ void parseBlob(
{
xpSettings.require(Xp::GitHashing);
unsigned long long size = std::stoi(getStringUntil(source, 0));
const unsigned long long size = std::stoi(getStringUntil(source, 0));
auto doRegularFile = [&](bool executable) {
sink.createRegularFile(sinkPath, [&](auto & crf) {
@ -114,10 +114,11 @@ void parseTree(
FileSystemObjectSink & sink,
const CanonPath & sinkPath,
Source & source,
HashAlgorithm hashAlgo,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings)
{
unsigned long long size = std::stoi(getStringUntil(source, 0));
const unsigned long long size = std::stoi(getStringUntil(source, 0));
unsigned long long left = size;
sink.createDirectory(sinkPath);
@ -137,10 +138,15 @@ void parseTree(
left -= name.size();
left -= 1;
std::string hashs = getString(source, 20);
left -= 20;
const auto hashSize = regularHashSize(hashAlgo);
std::string hashs = getString(source, hashSize);
left -= hashSize;
Hash hash(HashAlgorithm::SHA1);
if (!(hashAlgo == HashAlgorithm::SHA1 || hashAlgo == HashAlgorithm::SHA256)) {
throw Error("Unsupported hash algorithm for git trees: %s", printHashAlgo(hashAlgo));
}
Hash hash(hashAlgo);
std::copy(hashs.begin(), hashs.end(), hash.hash);
hook(
@ -171,6 +177,7 @@ void parse(
const CanonPath & sinkPath,
Source & source,
BlobMode rootModeIfBlob,
HashAlgorithm hashAlgo,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings)
{
@ -183,7 +190,7 @@ void parse(
parseBlob(sink, sinkPath, source, rootModeIfBlob, xpSettings);
break;
case ObjectType::Tree:
parseTree(sink, sinkPath, source, hook, xpSettings);
parseTree(sink, sinkPath, source, hashAlgo, hook, xpSettings);
break;
default:
assert(false);
@ -210,9 +217,9 @@ std::optional<Mode> convertMode(SourceAccessor::Type type)
}
}
void restore(FileSystemObjectSink & sink, Source & source, std::function<RestoreHook> hook)
void restore(FileSystemObjectSink & sink, Source & source, HashAlgorithm hashAlgo, std::function<RestoreHook> hook)
{
parse(sink, CanonPath::root, source, BlobMode::Regular, [&](CanonPath name, TreeEntry entry) {
parse(sink, CanonPath::root, source, BlobMode::Regular, hashAlgo, [&](CanonPath name, TreeEntry entry) {
auto [accessor, from] = hook(entry.hash);
auto stat = accessor->lstat(from);
auto gotOpt = convertMode(stat.type);

View file

@ -20,23 +20,6 @@
namespace nix {
/**
 * Map a hash algorithm to its size via the per-algorithm `*HashSize`
 * constants.
 *
 * @param type the algorithm to look up.
 * @return the matching `*HashSize` constant.
 */
static size_t regularHashSize(HashAlgorithm type)
{
    switch (type) {
    case HashAlgorithm::MD5:
        return md5HashSize;
    case HashAlgorithm::SHA1:
        return sha1HashSize;
    case HashAlgorithm::SHA256:
        return sha256HashSize;
    case HashAlgorithm::SHA512:
        return sha512HashSize;
    case HashAlgorithm::BLAKE3:
        return blake3HashSize;
    }
    // Every enumerator returns from the switch above.
    unreachable();
}
const StringSet hashAlgorithms = {"blake3", "md5", "sha1", "sha256", "sha512"};
const StringSet hashFormats = {"base64", "nix32", "base16", "sri"};

View file

@ -94,10 +94,14 @@ void parseBlob(
BlobMode blobMode,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
/**
 * Parse a serialized Git tree object read from `source`.
 *
 * @param sink receives the directory that is created at `sinkPath`.
 *
 * @param sinkPath where in `sink` the directory is created.
 *
 * @param source the encoded tree to read from.
 *
 * @param hashAlgo determines how many bytes are read for each entry's
 * hash. Must be `HashAlgorithm::SHA1` or `HashAlgorithm::SHA256` for
 * now; any other algorithm makes the implementation throw an `Error`.
 *
 * @param hook called by the implementation while reading the tree
 * (NOTE(review): presumably once per entry — confirm in the definition).
 *
 * @param xpSettings experimental feature settings to use.
 */
void parseTree(
FileSystemObjectSink & sink,
const CanonPath & sinkPath,
Source & source,
HashAlgorithm hashAlgo,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
@ -107,12 +111,15 @@ void parseTree(
* @param rootModeIfBlob How to interpret a root blob, for which there is no
* disambiguating dir entry to answer that question. If the root is not
* a blob, this is ignored.
*
* @param hashAlgo must be `HashAlgorithm::SHA1` or `HashAlgorithm::SHA256` for now.
*/
void parse(
FileSystemObjectSink & sink,
const CanonPath & sinkPath,
Source & source,
BlobMode rootModeIfBlob,
HashAlgorithm hashAlgo,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
@ -131,8 +138,10 @@ using RestoreHook = SourcePath(Hash);
/**
* Wrapper around `parse` and `RestoreSink`
*
* @param hashAlgo must be `HashAlgorithm::SHA1` or `HashAlgorithm::SHA256` for now.
*/
void restore(FileSystemObjectSink & sink, Source & source, std::function<RestoreHook> hook);
void restore(FileSystemObjectSink & sink, Source & source, HashAlgorithm hashAlgo, std::function<RestoreHook> hook);
/**
* Dumps a single file to a sink

View file

@ -12,11 +12,26 @@ MakeError(BadHash, Error);
/**
 * The hash algorithms that can be selected.
 *
 * The underlying type is `char`. Numbering starts at 42 for `MD5`, and
 * each following enumerator is one greater than the previous one.
 */
enum class HashAlgorithm : char {
    MD5 = 42,
    SHA1,
    SHA256,
    SHA512,
    BLAKE3,
};
const int blake3HashSize = 32;
const int md5HashSize = 16;
const int sha1HashSize = 20;
const int sha256HashSize = 32;
const int sha512HashSize = 64;
/**
 * @param type the hash algorithm to query.
 *
 * @return the size, in bytes, of a hash for the given algorithm
 */
constexpr inline size_t regularHashSize(HashAlgorithm type)
{
    // Deliberately no `default:` label: with every enumerator listed, the
    // compiler can warn when a new `HashAlgorithm` is added without a
    // size being given here.
    switch (type) {
    case HashAlgorithm::BLAKE3:
        return 32;
    case HashAlgorithm::MD5:
        return 16;
    case HashAlgorithm::SHA1:
        return 20;
    case HashAlgorithm::SHA256:
        return 32;
    case HashAlgorithm::SHA512:
        return 64;
    }
    // Only reachable with a value that is not a `HashAlgorithm` enumerator.
    assert(false);
    // Every path must return: when `NDEBUG` is defined `assert` expands to
    // nothing, and flowing off the end of a non-void function is undefined
    // behaviour.
    return 0;
}
extern const StringSet hashAlgorithms;