1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-13 14:02:42 +01:00

Implement support for Git hashing with SHA-256

SHA-256 is Git's next hash algorithm. The world is still basically stuck
on SHA-1 with git, but shouldn't be. We can at least do our part to get
ready.

On the C++ implementation side, only a little bit of generalization was
needed, and that was fairly straightforward. The tests (unit and
system) were actually bigger, and care was taken to make sure they
all cover both algorithms equally.
This commit is contained in:
John Ericson 2025-07-24 14:44:05 -04:00
parent 7f4acb9f10
commit d21e3f88ec
20 changed files with 350 additions and 181 deletions

44
src/libutil-tests/data/git/check-data.sh Normal file → Executable file
View file

@ -2,30 +2,34 @@
set -eu -o pipefail
export TEST_ROOT=$(realpath ${TMPDIR:-/tmp}/nix-test)/git-hashing/check-data
mkdir -p $TEST_ROOT
TEST_ROOT=$(realpath "${TMPDIR:-/tmp}/nix-test")/git-hashing/check-data
export TEST_ROOT
mkdir -p "$TEST_ROOT"
repo="$TEST_ROOT/scratch"
git init "$repo"
for hash in sha1 sha256; do
repo="$TEST_ROOT/scratch-$hash"
git init "$repo" --object-format="$hash"
git -C "$repo" config user.email "you@example.com"
git -C "$repo" config user.name "Your Name"
git -C "$repo" config user.email "you@example.com"
git -C "$repo" config user.name "Your Name"
# `-w` to write for tree test
freshlyAddedHash=$(git -C "$repo" hash-object -w -t blob --stdin < "./hello-world.bin")
encodingHash=$(sha1sum -b < "./hello-world-blob.bin" | head -c 40)
# `-w` to write for tree test
freshlyAddedHash=$(git -C "$repo" hash-object -w -t blob --stdin < "./hello-world.bin")
encodingHash=$("${hash}sum" -b < "./hello-world-blob.bin" | sed 's/ .*//')
# If the hashes match, then `hello-world-blob.bin` must be the encoding
# of `hello-world.bin`.
[[ "$encodingHash" == "$freshlyAddedHash" ]]
# If the hashes match, then `hello-world-blob.bin` must be the encoding
# of `hello-world.bin`.
[[ "$encodingHash" == "$freshlyAddedHash" ]]
# Create empty directory object for tree test
echo -n | git -C "$repo" hash-object -w -t tree --stdin
# Create empty directory object for tree test
echo -n | git -C "$repo" hash-object -w -t tree --stdin
# Relies on both child hashes already existing in the git store
freshlyAddedHash=$(git -C "$repo" mktree < "./tree.txt")
encodingHash=$(sha1sum -b < "./tree.bin" | head -c 40)
# Relies on both child hashes already existing in the git store
tree=tree-${hash}
freshlyAddedHash=$(git -C "$repo" mktree < "${tree}.txt")
encodingHash=$("${hash}sum" -b < "${tree}.bin" | sed 's/ .*//')
# If the hashes match, then `tree.bin` must be the encoding of the
# directory denoted by `tree.txt` interpreted as git directory listing.
[[ "$encodingHash" == "$freshlyAddedHash" ]]
# If the hashes match, then `tree.bin` must be the encoding of the
# directory denoted by `tree.txt` interpreted as git directory listing.
[[ "$encodingHash" == "$freshlyAddedHash" ]]
done

Binary file not shown.

View file

@ -0,0 +1,4 @@
100644 blob ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1 Foo
100755 blob ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1 bAr
040000 tree 6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321 baZ
120000 blob ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1 quuX

View file

@ -97,7 +97,7 @@ TEST_F(GitTest, blob_write)
* so that we can check our test data in a small shell script test
* (`src/libutil-tests/data/git/check-data.sh`).
*/
const static Tree tree = {
const static Tree treeSha1 = {
{
"Foo",
{
@ -133,9 +133,48 @@ const static Tree tree = {
},
};
TEST_F(GitTest, tree_read)
/**
* Same conceptual object as `treeSha1`, just different hash algorithm.
* See that one for details.
*/
const static Tree treeSha256 = {
{
"Foo",
{
.mode = Mode::Regular,
.hash = Hash::parseAny(
"ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1", HashAlgorithm::SHA256),
},
},
{
"bAr",
{
.mode = Mode::Executable,
.hash = Hash::parseAny(
"ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1", HashAlgorithm::SHA256),
},
},
{
"baZ/",
{
.mode = Mode::Directory,
.hash = Hash::parseAny(
"6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321", HashAlgorithm::SHA256),
},
},
{
"quuX",
{
.mode = Mode::Symlink,
.hash = Hash::parseAny(
"ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1", HashAlgorithm::SHA256),
},
},
};
static auto mkTreeReadTest(HashAlgorithm hashAlgo, Tree tree, const ExperimentalFeatureSettings & mockXpSettings)
{
readTest("tree.bin", [&](const auto & encoded) {
return [hashAlgo, tree, mockXpSettings](const auto & encoded) {
StringSource in{encoded};
NullFileSystemObjectSink out;
Tree got;
@ -144,6 +183,7 @@ TEST_F(GitTest, tree_read)
out,
CanonPath::root,
in,
hashAlgo,
[&](auto & name, auto entry) {
auto name2 = std::string{name.rel()};
if (entry.mode == Mode::Directory)
@ -153,14 +193,33 @@ TEST_F(GitTest, tree_read)
mockXpSettings);
ASSERT_EQ(got, tree);
};
}
/**
 * Reads the golden file `tree-sha1.bin` and hands its contents to the
 * checker built by `mkTreeReadTest` for SHA-1 and `treeSha1`.
 */
TEST_F(GitTest, tree_sha1_read)
{
    auto checkDecodedTree = mkTreeReadTest(HashAlgorithm::SHA1, treeSha1, mockXpSettings);
    readTest("tree-sha1.bin", checkDecodedTree);
}
/**
 * Reads the golden file `tree-sha256.bin` and hands its contents to the
 * checker built by `mkTreeReadTest` for SHA-256 and `treeSha256`.
 */
TEST_F(GitTest, tree_sha256_read)
{
    auto checkDecodedTree = mkTreeReadTest(HashAlgorithm::SHA256, treeSha256, mockXpSettings);
    readTest("tree-sha256.bin", checkDecodedTree);
}
/**
 * Serialises `treeSha1` with `dumpTree` and passes the resulting bytes
 * to `writeTest` together with the golden file name `tree-sha1.bin`.
 */
TEST_F(GitTest, tree_sha1_write)
{
    auto encodeTree = [&]() {
        StringSink encoded;
        dumpTree(treeSha1, encoded, mockXpSettings);
        return encoded.s;
    };
    writeTest("tree-sha1.bin", encodeTree);
}
TEST_F(GitTest, tree_write)
TEST_F(GitTest, tree_sha256_write)
{
writeTest("tree.bin", [&]() {
writeTest("tree-sha256.bin", [&]() {
StringSink s;
dumpTree(tree, s, mockXpSettings);
dumpTree(treeSha256, s, mockXpSettings);
return s.s;
});
}
@ -202,51 +261,54 @@ TEST_F(GitTest, both_roundrip)
},
};
std::map<Hash, std::string> cas;
for (const auto hashAlgo : {HashAlgorithm::SHA1, HashAlgorithm::SHA256}) {
std::map<Hash, std::string> cas;
std::function<DumpHook> dumpHook;
dumpHook = [&](const SourcePath & path) {
StringSink s;
HashSink hashSink{HashAlgorithm::SHA1};
TeeSink s2{s, hashSink};
auto mode = dump(path, s2, dumpHook, defaultPathFilter, mockXpSettings);
auto hash = hashSink.finish().first;
cas.insert_or_assign(hash, std::move(s.s));
return TreeEntry{
.mode = mode,
.hash = hash,
std::function<DumpHook> dumpHook;
dumpHook = [&](const SourcePath & path) {
StringSink s;
HashSink hashSink{hashAlgo};
TeeSink s2{s, hashSink};
auto mode = dump(path, s2, dumpHook, defaultPathFilter, mockXpSettings);
auto hash = hashSink.finish().first;
cas.insert_or_assign(hash, std::move(s.s));
return TreeEntry{
.mode = mode,
.hash = hash,
};
};
};
auto root = dumpHook({files});
auto root = dumpHook({files});
auto files2 = make_ref<MemorySourceAccessor>();
auto files2 = make_ref<MemorySourceAccessor>();
MemorySink sinkFiles2{*files2};
MemorySink sinkFiles2{*files2};
std::function<void(const CanonPath, const Hash &, BlobMode)> mkSinkHook;
mkSinkHook = [&](auto prefix, auto & hash, auto blobMode) {
StringSource in{cas[hash]};
parse(
sinkFiles2,
prefix,
in,
blobMode,
[&](const CanonPath & name, const auto & entry) {
mkSinkHook(
prefix / name,
entry.hash,
// N.B. this cast would not be acceptable in real
// code, because it would make an assert reachable,
// but it should harmless in this test.
static_cast<BlobMode>(entry.mode));
},
mockXpSettings);
};
std::function<void(const CanonPath, const Hash &, BlobMode)> mkSinkHook;
mkSinkHook = [&](auto prefix, auto & hash, auto blobMode) {
StringSource in{cas[hash]};
parse(
sinkFiles2,
prefix,
in,
blobMode,
hashAlgo,
[&](const CanonPath & name, const auto & entry) {
mkSinkHook(
prefix / name,
entry.hash,
// N.B. this cast would not be acceptable in real
// code, because it would make an assert reachable,
// but it should harmless in this test.
static_cast<BlobMode>(entry.mode));
},
mockXpSettings);
};
mkSinkHook(CanonPath::root, root.hash, BlobMode::Regular);
mkSinkHook(CanonPath::root, root.hash, BlobMode::Regular);
ASSERT_EQ(files->root, files2->root);
EXPECT_EQ(files->root, files2->root);
}
}
TEST(GitLsRemote, parseSymrefLineWithReference)