Implement support for Git hashing with SHA-256
SHA-256 is Git's next hash algorithm. The world is still largely stuck on SHA-1 with Git, but it shouldn't be, and we can at least do our part to get ready. On the C++ implementation side, only a little generalization was needed, and it was fairly straightforward. The tests (unit and system) were actually the bigger part of the change, and care was taken to make sure they cover both algorithms equally.
commit d21e3f88ec (parent 7f4acb9f10)

20 changed files with 350 additions and 181 deletions
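Background for the change (an illustrative aside, not part of the commit): a Git object ID is the chosen hash of a small type/size header followed by the raw content, so moving a repository to `--object-format=sha256` changes only the hash function and the digest width (64 hex characters instead of 40). Below is a minimal sketch of that encoding for a blob, assuming OpenSSL is available for the SHA-256 primitive (compile with `-lcrypto`); the sample content is made up.

// Minimal sketch (not from the commit): compute the Git SHA-256 object ID of a
// blob by hashing "blob <size>\0<content>", which is also what check-data.sh's
// pre-encoded hello-world-blob.bin fixture relies on. Assumes OpenSSL's
// one-shot SHA256() helper.
#include <openssl/sha.h>

#include <cstdio>
#include <string>

static std::string gitBlobIdSha256(const std::string & content)
{
    // Git prepends "<type> <size>\0" before hashing the object body.
    std::string object = "blob " + std::to_string(content.size());
    object.push_back('\0');
    object += content;

    unsigned char digest[SHA256_DIGEST_LENGTH];
    SHA256(reinterpret_cast<const unsigned char *>(object.data()), object.size(), digest);

    static const char hex[] = "0123456789abcdef";
    std::string id;
    for (unsigned char byte : digest) {
        id.push_back(hex[byte >> 4]);
        id.push_back(hex[byte & 0x0f]);
    }
    return id; // 64 hex characters, vs. 40 for SHA-1
}

int main()
{
    // Same ID that `git hash-object -t blob --stdin` would print inside a
    // repository created with `git init --object-format=sha256`.
    std::printf("%s\n", gitBlobIdSha256("hello world\n").c_str());
}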
@@ -107,8 +107,11 @@ static std::string makeType(const MixStoreDirMethods & store, std::string && typ
 StorePath MixStoreDirMethods::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const
 {
-    if (info.method == FileIngestionMethod::Git && info.hash.algo != HashAlgorithm::SHA1)
-        throw Error("Git file ingestion must use SHA-1 hash");
+    if (info.method == FileIngestionMethod::Git
+        && !(info.hash.algo == HashAlgorithm::SHA1 || info.hash.algo == HashAlgorithm::SHA256)) {
+        throw Error(
+            "Git file ingestion must use SHA-1 or SHA-256 hash, but instead using: %s", printHashAlgo(info.hash.algo));
+    }

     if (info.hash.algo == HashAlgorithm::SHA256 && info.method == FileIngestionMethod::NixArchive) {
         return makeStorePath(makeType(*this, "source", info.references), info.hash, name);
src/libutil-tests/data/git/check-data.sh | 44
Normal file → Executable file
@@ -2,30 +2,34 @@
 set -eu -o pipefail

-export TEST_ROOT=$(realpath ${TMPDIR:-/tmp}/nix-test)/git-hashing/check-data
-mkdir -p $TEST_ROOT
+TEST_ROOT=$(realpath "${TMPDIR:-/tmp}/nix-test")/git-hashing/check-data
+export TEST_ROOT
+mkdir -p "$TEST_ROOT"

-repo="$TEST_ROOT/scratch"
-git init "$repo"
+for hash in sha1 sha256; do
+    repo="$TEST_ROOT/scratch-$hash"
+    git init "$repo" --object-format="$hash"

-git -C "$repo" config user.email "you@example.com"
-git -C "$repo" config user.name "Your Name"
+    git -C "$repo" config user.email "you@example.com"
+    git -C "$repo" config user.name "Your Name"

-# `-w` to write for tree test
-freshlyAddedHash=$(git -C "$repo" hash-object -w -t blob --stdin < "./hello-world.bin")
-encodingHash=$(sha1sum -b < "./hello-world-blob.bin" | head -c 40)
+    # `-w` to write for tree test
+    freshlyAddedHash=$(git -C "$repo" hash-object -w -t blob --stdin < "./hello-world.bin")
+    encodingHash=$("${hash}sum" -b < "./hello-world-blob.bin" | sed 's/ .*//')

-# If the hashes match, then `hello-world-blob.bin` must be the encoding
-# of `hello-world.bin`.
-[[ "$encodingHash" == "$freshlyAddedHash" ]]
+    # If the hashes match, then `hello-world-blob.bin` must be the encoding
+    # of `hello-world.bin`.
+    [[ "$encodingHash" == "$freshlyAddedHash" ]]

-# Create empty directory object for tree test
-echo -n | git -C "$repo" hash-object -w -t tree --stdin
+    # Create empty directory object for tree test
+    echo -n | git -C "$repo" hash-object -w -t tree --stdin

-# Relies on both child hashes already existing in the git store
-freshlyAddedHash=$(git -C "$repo" mktree < "./tree.txt")
-encodingHash=$(sha1sum -b < "./tree.bin" | head -c 40)
+    # Relies on both child hashes already existing in the git store
+    tree=tree-${hash}
+    freshlyAddedHash=$(git -C "$repo" mktree < "${tree}.txt")
+    encodingHash=$("${hash}sum" -b < "${tree}.bin" | sed 's/ .*//')

-# If the hashes match, then `tree.bin` must be the encoding of the
-# directory denoted by `tree.txt` interpreted as git directory listing.
-[[ "$encodingHash" == "$freshlyAddedHash" ]]
+    # If the hashes match, then `tree.bin` must be the encoding of the
+    # directory denoted by `tree.txt` interpreted as git directory listing.
+    [[ "$encodingHash" == "$freshlyAddedHash" ]]
+done
src/libutil-tests/data/git/tree-sha256.bin | BIN
New file (binary file not shown)

src/libutil-tests/data/git/tree-sha256.txt | 4
New file
@@ -0,0 +1,4 @@
+100644 blob ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1 Foo
+100755 blob ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1 bAr
+040000 tree 6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321 baZ
+120000 blob ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1 quuX
@@ -97,7 +97,7 @@ TEST_F(GitTest, blob_write)
  * so that we can check our test data in a small shell script test test
  * (`src/libutil-tests/data/git/check-data.sh`).
  */
-const static Tree tree = {
+const static Tree treeSha1 = {
     {
         "Foo",
         {
@@ -133,9 +133,48 @@ const static Tree tree = {
     },
 };

-TEST_F(GitTest, tree_read)
+/**
+ * Same conceptual object as `treeSha1`, just different hash algorithm.
+ * See that one for details.
+ */
+const static Tree treeSha256 = {
+    {
+        "Foo",
+        {
+            .mode = Mode::Regular,
+            .hash = Hash::parseAny(
+                "ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1", HashAlgorithm::SHA256),
+        },
+    },
+    {
+        "bAr",
+        {
+            .mode = Mode::Executable,
+            .hash = Hash::parseAny(
+                "ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1", HashAlgorithm::SHA256),
+        },
+    },
+    {
+        "baZ/",
+        {
+            .mode = Mode::Directory,
+            .hash = Hash::parseAny(
+                "6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321", HashAlgorithm::SHA256),
+        },
+    },
+    {
+        "quuX",
+        {
+            .mode = Mode::Symlink,
+            .hash = Hash::parseAny(
+                "ce60f5ad78a08ac24872ef74d78b078f077be212e7a246893a1a5d957dfbc8b1", HashAlgorithm::SHA256),
+        },
+    },
+};
+
+static auto mkTreeReadTest(HashAlgorithm hashAlgo, Tree tree, const ExperimentalFeatureSettings & mockXpSettings)
 {
-    readTest("tree.bin", [&](const auto & encoded) {
+    return [hashAlgo, tree, mockXpSettings](const auto & encoded) {
         StringSource in{encoded};
         NullFileSystemObjectSink out;
         Tree got;
@@ -144,6 +183,7 @@ TEST_F(GitTest, tree_read)
             out,
             CanonPath::root,
             in,
+            hashAlgo,
             [&](auto & name, auto entry) {
                 auto name2 = std::string{name.rel()};
                 if (entry.mode == Mode::Directory)
@@ -153,14 +193,33 @@ TEST_F(GitTest, tree_read)
             mockXpSettings);

         ASSERT_EQ(got, tree);
-    });
+    };
 }

-TEST_F(GitTest, tree_write)
+TEST_F(GitTest, tree_sha1_read)
+{
+    readTest("tree-sha1.bin", mkTreeReadTest(HashAlgorithm::SHA1, treeSha1, mockXpSettings));
+}
+
+TEST_F(GitTest, tree_sha256_read)
+{
+    readTest("tree-sha256.bin", mkTreeReadTest(HashAlgorithm::SHA256, treeSha256, mockXpSettings));
+}
+
+TEST_F(GitTest, tree_sha1_write)
+{
+    writeTest("tree-sha1.bin", [&]() {
+        StringSink s;
+        dumpTree(treeSha1, s, mockXpSettings);
+        return s.s;
+    });
+}
+
+TEST_F(GitTest, tree_sha256_write)
 {
-    writeTest("tree.bin", [&]() {
+    writeTest("tree-sha256.bin", [&]() {
         StringSink s;
-        dumpTree(tree, s, mockXpSettings);
+        dumpTree(treeSha256, s, mockXpSettings);
         return s.s;
     });
 }
@@ -202,51 +261,54 @@ TEST_F(GitTest, both_roundrip)
         },
     };

-    std::map<Hash, std::string> cas;
+    for (const auto hashAlgo : {HashAlgorithm::SHA1, HashAlgorithm::SHA256}) {
+        std::map<Hash, std::string> cas;

-    std::function<DumpHook> dumpHook;
-    dumpHook = [&](const SourcePath & path) {
-        StringSink s;
-        HashSink hashSink{HashAlgorithm::SHA1};
-        TeeSink s2{s, hashSink};
-        auto mode = dump(path, s2, dumpHook, defaultPathFilter, mockXpSettings);
-        auto hash = hashSink.finish().first;
-        cas.insert_or_assign(hash, std::move(s.s));
-        return TreeEntry{
-            .mode = mode,
-            .hash = hash,
-        };
-    };
+        std::function<DumpHook> dumpHook;
+        dumpHook = [&](const SourcePath & path) {
+            StringSink s;
+            HashSink hashSink{hashAlgo};
+            TeeSink s2{s, hashSink};
+            auto mode = dump(path, s2, dumpHook, defaultPathFilter, mockXpSettings);
+            auto hash = hashSink.finish().first;
+            cas.insert_or_assign(hash, std::move(s.s));
+            return TreeEntry{
+                .mode = mode,
+                .hash = hash,
+            };
+        };

-    auto root = dumpHook({files});
+        auto root = dumpHook({files});

-    auto files2 = make_ref<MemorySourceAccessor>();
+        auto files2 = make_ref<MemorySourceAccessor>();

-    MemorySink sinkFiles2{*files2};
+        MemorySink sinkFiles2{*files2};

-    std::function<void(const CanonPath, const Hash &, BlobMode)> mkSinkHook;
-    mkSinkHook = [&](auto prefix, auto & hash, auto blobMode) {
-        StringSource in{cas[hash]};
-        parse(
-            sinkFiles2,
-            prefix,
-            in,
-            blobMode,
-            [&](const CanonPath & name, const auto & entry) {
-                mkSinkHook(
-                    prefix / name,
-                    entry.hash,
-                    // N.B. this cast would not be acceptable in real
-                    // code, because it would make an assert reachable,
-                    // but it should harmless in this test.
-                    static_cast<BlobMode>(entry.mode));
-            },
-            mockXpSettings);
-    };
+        std::function<void(const CanonPath, const Hash &, BlobMode)> mkSinkHook;
+        mkSinkHook = [&](auto prefix, auto & hash, auto blobMode) {
+            StringSource in{cas[hash]};
+            parse(
+                sinkFiles2,
+                prefix,
+                in,
+                blobMode,
+                hashAlgo,
+                [&](const CanonPath & name, const auto & entry) {
+                    mkSinkHook(
+                        prefix / name,
+                        entry.hash,
+                        // N.B. this cast would not be acceptable in real
+                        // code, because it would make an assert reachable,
+                        // but it should harmless in this test.
+                        static_cast<BlobMode>(entry.mode));
+                },
+                mockXpSettings);
+        };

-    mkSinkHook(CanonPath::root, root.hash, BlobMode::Regular);
+        mkSinkHook(CanonPath::root, root.hash, BlobMode::Regular);

-    ASSERT_EQ(files->root, files2->root);
+        EXPECT_EQ(files->root, files2->root);
+    }
 }

 TEST(GitLsRemote, parseSymrefLineWithReference)
@@ -59,7 +59,7 @@ void parseBlob(
 {
     xpSettings.require(Xp::GitHashing);

-    unsigned long long size = std::stoi(getStringUntil(source, 0));
+    const unsigned long long size = std::stoi(getStringUntil(source, 0));

     auto doRegularFile = [&](bool executable) {
         sink.createRegularFile(sinkPath, [&](auto & crf) {
@@ -114,10 +114,11 @@ void parseTree(
     FileSystemObjectSink & sink,
     const CanonPath & sinkPath,
     Source & source,
+    HashAlgorithm hashAlgo,
     std::function<SinkHook> hook,
     const ExperimentalFeatureSettings & xpSettings)
 {
-    unsigned long long size = std::stoi(getStringUntil(source, 0));
+    const unsigned long long size = std::stoi(getStringUntil(source, 0));
     unsigned long long left = size;

     sink.createDirectory(sinkPath);
@@ -137,10 +138,15 @@ void parseTree(
         left -= name.size();
         left -= 1;

-        std::string hashs = getString(source, 20);
-        left -= 20;
+        const auto hashSize = regularHashSize(hashAlgo);
+        std::string hashs = getString(source, hashSize);
+        left -= hashSize;

-        Hash hash(HashAlgorithm::SHA1);
+        if (!(hashAlgo == HashAlgorithm::SHA1 || hashAlgo == HashAlgorithm::SHA256)) {
+            throw Error("Unsupported hash algorithm for git trees: %s", printHashAlgo(hashAlgo));
+        }
+
+        Hash hash(hashAlgo);
         std::copy(hashs.begin(), hashs.end(), hash.hash);

         hook(
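Why `parseTree` now needs the hash algorithm (an illustrative aside, not part of the diff): inside a Git tree object each entry is encoded as `<mode> <name>\0` followed by the child hash as raw bytes, so the parser can only know whether to consume 20 bytes (SHA-1) or 32 bytes (SHA-256), as the `regularHashSize(hashAlgo)` call above does, if it is told the repository's algorithm. A self-contained sketch of that entry layout; the struct and helper names are made up for illustration.

// Standalone sketch (not from the commit) of the tree-entry layout that
// parseTree consumes: "<mode> <name>\0" followed by raw hash bytes whose
// length depends on the hash algorithm (20 for SHA-1, 32 for SHA-256).
#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

struct RawTreeEntry
{
    std::string mode;                // e.g. "100644", "40000"
    std::string name;                // file or directory name
    std::vector<unsigned char> hash; // 20 or 32 raw bytes, not hex
};

// Serialize one entry the way it appears inside a Git tree object body.
static std::string encodeEntry(const RawTreeEntry & entry)
{
    std::string out = entry.mode + " " + entry.name;
    out.push_back('\0');
    out.append(entry.hash.begin(), entry.hash.end());
    return out;
}

// Decode one entry, given the hash width implied by the repository's algorithm.
static RawTreeEntry decodeEntry(const std::string & in, std::size_t hashSize)
{
    RawTreeEntry entry;
    std::size_t space = in.find(' ');
    std::size_t nul = in.find('\0', space + 1);
    entry.mode = in.substr(0, space);
    entry.name = in.substr(space + 1, nul - space - 1);
    entry.hash.assign(in.begin() + nul + 1, in.begin() + nul + 1 + hashSize);
    return entry;
}

int main()
{
    RawTreeEntry entry{"100644", "Foo", std::vector<unsigned char>(32, 0xce)};
    auto wire = encodeEntry(entry);
    auto back = decodeEntry(wire, 32); // 32 because this is a SHA-256 tree
    std::printf("decoded %s %s with %zu raw hash bytes\n",
                back.mode.c_str(), back.name.c_str(), back.hash.size());
}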
@@ -171,6 +177,7 @@ void parse(
     const CanonPath & sinkPath,
     Source & source,
     BlobMode rootModeIfBlob,
+    HashAlgorithm hashAlgo,
     std::function<SinkHook> hook,
     const ExperimentalFeatureSettings & xpSettings)
 {
@@ -183,7 +190,7 @@ void parse(
         parseBlob(sink, sinkPath, source, rootModeIfBlob, xpSettings);
         break;
     case ObjectType::Tree:
-        parseTree(sink, sinkPath, source, hook, xpSettings);
+        parseTree(sink, sinkPath, source, hashAlgo, hook, xpSettings);
         break;
     default:
         assert(false);
@@ -210,9 +217,9 @@ std::optional<Mode> convertMode(SourceAccessor::Type type)
     }
 }

-void restore(FileSystemObjectSink & sink, Source & source, std::function<RestoreHook> hook)
+void restore(FileSystemObjectSink & sink, Source & source, HashAlgorithm hashAlgo, std::function<RestoreHook> hook)
 {
-    parse(sink, CanonPath::root, source, BlobMode::Regular, [&](CanonPath name, TreeEntry entry) {
+    parse(sink, CanonPath::root, source, BlobMode::Regular, hashAlgo, [&](CanonPath name, TreeEntry entry) {
         auto [accessor, from] = hook(entry.hash);
         auto stat = accessor->lstat(from);
         auto gotOpt = convertMode(stat.type);
@@ -20,23 +20,6 @@
 namespace nix {

-static size_t regularHashSize(HashAlgorithm type)
-{
-    switch (type) {
-    case HashAlgorithm::BLAKE3:
-        return blake3HashSize;
-    case HashAlgorithm::MD5:
-        return md5HashSize;
-    case HashAlgorithm::SHA1:
-        return sha1HashSize;
-    case HashAlgorithm::SHA256:
-        return sha256HashSize;
-    case HashAlgorithm::SHA512:
-        return sha512HashSize;
-    }
-    unreachable();
-}
-
 const StringSet hashAlgorithms = {"blake3", "md5", "sha1", "sha256", "sha512"};

 const StringSet hashFormats = {"base64", "nix32", "base16", "sri"};
@@ -94,10 +94,14 @@ void parseBlob(
     BlobMode blobMode,
     const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);

+/**
+ * @param hashAlgo must be `HashAlgo::SHA1` or `HashAlgo::SHA256` for now.
+ */
 void parseTree(
     FileSystemObjectSink & sink,
     const CanonPath & sinkPath,
     Source & source,
+    HashAlgorithm hashAlgo,
     std::function<SinkHook> hook,
     const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
@@ -107,12 +111,15 @@ void parseTree(
  * @param rootModeIfBlob How to interpret a root blob, for which there is no
  * disambiguating dir entry to answer that questino. If the root it not
  * a blob, this is ignored.
+ *
+ * @param hashAlgo must be `HashAlgo::SHA1` or `HashAlgo::SHA256` for now.
  */
 void parse(
     FileSystemObjectSink & sink,
     const CanonPath & sinkPath,
     Source & source,
     BlobMode rootModeIfBlob,
+    HashAlgorithm hashAlgo,
     std::function<SinkHook> hook,
     const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
@@ -131,8 +138,10 @@ using RestoreHook = SourcePath(Hash);

 /**
  * Wrapper around `parse` and `RestoreSink`
+ *
+ * @param hashAlgo must be `HashAlgo::SHA1` or `HashAlgo::SHA256` for now.
  */
-void restore(FileSystemObjectSink & sink, Source & source, std::function<RestoreHook> hook);
+void restore(FileSystemObjectSink & sink, Source & source, HashAlgorithm hashAlgo, std::function<RestoreHook> hook);

 /**
  * Dumps a single file to a sink
@@ -12,11 +12,26 @@ MakeError(BadHash, Error);

 enum struct HashAlgorithm : char { MD5 = 42, SHA1, SHA256, SHA512, BLAKE3 };

-const int blake3HashSize = 32;
-const int md5HashSize = 16;
-const int sha1HashSize = 20;
-const int sha256HashSize = 32;
-const int sha512HashSize = 64;
+/**
+ * @return the size of a hash for the given algorithm
+ */
+constexpr inline size_t regularHashSize(HashAlgorithm type)
+{
+    switch (type) {
+    case HashAlgorithm::BLAKE3:
+        return 32;
+    case HashAlgorithm::MD5:
+        return 16;
+    case HashAlgorithm::SHA1:
+        return 20;
+    case HashAlgorithm::SHA256:
+        return 32;
+    case HashAlgorithm::SHA512:
+        return 64;
+    default:
+        assert(false);
+    }
+}

 extern const StringSet hashAlgorithms;
|
|||
Loading…
Add table
Add a link
Reference in a new issue