
Merge pull request #14296 from lovesegfault/nix-s3-more-tests

fix(nix-prefetch-url): correctly extract filename from URLs with query parameters
Sergei Zimmerman 2025-10-20 19:42:22 +00:00 committed by GitHub
commit 6420879728
4 changed files with 108 additions and 7 deletions
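
The gist of the fix: nix-prefetch-url used to derive the store name from the raw URL string, so a URL with query parameters kept everything after the last slash, query string included. Below is a minimal stand-alone sketch of the two behaviors; the helper names are illustrative stand-ins, not the ones in the patch.

#include <iostream>
#include <optional>
#include <string>

// Naive extraction: everything after the last '/', query string and all.
// Roughly what a plain baseNameOf-style helper does on a raw URL string.
std::string naiveBaseName(const std::string & url)
{
    auto slash = url.rfind('/');
    return slash == std::string::npos ? url : url.substr(slash + 1);
}

// Query-aware extraction: drop '?...' / '#...' before taking the last path
// segment. A simplified stand-in for what the new lastPathSegment() provides.
std::optional<std::string> lastPathSegment(const std::string & url)
{
    auto path = url.substr(0, url.find_first_of("?#"));
    auto slash = path.rfind('/');
    auto segment = slash == std::string::npos ? path : path.substr(slash + 1);
    if (segment.empty())
        return std::nullopt;
    return segment;
}

int main()
{
    std::string url = "https://example.com/path/to/file.txt?query=value";
    std::cout << naiveBaseName(url) << "\n";    // file.txt?query=value
    std::cout << *lastPathSegment(url) << "\n"; // file.txt
}

The patch implements the real version of this through the parsed URL's path segments, as the diffs below show.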


@@ -408,6 +408,17 @@ struct VerbatimURL
[](const ParsedURL & url) -> std::string_view { return url.scheme; }},
raw);
}
+    /**
+     * Get the last non-empty path segment from the URL.
+     *
+     * This is useful for extracting filenames from URLs.
+     * For example, "https://example.com/path/to/file.txt?query=value"
+     * returns "file.txt".
+     *
+     * @return The last non-empty path segment, or std::nullopt if no such segment exists.
+     */
+    std::optional<std::string> lastPathSegment() const;
};
std::ostream & operator<<(std::ostream & os, const VerbatimURL & url);
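
The "last non-empty" wording matters: with empty segments skipped, trailing or doubled slashes never yield an empty result. A toy model of that behavior, assuming only the semantics described in the doc comment above (not the real implementation, which goes through the parsed URL):

#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Split a path into segments, dropping empty ones, then take the last.
// A toy model of pathSegments(/*skipEmpty=*/true) followed by .back().
std::optional<std::string> lastNonEmptySegment(const std::string & path)
{
    std::vector<std::string> segments;
    size_t start = 0;
    while (start <= path.size()) {
        auto end = path.find('/', start);
        if (end == std::string::npos)
            end = path.size();
        auto seg = path.substr(start, end - start);
        if (!seg.empty())
            segments.push_back(seg); // skipEmpty: ignore "" from "//" or a trailing "/"
        start = end + 1;
    }
    if (segments.empty())
        return std::nullopt;
    return segments.back();
}

int main()
{
    std::cout << lastNonEmptySegment("/path/to/file.txt").value() << "\n"; // file.txt
    std::cout << lastNonEmptySegment("/dir/sub/").value() << "\n";         // sub
    std::cout << (lastNonEmptySegment("/") ? "got" : "nullopt") << "\n";   // nullopt
}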


@@ -4,6 +4,7 @@
#include "nix/util/split.hh"
#include "nix/util/canon-path.hh"
#include "nix/util/strings-inline.hh"
#include "nix/util/file-system.hh"
#include <boost/url.hpp>
@@ -440,4 +441,21 @@ std::ostream & operator<<(std::ostream & os, const VerbatimURL & url)
return os;
}
+std::optional<std::string> VerbatimURL::lastPathSegment() const
+{
+    try {
+        auto parsedUrl = parsed();
+        auto segments = parsedUrl.pathSegments(/*skipEmpty=*/true);
+        if (std::ranges::empty(segments))
+            return std::nullopt;
+        return segments.back();
+    } catch (BadURL &) {
+        // Fall back to baseNameOf for unparsable URLs
+        auto name = baseNameOf(to_string());
+        if (name.empty())
+            return std::nullopt;
+        return std::string{name};
+    }
+}
} // namespace nix


@@ -13,6 +13,8 @@
#include "nix/cmd/misc-store-flags.hh"
#include "nix/util/terminal.hh"
#include "nix/util/environment-variables.hh"
#include "nix/util/url.hh"
#include "nix/store/path.hh"
#include "man-pages.hh"
@@ -56,7 +58,7 @@ std::string resolveMirrorUrl(EvalState & state, const std::string & url)
std::tuple<StorePath, Hash> prefetchFile(
ref<Store> store,
-    std::string_view url,
+    const VerbatimURL & url,
std::optional<std::string> name,
HashAlgorithm hashAlgo,
std::optional<Hash> expectedHash,
@@ -68,9 +70,15 @@ std::tuple<StorePath, Hash> prefetchFile(
/* Figure out a name in the Nix store. */
if (!name) {
-        name = baseNameOf(url);
-        if (name->empty())
-            throw Error("cannot figure out file name for '%s'", url);
+        name = url.lastPathSegment();
+        if (!name || name->empty())
+            throw Error("cannot figure out file name for '%s'", url.to_string());
}
+    try {
+        checkName(*name);
+    } catch (BadStorePathName & e) {
+        e.addTrace({}, "file name '%s' was extracted from URL '%s'", *name, url.to_string());
+        throw;
+    }
std::optional<StorePath> storePath;
@@ -105,14 +113,14 @@ std::tuple<StorePath, Hash> prefetchFile(
FdSink sink(fd.get());
-        FileTransferRequest req(VerbatimURL{url});
+        FileTransferRequest req(url);
req.decompress = false;
getFileTransfer()->download(std::move(req), sink);
}
/* Optionally unpack the file. */
if (unpack) {
-        Activity act(*logger, lvlChatty, actUnknown, fmt("unpacking '%s'", url));
+        Activity act(*logger, lvlChatty, actUnknown, fmt("unpacking '%s'", url.to_string()));
auto unpacked = (tmpDir.path() / "unpacked").string();
createDirs(unpacked);
unpackTarfile(tmpFile.string(), unpacked);
@@ -128,7 +136,7 @@ std::tuple<StorePath, Hash> prefetchFile(
}
}
-    Activity act(*logger, lvlChatty, actUnknown, fmt("adding '%s' to the store", url));
+    Activity act(*logger, lvlChatty, actUnknown, fmt("adding '%s' to the store", url.to_string()));
auto info = store->addToStoreSlow(
*name, PosixSourceAccessor::createAtRoot(tmpFile), method, hashAlgo, {}, expectedHash);
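
The checkName guard added above means a name that was extracted from a URL but is still invalid as a store path name now fails with context attached rather than silently. A minimal sketch of the add-trace-and-rethrow idiom, with a hypothetical TracedError standing in for nix's BadStorePathName (the check itself is also simplified):

#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in for nix's BadStorePathName: an exception that can
// accumulate trace frames as it propagates.
struct TracedError : std::runtime_error
{
    std::vector<std::string> trace;
    explicit TracedError(std::string msg) : std::runtime_error(std::move(msg)) {}
    void addTrace(std::string frame) { trace.push_back(std::move(frame)); }
};

// Stand-in name check: real store path names reject characters like '?'.
void checkName(const std::string & name)
{
    if (name.find('?') != std::string::npos)
        throw TracedError("invalid store path name '" + name + "'");
}

int main()
{
    std::string url = "https://example.com/file.txt?query=value";
    std::string name = url.substr(url.rfind('/') + 1); // naive extraction keeps the query
    try {
        try {
            checkName(name);
        } catch (TracedError & e) {
            // Annotate the error with where the bad name came from, then rethrow.
            e.addTrace("file name '" + name + "' was extracted from URL '" + url + "'");
            throw;
        }
    } catch (TracedError & e) {
        std::cerr << e.what() << "\n";
        for (auto & frame : e.trace)
            std::cerr << "  " << frame << "\n";
    }
}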


@@ -534,6 +534,69 @@ in
print(" No compression applied by default")
+@setup_s3()
+def test_nix_prefetch_url(bucket):
+    """Test that nix-prefetch-url retrieves actual file content from S3, not empty files (issue #8862)"""
+    print("\n=== Testing nix-prefetch-url S3 Content Retrieval (issue #8862) ===")
+
+    # Create a test file with known content
+    test_content = "This is test content to verify S3 downloads work correctly!\n"
+    test_file_size = len(test_content)
+    server.succeed(f"echo -n '{test_content}' > /tmp/test-file.txt")
+
+    # Upload to S3
+    server.succeed(f"mc cp /tmp/test-file.txt minio/{bucket}/test-file.txt")
+
+    # Calculate expected hash
+    expected_hash = server.succeed(
+        "nix hash file --type sha256 --base32 /tmp/test-file.txt"
+    ).strip()
+
+    print(f" Uploaded test file to S3 ({test_file_size} bytes)")
+
+    # Use nix-prefetch-url to download from S3
+    s3_url = make_s3_url(bucket, path="/test-file.txt")
+    prefetch_output = client.succeed(
+        f"{ENV_WITH_CREDS} nix-prefetch-url --print-path '{s3_url}'"
+    )
+
+    # Extract hash and store path
+    # With --print-path, output is: <hash>\n<store-path>
+    lines = prefetch_output.strip().split('\n')
+    prefetch_hash = lines[0]  # First line is the hash
+    store_path = lines[1]  # Second line is the store path
+
+    # Verify hash matches
+    if prefetch_hash != expected_hash:
+        raise Exception(
+            f"Hash mismatch: expected {expected_hash}, got {prefetch_hash}"
+        )
+    print(" nix-prefetch-url completed with correct hash")
+
+    # Verify the downloaded file is NOT empty (the bug in #8862)
+    file_size = int(client.succeed(f"stat -c %s {store_path}").strip())
+    if file_size == 0:
+        raise Exception("Downloaded file is EMPTY - issue #8862 regression detected!")
+    if file_size != test_file_size:
+        raise Exception(
+            f"File size mismatch: expected {test_file_size}, got {file_size}"
+        )
+    print(f" File has correct size ({file_size} bytes, not empty)")
+
+    # Verify actual content matches by comparing hashes instead of printing entire file
+    downloaded_hash = client.succeed(f"nix hash file --type sha256 --base32 {store_path}").strip()
+    if downloaded_hash != expected_hash:
+        raise Exception(f"Content hash mismatch: expected {expected_hash}, got {downloaded_hash}")
+    print(" File content verified correct (hash matches)")
# ============================================================================
# Main Test Execution
# ============================================================================
@@ -562,6 +625,7 @@ in
test_compression_narinfo_gzip()
test_compression_mixed()
test_compression_disabled()
+test_nix_prefetch_url()
print("\n" + "="*80)
print(" All S3 Binary Cache Store Tests Passed!")