Merge pull request #14296 from lovesegfault/nix-s3-more-tests

fix(nix-prefetch-url): correctly extract filename from URLs with query parameters

Commit: 6420879728
4 changed files with 108 additions and 7 deletions
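Why the fix matters: the old code derived the default store name with baseNameOf applied to the raw URL string, so everything after the last '/' survived, query string included; the new VerbatimURL::lastPathSegment() takes the last non-empty segment of the parsed URL's path instead. A minimal standalone sketch of the difference follows; the helper functions are illustrative stand-ins, not Nix's actual internals.

    // Standalone sketch (illustrative, not Nix's code) of the bug and the fix.
    // naiveBaseName mimics baseNameOf applied to the raw URL string;
    // lastPathSegment mimics what VerbatimURL::lastPathSegment() yields.
    #include <cassert>
    #include <string>
    #include <string_view>

    std::string naiveBaseName(std::string_view url) {
        auto slash = url.rfind('/');
        return std::string(slash == std::string_view::npos ? url : url.substr(slash + 1));
    }

    std::string lastPathSegment(std::string_view url) {
        auto path = url.substr(0, url.find_first_of("?#")); // drop query and fragment
        auto slash = path.rfind('/');
        return std::string(slash == std::string_view::npos ? path : path.substr(slash + 1));
    }

    int main() {
        std::string_view url = "https://example.com/path/to/file.txt?query=value";
        assert(naiveBaseName(url) == "file.txt?query=value"); // query leaks into the name
        assert(lastPathSegment(url) == "file.txt");           // what the fix produces
    }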
@@ -408,6 +408,17 @@ struct VerbatimURL
                 [](const ParsedURL & url) -> std::string_view { return url.scheme; }},
             raw);
     }
+
+    /**
+     * Get the last non-empty path segment from the URL.
+     *
+     * This is useful for extracting filenames from URLs.
+     * For example, "https://example.com/path/to/file.txt?query=value"
+     * returns "file.txt".
+     *
+     * @return The last non-empty path segment, or std::nullopt if no such segment exists.
+     */
+    std::optional<std::string> lastPathSegment() const;
 };
 
 std::ostream & operator<<(std::ostream & os, const VerbatimURL & url);
@@ -4,6 +4,7 @@
 #include "nix/util/split.hh"
 #include "nix/util/canon-path.hh"
 #include "nix/util/strings-inline.hh"
+#include "nix/util/file-system.hh"
 
 #include <boost/url.hpp>
 
@@ -440,4 +441,21 @@ std::ostream & operator<<(std::ostream & os, const VerbatimURL & url)
     return os;
 }
 
+std::optional<std::string> VerbatimURL::lastPathSegment() const
+{
+    try {
+        auto parsedUrl = parsed();
+        auto segments = parsedUrl.pathSegments(/*skipEmpty=*/true);
+        if (std::ranges::empty(segments))
+            return std::nullopt;
+        return segments.back();
+    } catch (BadURL &) {
+        // Fall back to baseNameOf for unparsable URLs
+        auto name = baseNameOf(to_string());
+        if (name.empty())
+            return std::nullopt;
+        return std::string{name};
+    }
+}
+
 } // namespace nix
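On the happy path the name now comes from the parsed URL's path with empty segments skipped, so trailing or doubled slashes cannot yield an empty name; only when parsing throws BadURL does the code fall back to baseNameOf on the verbatim text. A standalone sketch of the skip-empty rule, with a hand-rolled loop standing in for pathSegments(/*skipEmpty=*/true) followed by .back():

    // Sketch of "last non-empty path segment" semantics (assumed behaviour,
    // not Nix's implementation).
    #include <cassert>
    #include <optional>
    #include <string>
    #include <string_view>

    std::optional<std::string> lastNonEmptySegment(std::string_view path) {
        std::optional<std::string> last;
        size_t pos = 0;
        while (pos <= path.size()) {
            size_t next = path.find('/', pos);
            auto seg = path.substr(pos, next == std::string_view::npos ? path.size() - pos : next - pos);
            if (!seg.empty())
                last = std::string(seg); // remember the most recent non-empty segment
            if (next == std::string_view::npos)
                break;
            pos = next + 1;
        }
        return last;
    }

    int main() {
        assert(lastNonEmptySegment("/path/to/file.txt") == "file.txt");
        assert(lastNonEmptySegment("/path/to/") == "to"); // trailing slash: empty segment skipped
        assert(!lastNonEmptySegment("/").has_value());    // nothing usable -> std::nullopt
        assert(!lastNonEmptySegment("").has_value());
    }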
@@ -13,6 +13,8 @@
 #include "nix/cmd/misc-store-flags.hh"
 #include "nix/util/terminal.hh"
 #include "nix/util/environment-variables.hh"
+#include "nix/util/url.hh"
+#include "nix/store/path.hh"
 
 #include "man-pages.hh"
 
@@ -56,7 +58,7 @@ std::string resolveMirrorUrl(EvalState & state, const std::string & url)
 
 std::tuple<StorePath, Hash> prefetchFile(
     ref<Store> store,
-    std::string_view url,
+    const VerbatimURL & url,
     std::optional<std::string> name,
     HashAlgorithm hashAlgo,
     std::optional<Hash> expectedHash,
@@ -68,9 +70,15 @@ std::tuple<StorePath, Hash> prefetchFile(
 
     /* Figure out a name in the Nix store. */
     if (!name) {
-        name = baseNameOf(url);
-        if (name->empty())
-            throw Error("cannot figure out file name for '%s'", url);
+        name = url.lastPathSegment();
+        if (!name || name->empty())
+            throw Error("cannot figure out file name for '%s'", url.to_string());
+    }
+    try {
+        checkName(*name);
+    } catch (BadStorePathName & e) {
+        e.addTrace({}, "file name '%s' was extracted from URL '%s'", *name, url.to_string());
+        throw;
     }
 
     std::optional<StorePath> storePath;
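Note the validation added here: the extracted name is passed through checkName, and on failure the error gains a trace naming the offending URL before being rethrown. A rough sketch of that catch, annotate, and rethrow pattern, using a hypothetical exception type and validity rule (Nix's BadStorePathName and addTrace carry structured traces rather than a concatenated message):

    #include <iostream>
    #include <stdexcept>
    #include <string>

    struct BadStorePathNameLike : std::runtime_error {
        using std::runtime_error::runtime_error;
    };

    void checkNameLike(const std::string & name) {
        // Hypothetical validity rule, for illustration only.
        if (name.empty() || name[0] == '.')
            throw BadStorePathNameLike("invalid store path name '" + name + "'");
    }

    int main() {
        std::string name = ".hidden", url = "https://example.com/.hidden";
        try {
            try {
                checkNameLike(name);
            } catch (BadStorePathNameLike & e) {
                // Annotate with the originating URL, then rethrow to the caller,
                // mirroring e.addTrace(...) + throw in the diff above.
                throw BadStorePathNameLike(
                    std::string(e.what()) + "\n  note: file name '" + name
                    + "' was extracted from URL '" + url + "'");
            }
        } catch (BadStorePathNameLike & e) {
            std::cerr << e.what() << '\n';
        }
    }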
@@ -105,14 +113,14 @@ std::tuple<StorePath, Hash> prefetchFile(
 
         FdSink sink(fd.get());
 
-        FileTransferRequest req(VerbatimURL{url});
+        FileTransferRequest req(url);
         req.decompress = false;
         getFileTransfer()->download(std::move(req), sink);
     }
 
     /* Optionally unpack the file. */
     if (unpack) {
-        Activity act(*logger, lvlChatty, actUnknown, fmt("unpacking '%s'", url));
+        Activity act(*logger, lvlChatty, actUnknown, fmt("unpacking '%s'", url.to_string()));
         auto unpacked = (tmpDir.path() / "unpacked").string();
         createDirs(unpacked);
         unpackTarfile(tmpFile.string(), unpacked);
@@ -128,7 +136,7 @@ std::tuple<StorePath, Hash> prefetchFile(
         }
     }
 
-    Activity act(*logger, lvlChatty, actUnknown, fmt("adding '%s' to the store", url));
+    Activity act(*logger, lvlChatty, actUnknown, fmt("adding '%s' to the store", url.to_string()));
 
     auto info = store->addToStoreSlow(
         *name, PosixSourceAccessor::createAtRoot(tmpFile), method, hashAlgo, {}, expectedHash);
@@ -534,6 +534,69 @@ in
     print(" ✓ No compression applied by default")
 
+@setup_s3()
+def test_nix_prefetch_url(bucket):
+    """Test that nix-prefetch-url retrieves actual file content from S3, not empty files (issue #8862)"""
+    print("\n=== Testing nix-prefetch-url S3 Content Retrieval (issue #8862) ===")
+
+    # Create a test file with known content
+    test_content = "This is test content to verify S3 downloads work correctly!\n"
+    test_file_size = len(test_content)
+
+    server.succeed(f"echo -n '{test_content}' > /tmp/test-file.txt")
+
+    # Upload to S3
+    server.succeed(f"mc cp /tmp/test-file.txt minio/{bucket}/test-file.txt")
+
+    # Calculate expected hash
+    expected_hash = server.succeed(
+        "nix hash file --type sha256 --base32 /tmp/test-file.txt"
+    ).strip()
+
+    print(f" ✓ Uploaded test file to S3 ({test_file_size} bytes)")
+
+    # Use nix-prefetch-url to download from S3
+    s3_url = make_s3_url(bucket, path="/test-file.txt")
+
+    prefetch_output = client.succeed(
+        f"{ENV_WITH_CREDS} nix-prefetch-url --print-path '{s3_url}'"
+    )
+
+    # Extract hash and store path
+    # With --print-path, output is: <hash>\n<store-path>
+    lines = prefetch_output.strip().split('\n')
+    prefetch_hash = lines[0]  # First line is the hash
+    store_path = lines[1]  # Second line is the store path
+
+    # Verify hash matches
+    if prefetch_hash != expected_hash:
+        raise Exception(
+            f"Hash mismatch: expected {expected_hash}, got {prefetch_hash}"
+        )
+
+    print(" ✓ nix-prefetch-url completed with correct hash")
+
+    # Verify the downloaded file is NOT empty (the bug in #8862)
+    file_size = int(client.succeed(f"stat -c %s {store_path}").strip())
+
+    if file_size == 0:
+        raise Exception("Downloaded file is EMPTY - issue #8862 regression detected!")
+
+    if file_size != test_file_size:
+        raise Exception(
+            f"File size mismatch: expected {test_file_size}, got {file_size}"
+        )
+
+    print(f" ✓ File has correct size ({file_size} bytes, not empty)")
+
+    # Verify actual content matches by comparing hashes instead of printing entire file
+    downloaded_hash = client.succeed(f"nix hash file --type sha256 --base32 {store_path}").strip()
+
+    if downloaded_hash != expected_hash:
+        raise Exception(f"Content hash mismatch: expected {expected_hash}, got {downloaded_hash}")
+
+    print(" ✓ File content verified correct (hash matches)")
+
 
 # ============================================================================
 # Main Test Execution
 # ============================================================================
@@ -562,6 +625,7 @@ in
 test_compression_narinfo_gzip()
 test_compression_mixed()
 test_compression_disabled()
+test_nix_prefetch_url()
 
 print("\n" + "="*80)
 print("✓ All S3 Binary Cache Store Tests Passed!")