Fix ParsedURL handling of %2F in URL paths

See the new extensive doxygen in `url.hh`.
This fixes fetching gitlab: flakes.

Paths are now stored as a std::vector of individual path
segments, which can themselves contain path separators '/' (%2F).
This is necessary to make GitLab's /projects/ API work.

Co-authored-by: John Ericson <John.Ericson@Obsidian.Systems>
Co-authored-by: Sergei Zimmerman <sergei@zimmerman.foo>
Jörg Thalheim 2025-08-26 12:49:28 +02:00 committed by Sergei Zimmerman
parent 6839f3de55
commit c436b7a32a
19 changed files with 446 additions and 117 deletions
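
As a concrete illustration of the new path representation (a minimal sketch, not part of the commit, assuming the `parseURL`/`ParsedURL` API documented in the `url.hh` changes below; it mirrors the `gitlabNamespacedProjectUrls` test added by this commit):

#include <cassert>
#include <string>
#include <vector>

#include "nix/util/url.hh"

using namespace nix;

void gitlabExample()
{
    auto url = parseURL(
        "https://gitlab.example.com/api/v4/projects/group%2Fproject/repository/archive.tar.gz");

    // The %2F stays inside a single decoded path segment instead of being
    // conflated with the '/' separators between segments.
    assert((url.path
            == std::vector<std::string>{
                "", "api", "v4", "projects", "group/project", "repository", "archive.tar.gz"}));

    // Rendering re-encodes the interior slash, so the URL round-trips unchanged.
    assert(
        url.to_string()
        == "https://gitlab.example.com/api/v4/projects/group%2Fproject/repository/archive.tar.gz");
}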


@ -69,7 +69,8 @@ static LfsApiInfo getLfsApi(const ParsedURL & url)
args.push_back("--");
args.push_back("git-lfs-authenticate");
args.push_back(url.path);
// FIXME %2F encode slashes? Does this command take/accept percent encoding?
args.push_back(url.renderPath(/*encode=*/false));
args.push_back("download");
auto [status, output] = runProgram({.program = "ssh", .args = args});


@ -462,8 +462,8 @@ struct GitInputScheme : InputScheme
// Why are we checking for bare repository?
// well if it's a bare repository we want to force a git fetch rather than copying the folder
bool isBareRepository = url.scheme == "file" && pathExists(url.path) && !pathExists(url.path + "/.git");
//
auto isBareRepository = [](PathView path) { return pathExists(path) && !pathExists(path + "/.git"); };
// FIXME: here we turn a possibly relative path into an absolute path.
// This allows relative git flake inputs to be resolved against the
// **current working directory** (as in POSIX), which tends to work out
@ -472,8 +472,10 @@ struct GitInputScheme : InputScheme
//
// See: https://discourse.nixos.org/t/57783 and #9708
//
if (url.scheme == "file" && !forceHttp && !isBareRepository) {
if (!isAbsolute(url.path)) {
if (url.scheme == "file" && !forceHttp && !isBareRepository(renderUrlPathEnsureLegal(url.path))) {
auto path = renderUrlPathEnsureLegal(url.path);
if (!isAbsolute(path)) {
warn(
"Fetching Git repository '%s', which uses a path relative to the current directory. "
"This is not supported and will stop working in a future release. "
@ -483,10 +485,10 @@ struct GitInputScheme : InputScheme
// If we don't check here for the path existence, then we can give libgit2 any directory
// and it will initialize them as git directories.
if (!pathExists(url.path)) {
throw Error("The path '%s' does not exist.", url.path);
if (!pathExists(path)) {
throw Error("The path '%s' does not exist.", path);
}
repoInfo.location = std::filesystem::absolute(url.path);
repoInfo.location = std::filesystem::absolute(path);
} else {
if (url.scheme == "file")
/* Query parameters are meaningless for file://, but


@ -38,7 +38,7 @@ struct GitArchiveInputScheme : InputScheme
if (url.scheme != schemeName())
return {};
auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
const auto & path = url.path;
std::optional<Hash> rev;
std::optional<std::string> ref;
@ -139,12 +139,12 @@ struct GitArchiveInputScheme : InputScheme
auto repo = getStrAttr(input.attrs, "repo");
auto ref = input.getRef();
auto rev = input.getRev();
auto path = owner + "/" + repo;
std::vector<std::string> path{owner, repo};
assert(!(ref && rev));
if (ref)
path += "/" + *ref;
path.push_back(*ref);
if (rev)
path += "/" + rev->to_string(HashFormat::Base16, false);
path.push_back(rev->to_string(HashFormat::Base16, false));
auto url = ParsedURL{
.scheme = std::string{schemeName()},
.path = path,


@ -14,7 +14,7 @@ struct IndirectInputScheme : InputScheme
if (url.scheme != "flake")
return {};
auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
const auto & path = url.path;
std::optional<Hash> rev;
std::optional<std::string> ref;
@ -82,16 +82,15 @@ struct IndirectInputScheme : InputScheme
ParsedURL toURL(const Input & input) const override
{
ParsedURL url;
url.scheme = "flake";
url.path = getStrAttr(input.attrs, "id");
ParsedURL url{
.scheme = "flake",
.path = {getStrAttr(input.attrs, "id")},
};
if (auto ref = input.getRef()) {
url.path += '/';
url.path += *ref;
url.path.push_back(*ref);
};
if (auto rev = input.getRev()) {
url.path += '/';
url.path += rev->gitRev();
url.path.push_back(rev->gitRev());
};
return url;
}


@ -120,7 +120,7 @@ struct MercurialInputScheme : InputScheme
{
auto url = parseURL(getStrAttr(input.attrs, "url"));
if (url.scheme == "file" && !input.getRef() && !input.getRev())
return url.path;
return renderUrlPathEnsureLegal(url.path);
return {};
}
@ -152,7 +152,7 @@ struct MercurialInputScheme : InputScheme
{
auto url = parseURL(getStrAttr(input.attrs, "url"));
bool isLocal = url.scheme == "file";
return {isLocal, isLocal ? url.path : url.to_string()};
return {isLocal, isLocal ? renderUrlPathEnsureLegal(url.path) : url.to_string()};
}
StorePath fetchToStore(ref<Store> store, Input & input) const


@ -20,7 +20,7 @@ struct PathInputScheme : InputScheme
Input input{settings};
input.attrs.insert_or_assign("type", "path");
input.attrs.insert_or_assign("path", url.path);
input.attrs.insert_or_assign("path", renderUrlPathEnsureLegal(url.path));
for (auto & [name, value] : url.query)
if (name == "rev" || name == "narHash")
@ -74,7 +74,7 @@ struct PathInputScheme : InputScheme
query.erase("__final");
return ParsedURL{
.scheme = "path",
.path = getStrAttr(input.attrs, "path"),
.path = splitString<std::vector<std::string>>(getStrAttr(input.attrs, "path"), "/"),
.query = query,
};
}


@ -107,19 +107,19 @@ DownloadFileResult downloadFile(
}
static DownloadTarballResult downloadTarball_(
const Settings & settings, const std::string & url, const Headers & headers, const std::string & displayPrefix)
const Settings & settings, const std::string & urlS, const Headers & headers, const std::string & displayPrefix)
{
auto url = parseURL(urlS);
// Some friendly error messages for common mistakes.
// Namely, let's catch when the URL is a local file path, but
// it is not in fact a tarball.
if (url.rfind("file://", 0) == 0) {
// Remove "file://" prefix to get the local file path
std::string localPath = url.substr(7);
if (!std::filesystem::exists(localPath)) {
if (url.scheme == "file") {
std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path);
if (!exists(localPath)) {
throw Error("tarball '%s' does not exist.", localPath);
}
if (std::filesystem::is_directory(localPath)) {
if (is_directory(localPath)) {
if (std::filesystem::exists(localPath + "/.git")) {
throw Error(
"tarball '%s' is a git repository, not a tarball. Please use `git+file` as the scheme.", localPath);
@ -128,7 +128,7 @@ static DownloadTarballResult downloadTarball_(
}
}
Cache::Key cacheKey{"tarball", {{"url", url}}};
Cache::Key cacheKey{"tarball", {{"url", urlS}}};
auto cached = settings.getCache()->lookupExpired(cacheKey);
@ -153,7 +153,7 @@ static DownloadTarballResult downloadTarball_(
auto _res = std::make_shared<Sync<FileTransferResult>>();
auto source = sinkToSource([&](Sink & sink) {
FileTransferRequest req(parseURL(url));
FileTransferRequest req(url);
req.expectedETag = cached ? getStrAttr(cached->value, "etag") : "";
getFileTransfer()->download(std::move(req), sink, [_res](FileTransferResult r) { *_res->lock() = r; });
});
@ -166,7 +166,7 @@ static DownloadTarballResult downloadTarball_(
/* Note: if the download is cached, `importTarball()` will receive
no data, which causes it to import an empty tarball. */
auto archive = hasSuffix(toLower(parseURL(url).path), ".zip") ? ({
auto archive = !url.path.empty() && hasSuffix(toLower(url.path.back()), ".zip") ? ({
/* In streaming mode, libarchive doesn't handle
symlinks in zip files correctly (#10649). So write
the entire file to disk so libarchive can access it
@ -180,7 +180,7 @@ static DownloadTarballResult downloadTarball_(
}
TarArchive{path};
})
: TarArchive{*source};
: TarArchive{*source};
auto tarballCache = getTarballCache();
auto parseSink = tarballCache->getFileSystemObjectSink();
auto lastModified = unpackTarfileToSink(archive, *parseSink);
@ -234,8 +234,11 @@ struct CurlInputScheme : InputScheme
{
const StringSet transportUrlSchemes = {"file", "http", "https"};
bool hasTarballExtension(std::string_view path) const
bool hasTarballExtension(const ParsedURL & url) const
{
if (url.path.empty())
return false;
const auto & path = url.path.back();
return hasSuffix(path, ".zip") || hasSuffix(path, ".tar") || hasSuffix(path, ".tgz")
|| hasSuffix(path, ".tar.gz") || hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2")
|| hasSuffix(path, ".tar.zst");
@ -336,7 +339,7 @@ struct FileInputScheme : CurlInputScheme
auto parsedUrlScheme = parseUrlScheme(url.scheme);
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application ? parsedUrlScheme.application.value() == schemeName()
: (!requireTree && !hasTarballExtension(url.path)));
: (!requireTree && !hasTarballExtension(url)));
}
std::pair<ref<SourceAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override
@ -373,7 +376,7 @@ struct TarballInputScheme : CurlInputScheme
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
&& (parsedUrlScheme.application ? parsedUrlScheme.application.value() == schemeName()
: (requireTree || hasTarballExtension(url.path)));
: (requireTree || hasTarballExtension(url)));
}
std::pair<ref<SourceAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override


@ -143,7 +143,7 @@ std::pair<FlakeRef, std::string> parsePathFlakeRefWithFragment(
auto parsedURL = ParsedURL{
.scheme = "git+file",
.authority = ParsedURL::Authority{},
.path = flakeRoot,
.path = splitString<std::vector<std::string>>(flakeRoot, "/"),
.query = query,
.fragment = fragment,
};
@ -172,7 +172,13 @@ std::pair<FlakeRef, std::string> parsePathFlakeRefWithFragment(
return fromParsedURL(
fetchSettings,
{.scheme = "path", .authority = ParsedURL::Authority{}, .path = path, .query = query, .fragment = fragment},
{
.scheme = "path",
.authority = ParsedURL::Authority{},
.path = splitString<std::vector<std::string>>(path, "/"),
.query = query,
.fragment = fragment,
},
isFlake);
}
@ -193,7 +199,7 @@ parseFlakeIdRef(const fetchers::Settings & fetchSettings, const std::string & ur
auto parsedURL = ParsedURL{
.scheme = "flake",
.authority = ParsedURL::Authority{},
.path = match[1],
.path = splitString<std::vector<std::string>>(match[1].str(), "/"),
};
return std::make_pair(
@ -211,8 +217,12 @@ std::optional<std::pair<FlakeRef, std::string>> parseURLFlakeRef(
{
try {
auto parsed = parseURL(url, /*lenient=*/true);
if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file") && !isAbsolute(parsed.path))
parsed.path = absPath(parsed.path, *baseDir);
if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file")) {
/* Here we know that the path must not contain encoded '/' or NUL bytes. */
auto path = renderUrlPathEnsureLegal(parsed.path);
if (!isAbsolute(path))
parsed.path = splitString<std::vector<std::string>>(absPath(path, *baseDir), "/");
}
return fromParsedURL(fetchSettings, std::move(parsed), isFlake);
} catch (BadURL &) {
return std::nullopt;


@ -27,16 +27,21 @@ std::optional<std::string> getNameFromURL(const ParsedURL & url)
return match.str(2);
}
/* This is not right, because special chars like slashes within the
path fragments should be percent encoded, but I don't think any
of the regexes above care. */
auto path = concatStringsSep("/", url.path);
/* If this is a github/gitlab/sourcehut flake, use the repo name */
if (std::regex_match(url.scheme, gitProviderRegex) && std::regex_match(url.path, match, secondPathSegmentRegex))
if (std::regex_match(url.scheme, gitProviderRegex) && std::regex_match(path, match, secondPathSegmentRegex))
return match.str(1);
/* If it is a regular git flake, use the directory name */
if (std::regex_match(url.scheme, gitSchemeRegex) && std::regex_match(url.path, match, lastPathSegmentRegex))
if (std::regex_match(url.scheme, gitSchemeRegex) && std::regex_match(path, match, lastPathSegmentRegex))
return match.str(1);
/* If there is no fragment, take the last element of the path */
if (std::regex_match(url.path, match, lastPathSegmentRegex))
if (std::regex_match(path, match, lastPathSegmentRegex))
return match.str(1);
/* If even that didn't work, the URL does not contain enough info to determine a useful name */


@ -33,7 +33,7 @@ INSTANTIATE_TEST_SUITE_P(
"s3://my-bucket/my-key.txt",
{
.bucket = "my-bucket",
.key = "my-key.txt",
.key = {"my-key.txt"},
},
"basic_s3_bucket",
},
@ -41,7 +41,7 @@ INSTANTIATE_TEST_SUITE_P(
"s3://prod-cache/nix/store/abc123.nar.xz?region=eu-west-1",
{
.bucket = "prod-cache",
.key = "nix/store/abc123.nar.xz",
.key = {"nix", "store", "abc123.nar.xz"},
.region = "eu-west-1",
},
"with_region",
@ -50,7 +50,7 @@ INSTANTIATE_TEST_SUITE_P(
"s3://bucket/key?region=us-west-2&profile=prod&endpoint=custom.s3.com&scheme=https&region=us-east-1",
{
.bucket = "bucket",
.key = "key",
.key = {"key"},
.profile = "prod",
.region = "us-west-2", //< using the first parameter (decodeQuery ignores dupicates)
.scheme = "https",
@ -62,7 +62,7 @@ INSTANTIATE_TEST_SUITE_P(
"s3://cache/file.txt?profile=production&region=ap-southeast-2",
{
.bucket = "cache",
.key = "file.txt",
.key = {"file.txt"},
.profile = "production",
.region = "ap-southeast-2",
},
@ -72,13 +72,14 @@ INSTANTIATE_TEST_SUITE_P(
"s3://bucket/key?endpoint=https://minio.local&scheme=http",
{
.bucket = "bucket",
.key = "key",
.key = {"key"},
/* TODO: Figure out what AWS SDK is doing when both endpointOverride and scheme are set. */
.scheme = "http",
.endpoint =
ParsedURL{
.scheme = "https",
.authority = ParsedURL::Authority{.host = "minio.local"},
.path = {""},
},
},
"with_absolute_endpoint_uri",
@ -101,6 +102,7 @@ struct S3ToHttpsConversionTestCase
{
ParsedS3URL input;
ParsedURL expected;
std::string expectedRendered;
std::string description;
};
@ -113,6 +115,7 @@ TEST_P(S3ToHttpsConversionTest, ConvertsCorrectly)
const auto & testCase = GetParam();
auto result = testCase.input.toHttpsUrl();
EXPECT_EQ(result, testCase.expected) << "Failed for: " << testCase.description;
EXPECT_EQ(result.to_string(), testCase.expectedRendered);
}
INSTANTIATE_TEST_SUITE_P(
@ -122,71 +125,77 @@ INSTANTIATE_TEST_SUITE_P(
S3ToHttpsConversionTestCase{
ParsedS3URL{
.bucket = "my-bucket",
.key = "my-key.txt",
.key = {"my-key.txt"},
},
ParsedURL{
.scheme = "https",
.authority = ParsedURL::Authority{.host = "s3.us-east-1.amazonaws.com"},
.path = "/my-bucket/my-key.txt",
.path = {"", "my-bucket", "my-key.txt"},
},
"https://s3.us-east-1.amazonaws.com/my-bucket/my-key.txt",
"basic_s3_default_region",
},
S3ToHttpsConversionTestCase{
ParsedS3URL{
.bucket = "prod-cache",
.key = "nix/store/abc123.nar.xz",
.key = {"nix", "store", "abc123.nar.xz"},
.region = "eu-west-1",
},
ParsedURL{
.scheme = "https",
.authority = ParsedURL::Authority{.host = "s3.eu-west-1.amazonaws.com"},
.path = "/prod-cache/nix/store/abc123.nar.xz",
.path = {"", "prod-cache", "nix", "store", "abc123.nar.xz"},
},
"https://s3.eu-west-1.amazonaws.com/prod-cache/nix/store/abc123.nar.xz",
"with_eu_west_1_region",
},
S3ToHttpsConversionTestCase{
ParsedS3URL{
.bucket = "bucket",
.key = "key",
.key = {"key"},
.scheme = "http",
.endpoint = ParsedURL::Authority{.host = "custom.s3.com"},
},
ParsedURL{
.scheme = "http",
.authority = ParsedURL::Authority{.host = "custom.s3.com"},
.path = "/bucket/key",
.path = {"", "bucket", "key"},
},
"http://custom.s3.com/bucket/key",
"custom_endpoint_authority",
},
S3ToHttpsConversionTestCase{
ParsedS3URL{
.bucket = "bucket",
.key = "key",
.key = {"key"},
.endpoint =
ParsedURL{
.scheme = "http",
.authority = ParsedURL::Authority{.host = "server", .port = 9000},
.path = {""},
},
},
ParsedURL{
.scheme = "http",
.authority = ParsedURL::Authority{.host = "server", .port = 9000},
.path = "/bucket/key",
.path = {"", "bucket", "key"},
},
"http://server:9000/bucket/key",
"custom_endpoint_with_port",
},
S3ToHttpsConversionTestCase{
ParsedS3URL{
.bucket = "bucket",
.key = "path/to/file.txt",
.key = {"path", "to", "file.txt"},
.region = "ap-southeast-2",
.scheme = "https",
},
ParsedURL{
.scheme = "https",
.authority = ParsedURL::Authority{.host = "s3.ap-southeast-2.amazonaws.com"},
.path = "/bucket/path/to/file.txt",
.path = {"", "bucket", "path", "to", "file.txt"},
},
"https://s3.ap-southeast-2.amazonaws.com/bucket/path/to/file.txt",
"complex_path_and_region",
}),
[](const ::testing::TestParamInfo<S3ToHttpsConversionTestCase> & info) { return info.param.description; });


@ -815,7 +815,7 @@ struct curlFileTransfer : public FileTransfer
S3Helper s3Helper(profile, region, scheme, endpoint);
// FIXME: implement ETag
auto s3Res = s3Helper.getObject(parsed.bucket, parsed.key);
auto s3Res = s3Helper.getObject(parsed.bucket, encodeUrlPath(parsed.key));
FileTransferResult res;
if (!s3Res.data)
throw FileTransferError(NotFound, {}, "S3 object '%s' does not exist", request.uri);


@ -27,7 +27,7 @@ HttpBinaryCacheStoreConfig::HttpBinaryCacheStoreConfig(
+ (!_cacheUri.empty() ? _cacheUri
: throw UsageError("`%s` Store requires a non-empty authority in Store URL", scheme))))
{
while (!cacheUri.path.empty() && cacheUri.path.back() == '/')
while (!cacheUri.path.empty() && cacheUri.path.back() == "")
cacheUri.path.pop_back();
}
@ -37,7 +37,7 @@ StoreReference HttpBinaryCacheStoreConfig::getReference() const
.variant =
StoreReference::Specified{
.scheme = cacheUri.scheme,
.authority = (cacheUri.authority ? cacheUri.authority->to_string() : "") + cacheUri.path,
.authority = cacheUri.renderAuthorityAndPath(),
},
.params = cacheUri.query,
};
@ -157,7 +157,7 @@ protected:
/* Otherwise the last path fragment will get discarded. */
auto cacheUriWithTrailingSlash = config->cacheUri;
if (!cacheUriWithTrailingSlash.path.empty())
cacheUriWithTrailingSlash.path += "/";
cacheUriWithTrailingSlash.path.push_back("");
/* path is not a path, but a full relative or absolute
URL, e.g. we've seen in the wild NARINFO files have a URL


@ -54,7 +54,12 @@ struct S3Helper
struct ParsedS3URL
{
std::string bucket;
std::string key;
/**
* @see ParsedURL::path. This is a vector for the same reason.
* Unlike ParsedURL::path this doesn't include the leading empty segment,
* since the bucket name is necessary.
*/
std::vector<std::string> key;
std::optional<std::string> profile;
std::optional<std::string> region;
std::optional<std::string> scheme;
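
For illustration, a minimal sketch (not part of the diff; the include path is an assumption) of how the vector-valued key relates to the rendered HTTPS URL, mirroring the `with_eu_west_1_region` test case above:

#include "nix/store/s3-url.hh" // assumed location of ParsedS3URL

using namespace nix;

void s3KeyExample()
{
    // Unlike ParsedURL::path, the key has no leading empty segment.
    ParsedS3URL s3{
        .bucket = "prod-cache",
        .key = {"nix", "store", "abc123.nar.xz"},
        .region = "eu-west-1",
    };

    // toHttpsUrl() prepends the leading empty segment and the bucket:
    //   path == {"", "prod-cache", "nix", "store", "abc123.nar.xz"}
    // which renders as
    //   https://s3.eu-west-1.amazonaws.com/prod-cache/nix/store/abc123.nar.xz
    auto https = s3.toHttpsUrl();
}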


@ -77,12 +77,22 @@ struct StoreReference
*/
std::string render(bool withParams = true) const;
std::string to_string() const
{
return render();
}
/**
* Parse a URI into a store reference.
*/
static StoreReference parse(const std::string & uri, const Params & extraParams = Params{});
};
static inline std::ostream & operator<<(std::ostream & os, const StoreReference & ref)
{
return os << ref.render();
}
/**
* Split URI into protocol+hierarchy part and its parameter set.
*/


@ -3,6 +3,9 @@
#include "nix/util/url.hh"
#include "nix/util/util.hh"
#include "nix/util/canon-path.hh"
#include "nix/util/strings-inline.hh"
#include <ranges>
namespace nix {
@ -24,10 +27,6 @@ try {
|| parsed.authority->hostType != ParsedURL::Authority::HostType::Name)
throw BadURL("URI has a missing or invalid bucket name");
std::string_view key = parsed.path;
/* Make the key a relative path. */
splitPrefix(key, "/");
/* TODO: Validate the key against:
* https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines
*/
@ -41,10 +40,14 @@ try {
};
auto endpoint = getOptionalParam("endpoint");
if (parsed.path.size() <= 1 || !parsed.path.front().empty())
throw BadURL("URI has a missing or invalid key");
auto path = std::views::drop(parsed.path, 1) | std::ranges::to<std::vector<std::string>>();
return ParsedS3URL{
.bucket = parsed.authority->host,
.key = std::string{key},
.key = std::move(path),
.profile = getOptionalParam("profile"),
.region = getOptionalParam("region"),
.scheme = getOptionalParam("scheme"),
@ -78,26 +81,35 @@ ParsedURL ParsedS3URL::toHttpsUrl() const
overloaded{
[&](const std::monostate &) {
// No custom endpoint, use standard AWS S3 endpoint
std::vector<std::string> path{""};
path.push_back(bucket);
path.insert(path.end(), key.begin(), key.end());
return ParsedURL{
.scheme = std::string{schemeStr},
.authority = ParsedURL::Authority{.host = "s3." + regionStr + ".amazonaws.com"},
.path = (CanonPath::root / bucket / CanonPath(key)).abs(),
.path = std::move(path),
};
},
[&](const ParsedURL::Authority & auth) {
// Endpoint is just an authority (hostname/port)
std::vector<std::string> path{""};
path.push_back(bucket);
path.insert(path.end(), key.begin(), key.end());
return ParsedURL{
.scheme = std::string{schemeStr},
.authority = auth,
.path = (CanonPath::root / bucket / CanonPath(key)).abs(),
.path = std::move(path),
};
},
[&](const ParsedURL & endpointUrl) {
// Endpoint is already a ParsedURL (e.g., http://server:9000)
auto path = endpointUrl.path;
path.push_back(bucket);
path.insert(path.end(), key.begin(), key.end());
return ParsedURL{
.scheme = endpointUrl.scheme,
.authority = endpointUrl.authority,
.path = (CanonPath(endpointUrl.path) / bucket / CanonPath(key)).abs(),
.path = std::move(path),
};
},
},


@ -48,13 +48,11 @@ StoreReference StoreReference::parse(const std::string & uri, const StoreReferen
auto parsedUri = parseURL(uri, /*lenient=*/true);
params.insert(parsedUri.query.begin(), parsedUri.query.end());
auto baseURI = parsedUri.authority.value_or(ParsedURL::Authority{}).to_string() + parsedUri.path;
return {
.variant =
Specified{
.scheme = std::move(parsedUri.scheme),
.authority = std::move(baseURI),
.authority = parsedUri.renderAuthorityAndPath(),
},
.params = std::move(params),
};


@ -18,7 +18,7 @@ TEST(parseURL, parsesSimpleHttpUrl)
ParsedURL expected{
.scheme = "http",
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/file.tar.gz",
.path = {"", "file.tar.gz"},
.query = (StringMap) {},
.fragment = "",
};
@ -35,7 +35,7 @@ TEST(parseURL, parsesSimpleHttpsUrl)
ParsedURL expected{
.scheme = "https",
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/file.tar.gz",
.path = {"", "file.tar.gz"},
.query = (StringMap) {},
.fragment = "",
};
@ -52,7 +52,7 @@ TEST(parseURL, parsesSimpleHttpUrlWithQueryAndFragment)
ParsedURL expected{
.scheme = "https",
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/file.tar.gz",
.path = {"", "file.tar.gz"},
.query = (StringMap) {{"download", "fast"}, {"when", "now"}},
.fragment = "hello",
};
@ -69,7 +69,7 @@ TEST(parseURL, parsesSimpleHttpUrlWithComplexFragment)
ParsedURL expected{
.scheme = "http",
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/file.tar.gz",
.path = {"", "file.tar.gz"},
.query = (StringMap) {{"field", "value"}},
.fragment = "?foo=bar#",
};
@ -85,7 +85,7 @@ TEST(parseURL, parsesFilePlusHttpsUrl)
ParsedURL expected{
.scheme = "file+https",
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/video.mp4",
.path = {"", "video.mp4"},
.query = (StringMap) {},
.fragment = "",
};
@ -108,7 +108,7 @@ TEST(parseURL, parseIPv4Address)
ParsedURL expected{
.scheme = "http",
.authority = Authority{.hostType = HostType::IPv4, .host = "127.0.0.1", .port = 8080},
.path = "/file.tar.gz",
.path = {"", "file.tar.gz"},
.query = (StringMap) {{"download", "fast"}, {"when", "now"}},
.fragment = "hello",
};
@ -125,7 +125,7 @@ TEST(parseURL, parseScopedRFC6874IPv6Address)
ParsedURL expected{
.scheme = "http",
.authority = Authority{.hostType = HostType::IPv6, .host = "fe80::818c:da4d:8975:415c%enp0s25", .port = 8080},
.path = "",
.path = {""},
.query = (StringMap) {},
.fragment = "",
};
@ -147,7 +147,7 @@ TEST(parseURL, parseIPv6Address)
.host = "2a02:8071:8192:c100:311d:192d:81ac:11ea",
.port = 8080,
},
.path = "",
.path = {""},
.query = (StringMap) {},
.fragment = "",
};
@ -178,7 +178,7 @@ TEST(parseURL, parseUserPassword)
.password = "pass",
.port = 8080,
},
.path = "/file.tar.gz",
.path = {"", "file.tar.gz"},
.query = (StringMap) {},
.fragment = "",
};
@ -195,11 +195,12 @@ TEST(parseURL, parseFileURLWithQueryAndFragment)
ParsedURL expected{
.scheme = "file",
.authority = Authority{},
.path = "/none/of//your/business",
.path = {"", "none", "of", "", "your", "business"},
.query = (StringMap) {},
.fragment = "",
};
ASSERT_EQ(parsed.renderPath(), "/none/of//your/business");
ASSERT_EQ(parsed, expected);
ASSERT_EQ(s, parsed.to_string());
}
@ -212,9 +213,10 @@ TEST(parseURL, parseFileURL)
ParsedURL expected{
.scheme = "file",
.authority = std::nullopt,
.path = "/none/of/your/business/",
.path = {"", "none", "of", "your", "business", ""},
};
ASSERT_EQ(parsed.renderPath(), "/none/of/your/business/");
ASSERT_EQ(parsed, expected);
ASSERT_EQ(s, parsed.to_string());
}
@ -227,10 +229,11 @@ TEST(parseURL, parseFileURLWithAuthority)
ParsedURL expected{
.scheme = "file",
.authority = Authority{.host = ""},
.path = "///of/your/business//",
.path = {"", "", "", "of", "your", "business", "", ""},
};
ASSERT_EQ(parsed.authority, expected.authority);
ASSERT_EQ(parsed.path, expected.path);
ASSERT_EQ(parsed.renderPath(), "///of/your/business//");
ASSERT_EQ(parsed, expected);
ASSERT_EQ(s, parsed.to_string());
}
@ -243,9 +246,10 @@ TEST(parseURL, parseFileURLNoLeadingSlash)
ParsedURL expected{
.scheme = "file",
.authority = std::nullopt,
.path = "none/of/your/business/",
.path = {"none", "of", "your", "business", ""},
};
ASSERT_EQ(parsed.renderPath(), "none/of/your/business/");
ASSERT_EQ(parsed, expected);
ASSERT_EQ("file:none/of/your/business/", parsed.to_string());
}
@ -258,9 +262,10 @@ TEST(parseURL, parseHttpTrailingSlash)
ParsedURL expected{
.scheme = "http",
.authority = Authority{.host = "example.com"},
.path = "/",
.path = {"", ""},
};
ASSERT_EQ(parsed.renderPath(), "/");
ASSERT_EQ(parsed, expected);
ASSERT_EQ(s, parsed.to_string());
}
@ -306,7 +311,7 @@ TEST(parseURL, parseFTPUrl)
ParsedURL expected{
.scheme = "ftp",
.authority = Authority{.hostType = HostType::Name, .host = "ftp.nixos.org"},
.path = "/downloads/nixos.iso",
.path = {"", "downloads", "nixos.iso"},
.query = (StringMap) {},
.fragment = "",
};
@ -342,7 +347,7 @@ TEST(parseURL, parsesHttpUrlWithEmptyPort)
ParsedURL expected{
.scheme = "http",
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/file.tar.gz",
.path = {"", "file.tar.gz"},
.query = (StringMap) {{"foo", "bar"}},
.fragment = "",
};
@ -362,7 +367,7 @@ TEST(parseURLRelative, resolvesRelativePath)
ParsedURL expected{
.scheme = "http",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"},
.path = "/dir/subdir/file.txt",
.path = {"", "dir", "subdir", "file.txt"},
.query = {},
.fragment = "",
};
@ -376,7 +381,7 @@ TEST(parseURLRelative, baseUrlIpv6AddressWithoutZoneId)
ParsedURL expected{
.scheme = "http",
.authority = ParsedURL::Authority{.hostType = HostType::IPv6, .host = "fe80::818c:da4d:8975:415c"},
.path = "/dir/subdir/file.txt",
.path = {"", "dir", "subdir", "file.txt"},
.query = {},
.fragment = "",
};
@ -390,7 +395,7 @@ TEST(parseURLRelative, resolvesRelativePathIpv6AddressWithZoneId)
ParsedURL expected{
.scheme = "http",
.authority = Authority{.hostType = HostType::IPv6, .host = "fe80::818c:da4d:8975:415c%enp0s25", .port = 8080},
.path = "/dir/subdir/file2.txt",
.path = {"", "dir", "subdir", "file2.txt"},
.query = {},
.fragment = "",
};
@ -405,7 +410,7 @@ TEST(parseURLRelative, resolvesRelativePathWithDot)
ParsedURL expected{
.scheme = "http",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"},
.path = "/dir/subdir/file.txt",
.path = {"", "dir", "subdir", "file.txt"},
.query = {},
.fragment = "",
};
@ -419,7 +424,21 @@ TEST(parseURLRelative, resolvesParentDirectory)
ParsedURL expected{
.scheme = "http",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org", .port = 234},
.path = "/up.txt",
.path = {"", "up.txt"},
.query = {},
.fragment = "",
};
ASSERT_EQ(parsed, expected);
}
TEST(parseURLRelative, resolvesParentDirectoryNotTrickedByEscapedSlash)
{
ParsedURL base = parseURL("http://example.org:234/dir\%2Ffirst-trick/another-dir\%2Fsecond-trick/page.html");
auto parsed = parseURLRelative("../up.txt", base);
ParsedURL expected{
.scheme = "http",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org", .port = 234},
.path = {"", "dir/first-trick", "up.txt"},
.query = {},
.fragment = "",
};
@ -433,7 +452,7 @@ TEST(parseURLRelative, replacesPathWithAbsoluteRelative)
ParsedURL expected{
.scheme = "http",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"},
.path = "/rooted.txt",
.path = {"", "rooted.txt"},
.query = {},
.fragment = "",
};
@ -448,7 +467,7 @@ TEST(parseURLRelative, keepsQueryAndFragmentFromRelative)
ParsedURL expected{
.scheme = "https",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/path/other.html",
.path = {"", "path", "other.html"},
.query = {{"x", "1"}, {"y", "2"}},
.fragment = "frag",
};
@ -489,7 +508,7 @@ TEST(parseURLRelative, emptyRelative)
ParsedURL expected{
.scheme = "https",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/path/index.html",
.path = {"", "path", "index.html"},
.query = {{"a b", "5 6"}, {"x y", "34"}},
.fragment = "",
};
@ -504,7 +523,7 @@ TEST(parseURLRelative, fragmentRelative)
ParsedURL expected{
.scheme = "https",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/path/index.html",
.path = {"", "path", "index.html"},
.query = {{"a b", "5 6"}, {"x y", "34"}},
.fragment = "frag2",
};
@ -518,7 +537,7 @@ TEST(parseURLRelative, queryRelative)
ParsedURL expected{
.scheme = "https",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/path/index.html",
.path = {"", "path", "index.html"},
.query = {{"asdf qwer", "1 2 3"}},
.fragment = "",
};
@ -532,7 +551,7 @@ TEST(parseURLRelative, queryFragmentRelative)
ParsedURL expected{
.scheme = "https",
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
.path = "/path/index.html",
.path = {"", "path", "index.html"},
.query = {{"asdf qwer", "1 2 3"}},
.fragment = "frag2",
};
@ -648,6 +667,25 @@ TEST(percentEncode, yen)
ASSERT_EQ(percentDecode(e), s);
}
TEST(parseURL, gitlabNamespacedProjectUrls)
{
// Test GitLab URL patterns with namespaced projects
// These should preserve %2F encoding in the path
auto s = "https://gitlab.example.com/api/v4/projects/group%2Fsubgroup%2Fproject/repository/archive.tar.gz";
auto parsed = parseURL(s);
ParsedURL expected{
.scheme = "https",
.authority = Authority{.hostType = HostType::Name, .host = "gitlab.example.com"},
.path = {"", "api", "v4", "projects", "group/subgroup/project", "repository", "archive.tar.gz"},
.query = {},
.fragment = "",
};
ASSERT_EQ(parsed, expected);
ASSERT_EQ(s, parsed.to_string());
}
TEST(nix, isValidSchemeName)
{
ASSERT_TRUE(isValidSchemeName("http"));


@ -1,7 +1,10 @@
#pragma once
///@file
#include <span>
#include "nix/util/error.hh"
#include "nix/util/canon-path.hh"
namespace nix {
@ -65,6 +68,7 @@ struct ParsedURL
};
std::string scheme;
/**
* Optional parsed authority component of the URL.
*
@ -75,16 +79,155 @@ struct ParsedURL
* part of the URL.
*/
std::optional<Authority> authority;
std::string path;
/**
* @note Unlike Unix paths, URLs provide a way to escape path
* separators, in the form of the `%2F` encoding of `/`. That means
* that if one percent-decodes the path into a single string, that
* decoding will be *lossy*, because `/` and `%2F` both become `/`.
* The right thing to do is instead split up the path on `/`, and
* then percent decode each part.
*
* For example, the path
* ```
* foo/bar%2Fbaz/quux
* ```
* is parsed as
* ```
* {"foo, "bar/baz", "quux"}
* ```
*
* The splitting and joining we do assume that the separator (`/` in this case) only goes *between* elements.
*
* That means the parsed representation will begin with an empty
* element to make an initial `/`, and will end with an empty
* element to make a trailing `/`. As a result, the elements of this
* vector mostly, but *not always*, correspond to segments of the
* path.
*
* Examples:
*
* - ```
* https://foo.com/bar
* ```
* has path
* ```
* {"", "bar"}
* ```
*
* - ```
* https://foo.com/bar/
* ```
* has path
* ```
* {"", "bar", ""}
* ```
*
* - ```
* https://foo.com//bar///
* ```
* has path
* ```
* {"", "", "bar", "", "", ""}
* ```
*
* - ```
* https://foo.com
* ```
* has path
* ```
* {""}
* ```
*
* - ```
* https://foo.com/
* ```
* has path
* ```
* {"", ""}
* ```
*
* - ```
* tel:01234
* ```
* has path `{"01234"}` (and no authority)
*
* - ```
* foo:/01234
* ```
* has path `{"", "01234"}` (and no authority)
*
* Note that both trailing and leading slashes are, in general,
* semantically significant.
*
* For trailing slashes, the main example affecting many schemes is
* that a relative reference like `baz` resolves differently depending
* on whether the base URL has a trailing slash:
*
* - against `https://foo.com/bar` it resolves to `https://foo.com/baz`
*
* - against `https://foo.com/bar/` it resolves to `https://foo.com/bar/baz`
*
* See `parseURLRelative` for more details.
*
* For leading slashes, there are some requirements to be aware of.
*
* - When there is an authority, the path *must* start with a leading
* slash. Otherwise the path will not be separated from the
* authority, and will not round trip through the parser:
*
* ```
* {.scheme="https", .authority.host = "foo", .path={"bad"}}
* ```
* will render to `https://foobar`, but that would parse back as
* ```
* {.scheme="https", .authority.host = "foobar", .path={}}
* ```
*
* - When there is no authority, the path must *not* begin with two
* slashes. Otherwise, there will be another parser round trip
* issue:
*
* ```
* {.scheme="https", .path={"", "", "bad"}}
* ```
* will render to `https://bad`, but that would parse back as
* ```
* {.scheme="https", .authority.host = "bad", .path={}}
* ```
*
* These invariants will be checked in `to_string` and
* `renderAuthorityAndPath`.
*/
std::vector<std::string> path;
StringMap query;
std::string fragment;
/**
* Render just the middle part of a URL, without the `//` which
* indicates whether the authority is present.
*
* @note This is a somewhat ad-hoc operation, but it comes up with
* some frequency, probably due to the current design of
* `StoreReference` in `nix-store`.
*/
std::string renderAuthorityAndPath() const;
std::string to_string() const;
/**
* Render the path to a string.
*
* @param encode Whether to percent encode path segments.
*/
std::string renderPath(bool encode = false) const;
auto operator<=>(const ParsedURL & other) const noexcept = default;
/**
* Remove `.` and `..` path elements.
* Remove `.` and `..` path segments.
*/
ParsedURL canonicalise();
};
@ -96,6 +239,22 @@ MakeError(BadURL, Error);
std::string percentDecode(std::string_view in);
std::string percentEncode(std::string_view s, std::string_view keep = "");
/**
* Get the path part of the URL as an absolute or relative Path.
*
* @throws if any path component contains a slash (which would have
* been encoded as `%2F` in the rendered URL). This is because OS file
* paths have no escape sequences --- file names cannot contain a
* `/`.
*/
Path renderUrlPathEnsureLegal(const std::vector<std::string> & urlPath);
/**
* Percent-encode a URL path. Encoding interior slashes as `%2F` is
* the most important case.
*/
std::string encodeUrlPath(std::span<const std::string> urlPath);
/**
* @param lenient @see parseURL
*/
@ -114,6 +273,12 @@ std::string encodeQuery(const StringMap & query);
* @note IPv6 ZoneId literals (RFC4007) are represented in URIs according to RFC6874.
*
* @throws BadURL
*
* The WHATWG specification of the URL constructor in JavaScript is
* also a useful reference:
* https://url.spec.whatwg.org/#concept-basic-url-parser. Note, however,
* that it includes various scheme-specific normalizations / extra steps
* that we do not implement.
*/
ParsedURL parseURL(std::string_view url, bool lenient = false);
@ -123,7 +288,11 @@ ParsedURL parseURL(std::string_view url, bool lenient = false);
*
* This is specified in [IETF RFC 3986, section 5](https://datatracker.ietf.org/doc/html/rfc3986#section-5)
*
* Behavior should also match the `new URL(url, base)` JavaScript constructor.
* @throws BadURL
*
* Behavior should also match the `new URL(url, base)` JavaScript
* constructor, except for extra steps specific to the HTTP scheme. See
* `parseURL` for link to the relevant WHATWG standard.
*/
ParsedURL parseURLRelative(std::string_view url, const ParsedURL & base);
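
To make the new path helpers (`renderUrlPathEnsureLegal`, `encodeUrlPath`) concrete, a small usage sketch (not part of the diff, assuming only the declarations added in this header):

#include "nix/util/url.hh"

using namespace nix;

void pathHelpersExample()
{
    auto url = parseURL("https://example.org/projects/group%2Fproject");
    // url.path == {"", "projects", "group/project"}

    // encodeUrlPath() re-encodes interior slashes, so the %2F survives rendering:
    //   "/projects/group%2Fproject"
    auto rendered = encodeUrlPath(url.path);

    // renderUrlPathEnsureLegal() produces an OS path, so it rejects any segment
    // containing '/', since a file name cannot contain one.
    try {
        auto osPath = renderUrlPathEnsureLegal(url.path); // throws BadURL here
    } catch (BadURL &) {
        // expected for this URL
    }
}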


@ -3,6 +3,7 @@
#include "nix/util/util.hh"
#include "nix/util/split.hh"
#include "nix/util/canon-path.hh"
#include "nix/util/strings-inline.hh"
#include <boost/url.hpp>
@ -179,11 +180,14 @@ static ParsedURL fromBoostUrlView(boost::urls::url_view urlView, bool lenient)
if (authority && authority->host.size() && transportIsFile)
throw BadURL("file:// URL '%s' has unexpected authority '%s'", urlView.buffer(), *authority);
auto path = urlView.path(); /* Does pct-decoding */
auto fragment = urlView.fragment(); /* Does pct-decoding */
if (transportIsFile && path.empty())
path = "/";
boost::core::string_view encodedPath = urlView.encoded_path();
if (transportIsFile && encodedPath.empty())
encodedPath = "/";
auto path = std::views::transform(splitString<std::vector<std::string_view>>(encodedPath, "/"), percentDecode)
| std::ranges::to<std::vector<std::string>>();
/* Get the raw query. Store URI supports smuggling doubly nested queries, where
the inner &/? are pct-encoded. */
@ -192,7 +196,7 @@ static ParsedURL fromBoostUrlView(boost::urls::url_view urlView, bool lenient)
return ParsedURL{
.scheme = scheme,
.authority = authority,
.path = path,
.path = std::move(path),
.query = decodeQuery(query, lenient),
.fragment = fragment,
};
@ -215,7 +219,7 @@ try {
if (authority.port)
resolved.set_port_number(*authority.port);
}
resolved.set_path(base.path);
resolved.set_encoded_path(encodeUrlPath(base.path));
resolved.set_encoded_query(encodeQuery(base.query));
resolved.set_fragment(base.fragment);
} catch (boost::system::system_error & e) {
@ -291,7 +295,15 @@ try {
}
const static std::string allowedInQuery = ":@/?";
const static std::string allowedInPath = ":@/";
const static std::string allowedInPath = ":@";
std::string encodeUrlPath(std::span<const std::string> urlPath)
{
std::vector<std::string> encodedPath;
for (auto & p : urlPath)
encodedPath.push_back(percentEncode(p, allowedInPath));
return concatStringsSep("/", encodedPath);
}
std::string encodeQuery(const StringMap & ss)
{
@ -308,10 +320,62 @@ std::string encodeQuery(const StringMap & ss)
return res;
}
Path renderUrlPathEnsureLegal(const std::vector<std::string> & urlPath)
{
for (const auto & comp : urlPath) {
/* This is only really valid for UNIX. Windows has more restrictions. */
if (comp.contains('/'))
throw BadURL("URL path component '%s' contains '/', which is not allowed in file names", comp);
if (comp.contains(char(0)))
throw BadURL("URL path component '%s' contains NUL byte which is not allowed", comp);
}
return concatStringsSep("/", urlPath);
}
std::string ParsedURL::renderPath(bool encode) const
{
if (encode)
return encodeUrlPath(path);
return concatStringsSep("/", path);
}
std::string ParsedURL::renderAuthorityAndPath() const
{
std::string res;
/* The following assertions correspond to 3.3. Path [rfc3986]. URL parser
will never violate these properties, but hand-constructed ParsedURLs might. */
if (authority.has_value()) {
/* If a URI contains an authority component, then the path component
must either be empty or begin with a slash ("/") character. */
assert(path.empty() || path.front().empty());
res += authority->to_string();
} else if (std::ranges::equal(std::views::take(path, 2), std::views::repeat("", 2))) {
/* If a URI does not contain an authority component, then the path cannot begin
with two slash characters ("//") */
unreachable();
}
res += encodeUrlPath(path);
return res;
}
std::string ParsedURL::to_string() const
{
return scheme + ":" + (authority ? "//" + authority->to_string() : "") + percentEncode(path, allowedInPath)
+ (query.empty() ? "" : "?" + encodeQuery(query)) + (fragment.empty() ? "" : "#" + percentEncode(fragment));
std::string res;
res += scheme;
res += ":";
if (authority.has_value())
res += "//";
res += renderAuthorityAndPath();
if (!query.empty()) {
res += "?";
res += encodeQuery(query);
}
if (!fragment.empty()) {
res += "#";
res += percentEncode(fragment);
}
return res;
}
std::ostream & operator<<(std::ostream & os, const ParsedURL & url)
@ -323,7 +387,7 @@ std::ostream & operator<<(std::ostream & os, const ParsedURL & url)
ParsedURL ParsedURL::canonicalise()
{
ParsedURL res(*this);
res.path = CanonPath(res.path).abs();
res.path = splitString<std::vector<std::string>>(CanonPath(renderPath()).abs(), "/");
return res;
}
@ -352,7 +416,11 @@ ParsedURL fixGitURL(const std::string & url)
if (hasPrefix(url, "file:"))
return parseURL(url);
if (url.find("://") == std::string::npos) {
return (ParsedURL{.scheme = "file", .authority = ParsedURL::Authority{}, .path = url});
return ParsedURL{
.scheme = "file",
.authority = ParsedURL::Authority{},
.path = splitString<std::vector<std::string>>(url, "/"),
};
}
return parseURL(url);
}