diff --git a/src/libfetchers/git-lfs-fetch.cc b/src/libfetchers/git-lfs-fetch.cc index bd9752711..9688daa4a 100644 --- a/src/libfetchers/git-lfs-fetch.cc +++ b/src/libfetchers/git-lfs-fetch.cc @@ -69,7 +69,8 @@ static LfsApiInfo getLfsApi(const ParsedURL & url) args.push_back("--"); args.push_back("git-lfs-authenticate"); - args.push_back(url.path); + // FIXME %2F encode slashes? Does this command take/accept percent encoding? + args.push_back(url.renderPath(/*encode=*/false)); args.push_back("download"); auto [status, output] = runProgram({.program = "ssh", .args = args}); diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index c19e8d7db..a7acc316e 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -462,8 +462,8 @@ struct GitInputScheme : InputScheme // Why are we checking for bare repository? // well if it's a bare repository we want to force a git fetch rather than copying the folder - bool isBareRepository = url.scheme == "file" && pathExists(url.path) && !pathExists(url.path + "/.git"); - // + auto isBareRepository = [](PathView path) { return pathExists(path) && !pathExists(path + "/.git"); }; + // FIXME: here we turn a possibly relative path into an absolute path. // This allows relative git flake inputs to be resolved against the // **current working directory** (as in POSIX), which tends to work out @@ -472,8 +472,10 @@ struct GitInputScheme : InputScheme // // See: https://discourse.nixos.org/t/57783 and #9708 // - if (url.scheme == "file" && !forceHttp && !isBareRepository) { - if (!isAbsolute(url.path)) { + if (url.scheme == "file" && !forceHttp && !isBareRepository(renderUrlPathEnsureLegal(url.path))) { + auto path = renderUrlPathEnsureLegal(url.path); + + if (!isAbsolute(path)) { warn( "Fetching Git repository '%s', which uses a path relative to the current directory. " "This is not supported and will stop working in a future release. 
" @@ -483,10 +485,10 @@ struct GitInputScheme : InputScheme // If we don't check here for the path existence, then we can give libgit2 any directory // and it will initialize them as git directories. - if (!pathExists(url.path)) { - throw Error("The path '%s' does not exist.", url.path); + if (!pathExists(path)) { + throw Error("The path '%s' does not exist.", path); } - repoInfo.location = std::filesystem::absolute(url.path); + repoInfo.location = std::filesystem::absolute(path); } else { if (url.scheme == "file") /* Query parameters are meaningless for file://, but diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index b3749b01a..e40757dec 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -38,7 +38,7 @@ struct GitArchiveInputScheme : InputScheme if (url.scheme != schemeName()) return {}; - auto path = tokenizeString>(url.path, "/"); + const auto & path = url.path; std::optional rev; std::optional ref; @@ -139,12 +139,12 @@ struct GitArchiveInputScheme : InputScheme auto repo = getStrAttr(input.attrs, "repo"); auto ref = input.getRef(); auto rev = input.getRev(); - auto path = owner + "/" + repo; + std::vector path{owner, repo}; assert(!(ref && rev)); if (ref) - path += "/" + *ref; + path.push_back(*ref); if (rev) - path += "/" + rev->to_string(HashFormat::Base16, false); + path.push_back(rev->to_string(HashFormat::Base16, false)); auto url = ParsedURL{ .scheme = std::string{schemeName()}, .path = path, diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index 4bd4d890d..c5cbf156b 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -14,7 +14,7 @@ struct IndirectInputScheme : InputScheme if (url.scheme != "flake") return {}; - auto path = tokenizeString>(url.path, "/"); + const auto & path = url.path; std::optional rev; std::optional ref; @@ -82,16 +82,15 @@ struct IndirectInputScheme : InputScheme ParsedURL toURL(const Input & input) const override { - ParsedURL url; - 
url.scheme = "flake"; - url.path = getStrAttr(input.attrs, "id"); + ParsedURL url{ + .scheme = "flake", + .path = {getStrAttr(input.attrs, "id")}, + }; if (auto ref = input.getRef()) { - url.path += '/'; - url.path += *ref; + url.path.push_back(*ref); }; if (auto rev = input.getRev()) { - url.path += '/'; - url.path += rev->gitRev(); + url.path.push_back(rev->gitRev()); }; return url; } diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 9b17d675e..641b3d6a8 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -120,7 +120,7 @@ struct MercurialInputScheme : InputScheme { auto url = parseURL(getStrAttr(input.attrs, "url")); if (url.scheme == "file" && !input.getRef() && !input.getRev()) - return url.path; + return renderUrlPathEnsureLegal(url.path); return {}; } @@ -152,7 +152,7 @@ struct MercurialInputScheme : InputScheme { auto url = parseURL(getStrAttr(input.attrs, "url")); bool isLocal = url.scheme == "file"; - return {isLocal, isLocal ? url.path : url.to_string()}; + return {isLocal, isLocal ? 
renderUrlPathEnsureLegal(url.path) : url.to_string()}; } StorePath fetchToStore(ref store, Input & input) const diff --git a/src/libfetchers/path.cc b/src/libfetchers/path.cc index e5635ee75..b66459fb9 100644 --- a/src/libfetchers/path.cc +++ b/src/libfetchers/path.cc @@ -20,7 +20,7 @@ struct PathInputScheme : InputScheme Input input{settings}; input.attrs.insert_or_assign("type", "path"); - input.attrs.insert_or_assign("path", url.path); + input.attrs.insert_or_assign("path", renderUrlPathEnsureLegal(url.path)); for (auto & [name, value] : url.query) if (name == "rev" || name == "narHash") @@ -74,7 +74,7 @@ struct PathInputScheme : InputScheme query.erase("__final"); return ParsedURL{ .scheme = "path", - .path = getStrAttr(input.attrs, "path"), + .path = splitString>(getStrAttr(input.attrs, "path"), "/"), .query = query, }; } diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index b89cd99f1..c1b28f674 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -107,19 +107,19 @@ DownloadFileResult downloadFile( } static DownloadTarballResult downloadTarball_( - const Settings & settings, const std::string & url, const Headers & headers, const std::string & displayPrefix) + const Settings & settings, const std::string & urlS, const Headers & headers, const std::string & displayPrefix) { + auto url = parseURL(urlS); // Some friendly error messages for common mistakes. // Namely lets catch when the url is a local file path, but // it is not in fact a tarball. 
- if (url.rfind("file://", 0) == 0) { - // Remove "file://" prefix to get the local file path - std::string localPath = url.substr(7); - if (!std::filesystem::exists(localPath)) { + if (url.scheme == "file") { + std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path); + if (!exists(localPath)) { throw Error("tarball '%s' does not exist.", localPath); } - if (std::filesystem::is_directory(localPath)) { + if (is_directory(localPath)) { if (std::filesystem::exists(localPath + "/.git")) { throw Error( "tarball '%s' is a git repository, not a tarball. Please use `git+file` as the scheme.", localPath); @@ -128,7 +128,7 @@ static DownloadTarballResult downloadTarball_( } } - Cache::Key cacheKey{"tarball", {{"url", url}}}; + Cache::Key cacheKey{"tarball", {{"url", urlS}}}; auto cached = settings.getCache()->lookupExpired(cacheKey); @@ -153,7 +153,7 @@ static DownloadTarballResult downloadTarball_( auto _res = std::make_shared>(); auto source = sinkToSource([&](Sink & sink) { - FileTransferRequest req(parseURL(url)); + FileTransferRequest req(url); req.expectedETag = cached ? getStrAttr(cached->value, "etag") : ""; getFileTransfer()->download(std::move(req), sink, [_res](FileTransferResult r) { *_res->lock() = r; }); }); @@ -166,7 +166,7 @@ static DownloadTarballResult downloadTarball_( /* Note: if the download is cached, `importTarball()` will receive no data, which causes it to import an empty tarball. */ - auto archive = hasSuffix(toLower(parseURL(url).path), ".zip") ? ({ + auto archive = !url.path.empty() && hasSuffix(toLower(url.path.back()), ".zip") ? ({ /* In streaming mode, libarchive doesn't handle symlinks in zip files correctly (#10649). 
So write the entire file to disk so libarchive can access it @@ -180,7 +180,7 @@ static DownloadTarballResult downloadTarball_( } TarArchive{path}; }) - : TarArchive{*source}; + : TarArchive{*source}; auto tarballCache = getTarballCache(); auto parseSink = tarballCache->getFileSystemObjectSink(); auto lastModified = unpackTarfileToSink(archive, *parseSink); @@ -234,8 +234,11 @@ struct CurlInputScheme : InputScheme { const StringSet transportUrlSchemes = {"file", "http", "https"}; - bool hasTarballExtension(std::string_view path) const + bool hasTarballExtension(const ParsedURL & url) const { + if (url.path.empty()) + return false; + const auto & path = url.path.back(); return hasSuffix(path, ".zip") || hasSuffix(path, ".tar") || hasSuffix(path, ".tgz") || hasSuffix(path, ".tar.gz") || hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2") || hasSuffix(path, ".tar.zst"); @@ -336,7 +339,7 @@ struct FileInputScheme : CurlInputScheme auto parsedUrlScheme = parseUrlScheme(url.scheme); return transportUrlSchemes.count(std::string(parsedUrlScheme.transport)) && (parsedUrlScheme.application ? parsedUrlScheme.application.value() == schemeName() - : (!requireTree && !hasTarballExtension(url.path))); + : (!requireTree && !hasTarballExtension(url))); } std::pair, Input> getAccessor(ref store, const Input & _input) const override @@ -373,7 +376,7 @@ struct TarballInputScheme : CurlInputScheme return transportUrlSchemes.count(std::string(parsedUrlScheme.transport)) && (parsedUrlScheme.application ? 
parsedUrlScheme.application.value() == schemeName() - : (requireTree || hasTarballExtension(url.path))); + : (requireTree || hasTarballExtension(url))); } std::pair, Input> getAccessor(ref store, const Input & _input) const override diff --git a/src/libflake/flakeref.cc b/src/libflake/flakeref.cc index 070f4e483..cd176f14b 100644 --- a/src/libflake/flakeref.cc +++ b/src/libflake/flakeref.cc @@ -143,7 +143,7 @@ std::pair parsePathFlakeRefWithFragment( auto parsedURL = ParsedURL{ .scheme = "git+file", .authority = ParsedURL::Authority{}, - .path = flakeRoot, + .path = splitString>(flakeRoot, "/"), .query = query, .fragment = fragment, }; @@ -172,7 +172,13 @@ std::pair parsePathFlakeRefWithFragment( return fromParsedURL( fetchSettings, - {.scheme = "path", .authority = ParsedURL::Authority{}, .path = path, .query = query, .fragment = fragment}, + { + .scheme = "path", + .authority = ParsedURL::Authority{}, + .path = splitString>(path, "/"), + .query = query, + .fragment = fragment, + }, isFlake); } @@ -193,7 +199,7 @@ parseFlakeIdRef(const fetchers::Settings & fetchSettings, const std::string & ur auto parsedURL = ParsedURL{ .scheme = "flake", .authority = ParsedURL::Authority{}, - .path = match[1], + .path = splitString>(match[1].str(), "/"), }; return std::make_pair( @@ -211,8 +217,12 @@ std::optional> parseURLFlakeRef( { try { auto parsed = parseURL(url, /*lenient=*/true); - if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file") && !isAbsolute(parsed.path)) - parsed.path = absPath(parsed.path, *baseDir); + if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file")) { + /* Here we know that the path must not contain encoded '/' or NUL bytes. 
*/ + auto path = renderUrlPathEnsureLegal(parsed.path); + if (!isAbsolute(path)) + parsed.path = splitString>(absPath(path, *baseDir), "/"); + } return fromParsedURL(fetchSettings, std::move(parsed), isFlake); } catch (BadURL &) { return std::nullopt; diff --git a/src/libflake/url-name.cc b/src/libflake/url-name.cc index b3eeca26a..3bba3692e 100644 --- a/src/libflake/url-name.cc +++ b/src/libflake/url-name.cc @@ -27,16 +27,21 @@ std::optional getNameFromURL(const ParsedURL & url) return match.str(2); } + /* This is not right, because special chars like slashes within the + path fragments should be percent encoded, but I don't think any + of the regexes above care. */ + auto path = concatStringsSep("/", url.path); + /* If this is a github/gitlab/sourcehut flake, use the repo name */ - if (std::regex_match(url.scheme, gitProviderRegex) && std::regex_match(url.path, match, secondPathSegmentRegex)) + if (std::regex_match(url.scheme, gitProviderRegex) && std::regex_match(path, match, secondPathSegmentRegex)) return match.str(1); /* If it is a regular git flake, use the directory name */ - if (std::regex_match(url.scheme, gitSchemeRegex) && std::regex_match(url.path, match, lastPathSegmentRegex)) + if (std::regex_match(url.scheme, gitSchemeRegex) && std::regex_match(path, match, lastPathSegmentRegex)) return match.str(1); /* If there is no fragment, take the last element of the path */ - if (std::regex_match(url.path, match, lastPathSegmentRegex)) + if (std::regex_match(path, match, lastPathSegmentRegex)) return match.str(1); /* If even that didn't work, the URL does not contain enough info to determine a useful name */ diff --git a/src/libstore-tests/s3.cc b/src/libstore-tests/s3.cc index df61c04c1..44a31ddc9 100644 --- a/src/libstore-tests/s3.cc +++ b/src/libstore-tests/s3.cc @@ -33,7 +33,7 @@ INSTANTIATE_TEST_SUITE_P( "s3://my-bucket/my-key.txt", { .bucket = "my-bucket", - .key = "my-key.txt", + .key = {"my-key.txt"}, }, "basic_s3_bucket", }, @@ -41,7 +41,7 @@ 
INSTANTIATE_TEST_SUITE_P( "s3://prod-cache/nix/store/abc123.nar.xz?region=eu-west-1", { .bucket = "prod-cache", - .key = "nix/store/abc123.nar.xz", + .key = {"nix", "store", "abc123.nar.xz"}, .region = "eu-west-1", }, "with_region", @@ -50,7 +50,7 @@ INSTANTIATE_TEST_SUITE_P( "s3://bucket/key?region=us-west-2&profile=prod&endpoint=custom.s3.com&scheme=https®ion=us-east-1", { .bucket = "bucket", - .key = "key", + .key = {"key"}, .profile = "prod", .region = "us-west-2", //< using the first parameter (decodeQuery ignores dupicates) .scheme = "https", @@ -62,7 +62,7 @@ INSTANTIATE_TEST_SUITE_P( "s3://cache/file.txt?profile=production®ion=ap-southeast-2", { .bucket = "cache", - .key = "file.txt", + .key = {"file.txt"}, .profile = "production", .region = "ap-southeast-2", }, @@ -72,13 +72,14 @@ INSTANTIATE_TEST_SUITE_P( "s3://bucket/key?endpoint=https://minio.local&scheme=http", { .bucket = "bucket", - .key = "key", + .key = {"key"}, /* TODO: Figure out what AWS SDK is doing when both endpointOverride and scheme are set. 
*/ .scheme = "http", .endpoint = ParsedURL{ .scheme = "https", .authority = ParsedURL::Authority{.host = "minio.local"}, + .path = {""}, }, }, "with_absolute_endpoint_uri", @@ -101,6 +102,7 @@ struct S3ToHttpsConversionTestCase { ParsedS3URL input; ParsedURL expected; + std::string expectedRendered; std::string description; }; @@ -113,6 +115,7 @@ TEST_P(S3ToHttpsConversionTest, ConvertsCorrectly) const auto & testCase = GetParam(); auto result = testCase.input.toHttpsUrl(); EXPECT_EQ(result, testCase.expected) << "Failed for: " << testCase.description; + EXPECT_EQ(result.to_string(), testCase.expectedRendered); } INSTANTIATE_TEST_SUITE_P( @@ -122,71 +125,77 @@ INSTANTIATE_TEST_SUITE_P( S3ToHttpsConversionTestCase{ ParsedS3URL{ .bucket = "my-bucket", - .key = "my-key.txt", + .key = {"my-key.txt"}, }, ParsedURL{ .scheme = "https", .authority = ParsedURL::Authority{.host = "s3.us-east-1.amazonaws.com"}, - .path = "/my-bucket/my-key.txt", + .path = {"", "my-bucket", "my-key.txt"}, }, + "https://s3.us-east-1.amazonaws.com/my-bucket/my-key.txt", "basic_s3_default_region", }, S3ToHttpsConversionTestCase{ ParsedS3URL{ .bucket = "prod-cache", - .key = "nix/store/abc123.nar.xz", + .key = {"nix", "store", "abc123.nar.xz"}, .region = "eu-west-1", }, ParsedURL{ .scheme = "https", .authority = ParsedURL::Authority{.host = "s3.eu-west-1.amazonaws.com"}, - .path = "/prod-cache/nix/store/abc123.nar.xz", + .path = {"", "prod-cache", "nix", "store", "abc123.nar.xz"}, }, + "https://s3.eu-west-1.amazonaws.com/prod-cache/nix/store/abc123.nar.xz", "with_eu_west_1_region", }, S3ToHttpsConversionTestCase{ ParsedS3URL{ .bucket = "bucket", - .key = "key", + .key = {"key"}, .scheme = "http", .endpoint = ParsedURL::Authority{.host = "custom.s3.com"}, }, ParsedURL{ .scheme = "http", .authority = ParsedURL::Authority{.host = "custom.s3.com"}, - .path = "/bucket/key", + .path = {"", "bucket", "key"}, }, + "http://custom.s3.com/bucket/key", "custom_endpoint_authority", }, 
S3ToHttpsConversionTestCase{ ParsedS3URL{ .bucket = "bucket", - .key = "key", + .key = {"key"}, .endpoint = ParsedURL{ .scheme = "http", .authority = ParsedURL::Authority{.host = "server", .port = 9000}, + .path = {""}, }, }, ParsedURL{ .scheme = "http", .authority = ParsedURL::Authority{.host = "server", .port = 9000}, - .path = "/bucket/key", + .path = {"", "bucket", "key"}, }, + "http://server:9000/bucket/key", "custom_endpoint_with_port", }, S3ToHttpsConversionTestCase{ ParsedS3URL{ .bucket = "bucket", - .key = "path/to/file.txt", + .key = {"path", "to", "file.txt"}, .region = "ap-southeast-2", .scheme = "https", }, ParsedURL{ .scheme = "https", .authority = ParsedURL::Authority{.host = "s3.ap-southeast-2.amazonaws.com"}, - .path = "/bucket/path/to/file.txt", + .path = {"", "bucket", "path", "to", "file.txt"}, }, + "https://s3.ap-southeast-2.amazonaws.com/bucket/path/to/file.txt", "complex_path_and_region", }), [](const ::testing::TestParamInfo & info) { return info.param.description; }); diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 7145a3d06..0007b9ad8 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -815,7 +815,7 @@ struct curlFileTransfer : public FileTransfer S3Helper s3Helper(profile, region, scheme, endpoint); // FIXME: implement ETag - auto s3Res = s3Helper.getObject(parsed.bucket, parsed.key); + auto s3Res = s3Helper.getObject(parsed.bucket, encodeUrlPath(parsed.key)); FileTransferResult res; if (!s3Res.data) throw FileTransferError(NotFound, {}, "S3 object '%s' does not exist", request.uri); diff --git a/src/libstore/http-binary-cache-store.cc b/src/libstore/http-binary-cache-store.cc index ab799617e..7737389a3 100644 --- a/src/libstore/http-binary-cache-store.cc +++ b/src/libstore/http-binary-cache-store.cc @@ -27,7 +27,7 @@ HttpBinaryCacheStoreConfig::HttpBinaryCacheStoreConfig( + (!_cacheUri.empty() ? 
_cacheUri : throw UsageError("`%s` Store requires a non-empty authority in Store URL", scheme)))) { - while (!cacheUri.path.empty() && cacheUri.path.back() == '/') + while (!cacheUri.path.empty() && cacheUri.path.back() == "") cacheUri.path.pop_back(); } @@ -37,7 +37,7 @@ StoreReference HttpBinaryCacheStoreConfig::getReference() const .variant = StoreReference::Specified{ .scheme = cacheUri.scheme, - .authority = (cacheUri.authority ? cacheUri.authority->to_string() : "") + cacheUri.path, + .authority = cacheUri.renderAuthorityAndPath(), }, .params = cacheUri.query, }; @@ -157,7 +157,7 @@ protected: /* Otherwise the last path fragment will get discarded. */ auto cacheUriWithTrailingSlash = config->cacheUri; if (!cacheUriWithTrailingSlash.path.empty()) - cacheUriWithTrailingSlash.path += "/"; + cacheUriWithTrailingSlash.path.push_back(""); /* path is not a path, but a full relative or absolute URL, e.g. we've seen in the wild NARINFO files have a URL diff --git a/src/libstore/include/nix/store/s3.hh b/src/libstore/include/nix/store/s3.hh index ec0cddf68..0270eeda6 100644 --- a/src/libstore/include/nix/store/s3.hh +++ b/src/libstore/include/nix/store/s3.hh @@ -54,7 +54,12 @@ struct S3Helper struct ParsedS3URL { std::string bucket; - std::string key; + /** + * @see ParsedURL::path. This is a vector for the same reason. + * Unlike ParsedURL::path this doesn't include the leading empty segment, + * since the bucket name is necessary. 
+ */ + std::vector key; std::optional profile; std::optional region; std::optional scheme; diff --git a/src/libstore/include/nix/store/store-reference.hh b/src/libstore/include/nix/store/store-reference.hh index 5cf1e9a11..1df333947 100644 --- a/src/libstore/include/nix/store/store-reference.hh +++ b/src/libstore/include/nix/store/store-reference.hh @@ -77,12 +77,22 @@ struct StoreReference */ std::string render(bool withParams = true) const; + std::string to_string() const + { + return render(); + } + /** * Parse a URI into a store reference. */ static StoreReference parse(const std::string & uri, const Params & extraParams = Params{}); }; +static inline std::ostream & operator<<(std::ostream & os, const StoreReference & ref) +{ + return os << ref.render(); +} + /** * Split URI into protocol+hierarchy part and its parameter set. */ diff --git a/src/libstore/s3.cc b/src/libstore/s3.cc index 739de2532..5396f43b9 100644 --- a/src/libstore/s3.cc +++ b/src/libstore/s3.cc @@ -3,6 +3,9 @@ #include "nix/util/url.hh" #include "nix/util/util.hh" #include "nix/util/canon-path.hh" +#include "nix/util/strings-inline.hh" + +#include namespace nix { @@ -24,10 +27,6 @@ try { || parsed.authority->hostType != ParsedURL::Authority::HostType::Name) throw BadURL("URI has a missing or invalid bucket name"); - std::string_view key = parsed.path; - /* Make the key a relative path. 
*/ - splitPrefix(key, "/"); - /* TODO: Validate the key against: * https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines */ @@ -41,10 +40,14 @@ try { }; auto endpoint = getOptionalParam("endpoint"); + if (parsed.path.size() <= 1 || !parsed.path.front().empty()) + throw BadURL("URI has a missing or invalid key"); + + auto path = std::views::drop(parsed.path, 1) | std::ranges::to>(); return ParsedS3URL{ .bucket = parsed.authority->host, - .key = std::string{key}, + .key = std::move(path), .profile = getOptionalParam("profile"), .region = getOptionalParam("region"), .scheme = getOptionalParam("scheme"), @@ -78,26 +81,35 @@ ParsedURL ParsedS3URL::toHttpsUrl() const overloaded{ [&](const std::monostate &) { // No custom endpoint, use standard AWS S3 endpoint + std::vector path{""}; + path.push_back(bucket); + path.insert(path.end(), key.begin(), key.end()); return ParsedURL{ .scheme = std::string{schemeStr}, .authority = ParsedURL::Authority{.host = "s3." 
+ regionStr + ".amazonaws.com"}, - .path = (CanonPath::root / bucket / CanonPath(key)).abs(), + .path = std::move(path), }; }, [&](const ParsedURL::Authority & auth) { // Endpoint is just an authority (hostname/port) + std::vector path{""}; + path.push_back(bucket); + path.insert(path.end(), key.begin(), key.end()); return ParsedURL{ .scheme = std::string{schemeStr}, .authority = auth, - .path = (CanonPath::root / bucket / CanonPath(key)).abs(), + .path = std::move(path), }; }, [&](const ParsedURL & endpointUrl) { // Endpoint is already a ParsedURL (e.g., http://server:9000) + auto path = endpointUrl.path; + path.push_back(bucket); + path.insert(path.end(), key.begin(), key.end()); return ParsedURL{ .scheme = endpointUrl.scheme, .authority = endpointUrl.authority, - .path = (CanonPath(endpointUrl.path) / bucket / CanonPath(key)).abs(), + .path = std::move(path), }; }, }, diff --git a/src/libstore/store-reference.cc b/src/libstore/store-reference.cc index adc60b391..8b4c19600 100644 --- a/src/libstore/store-reference.cc +++ b/src/libstore/store-reference.cc @@ -48,13 +48,11 @@ StoreReference StoreReference::parse(const std::string & uri, const StoreReferen auto parsedUri = parseURL(uri, /*lenient=*/true); params.insert(parsedUri.query.begin(), parsedUri.query.end()); - auto baseURI = parsedUri.authority.value_or(ParsedURL::Authority{}).to_string() + parsedUri.path; - return { .variant = Specified{ .scheme = std::move(parsedUri.scheme), - .authority = std::move(baseURI), + .authority = parsedUri.renderAuthorityAndPath(), }, .params = std::move(params), }; diff --git a/src/libutil-tests/url.cc b/src/libutil-tests/url.cc index 71c416a3b..9c698a943 100644 --- a/src/libutil-tests/url.cc +++ b/src/libutil-tests/url.cc @@ -18,7 +18,7 @@ TEST(parseURL, parsesSimpleHttpUrl) ParsedURL expected{ .scheme = "http", .authority = Authority{.hostType = HostType::Name, .host = "www.example.org"}, - .path = "/file.tar.gz", + .path = {"", "file.tar.gz"}, .query = (StringMap) {}, 
.fragment = "", }; @@ -35,7 +35,7 @@ TEST(parseURL, parsesSimpleHttpsUrl) ParsedURL expected{ .scheme = "https", .authority = Authority{.hostType = HostType::Name, .host = "www.example.org"}, - .path = "/file.tar.gz", + .path = {"", "file.tar.gz"}, .query = (StringMap) {}, .fragment = "", }; @@ -52,7 +52,7 @@ TEST(parseURL, parsesSimpleHttpUrlWithQueryAndFragment) ParsedURL expected{ .scheme = "https", .authority = Authority{.hostType = HostType::Name, .host = "www.example.org"}, - .path = "/file.tar.gz", + .path = {"", "file.tar.gz"}, .query = (StringMap) {{"download", "fast"}, {"when", "now"}}, .fragment = "hello", }; @@ -69,7 +69,7 @@ TEST(parseURL, parsesSimpleHttpUrlWithComplexFragment) ParsedURL expected{ .scheme = "http", .authority = Authority{.hostType = HostType::Name, .host = "www.example.org"}, - .path = "/file.tar.gz", + .path = {"", "file.tar.gz"}, .query = (StringMap) {{"field", "value"}}, .fragment = "?foo=bar#", }; @@ -85,7 +85,7 @@ TEST(parseURL, parsesFilePlusHttpsUrl) ParsedURL expected{ .scheme = "file+https", .authority = Authority{.hostType = HostType::Name, .host = "www.example.org"}, - .path = "/video.mp4", + .path = {"", "video.mp4"}, .query = (StringMap) {}, .fragment = "", }; @@ -108,7 +108,7 @@ TEST(parseURL, parseIPv4Address) ParsedURL expected{ .scheme = "http", .authority = Authority{.hostType = HostType::IPv4, .host = "127.0.0.1", .port = 8080}, - .path = "/file.tar.gz", + .path = {"", "file.tar.gz"}, .query = (StringMap) {{"download", "fast"}, {"when", "now"}}, .fragment = "hello", }; @@ -125,7 +125,7 @@ TEST(parseURL, parseScopedRFC6874IPv6Address) ParsedURL expected{ .scheme = "http", .authority = Authority{.hostType = HostType::IPv6, .host = "fe80::818c:da4d:8975:415c\%enp0s25", .port = 8080}, - .path = "", + .path = {""}, .query = (StringMap) {}, .fragment = "", }; @@ -147,7 +147,7 @@ TEST(parseURL, parseIPv6Address) .host = "2a02:8071:8192:c100:311d:192d:81ac:11ea", .port = 8080, }, - .path = "", + .path = {""}, .query = 
(StringMap) {}, .fragment = "", }; @@ -178,7 +178,7 @@ TEST(parseURL, parseUserPassword) .password = "pass", .port = 8080, }, - .path = "/file.tar.gz", + .path = {"", "file.tar.gz"}, .query = (StringMap) {}, .fragment = "", }; @@ -195,11 +195,12 @@ TEST(parseURL, parseFileURLWithQueryAndFragment) ParsedURL expected{ .scheme = "file", .authority = Authority{}, - .path = "/none/of//your/business", + .path = {"", "none", "of", "", "your", "business"}, .query = (StringMap) {}, .fragment = "", }; + ASSERT_EQ(parsed.renderPath(), "/none/of//your/business"); ASSERT_EQ(parsed, expected); ASSERT_EQ(s, parsed.to_string()); } @@ -212,9 +213,10 @@ TEST(parseURL, parseFileURL) ParsedURL expected{ .scheme = "file", .authority = std::nullopt, - .path = "/none/of/your/business/", + .path = {"", "none", "of", "your", "business", ""}, }; + ASSERT_EQ(parsed.renderPath(), "/none/of/your/business/"); ASSERT_EQ(parsed, expected); ASSERT_EQ(s, parsed.to_string()); } @@ -227,10 +229,11 @@ TEST(parseURL, parseFileURLWithAuthority) ParsedURL expected{ .scheme = "file", .authority = Authority{.host = ""}, - .path = "///of/your/business//", + .path = {"", "", "", "of", "your", "business", "", ""}, }; - ASSERT_EQ(parsed.authority, expected.authority); + ASSERT_EQ(parsed.path, expected.path); + ASSERT_EQ(parsed.renderPath(), "///of/your/business//"); ASSERT_EQ(parsed, expected); ASSERT_EQ(s, parsed.to_string()); } @@ -243,9 +246,10 @@ TEST(parseURL, parseFileURLNoLeadingSlash) ParsedURL expected{ .scheme = "file", .authority = std::nullopt, - .path = "none/of/your/business/", + .path = {"none", "of", "your", "business", ""}, }; + ASSERT_EQ(parsed.renderPath(), "none/of/your/business/"); ASSERT_EQ(parsed, expected); ASSERT_EQ("file:none/of/your/business/", parsed.to_string()); } @@ -258,9 +262,10 @@ TEST(parseURL, parseHttpTrailingSlash) ParsedURL expected{ .scheme = "http", .authority = Authority{.host = "example.com"}, - .path = "/", + .path = {"", ""}, }; + ASSERT_EQ(parsed.renderPath(), 
"/"); ASSERT_EQ(parsed, expected); ASSERT_EQ(s, parsed.to_string()); } @@ -306,7 +311,7 @@ TEST(parseURL, parseFTPUrl) ParsedURL expected{ .scheme = "ftp", .authority = Authority{.hostType = HostType::Name, .host = "ftp.nixos.org"}, - .path = "/downloads/nixos.iso", + .path = {"", "downloads", "nixos.iso"}, .query = (StringMap) {}, .fragment = "", }; @@ -342,7 +347,7 @@ TEST(parseURL, parsesHttpUrlWithEmptyPort) ParsedURL expected{ .scheme = "http", .authority = Authority{.hostType = HostType::Name, .host = "www.example.org"}, - .path = "/file.tar.gz", + .path = {"", "file.tar.gz"}, .query = (StringMap) {{"foo", "bar"}}, .fragment = "", }; @@ -362,7 +367,7 @@ TEST(parseURLRelative, resolvesRelativePath) ParsedURL expected{ .scheme = "http", .authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"}, - .path = "/dir/subdir/file.txt", + .path = {"", "dir", "subdir", "file.txt"}, .query = {}, .fragment = "", }; @@ -376,7 +381,7 @@ TEST(parseURLRelative, baseUrlIpv6AddressWithoutZoneId) ParsedURL expected{ .scheme = "http", .authority = ParsedURL::Authority{.hostType = HostType::IPv6, .host = "fe80::818c:da4d:8975:415c"}, - .path = "/dir/subdir/file.txt", + .path = {"", "dir", "subdir", "file.txt"}, .query = {}, .fragment = "", }; @@ -390,7 +395,7 @@ TEST(parseURLRelative, resolvesRelativePathIpv6AddressWithZoneId) ParsedURL expected{ .scheme = "http", .authority = Authority{.hostType = HostType::IPv6, .host = "fe80::818c:da4d:8975:415c\%enp0s25", .port = 8080}, - .path = "/dir/subdir/file2.txt", + .path = {"", "dir", "subdir", "file2.txt"}, .query = {}, .fragment = "", }; @@ -405,7 +410,7 @@ TEST(parseURLRelative, resolvesRelativePathWithDot) ParsedURL expected{ .scheme = "http", .authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"}, - .path = "/dir/subdir/file.txt", + .path = {"", "dir", "subdir", "file.txt"}, .query = {}, .fragment = "", }; @@ -419,7 +424,21 @@ TEST(parseURLRelative, resolvesParentDirectory) 
ParsedURL expected{ .scheme = "http", .authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org", .port = 234}, - .path = "/up.txt", + .path = {"", "up.txt"}, + .query = {}, + .fragment = "", + }; + ASSERT_EQ(parsed, expected); +} + +TEST(parseURLRelative, resolvesParentDirectoryNotTrickedByEscapedSlash) +{ + ParsedURL base = parseURL("http://example.org:234/dir\%2Ffirst-trick/another-dir\%2Fsecond-trick/page.html"); + auto parsed = parseURLRelative("../up.txt", base); + ParsedURL expected{ + .scheme = "http", + .authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org", .port = 234}, + .path = {"", "dir/first-trick", "up.txt"}, .query = {}, .fragment = "", }; @@ -433,7 +452,7 @@ TEST(parseURLRelative, replacesPathWithAbsoluteRelative) ParsedURL expected{ .scheme = "http", .authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"}, - .path = "/rooted.txt", + .path = {"", "rooted.txt"}, .query = {}, .fragment = "", }; @@ -448,7 +467,7 @@ TEST(parseURLRelative, keepsQueryAndFragmentFromRelative) ParsedURL expected{ .scheme = "https", .authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"}, - .path = "/path/other.html", + .path = {"", "path", "other.html"}, .query = {{"x", "1"}, {"y", "2"}}, .fragment = "frag", }; @@ -489,7 +508,7 @@ TEST(parseURLRelative, emptyRelative) ParsedURL expected{ .scheme = "https", .authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"}, - .path = "/path/index.html", + .path = {"", "path", "index.html"}, .query = {{"a b", "5 6"}, {"x y", "34"}}, .fragment = "", }; @@ -504,7 +523,7 @@ TEST(parseURLRelative, fragmentRelative) ParsedURL expected{ .scheme = "https", .authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"}, - .path = "/path/index.html", + .path = {"", "path", "index.html"}, .query = {{"a b", "5 6"}, {"x y", "34"}}, .fragment = "frag2", }; @@ -518,7 +537,7 @@ 
TEST(parseURLRelative, queryRelative) ParsedURL expected{ .scheme = "https", .authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"}, - .path = "/path/index.html", + .path = {"", "path", "index.html"}, .query = {{"asdf qwer", "1 2 3"}}, .fragment = "", }; @@ -532,7 +551,7 @@ TEST(parseURLRelative, queryFragmentRelative) ParsedURL expected{ .scheme = "https", .authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"}, - .path = "/path/index.html", + .path = {"", "path", "index.html"}, .query = {{"asdf qwer", "1 2 3"}}, .fragment = "frag2", }; @@ -648,6 +667,25 @@ TEST(percentEncode, yen) ASSERT_EQ(percentDecode(e), s); } +TEST(parseURL, gitlabNamespacedProjectUrls) +{ + // Test GitLab URL patterns with namespaced projects + // These should preserve %2F encoding in the path + auto s = "https://gitlab.example.com/api/v4/projects/group%2Fsubgroup%2Fproject/repository/archive.tar.gz"; + auto parsed = parseURL(s); + + ParsedURL expected{ + .scheme = "https", + .authority = Authority{.hostType = HostType::Name, .host = "gitlab.example.com"}, + .path = {"", "api", "v4", "projects", "group/subgroup/project", "repository", "archive.tar.gz"}, + .query = {}, + .fragment = "", + }; + + ASSERT_EQ(parsed, expected); + ASSERT_EQ(s, parsed.to_string()); +} + TEST(nix, isValidSchemeName) { ASSERT_TRUE(isValidSchemeName("http")); diff --git a/src/libutil/include/nix/util/url.hh b/src/libutil/include/nix/util/url.hh index 54bd1e533..1d9797551 100644 --- a/src/libutil/include/nix/util/url.hh +++ b/src/libutil/include/nix/util/url.hh @@ -1,7 +1,10 @@ #pragma once ///@file +#include + #include "nix/util/error.hh" +#include "nix/util/canon-path.hh" namespace nix { @@ -65,6 +68,7 @@ struct ParsedURL }; std::string scheme; + /** * Optional parsed authority component of the URL. * @@ -75,16 +79,155 @@ struct ParsedURL * part of the URL. 
*/ std::optional authority; - std::string path; + + /** + * @note Unlike Unix paths, URLs provide a way to escape path + * separators, in the form of the `%2F` encoding of `/`. That means + * that if one percent-decodes the path into a single string, that + * decoding will be *lossy*, because `/` and `%2F` both become `/`. + * The right thing to do is instead split up the path on `/`, and + * then percent decode each part. + * + * For an example, the path + * ``` + * foo/bar%2Fbaz/quux + * ``` + * is parsed as + * ``` + * {"foo", "bar/baz", "quux"} + * ``` + * + * We're doing splitting and joining that assumes the separator (`/` in this case) only goes *between* elements. + * + * That means the parsed representation will begin with an empty + * element to make an initial `/`, and will end with an empty + * element to make a trailing `/`. That means that elements of this + * vector mostly, but *not always*, correspond to segments of the + * path. + * + * Examples: + * + * - ``` + * https://foo.com/bar + * ``` + * has path + * ``` + * {"", "bar"} + * ``` + * + * - ``` + * https://foo.com/bar/ + * ``` + * has path + * ``` + * {"", "bar", ""} + * ``` + * + * - ``` + * https://foo.com//bar/// + * ``` + * has path + * ``` + * {"", "", "bar", "", "", ""} + * ``` + * + * - ``` + * https://foo.com + * ``` + * has path + * ``` + * {""} + * ``` + * + * - ``` + * https://foo.com/ + * ``` + * has path + * ``` + * {"", ""} + * ``` + * + * - ``` + * tel:01234 + * ``` + * has path `{"01234"}` (and no authority) + * + * - ``` + * foo:/01234 + * ``` + * has path `{"", "01234"}` (and no authority) + * + * Note that both trailing and leading slashes are, in general, + * semantically significant.
+ * + * For trailing slashes, the main example affecting many schemes is + * that `../baz` resolves against a base URL differently depending on + * the presence/absence of a trailing slash: + * + * - `https://foo.com/bar` is `https://foo.com/baz` + * + * - `https://foo.com/bar/` is `https://foo.com/bar/baz` + * + * See `parseURLRelative` for more details. + * + * For leading slashes, there are some requirements to be aware of. + * + * - When there is an authority, the path *must* start with a leading + * slash. Otherwise the path will not be separated from the + * authority, and will not round trip through the parser: + * + * ``` + * {.scheme="https", .authority.host = "foo", .path={"bad"}} + * ``` + * will render to `https://foobar`, but that would parse back as + * ``` + * {.scheme="https", .authority.host = "foobar", .path={}} + * ``` + * + * - When there is no authority, the path must *not* begin with two + * slashes. Otherwise, there will be another parser round trip + * issue: + * + * ``` + * {.scheme="https", .path={"", "", "bad"}} + * ``` + * will render to `https://bad`, but that would parse back as + * ``` + * {.scheme="https", .authority.host = "bad", .path={}} + * ``` + * + * These invariants will be checked in `to_string` and + * `renderAuthorityAndPath`. + */ + std::vector path; + StringMap query; + std::string fragment; + /** + * Render just the middle part of a URL, without the `//` which + * indicates whether the authority is present. + * + * @note This is kind of an ad-hoc + * operation, but it ends up coming up with some frequency, probably + * due to the current design of `StoreReference` in `nix-store`. + */ + std::string renderAuthorityAndPath() const; + std::string to_string() const; + /** + * Render the path to a string. + * + * @param encode Whether to percent encode path segments.
+ */ + std::string renderPath(bool encode = false) const; + auto operator<=>(const ParsedURL & other) const noexcept = default; /** - * Remove `.` and `..` path elements. + * Remove `.` and `..` path segments. */ ParsedURL canonicalise(); }; @@ -96,6 +239,22 @@ MakeError(BadURL, Error); std::string percentDecode(std::string_view in); std::string percentEncode(std::string_view s, std::string_view keep = ""); +/** + * Get the path part of the URL as an absolute or relative Path. + * + * @throws if any path component contains a slash (which would have + * been escaped `%2F` in the rendered URL). This is because OS file + * paths have no escape sequences --- file names cannot contain a + * `/`. + */ +Path renderUrlPathEnsureLegal(const std::vector & urlPath); + +/** + * Percent encode path. `%2F` for "interior slashes" is the most + * important. + */ +std::string encodeUrlPath(std::span urlPath); + /** * @param lenient @see parseURL */ @@ -114,6 +273,12 @@ std::string encodeQuery(const StringMap & query); * @note IPv6 ZoneId literals (RFC4007) are represented in URIs according to RFC6874. * * @throws BadURL + * + * The WHATWG specification of the URL constructor in JavaScript is + * also a useful reference: + * https://url.spec.whatwg.org/#concept-basic-url-parser. Note, however, + * that it includes various scheme-specific normalizations / extra steps + * that we do not implement. */ ParsedURL parseURL(std::string_view url, bool lenient = false); @@ -123,7 +288,11 @@ ParsedURL parseURL(std::string_view url, bool lenient = false); * * This is specified in [IETF RFC 3986, section 5](https://datatracker.ietf.org/doc/html/rfc3986#section-5) * - * Behavior should also match the `new URL(url, base)` JavaScript constructor. + * @throws BadURL + * + * Behavior should also match the `new URL(url, base)` JavaScript + * constructor, except for extra steps specific to the HTTP scheme. See + * `parseURL` for a link to the relevant WHATWG standard.
*/ ParsedURL parseURLRelative(std::string_view url, const ParsedURL & base); diff --git a/src/libutil/url.cc b/src/libutil/url.cc index ff0b7a71b..b9bf0b4f4 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -3,6 +3,7 @@ #include "nix/util/util.hh" #include "nix/util/split.hh" #include "nix/util/canon-path.hh" +#include "nix/util/strings-inline.hh" #include @@ -179,11 +180,14 @@ static ParsedURL fromBoostUrlView(boost::urls::url_view urlView, bool lenient) if (authority && authority->host.size() && transportIsFile) throw BadURL("file:// URL '%s' has unexpected authority '%s'", urlView.buffer(), *authority); - auto path = urlView.path(); /* Does pct-decoding */ auto fragment = urlView.fragment(); /* Does pct-decoding */ - if (transportIsFile && path.empty()) - path = "/"; + boost::core::string_view encodedPath = urlView.encoded_path(); + if (transportIsFile && encodedPath.empty()) + encodedPath = "/"; + + auto path = std::views::transform(splitString>(encodedPath, "/"), percentDecode) + | std::ranges::to>(); /* Get the raw query. Store URI supports smuggling doubly nested queries, where the inner &/? are pct-encoded. 
*/ @@ -192,7 +196,7 @@ static ParsedURL fromBoostUrlView(boost::urls::url_view urlView, bool lenient) return ParsedURL{ .scheme = scheme, .authority = authority, - .path = path, + .path = std::move(path), .query = decodeQuery(query, lenient), .fragment = fragment, }; @@ -215,7 +219,7 @@ try { if (authority.port) resolved.set_port_number(*authority.port); } - resolved.set_path(base.path); + resolved.set_encoded_path(encodeUrlPath(base.path)); resolved.set_encoded_query(encodeQuery(base.query)); resolved.set_fragment(base.fragment); } catch (boost::system::system_error & e) { @@ -291,7 +295,15 @@ try { } const static std::string allowedInQuery = ":@/?"; -const static std::string allowedInPath = ":@/"; +const static std::string allowedInPath = ":@"; + +std::string encodeUrlPath(std::span urlPath) +{ + std::vector encodedPath; + for (auto & p : urlPath) + encodedPath.push_back(percentEncode(p, allowedInPath)); + return concatStringsSep("/", encodedPath); +} std::string encodeQuery(const StringMap & ss) { @@ -308,10 +320,62 @@ std::string encodeQuery(const StringMap & ss) return res; } +Path renderUrlPathEnsureLegal(const std::vector & urlPath) +{ + for (const auto & comp : urlPath) { + /* This is only really valid for UNIX. Windows has more restrictions. */ + if (comp.contains('/')) + throw BadURL("URL path component '%s' contains '/', which is not allowed in file names", comp); + if (comp.contains(char(0))) + throw BadURL("URL path component '%s' contains NUL byte which is not allowed", comp); + } + + return concatStringsSep("/", urlPath); +} + +std::string ParsedURL::renderPath(bool encode) const +{ + if (encode) + return encodeUrlPath(path); + return concatStringsSep("/", path); +} + +std::string ParsedURL::renderAuthorityAndPath() const +{ + std::string res; + /* The following assertions correspond to 3.3. Path [rfc3986]. URL parser + will never violate these properties, but hand-constructed ParsedURLs might. 
*/ + if (authority.has_value()) { + /* If a URI contains an authority component, then the path component + must either be empty or begin with a slash ("/") character. */ + assert(path.empty() || path.front().empty()); + res += authority->to_string(); + } else if (std::ranges::equal(std::views::take(path, 2), std::views::repeat("", 2))) { + /* If a URI does not contain an authority component, then the path cannot begin + with two slash characters ("//") */ + unreachable(); + } + res += encodeUrlPath(path); + return res; +} + std::string ParsedURL::to_string() const { - return scheme + ":" + (authority ? "//" + authority->to_string() : "") + percentEncode(path, allowedInPath) - + (query.empty() ? "" : "?" + encodeQuery(query)) + (fragment.empty() ? "" : "#" + percentEncode(fragment)); + std::string res; + res += scheme; + res += ":"; + if (authority.has_value()) + res += "//"; + res += renderAuthorityAndPath(); + if (!query.empty()) { + res += "?"; + res += encodeQuery(query); + } + if (!fragment.empty()) { + res += "#"; + res += percentEncode(fragment); + } + return res; } std::ostream & operator<<(std::ostream & os, const ParsedURL & url) @@ -323,7 +387,7 @@ std::ostream & operator<<(std::ostream & os, const ParsedURL & url) ParsedURL ParsedURL::canonicalise() { ParsedURL res(*this); - res.path = CanonPath(res.path).abs(); + res.path = splitString>(CanonPath(renderPath()).abs(), "/"); return res; } @@ -352,7 +416,11 @@ ParsedURL fixGitURL(const std::string & url) if (hasPrefix(url, "file:")) return parseURL(url); if (url.find("://") == std::string::npos) { - return (ParsedURL{.scheme = "file", .authority = ParsedURL::Authority{}, .path = url}); + return ParsedURL{ + .scheme = "file", + .authority = ParsedURL::Authority{}, + .path = splitString>(url, "/"), + }; } return parseURL(url); }