mirror of
https://github.com/NixOS/nix.git
synced 2025-11-08 19:46:02 +01:00
Fix ParsedURL handling of %2F in URL paths
See the new extensive doxygen in `url.hh`. This fixes fetching gitlab: flakes. Paths are now stored as a std::vector of individual path segments, which can themselves contain path separators '/' (%2F). This is necessary to make the Gitlab's /projects/ API work. Co-authored-by: John Ericson <John.Ericson@Obsidian.Systems> Co-authored-by: Sergei Zimmerman <sergei@zimmerman.foo>
This commit is contained in:
parent
6839f3de55
commit
c436b7a32a
19 changed files with 446 additions and 117 deletions
|
|
@ -69,7 +69,8 @@ static LfsApiInfo getLfsApi(const ParsedURL & url)
|
|||
|
||||
args.push_back("--");
|
||||
args.push_back("git-lfs-authenticate");
|
||||
args.push_back(url.path);
|
||||
// FIXME %2F encode slashes? Does this command take/accept percent encoding?
|
||||
args.push_back(url.renderPath(/*encode=*/false));
|
||||
args.push_back("download");
|
||||
|
||||
auto [status, output] = runProgram({.program = "ssh", .args = args});
|
||||
|
|
|
|||
|
|
@ -462,8 +462,8 @@ struct GitInputScheme : InputScheme
|
|||
|
||||
// Why are we checking for bare repository?
|
||||
// well if it's a bare repository we want to force a git fetch rather than copying the folder
|
||||
bool isBareRepository = url.scheme == "file" && pathExists(url.path) && !pathExists(url.path + "/.git");
|
||||
//
|
||||
auto isBareRepository = [](PathView path) { return pathExists(path) && !pathExists(path + "/.git"); };
|
||||
|
||||
// FIXME: here we turn a possibly relative path into an absolute path.
|
||||
// This allows relative git flake inputs to be resolved against the
|
||||
// **current working directory** (as in POSIX), which tends to work out
|
||||
|
|
@ -472,8 +472,10 @@ struct GitInputScheme : InputScheme
|
|||
//
|
||||
// See: https://discourse.nixos.org/t/57783 and #9708
|
||||
//
|
||||
if (url.scheme == "file" && !forceHttp && !isBareRepository) {
|
||||
if (!isAbsolute(url.path)) {
|
||||
if (url.scheme == "file" && !forceHttp && !isBareRepository(renderUrlPathEnsureLegal(url.path))) {
|
||||
auto path = renderUrlPathEnsureLegal(url.path);
|
||||
|
||||
if (!isAbsolute(path)) {
|
||||
warn(
|
||||
"Fetching Git repository '%s', which uses a path relative to the current directory. "
|
||||
"This is not supported and will stop working in a future release. "
|
||||
|
|
@ -483,10 +485,10 @@ struct GitInputScheme : InputScheme
|
|||
|
||||
// If we don't check here for the path existence, then we can give libgit2 any directory
|
||||
// and it will initialize them as git directories.
|
||||
if (!pathExists(url.path)) {
|
||||
throw Error("The path '%s' does not exist.", url.path);
|
||||
if (!pathExists(path)) {
|
||||
throw Error("The path '%s' does not exist.", path);
|
||||
}
|
||||
repoInfo.location = std::filesystem::absolute(url.path);
|
||||
repoInfo.location = std::filesystem::absolute(path);
|
||||
} else {
|
||||
if (url.scheme == "file")
|
||||
/* Query parameters are meaningless for file://, but
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ struct GitArchiveInputScheme : InputScheme
|
|||
if (url.scheme != schemeName())
|
||||
return {};
|
||||
|
||||
auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
|
||||
const auto & path = url.path;
|
||||
|
||||
std::optional<Hash> rev;
|
||||
std::optional<std::string> ref;
|
||||
|
|
@ -139,12 +139,12 @@ struct GitArchiveInputScheme : InputScheme
|
|||
auto repo = getStrAttr(input.attrs, "repo");
|
||||
auto ref = input.getRef();
|
||||
auto rev = input.getRev();
|
||||
auto path = owner + "/" + repo;
|
||||
std::vector<std::string> path{owner, repo};
|
||||
assert(!(ref && rev));
|
||||
if (ref)
|
||||
path += "/" + *ref;
|
||||
path.push_back(*ref);
|
||||
if (rev)
|
||||
path += "/" + rev->to_string(HashFormat::Base16, false);
|
||||
path.push_back(rev->to_string(HashFormat::Base16, false));
|
||||
auto url = ParsedURL{
|
||||
.scheme = std::string{schemeName()},
|
||||
.path = path,
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ struct IndirectInputScheme : InputScheme
|
|||
if (url.scheme != "flake")
|
||||
return {};
|
||||
|
||||
auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
|
||||
const auto & path = url.path;
|
||||
|
||||
std::optional<Hash> rev;
|
||||
std::optional<std::string> ref;
|
||||
|
|
@ -82,16 +82,15 @@ struct IndirectInputScheme : InputScheme
|
|||
|
||||
ParsedURL toURL(const Input & input) const override
|
||||
{
|
||||
ParsedURL url;
|
||||
url.scheme = "flake";
|
||||
url.path = getStrAttr(input.attrs, "id");
|
||||
ParsedURL url{
|
||||
.scheme = "flake",
|
||||
.path = {getStrAttr(input.attrs, "id")},
|
||||
};
|
||||
if (auto ref = input.getRef()) {
|
||||
url.path += '/';
|
||||
url.path += *ref;
|
||||
url.path.push_back(*ref);
|
||||
};
|
||||
if (auto rev = input.getRev()) {
|
||||
url.path += '/';
|
||||
url.path += rev->gitRev();
|
||||
url.path.push_back(rev->gitRev());
|
||||
};
|
||||
return url;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -120,7 +120,7 @@ struct MercurialInputScheme : InputScheme
|
|||
{
|
||||
auto url = parseURL(getStrAttr(input.attrs, "url"));
|
||||
if (url.scheme == "file" && !input.getRef() && !input.getRev())
|
||||
return url.path;
|
||||
return renderUrlPathEnsureLegal(url.path);
|
||||
return {};
|
||||
}
|
||||
|
||||
|
|
@ -152,7 +152,7 @@ struct MercurialInputScheme : InputScheme
|
|||
{
|
||||
auto url = parseURL(getStrAttr(input.attrs, "url"));
|
||||
bool isLocal = url.scheme == "file";
|
||||
return {isLocal, isLocal ? url.path : url.to_string()};
|
||||
return {isLocal, isLocal ? renderUrlPathEnsureLegal(url.path) : url.to_string()};
|
||||
}
|
||||
|
||||
StorePath fetchToStore(ref<Store> store, Input & input) const
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ struct PathInputScheme : InputScheme
|
|||
|
||||
Input input{settings};
|
||||
input.attrs.insert_or_assign("type", "path");
|
||||
input.attrs.insert_or_assign("path", url.path);
|
||||
input.attrs.insert_or_assign("path", renderUrlPathEnsureLegal(url.path));
|
||||
|
||||
for (auto & [name, value] : url.query)
|
||||
if (name == "rev" || name == "narHash")
|
||||
|
|
@ -74,7 +74,7 @@ struct PathInputScheme : InputScheme
|
|||
query.erase("__final");
|
||||
return ParsedURL{
|
||||
.scheme = "path",
|
||||
.path = getStrAttr(input.attrs, "path"),
|
||||
.path = splitString<std::vector<std::string>>(getStrAttr(input.attrs, "path"), "/"),
|
||||
.query = query,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -107,19 +107,19 @@ DownloadFileResult downloadFile(
|
|||
}
|
||||
|
||||
static DownloadTarballResult downloadTarball_(
|
||||
const Settings & settings, const std::string & url, const Headers & headers, const std::string & displayPrefix)
|
||||
const Settings & settings, const std::string & urlS, const Headers & headers, const std::string & displayPrefix)
|
||||
{
|
||||
auto url = parseURL(urlS);
|
||||
|
||||
// Some friendly error messages for common mistakes.
|
||||
// Namely lets catch when the url is a local file path, but
|
||||
// it is not in fact a tarball.
|
||||
if (url.rfind("file://", 0) == 0) {
|
||||
// Remove "file://" prefix to get the local file path
|
||||
std::string localPath = url.substr(7);
|
||||
if (!std::filesystem::exists(localPath)) {
|
||||
if (url.scheme == "file") {
|
||||
std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path);
|
||||
if (!exists(localPath)) {
|
||||
throw Error("tarball '%s' does not exist.", localPath);
|
||||
}
|
||||
if (std::filesystem::is_directory(localPath)) {
|
||||
if (is_directory(localPath)) {
|
||||
if (std::filesystem::exists(localPath + "/.git")) {
|
||||
throw Error(
|
||||
"tarball '%s' is a git repository, not a tarball. Please use `git+file` as the scheme.", localPath);
|
||||
|
|
@ -128,7 +128,7 @@ static DownloadTarballResult downloadTarball_(
|
|||
}
|
||||
}
|
||||
|
||||
Cache::Key cacheKey{"tarball", {{"url", url}}};
|
||||
Cache::Key cacheKey{"tarball", {{"url", urlS}}};
|
||||
|
||||
auto cached = settings.getCache()->lookupExpired(cacheKey);
|
||||
|
||||
|
|
@ -153,7 +153,7 @@ static DownloadTarballResult downloadTarball_(
|
|||
auto _res = std::make_shared<Sync<FileTransferResult>>();
|
||||
|
||||
auto source = sinkToSource([&](Sink & sink) {
|
||||
FileTransferRequest req(parseURL(url));
|
||||
FileTransferRequest req(url);
|
||||
req.expectedETag = cached ? getStrAttr(cached->value, "etag") : "";
|
||||
getFileTransfer()->download(std::move(req), sink, [_res](FileTransferResult r) { *_res->lock() = r; });
|
||||
});
|
||||
|
|
@ -166,7 +166,7 @@ static DownloadTarballResult downloadTarball_(
|
|||
|
||||
/* Note: if the download is cached, `importTarball()` will receive
|
||||
no data, which causes it to import an empty tarball. */
|
||||
auto archive = hasSuffix(toLower(parseURL(url).path), ".zip") ? ({
|
||||
auto archive = !url.path.empty() && hasSuffix(toLower(url.path.back()), ".zip") ? ({
|
||||
/* In streaming mode, libarchive doesn't handle
|
||||
symlinks in zip files correctly (#10649). So write
|
||||
the entire file to disk so libarchive can access it
|
||||
|
|
@ -180,7 +180,7 @@ static DownloadTarballResult downloadTarball_(
|
|||
}
|
||||
TarArchive{path};
|
||||
})
|
||||
: TarArchive{*source};
|
||||
: TarArchive{*source};
|
||||
auto tarballCache = getTarballCache();
|
||||
auto parseSink = tarballCache->getFileSystemObjectSink();
|
||||
auto lastModified = unpackTarfileToSink(archive, *parseSink);
|
||||
|
|
@ -234,8 +234,11 @@ struct CurlInputScheme : InputScheme
|
|||
{
|
||||
const StringSet transportUrlSchemes = {"file", "http", "https"};
|
||||
|
||||
bool hasTarballExtension(std::string_view path) const
|
||||
bool hasTarballExtension(const ParsedURL & url) const
|
||||
{
|
||||
if (url.path.empty())
|
||||
return false;
|
||||
const auto & path = url.path.back();
|
||||
return hasSuffix(path, ".zip") || hasSuffix(path, ".tar") || hasSuffix(path, ".tgz")
|
||||
|| hasSuffix(path, ".tar.gz") || hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2")
|
||||
|| hasSuffix(path, ".tar.zst");
|
||||
|
|
@ -336,7 +339,7 @@ struct FileInputScheme : CurlInputScheme
|
|||
auto parsedUrlScheme = parseUrlScheme(url.scheme);
|
||||
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
|
||||
&& (parsedUrlScheme.application ? parsedUrlScheme.application.value() == schemeName()
|
||||
: (!requireTree && !hasTarballExtension(url.path)));
|
||||
: (!requireTree && !hasTarballExtension(url)));
|
||||
}
|
||||
|
||||
std::pair<ref<SourceAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override
|
||||
|
|
@ -373,7 +376,7 @@ struct TarballInputScheme : CurlInputScheme
|
|||
|
||||
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
|
||||
&& (parsedUrlScheme.application ? parsedUrlScheme.application.value() == schemeName()
|
||||
: (requireTree || hasTarballExtension(url.path)));
|
||||
: (requireTree || hasTarballExtension(url)));
|
||||
}
|
||||
|
||||
std::pair<ref<SourceAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override
|
||||
|
|
|
|||
|
|
@ -143,7 +143,7 @@ std::pair<FlakeRef, std::string> parsePathFlakeRefWithFragment(
|
|||
auto parsedURL = ParsedURL{
|
||||
.scheme = "git+file",
|
||||
.authority = ParsedURL::Authority{},
|
||||
.path = flakeRoot,
|
||||
.path = splitString<std::vector<std::string>>(flakeRoot, "/"),
|
||||
.query = query,
|
||||
.fragment = fragment,
|
||||
};
|
||||
|
|
@ -172,7 +172,13 @@ std::pair<FlakeRef, std::string> parsePathFlakeRefWithFragment(
|
|||
|
||||
return fromParsedURL(
|
||||
fetchSettings,
|
||||
{.scheme = "path", .authority = ParsedURL::Authority{}, .path = path, .query = query, .fragment = fragment},
|
||||
{
|
||||
.scheme = "path",
|
||||
.authority = ParsedURL::Authority{},
|
||||
.path = splitString<std::vector<std::string>>(path, "/"),
|
||||
.query = query,
|
||||
.fragment = fragment,
|
||||
},
|
||||
isFlake);
|
||||
}
|
||||
|
||||
|
|
@ -193,7 +199,7 @@ parseFlakeIdRef(const fetchers::Settings & fetchSettings, const std::string & ur
|
|||
auto parsedURL = ParsedURL{
|
||||
.scheme = "flake",
|
||||
.authority = ParsedURL::Authority{},
|
||||
.path = match[1],
|
||||
.path = splitString<std::vector<std::string>>(match[1].str(), "/"),
|
||||
};
|
||||
|
||||
return std::make_pair(
|
||||
|
|
@ -211,8 +217,12 @@ std::optional<std::pair<FlakeRef, std::string>> parseURLFlakeRef(
|
|||
{
|
||||
try {
|
||||
auto parsed = parseURL(url, /*lenient=*/true);
|
||||
if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file") && !isAbsolute(parsed.path))
|
||||
parsed.path = absPath(parsed.path, *baseDir);
|
||||
if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file")) {
|
||||
/* Here we know that the path must not contain encoded '/' or NUL bytes. */
|
||||
auto path = renderUrlPathEnsureLegal(parsed.path);
|
||||
if (!isAbsolute(path))
|
||||
parsed.path = splitString<std::vector<std::string>>(absPath(path, *baseDir), "/");
|
||||
}
|
||||
return fromParsedURL(fetchSettings, std::move(parsed), isFlake);
|
||||
} catch (BadURL &) {
|
||||
return std::nullopt;
|
||||
|
|
|
|||
|
|
@ -27,16 +27,21 @@ std::optional<std::string> getNameFromURL(const ParsedURL & url)
|
|||
return match.str(2);
|
||||
}
|
||||
|
||||
/* This is not right, because special chars like slashes within the
|
||||
path fragments should be percent encoded, but I don't think any
|
||||
of the regexes above care. */
|
||||
auto path = concatStringsSep("/", url.path);
|
||||
|
||||
/* If this is a github/gitlab/sourcehut flake, use the repo name */
|
||||
if (std::regex_match(url.scheme, gitProviderRegex) && std::regex_match(url.path, match, secondPathSegmentRegex))
|
||||
if (std::regex_match(url.scheme, gitProviderRegex) && std::regex_match(path, match, secondPathSegmentRegex))
|
||||
return match.str(1);
|
||||
|
||||
/* If it is a regular git flake, use the directory name */
|
||||
if (std::regex_match(url.scheme, gitSchemeRegex) && std::regex_match(url.path, match, lastPathSegmentRegex))
|
||||
if (std::regex_match(url.scheme, gitSchemeRegex) && std::regex_match(path, match, lastPathSegmentRegex))
|
||||
return match.str(1);
|
||||
|
||||
/* If there is no fragment, take the last element of the path */
|
||||
if (std::regex_match(url.path, match, lastPathSegmentRegex))
|
||||
if (std::regex_match(path, match, lastPathSegmentRegex))
|
||||
return match.str(1);
|
||||
|
||||
/* If even that didn't work, the URL does not contain enough info to determine a useful name */
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ INSTANTIATE_TEST_SUITE_P(
|
|||
"s3://my-bucket/my-key.txt",
|
||||
{
|
||||
.bucket = "my-bucket",
|
||||
.key = "my-key.txt",
|
||||
.key = {"my-key.txt"},
|
||||
},
|
||||
"basic_s3_bucket",
|
||||
},
|
||||
|
|
@ -41,7 +41,7 @@ INSTANTIATE_TEST_SUITE_P(
|
|||
"s3://prod-cache/nix/store/abc123.nar.xz?region=eu-west-1",
|
||||
{
|
||||
.bucket = "prod-cache",
|
||||
.key = "nix/store/abc123.nar.xz",
|
||||
.key = {"nix", "store", "abc123.nar.xz"},
|
||||
.region = "eu-west-1",
|
||||
},
|
||||
"with_region",
|
||||
|
|
@ -50,7 +50,7 @@ INSTANTIATE_TEST_SUITE_P(
|
|||
"s3://bucket/key?region=us-west-2&profile=prod&endpoint=custom.s3.com&scheme=https®ion=us-east-1",
|
||||
{
|
||||
.bucket = "bucket",
|
||||
.key = "key",
|
||||
.key = {"key"},
|
||||
.profile = "prod",
|
||||
.region = "us-west-2", //< using the first parameter (decodeQuery ignores dupicates)
|
||||
.scheme = "https",
|
||||
|
|
@ -62,7 +62,7 @@ INSTANTIATE_TEST_SUITE_P(
|
|||
"s3://cache/file.txt?profile=production®ion=ap-southeast-2",
|
||||
{
|
||||
.bucket = "cache",
|
||||
.key = "file.txt",
|
||||
.key = {"file.txt"},
|
||||
.profile = "production",
|
||||
.region = "ap-southeast-2",
|
||||
},
|
||||
|
|
@ -72,13 +72,14 @@ INSTANTIATE_TEST_SUITE_P(
|
|||
"s3://bucket/key?endpoint=https://minio.local&scheme=http",
|
||||
{
|
||||
.bucket = "bucket",
|
||||
.key = "key",
|
||||
.key = {"key"},
|
||||
/* TODO: Figure out what AWS SDK is doing when both endpointOverride and scheme are set. */
|
||||
.scheme = "http",
|
||||
.endpoint =
|
||||
ParsedURL{
|
||||
.scheme = "https",
|
||||
.authority = ParsedURL::Authority{.host = "minio.local"},
|
||||
.path = {""},
|
||||
},
|
||||
},
|
||||
"with_absolute_endpoint_uri",
|
||||
|
|
@ -101,6 +102,7 @@ struct S3ToHttpsConversionTestCase
|
|||
{
|
||||
ParsedS3URL input;
|
||||
ParsedURL expected;
|
||||
std::string expectedRendered;
|
||||
std::string description;
|
||||
};
|
||||
|
||||
|
|
@ -113,6 +115,7 @@ TEST_P(S3ToHttpsConversionTest, ConvertsCorrectly)
|
|||
const auto & testCase = GetParam();
|
||||
auto result = testCase.input.toHttpsUrl();
|
||||
EXPECT_EQ(result, testCase.expected) << "Failed for: " << testCase.description;
|
||||
EXPECT_EQ(result.to_string(), testCase.expectedRendered);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
|
|
@ -122,71 +125,77 @@ INSTANTIATE_TEST_SUITE_P(
|
|||
S3ToHttpsConversionTestCase{
|
||||
ParsedS3URL{
|
||||
.bucket = "my-bucket",
|
||||
.key = "my-key.txt",
|
||||
.key = {"my-key.txt"},
|
||||
},
|
||||
ParsedURL{
|
||||
.scheme = "https",
|
||||
.authority = ParsedURL::Authority{.host = "s3.us-east-1.amazonaws.com"},
|
||||
.path = "/my-bucket/my-key.txt",
|
||||
.path = {"", "my-bucket", "my-key.txt"},
|
||||
},
|
||||
"https://s3.us-east-1.amazonaws.com/my-bucket/my-key.txt",
|
||||
"basic_s3_default_region",
|
||||
},
|
||||
S3ToHttpsConversionTestCase{
|
||||
ParsedS3URL{
|
||||
.bucket = "prod-cache",
|
||||
.key = "nix/store/abc123.nar.xz",
|
||||
.key = {"nix", "store", "abc123.nar.xz"},
|
||||
.region = "eu-west-1",
|
||||
},
|
||||
ParsedURL{
|
||||
.scheme = "https",
|
||||
.authority = ParsedURL::Authority{.host = "s3.eu-west-1.amazonaws.com"},
|
||||
.path = "/prod-cache/nix/store/abc123.nar.xz",
|
||||
.path = {"", "prod-cache", "nix", "store", "abc123.nar.xz"},
|
||||
},
|
||||
"https://s3.eu-west-1.amazonaws.com/prod-cache/nix/store/abc123.nar.xz",
|
||||
"with_eu_west_1_region",
|
||||
},
|
||||
S3ToHttpsConversionTestCase{
|
||||
ParsedS3URL{
|
||||
.bucket = "bucket",
|
||||
.key = "key",
|
||||
.key = {"key"},
|
||||
.scheme = "http",
|
||||
.endpoint = ParsedURL::Authority{.host = "custom.s3.com"},
|
||||
},
|
||||
ParsedURL{
|
||||
.scheme = "http",
|
||||
.authority = ParsedURL::Authority{.host = "custom.s3.com"},
|
||||
.path = "/bucket/key",
|
||||
.path = {"", "bucket", "key"},
|
||||
},
|
||||
"http://custom.s3.com/bucket/key",
|
||||
"custom_endpoint_authority",
|
||||
},
|
||||
S3ToHttpsConversionTestCase{
|
||||
ParsedS3URL{
|
||||
.bucket = "bucket",
|
||||
.key = "key",
|
||||
.key = {"key"},
|
||||
.endpoint =
|
||||
ParsedURL{
|
||||
.scheme = "http",
|
||||
.authority = ParsedURL::Authority{.host = "server", .port = 9000},
|
||||
.path = {""},
|
||||
},
|
||||
},
|
||||
ParsedURL{
|
||||
.scheme = "http",
|
||||
.authority = ParsedURL::Authority{.host = "server", .port = 9000},
|
||||
.path = "/bucket/key",
|
||||
.path = {"", "bucket", "key"},
|
||||
},
|
||||
"http://server:9000/bucket/key",
|
||||
"custom_endpoint_with_port",
|
||||
},
|
||||
S3ToHttpsConversionTestCase{
|
||||
ParsedS3URL{
|
||||
.bucket = "bucket",
|
||||
.key = "path/to/file.txt",
|
||||
.key = {"path", "to", "file.txt"},
|
||||
.region = "ap-southeast-2",
|
||||
.scheme = "https",
|
||||
},
|
||||
ParsedURL{
|
||||
.scheme = "https",
|
||||
.authority = ParsedURL::Authority{.host = "s3.ap-southeast-2.amazonaws.com"},
|
||||
.path = "/bucket/path/to/file.txt",
|
||||
.path = {"", "bucket", "path", "to", "file.txt"},
|
||||
},
|
||||
"https://s3.ap-southeast-2.amazonaws.com/bucket/path/to/file.txt",
|
||||
"complex_path_and_region",
|
||||
}),
|
||||
[](const ::testing::TestParamInfo<S3ToHttpsConversionTestCase> & info) { return info.param.description; });
|
||||
|
|
|
|||
|
|
@ -815,7 +815,7 @@ struct curlFileTransfer : public FileTransfer
|
|||
S3Helper s3Helper(profile, region, scheme, endpoint);
|
||||
|
||||
// FIXME: implement ETag
|
||||
auto s3Res = s3Helper.getObject(parsed.bucket, parsed.key);
|
||||
auto s3Res = s3Helper.getObject(parsed.bucket, encodeUrlPath(parsed.key));
|
||||
FileTransferResult res;
|
||||
if (!s3Res.data)
|
||||
throw FileTransferError(NotFound, {}, "S3 object '%s' does not exist", request.uri);
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ HttpBinaryCacheStoreConfig::HttpBinaryCacheStoreConfig(
|
|||
+ (!_cacheUri.empty() ? _cacheUri
|
||||
: throw UsageError("`%s` Store requires a non-empty authority in Store URL", scheme))))
|
||||
{
|
||||
while (!cacheUri.path.empty() && cacheUri.path.back() == '/')
|
||||
while (!cacheUri.path.empty() && cacheUri.path.back() == "")
|
||||
cacheUri.path.pop_back();
|
||||
}
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ StoreReference HttpBinaryCacheStoreConfig::getReference() const
|
|||
.variant =
|
||||
StoreReference::Specified{
|
||||
.scheme = cacheUri.scheme,
|
||||
.authority = (cacheUri.authority ? cacheUri.authority->to_string() : "") + cacheUri.path,
|
||||
.authority = cacheUri.renderAuthorityAndPath(),
|
||||
},
|
||||
.params = cacheUri.query,
|
||||
};
|
||||
|
|
@ -157,7 +157,7 @@ protected:
|
|||
/* Otherwise the last path fragment will get discarded. */
|
||||
auto cacheUriWithTrailingSlash = config->cacheUri;
|
||||
if (!cacheUriWithTrailingSlash.path.empty())
|
||||
cacheUriWithTrailingSlash.path += "/";
|
||||
cacheUriWithTrailingSlash.path.push_back("");
|
||||
|
||||
/* path is not a path, but a full relative or absolute
|
||||
URL, e.g. we've seen in the wild NARINFO files have a URL
|
||||
|
|
|
|||
|
|
@ -54,7 +54,12 @@ struct S3Helper
|
|||
struct ParsedS3URL
|
||||
{
|
||||
std::string bucket;
|
||||
std::string key;
|
||||
/**
|
||||
* @see ParsedURL::path. This is a vector for the same reason.
|
||||
* Unlike ParsedURL::path this doesn't include the leading empty segment,
|
||||
* since the bucket name is necessary.
|
||||
*/
|
||||
std::vector<std::string> key;
|
||||
std::optional<std::string> profile;
|
||||
std::optional<std::string> region;
|
||||
std::optional<std::string> scheme;
|
||||
|
|
|
|||
|
|
@ -77,12 +77,22 @@ struct StoreReference
|
|||
*/
|
||||
std::string render(bool withParams = true) const;
|
||||
|
||||
std::string to_string() const
|
||||
{
|
||||
return render();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a URI into a store reference.
|
||||
*/
|
||||
static StoreReference parse(const std::string & uri, const Params & extraParams = Params{});
|
||||
};
|
||||
|
||||
static inline std::ostream & operator<<(std::ostream & os, const StoreReference & ref)
|
||||
{
|
||||
return os << ref.render();
|
||||
}
|
||||
|
||||
/**
|
||||
* Split URI into protocol+hierarchy part and its parameter set.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@
|
|||
#include "nix/util/url.hh"
|
||||
#include "nix/util/util.hh"
|
||||
#include "nix/util/canon-path.hh"
|
||||
#include "nix/util/strings-inline.hh"
|
||||
|
||||
#include <ranges>
|
||||
|
||||
namespace nix {
|
||||
|
||||
|
|
@ -24,10 +27,6 @@ try {
|
|||
|| parsed.authority->hostType != ParsedURL::Authority::HostType::Name)
|
||||
throw BadURL("URI has a missing or invalid bucket name");
|
||||
|
||||
std::string_view key = parsed.path;
|
||||
/* Make the key a relative path. */
|
||||
splitPrefix(key, "/");
|
||||
|
||||
/* TODO: Validate the key against:
|
||||
* https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines
|
||||
*/
|
||||
|
|
@ -41,10 +40,14 @@ try {
|
|||
};
|
||||
|
||||
auto endpoint = getOptionalParam("endpoint");
|
||||
if (parsed.path.size() <= 1 || !parsed.path.front().empty())
|
||||
throw BadURL("URI has a missing or invalid key");
|
||||
|
||||
auto path = std::views::drop(parsed.path, 1) | std::ranges::to<std::vector<std::string>>();
|
||||
|
||||
return ParsedS3URL{
|
||||
.bucket = parsed.authority->host,
|
||||
.key = std::string{key},
|
||||
.key = std::move(path),
|
||||
.profile = getOptionalParam("profile"),
|
||||
.region = getOptionalParam("region"),
|
||||
.scheme = getOptionalParam("scheme"),
|
||||
|
|
@ -78,26 +81,35 @@ ParsedURL ParsedS3URL::toHttpsUrl() const
|
|||
overloaded{
|
||||
[&](const std::monostate &) {
|
||||
// No custom endpoint, use standard AWS S3 endpoint
|
||||
std::vector<std::string> path{""};
|
||||
path.push_back(bucket);
|
||||
path.insert(path.end(), key.begin(), key.end());
|
||||
return ParsedURL{
|
||||
.scheme = std::string{schemeStr},
|
||||
.authority = ParsedURL::Authority{.host = "s3." + regionStr + ".amazonaws.com"},
|
||||
.path = (CanonPath::root / bucket / CanonPath(key)).abs(),
|
||||
.path = std::move(path),
|
||||
};
|
||||
},
|
||||
[&](const ParsedURL::Authority & auth) {
|
||||
// Endpoint is just an authority (hostname/port)
|
||||
std::vector<std::string> path{""};
|
||||
path.push_back(bucket);
|
||||
path.insert(path.end(), key.begin(), key.end());
|
||||
return ParsedURL{
|
||||
.scheme = std::string{schemeStr},
|
||||
.authority = auth,
|
||||
.path = (CanonPath::root / bucket / CanonPath(key)).abs(),
|
||||
.path = std::move(path),
|
||||
};
|
||||
},
|
||||
[&](const ParsedURL & endpointUrl) {
|
||||
// Endpoint is already a ParsedURL (e.g., http://server:9000)
|
||||
auto path = endpointUrl.path;
|
||||
path.push_back(bucket);
|
||||
path.insert(path.end(), key.begin(), key.end());
|
||||
return ParsedURL{
|
||||
.scheme = endpointUrl.scheme,
|
||||
.authority = endpointUrl.authority,
|
||||
.path = (CanonPath(endpointUrl.path) / bucket / CanonPath(key)).abs(),
|
||||
.path = std::move(path),
|
||||
};
|
||||
},
|
||||
},
|
||||
|
|
|
|||
|
|
@ -48,13 +48,11 @@ StoreReference StoreReference::parse(const std::string & uri, const StoreReferen
|
|||
auto parsedUri = parseURL(uri, /*lenient=*/true);
|
||||
params.insert(parsedUri.query.begin(), parsedUri.query.end());
|
||||
|
||||
auto baseURI = parsedUri.authority.value_or(ParsedURL::Authority{}).to_string() + parsedUri.path;
|
||||
|
||||
return {
|
||||
.variant =
|
||||
Specified{
|
||||
.scheme = std::move(parsedUri.scheme),
|
||||
.authority = std::move(baseURI),
|
||||
.authority = parsedUri.renderAuthorityAndPath(),
|
||||
},
|
||||
.params = std::move(params),
|
||||
};
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ TEST(parseURL, parsesSimpleHttpUrl)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
|
||||
.path = "/file.tar.gz",
|
||||
.path = {"", "file.tar.gz"},
|
||||
.query = (StringMap) {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -35,7 +35,7 @@ TEST(parseURL, parsesSimpleHttpsUrl)
|
|||
ParsedURL expected{
|
||||
.scheme = "https",
|
||||
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
|
||||
.path = "/file.tar.gz",
|
||||
.path = {"", "file.tar.gz"},
|
||||
.query = (StringMap) {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -52,7 +52,7 @@ TEST(parseURL, parsesSimpleHttpUrlWithQueryAndFragment)
|
|||
ParsedURL expected{
|
||||
.scheme = "https",
|
||||
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
|
||||
.path = "/file.tar.gz",
|
||||
.path = {"", "file.tar.gz"},
|
||||
.query = (StringMap) {{"download", "fast"}, {"when", "now"}},
|
||||
.fragment = "hello",
|
||||
};
|
||||
|
|
@ -69,7 +69,7 @@ TEST(parseURL, parsesSimpleHttpUrlWithComplexFragment)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
|
||||
.path = "/file.tar.gz",
|
||||
.path = {"", "file.tar.gz"},
|
||||
.query = (StringMap) {{"field", "value"}},
|
||||
.fragment = "?foo=bar#",
|
||||
};
|
||||
|
|
@ -85,7 +85,7 @@ TEST(parseURL, parsesFilePlusHttpsUrl)
|
|||
ParsedURL expected{
|
||||
.scheme = "file+https",
|
||||
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
|
||||
.path = "/video.mp4",
|
||||
.path = {"", "video.mp4"},
|
||||
.query = (StringMap) {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -108,7 +108,7 @@ TEST(parseURL, parseIPv4Address)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = Authority{.hostType = HostType::IPv4, .host = "127.0.0.1", .port = 8080},
|
||||
.path = "/file.tar.gz",
|
||||
.path = {"", "file.tar.gz"},
|
||||
.query = (StringMap) {{"download", "fast"}, {"when", "now"}},
|
||||
.fragment = "hello",
|
||||
};
|
||||
|
|
@ -125,7 +125,7 @@ TEST(parseURL, parseScopedRFC6874IPv6Address)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = Authority{.hostType = HostType::IPv6, .host = "fe80::818c:da4d:8975:415c\%enp0s25", .port = 8080},
|
||||
.path = "",
|
||||
.path = {""},
|
||||
.query = (StringMap) {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -147,7 +147,7 @@ TEST(parseURL, parseIPv6Address)
|
|||
.host = "2a02:8071:8192:c100:311d:192d:81ac:11ea",
|
||||
.port = 8080,
|
||||
},
|
||||
.path = "",
|
||||
.path = {""},
|
||||
.query = (StringMap) {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -178,7 +178,7 @@ TEST(parseURL, parseUserPassword)
|
|||
.password = "pass",
|
||||
.port = 8080,
|
||||
},
|
||||
.path = "/file.tar.gz",
|
||||
.path = {"", "file.tar.gz"},
|
||||
.query = (StringMap) {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -195,11 +195,12 @@ TEST(parseURL, parseFileURLWithQueryAndFragment)
|
|||
ParsedURL expected{
|
||||
.scheme = "file",
|
||||
.authority = Authority{},
|
||||
.path = "/none/of//your/business",
|
||||
.path = {"", "none", "of", "", "your", "business"},
|
||||
.query = (StringMap) {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
||||
ASSERT_EQ(parsed.renderPath(), "/none/of//your/business");
|
||||
ASSERT_EQ(parsed, expected);
|
||||
ASSERT_EQ(s, parsed.to_string());
|
||||
}
|
||||
|
|
@ -212,9 +213,10 @@ TEST(parseURL, parseFileURL)
|
|||
ParsedURL expected{
|
||||
.scheme = "file",
|
||||
.authority = std::nullopt,
|
||||
.path = "/none/of/your/business/",
|
||||
.path = {"", "none", "of", "your", "business", ""},
|
||||
};
|
||||
|
||||
ASSERT_EQ(parsed.renderPath(), "/none/of/your/business/");
|
||||
ASSERT_EQ(parsed, expected);
|
||||
ASSERT_EQ(s, parsed.to_string());
|
||||
}
|
||||
|
|
@ -227,10 +229,11 @@ TEST(parseURL, parseFileURLWithAuthority)
|
|||
ParsedURL expected{
|
||||
.scheme = "file",
|
||||
.authority = Authority{.host = ""},
|
||||
.path = "///of/your/business//",
|
||||
.path = {"", "", "", "of", "your", "business", "", ""},
|
||||
};
|
||||
|
||||
ASSERT_EQ(parsed.authority, expected.authority);
|
||||
ASSERT_EQ(parsed.path, expected.path);
|
||||
ASSERT_EQ(parsed.renderPath(), "///of/your/business//");
|
||||
ASSERT_EQ(parsed, expected);
|
||||
ASSERT_EQ(s, parsed.to_string());
|
||||
}
|
||||
|
|
@ -243,9 +246,10 @@ TEST(parseURL, parseFileURLNoLeadingSlash)
|
|||
ParsedURL expected{
|
||||
.scheme = "file",
|
||||
.authority = std::nullopt,
|
||||
.path = "none/of/your/business/",
|
||||
.path = {"none", "of", "your", "business", ""},
|
||||
};
|
||||
|
||||
ASSERT_EQ(parsed.renderPath(), "none/of/your/business/");
|
||||
ASSERT_EQ(parsed, expected);
|
||||
ASSERT_EQ("file:none/of/your/business/", parsed.to_string());
|
||||
}
|
||||
|
|
@ -258,9 +262,10 @@ TEST(parseURL, parseHttpTrailingSlash)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = Authority{.host = "example.com"},
|
||||
.path = "/",
|
||||
.path = {"", ""},
|
||||
};
|
||||
|
||||
ASSERT_EQ(parsed.renderPath(), "/");
|
||||
ASSERT_EQ(parsed, expected);
|
||||
ASSERT_EQ(s, parsed.to_string());
|
||||
}
|
||||
|
|
@ -306,7 +311,7 @@ TEST(parseURL, parseFTPUrl)
|
|||
ParsedURL expected{
|
||||
.scheme = "ftp",
|
||||
.authority = Authority{.hostType = HostType::Name, .host = "ftp.nixos.org"},
|
||||
.path = "/downloads/nixos.iso",
|
||||
.path = {"", "downloads", "nixos.iso"},
|
||||
.query = (StringMap) {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -342,7 +347,7 @@ TEST(parseURL, parsesHttpUrlWithEmptyPort)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = Authority{.hostType = HostType::Name, .host = "www.example.org"},
|
||||
.path = "/file.tar.gz",
|
||||
.path = {"", "file.tar.gz"},
|
||||
.query = (StringMap) {{"foo", "bar"}},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -362,7 +367,7 @@ TEST(parseURLRelative, resolvesRelativePath)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"},
|
||||
.path = "/dir/subdir/file.txt",
|
||||
.path = {"", "dir", "subdir", "file.txt"},
|
||||
.query = {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -376,7 +381,7 @@ TEST(parseURLRelative, baseUrlIpv6AddressWithoutZoneId)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = ParsedURL::Authority{.hostType = HostType::IPv6, .host = "fe80::818c:da4d:8975:415c"},
|
||||
.path = "/dir/subdir/file.txt",
|
||||
.path = {"", "dir", "subdir", "file.txt"},
|
||||
.query = {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -390,7 +395,7 @@ TEST(parseURLRelative, resolvesRelativePathIpv6AddressWithZoneId)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = Authority{.hostType = HostType::IPv6, .host = "fe80::818c:da4d:8975:415c\%enp0s25", .port = 8080},
|
||||
.path = "/dir/subdir/file2.txt",
|
||||
.path = {"", "dir", "subdir", "file2.txt"},
|
||||
.query = {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -405,7 +410,7 @@ TEST(parseURLRelative, resolvesRelativePathWithDot)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"},
|
||||
.path = "/dir/subdir/file.txt",
|
||||
.path = {"", "dir", "subdir", "file.txt"},
|
||||
.query = {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -419,7 +424,21 @@ TEST(parseURLRelative, resolvesParentDirectory)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org", .port = 234},
|
||||
.path = "/up.txt",
|
||||
.path = {"", "up.txt"},
|
||||
.query = {},
|
||||
.fragment = "",
|
||||
};
|
||||
ASSERT_EQ(parsed, expected);
|
||||
}
|
||||
|
||||
TEST(parseURLRelative, resolvesParentDirectoryNotTrickedByEscapedSlash)
|
||||
{
|
||||
ParsedURL base = parseURL("http://example.org:234/dir\%2Ffirst-trick/another-dir\%2Fsecond-trick/page.html");
|
||||
auto parsed = parseURLRelative("../up.txt", base);
|
||||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org", .port = 234},
|
||||
.path = {"", "dir/first-trick", "up.txt"},
|
||||
.query = {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -433,7 +452,7 @@ TEST(parseURLRelative, replacesPathWithAbsoluteRelative)
|
|||
ParsedURL expected{
|
||||
.scheme = "http",
|
||||
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "example.org"},
|
||||
.path = "/rooted.txt",
|
||||
.path = {"", "rooted.txt"},
|
||||
.query = {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -448,7 +467,7 @@ TEST(parseURLRelative, keepsQueryAndFragmentFromRelative)
|
|||
ParsedURL expected{
|
||||
.scheme = "https",
|
||||
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
|
||||
.path = "/path/other.html",
|
||||
.path = {"", "path", "other.html"},
|
||||
.query = {{"x", "1"}, {"y", "2"}},
|
||||
.fragment = "frag",
|
||||
};
|
||||
|
|
@ -489,7 +508,7 @@ TEST(parseURLRelative, emptyRelative)
|
|||
ParsedURL expected{
|
||||
.scheme = "https",
|
||||
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
|
||||
.path = "/path/index.html",
|
||||
.path = {"", "path", "index.html"},
|
||||
.query = {{"a b", "5 6"}, {"x y", "34"}},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -504,7 +523,7 @@ TEST(parseURLRelative, fragmentRelative)
|
|||
ParsedURL expected{
|
||||
.scheme = "https",
|
||||
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
|
||||
.path = "/path/index.html",
|
||||
.path = {"", "path", "index.html"},
|
||||
.query = {{"a b", "5 6"}, {"x y", "34"}},
|
||||
.fragment = "frag2",
|
||||
};
|
||||
|
|
@ -518,7 +537,7 @@ TEST(parseURLRelative, queryRelative)
|
|||
ParsedURL expected{
|
||||
.scheme = "https",
|
||||
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
|
||||
.path = "/path/index.html",
|
||||
.path = {"", "path", "index.html"},
|
||||
.query = {{"asdf qwer", "1 2 3"}},
|
||||
.fragment = "",
|
||||
};
|
||||
|
|
@ -532,7 +551,7 @@ TEST(parseURLRelative, queryFragmentRelative)
|
|||
ParsedURL expected{
|
||||
.scheme = "https",
|
||||
.authority = ParsedURL::Authority{.hostType = HostType::Name, .host = "www.example.org"},
|
||||
.path = "/path/index.html",
|
||||
.path = {"", "path", "index.html"},
|
||||
.query = {{"asdf qwer", "1 2 3"}},
|
||||
.fragment = "frag2",
|
||||
};
|
||||
|
|
@ -648,6 +667,25 @@ TEST(percentEncode, yen)
|
|||
ASSERT_EQ(percentDecode(e), s);
|
||||
}
|
||||
|
||||
TEST(parseURL, gitlabNamespacedProjectUrls)
|
||||
{
|
||||
// Test GitLab URL patterns with namespaced projects
|
||||
// These should preserve %2F encoding in the path
|
||||
auto s = "https://gitlab.example.com/api/v4/projects/group%2Fsubgroup%2Fproject/repository/archive.tar.gz";
|
||||
auto parsed = parseURL(s);
|
||||
|
||||
ParsedURL expected{
|
||||
.scheme = "https",
|
||||
.authority = Authority{.hostType = HostType::Name, .host = "gitlab.example.com"},
|
||||
.path = {"", "api", "v4", "projects", "group/subgroup/project", "repository", "archive.tar.gz"},
|
||||
.query = {},
|
||||
.fragment = "",
|
||||
};
|
||||
|
||||
ASSERT_EQ(parsed, expected);
|
||||
ASSERT_EQ(s, parsed.to_string());
|
||||
}
|
||||
|
||||
TEST(nix, isValidSchemeName)
|
||||
{
|
||||
ASSERT_TRUE(isValidSchemeName("http"));
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
#pragma once
|
||||
///@file
|
||||
|
||||
#include <span>
|
||||
|
||||
#include "nix/util/error.hh"
|
||||
#include "nix/util/canon-path.hh"
|
||||
|
||||
namespace nix {
|
||||
|
||||
|
|
@ -65,6 +68,7 @@ struct ParsedURL
|
|||
};
|
||||
|
||||
std::string scheme;
|
||||
|
||||
/**
|
||||
* Optional parsed authority component of the URL.
|
||||
*
|
||||
|
|
@ -75,16 +79,155 @@ struct ParsedURL
|
|||
* part of the URL.
|
||||
*/
|
||||
std::optional<Authority> authority;
|
||||
std::string path;
|
||||
|
||||
/**
|
||||
* @note Unlike Unix paths, URLs provide a way to escape path
|
||||
* separators, in the form of the `%2F` encoding of `/`. That means
|
||||
* that if one percent-decodes the path into a single string, that
|
||||
* decoding will be *lossy*, because `/` and `%2F` both become `/`.
|
||||
* The right thing to do is instead split up the path on `/`, and
|
||||
* then percent decode each part.
|
||||
*
|
||||
* For an example, the path
|
||||
* ```
|
||||
* foo/bar%2Fbaz/quux
|
||||
* ```
|
||||
* is parsed as
|
||||
* ```
|
||||
 * {"foo", "bar/baz", "quux"}
|
||||
* ```
|
||||
*
|
||||
* We're doing splitting and joining that assumes the separator (`/` in this case) only goes *between* elements.
|
||||
*
|
||||
* That means the parsed representation will begin with an empty
|
||||
 * element to make an initial `/`, and will end with an empty
|
||||
* element to make a trailing `/`. That means that elements of this
|
||||
* vector mostly, but *not always*, correspond to segments of the
|
||||
* path.
|
||||
*
|
||||
* Examples:
|
||||
*
|
||||
* - ```
|
||||
* https://foo.com/bar
|
||||
* ```
|
||||
* has path
|
||||
* ```
|
||||
* {"", "bar"}
|
||||
* ```
|
||||
*
|
||||
* - ```
|
||||
* https://foo.com/bar/
|
||||
* ```
|
||||
* has path
|
||||
* ```
|
||||
* {"", "bar", ""}
|
||||
* ```
|
||||
*
|
||||
* - ```
|
||||
* https://foo.com//bar///
|
||||
* ```
|
||||
* has path
|
||||
* ```
|
||||
* {"", "", "bar", "", "", ""}
|
||||
* ```
|
||||
*
|
||||
* - ```
|
||||
* https://foo.com
|
||||
* ```
|
||||
* has path
|
||||
* ```
|
||||
* {""}
|
||||
* ```
|
||||
*
|
||||
* - ```
|
||||
* https://foo.com/
|
||||
* ```
|
||||
* has path
|
||||
* ```
|
||||
* {"", ""}
|
||||
* ```
|
||||
*
|
||||
* - ```
|
||||
* tel:01234
|
||||
* ```
|
||||
* has path `{"01234"}` (and no authority)
|
||||
*
|
||||
* - ```
|
||||
* foo:/01234
|
||||
* ```
|
||||
* has path `{"", "01234"}` (and no authority)
|
||||
*
|
||||
* Note that both trailing and leading slashes are, in general,
|
||||
* semantically significant.
|
||||
*
|
||||
* For trailing slashes, the main example affecting many schemes is
|
||||
* that `../baz` resolves against a base URL different depending on
|
||||
* the presence/absence of a trailing slash:
|
||||
*
|
||||
* - `https://foo.com/bar` is `https://foo.com/baz`
|
||||
*
|
||||
* - `https://foo.com/bar/` is `https://foo.com/bar/baz`
|
||||
*
|
||||
* See `parseURLRelative` for more details.
|
||||
*
|
||||
* For leading slashes, there are some requirements to be aware of.
|
||||
*
|
||||
* - When there is an authority, the path *must* start with a leading
|
||||
* slash. Otherwise the path will not be separated from the
|
||||
* authority, and will not round trip though the parser:
|
||||
*
|
||||
* ```
|
||||
* {.scheme="https", .authority.host = "foo", .path={"bad"}}
|
||||
* ```
|
||||
 * will render to `https://foobar`, but that would parse back as
|
||||
* ```
|
||||
* {.scheme="https", .authority.host = "foobar", .path={}}
|
||||
* ```
|
||||
*
|
||||
* - When there is no authority, the path must *not* begin with two
|
||||
* slashes. Otherwise, there will be another parser round trip
|
||||
* issue:
|
||||
*
|
||||
* ```
|
||||
* {.scheme="https", .path={"", "", "bad"}}
|
||||
* ```
|
||||
 * will render to `https://bad`, but that would parse back as
|
||||
* ```
|
||||
* {.scheme="https", .authority.host = "bad", .path={}}
|
||||
* ```
|
||||
*
|
||||
* These invariants will be checked in `to_string` and
|
||||
* `renderAuthorityAndPath`.
|
||||
*/
|
||||
std::vector<std::string> path;
|
||||
|
||||
StringMap query;
|
||||
|
||||
std::string fragment;
|
||||
|
||||
/**
|
||||
* Render just the middle part of a URL, without the `//` which
|
||||
* indicates whether the authority is present.
|
||||
*
|
||||
* @note This is kind of an ad-hoc
|
||||
* operation, but it ends up coming up with some frequency, probably
|
||||
* due to the current design of `StoreReference` in `nix-store`.
|
||||
*/
|
||||
std::string renderAuthorityAndPath() const;
|
||||
|
||||
std::string to_string() const;
|
||||
|
||||
/**
|
||||
* Render the path to a string.
|
||||
*
|
||||
* @param encode Whether to percent encode path segments.
|
||||
*/
|
||||
std::string renderPath(bool encode = false) const;
|
||||
|
||||
auto operator<=>(const ParsedURL & other) const noexcept = default;
|
||||
|
||||
/**
|
||||
* Remove `.` and `..` path elements.
|
||||
* Remove `.` and `..` path segments.
|
||||
*/
|
||||
ParsedURL canonicalise();
|
||||
};
|
||||
|
|
@ -96,6 +239,22 @@ MakeError(BadURL, Error);
|
|||
std::string percentDecode(std::string_view in);
|
||||
std::string percentEncode(std::string_view s, std::string_view keep = "");
|
||||
|
||||
/**
|
||||
* Get the path part of the URL as an absolute or relative Path.
|
||||
*
|
||||
 * @throws if any path component contains a slash (which would have
|
||||
* been escaped `%2F` in the rendered URL). This is because OS file
|
||||
* paths have no escape sequences --- file names cannot contain a
|
||||
* `/`.
|
||||
*/
|
||||
Path renderUrlPathEnsureLegal(const std::vector<std::string> & urlPath);
|
||||
|
||||
/**
|
||||
* Percent encode path. `%2F` for "interior slashes" is the most
|
||||
* important.
|
||||
*/
|
||||
std::string encodeUrlPath(std::span<const std::string> urlPath);
|
||||
|
||||
/**
|
||||
* @param lenient @see parseURL
|
||||
*/
|
||||
|
|
@ -114,6 +273,12 @@ std::string encodeQuery(const StringMap & query);
|
|||
* @note IPv6 ZoneId literals (RFC4007) are represented in URIs according to RFC6874.
|
||||
*
|
||||
* @throws BadURL
|
||||
*
|
||||
* The WHATWG specification of the URL constructor in Java Script is
|
||||
* also a useful reference:
|
||||
* https://url.spec.whatwg.org/#concept-basic-url-parser. Note, however,
|
||||
* that it includes various scheme-specific normalizations / extra steps
|
||||
* that we do not implement.
|
||||
*/
|
||||
ParsedURL parseURL(std::string_view url, bool lenient = false);
|
||||
|
||||
|
|
@ -123,7 +288,11 @@ ParsedURL parseURL(std::string_view url, bool lenient = false);
|
|||
*
|
||||
* This is specified in [IETF RFC 3986, section 5](https://datatracker.ietf.org/doc/html/rfc3986#section-5)
|
||||
*
|
||||
* Behavior should also match the `new URL(url, base)` JavaScript constructor.
|
||||
* @throws BadURL
|
||||
*
|
||||
* Behavior should also match the `new URL(url, base)` JavaScript
|
||||
* constructor, except for extra steps specific to the HTTP scheme. See
|
||||
* `parseURL` for link to the relevant WHATWG standard.
|
||||
*/
|
||||
ParsedURL parseURLRelative(std::string_view url, const ParsedURL & base);
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "nix/util/util.hh"
|
||||
#include "nix/util/split.hh"
|
||||
#include "nix/util/canon-path.hh"
|
||||
#include "nix/util/strings-inline.hh"
|
||||
|
||||
#include <boost/url.hpp>
|
||||
|
||||
|
|
@ -179,11 +180,14 @@ static ParsedURL fromBoostUrlView(boost::urls::url_view urlView, bool lenient)
|
|||
if (authority && authority->host.size() && transportIsFile)
|
||||
throw BadURL("file:// URL '%s' has unexpected authority '%s'", urlView.buffer(), *authority);
|
||||
|
||||
auto path = urlView.path(); /* Does pct-decoding */
|
||||
auto fragment = urlView.fragment(); /* Does pct-decoding */
|
||||
|
||||
if (transportIsFile && path.empty())
|
||||
path = "/";
|
||||
boost::core::string_view encodedPath = urlView.encoded_path();
|
||||
if (transportIsFile && encodedPath.empty())
|
||||
encodedPath = "/";
|
||||
|
||||
auto path = std::views::transform(splitString<std::vector<std::string_view>>(encodedPath, "/"), percentDecode)
|
||||
| std::ranges::to<std::vector<std::string>>();
|
||||
|
||||
/* Get the raw query. Store URI supports smuggling doubly nested queries, where
|
||||
the inner &/? are pct-encoded. */
|
||||
|
|
@ -192,7 +196,7 @@ static ParsedURL fromBoostUrlView(boost::urls::url_view urlView, bool lenient)
|
|||
return ParsedURL{
|
||||
.scheme = scheme,
|
||||
.authority = authority,
|
||||
.path = path,
|
||||
.path = std::move(path),
|
||||
.query = decodeQuery(query, lenient),
|
||||
.fragment = fragment,
|
||||
};
|
||||
|
|
@ -215,7 +219,7 @@ try {
|
|||
if (authority.port)
|
||||
resolved.set_port_number(*authority.port);
|
||||
}
|
||||
resolved.set_path(base.path);
|
||||
resolved.set_encoded_path(encodeUrlPath(base.path));
|
||||
resolved.set_encoded_query(encodeQuery(base.query));
|
||||
resolved.set_fragment(base.fragment);
|
||||
} catch (boost::system::system_error & e) {
|
||||
|
|
@ -291,7 +295,15 @@ try {
|
|||
}
|
||||
|
||||
const static std::string allowedInQuery = ":@/?";
|
||||
const static std::string allowedInPath = ":@/";
|
||||
const static std::string allowedInPath = ":@";
|
||||
|
||||
std::string encodeUrlPath(std::span<const std::string> urlPath)
|
||||
{
|
||||
std::vector<std::string> encodedPath;
|
||||
for (auto & p : urlPath)
|
||||
encodedPath.push_back(percentEncode(p, allowedInPath));
|
||||
return concatStringsSep("/", encodedPath);
|
||||
}
|
||||
|
||||
std::string encodeQuery(const StringMap & ss)
|
||||
{
|
||||
|
|
@ -308,10 +320,62 @@ std::string encodeQuery(const StringMap & ss)
|
|||
return res;
|
||||
}
|
||||
|
||||
Path renderUrlPathEnsureLegal(const std::vector<std::string> & urlPath)
|
||||
{
|
||||
for (const auto & comp : urlPath) {
|
||||
/* This is only really valid for UNIX. Windows has more restrictions. */
|
||||
if (comp.contains('/'))
|
||||
throw BadURL("URL path component '%s' contains '/', which is not allowed in file names", comp);
|
||||
if (comp.contains(char(0)))
|
||||
throw BadURL("URL path component '%s' contains NUL byte which is not allowed", comp);
|
||||
}
|
||||
|
||||
return concatStringsSep("/", urlPath);
|
||||
}
|
||||
|
||||
std::string ParsedURL::renderPath(bool encode) const
|
||||
{
|
||||
if (encode)
|
||||
return encodeUrlPath(path);
|
||||
return concatStringsSep("/", path);
|
||||
}
|
||||
|
||||
std::string ParsedURL::renderAuthorityAndPath() const
|
||||
{
|
||||
std::string res;
|
||||
/* The following assertions correspond to 3.3. Path [rfc3986]. URL parser
|
||||
will never violate these properties, but hand-constructed ParsedURLs might. */
|
||||
if (authority.has_value()) {
|
||||
/* If a URI contains an authority component, then the path component
|
||||
must either be empty or begin with a slash ("/") character. */
|
||||
assert(path.empty() || path.front().empty());
|
||||
res += authority->to_string();
|
||||
} else if (std::ranges::equal(std::views::take(path, 2), std::views::repeat("", 2))) {
|
||||
/* If a URI does not contain an authority component, then the path cannot begin
|
||||
with two slash characters ("//") */
|
||||
unreachable();
|
||||
}
|
||||
res += encodeUrlPath(path);
|
||||
return res;
|
||||
}
|
||||
|
||||
std::string ParsedURL::to_string() const
|
||||
{
|
||||
return scheme + ":" + (authority ? "//" + authority->to_string() : "") + percentEncode(path, allowedInPath)
|
||||
+ (query.empty() ? "" : "?" + encodeQuery(query)) + (fragment.empty() ? "" : "#" + percentEncode(fragment));
|
||||
std::string res;
|
||||
res += scheme;
|
||||
res += ":";
|
||||
if (authority.has_value())
|
||||
res += "//";
|
||||
res += renderAuthorityAndPath();
|
||||
if (!query.empty()) {
|
||||
res += "?";
|
||||
res += encodeQuery(query);
|
||||
}
|
||||
if (!fragment.empty()) {
|
||||
res += "#";
|
||||
res += percentEncode(fragment);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
std::ostream & operator<<(std::ostream & os, const ParsedURL & url)
|
||||
|
|
@ -323,7 +387,7 @@ std::ostream & operator<<(std::ostream & os, const ParsedURL & url)
|
|||
ParsedURL ParsedURL::canonicalise()
|
||||
{
|
||||
ParsedURL res(*this);
|
||||
res.path = CanonPath(res.path).abs();
|
||||
res.path = splitString<std::vector<std::string>>(CanonPath(renderPath()).abs(), "/");
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
@ -352,7 +416,11 @@ ParsedURL fixGitURL(const std::string & url)
|
|||
if (hasPrefix(url, "file:"))
|
||||
return parseURL(url);
|
||||
if (url.find("://") == std::string::npos) {
|
||||
return (ParsedURL{.scheme = "file", .authority = ParsedURL::Authority{}, .path = url});
|
||||
return ParsedURL{
|
||||
.scheme = "file",
|
||||
.authority = ParsedURL::Authority{},
|
||||
.path = splitString<std::vector<std::string>>(url, "/"),
|
||||
};
|
||||
}
|
||||
return parseURL(url);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue