Implement caching of fetchurl/fetchTarball results
ETags are used to prevent redownloading unchanged files.
This commit is contained in:
parent
1711679ea5
commit
60340ce3e2
4 changed files with 152 additions and 29 deletions
|
@ -1490,6 +1490,9 @@ struct Curl
|
|||
{
|
||||
CURL * curl;
|
||||
string data;
|
||||
string etag, status, expectedETag;
|
||||
|
||||
struct curl_slist * requestHeaders;
|
||||
|
||||
static size_t writeCallback(void * contents, size_t size, size_t nmemb, void * userp)
|
||||
{
|
||||
|
@ -1499,36 +1502,92 @@ struct Curl
|
|||
return realSize;
|
||||
}
|
||||
|
||||
static size_t headerCallback(void * contents, size_t size, size_t nmemb, void * userp)
|
||||
{
|
||||
Curl & c(* (Curl *) userp);
|
||||
size_t realSize = size * nmemb;
|
||||
string line = string((char *) contents, realSize);
|
||||
printMsg(lvlVomit, format("got header: %1%") % trim(line));
|
||||
if (line.compare(0, 5, "HTTP/") == 0) { // new response starts
|
||||
c.etag = "";
|
||||
auto ss = tokenizeString<vector<string>>(line, " ");
|
||||
c.status = ss.size() >= 2 ? ss[1] : "";
|
||||
} else {
|
||||
auto i = line.find(':');
|
||||
if (i != string::npos) {
|
||||
string name = trim(string(line, 0, i));
|
||||
if (name == "ETag") { // FIXME: case
|
||||
c.etag = trim(string(line, i + 1));
|
||||
/* Hack to work around a GitHub bug: it sends
|
||||
ETags, but ignores If-None-Match. So if we get
|
||||
the expected ETag on a 200 response, then shut
|
||||
down the connection because we already have the
|
||||
data. */
|
||||
printMsg(lvlDebug, format("got ETag: %1%") % c.etag);
|
||||
if (c.etag == c.expectedETag && c.status == "200") {
|
||||
printMsg(lvlDebug, format("shutting down on 200 HTTP response with expected ETag"));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return realSize;
|
||||
}
|
||||
|
||||
Curl()
|
||||
{
|
||||
requestHeaders = 0;
|
||||
|
||||
curl = curl_easy_init();
|
||||
if (!curl) throw Error("unable to initialize curl");
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
||||
curl_easy_setopt(curl, CURLOPT_CAINFO, getEnv("SSL_CERT_FILE", "/etc/ssl/certs/ca-certificates.crt").c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_USERAGENT, ("Nix/" + nixVersion).c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, writeCallback);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) &curl);
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, headerCallback);
|
||||
curl_easy_setopt(curl, CURLOPT_HEADERDATA, (void *) &curl);
|
||||
}
|
||||
|
||||
/* Release the easy handle and, if one is still held, the list of
   request headers. */
~Curl()
{
    if (curl) {
        curl_easy_cleanup(curl);
    }

    if (requestHeaders) {
        curl_slist_free_all(requestHeaders);
    }
}
|
||||
|
||||
string fetch(const string & url)
|
||||
bool fetch(const string & url, const string & expectedETag = "")
|
||||
{
|
||||
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||||
|
||||
data.clear();
|
||||
|
||||
CURLcode res = curl_easy_perform(curl);
|
||||
if (res != CURLE_OK)
|
||||
throw Error(format("unable to download ‘%1%’: %2%")
|
||||
% url % curl_easy_strerror(res));
|
||||
if (requestHeaders) {
|
||||
curl_slist_free_all(requestHeaders);
|
||||
requestHeaders = 0;
|
||||
}
|
||||
|
||||
return data;
|
||||
if (!expectedETag.empty()) {
|
||||
this->expectedETag = expectedETag;
|
||||
requestHeaders = curl_slist_append(requestHeaders, ("If-None-Match: " + expectedETag).c_str());
|
||||
}
|
||||
|
||||
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, requestHeaders);
|
||||
|
||||
CURLcode res = curl_easy_perform(curl);
|
||||
if (res == CURLE_WRITE_ERROR && etag == expectedETag) return false;
|
||||
if (res != CURLE_OK)
|
||||
throw Error(format("unable to download ‘%1%’: %2% (%3%)")
|
||||
% url % curl_easy_strerror(res) % res);
|
||||
|
||||
long httpStatus = 0;
|
||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &httpStatus);
|
||||
if (httpStatus == 304) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1560,24 +1619,70 @@ void fetch(EvalState & state, const Pos & pos, Value * * args, Value & v,
|
|||
} else
|
||||
url = state.forceStringNoCtx(*args[0], pos);
|
||||
|
||||
// TODO: cache downloads.
|
||||
Path cacheDir = getEnv("XDG_CACHE_HOME", getEnv("HOME", "") + "/.cache") + "/nix/tarballs";
|
||||
createDirs(cacheDir);
|
||||
|
||||
Curl curl;
|
||||
string data = curl.fetch(url);
|
||||
string urlHash = printHash32(hashString(htSHA256, url));
|
||||
|
||||
Path dataFile = cacheDir + "/" + urlHash + ".info";
|
||||
Path fileLink = cacheDir + "/" + urlHash + "-file";
|
||||
|
||||
Path storePath;
|
||||
|
||||
string expectedETag;
|
||||
|
||||
if (pathExists(fileLink) && pathExists(dataFile)) {
|
||||
storePath = readLink(fileLink);
|
||||
store->addTempRoot(storePath);
|
||||
if (store->isValidPath(storePath)) {
|
||||
auto ss = tokenizeString<vector<string>>(readFile(dataFile), "\n");
|
||||
if (ss.size() >= 2 && ss[0] == url) {
|
||||
printMsg(lvlDebug, format("verifying previous ETag ‘%1%’") % ss[1]);
|
||||
expectedETag = ss[1];
|
||||
}
|
||||
} else
|
||||
storePath = "";
|
||||
}
|
||||
|
||||
string name;
|
||||
string::size_type p = url.rfind('/');
|
||||
auto p = url.rfind('/');
|
||||
if (p != string::npos) name = string(url, p + 1);
|
||||
|
||||
Path storePath = store->addTextToStore(name, data, PathSet(), state.repair);
|
||||
if (expectedETag.empty())
|
||||
printMsg(lvlInfo, format("downloading ‘%1%’...") % url);
|
||||
else
|
||||
printMsg(lvlInfo, format("checking ‘%1%’...") % url);
|
||||
Curl curl;
|
||||
|
||||
if (curl.fetch(url, expectedETag))
|
||||
storePath = store->addTextToStore(name, curl.data, PathSet(), state.repair);
|
||||
|
||||
assert(!storePath.empty());
|
||||
replaceSymlink(storePath, fileLink);
|
||||
|
||||
writeFile(dataFile, url + "\n" + curl.etag + "\n");
|
||||
|
||||
if (unpack) {
|
||||
Path unpackedLink = cacheDir + "/" + baseNameOf(storePath) + "-unpacked";
|
||||
Path unpackedStorePath;
|
||||
if (pathExists(unpackedLink)) {
|
||||
unpackedStorePath = readLink(unpackedLink);
|
||||
store->addTempRoot(unpackedStorePath);
|
||||
if (!store->isValidPath(unpackedStorePath))
|
||||
unpackedStorePath = "";
|
||||
}
|
||||
if (unpackedStorePath.empty()) {
|
||||
printMsg(lvlDebug, format("unpacking ‘%1%’...") % storePath);
|
||||
Path tmpDir = createTempDir();
|
||||
AutoDelete autoDelete(tmpDir, true);
|
||||
runProgram("tar", true, {"xf", storePath, "-C", tmpDir, "--strip-components", "1"}, "");
|
||||
storePath = store->addToStore(name, tmpDir, true, htSHA256, defaultPathFilter, state.repair);
|
||||
unpackedStorePath = store->addToStore(name, tmpDir, true, htSHA256, defaultPathFilter, state.repair);
|
||||
}
|
||||
replaceSymlink(unpackedStorePath, unpackedLink);
|
||||
mkString(v, unpackedStorePath, singleton<PathSet>(unpackedStorePath));
|
||||
}
|
||||
|
||||
else
|
||||
mkString(v, storePath, singleton<PathSet>(storePath));
|
||||
}
|
||||
|
||||
|
|
|
@ -413,6 +413,17 @@ void createSymlink(const Path & target, const Path & link)
|
|||
}
|
||||
|
||||
|
||||
void replaceSymlink(const Path & target, const Path & link)
|
||||
{
|
||||
Path tmp = canonPath(dirOf(link) + "/.new_" + baseNameOf(link));
|
||||
|
||||
createSymlink(target, tmp);
|
||||
|
||||
if (rename(tmp.c_str(), link.c_str()) != 0)
|
||||
throw SysError(format("renaming ‘%1%’ to ‘%2%’") % tmp % link);
|
||||
}
|
||||
|
||||
|
||||
LogType logType = ltPretty;
|
||||
Verbosity verbosity = lvlInfo;
|
||||
|
||||
|
@ -1076,6 +1087,15 @@ string chomp(const string & s)
|
|||
}
|
||||
|
||||
|
||||
/* Return a copy of `s' with every leading and trailing character that
   occurs in `whitespace' removed.  Characters in the middle of the
   string are left untouched.  The result is empty when `s' is empty
   or consists only of characters from `whitespace'. */
string trim(const string & s, const string & whitespace)
{
    auto first = s.find_first_not_of(whitespace);
    if (first == string::npos) return "";

    /* A first non-whitespace character exists, so a last one does
       too; find_last_not_of() cannot return npos here. */
    auto last = s.find_last_not_of(whitespace);
    return string(s, first, last - first + 1);
}
|
||||
|
||||
|
||||
string statusToString(int status)
|
||||
{
|
||||
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
|
||||
|
|
|
@ -110,6 +110,9 @@ Paths createDirs(const Path & path);
|
|||
/* Create a symlink. */
|
||||
void createSymlink(const Path & target, const Path & link);
|
||||
|
||||
/* Atomically create or replace a symlink. */
|
||||
void replaceSymlink(const Path & target, const Path & link);
|
||||
|
||||
|
||||
template<class T, class A>
|
||||
T singleton(const A & a)
|
||||
|
@ -334,6 +337,10 @@ string concatStringsSep(const string & sep, const StringSet & ss);
|
|||
string chomp(const string & s);
|
||||
|
||||
|
||||
/* Remove whitespace from the start and end of a string. */
|
||||
string trim(const string & s, const string & whitespace = " \n\r\t");
|
||||
|
||||
|
||||
/* Convert the exit status of a child as returned by wait() into an
|
||||
error string. */
|
||||
string statusToString(int status);
|
||||
|
|
|
@ -116,16 +116,7 @@ void switchLink(Path link, Path target)
|
|||
/* Hacky. */
|
||||
if (dirOf(target) == dirOf(link)) target = baseNameOf(target);
|
||||
|
||||
Path tmp = canonPath(dirOf(link) + "/.new_" + baseNameOf(link));
|
||||
createSymlink(target, tmp);
|
||||
/* The rename() system call is supposed to be essentially atomic
|
||||
on Unix. That is, if we have links `current -> X' and
|
||||
`new_current -> Y', and we rename new_current to current, a
|
||||
process accessing current will see X or Y, but never a
|
||||
file-not-found or other error condition. This is sufficient to
|
||||
atomically switch user environments. */
|
||||
if (rename(tmp.c_str(), link.c_str()) != 0)
|
||||
throw SysError(format("renaming ‘%1%’ to ‘%2%’") % tmp % link);
|
||||
replaceSymlink(target, link);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue