* New command `nix-store --optimise' to reduce Nix store disk space usage
  by finding identical files in the store and hard-linking them to each
  other.  It typically reduces the size of the store by something like
  25-35%.  This is what the optimise-store.pl script did, but the new
  command is faster and more correct (it's safe with respect to garbage
  collection and concurrent builds).
commit a8629de827 (parent 27a0662828)
5 changed files with 188 additions and 99 deletions
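To make the diffs below easier to follow, here is the idea reduced to a minimal standalone sketch. This is illustrative code only, not part of the commit, and the program name `dedup-scan` is made up: walk a tree, group regular files by their contents, and count what hard-linking the duplicates would save. The real LocalStore::optimiseStore() added below hashes the NAR serialisation with SHA-256 instead of comparing raw contents, skips writable files, and performs each replacement atomically.

    // dedup-scan: illustrative sketch only (not the Nix implementation).
    // Groups regular files by raw contents and reports potential savings.
    #include <filesystem>
    #include <fstream>
    #include <iostream>
    #include <iterator>
    #include <map>
    #include <string>

    namespace fs = std::filesystem;

    static std::string readFile(const fs::path & p)
    {
        std::ifstream in(p, std::ios::binary);
        return std::string(std::istreambuf_iterator<char>(in),
                           std::istreambuf_iterator<char>());
    }

    int main(int argc, char * * argv)
    {
        if (argc != 2) { std::cerr << "usage: dedup-scan <dir>\n"; return 1; }

        // File contents -> first path seen with those contents.  (Keeping the
        // whole contents as the key is only for illustration; the real code
        // keys on a SHA-256 hash of the NAR serialisation.)
        std::map<std::string, fs::path> firstSeen;
        unsigned long long savings = 0;

        for (auto & entry : fs::recursive_directory_iterator(argv[1])) {
            if (!entry.is_regular_file()) continue;
            auto [pos, isNew] = firstSeen.emplace(readFile(entry.path()), entry.path());
            if (!isNew && !fs::equivalent(pos->second, entry.path()))
                savings += entry.file_size();   // duplicate not yet sharing an inode
        }

        std::cout << savings << " bytes could be freed by hard-linking duplicates\n";
        return 0;
    }

Unlike the real code, this sketch ignores the execute bit, so it would also group files that differ only in their permissions; the commit deliberately avoids that, as explained in the hashAndLink() comments below.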
optimise-store.pl (deleted):

@@ -1,91 +0,0 @@
-#! /usr/bin/perl -w
-
-use strict;
-use File::Basename;
-
-
-my @paths = ("/nix/store");
-
-
-print "hashing...\n";
-
-my $hashList = "/tmp/nix-optimise-hash-list";
-
-system("find @paths -type f -print0 | xargs -0 md5sum -- > $hashList") == 0
-    or die "cannot hash store files";
-
-
-print "sorting by hash...\n";
-
-system("sort $hashList > $hashList.sorted") == 0
-    or die "cannot sort list";
-
-
-sub atomicLink {
-    my $target = shift;
-    my $new = shift;
-    my $tmpNew = "${new}_optimise.$$";
-
-    # Make the directory writable temporarily.
-    my $dir = dirname $new;
-    my @st = stat $dir or die;
-
-    chmod ($st[2] | 0200, $dir) or die "cannot make `$dir' writable: $!";
-
-    link $target, $tmpNew or die "cannot create hard link `$tmpNew': $!";
-
-    rename $tmpNew, $new or die "cannot rename `$tmpNew' to `$new': $!";
-
-    chmod ($st[2], $dir) or die "cannot restore permission on `$dir': $!";
-    utime ($st[8], $st[9], $dir) or die "cannot restore timestamp on `$dir': $!";
-}
-
-
-print "hard-linking...\n";
-
-open LIST, "<$hashList.sorted" or die;
-
-my $prevFile;
-my $prevHash;
-my $prevInode;
-my $prevExec;
-
-my $totalSpace = 0;
-my $savedSpace = 0;
-
-while (<LIST>) {
-    /^([0-9a-f]*)\s+(.*)$/ or die;
-    my $curFile = $2;
-    my $curHash = $1;
-
-    my @st = stat $curFile or die;
-    next if ($st[2] & 0222) != 0; # skip writable files
-
-    my $fileSize = $st[7];
-    $totalSpace += $fileSize;
-    my $isExec = ($st[2] & 0111) == 0111;
-
-    if (defined $prevHash && $curHash eq $prevHash
-        && $prevExec == $isExec)
-    {
-
-        if ($st[1] != $prevInode) {
-            print "$curFile = $prevFile\n";
-            atomicLink $prevFile, $curFile;
-            $savedSpace += $fileSize;
-        }
-
-    } else {
-        $prevFile = $curFile;
-        $prevHash = $curHash;
-        $prevInode = $st[1];
-        $prevExec = ($st[2] & 0111) == 0111;
-    }
-}
-
-print "total space = $totalSpace\n";
-print "saved space = $savedSpace\n";
-my $savings = ($savedSpace / $totalSpace) * 100.0;
-print "savings = $savings %\n";

-close LIST;
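Both the old atomicLink() above and the new C++ code below replace a file with a hard link in the same way: create the link under a temporary name in the same directory, then rename() it over the original, so the path never disappears from the point of view of other processes. A minimal sketch of just that step (illustrative only; the real code additionally makes the parent directory writable first and restores its permissions and timestamp afterwards):

    // Sketch of the atomic-replacement step.  rename() replaces the
    // destination atomically, so other processes see either the old file or
    // the new link, never a missing path.  Error handling reduced to a bool.
    #include <cstdio>      // std::snprintf, std::rename
    #include <unistd.h>    // link, unlink, getpid

    static bool replaceWithHardLink(const char * target, const char * path)
    {
        char tmp[4096];
        std::snprintf(tmp, sizeof tmp, "%s.tmp-%d", path, (int) getpid());

        if (link(target, tmp) == -1) return false;   // tmp now shares target's inode
        if (std::rename(tmp, path) == -1) {          // atomically replace `path'
            unlink(tmp);
            return false;
        }
        return true;
    }

    int main(int argc, char * * argv)
    {
        if (argc != 3) return 1;
        return replaceWithHardLink(argv[1], argv[2]) ? 0 : 1;
    }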
local-store.cc:

@@ -174,7 +174,7 @@ void copyPath(const Path & src, const Path & dst, PathFilter & filter)
 }
 
 
-static void _canonicalisePathMetaData(const Path & path)
+static void _canonicalisePathMetaData(const Path & path, bool recurse)
 {
     checkInterrupt();
 
@@ -223,17 +223,17 @@ static void _canonicalisePathMetaData(const Path & path)
 
     }
 
-    if (S_ISDIR(st.st_mode)) {
+    if (recurse && S_ISDIR(st.st_mode)) {
         Strings names = readDirectory(path);
         for (Strings::iterator i = names.begin(); i != names.end(); ++i)
-            _canonicalisePathMetaData(path + "/" + *i);
+            _canonicalisePathMetaData(path + "/" + *i, true);
     }
 }
 
 
 void canonicalisePathMetaData(const Path & path)
 {
-    _canonicalisePathMetaData(path);
+    _canonicalisePathMetaData(path, true);
 
     /* On platforms that don't have lchown(), the top-level path can't
        be a symlink, since we can't change its ownership. */
@@ -625,7 +625,7 @@ void LocalStore::exportPath(const Path & path, bool sign,
        consistent metadata. */
     Transaction txn(nixDB);
     addTempRoot(path);
-    if (!isValidPath(path))
+    if (!isValidPathTxn(txn, path))
         throw Error(format("path `%1%' is not valid") % path);
 
     HashAndWriteSink hashAndWriteSink(sink);
@@ -950,6 +950,121 @@ void verifyStore(bool checkContents)
 }
 
 
+typedef std::map<Hash, std::pair<Path, ino_t> > HashToPath;
+
+
+static void toggleWritable(const Path & path, bool writable)
+{
+    struct stat st;
+    if (lstat(path.c_str(), &st))
+        throw SysError(format("getting attributes of path `%1%'") % path);
+
+    mode_t mode = st.st_mode;
+    if (writable) mode |= S_IWUSR;
+    else mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
+
+    if (chmod(path.c_str(), mode) == -1)
+        throw SysError(format("changing writability of `%1%'") % path);
+}
+
+
+static void hashAndLink(bool dryRun, HashToPath & hashToPath,
+    OptimiseStats & stats, const Path & path)
+{
+    struct stat st;
+    if (lstat(path.c_str(), &st))
+        throw SysError(format("getting attributes of path `%1%'") % path);
+
+    /* Sometimes SNAFUs can cause files in the Nix store to be
+       modified, in particular when running programs as root under
+       NixOS (example: $fontconfig/var/cache being modified).  Skip
+       those files. */
+    if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) {
+        printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path);
+        return;
+    }
+
+    /* We can hard link regular files and symlinks. */
+    if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
+
+        /* Hash the file.  Note that hashPath() returns the hash over
+           the NAR serialisation, which includes the execute bit on
+           the file.  Thus, executable and non-executable files with
+           the same contents *won't* be linked (which is good because
+           otherwise the permissions would be screwed up).
+
+           Also note that if `path' is a symlink, then we're hashing
+           the contents of the symlink (i.e. the result of
+           readlink()), not the contents of the target (which may not
+           even exist). */
+        Hash hash = hashPath(htSHA256, path);
+        stats.totalFiles++;
+        printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
+
+        std::pair<Path, ino_t> prevPath = hashToPath[hash];
+
+        if (prevPath.first == "") {
+            hashToPath[hash] = std::pair<Path, ino_t>(path, st.st_ino);
+            return;
+        }
+
+        /* Yes!  We've seen a file with the same contents.  Replace
+           the current file with a hard link to that file. */
+        stats.sameContents++;
+        if (prevPath.second == st.st_ino) {
+            printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % prevPath.first);
+            return;
+        }
+
+        printMsg(lvlTalkative, format("linking `%1%' to `%2%'") % path % prevPath.first);
+
+        Path tempLink = (format("%1%.tmp-%2%-%3%")
+            % path % getpid() % rand()).str();
+
+        toggleWritable(dirOf(path), true);
+
+        if (link(prevPath.first.c_str(), tempLink.c_str()) == -1)
+            throw SysError(format("cannot link `%1%' to `%2%'")
+                % tempLink % prevPath.first);
+
+        /* Atomically replace the old file with the new hard link. */
+        if (rename(tempLink.c_str(), path.c_str()) == -1)
+            throw SysError(format("cannot rename `%1%' to `%2%'")
+                % tempLink % path);
+
+        /* Make the directory read-only again and reset its timestamp
+           back to 0. */
+        _canonicalisePathMetaData(dirOf(path), false);
+
+        stats.filesLinked++;
+        stats.bytesFreed += st.st_size;
+    }
+
+    if (S_ISDIR(st.st_mode)) {
+        Strings names = readDirectory(path);
+        for (Strings::iterator i = names.begin(); i != names.end(); ++i)
+            hashAndLink(dryRun, hashToPath, stats, path + "/" + *i);
+    }
+}
+
+
+void LocalStore::optimiseStore(bool dryRun, OptimiseStats & stats)
+{
+    HashToPath hashToPath;
+
+    Paths paths;
+    PathSet validPaths;
+    nixDB.enumTable(noTxn, dbValidPaths, paths);
+
+    for (Paths::iterator i = paths.begin(); i != paths.end(); ++i) {
+        addTempRoot(*i);
+        if (!isValidPath(*i)) continue; /* path was GC'ed, probably */
+        startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i);
+        hashAndLink(dryRun, hashToPath, stats, *i);
+    }
+}
+
+
 /* Upgrade from schema 1 (Nix <= 0.7) to schema 2 (Nix >= 0.8). */
 static void upgradeStore07()
 {
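The comment in hashAndLink() above explains why files are hashed over their NAR serialisation, which includes the execute bit: hard links share a single inode and therefore a single set of permission bits, so linking an executable file to a non-executable one with the same bytes would clobber one of their modes. A small illustrative demonstration of that effect (not from the commit; the file names are made up):

    // Two names for one inode share permission bits: chmod via either name
    // changes both.  Illustrative only.
    #include <sys/stat.h>   // chmod, stat
    #include <unistd.h>     // link, unlink
    #include <cstdio>       // perror
    #include <fstream>
    #include <iostream>

    int main()
    {
        std::ofstream("demo-a") << "same contents\n";
        unlink("demo-b");                        // make sure link() can succeed
        if (link("demo-a", "demo-b") == -1) { perror("link"); return 1; }

        chmod("demo-a", 0555);                   // flip permissions via one name...

        struct stat st;
        stat("demo-b", &st);
        std::cout << std::oct << (st.st_mode & 07777) << "\n";   // ...prints 555: the other name changed too

        unlink("demo-a");
        unlink("demo-b");
        return 0;
    }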
local-store.hh:

@@ -21,6 +21,20 @@ const int nixSchemaVersion = 4;
 extern string drvsLogDir;
 
 
+struct OptimiseStats
+{
+    unsigned long totalFiles;
+    unsigned long sameContents;
+    unsigned long filesLinked;
+    unsigned long long bytesFreed;
+    OptimiseStats()
+    {
+        totalFiles = sameContents = filesLinked = 0;
+        bytesFreed = 0;
+    }
+};
+
+
 class LocalStore : public StoreAPI
 {
 private:
@@ -83,6 +97,10 @@ public:
     void collectGarbage(GCAction action, const PathSet & pathsToDelete,
         bool ignoreLiveness, PathSet & result, unsigned long long & bytesFreed);
 
+    /* Optimise the disk space usage of the Nix store by hard-linking
+       files with the same contents. */
+    void optimiseStore(bool dryRun, OptimiseStats & stats);
+
 };
nix-store help text:

@@ -21,6 +21,7 @@ Operations:
 
   --init: initialise the Nix database
   --verify: verify Nix structures
+  --optimise: optimise the Nix store by hard-linking identical files
 
   --version: output version information
   --help: display help
nix-store.cc:

@@ -466,6 +466,13 @@ static void opCheckValidity(Strings opFlags, Strings opArgs)
 }
 
 
+static string showBytes(unsigned long long bytes)
+{
+    return (format("%d bytes (%.2f MiB)")
+        % bytes % (bytes / (1024.0 * 1024.0))).str();
+}
+
+
 struct PrintFreed
 {
     bool show, dryRun;
@@ -477,9 +484,9 @@ struct PrintFreed
         if (show)
             cout << format(
                 (dryRun
-                 ? "%d bytes would be freed (%.2f MiB)\n"
-                 : "%d bytes freed (%.2f MiB)\n"))
-                % bytesFreed % (bytesFreed / (1024.0 * 1024.0));
+                 ? "%1% would be freed\n"
+                 : "%1% freed (%.2f MiB)\n"))
+                % showBytes(bytesFreed);
     }
 };
 
@@ -614,6 +621,43 @@ static void opVerify(Strings opFlags, Strings opArgs)
 }
 
 
+
+static void showOptimiseStats(OptimiseStats & stats)
+{
+    printMsg(lvlError,
+        format("%1% freed by hard-linking %2% files; there are %3% files with equal contents out of %4% files in total")
+        % showBytes(stats.bytesFreed)
+        % stats.filesLinked
+        % stats.sameContents
+        % stats.totalFiles);
+}
+
+
+/* Optimise the disk space usage of the Nix store by hard-linking
+   files with the same contents. */
+static void opOptimise(Strings opFlags, Strings opArgs)
+{
+    if (!opArgs.empty())
+        throw UsageError("no arguments expected");
+
+    for (Strings::iterator i = opFlags.begin();
+         i != opFlags.end(); ++i)
+        throw UsageError(format("unknown flag `%1%'") % *i);
+
+    LocalStore * store2(dynamic_cast<LocalStore *>(store.get()));
+    if (!store2) throw Error("you don't have sufficient rights to use --optimise");
+
+    OptimiseStats stats;
+    try {
+        store2->optimiseStore(true, stats);
+    } catch (...) {
+        showOptimiseStats(stats);
+        throw;
+    }
+    showOptimiseStats(stats);
+}
+
+
 /* Scan the arguments; find the operation, set global flags, put all
    other flags in a list, and put all other arguments in another
    list. */
@@ -659,6 +703,8 @@ void run(Strings args)
             op = opInit;
         else if (arg == "--verify")
             op = opVerify;
+        else if (arg == "--optimise")
+            op = opOptimise;
         else if (arg == "--add-root") {
             if (i == args.end())
                 throw UsageError("`--add-root requires an argument");