From 049e74ccf664d532a0556ad6b7e50da6f636e3dd Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sat, 21 May 2005 00:52:04 +0000 Subject: [PATCH] * Some experimental code for a fully content-addressed Nix store. The idea is that any component that resides in the Nix store has a store path name whose hash component is equal to the hash of the contents of that component, i.e., hashPartOf(path) = hashOf(contentsAt(path)) E.g., a path /nix/store/nc35k7yr8...-foo would have content hash nc35k7yr8... Of course, when building components in the Nix store, we don't know the content hash until after the component has been built. We will handle this by building the component at some randomly generated prefix in the Nix store, and then afterwards *rewriting* the random prefix to the hash of the actual contents. The tricky part is components that reference themselves, such as ELF executables that contain themselves in their RPATH. We can support this by computing content hashes "modulo" the original prefix, i.e., we zero out every occurrence of the randomly generated prefix, compute the content hash, then rewrite the random prefix to the final location.
---
 src/libstore/Makefile.am  |   9 +-
 src/libstore/store-new.cc | 169 ++++++++++++++++++++++++++++++++++++++
 src/libstore/store-new.hh |  39 +++++++++
 3 files changed, 216 insertions(+), 1 deletion(-)
 create mode 100644 src/libstore/store-new.cc
 create mode 100644 src/libstore/store-new.hh

diff --git a/src/libstore/Makefile.am b/src/libstore/Makefile.am
index 100fd0be2..5c9536284 100644
--- a/src/libstore/Makefile.am
+++ b/src/libstore/Makefile.am
@@ -15,4 +15,11 @@ AM_CXXFLAGS = -Wall \
 derivations-ast.cc derivations-ast.hh: ../aterm-helper.pl derivations-ast.def
 	$(perl) ../aterm-helper.pl derivations-ast.hh derivations-ast.cc < derivations-ast.def
 
-derivations.cc store.cc: derivations-ast.hh
\ No newline at end of file
+derivations.cc store.cc: derivations-ast.hh
+
+
+bin_PROGRAMS = testprog
+
+testprog_SOURCES = store-new.cc
+testprog_LDADD = ../libutil/libutil.a \
+ ../boost/format/libformat.a ${bdb_lib} ${aterm_lib}
diff --git a/src/libstore/store-new.cc b/src/libstore/store-new.cc
new file mode 100644
index 000000000..2197c8718
--- /dev/null
+++ b/src/libstore/store-new.cc
@@ -0,0 +1,169 @@
+#include "store-new.hh"
+
+#include "util.hh"
+#include "archive.hh"
+
+
+/* Length of the printed representation of a path hash. */
+const unsigned int pathHashLen = 32; /* characters */
+
+/* Representation of the null hash: `pathHashLen' NUL characters. */
+const string nullPathHashRef(pathHashLen, 0);
+
+
+/* Construct the null hash. */
+PathHash::PathHash()
+{
+    rep = nullPathHashRef;
+}
+
+
+/* Construct a path hash from a SHA-256 hash: the hash is compressed
+   with compressHash(h, 20) and then printed with printHash32(). */
+PathHash::PathHash(const Hash & h)
+{
+    assert(h.type == htSHA256);
+    rep = printHash32(compressHash(h, 20));
+}
+
+
+string PathHash::toString() const
+{
+    return rep;
+}
+
+
+bool PathHash::isNull() const
+{
+    return rep == nullPathHashRef;
+}
+
+
+bool PathHash::operator ==(const PathHash & hash2) const
+{
+    return rep == hash2.rep;
+}
+
+
+bool PathHash::operator <(const PathHash & hash2) const
+{
+    return rep < hash2.rep;
+}
+
+
+/* A dump sink that accumulates everything written to it in `s'. */
+struct CopySink : DumpSink
+{
+    string s;
+    virtual void operator () (const unsigned char * data, unsigned int len)
+    {
+        s.append((const char *) data, len);
+    }
+};
+
+
+/* A restore source that hands out successive chunks of the string `s'. */
+struct CopySource : RestoreSource
+{
+    string & s;
+    string::size_type pos;
+    CopySource(string & _s) : s(_s), pos(0) { }
+    virtual void operator () (unsigned char * data, unsigned int len)
+    {
+        /* Check before copying: string::copy() silently copies fewer
+           than `len' characters when it reaches the end of `s'. */
+        assert(pos <= s.size() && len <= s.size() - pos);
+        s.copy((char *) data, len, pos);
+        pos += len;
+    }
+};
+
+
+/* Return a copy of `s' in which every occurrence of the string
+   representation of a key of `rewrites' has been replaced by the
+   representation of the corresponding value. */
+string rewriteHashes(string s, const HashRewrites & rewrites)
+{
+    for (HashRewrites::const_iterator i = rewrites.begin();
+         i != rewrites.end(); ++i)
+    {
+        string from = i->first.toString(), to = i->second.toString();
+
+        assert(from.size() == to.size());
+
+        /* An identity rewrite changes nothing, and the loop below
+           would never terminate on it. */
+        if (from == to) continue;
+
+        /* `j' must be a string::size_type: narrowed to unsigned int
+           it can never compare equal to string::npos where size_type
+           is wider than int. */
+        string::size_type j = 0;
+        while ((j = s.find(from, j)) != string::npos) {
+            debug(format("rewriting @ %1%") % j);
+            s.replace(j, to.size(), to);
+        }
+    }
+
+    return s;
+}
+
+
+/* Compute the path hash of `s' after zeroing out every occurrence of
+   `modulus' (unless `modulus' is the null hash). */
+PathHash hashModulo(string s, const PathHash & modulus)
+{
+    if (!modulus.isNull()) {
+        /* Zero out occurrences of `modulus'. */
+        HashRewrites rewrites;
+        rewrites[modulus] = PathHash(); /* = null hash */
+        s = rewriteHashes(s, rewrites);
+    }
+
+    return PathHash(hashString(htSHA256, s));
+}
+
+
+Path addToStore(const Path & srcPath, const PathHash & selfHash)
+{
+    debug(format("adding %1%") % srcPath);
+
+    CopySink sink;
+    dumpPath(srcPath, sink);
+
+    PathHash newHash = hashModulo(sink.s, selfHash);
+
+    debug(format("newHash %1%") % newHash.toString());
+
+    if (!selfHash.isNull()) {
+        HashRewrites rewrites;
+        rewrites[selfHash] = newHash;
+        sink.s = rewriteHashes(sink.s, rewrites);
+        /* Sanity check: the rewritten dump, hashed modulo its new
+           hash, must yield that same hash. */
+        PathHash newHash2 = hashModulo(sink.s, newHash);
+        assert(newHash2 == newHash);
+        debug(format("newHash2 %1%") % newHash2.toString());
+    }
+
+    Path path = "./out/" + newHash.toString() + "-" + baseNameOf(srcPath);
+
+    CopySource source(sink.s);
+    restorePath(path, source);
+
+    return path;
+}
+
+
+/* Test driver: add `./bar' with a fixed self hash and print the
+   resulting path. */
+int main(int argc, char * * argv)
+{
+    verbosity = (Verbosity) ((int) 10);
+
+    Path p = addToStore("./bar", PathHash(parseHash32(htSHA256, "8myr6ajc52b5sky7iplgz8jv703ljc0q")));
+
+    cout << p << endl;
+
+    return 0;
+}
diff --git a/src/libstore/store-new.hh b/src/libstore/store-new.hh
new file mode 100644
index 000000000..3b87d7465
--- /dev/null
+++ b/src/libstore/store-new.hh
@@ -0,0 +1,39 @@
+#ifndef __STORE_NEW_H
+#define __STORE_NEW_H
+
+#include <string>
+#include <map>
+
+#include "hash.hh"
+#include "db.hh"
+
+using namespace std;
+
+
+/* The hash part of a store path, kept in its printed form. */
+struct PathHash
+{
+private:
+    string rep;
+public:
+    PathHash(); /* the null hash */
+    PathHash(const Hash & h);
+    string toString() const;
+    bool isNull() const;
+    bool operator ==(const PathHash & hash2) const;
+    bool operator <(const PathHash & hash2) const;
+};
+
+
+/* Add the contents of the specified path to the Nix store.  Any
+   occurrence of the representation of `selfHash' (if not null) is
+   rewritten to the hash of the new store path. */
+Path addToStore(const Path & srcPath, const PathHash & selfHash);
+
+
+/* Rewrite a set of hashes in the given path. */
+typedef map<PathHash, PathHash> HashRewrites;
+//Path rewriteHashes(const Path & srcPath, HashRewrites rewrites);
+
+
+#endif /* !__STORE_NEW_H */