1
1
Fork 0
mirror of https://github.com/NixOS/nix.git synced 2025-11-26 04:00:59 +01:00

* Equivalence class consolidation.  This solves the problem that when
  we combine closures built by different users, the resulting set may
  contain multiple paths from the same output path equivalence class.

  For instance, if we do

    $ NIX_USER_ID=foo nix-env -i libXext
    $ NIX_USER_ID=root nix-env -i libXt
    $ NIX_USER_ID=foo nix-env -i libXmu

  (where libXmu depends on libXext and libXt, which both depend on
  libX11), then the following will happen:

    * User foo builds libX11 and libXext because they don't exist
      yet.
      
    * User root builds libX11 and libXt because the latter doesn't
      exist yet, while the former *does* exist but cannot be trusted.
      The instance of libX11 built by root will almost certainly
      differ from the one built by foo, so they are stored in separate
      locations.
      
    * User foo builds libXmu, which requires libXext and libXt.  Foo
      has trusted copies of both (libXext was built by himself, while
      libXt was built by root, who is trusted by foo).  So libXmu is
      built with foo's libXext and root's libXt as inputs.

    * The resulting libXmu will link against two copies of libX11,
      namely the one used by foo's libXext and the one used by root's
      libXt.  This is bad semantically (it's observable behaviour, and
      might well lead to build time or runtime failure (e.g.,
      duplicate definitions of symbols)) and in terms of efficiency
      (the closure of libXmu contains two copies of libX11, so both
      must be deployed).

  The solution is to apply hash rewriting to "consolidate" the set of
  input paths to a build.  The invariant we wish to maintain is that
  any closure may contain at most one path from each equivalence
  class.
  
  So in the case of a collision, we select one path from each class,
  and *rewrite* all paths in that set to point only to paths in that
  set.  For instance, in the example above, we can rewrite foo's
  libXext to link against root's libX11.  That is, the hash part of
  foo's libX11 is replaced by the hash part of root's libX11.

  The hard part is to figure out which path to select from each
  class.  Some selections may be cheaper than others (i.e., require
  fewer rewrites).  The current implementation is rather dumb: it
  tries all possible selections, and picks the cheapest.  This is an
  exponential time algorithm.

  There certainly are more efficient common-case (heuristic)
  approaches.  But I don't know yet if there is a worst-case
  polynomial time algorithm.
This commit is contained in:
Eelco Dolstra 2005-05-30 10:49:00 +00:00
parent 4f83146459
commit b119dd279e
7 changed files with 231 additions and 24 deletions

View file

@ -1,4 +1,5 @@
#include "build.hh"
#include "misc.hh"
Derivation derivationFromPath(const Path & drvPath)
@ -49,3 +50,158 @@ Path findTrustedEqClassMember(const OutputEqClass & eqClass,
return "";
}
/* Maps an output equivalence class to the set of candidate paths that
   were found to belong to it. */
typedef map<OutputEqClass, PathSet> ClassMap;
/* Maps an output equivalence class to the single path that was
   selected to represent it. */
typedef map<OutputEqClass, Path> FinalClassMap;
/* Enumerate every way of picking one path from each equivalence class
   in [pos, end) and remember the cheapest complete pick.

   `selection' holds the paths picked so far and `unselection' the
   class members that were passed over.  Once every class has been
   handled, the cost of the pick is the number of selected paths whose
   closure (as computed by computeFSClosure) contains at least one
   unselected path.  If that cost is strictly lower than `bestCost',
   both `bestCost' and `bestSelection' are overwritten.

   !!! Every combination is tried, so the running time is exponential
   in the number of classes. */
static void findBestRewrite(const ClassMap::const_iterator & pos,
    const ClassMap::const_iterator & end,
    const PathSet & selection, const PathSet & unselection,
    unsigned int & bestCost, PathSet & bestSelection)
{
    if (pos == end) {
        /* All classes have been decided; evaluate this candidate. */
        // printMsg(lvlError, format("selection %1%") % showPaths(selection));

        PathSet offenders;

        for (PathSet::iterator chosen = selection.begin();
             chosen != selection.end(); ++chosen)
        {
            PathSet reachable;
            computeFSClosure(*chosen, reachable);

            for (PathSet::iterator dep = reachable.begin();
                 dep != reachable.end(); ++dep)
            {
                if (unselection.find(*dep) == unselection.end()) continue;
                /* The chosen path depends on a path we rejected. */
                offenders.insert(*chosen);
            }
        }

        printMsg(lvlError, format("cost %1% %2%") % offenders.size() % showPaths(offenders));

        if (bestCost > offenders.size()) {
            bestCost = offenders.size();
            bestSelection = selection;
        }

        return;
    }

    /* The class that follows `pos' is the same for every member we
       try, so compute it once. */
    ClassMap::const_iterator next = pos;
    ++next;

    const PathSet & members = pos->second;

    for (PathSet::iterator pick = members.begin();
         pick != members.end(); ++pick)
    {
        /* Pick `*pick' for this class... */
        PathSet picked(selection);
        picked.insert(*pick);

        /* ...and reject all of its siblings. */
        PathSet rejected(unselection);
        for (PathSet::iterator other = members.begin();
             other != members.end(); ++other)
        {
            if (other != pick) rejected.insert(*other);
        }

        findBestRewrite(next, end, picked, rejected,
            bestCost, bestSelection);
    }
}
/* Return a path to use in place of `path' (which must be a member of
   `selection') such that its references to unselected class members
   are redirected to the selected members.

   Each reference of `path' is inspected.  Self-references and
   references that are themselves in `selection' are left alone, as
   are references that belong to no equivalence class (sources).  Any
   other reference is looked up in `finalClassMap' via the first of
   its equivalence classes, the selected member is (recursively)
   rewritten, and a hash rewrite from the old reference to the
   resulting path is recorded.

   If no rewrites were recorded, `path' itself is returned.  Otherwise
   the path is re-added to the store through addToStore with the
   collected rewrites and the new path is returned. */
static Path maybeRewrite(const Path & path, const PathSet & selection,
    const FinalClassMap & finalClassMap)
{
    assert(selection.find(path) != selection.end());

    PathSet references;
    queryReferences(noTxn, path, references);

    HashRewrites rewrites;

    for (PathSet::iterator i = references.begin(); i != references.end(); ++i) {
        if (*i == path) continue; /* ignore self-references */

        /* References inside the selection need no replacement. */
        if (selection.find(*i) != selection.end()) continue;

        OutputEqClasses classes;
        queryOutputEqClasses(noTxn, *i, classes);

        /* !!! hacky; ignore sources; they are not in any eq class */
        if (classes.empty()) continue;

        printMsg(lvlError, format("in `%1%': missing `%2%'") % path % *i);

        /* !!! only the first class of the reference is consulted. */
        FinalClassMap::const_iterator j = finalClassMap.find(*(classes.begin()));
        assert(j != finalClassMap.end());

        printMsg(lvlError, format("replacing with `%1%'") % j->second);

        /* The replacement may itself need rewriting first. */
        Path newPath = maybeRewrite(j->second, selection, finalClassMap);

        if (*i != newPath)
            rewrites[hashPartOf(*i)] = hashPartOf(newPath);
    }

    if (rewrites.empty()) return path;

    printMsg(lvlError, format("rewriting `%1%'") % path);

    Path newPath = addToStore(path,
        hashPartOf(path), namePartOf(path),
        references, rewrites);

    printMsg(lvlError, format("rewrote `%1%' to `%2%'") % path % newPath);

    return newPath;
}
/* Consolidate `paths' so that the result contains at most one path
   from each output equivalence class.

   The paths are grouped by equivalence class.  If no class has two or
   more members, `paths' is returned unchanged.  Otherwise one member
   per class is selected with findBestRewrite (the selection with the
   lowest cost wins), and each selected path is passed through
   maybeRewrite; the resulting paths are returned.

   Paths that belong to no equivalence class (sources) cannot conflict
   and are carried over to the result unchanged, so that they are kept
   in the conflict case just as they are in the no-conflict case.

   `checkOnly' asserts that no consolidation is necessary: if a
   conflict is found while it is set, the assertion fails. */
PathSet consolidatePaths(const PathSet & paths, bool checkOnly)
{
    printMsg(lvlError, format("consolidating"));

    ClassMap classMap;
    PathSet sources; /* paths that are not in any eq class */

    for (PathSet::const_iterator i = paths.begin(); i != paths.end(); ++i) {
        OutputEqClasses classes;
        queryOutputEqClasses(noTxn, *i, classes);

        if (classes.empty()) {
            sources.insert(*i);
            continue;
        }

        for (OutputEqClasses::iterator j = classes.begin(); j != classes.end(); ++j)
            classMap[*j].insert(*i);
    }

    bool conflict = false;
    for (ClassMap::iterator i = classMap.begin(); i != classMap.end(); ++i)
        if (i->second.size() >= 2) {
            printMsg(lvlError, format("conflict in eq class `%1%'") % i->first);
            conflict = true;
        }

    if (!conflict) return paths;

    assert(!checkOnly);

    /* !!! exponential-time algorithm! */
    const unsigned int infinity = 1000000;
    unsigned int bestCost = infinity;
    PathSet bestSelection;
    findBestRewrite(classMap.begin(), classMap.end(),
        PathSet(), PathSet(), bestCost, bestSelection);

    assert(bestCost != infinity);

    printMsg(lvlError, format("cheapest selection %1% %2%")
        % bestCost % showPaths(bestSelection));

    /* Record, for every class, which of its members was selected. */
    FinalClassMap finalClassMap;
    for (ClassMap::iterator i = classMap.begin(); i != classMap.end(); ++i)
        for (PathSet::const_iterator j = i->second.begin(); j != i->second.end(); ++j)
            if (bestSelection.find(*j) != bestSelection.end())
                finalClassMap[i->first] = *j;

    /* Start from the sources so that they are not lost from the
       result, then add the (possibly rewritten) selected paths. */
    PathSet newPaths(sources);
    for (PathSet::iterator i = bestSelection.begin();
         i != bestSelection.end(); ++i)
        newPaths.insert(maybeRewrite(*i, bestSelection, finalClassMap));

    return newPaths;
}