chore(3p/nix/hash): eliminate exposed global variable

Change-Id: I3b34e3e17a751e225831ae599c6c6bb782a25679
Reviewed-on: https://cl.tvl.fyi/c/depot/+/1486
Tested-by: BuildkiteCI
Reviewed-by: tazjin <mail@tazj.in>
This commit is contained in:
Kane York 2020-07-27 17:07:51 -07:00 committed by kanepyork
parent d9262bd6c6
commit 976a36c2e4
3 changed files with 61 additions and 38 deletions

View file

@ -13,38 +13,21 @@ namespace nix {
static unsigned int refLength = 32; /* characters */ static unsigned int refLength = 32; /* characters */
static void search(const unsigned char* s, size_t len, StringSet& hashes, static void search(const char* s, size_t len, StringSet& hashes,
StringSet& seen) { StringSet& seen) {
static bool initialised = false;
static bool isBase32[256];
if (!initialised) {
for (bool& i : isBase32) {
i = false;
}
for (char base32Char : base32Chars) {
isBase32[static_cast<unsigned char>(base32Char)] = true;
}
initialised = true;
}
for (size_t i = 0; i + refLength <= len;) { for (size_t i = 0; i + refLength <= len;) {
int j = 0; absl::string_view ref(s + i, refLength);
bool match = true; bool match = Hash::IsValidBase32(ref);
for (j = refLength - 1; j >= 0; --j) {
if (!isBase32[s[i + j]]) {
i += j + 1;
match = false;
break;
}
}
if (!match) { if (!match) {
continue; continue;
} }
std::string ref(reinterpret_cast<const char*>(s) + i, refLength); // TODO(kanepyork): convert StringSet to flat_hash_set, delay owned string
if (hashes.find(ref) != hashes.end()) { // conversion into the 'if'
std::string sref(ref);
if (hashes.find(sref) != hashes.end()) {
DLOG(INFO) << "found reference to '" << ref << "' at offset " << i; DLOG(INFO) << "found reference to '" << ref << "' at offset " << i;
seen.insert(ref); seen.insert(sref);
hashes.erase(ref); hashes.erase(sref);
} }
++i; ++i;
} }
@ -65,22 +48,21 @@ struct RefScanSink : Sink {
void RefScanSink::operator()(const unsigned char* data, size_t len) { void RefScanSink::operator()(const unsigned char* data, size_t len) {
hashSink(data, len); hashSink(data, len);
const char* data_ = reinterpret_cast<const char*>(data);
/* It's possible that a reference spans the previous and current /* It's possible that a reference spans the previous and current
fragment, so search in the concatenation of the tail of the fragment, so search in the concatenation of the tail of the
previous fragment and the start of the current fragment. */ previous fragment and the start of the current fragment. */
std::string s = tail + std::string(reinterpret_cast<const char*>(data), std::string s = tail + std::string(data_, len > refLength ? refLength : len);
len > refLength ? refLength : len); search(s.data(), s.size(), hashes, seen);
search(reinterpret_cast<const unsigned char*>(s.data()), s.size(), hashes,
seen);
search(data, len, hashes, seen); search(data_, len, hashes, seen);
size_t tailLen = len <= refLength ? len : refLength; size_t tailLen = len <= refLength ? len : refLength;
tail = tail = std::string(tail, tail.size() < refLength - tailLen
std::string(tail, tail.size() < refLength - tailLen ? 0
? 0 : tail.size() - (refLength - tailLen)) +
: tail.size() - (refLength - tailLen)) + std::string(data_ + len - tailLen, tailLen);
std::string(reinterpret_cast<const char*>(data) + len - tailLen, tailLen);
} }
PathSet scanForReferences(const std::string& path, const PathSet& refs, PathSet scanForReferences(const std::string& path, const PathSet& refs,

View file

@ -78,6 +78,45 @@ static std::string printHash16(const Hash& hash) {
// omitted: E O U T // omitted: E O U T
const std::string base32Chars = "0123456789abcdfghijklmnpqrsvwxyz"; const std::string base32Chars = "0123456789abcdfghijklmnpqrsvwxyz";
constexpr signed char kUnBase32[] = {
-1, -1, -1, -1, -1, -1, -1, -1, /* unprintables */
-1, -1, -1, -1, -1, -1, -1, -1, /* unprintables */
-1, -1, -1, -1, -1, -1, -1, -1, /* unprintables */
-1, -1, -1, -1, -1, -1, -1, -1, /* unprintables */
-1, -1, -1, -1, -1, -1, -1, -1, /* SP..' */
-1, -1, -1, -1, -1, -1, -1, -1, /* (../ */
0, 1, 2, 3, 4, 5, 6, 7, /* 0..7 */
8, 9, -1, -1, -1, -1, -1, -1, /* 8..? */
-1, -1, -1, -1, -1, -1, -1, -1, /* @..G */
-1, -1, -1, -1, -1, -1, -1, -1, /* H..O */
-1, -1, -1, -1, -1, -1, -1, -1, /* P..W */
-1, -1, -1, -1, -1, -1, -1, -1, /* X.._ */
-1, 10, 11, 12, 13, -1, 14, 15, /* `..g */
16, 17, 18, 19, 20, 21, 22, -1, /* h..o */
23, 24, 25, 26, -1, -1, 27, 28, /* p..w */
29, 30, 31, -1, -1, -1, -1, -1, /* x..DEL */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* high */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* high */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* high */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* high */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* high */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* high */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* high */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* high */
};
bool Hash::IsValidBase32(absl::string_view s) {
static_assert(sizeof(kUnBase32) == 256);
for (char c : s) {
if (kUnBase32[static_cast<unsigned char>(c)] == -1) {
return false;
}
}
return true;
}
static std::string printHash32(const Hash& hash) { static std::string printHash32(const Hash& hash) {
assert(hash.hashSize); assert(hash.hashSize);
size_t len = hash.base32Len(); size_t len = hash.base32Len();

View file

@ -14,8 +14,6 @@ const int sha1HashSize = 20;
const int sha256HashSize = 32; const int sha256HashSize = 32;
const int sha512HashSize = 64; const int sha512HashSize = 64;
extern const std::string base32Chars;
enum Base : int { Base64, Base32, Base16, SRI }; enum Base : int { Base64, Base32, Base16, SRI };
struct Hash { struct Hash {
@ -65,6 +63,10 @@ struct Hash {
or base-64. By default, this is prefixed by the hash type or base-64. By default, this is prefixed by the hash type
(e.g. "sha256:"). */ (e.g. "sha256:"). */
std::string to_string(Base base = Base32, bool includeType = true) const; std::string to_string(Base base = Base32, bool includeType = true) const;
/* Returns whether the passed string contains entirely valid base32
characters. */
static bool IsValidBase32(absl::string_view s);
}; };
/* Print a hash in base-16 if it's MD5, or base-32 otherwise. */ /* Print a hash in base-16 if it's MD5, or base-32 otherwise. */