feat(tvix/nix-compat): decode base32 with a lookup table
This also takes input validation out of the loop, leaving the loop backedge as the sole branch in the hot path.

Change-Id: Id08e6fb9cf5b074780efa09a7ad389352a601bcc
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9847
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
This commit is contained in:
parent
67b08469db
commit
55c37a2871
1 changed file with 41 additions and 21 deletions
|
@@ -51,17 +51,21 @@ pub fn encode(input: &[u8]) -> String {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This maps a nixbase32-encoded character to its binary representation, which
|
/// This maps a nixbase32-encoded character to its binary representation, which
|
||||||
/// is also the index of the character in the alphabet.
|
/// is also the index of the character in the alphabet. Invalid characters are
|
||||||
fn decode_char(encoded_char: u8) -> Option<u8> {
|
/// mapped to 0xFF, which is itself an invalid value.
|
||||||
Some(match encoded_char {
|
const BASE32_ORD: [u8; 256] = {
|
||||||
b'0'..=b'9' => encoded_char - b'0',
|
let mut ord = [0xFF; 256];
|
||||||
b'a'..=b'd' => encoded_char - b'a' + 10_u8,
|
let mut alphabet = ALPHABET.as_slice();
|
||||||
b'f'..=b'n' => encoded_char - b'f' + 14_u8,
|
let mut i = 0;
|
||||||
b'p'..=b's' => encoded_char - b'p' + 23_u8,
|
|
||||||
b'v'..=b'z' => encoded_char - b'v' + 27_u8,
|
while let &[c, ref tail @ ..] = alphabet {
|
||||||
_ => return None,
|
ord[c as usize] = i;
|
||||||
})
|
alphabet = tail;
|
||||||
}
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ord
|
||||||
|
};
|
||||||
|
|
||||||
/// Returns decoded input
|
/// Returns decoded input
|
||||||
pub fn decode(input: &[u8]) -> Result<Vec<u8>, Nixbase32DecodeError> {
|
pub fn decode(input: &[u8]) -> Result<Vec<u8>, Nixbase32DecodeError> {
|
||||||
|
@@ -70,18 +74,23 @@ pub fn decode(input: &[u8]) -> Result<Vec<u8>, Nixbase32DecodeError> {
|
||||||
|
|
||||||
// loop over all characters in reverse, and keep the iteration count in n.
|
// loop over all characters in reverse, and keep the iteration count in n.
|
||||||
let mut carry = 0;
|
let mut carry = 0;
|
||||||
|
let mut mask = 0;
|
||||||
for (n, &c) in input.iter().rev().enumerate() {
|
for (n, &c) in input.iter().rev().enumerate() {
|
||||||
if let Some(digit) = decode_char(c) {
|
let b = n * 5;
|
||||||
let b = n * 5;
|
let i = b / 8;
|
||||||
let i = b / 8;
|
let j = b % 8;
|
||||||
let j = b % 8;
|
|
||||||
|
|
||||||
let value = (digit as u16) << j;
|
let digit = BASE32_ORD[c as usize];
|
||||||
output[i] |= value as u8 | carry;
|
let value = (digit as u16) << j;
|
||||||
carry = (value >> 8) as u8;
|
output[i] |= value as u8 | carry;
|
||||||
} else {
|
carry = (value >> 8) as u8;
|
||||||
return Err(Nixbase32DecodeError::CharacterNotInAlphabet(c));
|
|
||||||
}
|
mask |= digit;
|
||||||
|
}
|
||||||
|
|
||||||
|
if mask == 0xFF {
|
||||||
|
let c = find_invalid(input);
|
||||||
|
return Err(Nixbase32DecodeError::CharacterNotInAlphabet(c));
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we're at the end, but have a nonzero carry, the encoding is invalid.
|
// if we're at the end, but have a nonzero carry, the encoding is invalid.
|
||||||
|
@@ -92,6 +101,17 @@ pub fn decode(input: &[u8]) -> Result<Vec<u8>, Nixbase32DecodeError> {
|
||||||
Ok(output)
|
Ok(output)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cold]
|
||||||
|
fn find_invalid(input: &[u8]) -> u8 {
|
||||||
|
for &c in input {
|
||||||
|
if !ALPHABET.contains(&c) {
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the decoded length of an input of length len.
|
/// Returns the decoded length of an input of length len.
|
||||||
pub fn decode_len(len: usize) -> usize {
|
pub fn decode_len(len: usize) -> usize {
|
||||||
(len * 5) / 8
|
(len * 5) / 8
|
||||||
|
|
Loading…
Add table
Reference in a new issue