2022-12-29 22:47:02 +01:00
|
|
|
//! Implements the slightly odd "base32" encoding that's used in Nix.
//!
//! Nix uses a custom alphabet. Contrary to other implementations (RFC4648),
//! encoding to "nix base32" doesn't use any padding, and reads in characters
//! in reverse order.
//!
//! This is also the main reason why we can't use `data_encoding::Encoding` -
//! it gets things wrong if there normally would be a need for padding.
|
|
|
|
|
|
|
|
use std::fmt::Write;
|
|
|
|
|
2023-11-10 19:32:40 +01:00
|
|
|
use data_encoding::{DecodeError, DecodeKind};
|
2022-12-29 22:47:02 +01:00
|
|
|
|
2023-02-01 11:12:28 +01:00
|
|
|
/// The 32 symbols of the nixbase32 alphabet: the digits followed by the
/// lowercase letters, leaving out `e`, `o`, `t` and `u`. A symbol's index in
/// this array is the 5-bit value it stands for.
const ALPHABET: &[u8; 32] = b"0123456789abcdfghijklmnpqrsvwxyz";
|
2023-01-26 14:42:16 +01:00
|
|
|
|
|
|
|
/// Returns encoded input
|
|
|
|
pub fn encode(input: &[u8]) -> String {
|
|
|
|
let output_len = encode_len(input.len());
|
|
|
|
let mut output = String::with_capacity(output_len);
|
|
|
|
|
2023-10-27 19:06:29 +02:00
|
|
|
for n in (0..output_len).rev() {
|
|
|
|
let b = n * 5; // bit offset within the entire input
|
|
|
|
let i = b / 8; // input byte index
|
|
|
|
let j = b % 8; // bit offset within that input byte
|
2022-12-29 22:47:02 +01:00
|
|
|
|
2023-10-27 19:06:29 +02:00
|
|
|
// 5-bit words aren't aligned to bytes
|
|
|
|
// we can only read byte-aligned units
|
|
|
|
// read 16 bits then shift and mask to 5
|
|
|
|
let c = {
|
|
|
|
let mut word = input[i] as u16;
|
|
|
|
if let Some(&msb) = input.get(i + 1) {
|
|
|
|
word |= (msb as u16) << 8;
|
2023-01-26 14:42:16 +01:00
|
|
|
}
|
2023-10-27 19:06:29 +02:00
|
|
|
(word >> j) & 0x1f
|
|
|
|
};
|
2023-01-26 14:42:16 +01:00
|
|
|
|
2023-10-27 19:06:29 +02:00
|
|
|
output.write_char(ALPHABET[c as usize] as char).unwrap();
|
2022-12-29 22:47:02 +01:00
|
|
|
}
|
2023-01-26 14:42:16 +01:00
|
|
|
|
|
|
|
output
|
2022-12-29 22:47:02 +01:00
|
|
|
}
|
|
|
|
|
2023-01-26 14:42:16 +01:00
|
|
|
/// This maps a nixbase32-encoded character to its binary representation, which
|
2023-10-27 02:34:19 +02:00
|
|
|
/// is also the index of the character in the alphabet. Invalid characters are
|
|
|
|
/// mapped to 0xFF, which is itself an invalid value.
|
|
|
|
const BASE32_ORD: [u8; 256] = {
|
|
|
|
let mut ord = [0xFF; 256];
|
|
|
|
let mut alphabet = ALPHABET.as_slice();
|
|
|
|
let mut i = 0;
|
|
|
|
|
|
|
|
while let &[c, ref tail @ ..] = alphabet {
|
|
|
|
ord[c as usize] = i;
|
|
|
|
alphabet = tail;
|
|
|
|
i += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
ord
|
|
|
|
};
|
2022-12-29 22:47:02 +01:00
|
|
|
|
2023-01-26 14:42:16 +01:00
|
|
|
/// Returns decoded input
|
2023-11-10 19:32:40 +01:00
|
|
|
pub fn decode(input: impl AsRef<[u8]>) -> Result<Vec<u8>, DecodeError> {
|
2023-10-27 09:50:38 +02:00
|
|
|
let input = input.as_ref();
|
|
|
|
|
2023-01-26 14:42:16 +01:00
|
|
|
let output_len = decode_len(input.len());
|
|
|
|
let mut output: Vec<u8> = vec![0x00; output_len];
|
|
|
|
|
2023-10-27 09:36:06 +02:00
|
|
|
decode_inner(input, &mut output)?;
|
|
|
|
Ok(output)
|
|
|
|
}
|
|
|
|
|
2023-11-10 19:32:40 +01:00
|
|
|
pub fn decode_fixed<const K: usize>(input: impl AsRef<[u8]>) -> Result<[u8; K], DecodeError> {
|
2023-10-27 09:36:06 +02:00
|
|
|
let input = input.as_ref();
|
|
|
|
|
|
|
|
if input.len() != encode_len(K) {
|
2023-11-10 19:32:40 +01:00
|
|
|
return Err(DecodeError {
|
|
|
|
position: input.len().min(encode_len(K)),
|
|
|
|
kind: DecodeKind::Length,
|
|
|
|
});
|
2023-10-27 09:36:06 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
let mut output = [0; K];
|
|
|
|
decode_inner(input, &mut output)?;
|
|
|
|
Ok(output)
|
|
|
|
}
|
|
|
|
|
2023-11-10 19:32:40 +01:00
|
|
|
fn decode_inner(input: &[u8], output: &mut [u8]) -> Result<(), DecodeError> {
|
2023-01-26 14:42:16 +01:00
|
|
|
// loop over all characters in reverse, and keep the iteration count in n.
|
2023-10-27 02:06:01 +02:00
|
|
|
let mut carry = 0;
|
2023-10-27 02:34:19 +02:00
|
|
|
let mut mask = 0;
|
2023-10-27 02:06:01 +02:00
|
|
|
for (n, &c) in input.iter().rev().enumerate() {
|
2023-10-27 02:34:19 +02:00
|
|
|
let b = n * 5;
|
|
|
|
let i = b / 8;
|
|
|
|
let j = b % 8;
|
|
|
|
|
|
|
|
let digit = BASE32_ORD[c as usize];
|
|
|
|
let value = (digit as u16) << j;
|
|
|
|
output[i] |= value as u8 | carry;
|
|
|
|
carry = (value >> 8) as u8;
|
|
|
|
|
|
|
|
mask |= digit;
|
|
|
|
}
|
|
|
|
|
|
|
|
if mask == 0xFF {
|
2023-11-10 19:32:40 +01:00
|
|
|
return Err(DecodeError {
|
|
|
|
position: find_invalid(input),
|
|
|
|
kind: DecodeKind::Symbol,
|
|
|
|
});
|
2022-12-29 22:47:02 +01:00
|
|
|
}
|
|
|
|
|
2023-10-27 02:06:01 +02:00
|
|
|
// if we're at the end, but have a nonzero carry, the encoding is invalid.
|
|
|
|
if carry != 0 {
|
2023-11-10 19:32:40 +01:00
|
|
|
return Err(DecodeError {
|
|
|
|
position: 0,
|
|
|
|
kind: DecodeKind::Trailing,
|
|
|
|
});
|
2023-10-27 02:06:01 +02:00
|
|
|
}
|
|
|
|
|
2023-10-27 09:36:06 +02:00
|
|
|
Ok(())
|
2023-01-26 14:42:16 +01:00
|
|
|
}
|
|
|
|
|
2023-11-10 19:32:40 +01:00
|
|
|
fn find_invalid(input: &[u8]) -> usize {
|
|
|
|
for (i, &c) in input.iter().enumerate() {
|
2023-10-27 02:34:19 +02:00
|
|
|
if !ALPHABET.contains(&c) {
|
2023-11-10 19:32:40 +01:00
|
|
|
return i;
|
2023-10-27 02:34:19 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unreachable!()
|
|
|
|
}
|
|
|
|
|
2023-01-26 14:42:16 +01:00
|
|
|
/// Returns the number of bytes that `len` nixbase32 characters decode to.
///
/// Every character carries 5 bits; bits that do not fill a whole byte are
/// not counted.
pub const fn decode_len(len: usize) -> usize {
    let total_bits = len * 5;
    total_bits / 8
}
|
|
|
|
|
|
|
|
/// Returns the number of nixbase32 characters needed to encode `len` bytes.
///
/// This is the bit count divided by 5, rounded up, since a partially filled
/// final group still needs its own character.
pub const fn encode_len(len: usize) -> usize {
    let total_bits = len * 8;
    // Adding 4 before the division rounds the result up.
    (total_bits + 4) / 5
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use hex_literal::hex;
    use rstest::rstest;

    /// Encoding the raw bytes must produce the expected nixbase32 string.
    #[rstest]
    #[case::empty_bytes("", &[])]
    #[case::one_byte("0z", &hex!("1f"))]
    #[case::store_path("00bgd045z0d4icpbc2yyz4gx48ak44la", &hex!("8a12321522fd91efbd60ebb2481af88580f61600"))]
    #[case::sha256("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", &hex!("b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30"))]
    #[test]
    fn encode(#[case] expected: &str, #[case] raw: &[u8]) {
        let actual = super::encode(raw);
        assert_eq!(expected, actual);
    }

    /// Decoding must either yield the expected bytes (`Some`) or fail (`None`).
    #[rstest]
    #[case::empty_bytes("", Some(&[][..]) )]
    #[case::one_byte("0z", Some(&hex!("1f")[..]))]
    #[case::store_path("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(&hex!("8a12321522fd91efbd60ebb2481af88580f61600")[..]))]
    #[case::sha256("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", Some(&hex!("b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30")[..]))]
    // Two `z` characters carry ten 1-bits; only eight fit into the single
    // output byte, so two 1-bits would be left in the carry.
    #[case::invalid_encoding_1("zz", None)]
    // A narrower variant of the same problem: this would decode as
    // 00000000 11, again leaving set bits beyond the output byte.
    #[case::invalid_encoding_2("c0", None)]
    #[test]
    fn decode(#[case] encoded: &str, #[case] expected: Option<&[u8]>) {
        let result = super::decode(encoded);

        if let Some(bytes) = expected {
            // A successful case has to reproduce the given bytes exactly.
            assert_eq!(bytes, result.unwrap());
        } else {
            // Everything else has to be rejected.
            assert!(result.is_err());
        }
    }

    /// `decode_fixed` decodes into an array and rejects wrong input lengths.
    #[test]
    fn decode_fixed() {
        let decoded = super::decode_fixed("00bgd045z0d4icpbc2yyz4gx48ak44la").unwrap();
        assert_eq!(decoded, hex!("8a12321522fd91efbd60ebb2481af88580f61600"));

        let length_error = super::DecodeError {
            position: 2,
            kind: super::DecodeKind::Length,
        };
        assert_eq!(super::decode_fixed::<32>("00").unwrap_err(), length_error);
    }

    #[test]
    fn encode_len() {
        for (input_len, expected) in [(0, 0), (20, 32)] {
            assert_eq!(super::encode_len(input_len), expected);
        }
    }

    #[test]
    fn decode_len() {
        for (input_len, expected) in [(0, 0), (32, 20)] {
            assert_eq!(super::decode_len(input_len), expected);
        }
    }
}
|