feat(tazjin/german-string): init initial representation

This adds an initial implementation of the so-called "German Strings" in Rust.

https://cedardb.com/blog/german_strings/

This implementation is *far from* complete: the only thing that can be done
right now is to construct a string, and even that I'm not fully happy with.

Change-Id: I2697932a0ef373be76ffd14d59677493a5783b58
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12234
Autosubmit: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
This commit is contained in:
Vincent Ambo 2024-08-16 13:35:48 +03:00 committed by clbot
parent abff828ccc
commit ef75a6300b
4 changed files with 82 additions and 0 deletions

1
users/tazjin/german-string/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
target/

7
users/tazjin/german-string/Cargo.lock generated Normal file
View file

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "german-string"
version = "0.1.0"

View file

@ -0,0 +1,6 @@
[package]
name = "german-string"
version = "0.1.0"
edition = "2021"
[dependencies]

View file

@ -0,0 +1,68 @@
use std::alloc::Layout;
/// Largest payload, in bytes, that is stored inline instead of on the heap.
const MAX_INLINE_LEN: usize = 12;

/// Inline representation, used for strings of at most [`MAX_INLINE_LEN`] bytes.
#[derive(Clone, Copy)]
#[repr(C)]
struct GSSmall {
    /// Length of the string in bytes.
    len: u32,
    /// The string's bytes; every position at or past `len` is zero.
    data: [u8; MAX_INLINE_LEN],
}

/// Heap-backed representation, used for strings longer than
/// [`MAX_INLINE_LEN`] bytes.
///
/// This type is `Copy` only because fields of a union must be; the allocation
/// behind `data` is owned by the enclosing [`GermanString`], which is not
/// `Clone`.
#[derive(Clone, Copy)]
#[repr(C)]
struct GSLarge {
    /// Length of the string in bytes.
    len: u32,
    /// Copy of the first four bytes of the string.
    prefix: [u8; 4],
    /// Pointer to a heap allocation of exactly `len` bytes holding the whole
    /// string (including the bytes duplicated in `prefix`).
    data: *mut u8,
}

// Both variants share one 16-byte slot, so they must be the same size.
const _ASSERT_VARIANTS_SIZE: () = assert!(
    std::mem::size_of::<GSSmall>() == std::mem::size_of::<GSLarge>(),
    "German String variants must have the same size"
);

/// Storage shared by the two representations.
///
/// Which variant is live is decided by `len`: values up to
/// [`MAX_INLINE_LEN`] mean `small`, anything larger means `large`.
///
/// `repr(C)` pins every field to offset 0. Together with both variants being
/// `repr(C)` structs that start with `len: u32`, this makes it well-defined to
/// read `len` through either variant before knowing which one is live.
#[repr(C)]
union GSRepr {
    small: GSSmall,
    large: GSLarge,
}

/// A 16-byte string that stores short contents inline and longer contents in a
/// heap buffer alongside a four-byte prefix.
///
/// See <https://cedardb.com/blog/german_strings/> for background.
#[repr(transparent)]
pub struct GermanString(GSRepr);

const _ASSERT_GSTRING_SIZE: () = assert!(
    std::mem::size_of::<GermanString>() == 16,
    "German String should be 16 bytes in size",
);

impl GermanString {
    /// Creates a new transient German String from the given bytes.
    ///
    /// Inputs of at most 12 bytes are copied inline. Longer inputs are copied
    /// into a freshly allocated heap buffer which is released again when the
    /// returned value is dropped. Persistent strings are not supported yet.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is longer than `u32::MAX` bytes, since the length
    /// field cannot represent it.
    pub fn new_transient(bytes: &[u8]) -> GermanString {
        // A plain `as u32` cast would silently wrap and leave `len` out of
        // sync with the allocation, so reject oversized input loudly instead.
        let len = u32::try_from(bytes.len())
            .expect("German Strings can hold at most u32::MAX bytes");

        if bytes.len() <= MAX_INLINE_LEN {
            let mut data = [0u8; MAX_INLINE_LEN];
            data[..bytes.len()].copy_from_slice(bytes);
            return GermanString(GSRepr {
                small: GSSmall { len, data },
            });
        }

        // Cannot panic: this branch implies `bytes.len() > MAX_INLINE_LEN >= 4`.
        let mut prefix = [0u8; 4];
        prefix.copy_from_slice(&bytes[..4]);

        let layout = Layout::array::<u8>(bytes.len())
            .expect("a slice never spans more than isize::MAX bytes");

        // SAFETY: `layout` has a non-zero size because `bytes.len()` exceeds
        // `MAX_INLINE_LEN`. The returned pointer is checked for null before
        // use, and the fresh allocation is `bytes.len()` bytes long and cannot
        // overlap the borrowed `bytes`.
        let data = unsafe {
            let ptr = std::alloc::alloc(layout);
            if ptr.is_null() {
                std::alloc::handle_alloc_error(layout);
            }
            std::ptr::copy_nonoverlapping(bytes.as_ptr(), ptr, bytes.len());
            ptr
        };

        GermanString(GSRepr {
            large: GSLarge { len, prefix, data },
        })
    }
}

impl Drop for GermanString {
    /// Releases the heap buffer of a large string; small strings own nothing.
    fn drop(&mut self) {
        // SAFETY: `len` sits at offset 0 of both `repr(C)` variants of the
        // `repr(C)` union, so it can be read through `small` regardless of
        // which variant was written. When it exceeds `MAX_INLINE_LEN` the
        // value was built by the large branch of `new_transient`, so `data`
        // points to a live allocation made with the layout of `len` bytes,
        // and `GermanString` is not `Clone`, so nobody else frees it.
        unsafe {
            let len = self.0.small.len as usize;
            if len > MAX_INLINE_LEN {
                let layout = Layout::array::<u8>(len)
                    .expect("layout was valid when the buffer was allocated");
                std::alloc::dealloc(self.0.large.data, layout);
            }
        }
    }
}