feat(tazjin/german-string): add constructor method from owned data
Makes it possible to construct a GermanString from an owned byte vector, without having to clone the data.

This is done by "disowning" the vector using ManuallyDrop to access its internal pointer. For transient strings, this memory is then owned (and freed) by the GermanString instance.

Small strings are copied out of the heap and stored inline as before, to avoid any dereferencing operations.

Change-Id: I754736099f71d646d430aed73e558a5a7626c394
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12249
Autosubmit: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
This commit is contained in:
parent
0d6d31bc66
commit
ed4f68b1d3
1 changed files with 40 additions and 12 deletions
|
@ -36,10 +36,9 @@ const _ASSERT_GSTRING_SIZE: () = assert!(
|
||||||
);
|
);
|
||||||
|
|
||||||
impl GermanString {
|
impl GermanString {
|
||||||
// Creates a new transient German String from the given bytes. Transient
|
/// Creates a new transient German String from the given slice, copying the
|
||||||
// strings are destroyed when the object is destroyed. Persistent strings
|
/// data in the process.
|
||||||
// are not supported yet.
|
pub fn transient(bytes: &[u8]) -> GermanString {
|
||||||
pub fn new_transient(bytes: &[u8]) -> GermanString {
|
|
||||||
if bytes.len() > u32::MAX as usize {
|
if bytes.len() > u32::MAX as usize {
|
||||||
panic!("GermanString maximum length is {} bytes", u32::MAX);
|
panic!("GermanString maximum length is {} bytes", u32::MAX);
|
||||||
}
|
}
|
||||||
|
@ -72,6 +71,35 @@ impl GermanString {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a new transient German String from the given owned bytes. Short
|
||||||
|
/// strings will be copied into the string representation, long strings will
|
||||||
|
/// be moved out of the given vector without additional allocations.
|
||||||
|
pub fn transient_from_owned(bytes: Vec<u8>) -> GermanString {
|
||||||
|
if bytes.len() > u32::MAX as usize {
|
||||||
|
panic!("GermanString maximum length is {} bytes", u32::MAX);
|
||||||
|
}
|
||||||
|
|
||||||
|
if bytes.len() <= 12 {
|
||||||
|
let mut s = GSSmall {
|
||||||
|
len: bytes.len() as u32,
|
||||||
|
data: [0u8; 12],
|
||||||
|
};
|
||||||
|
|
||||||
|
s.data[..bytes.len()].copy_from_slice(&bytes);
|
||||||
|
GermanString(GSRepr { small: s })
|
||||||
|
} else {
|
||||||
|
let mut md = std::mem::ManuallyDrop::new(bytes);
|
||||||
|
let mut large = GSLarge {
|
||||||
|
len: md.len() as u32,
|
||||||
|
prefix: [0u8; 4],
|
||||||
|
data: md.as_mut_ptr(),
|
||||||
|
};
|
||||||
|
|
||||||
|
large.prefix.copy_from_slice(&md[..4]);
|
||||||
|
GermanString(GSRepr { large })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
// SAFETY: The length field is located in the same location for both
|
// SAFETY: The length field is located in the same location for both
|
||||||
// variants, reading it from either is safe.
|
// variants, reading it from either is safe.
|
||||||
|
@ -186,14 +214,14 @@ mod tests {
|
||||||
|
|
||||||
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
|
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
|
||||||
any_with::<String>(args)
|
any_with::<String>(args)
|
||||||
.prop_map(|s| GermanString::new_transient(s.as_bytes()))
|
.prop_map(|s| GermanString::transient(s.as_bytes()))
|
||||||
.boxed()
|
.boxed()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_empty_string() {
|
fn test_empty_string() {
|
||||||
let empty = GermanString::new_transient(b"");
|
let empty = GermanString::transient(b"");
|
||||||
|
|
||||||
assert_eq!(empty.len(), 0, "empty string should be empty");
|
assert_eq!(empty.len(), 0, "empty string should be empty");
|
||||||
assert_eq!(empty.as_bytes(), b"", "empty string should contain nothing");
|
assert_eq!(empty.as_bytes(), b"", "empty string should contain nothing");
|
||||||
|
@ -206,7 +234,7 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_short_string() {
|
fn test_short_string() {
|
||||||
let short = GermanString::new_transient(b"meow");
|
let short = GermanString::transient(b"meow");
|
||||||
|
|
||||||
assert_eq!(short.len(), 4, "'meow' is four characters");
|
assert_eq!(short.len(), 4, "'meow' is four characters");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -224,7 +252,7 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_long_string() {
|
fn test_long_string() {
|
||||||
let input: &str = "This code was written at https://signal.live";
|
let input: &str = "This code was written at https://signal.live";
|
||||||
let long = GermanString::new_transient(input.as_bytes());
|
let long = GermanString::transient(input.as_bytes());
|
||||||
|
|
||||||
assert_eq!(long.len(), 44, "long string has correct length");
|
assert_eq!(long.len(), 44, "long string has correct length");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -243,7 +271,7 @@ mod tests {
|
||||||
proptest! {
|
proptest! {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_roundtrip_vec(input: Vec<u8>) {
|
fn test_roundtrip_vec(input: Vec<u8>) {
|
||||||
let gs = GermanString::new_transient(input.as_slice());
|
let gs = GermanString::transient_from_owned(input.clone());
|
||||||
assert_eq!(input.len(), gs.len(), "length should match");
|
assert_eq!(input.len(), gs.len(), "length should match");
|
||||||
|
|
||||||
let out = gs.as_bytes().to_owned();
|
let out = gs.as_bytes().to_owned();
|
||||||
|
@ -252,7 +280,7 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_roundtrip_string(input: String) {
|
fn test_roundtrip_string(input: String) {
|
||||||
let gs = GermanString::new_transient(input.as_bytes());
|
let gs = GermanString::transient_from_owned(input.clone().into_bytes());
|
||||||
assert_eq!(input.len(), gs.len(), "length should match");
|
assert_eq!(input.len(), gs.len(), "length should match");
|
||||||
|
|
||||||
let out = String::from_utf8(gs.as_bytes().to_owned())
|
let out = String::from_utf8(gs.as_bytes().to_owned())
|
||||||
|
@ -264,8 +292,8 @@ mod tests {
|
||||||
// Test [`Eq`] implementation.
|
// Test [`Eq`] implementation.
|
||||||
#[test]
|
#[test]
|
||||||
fn test_eq(lhs: Vec<u8>, rhs: Vec<u8>) {
|
fn test_eq(lhs: Vec<u8>, rhs: Vec<u8>) {
|
||||||
let lhs_gs = GermanString::new_transient(lhs.as_slice());
|
let lhs_gs = GermanString::transient(lhs.as_slice());
|
||||||
let rhs_gs = GermanString::new_transient(rhs.as_slice());
|
let rhs_gs = GermanString::transient(rhs.as_slice());
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
(lhs == rhs),
|
(lhs == rhs),
|
||||||
|
|
Loading…
Reference in a new issue