feat(users/edef/refscan): AArch64 support
Change-Id: I5062078739f0bf9f70c6789a9f2eafceff65d76e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7690
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
parent
1afb4a9f44
commit
ec470d254f
1 changed files with 60 additions and 0 deletions
|
@ -55,6 +55,7 @@ mod test {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
mod simd {
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86 as arch;
|
||||
|
@ -92,3 +93,62 @@ mod simd {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
mod simd {
    use std::{
        arch::aarch64::{
            uint8x16_t as u8x16, vaddv_u8, vandq_u8, vcgtq_u8, vdupq_n_u8, vget_high_u8,
            vget_low_u8, vshlq_u8,
        },
        mem, ptr,
    };

    /// Per-lane shift counts handed to `vshlq_u8` (a negative count shifts right).
    ///
    /// Within each 8-lane half, lane `i` is shifted right by `7 - i`, which moves a
    /// set top bit (`0x80`) down to bit `i`.
    const LANE_SHIFTS: [i8; 16] = [-7, -6, -5, -4, -3, -2, -1, 0, -7, -6, -5, -4, -3, -2, -1, 0];

    /// Thirty-two `u8` lanes, stored as two 16-lane NEON vectors (low half first).
    #[allow(non_camel_case_types)] // lower-case name mirrors the `u8x16` alias used here
    #[derive(Copy, Clone)]
    #[repr(transparent)]
    pub struct u8x32([u8x16; 2]);

    /// Compares one 16-lane half and packs the result into the low 16 bits of a `u32`.
    ///
    /// Bit `i` of the result is set exactly when lane `i` of `lhs` is greater than
    /// lane `i` of `rhs` (unsigned comparison).
    fn gt_mask16(lhs: u8x16, rhs: u8x16) -> u32 {
        // SAFETY: these intrinsics only operate on register values, and the transmute
        // converts a 16-byte integer array into the 16-byte vector type that
        // `vshlq_u8` takes as its shift operand.
        // NOTE(review): relies on NEON being available on the aarch64 target and on
        // array element `i` landing in lane `i` (little-endian) — confirm for other
        // configurations.
        unsafe {
            let shifts = mem::transmute::<[i8; 16], _>(LANE_SHIFTS);

            // `vcgtq_u8` yields all-ones lanes where `lhs > rhs`; keep only the top
            // bit of each such lane, then slide it down to that lane's own position.
            let top_bits = vandq_u8(vdupq_n_u8(0x80), vcgtq_u8(lhs, rhs));
            let positioned = vshlq_u8(top_bits, shifts);

            // Every lane in an 8-lane half now holds a distinct bit, so the
            // horizontal add acts as a bitwise OR and cannot overflow a `u8`.
            let low_byte = u32::from(vaddv_u8(vget_low_u8(positioned)));
            let high_byte = u32::from(vaddv_u8(vget_high_u8(positioned)));

            low_byte | (high_byte << 8)
        }
    }

    impl u8x32 {
        /// Loads 32 bytes from `slice`, which may have any alignment.
        ///
        /// Only available on little-endian targets.
        ///
        /// # Panics
        ///
        /// Panics if `slice.len() != 32`.
        #[cfg(target_endian = "little")]
        #[inline(always)]
        pub fn from_slice_unaligned(slice: &[u8]) -> Self {
            assert_eq!(slice.len(), 32);

            let src = slice.as_ptr().cast::<[u8x16; 2]>();
            // SAFETY: the assertion above guarantees exactly 32 readable bytes behind
            // `src`, which is the size of `[u8x16; 2]`, and `read_unaligned` imposes
            // no alignment requirement on the pointer.
            u8x32(unsafe { ptr::read_unaligned(src) })
        }

        /// Builds a vector with every one of the 32 lanes set to `x`.
        #[inline(always)]
        pub fn splat(x: u8) -> Self {
            // SAFETY: `vdupq_n_u8` only broadcasts a scalar into a register.
            // NOTE(review): relies on NEON being available on the aarch64 target.
            let half = unsafe { vdupq_n_u8(x) };
            u8x32([half, half])
        }

        /// Returns a 32-bit mask whose bit `i` is set exactly when lane `i` of `self`
        /// is greater than lane `i` of `b` (unsigned comparison).
        #[inline(always)]
        pub fn gt(&self, b: Self) -> u32 {
            let u8x32([self_lo, self_hi]) = *self;
            let u8x32([other_lo, other_hi]) = b;

            let low_mask = gt_mask16(self_lo, other_lo);
            let high_mask = gt_mask16(self_hi, other_hi);

            low_mask | (high_mask << 16)
        }

        /// Returns a 32-bit mask whose bit `i` is set exactly when lane `i` of `self`
        /// is less than lane `i` of `b`; computed as `b > self`.
        #[inline(always)]
        pub fn lt(self, b: Self) -> u32 {
            Self::gt(&b, self)
        }
    }
}
|
||||
|
|
Loading…
Reference in a new issue