feat(users/edef/refscan): AArch64 support
Change-Id: I5062078739f0bf9f70c6789a9f2eafceff65d76e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7690
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
parent
1afb4a9f44
commit
ec470d254f
1 changed file with 60 additions and 0 deletions
|
@@ -55,6 +55,7 @@ mod test {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
mod simd {
|
mod simd {
|
||||||
#[cfg(target_arch = "x86")]
|
#[cfg(target_arch = "x86")]
|
||||||
use std::arch::x86 as arch;
|
use std::arch::x86 as arch;
|
||||||
|
@@ -92,3 +93,62 @@ mod simd {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(target_arch = "aarch64")]
/// AArch64 backend: a 32-byte vector built from two 128-bit NEON registers.
///
/// Exposes a `u8x32` type whose comparisons return one bit per byte lane,
/// packed into a `u32` with lane 0 in bit 0.
mod simd {
    use std::{
        arch::aarch64::{
            uint8x16_t as u8x16, vaddv_u8, vandq_u8, vcgtq_u8, vdupq_n_u8, vget_high_u8,
            vget_low_u8, vshlq_u8,
        },
        mem, ptr,
    };

    /// Per-lane shift counts handed to `vshlq_u8`.
    ///
    /// A negative count shifts right, so a lane holding `0x80` ends up holding
    /// `1 << (lane % 8)`: lane 0 -> 0x01, lane 1 -> 0x02, ..., lane 7 -> 0x80,
    /// and the same pattern again for lanes 8..16.
    const LANE_BIT_SHIFTS: [i8; 16] = [
        -7, -6, -5, -4, -3, -2, -1, 0, -7, -6, -5, -4, -3, -2, -1, 0,
    ];

    /// 32 unsigned bytes, stored as a low and a high 16-byte half.
    #[allow(non_camel_case_types)]
    #[derive(Copy, Clone)]
    #[repr(transparent)]
    pub struct u8x32([u8x16; 2]);

    impl u8x32 {
        /// Loads a vector from `slice`, which need not be aligned.
        ///
        /// Only available on little-endian targets.
        ///
        /// # Panics
        ///
        /// Panics if `slice.len() != 32`.
        #[cfg(target_endian = "little")]
        #[inline(always)]
        pub fn from_slice_unaligned(slice: &[u8]) -> Self {
            assert_eq!(slice.len(), 32);
            // SAFETY: the assertion above guarantees 32 readable bytes, which is
            // exactly the size of `[u8x16; 2]`; `read_unaligned` imposes no
            // alignment requirement, and every bit pattern is a valid byte vector.
            u8x32(unsafe { ptr::read_unaligned(slice.as_ptr().cast()) })
        }

        /// Returns a vector with every lane set to `x`.
        #[inline(always)]
        pub fn splat(x: u8) -> Self {
            // SAFETY: `vdupq_n_u8` touches no memory; it only needs the NEON
            // target feature.
            // NOTE(review): assumes the build target enables NEON — confirm for
            // custom aarch64 targets.
            let half = unsafe { vdupq_n_u8(x) };
            u8x32([half, half])
        }

        /// Lane-wise unsigned `self > b`.
        ///
        /// Bit `i` of the result is set iff lane `i` of `self` is greater than
        /// lane `i` of `b`.
        #[inline(always)]
        pub fn gt(&self, b: Self) -> u32 {
            /// Compares one 16-lane half and packs the outcome into the low 16 bits.
            fn half_mask(a: u8x16, b: u8x16) -> u32 {
                // SAFETY: every intrinsic here is a pure register operation, and
                // the transmute converts `[i8; 16]` into the equally sized
                // signed 16-lane vector that `vshlq_u8` expects as shift counts.
                // NOTE(review): assumes the build target enables NEON — confirm
                // for custom aarch64 targets.
                unsafe {
                    // `vcgtq_u8` yields 0xFF in each lane where a > b; keep only
                    // the top bit of those lanes.
                    let hits = vandq_u8(vdupq_n_u8(0x80), vcgtq_u8(a, b));
                    // Move that bit to position `lane % 8`, giving each lane
                    // within an 8-lane group a distinct bit.
                    let bits = vshlq_u8(hits, mem::transmute(LANE_BIT_SHIFTS));

                    // The bits in each group are disjoint, so the horizontal add
                    // acts as a bitwise OR and cannot overflow a byte.
                    let low = vaddv_u8(vget_low_u8(bits)) as u32;
                    let high = vaddv_u8(vget_high_u8(bits)) as u32;
                    low | (high << 8)
                }
            }

            let u8x32([self_lo, self_hi]) = *self;
            let u8x32([other_lo, other_hi]) = b;

            half_mask(self_lo, other_lo) | (half_mask(self_hi, other_hi) << 16)
        }

        /// Lane-wise unsigned `self < b`, computed as `b > self`.
        #[inline(always)]
        pub fn lt(self, b: Self) -> u32 {
            b.gt(self)
        }
    }
}
|
||||||
|
|
Loading…
Reference in a new issue