diff --git a/Cargo.toml b/Cargo.toml index 56158b6..9d47e60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ repository = "https://github.com/cberner/raptorq" readme = "README.md" version = "1.6.5" edition = "2021" -rust-version = "1.56" +rust-version = "1.60" authors = ["Christopher Berner "] [lib] @@ -45,7 +45,6 @@ lto = false [features] benchmarking = [] -use_neon = [] python = ["pyo3"] serde_support = ["serde"] diff --git a/py_publish.sh b/py_publish.sh index 2db8d98..f741bdb 100755 --- a/py_publish.sh +++ b/py_publish.sh @@ -2,7 +2,7 @@ cd /raptorq yum install -y python3-pip -curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain=1.46.0 +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain=1.60.0 source $HOME/.cargo/env pip3 install toml diff --git a/src/lib.rs b/src/lib.rs index da8d9a2..a93da5f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,4 @@ #![allow(clippy::needless_return, clippy::unreadable_literal)] -#![cfg_attr(feature = "use_neon", feature(stdsimd))] -#![cfg_attr(feature = "use_neon", feature(aarch64_target_feature))] -#![cfg_attr(feature = "use_neon", feature(arm_target_feature))] mod arraymap; mod base; diff --git a/src/octets.rs b/src/octets.rs index 4ecd3dc..5d9ba57 100644 --- a/src/octets.rs +++ b/src/octets.rs @@ -1,10 +1,23 @@ use crate::octet::Octet; use crate::octet::OCTET_MUL; -#[cfg(any(target_arch = "x86", target_arch = "x86_64", feature = "use_neon"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" +))] use crate::octet::OCTET_MUL_HI_BITS; -#[cfg(any(target_arch = "x86", target_arch = "x86_64", feature = "use_neon"))] +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64" +))] use crate::octet::OCTET_MUL_LOW_BITS; +#[cfg(target_arch = "aarch64")] +use std::arch::is_aarch64_feature_detected; + // An octet vec containing only binary values, which are bit-packed for efficiency pub struct BinaryOctetVec { // Values are stored packed into the highest bits, with the last value at the highest bit of the @@ -84,7 +97,7 @@ pub fn fused_addassign_mul_scalar_binary( } } } - #[cfg(all(target_arch = "aarch64", feature = "use_neon"))] + #[cfg(target_arch = "aarch64")] { if is_aarch64_feature_detected!("neon") { unsafe { @@ -92,13 +105,14 @@ pub fn fused_addassign_mul_scalar_binary( } } } - #[cfg(all(target_arch = "arm", feature = "use_neon"))] + #[cfg(target_arch = "arm")] { - if is_arm_feature_detected!("neon") { - unsafe { - return fused_addassign_mul_scalar_binary_neon(octets, other, scalar); - } - } + // TODO: enable when stable + // if is_arm_feature_detected!("neon") { + // unsafe { + // return fused_addassign_mul_scalar_binary_neon(octets, other, scalar); + // } + // } } // TODO: write an optimized fallback that does call .to_octet_vec() @@ -109,11 +123,10 @@ pub fn fused_addassign_mul_scalar_binary( } } -#[cfg(all( - any(target_arch = "arm", target_arch = "aarch64"), - feature = "use_neon" -))] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "aarch64")] +// TODO: enable when stable +// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +// #[target_feature(enable = "neon")] unsafe fn fused_addassign_mul_scalar_binary_neon( octets: &mut [u8], other: &BinaryOctetVec, @@ -265,11 +278,10 @@ fn mulassign_scalar_fallback(octets: &mut [u8], scalar: &Octet) { } } -#[cfg(all( - any(target_arch = "arm", target_arch = "aarch64"), - feature = "use_neon" -))] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "aarch64")] +// TODO: enable when stable +// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +// #[target_feature(enable = "neon")] unsafe fn mulassign_scalar_neon(octets: &mut [u8], scalar: &Octet) { #[cfg(target_arch = "aarch64")] use std::arch::aarch64::*; @@ -402,7 +414,7 @@ pub fn mulassign_scalar(octets: &mut [u8], scalar: &Octet) { } } } - #[cfg(all(target_arch = "aarch64", feature = "use_neon"))] + #[cfg(target_arch = "aarch64")] { if is_aarch64_feature_detected!("neon") { unsafe { @@ -410,13 +422,14 @@ pub fn mulassign_scalar(octets: &mut [u8], scalar: &Octet) { } } } - #[cfg(all(target_arch = "arm", feature = "use_neon"))] + #[cfg(target_arch = "arm")] { - if is_arm_feature_detected!("neon") { - unsafe { - return mulassign_scalar_neon(octets, scalar); - } - } + // TODO: enable when stable + // if is_arm_feature_detected!("neon") { + // unsafe { + // return mulassign_scalar_neon(octets, scalar); + // } + // } } return mulassign_scalar_fallback(octets, scalar); @@ -433,11 +446,10 @@ fn fused_addassign_mul_scalar_fallback(octets: &mut [u8], other: &[u8], scalar: } } -#[cfg(all( - any(target_arch = "arm", target_arch = "aarch64"), - feature = "use_neon" -))] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "aarch64")] +// TODO: enable when stable +// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +// #[target_feature(enable = "neon")] unsafe fn fused_addassign_mul_scalar_neon(octets: &mut [u8], other: &[u8], scalar: &Octet) { #[cfg(target_arch = "aarch64")] use std::arch::aarch64::*; @@ -603,7 +615,7 @@ pub fn fused_addassign_mul_scalar(octets: &mut [u8], other: &[u8], scalar: &Octe } } } - #[cfg(all(target_arch = "aarch64", feature = "use_neon"))] + #[cfg(target_arch = "aarch64")] { if is_aarch64_feature_detected!("neon") { unsafe { @@ -611,13 +623,14 @@ pub fn fused_addassign_mul_scalar(octets: &mut [u8], other: &[u8], scalar: &Octe } } } - #[cfg(all(target_arch = "arm", feature = "use_neon"))] + #[cfg(target_arch = "arm")] { - if is_arm_feature_detected!("neon") { - unsafe { - return fused_addassign_mul_scalar_neon(octets, other, scalar); - } - } + // TODO: enable when stable + // if is_arm_feature_detected!("neon") { + // unsafe { + // return fused_addassign_mul_scalar_neon(octets, other, scalar); + // } + // } } return fused_addassign_mul_scalar_fallback(octets, other, scalar); @@ -646,16 +659,16 @@ fn add_assign_fallback(octets: &mut [u8], other: &[u8]) { } } -#[cfg(all(target_arch = "aarch64", feature = "use_neon"))] +#[cfg(target_arch = "aarch64")] use std::arch::aarch64::uint8x16_t; -#[cfg(all(target_arch = "arm", feature = "use_neon"))] -use std::arch::arm::uint8x16_t; +// TODO: enable when stable +// #[cfg(target_arch = "arm")] +// use std::arch::arm::uint8x16_t; -#[cfg(all( - any(target_arch = "arm", target_arch = "aarch64"), - feature = "use_neon" -))] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "aarch64")] +// TODO: enable when stable +// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +// #[target_feature(enable = "neon")] unsafe fn store_neon(ptr: *mut uint8x16_t, value: uint8x16_t) { #[cfg(target_arch = "aarch64")] use std::arch::aarch64::*; @@ -668,11 +681,10 @@ unsafe fn store_neon(ptr: *mut uint8x16_t, value: uint8x16_t) { *(ptr as *mut u64).add(1) = vgetq_lane_u64(reinterp, 1); } -#[cfg(all( - any(target_arch = "arm", target_arch = "aarch64"), - feature = "use_neon" -))] -#[target_feature(enable = "neon")] +#[cfg(target_arch = "aarch64")] +// TODO: enable when stable +// #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +// #[target_feature(enable = "neon")] unsafe fn add_assign_neon(octets: &mut [u8], other: &[u8]) { #[cfg(target_arch = "aarch64")] use std::arch::aarch64::*; @@ -806,7 +818,7 @@ pub fn add_assign(octets: &mut [u8], other: &[u8]) { } } } - #[cfg(all(target_arch = "aarch64", feature = "use_neon"))] + #[cfg(target_arch = "aarch64")] { if is_aarch64_feature_detected!("neon") { unsafe { @@ -814,13 +826,14 @@ pub fn add_assign(octets: &mut [u8], other: &[u8]) { } } } - #[cfg(all(target_arch = "arm", feature = "use_neon"))] + #[cfg(target_arch = "arm")] { - if is_arm_feature_detected!("neon") { - unsafe { - return add_assign_neon(octets, other); - } - } + // TODO: enable when stable + // if is_arm_feature_detected!("neon") { + // unsafe { + // return add_assign_neon(octets, other); + // } + // } } return add_assign_fallback(octets, other); }