From 6efb380e4a79d879937689d769a66898b90379ab Mon Sep 17 00:00:00 2001 From: Tony Arcieri Date: Wed, 5 May 2021 10:58:35 -0700 Subject: [PATCH] cpufeatures: aarch64 support (Linux and macOS/M1) Adds preliminary support for runtime feature detection on `aarch64` targets, presently restricted to the following set of target features which are present on both `aarch64-unknown-linux-gnu` and `aarch64-apple-darwin` targets: - `aes`: AES support - `sha2`: SHA1 and SHA256 support - `sha3`: SHA512 and SHA3 support --- .github/workflows/cpufeatures.yml | 95 +++++++++++++++++++++--- Cargo.lock | 9 +++ cpufeatures/Cargo.toml | 11 ++- cpufeatures/src/aarch64.rs | 112 ++++++++++++++++++++++++++++ cpufeatures/src/lib.rs | 117 ++++++++++++------------------ cpufeatures/src/x86.rs | 75 +++++++++++++++++++ cpufeatures/tests/aarch64.rs | 17 +++++ cpufeatures/tests/x86.rs | 17 +++++ 8 files changed, 371 insertions(+), 82 deletions(-) create mode 100644 cpufeatures/src/aarch64.rs create mode 100644 cpufeatures/src/x86.rs create mode 100644 cpufeatures/tests/aarch64.rs create mode 100644 cpufeatures/tests/x86.rs diff --git a/.github/workflows/cpufeatures.yml b/.github/workflows/cpufeatures.yml index daf76847..8d6828e8 100644 --- a/.github/workflows/cpufeatures.yml +++ b/.github/workflows/cpufeatures.yml @@ -17,18 +17,95 @@ env: RUSTFLAGS: "-Dwarnings" jobs: - test: + # Linux tests + linux: + strategy: + matrix: + include: + # 32-bit Linux/x86 + - target: i686-unknown-linux-gnu + rust: 1.40.0 # MSRV + deps: sudo apt update && sudo apt install gcc-multilib + - target: i686-unknown-linux-gnu + rust: stable + deps: sudo apt update && sudo apt install gcc-multilib + + # 64-bit Linux/x86_64 + - target: x86_64-unknown-linux-gnu + rust: 1.40.0 # MSRV + - target: x86_64-unknown-linux-gnu + rust: stable runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: ${{ matrix.rust }} + target: ${{ matrix.target }} + override: true + profile: 
minimal + - run: ${{ matrix.deps }} + - run: cargo test --target ${{ matrix.target }} --release + + # macOS tests + macos: strategy: matrix: - rust: + toolchain: - 1.40.0 # MSRV - stable + runs-on: macos-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: ${{ matrix.toolchain }} + target: x86_64-apple-darwin + override: true + - run: cargo test --release + + # Windows tests + windows: + strategy: + matrix: + include: + # 64-bit Windows (GNU) + # TODO(tarcieri): try re-enabling this when we bump MSRV + #- target: x86_64-pc-windows-gnu + # toolchain: 1.40.0 # MSRV + - target: x86_64-pc-windows-gnu + toolchain: stable + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: ${{ matrix.toolchain }} + target: ${{ matrix.target }} + override: true + - run: cargo test --target ${{ matrix.target }} --release + + # Cross-compiled tests + cross: + strategy: + matrix: + include: + # ARM64 + # TODO(tarcieri): try re-enabling this when we bump MSRV + #- target: aarch64-unknown-linux-gnu + # rust: 1.40.0 # MSRV + - target: aarch64-unknown-linux-gnu + rust: stable + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: ${{ matrix.rust }} - override: true - - run: cargo test --release + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: ${{ matrix.rust }} + target: ${{ matrix.target }} + override: true + profile: minimal + - run: cargo install cross + - run: cross test --target ${{ matrix.target }} --release diff --git a/Cargo.lock b/Cargo.lock index 496b8058..34555eb2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,6 +94,9 @@ dependencies = [ [[package]] name = "cpufeatures" version = "0.1.0" +dependencies = [ + "libc", +] [[package]] name = "cpufeatures" @@ -189,6 +192,12 @@ dependencies = [ "digest", ] +[[package]] +name 
= "libc" +version = "0.2.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18794a8ad5b29321f790b55d93dfba91e125cb1a9edbd4f8e3150acc771c1a5e" + [[package]] name = "opaque-debug" version = "0.3.0" diff --git a/cpufeatures/Cargo.toml b/cpufeatures/Cargo.toml index 5c727ff0..0b61182f 100644 --- a/cpufeatures/Cargo.toml +++ b/cpufeatures/Cargo.toml @@ -1,12 +1,21 @@ [package] name = "cpufeatures" version = "0.1.0" # Also update html_root_url in lib.rs when bumping this +description = """ +Lightweight and efficient no-std compatible alternative to the +is_x86_feature_detected! macro +""" authors = ["RustCrypto Developers"] license = "MIT OR Apache-2.0" -description = "Lightweight and efficient no-std compatible alternative to the is_x86_feature_detected macro" documentation = "https://docs.rs/cpufeatures" repository = "https://github.com/RustCrypto/utils" keywords = ["cpuid", "target-feature"] categories = ["no-std"] edition = "2018" readme = "README.md" + +[target.aarch64-apple-darwin.dependencies] +libc = "0.2" + +[target.'cfg(all(target_arch = "aarch64", target_os = "linux"))'.dependencies] +libc = "0.2" diff --git a/cpufeatures/src/aarch64.rs b/cpufeatures/src/aarch64.rs new file mode 100644 index 00000000..07bdad22 --- /dev/null +++ b/cpufeatures/src/aarch64.rs @@ -0,0 +1,112 @@ +//! ARM64 CPU feature detection support. +//! +//! Unfortunately ARM instructions to detect CPU features cannot be called from +//! unprivileged userspace code, so this implementation relies on OS-specific +//! APIs for feature detection. + +#[macro_export] +#[doc(hidden)] +macro_rules! __unless_target_features { + ($($tf:tt),+ => $body:expr ) => { + { + #[cfg(not(all($(target_feature=$tf,)*)))] + $body + + #[cfg(all($(target_feature=$tf,)*))] + true + } + }; +} + +#[cfg(target_os = "linux")] +#[macro_export] +#[doc(hidden)] +macro_rules! 
__detect_target_features { + ($($tf:tt),+) => {{ + let hwcaps = unsafe { libc::getauxval(libc::AT_HWCAP) }; + $($crate::check!(hwcaps, $tf) & )+ true + }}; +} + +#[cfg(target_os = "macos")] +#[macro_export] +#[doc(hidden)] +macro_rules! __detect_target_features { + ($($tf:tt),+) => {{ + $($crate::check!($tf) & )+ true + }}; +} + +/// Linux `expand_check_macro` +#[cfg(target_os = "linux")] +macro_rules! __expand_check_macro { + ($(($name:tt, $hwcap:expr)),* $(,)?) => { + #[macro_export] + #[doc(hidden)] + macro_rules! check { + $( + ($hwcaps:expr, $name) => { (($hwcaps & libc::$hwcap) != 0) }; + )* + } + }; +} + +/// Linux `expand_check_macro` +#[cfg(target_os = "linux")] +__expand_check_macro! { + ("aes", HWCAP_AES), // Enable AES support. + ("sha2", HWCAP_SHA2), // Enable SHA1 and SHA256 support. + ("sha3", HWCAP_SHA3), // Enable SHA512 and SHA3 support. +} + +/// macOS `check!` macro. +/// +/// NOTE: several of these instructions (e.g. `aes`, `sha2`) can be assumed to +/// be present on all Apple ARM64 hardware. +/// +/// Newer CPU instructions now have nodes within sysctl's `hw.optional` +/// namespace, however the ones that do not can safely be assumed to be +/// present on all Apple ARM64 devices, now and for the foreseeable future. +/// +/// See discussion on this issue for more information: +/// +#[cfg(target_os = "macos")] +#[macro_export] +#[doc(hidden)] +macro_rules! check { + ("aes") => { + true + }; + ("sha2") => { + true + }; + ("sha3") => { + unsafe { $crate::aarch64::sysctlbyname(b"hw.optional.armv8_2_sha3\0") } + }; +} + +/// macOS helper function for calling `sysctlbyname`. 
+#[cfg(target_os = "macos")] +pub unsafe fn sysctlbyname(name: &[u8]) -> bool { + assert_eq!( + name.last().cloned(), + Some(0), + "name is not NUL terminated: {:?}", + name + ); + + let mut value: u32 = 0; + let mut size = core::mem::size_of::<u32>(); + + let rc = libc::sysctlbyname( + name.as_ptr() as *const i8, + &mut value as *mut _ as *mut libc::c_void, + &mut size, + core::ptr::null_mut(), + 0, + ); + + assert_eq!(size, 4, "unexpected sysctlbyname(3) result size"); + assert_eq!(rc, 0, "sysctlbyname returned error code: {}", rc); + value != 0 +} diff --git a/cpufeatures/src/lib.rs b/cpufeatures/src/lib.rs index fc4e78f6..29b9fdda 100644 --- a/cpufeatures/src/lib.rs +++ b/cpufeatures/src/lib.rs @@ -1,7 +1,15 @@ -//! Macro for checking CPU capabilities at runtime. +//! This crate provides macros for runtime CPU feature detection. It's intended +//! as a stopgap until Rust [RFC 2725] adding first-class target feature detection +//! macros to `libcore` is implemented. +//! +//! Supported target architectures: +//! - `aarch64` (Linux and macOS/M1) +//! - `x86`/`x86_64` (OS independent and `no_std`-friendly) //! //! # Example //! ``` +//! # #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +//! { //! // This macro creates `cpuid_aes_sha` module //! cpufeatures::new!(cpuid_aes_sha, "aes", "sha"); //! @@ -23,7 +31,9 @@ //! // Additionally you can get both token and value //! let (token, val) = cpuid_aes_sha::init_get(); //! assert_eq!(val, token.get()); +//! # } //! ``` +//! //! Note that if all tested target features are enabled via compiler options //! (e.g. by using `RUSTFLAGS`), the `get` method will always return `true` //! and `init` will not use CPUID instruction. Such behavior allows @@ -31,6 +41,9 @@ //! //! After first call macro caches result and returns it in subsequent //! calls, thus runtime overhead for them is minimal. +//! +//! 
[RFC 2725]: https://github.com/rust-lang/rfcs/pull/2725 + #![no_std] #![doc( html_logo_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo.svg", @@ -38,13 +51,24 @@ html_root_url = "https://docs.rs/cpufeatures/0.1.0" )] -#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] -compile_error!("This crate works only on x86 and x86-64 targets."); +#[cfg(all(target_arch = "aarch64", any(target_os = "linux", target_os = "macos")))] +#[doc(hidden)] +pub mod aarch64; -/// Create module with CPUID bool code. +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod x86; + +#[cfg(not(any( + all(target_arch = "aarch64", any(target_os = "linux", target_os = "macos")), + target_arch = "x86", + target_arch = "x86_64" +)))] +compile_error!("This crate works only on `aarch64` (Linux/Mac), `x86`, and `x86-64` targets."); + +/// Create module with CPU feature detection code. #[macro_export] macro_rules! new { - ($mod_name:ident, $($tf:tt),+ $(,)? ) => { + ($mod_name:ident, $($tf:tt),+ $(,)?) => { mod $mod_name { use core::sync::atomic::{AtomicU8, Ordering::Relaxed}; @@ -59,14 +83,11 @@ macro_rules! new { /// Get initialized value #[inline(always)] pub fn get(&self) -> bool { - // CPUID is not available on SGX targets - #[cfg(all(not(target_env = "sgx"), not(all($(target_feature=$tf, )*))))] - let res = STORAGE.load(Relaxed) == 1; - #[cfg(all(target_env = "sgx", not(all($(target_feature=$tf, )*))))] - let res = false; - #[cfg(all($(target_feature=$tf, )*))] - let res = true; - res + $crate::__unless_target_features! { + $($tf),+ => { + STORAGE.load(Relaxed) == 1 + } + } } } @@ -74,32 +95,20 @@ macro_rules! new { /// stored value and initialization token. 
#[inline] pub fn init_get() -> (InitToken, bool) { - // CPUID is not available on SGX targets - #[cfg(all(not(target_env = "sgx"), not(all($(target_feature=$tf, )*))))] - let res = { - #[cfg(target_arch = "x86")] - use core::arch::x86::{__cpuid, __cpuid_count}; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::{__cpuid, __cpuid_count}; + let res = $crate::__unless_target_features! { + $($tf),+ => { + // Relaxed ordering is fine, as we only have a single atomic variable. + let val = STORAGE.load(Relaxed); - // Relaxed ordering is fine, as we only have a single atomic variable. - let val = STORAGE.load(Relaxed); - if val == UNINIT { - #[allow(unused_variables)] - let cr = unsafe { - [__cpuid(1), __cpuid_count(7, 0)] - }; - let res = $(cpufeatures::check!(cr, $tf) & )+ true; - STORAGE.store(res as u8, Relaxed); - res - } else { - val == 1 + if val == UNINIT { + let res = $crate::__detect_target_features!($($tf),+); + STORAGE.store(res as u8, Relaxed); + res + } else { + val == 1 + } } }; - #[cfg(all(target_env = "sgx", not(all($(target_feature=$tf, )*))))] - let res = false; - #[cfg(all($(target_feature=$tf, )*))] - let res = true; (InitToken(()), res) } @@ -120,39 +129,3 @@ macro_rules! new { } }; } - -// TODO: find how to define private macro usable inside a public one -macro_rules! expand_check_macro { - ($(($name:tt, $i:expr, $reg:ident, $offset:expr)),* $(,)?) => { - #[macro_export] - #[doc(hidden)] - macro_rules! check { - $( - ($cr:expr, $name) => { ($cr[$i].$reg & (1 << $offset) != 0) }; - )* - } - }; -} - -expand_check_macro! 
{ - ("mmx", 0, edx, 23), - ("sse", 0, edx, 25), - ("sse2", 0, edx, 26), - ("sse3", 0, ecx, 0), - ("pclmulqdq", 0, ecx, 1), - ("ssse3", 0, ecx, 9), - ("fma", 0, ecx, 12), - ("sse4.1", 0, ecx, 19), - ("sse4.2", 0, ecx, 20), - ("popcnt", 0, ecx, 23), - ("aes", 0, ecx, 25), - ("avx", 0, ecx, 28), - ("rdrand", 0, ecx, 30), - ("sgx", 1, ebx, 2), - ("bmi1", 1, ebx, 3), - ("avx2", 1, ebx, 5), - ("bmi2", 1, ebx, 8), - ("rdseed", 1, ebx, 18), - ("adx", 1, ebx, 19), - ("sha", 1, ebx, 29), -} diff --git a/cpufeatures/src/x86.rs b/cpufeatures/src/x86.rs new file mode 100644 index 00000000..2131f174 --- /dev/null +++ b/cpufeatures/src/x86.rs @@ -0,0 +1,75 @@ +//! x86/x86-64 CPU feature detection support. +//! +//! Portable, `no_std`-friendly implementation that relies on the x86 `CPUID` +//! instruction for feature detection. + +#[macro_export] +#[doc(hidden)] +macro_rules! __unless_target_features { + ($($tf:tt),+ => $body:expr ) => {{ + #[cfg(not(all($(target_feature=$tf,)*)))] + { + #[cfg(not(target_env = "sgx"))] + $body + + // CPUID is not available on SGX targets + #[cfg(target_env = "sgx")] + false + } + + #[cfg(all($(target_feature=$tf,)*))] + true + }}; +} + +#[macro_export] +#[doc(hidden)] +macro_rules! __detect_target_features { + ($($tf:tt),+) => {{ + #[cfg(target_arch = "x86")] + use core::arch::x86::{__cpuid, __cpuid_count}; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::{__cpuid, __cpuid_count}; + + let cr = unsafe { + [__cpuid(1), __cpuid_count(7, 0)] + }; + + $($crate::check!(cr, $tf) & )+ true + }}; +} + +macro_rules! __expand_check_macro { + ($(($name:tt, $i:expr, $reg:ident, $offset:expr)),* $(,)?) => { + #[macro_export] + #[doc(hidden)] + macro_rules! check { + $( + ($cr:expr, $name) => { ($cr[$i].$reg & (1 << $offset) != 0) }; + )* + } + }; +} + +__expand_check_macro! 
{ + ("mmx", 0, edx, 23), + ("sse", 0, edx, 25), + ("sse2", 0, edx, 26), + ("sse3", 0, ecx, 0), + ("pclmulqdq", 0, ecx, 1), + ("ssse3", 0, ecx, 9), + ("fma", 0, ecx, 12), + ("sse4.1", 0, ecx, 19), + ("sse4.2", 0, ecx, 20), + ("popcnt", 0, ecx, 23), + ("aes", 0, ecx, 25), + ("avx", 0, ecx, 28), + ("rdrand", 0, ecx, 30), + ("sgx", 1, ebx, 2), + ("bmi1", 1, ebx, 3), + ("avx2", 1, ebx, 5), + ("bmi2", 1, ebx, 8), + ("rdseed", 1, ebx, 18), + ("adx", 1, ebx, 19), + ("sha", 1, ebx, 29), +} diff --git a/cpufeatures/tests/aarch64.rs b/cpufeatures/tests/aarch64.rs new file mode 100644 index 00000000..7fa387d1 --- /dev/null +++ b/cpufeatures/tests/aarch64.rs @@ -0,0 +1,17 @@ +//! ARM64 tests + +#![cfg(target_arch = "aarch64")] + +cpufeatures::new!(armcaps_aes_sha2_sha3, "aes", "sha2", "sha3"); + +#[test] +fn init() { + let token: armcaps_aes_sha2_sha3::InitToken = armcaps_aes_sha2_sha3::init(); + assert_eq!(token.get(), armcaps_aes_sha2_sha3::get()); +} + +#[test] +fn init_get() { + let (token, val) = armcaps_aes_sha2_sha3::init_get(); + assert_eq!(val, token.get()); +} diff --git a/cpufeatures/tests/x86.rs b/cpufeatures/tests/x86.rs new file mode 100644 index 00000000..3ef57242 --- /dev/null +++ b/cpufeatures/tests/x86.rs @@ -0,0 +1,17 @@ +//! x86/x86_64 tests + +#![cfg(any(target_arch = "x86", target_arch = "x86_64"))] + +cpufeatures::new!(cpuid_aes_sha, "aes", "sha"); + +#[test] +fn init() { + let token: cpuid_aes_sha::InitToken = cpuid_aes_sha::init(); + assert_eq!(token.get(), cpuid_aes_sha::get()); +} + +#[test] +fn init_get() { + let (token, val) = cpuid_aes_sha::init_get(); + assert_eq!(val, token.get()); +}