Source code

Revision control

Copy as Markdown

Other Tools

use super::fallback;
// We only use AVX when we can detect at runtime whether it's available, which
// requires std.
#[cfg(feature = "std")]
mod avx;
mod sse2;
/// This macro employs a gcc-like "ifunc" trick whereby, upon first calling
/// `memchr` (for example), CPU feature detection will be performed at runtime
/// to determine the best implementation to use. After CPU feature detection
/// is done, we replace `memchr`'s function pointer with the selection. Upon
/// subsequent invocations, the CPU-specific routine is invoked directly, which
/// skips the CPU feature detection and subsequent branch that's required.
///
/// While this typically doesn't matter for rare occurrences or when used on
/// larger haystacks, `memchr` can be called in tight loops where the overhead
/// of this branch can actually add up *and is measurable*. This trick was
/// necessary to bring this implementation up to glibc's speeds for the 'tiny'
/// benchmarks, for example.
///
/// At some point, I expect the Rust ecosystem will get a nice macro for doing
/// exactly this, at which point, we can replace our hand-jammed version of it.
///
/// N.B. The ifunc strategy does prevent function inlining of course, but
/// on modern CPUs, you'll probably end up with the AVX2 implementation,
/// which probably can't be inlined anyway---unless you've compiled your
/// entire program with AVX2 enabled. However, even then, the various memchr
/// implementations aren't exactly small, so inlining might not help anyway!
///
/// # Arguments
///
/// * `$fnty` - the function pointer type that the selected routine is
///   transmuted to before it is called.
/// * `$name` - the routine name; it is resolved as `avx::$name`,
///   `sse2::$name` or `fallback::$name`.
/// * `$haystack` - identifier of the haystack at the call site.
/// * `$needle` - one or more identifiers of the needle bytes at the call
///   site. The same identifiers are reused as the `u8` parameter names of the
///   generated `detect` function.
///
/// # Safety
///
/// Callers must ensure that `$fnty` is a function pointer type.
///
/// NOTE(review): since the stored pointer is called through `$fnty`, `$fnty`
/// presumably must also match the real signature of the `$name` routines in
/// `avx`, `sse2` and `fallback` --- confirm against those modules.
#[cfg(feature = "std")]
macro_rules! unsafe_ifunc {
($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
use std::{mem, sync::atomic::{AtomicPtr, Ordering}};
// Type-erased pointer representation, so that any of the candidate routines
// (and `detect` itself) can be held in one `AtomicPtr`.
type FnRaw = *mut ();
// The routine to call. Because this static is declared inside the macro's
// block, every expansion gets its own `FN`. It starts out pointing at
// `detect`, so the first call through it performs the selection.
static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);
// Picks a routine, records it in `FN`, and then forwards the current call
// to it so that the first invocation still produces a result.
fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
// AVX2 is chosen only when the `memchr_runtime_avx` cfg is set *and* the
// running CPU reports AVX2; otherwise SSE2 is used when the
// `memchr_runtime_sse2` cfg is set, and the fallback routine as a last
// resort.
let fun =
if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
avx::$name as FnRaw
} else if cfg!(memchr_runtime_sse2) {
sse2::$name as FnRaw
} else {
fallback::$name as FnRaw
};
// Record the selection for later calls. A thread that still loads the
// initial value simply ends up running `detect` again.
FN.store(fun as FnRaw, Ordering::Relaxed);
// SAFETY: By virtue of the caller contract, $fnty is a function
// pointer, which is always safe to transmute with a *mut ().
// Also, if `fun` is the AVX routine, then it is guaranteed to be
// supported since we checked the avx2 feature.
unsafe {
mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
}
}
// SAFETY: By virtue of the caller contract, $fnty is a function
// pointer, which is always safe to transmute with a *mut (). Also, if
// `fun` is the AVX routine, then it is guaranteed to be supported since
// we checked the avx2 feature.
unsafe {
// `fun` is either `detect` (the initial value) or whichever routine
// `detect` stored.
let fun = FN.load(Ordering::Relaxed);
mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
}
}}
}
/// Definition of `unsafe_ifunc` used when the `std` feature is disabled.
///
/// When std isn't available to provide runtime CPU feature detection, or if
/// runtime CPU feature detection has been explicitly disabled, there is
/// nothing to choose at runtime: the optimized SSE2 routine is called directly
/// when the `memchr_runtime_sse2` cfg is set, and the fallback routine is
/// called otherwise. SSE2 is available on all x86_64 targets, so no CPU
/// feature detection is necessary.
///
/// The macro takes the same arguments as the `std` definition. `$fnty` is
/// accepted but not used here, since no function pointer is involved.
///
/// # Safety
///
/// There are no safety requirements for this definition of the macro. It is
/// safe for all inputs since it is restricted to either the fallback routine
/// or the SSE routine, which is always safe to call on x86_64.
#[cfg(not(feature = "std"))]
macro_rules! unsafe_ifunc {
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
        if !cfg!(memchr_runtime_sse2) {
            fallback::$name($($needle),+, $haystack)
        } else {
            // SAFETY: SSE2 is available on all x86_64 targets, so the SSE2
            // routine can always be called here.
            unsafe { sse2::$name($($needle),+, $haystack) }
        }
    }}
}
/// Runs the `memchr` routine selected by `unsafe_ifunc!` with the needle `n1`
/// over `haystack` and returns that routine's result unchanged.
///
/// Presumably the result is the index of the first byte equal to `n1`, or
/// `None` if there is none; the exact semantics are those of the dispatched
/// routine.
#[inline(always)]
pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
    // Function pointer type shared by every candidate `memchr` routine.
    type Routine = fn(u8, &[u8]) -> Option<usize>;
    unsafe_ifunc!(Routine, memchr, haystack, n1)
}
/// Runs the `memchr2` routine selected by `unsafe_ifunc!` with the needles
/// `n1` and `n2` over `haystack` and returns that routine's result unchanged.
///
/// Presumably the result is the index of the first byte equal to either
/// needle, or `None` if there is none; the exact semantics are those of the
/// dispatched routine.
#[inline(always)]
pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
    // Function pointer type shared by every candidate `memchr2` routine.
    type Routine = fn(u8, u8, &[u8]) -> Option<usize>;
    unsafe_ifunc!(Routine, memchr2, haystack, n1, n2)
}
/// Runs the `memchr3` routine selected by `unsafe_ifunc!` with the needles
/// `n1`, `n2` and `n3` over `haystack` and returns that routine's result
/// unchanged.
///
/// Presumably the result is the index of the first byte equal to any of the
/// needles, or `None` if there is none; the exact semantics are those of the
/// dispatched routine.
#[inline(always)]
pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
    // Function pointer type shared by every candidate `memchr3` routine.
    type Routine = fn(u8, u8, u8, &[u8]) -> Option<usize>;
    unsafe_ifunc!(Routine, memchr3, haystack, n1, n2, n3)
}
/// Runs the `memrchr` routine selected by `unsafe_ifunc!` with the needle `n1`
/// over `haystack` and returns that routine's result unchanged.
///
/// Presumably the result is the index of the last byte equal to `n1`, or
/// `None` if there is none; the exact semantics are those of the dispatched
/// routine.
#[inline(always)]
pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
    // Function pointer type shared by every candidate `memrchr` routine.
    type Routine = fn(u8, &[u8]) -> Option<usize>;
    unsafe_ifunc!(Routine, memrchr, haystack, n1)
}
/// Runs the `memrchr2` routine selected by `unsafe_ifunc!` with the needles
/// `n1` and `n2` over `haystack` and returns that routine's result unchanged.
///
/// Presumably the result is the index of the last byte equal to either
/// needle, or `None` if there is none; the exact semantics are those of the
/// dispatched routine.
#[inline(always)]
pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
    // Function pointer type shared by every candidate `memrchr2` routine.
    type Routine = fn(u8, u8, &[u8]) -> Option<usize>;
    unsafe_ifunc!(Routine, memrchr2, haystack, n1, n2)
}
/// Runs the `memrchr3` routine selected by `unsafe_ifunc!` with the needles
/// `n1`, `n2` and `n3` over `haystack` and returns that routine's result
/// unchanged.
///
/// Presumably the result is the index of the last byte equal to any of the
/// needles, or `None` if there is none; the exact semantics are those of the
/// dispatched routine.
#[inline(always)]
pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
    // Function pointer type shared by every candidate `memrchr3` routine.
    type Routine = fn(u8, u8, u8, &[u8]) -> Option<usize>;
    unsafe_ifunc!(Routine, memrchr3, haystack, n1, n2, n3)
}