blob: 8ca7683a6fb969f3afa09964b7d900595b908cac (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
include(CheckCSourceRuns)
# detect_cpu_feature(<result_var> <flags> <source>)
#
# Builds <source> as a C program with <flags> appended to
# CMAKE_REQUIRED_FLAGS and executes it through check_c_source_runs(), which
# records the outcome in <result_var>. The probe is run, not just compiled,
# because a toolchain can accept a flag for an instruction set that the
# host CPU does not implement.
#
# When cross-compiling without CMAKE_CROSSCOMPILING_EMULATOR the probe
# cannot be executed, so <result_var> is cached as FALSE instead.
function(detect_cpu_feature _result_var _flags _source)
  # Snapshot the check-module settings so they can be put back on exit.
  set(_prev_flags "${CMAKE_REQUIRED_FLAGS}")
  set(_prev_quiet "${CMAKE_REQUIRED_QUIET}")

  string(APPEND CMAKE_REQUIRED_FLAGS " ${_flags}")
  set(CMAKE_REQUIRED_QUIET TRUE)

  if(NOT CMAKE_CROSSCOMPILING OR CMAKE_CROSSCOMPILING_EMULATOR)
    # Native build, or an emulator is available to execute the probe.
    check_c_source_runs("${_source}" ${_result_var})
  else()
    # Nothing can run the probe binary: report the feature as absent.
    set(${_result_var} FALSE CACHE INTERNAL
      "${_result_var} (cross-compile without emulator: off)")
  endif()

  set(CMAKE_REQUIRED_QUIET "${_prev_quiet}")
  set(CMAKE_REQUIRED_FLAGS "${_prev_flags}")
endfunction()
# detect_pclmul()
#
# Probes the host for x86 PCLMULQDQ and SSE4.1 by compiling and running one
# small C program per feature. HAVE_PCLMUL is cached as TRUE only when both
# probes succeed; otherwise HAVE_PCLMUL is removed from the cache.
#
# Probe design:
#   * The input is derived from argc, which defeats constant folding.
#   * The result is stored to a volatile object so the intrinsic stays live
#     when CMAKE_C_FLAGS enables optimization. A result that is only masked
#     with 0 is dead code, so the probed instruction might never be emitted
#     and the probe would pass on a CPU that lacks it.
#   * A SIGILL handler exits cleanly via _exit(1) so the kernel skips the
#     core dump when the instruction is missing.
function(detect_pclmul)
  detect_cpu_feature(_HAVE_PCLMUL "-mpclmul"
"#include <wmmintrin.h>
#include <signal.h>
#include <unistd.h>
static volatile int sink;
static void on_sigill(int sig) { (void) sig; _exit(1); }
int main(int argc, char ** argv) {
    __m128i a;
    __m128i b;
    (void) argv;
    signal(SIGILL, on_sigill);
    a = _mm_set1_epi32(argc);
    b = _mm_clmulepi64_si128(a, a, 0);
    sink = _mm_cvtsi128_si32(b);
    return 0;
}")
  detect_cpu_feature(_HAVE_SSE41 "-msse4.1"
"#include <smmintrin.h>
#include <signal.h>
#include <unistd.h>
static volatile int sink;
static void on_sigill(int sig) { (void) sig; _exit(1); }
int main(int argc, char ** argv) {
    __m128i a;
    (void) argv;
    signal(SIGILL, on_sigill);
    a = _mm_set1_epi32(argc);
    sink = _mm_extract_epi32(a, 0);
    return 0;
}")
  # Both instruction sets must be usable on the host for the feature to be on.
  if(_HAVE_PCLMUL AND _HAVE_SSE41)
    set(HAVE_PCLMUL TRUE CACHE INTERNAL
      "x86 PCLMUL + SSE4.1 intrinsics available")
  else()
    unset(HAVE_PCLMUL CACHE)
  endif()
endfunction()
# detect_pmull()
#
# Probes the host for aarch64 FEAT_PMULL (vmull_p64) by compiling and
# running a small C program. HAVE_PMULL is cached as TRUE when the probe
# succeeds; otherwise HAVE_PMULL is removed from the cache.
#
# Pi 4's BCM2711 accepts +crypto at compile time but lacks the hardware;
# the runtime probe catches that. For this to hold, the instruction must
# actually be executed:
#   * The input is derived from argc, which defeats constant folding.
#   * The result is stored to a volatile object so vmull_p64 stays live
#     when CMAKE_C_FLAGS enables optimization. A result that is only masked
#     with 0 is dead code, so the instruction might never be emitted.
#   * A SIGILL handler exits cleanly via _exit(1) when the instruction is
#     missing.
function(detect_pmull)
  detect_cpu_feature(_HAVE_PMULL "-march=armv8-a+crypto"
"#include <arm_neon.h>
#include <signal.h>
#include <stdint.h>
#include <unistd.h>
static volatile uint64_t sink;
static void on_sigill(int sig) { (void) sig; _exit(1); }
int main(int argc, char ** argv) {
    poly64_t a;
    poly128_t c;
    (void) argv;
    signal(SIGILL, on_sigill);
    a = (poly64_t) (uint64_t) argc;
    c = vmull_p64(a, a);
    sink = vgetq_lane_u64((uint64x2_t) c, 0);
    return 0;
}")
  if(_HAVE_PMULL)
    set(HAVE_PMULL TRUE CACHE INTERNAL
      "aarch64 PMULL intrinsics available")
  else()
    unset(HAVE_PMULL CACHE)
  endif()
endfunction()
|