From 6341064e439a1891dc0fb7fe8b0eef878fdea1db Mon Sep 17 00:00:00 2001 From: ofiryanai Date: Thu, 19 Feb 2026 12:03:03 +0200 Subject: [PATCH 1/3] Fix SVE benchmark compilation bug on aarch64 In the OPT_SVE blocks, the code incorrectly referenced sve2_supported (only defined in OPT_SVE2 block) instead of sve_supported, causing compilation errors when OPT_SVE is defined but OPT_SVE2 is not. --- tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp | 4 ++-- tests/benchmark/spaces_benchmarks/bm_spaces_uint8.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp index a4b63ee73..ec3c4e84f 100644 --- a/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp +++ b/tests/benchmark/spaces_benchmarks/bm_spaces_int8.cpp @@ -49,8 +49,8 @@ INITIALIZE_BENCHMARKS_SET_Cosine(BM_VecSimSpaces_Integers_INT8, INT8, SVE2, 32, #endif #ifdef OPT_SVE bool sve_supported = opt.sve; // Check for SVE support -INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_INT8, INT8, SVE, 32, sve2_supported); -INITIALIZE_BENCHMARKS_SET_Cosine(BM_VecSimSpaces_Integers_INT8, INT8, SVE, 32, sve2_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_INT8, INT8, SVE, 32, sve_supported); +INITIALIZE_BENCHMARKS_SET_Cosine(BM_VecSimSpaces_Integers_INT8, INT8, SVE, 32, sve_supported); #endif #ifdef OPT_NEON_DOTPROD bool neon_dotprod_supported = opt.asimddp; diff --git a/tests/benchmark/spaces_benchmarks/bm_spaces_uint8.cpp b/tests/benchmark/spaces_benchmarks/bm_spaces_uint8.cpp index 0d8bc35cc..602fff719 100644 --- a/tests/benchmark/spaces_benchmarks/bm_spaces_uint8.cpp +++ b/tests/benchmark/spaces_benchmarks/bm_spaces_uint8.cpp @@ -49,8 +49,8 @@ INITIALIZE_BENCHMARKS_SET_Cosine(BM_VecSimSpaces_Integers_UINT8, UINT8, SVE2, 32 #endif #ifdef OPT_SVE bool sve_supported = opt.sve; // Check for SVE support -INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_UINT8, 
UINT8, SVE, 32, sve2_supported); -INITIALIZE_BENCHMARKS_SET_Cosine(BM_VecSimSpaces_Integers_UINT8, UINT8, SVE, 32, sve2_supported); +INITIALIZE_BENCHMARKS_SET_L2_IP(BM_VecSimSpaces_Integers_UINT8, UINT8, SVE, 32, sve_supported); +INITIALIZE_BENCHMARKS_SET_Cosine(BM_VecSimSpaces_Integers_UINT8, UINT8, SVE, 32, sve_supported); #endif #ifdef OPT_NEON_DOTPROD // NEON_DOTPROD functions From 505f506258c759f49d51c74423089bc561dc68e3 Mon Sep 17 00:00:00 2001 From: ofiryanai Date: Sun, 22 Feb 2026 09:29:03 +0200 Subject: [PATCH 2/3] Fix cross-compilation: use CMAKE_SYSTEM_PROCESSOR instead of CMAKE_HOST_SYSTEM_PROCESSOR CMAKE_HOST_SYSTEM_PROCESSOR returns the host machine's architecture (where CMake runs), while CMAKE_SYSTEM_PROCESSOR returns the target architecture (what we're building for). In cross-compilation scenarios (e.g., building ARM binaries on an x86 CI runner), these differ. Using CMAKE_HOST_SYSTEM_PROCESSOR caused x86 AVX512 instructions to be compiled when cross-compiling for ARM, resulting in assembler errors like: Error: no such instruction: 'vfmadd132ph %zmm2,%zmm1,%zmm0{%k1}' --- src/VecSim/spaces/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/VecSim/spaces/CMakeLists.txt b/src/VecSim/spaces/CMakeLists.txt index d88750e91..fe354ded5 100644 --- a/src/VecSim/spaces/CMakeLists.txt +++ b/src/VecSim/spaces/CMakeLists.txt @@ -13,7 +13,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall") set(OPTIMIZATIONS "") -if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") # Check that the compiler supports instructions flag. 
# from gcc14+ -mavx512bw is implicitly enabled when -mavx512vbmi2 is requested include(${root}/cmake/x86_64InstructionFlags.cmake) @@ -93,7 +93,7 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") endif() endif() -if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)|(ARM64)|(armv.*)") +if (CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)|(ARM64)|(armv.*)") include(${root}/cmake/aarch64InstructionFlags.cmake) # Create different optimization implementations for ARM architecture From dcde91f2796dd33c94263da1a199140f15fc6d99 Mon Sep 17 00:00:00 2001 From: ofiryanai Date: Sun, 22 Feb 2026 18:14:31 +0200 Subject: [PATCH 3/3] Fix AVX512-FP16 build on systems with old binutils The compiler (GCC 13+) may support -mavx512fp16 flag, but the assembler (binutils) may be too old to handle the generated AVX512-FP16 instructions. This caused build failures on Ubuntu 20.04 (focal) which has: - GCC 13.1.0 (installed via PPA) - supports AVX512-FP16 - binutils 2.34 (system default) - does NOT support AVX512-FP16 AVX512-FP16 instructions (vfmadd132ph, vmovw, vcvtsh2ss, etc.) require binutils >= 2.38. Replace the hardcoded Ubuntu 18.04 check with proper binutils version detection: - AVX512-BF16: requires binutils >= 2.34 - AVX512-FP16: requires binutils >= 2.38 --- cmake/x86_64InstructionFlags.cmake | 36 +++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/cmake/x86_64InstructionFlags.cmake b/cmake/x86_64InstructionFlags.cmake index dadd550a8..f19ef7662 100644 --- a/cmake/x86_64InstructionFlags.cmake +++ b/cmake/x86_64InstructionFlags.cmake @@ -21,15 +21,35 @@ CHECK_CXX_COMPILER_FLAG(-msse4.1 CXX_SSE4) CHECK_CXX_COMPILER_FLAG(-msse3 CXX_SSE3) CHECK_CXX_COMPILER_FLAG(-msse CXX_SSE) -# Turn off AVX512BF16 on Ubuntu 18.04 as it is not supported by its binutils assembler version. +# Check binutils version for AVX512 instruction support. +# Even if the compiler supports certain flags, the assembler (binutils) may not. 
+# - AVX512-BF16 requires binutils >= 2.34 +# - AVX512-FP16 requires binutils >= 2.38 if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - execute_process(COMMAND lsb_release -rs - OUTPUT_VARIABLE UBUNTU_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE) - - if("${UBUNTU_VERSION}" STREQUAL "18.04") - message(STATUS "Compiling on Ubuntu 18.04, turning off CXX_AVX512BF16 flag.") - set(CXX_AVX512BF16 FALSE) + # Ask the "as" found on PATH for its version banner; stderr is discarded. NOTE(review): this may not be the assembler the compiler driver actually uses (e.g. a cross toolchain), confirm. + execute_process(COMMAND as --version + OUTPUT_VARIABLE AS_VERSION_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET) + # Keep the first "<major>.<minor>" token in that output (e.g., "2.34" from "GNU assembler (GNU Binutils for Ubuntu) 2.34"). NOTE(review): a dotted number in the vendor string would win over the real version, verify on the distros you build on. + string(REGEX MATCH "[0-9]+\\.[0-9]+" BINUTILS_VERSION "${AS_VERSION_OUTPUT}") + + if(BINUTILS_VERSION) + message(STATUS "Detected binutils version: ${BINUTILS_VERSION}") + + # AVX512-BF16 requires binutils >= 2.34 + if(BINUTILS_VERSION VERSION_LESS "2.34") + message(STATUS "binutils ${BINUTILS_VERSION} < 2.34, turning off CXX_AVX512BF16 flag.") + set(CXX_AVX512BF16 FALSE) + endif() + + # AVX512-FP16 requires binutils >= 2.38 + if(BINUTILS_VERSION VERSION_LESS "2.38") + message(STATUS "binutils ${BINUTILS_VERSION} < 2.38, turning off CXX_AVX512FP16 flag.") + set(CXX_AVX512FP16 FALSE) + endif() + else() + message(WARNING "Could not detect binutils version, AVX512 features may fail to assemble") endif() endif()