Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hyperscan: support aarch64 platform #212

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
126 changes: 89 additions & 37 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ include (${CMAKE_MODULE_PATH}/boost.cmake)
# -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
find_package(PythonInterp)
find_program(RAGEL ragel)
find_program(COPY cp)

if(PYTHONINTERP_FOUND)
set(PYTHON ${PYTHON_EXECUTABLE})
Expand Down Expand Up @@ -130,8 +131,8 @@ CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in r

CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON)

option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime"
OFF)
option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime"
OFF)

option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)

Expand Down Expand Up @@ -181,26 +182,33 @@ else()
# generic, which isn't very good in some cases. march=native looks at
# cpuid info and then chooses the best microarch it can (and replaces
# the flag), so use that for tune.

if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
# arg1 might exist if using ccache
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_VARIABLE _GCC_OUTPUT)
string(FIND "${_GCC_OUTPUT}" "Known" POS)
string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT)
string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1"
GNUCC_ARCH "${_GCC_OUTPUT}")

# test the parsed flag
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_QUIET ERROR_QUIET
INPUT_FILE /dev/null
RESULT_VARIABLE GNUCC_TUNE_TEST)
if (NOT GNUCC_TUNE_TEST EQUAL 0)
message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid")
endif()
endif()

# arg1 might exist if using ccache
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_VARIABLE _GCC_OUTPUT)
string(FIND "${_GCC_OUTPUT}" "Known" POS)
string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT)
string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1"
GNUCC_ARCH "${_GCC_OUTPUT}")

# test the parsed flag
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_QUIET ERROR_QUIET
INPUT_FILE /dev/null
RESULT_VARIABLE GNUCC_TUNE_TEST)
if (NOT GNUCC_TUNE_TEST EQUAL 0)
message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=armv8-a -mtune=armv8-a)
endif()

set(TUNE_FLAG ${GNUCC_ARCH})
else ()
set(TUNE_FLAG native)
Expand Down Expand Up @@ -232,6 +240,13 @@ else()
set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")

if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fsigned-char")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fsigned-char")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc")
endif()

if (NOT RELEASE_BUILD)
# -Werror is most useful during development, don't potentially break
# release builds
Expand All @@ -245,11 +260,19 @@ else()
endif()

if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
set(ARCH_C_FLAGS "-march=native -mtune=${TUNE_FLAG}")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
set(ARCH_C_FLAGS "-march=native -mtune=${TUNE_FLAG}")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
set(ARCH_C_FLAGS "-march=armv8-a -mtune=${TUNE_FLAG}")
endif ()
endif()

if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
set(ARCH_CXX_FLAGS "-march=native -mtune=${TUNE_FLAG}")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
set(ARCH_CXX_FLAGS "-march=native -mtune=${TUNE_FLAG}")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
set(ARCH_CXX_FLAGS "-march=armv8-a -mtune=${TUNE_FLAG}")
endif()
endif()

if(CMAKE_COMPILER_IS_GNUCC)
Expand All @@ -272,7 +295,6 @@ else()
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
endif()


if (CMAKE_C_COMPILER_ID MATCHES "Intel")
set(SKYLAKE_FLAG "-xCORE-AVX512")
else ()
Expand All @@ -281,10 +303,18 @@ else()
endif()

CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H)
CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H)
CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H)
CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H)

if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H)
CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H)
CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H)
CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H)
endif()

if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
CHECK_INCLUDE_FILES(arm_neon.h HAVE_C_ARM_NEON_H)
CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_CXX_ARM_NEON_H)
endif()

CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN)
CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC)
Expand Down Expand Up @@ -317,7 +347,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
(CMAKE_VERSION VERSION_GREATER "3.0" AND CMAKE_GENERATOR MATCHES "Ninja")))
message (STATUS "Building the fat runtime requires the Unix Makefiles generator, or Ninja with CMake v3.0 or higher")
set (FAT_RUNTIME_REQUISITES FALSE)
else()
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
message(STATUS "AARCH64 platform don't support fat runtime")
set (FAT_RUNTIME_REQUISITES FALSE)
else()
include (${CMAKE_MODULE_PATH}/attrib.cmake)
if (NOT HAS_C_ATTR_IFUNC)
message(STATUS "Compiler does not support ifunc attribute, cannot build fat runtime")
Expand All @@ -329,7 +362,9 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
endif ()

include (${CMAKE_MODULE_PATH}/arch.cmake)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
include (${CMAKE_MODULE_PATH}/arch.cmake)
endif()

# testing a builtin takes a little more work
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
Expand All @@ -346,6 +381,7 @@ set(C_FLAGS_TO_CHECK
"-Wstrict-prototypes"
"-Wmissing-prototypes"
)

foreach (FLAG ${C_FLAGS_TO_CHECK})
# munge the name so it doesn't break things
string(REPLACE "-" "_" FNAME C_FLAG${FLAG})
Expand Down Expand Up @@ -395,12 +431,6 @@ if (CXX_IGNORED_ATTR)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-ignored-attributes")
endif()

# gcc 9 complains about redundant move for returned variable
CHECK_CXX_COMPILER_FLAG("-Wredundant-move" CXX_REDUNDANT_MOVE)
if (CXX_REDUNDANT_MOVE)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-redundant-move")
endif()

# note this for later
# g++ doesn't have this flag but clang does
CHECK_CXX_COMPILER_FLAG("-Wweak-vtables" CXX_WEAK_VTABLES)
Expand Down Expand Up @@ -449,6 +479,14 @@ else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()

# Test case for neon function.
option(UNIT_SIMD "Simd funtion test case, default is OFF" OFF)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
if (UNIT_SIMD)
add_subdirectory(unit-simd)
endif()
endif()

add_subdirectory(util)
add_subdirectory(doc/dev-reference)

Expand All @@ -469,6 +507,7 @@ if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS)
endif()

add_subdirectory(unit)

if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt)
add_subdirectory(tools)
endif()
Expand Down Expand Up @@ -538,14 +577,27 @@ set_source_files_properties(
PROPERTIES
COMPILE_FLAGS "${RAGEL_C_FLAGS}")

ragelmaker(src/parser/Parser.rl)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
ragelmaker(src/parser/Parser.rl)
endif()

if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
ragelcopyer(src/parser/Parser.rl)
endif()

set_source_files_properties(
${CMAKE_BINARY_DIR}/src/parser/control_verbs.cpp
PROPERTIES
COMPILE_FLAGS "${RAGEL_C_FLAGS}")

ragelmaker(src/parser/control_verbs.rl)

if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
ragelmaker(src/parser/control_verbs.rl)
endif()

if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
ragelcopyer(src/parser/control_verbs.rl)
endif()

SET(hs_HEADERS
src/hs.h
Expand Down
9 changes: 9 additions & 0 deletions cmake/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
/* "Define if building for EM64T" */
#cmakedefine ARCH_X86_64

/* "Define if building for aarch64" */
#cmakedefine ARCH_AARCH64

/* internal build, switch on dump support. */
#cmakedefine DUMP_SUPPORT

Expand Down Expand Up @@ -45,6 +48,12 @@
/* C compiler has intrin.h */
#cmakedefine HAVE_C_INTRIN_H

/* C++ compiler has arm_neon.h */
#cmakedefine HAVE_CXX_ARM_NEON_H

/* C compiler has arm_neon.h */
#cmakedefine HAVE_C_ARM_NEON_H

/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
0 if you don't. */
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
Expand Down
13 changes: 9 additions & 4 deletions cmake/platform.cmake
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# determine the target arch

# really only interested in the preprocessor here
CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_64_BIT)
CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_X86_64)

CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_32_BIT)
CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_IA32)

set(ARCH_X86_64 ${ARCH_64_BIT})
set(ARCH_IA32 ${ARCH_32_BIT})
CHECK_C_SOURCE_COMPILES("#if !(defined(__aarch64__))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64)

if (ARCH_X86_64 OR ARCH_AARCH64)
set(ARCH_64_BIT 1)
elseif (ARCH_IA32)
set(ARCH_32_BIT 1)
endif()
20 changes: 20 additions & 0 deletions cmake/ragel.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,23 @@ function(ragelmaker src_rl)
set_source_files_properties(${rl_out} PROPERTIES GENERATED TRUE)
endfunction(ragelmaker)

# On the aarch64 platform, char is unsigned by default, so in order to be consistent with
# the x86 platform, we will add -fsigned-char to the compile option to force the char type.
# However, when the ragel generates c++ code, the char variable used will still be considered
# unsigned, resulting in the overflow of the char variable value in the generated code,
# resulting in some errors.
# function for copying the previously modified code to the specified path

function(ragelcopyer src_rl)
get_filename_component(src_dir ${src_rl} PATH) # old cmake needs PATH
get_filename_component(src_file ${src_rl} NAME_WE)
set(rl_out ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}/${src_file}.cpp)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}/${src_file}.cpp
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}
COMMAND ${COPY} -f ${CMAKE_CURRENT_SOURCE_DIR}/${src_dir}/${src_file}.cpp ${rl_out} 2>/dev/null ||:
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${src_dir}/${src_file}.cpp
)
add_custom_target(ragel_${src_file} DEPENDS ${rl_out})
set_source_files_properties(${rl_out} PROPERTIES GENERATED TRUE)
endfunction(ragelcopyer)
43 changes: 43 additions & 0 deletions src/crc32.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,47 @@
#include "util/arch.h"
#include "util/intrinsics.h"

#if defined(HAVE_NEON)

#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC_WORD 8
#define CRC_TYPE u64a
static really_inline
u32 crc32c_neon(u32 running_crc, const unsigned char * p_buf, const size_t length)
{
u32 crc=running_crc;

//Processbyte-by-byteuntilp_bufisaligned
const unsigned char * aligned_buf = ROUNDUP_PTR(p_buf, CRC_WORD);
size_t init_bytes = aligned_buf - p_buf;
size_t running_length = ((length - init_bytes) / CRC_WORD) * CRC_WORD;
size_t end_bytes = length - init_bytes - running_length;

while(p_buf < aligned_buf){
CRC32CB(crc, *p_buf);
p_buf++;
}

//Main aligned loop, processes a word at a time.
for(size_t li = 0; li < running_length / CRC_WORD; li++){
CRC_TYPE block = *(const CRC_TYPE *)p_buf;
CRC32CX(crc,block);
p_buf += CRC_WORD;
}

//Remainingbytes
for(size_t li = 0; li < end_bytes; li++){
CRC32CB(crc,*p_buf);
p_buf++;
}
return crc;
}
#endif


#if !defined(HAVE_SSE42)

/***
Expand Down Expand Up @@ -636,6 +677,8 @@ u32 crc32c_sse42(u32 running_crc, const unsigned char* p_buf,
u32 Crc32c_ComputeBuf(u32 inCrc32, const void *buf, size_t bufLen) {
#if defined(HAVE_SSE42)
u32 crc = crc32c_sse42(inCrc32, (const unsigned char *)buf, bufLen);
#elif defined(HAVE_NEON)
u32 crc = crc32c_neon(inCrc32, (const unsigned char *)buf, bufLen);
#else
u32 crc = crc32c_sb8_64_bit(inCrc32, (const unsigned char *)buf, bufLen);
#endif
Expand Down
9 changes: 8 additions & 1 deletion src/hs_valid_platform.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,16 @@
HS_PUBLIC_API
hs_error_t HS_CDECL hs_valid_platform(void) {
/* Hyperscan requires SSSE3, anything else is a bonus */
#if defined(__x86_64__)
if (check_ssse3()) {
return HS_SUCCESS;
} else {
}
#else
if (check_neon()) {
return HS_SUCCESS;
}
#endif
else {
return HS_ARCH_ERROR;
}
}