Skip to content

Commit

Permalink
hyperscan: support aarch64 platform
Browse files Browse the repository at this point in the history
  • Loading branch information
tqltech authored and TaiQilong committed Jan 14, 2020
1 parent d79973e commit 9b14ae1
Show file tree
Hide file tree
Showing 27 changed files with 10,410 additions and 1,448 deletions.
125 changes: 88 additions & 37 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ include (${CMAKE_MODULE_PATH}/boost.cmake)
# -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
find_package(PythonInterp)
find_program(RAGEL ragel)
find_program(COPY cp)

if(PYTHONINTERP_FOUND)
set(PYTHON ${PYTHON_EXECUTABLE})
Expand Down Expand Up @@ -130,8 +131,7 @@ CMAKE_DEPENDENT_OPTION(DUMP_SUPPORT "Dump code support; normally on, except in r

CMAKE_DEPENDENT_OPTION(DISABLE_ASSERTS "Disable assert(); Asserts are enabled in debug builds, disabled in release builds" OFF "NOT RELEASE_BUILD" ON)

option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime"
OFF)
option(BUILD_AVX512 "Experimental: support avx512 in the fat runtime" OFF)

option(WINDOWS_ICC "Use Intel C++ Compiler on Windows, default off, requires ICC to be set in project" OFF)

Expand Down Expand Up @@ -181,26 +181,33 @@ else()
# generic, which isn't very good in some cases. march=native looks at
# cpuid info and then chooses the best microarch it can (and replaces
# the flag), so use that for tune.

if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
# arg1 might exist if using ccache
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_VARIABLE _GCC_OUTPUT)
string(FIND "${_GCC_OUTPUT}" "Known" POS)
string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT)
string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1"
GNUCC_ARCH "${_GCC_OUTPUT}")

# test the parsed flag
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_QUIET ERROR_QUIET
INPUT_FILE /dev/null
RESULT_VARIABLE GNUCC_TUNE_TEST)
if (NOT GNUCC_TUNE_TEST EQUAL 0)
message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid")
endif()
endif()

# arg1 might exist if using ccache
string (STRIP "${CMAKE_C_COMPILER_ARG1}" CC_ARG1)
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=native -mtune=native)
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_VARIABLE _GCC_OUTPUT)
string(FIND "${_GCC_OUTPUT}" "Known" POS)
string(SUBSTRING "${_GCC_OUTPUT}" 0 ${POS} _GCC_OUTPUT)
string(REGEX REPLACE ".*march=[ \t]*([^ \n]*)[ \n].*" "\\1"
GNUCC_ARCH "${_GCC_OUTPUT}")

# test the parsed flag
set (EXEC_ARGS ${CC_ARG1} -E - -mtune=${GNUCC_ARCH})
execute_process(COMMAND ${CMAKE_C_COMPILER} ${EXEC_ARGS}
OUTPUT_QUIET ERROR_QUIET
INPUT_FILE /dev/null
RESULT_VARIABLE GNUCC_TUNE_TEST)
if (NOT GNUCC_TUNE_TEST EQUAL 0)
message(SEND_ERROR "Something went wrong determining gcc tune: -mtune=${GNUCC_ARCH} not valid")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
set (EXEC_ARGS ${CC_ARG1} -c -Q --help=target -march=armv8-a -mtune=armv8-a)
endif()

set(TUNE_FLAG ${GNUCC_ARCH})
else ()
set(TUNE_FLAG native)
Expand Down Expand Up @@ -232,6 +239,13 @@ else()
set(EXTRA_C_FLAGS "${OPT_C_FLAG} -std=c99 -Wall -Wextra -Wshadow -Wcast-qual -fno-strict-aliasing")
set(EXTRA_CXX_FLAGS "${OPT_CXX_FLAG} -std=c++11 -Wall -Wextra -Wshadow -Wswitch -Wreturn-type -Wcast-qual -Wno-deprecated -Wnon-virtual-dtor -fno-strict-aliasing")

if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -fsigned-char")
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fsigned-char")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc")
endif()

if (NOT RELEASE_BUILD)
# -Werror is most useful during development, don't potentially break
# release builds
Expand All @@ -245,11 +259,19 @@ else()
endif()

if (NOT CMAKE_C_FLAGS MATCHES .*march.* AND NOT CMAKE_C_FLAGS MATCHES .*mtune.*)
set(ARCH_C_FLAGS "-march=native -mtune=${TUNE_FLAG}")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
set(ARCH_C_FLAGS "-march=native -mtune=${TUNE_FLAG}")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
set(ARCH_C_FLAGS "-march=armv8-a -mtune=${TUNE_FLAG}")
endif ()
endif()

if (NOT CMAKE_CXX_FLAGS MATCHES .*march.* AND NOT CMAKE_CXX_FLAGS MATCHES .*mtune.*)
set(ARCH_CXX_FLAGS "-march=native -mtune=${TUNE_FLAG}")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
set(ARCH_CXX_FLAGS "-march=native -mtune=${TUNE_FLAG}")
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
set(ARCH_CXX_FLAGS "-march=armv8-a -mtune=${TUNE_FLAG}")
endif()
endif()

if(CMAKE_COMPILER_IS_GNUCC)
Expand All @@ -272,7 +294,6 @@ else()
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -fno-omit-frame-pointer")
endif()


if (CMAKE_C_COMPILER_ID MATCHES "Intel")
set(SKYLAKE_FLAG "-xCORE-AVX512")
else ()
Expand All @@ -281,10 +302,18 @@ else()
endif()

CHECK_INCLUDE_FILES(unistd.h HAVE_UNISTD_H)
CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H)
CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H)
CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H)
CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H)

if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
CHECK_INCLUDE_FILES(intrin.h HAVE_C_INTRIN_H)
CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H)
CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H)
CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H)
endif()

if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
CHECK_INCLUDE_FILES(arm_neon.h HAVE_C_ARM_NEON_H)
CHECK_INCLUDE_FILE_CXX(arm_neon.h HAVE_CXX_ARM_NEON_H)
endif()

CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN)
CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC)
Expand Down Expand Up @@ -317,7 +346,10 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
(CMAKE_VERSION VERSION_GREATER "3.0" AND CMAKE_GENERATOR MATCHES "Ninja")))
message (STATUS "Building the fat runtime requires the Unix Makefiles generator, or Ninja with CMake v3.0 or higher")
set (FAT_RUNTIME_REQUISITES FALSE)
else()
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
message(STATUS "AARCH64 platform don't support fat runtime")
set (FAT_RUNTIME_REQUISITES FALSE)
else()
include (${CMAKE_MODULE_PATH}/attrib.cmake)
if (NOT HAS_C_ATTR_IFUNC)
message(STATUS "Compiler does not support ifunc attribute, cannot build fat runtime")
Expand All @@ -329,7 +361,9 @@ if (CMAKE_SYSTEM_NAME MATCHES "Linux")
CMAKE_DEPENDENT_OPTION(FAT_RUNTIME "Build a library that supports multiple microarchitecures" ${RELEASE_BUILD} "FAT_RUNTIME_REQUISITES" OFF)
endif ()

include (${CMAKE_MODULE_PATH}/arch.cmake)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
include (${CMAKE_MODULE_PATH}/arch.cmake)
endif()

# testing a builtin takes a little more work
CHECK_C_SOURCE_COMPILES("void *aa_test(void *x) { return __builtin_assume_aligned(x, 16);}\nint main(void) { return 0; }" HAVE_CC_BUILTIN_ASSUME_ALIGNED)
Expand All @@ -346,6 +380,7 @@ set(C_FLAGS_TO_CHECK
"-Wstrict-prototypes"
"-Wmissing-prototypes"
)

foreach (FLAG ${C_FLAGS_TO_CHECK})
# munge the name so it doesn't break things
string(REPLACE "-" "_" FNAME C_FLAG${FLAG})
Expand Down Expand Up @@ -395,12 +430,6 @@ if (CXX_IGNORED_ATTR)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-ignored-attributes")
endif()

# gcc 9 complains about redundant move for returned variable
CHECK_CXX_COMPILER_FLAG("-Wredundant-move" CXX_REDUNDANT_MOVE)
if (CXX_REDUNDANT_MOVE)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-redundant-move")
endif()

# note this for later
# g++ doesn't have this flag but clang does
CHECK_CXX_COMPILER_FLAG("-Wweak-vtables" CXX_WEAK_VTABLES)
Expand Down Expand Up @@ -449,6 +478,14 @@ else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()

# Test case for neon function.
option(UNIT_SIMD "Simd funtion test case, default is OFF" OFF)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
if (UNIT_SIMD)
add_subdirectory(unit-simd)
endif()
endif()

add_subdirectory(util)
add_subdirectory(doc/dev-reference)

Expand All @@ -469,6 +506,7 @@ if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS)
endif()

add_subdirectory(unit)

if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt)
add_subdirectory(tools)
endif()
Expand Down Expand Up @@ -538,14 +576,27 @@ set_source_files_properties(
PROPERTIES
COMPILE_FLAGS "${RAGEL_C_FLAGS}")

ragelmaker(src/parser/Parser.rl)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
ragelmaker(src/parser/Parser.rl)
endif()

if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
ragelcopyer(src/parser/Parser.rl)
endif()

set_source_files_properties(
${CMAKE_BINARY_DIR}/src/parser/control_verbs.cpp
PROPERTIES
COMPILE_FLAGS "${RAGEL_C_FLAGS}")

ragelmaker(src/parser/control_verbs.rl)

if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i386")
ragelmaker(src/parser/control_verbs.rl)
endif()

if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
ragelcopyer(src/parser/control_verbs.rl)
endif()

SET(hs_HEADERS
src/hs.h
Expand Down
39 changes: 29 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,41 @@ project repository.

# Versioning

The `master` branch on Github will always contain the most recent release of
Hyperscan. Each version released to `master` goes through QA and testing before
it is released; if you're a user, rather than a developer, this is the version
you should be using.

Further development towards the next release takes place on the `develop`
branch.
The `master` branch on Github/kunpengcompute will always contain the most recent
release of Intel Hyperscan.

The `aarch64` branch on Github/kunpengcompute will always contains the most recent
release that supports the use of aarch64 architecture. The aarch64 branch was developed
based on Intel hyperscan 5.2.1. Each version released to `aarch64` goes through QA and
testing before it is released; if you're a user of aarch64, rather than a developer,
this is the version you should be using.

# Transplant
Add x86 and aarch64 platform judgment branches to the code of aarch64 branch.
According to the judgment results, choose to perform different operations,
including compilation options, detecting specific header files, simd instruction
judgment, and so on.

# Optimization
Through the use of NEON instructions, inline assembly, data alignment, instruction
alignment, memory data prefetching, static branch prediction, code structure
optimization, etc., to achieve performance improvements on the Kunpeng platform.

# Get Involved

The official homepage for Hyperscan is at [www.hyperscan.io](https://www.hyperscan.io).

If you have questions or comments, we encourage you to [join the mailing
list](https://lists.01.org/mailman/listinfo/hyperscan). Bugs can be filed by
`master` branch:
If you have questions or comments, you can [join the mailing list]
(https://lists.01.org/mailman/listinfo/hyperscan). Bugs can be filed by
sending email to the list, or by creating an issue on Github.

If you wish to contact the Hyperscan team at Intel directly, without posting
publicly to the mailing list, send email to
[hyperscan@intel.com](mailto:hyperscan@intel.com).

`aarch64` branch:
If you have questions or comments, we encourage you to create an issue on
Github/kunpengcompute.

If you wish to contact the Huawei team directly, you can send email to
kunpengcompute@huawei.com.
9 changes: 9 additions & 0 deletions cmake/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
/* "Define if building for EM64T" */
#cmakedefine ARCH_X86_64

/* "Define if building for aarch64" */
#cmakedefine ARCH_AARCH64

/* internal build, switch on dump support. */
#cmakedefine DUMP_SUPPORT

Expand Down Expand Up @@ -45,6 +48,12 @@
/* C compiler has intrin.h */
#cmakedefine HAVE_C_INTRIN_H

/* C++ compiler has arm_neon.h */
#cmakedefine HAVE_CXX_ARM_NEON_H

/* C compiler has arm_neon.h */
#cmakedefine HAVE_C_ARM_NEON_H

/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
0 if you don't. */
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
Expand Down
13 changes: 9 additions & 4 deletions cmake/platform.cmake
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# determine the target arch

# really only interested in the preprocessor here
CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_64_BIT)
CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_X86_64)

CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_32_BIT)
CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_IA32)

set(ARCH_X86_64 ${ARCH_64_BIT})
set(ARCH_IA32 ${ARCH_32_BIT})
CHECK_C_SOURCE_COMPILES("#if !(defined(__aarch64__))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64)

if (ARCH_X86_64 OR ARCH_AARCH64)
set(ARCH_64_BIT 1)
elseif (ARCH_IA32)
set(ARCH_32_BIT 1)
endif()
20 changes: 20 additions & 0 deletions cmake/ragel.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,23 @@ function(ragelmaker src_rl)
set_source_files_properties(${rl_out} PROPERTIES GENERATED TRUE)
endfunction(ragelmaker)

# On the aarch64 platform, char is unsigned by default, so in order to be consistent with
# the x86 platform, we will add -fsigned-char to the compile option to force the char type.
# However, when the ragel generates c++ code, the char variable used will still be considered
# unsigned, resulting in the overflow of the char variable value in the generated code,
# resulting in some errors.
# function for copying the previously modified code to the specified path

function(ragelcopyer src_rl)
get_filename_component(src_dir ${src_rl} PATH) # old cmake needs PATH
get_filename_component(src_file ${src_rl} NAME_WE)
set(rl_out ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}/${src_file}.cpp)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}/${src_file}.cpp
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/${src_dir}
COMMAND ${COPY} -f ${CMAKE_CURRENT_SOURCE_DIR}/${src_dir}/${src_file}.cpp ${rl_out} 2>/dev/null ||:
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${src_dir}/${src_file}.cpp
)
add_custom_target(ragel_${src_file} DEPENDS ${rl_out})
set_source_files_properties(${rl_out} PROPERTIES GENERATED TRUE)
endfunction(ragelcopyer)

0 comments on commit 9b14ae1

Please sign in to comment.