Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port to web assembly #367

Open
wants to merge 13 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 32 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ set (HS_MINOR_VERSION 4)
set (HS_PATCH_VERSION 0)
set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION})

option(HYPERSCAN_WASM32 "Enable hyperscan compilation to wasm32" OFF)
option(HYPERSCAN_EMSCRIPTEN_BOOST "Use boost library that is bundled with emscripten" OFF)

set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
include(CheckCCompilerFlag)
include(CheckCXXCompilerFlag)
Expand Down Expand Up @@ -40,6 +43,11 @@ endif()
set(BINDIR "${PROJECT_BINARY_DIR}/bin")
set(LIBDIR "${PROJECT_BINARY_DIR}/lib")

if (HYPERSCAN_WASM32)
string(APPEND CMAKE_C_FLAGS " -msse -msse2 -mssse3 -msimd128")
string(APPEND CMAKE_CXX_FLAGS " -msse -msse2 -mssse3 -msimd128 -fwasm-exceptions")
endif()

set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR})

# First for the generic no-config case
Expand All @@ -64,12 +72,21 @@ if (XCODE OR CMAKE_CXX_COMPILER_ID MATCHES "Intel")
set(CMAKE_INCLUDE_SYSTEM_FLAG_CXX "-isystem")
endif ()

if (HYPERSCAN_WASM32)
set(CMAKE_INCLUDE_SYSTEM_FLAG_CXX "-Xclang -isystem")
endif()

set(CMAKE_INCLUDE_CURRENT_DIR 1)
include_directories(${PROJECT_SOURCE_DIR}/src)
include_directories(${PROJECT_BINARY_DIR})
include_directories(SYSTEM include)

include (${CMAKE_MODULE_PATH}/boost.cmake)
if (HYPERSCAN_EMSCRIPTEN_BOOST)
string(APPEND CMAKE_CXX_FLAGS " -sUSE_BOOST_HEADERS")
else()
include (${CMAKE_MODULE_PATH}/boost.cmake)
endif()


# -- make this work? set(python_ADDITIONAL_VERSIONS 2.7 2.6)
find_package(PythonInterp)
Expand Down Expand Up @@ -294,6 +311,11 @@ CHECK_INCLUDE_FILE_CXX(intrin.h HAVE_CXX_INTRIN_H)
CHECK_INCLUDE_FILES(x86intrin.h HAVE_C_X86INTRIN_H)
CHECK_INCLUDE_FILE_CXX(x86intrin.h HAVE_CXX_X86INTRIN_H)

if (HYPERSCAN_WASM32)
CHECK_INCLUDE_FILES(tmmintrin.h HAVE_C_TMMINTRIN_H)
set(HAVE_CXX_TMMINTRIN_H 1)
endif()

CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN)
CHECK_FUNCTION_EXISTS(_aligned_malloc HAVE__ALIGNED_MALLOC)

Expand Down Expand Up @@ -482,7 +504,9 @@ if (CORRECT_PCRE_VERSION AND PCRE_BUILD_SOURCE AND BUILD_STATIC_LIBS)
set(BUILD_CHIMERA TRUE)
endif()

add_subdirectory(unit)
if (NOT HYPERSCAN_WASM32)
add_subdirectory(unit)
endif()
if (EXISTS ${CMAKE_SOURCE_DIR}/tools/CMakeLists.txt)
add_subdirectory(tools)
endif()
Expand Down Expand Up @@ -1190,6 +1214,12 @@ if (NOT FAT_RUNTIME)
src/hs_valid_platform.c
$<TARGET_OBJECTS:hs_exec>
$<TARGET_OBJECTS:hs_compile>)
if (HYPERSCAN_WASM32)
target_compile_definitions(hs_exec PRIVATE WASM32)
target_compile_definitions(hs_runtime PRIVATE WASM32)
target_compile_definitions(hs_compile PRIVATE WASM32)
target_compile_definitions(hs PRIVATE WASM32)
endif()
endif (BUILD_STATIC_LIBS)

if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)
Expand Down
4 changes: 4 additions & 0 deletions chimera/ch_runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,10 @@ ch_error_t catchupPcre(struct HybridContext *hyctx, unsigned int id,
} else if (cbrv == CH_CALLBACK_SKIP_PATTERN) {
DEBUG_PRINTF("user callback told us to skip this pattern\n");
pd->scanStart = hyctx->length;
if (top_id == id) {
break;
}
continue;
}

if (top_id == id) {
Expand Down
2 changes: 2 additions & 0 deletions cmake/arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ if (HAVE_C_X86INTRIN_H)
set (INTRIN_INC_H "x86intrin.h")
elseif (HAVE_C_INTRIN_H)
set (INTRIN_INC_H "intrin.h")
elseif (HAVE_C_TMMINTRIN_H)
set (INTRIN_INC_H "tmmintrin.h")
else ()
message (FATAL_ERROR "No intrinsics header found")
endif ()
Expand Down
2 changes: 1 addition & 1 deletion cmake/build_wrapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ KEEPSYMS=$(mktemp -p /tmp keep.syms.XXXXX)
LIBC_SO=$("$@" --print-file-name=libc.so.6)
cp ${KEEPSYMS_IN} ${KEEPSYMS}
# get all symbols from libc and turn them into patterns
nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ ]*\).*/^\1$/' >> ${KEEPSYMS}
nm -f p -g -D ${LIBC_SO} | sed -s 's/\([^ @]*\).*/^\1$/' >> ${KEEPSYMS}
# build the object
"$@"
# rename the symbols in the object
Expand Down
6 changes: 6 additions & 0 deletions cmake/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@
/* C compiler has intrin.h */
#cmakedefine HAVE_C_INTRIN_H

/* C++ compiler has tmmintrin.h */
#cmakedefine HAVE_CXX_TMMINTRIN_H

/* C compiler has tmmintrin.h */
#cmakedefine HAVE_C_TMMINTRIN_H

/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
0 if you don't. */
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
Expand Down
4 changes: 4 additions & 0 deletions cmake/platform.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,9 @@ CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error n

CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_32_BIT)

if (HYPERSCAN_WASM32)
set(ARCH_32_BIT 1)
endif()

set(ARCH_X86_64 ${ARCH_64_BIT})
set(ARCH_IA32 ${ARCH_32_BIT})
9 changes: 7 additions & 2 deletions src/compiler/compiler.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -323,7 +323,8 @@ void addExpression(NG &ng, unsigned index, const char *expression,
}

// Ensure that our pattern isn't too long (in characters).
if (strlen(expression) > cc.grey.limitPatternLength) {
size_t maxlen = cc.grey.limitPatternLength + 1;
if (strnlen(expression, maxlen) >= maxlen) {
throw CompileError("Pattern length exceeds limit.");
}

Expand Down Expand Up @@ -416,6 +417,10 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
"HS_FLAG_SOM_LEFTMOST are supported in literal API.");
}

if (!strcmp(expression, "")) {
throw CompileError("Pure literal API doesn't support empty string.");
}

// This expression must be a pure literal, we can build ue2_literal
// directly based on expression text.
ParsedLitExpression ple(index, expression, expLength, flags, id);
Expand Down
8 changes: 7 additions & 1 deletion src/hs.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -514,6 +514,12 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
return HS_COMPILER_ERROR;
}

if (flags & HS_FLAG_COMBINATION) {
*error = generateCompileError("Invalid parameter: unsupported "
"logical combination expression", -1);
return HS_COMPILER_ERROR;
}

*info = nullptr;
*error = nullptr;

Expand Down
12 changes: 3 additions & 9 deletions src/hs_compile.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -748,10 +748,7 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error);
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
* - HS_FLAG_QUIET - This flag will be ignored.
*
* @param info
* On success, a pointer to the pattern information will be returned in
Expand Down Expand Up @@ -814,10 +811,7 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression,
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
* - HS_FLAG_QUIET - This flag will be ignored.
*
* @param ext
* A pointer to a filled @ref hs_expr_ext_t structure that defines
Expand Down
6 changes: 4 additions & 2 deletions src/hs_internal.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, Intel Corporation
* Copyright (c) 2019-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -80,7 +80,9 @@ extern "C"
| HS_FLAG_PREFILTER \
| HS_FLAG_SINGLEMATCH \
| HS_FLAG_ALLOWEMPTY \
| HS_FLAG_SOM_LEFTMOST)
| HS_FLAG_SOM_LEFTMOST \
| HS_FLAG_COMBINATION \
| HS_FLAG_QUIET)

#ifdef __cplusplus
} /* extern "C" */
Expand Down
6 changes: 4 additions & 2 deletions src/nfa/mcclellancompile.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -1082,7 +1082,9 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
// Use the daddy already set for this state so long as it isn't already
// a Sherman state.
dstate_id_t daddy = currState.daddy;
if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) {
if (info.is_widestate(daddy)) {
return;
} else if (!info.is_sherman(daddy)) {
hinted.insert(currState.daddy);
} else {
// Fall back to granddaddy, which has already been processed (due
Expand Down
23 changes: 22 additions & 1 deletion src/rose/program_runtime.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -3110,6 +3110,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,

const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP;
const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;

const char *pc_base = getByOffset(t, programOffset);
const char *pc = pc_base;
Expand Down Expand Up @@ -3206,13 +3207,33 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION

L_PROGRAM_CASE(CATCH_UP_MPV) {
if (from_mpv || skip_mpv_catchup) {
DEBUG_PRINTF("skipping mpv catchup\n");
} else if (roseCatchUpMPV(t,
end - scratch->core_info.buf_offset,
scratch) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
L_PROGRAM_NEXT_INSTRUCTION

L_PROGRAM_CASE(SOM_FROM_REPORT) {
som = handleSomExternal(scratch, &ri->som, end);
DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch,
som);
}
L_PROGRAM_NEXT_INSTRUCTION

L_PROGRAM_CASE(TRIGGER_SUFFIX) {
if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som,
end) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
work_done = 1;
}
L_PROGRAM_NEXT_INSTRUCTION

L_PROGRAM_CASE(DEDUPE) {
updateSeqPoint(tctxt, end, from_mpv);
const char do_som = t->hasSom; // TODO: constant propagate
Expand Down
6 changes: 5 additions & 1 deletion src/util/alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ class AlignedAllocator {

T *allocate(std::size_t size) const {
size_t alloc_size = size * sizeof(T);
return static_cast<T *>(aligned_malloc_internal(alloc_size, N));
T *ptr = static_cast<T *>(aligned_malloc_internal(alloc_size, N));
if (!ptr) {
throw std::bad_alloc();
}
return ptr;
}

void deallocate(T *x, std::size_t) const noexcept {
Expand Down
2 changes: 1 addition & 1 deletion src/util/arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
/*
* MSVC uses a different form of inline asm
*/
#if defined(_WIN32) && defined(_MSC_VER)
#if defined(WASM32) || defined(_WIN32) && defined(_MSC_VER)
#define NO_ASM
#endif

Expand Down
2 changes: 1 addition & 1 deletion src/util/cpuid_flags.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "hs_internal.h"
#include "util/arch.h"

#if !defined(_WIN32) && !defined(CPUID_H_)
#if !defined(_WIN32) && !defined(CPUID_H_) && !defined(WASM32)
#include <cpuid.h>
#endif

Expand Down
2 changes: 1 addition & 1 deletion src/util/cpuid_flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

#include "ue2common.h"

#if !defined(_WIN32) && !defined(CPUID_H_)
#if !defined(_WIN32) && !defined(CPUID_H_) && !defined(WASM32)
#include <cpuid.h>
/* system header doesn't have a header guard */
#define CPUID_H_
Expand Down
16 changes: 13 additions & 3 deletions src/util/cpuid_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include "ue2common.h"
#include "cpuid_flags.h"

#if !defined(_WIN32) && !defined(CPUID_H_)
#if !defined(_WIN32) && !defined(CPUID_H_) && !defined(WASM32)
#include <cpuid.h>
/* system header doesn't have a header guard */
#define CPUID_H_
Expand All @@ -46,7 +46,14 @@ extern "C"
static inline
void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
#ifndef _WIN32
#if defined(WASM32)
(void)(op);
(void)(leaf);
*eax = 0;
*ebx = 0;
*ecx = 0;
*edx = 0;
#elif !defined(_WIN32)
__cpuid_count(op, leaf, *eax, *ebx, *ecx, *edx);
#else
int a[4];
Expand Down Expand Up @@ -95,7 +102,10 @@ void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,

static inline
u64a xgetbv(u32 op) {
#if defined(_WIN32) || defined(__INTEL_COMPILER)
#if defined(WASM32)
(void) op;
return 0;
#elif defined(_WIN32) || defined(__INTEL_COMPILER)
return _xgetbv(op);
#else
u32 a, d;
Expand Down
12 changes: 12 additions & 0 deletions src/util/intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,22 @@
# endif
#endif

#ifdef __cplusplus
# if defined(HAVE_CXX_TMMINTRIN_H)
# define USE_TMMINTRIN_H
# endif
#else // C
# if defined(HAVE_C_TMMINTRIN_H)
# define USE_TMMINTRIN_H
# endif
#endif

#if defined(USE_X86INTRIN_H)
#include <x86intrin.h>
#elif defined(USE_INTRIN_H)
#include <intrin.h>
#elif defined(USE_TMMINTRIN_H)
#include <tmmintrin.h>
#else
#error no intrinsics file
#endif
Expand Down