| From 1c54b13cb29463af514a107c42946abd97b5ca41 Mon Sep 17 00:00:00 2001 |
| From: Alexander Alekhin <alexander.alekhin@intel.com> |
| Date: Wed, 7 Sep 2016 18:02:36 +0300 |
| Subject: [PATCH] cmake: support multiple CPU targets |
| |
| Backported from: https://github.com/opencv/opencv/commit/e16227b53cabab1caa4b7aba8ff59a630528348f |
| Signed-off-by: Samuel Martin <s.martin49@gmail.com> |
| --- |
| CMakeLists.txt | 50 +- |
| cmake/OpenCVCompilerOptimizations.cmake | 651 +++++++++++++++++++++ |
| cmake/OpenCVCompilerOptions.cmake | 161 +---- |
| cmake/OpenCVGenHeaders.cmake | 4 + |
| cmake/OpenCVModule.cmake | 3 + |
| cmake/OpenCVPCHSupport.cmake | 5 +- |
| cmake/OpenCVUtils.cmake | 50 +- |
| cmake/checks/cpu_avx.cpp | 9 + |
| cmake/checks/cpu_avx2.cpp | 10 + |
| cmake/checks/cpu_avx512.cpp | 10 + |
| cmake/checks/cpu_fp16.cpp | 33 ++ |
| cmake/checks/cpu_popcnt.cpp | 8 + |
| cmake/checks/cpu_sse.cpp | 2 + |
| cmake/checks/cpu_sse2.cpp | 2 + |
| cmake/checks/cpu_sse3.cpp | 7 + |
| cmake/checks/cpu_sse41.cpp | 6 + |
| cmake/checks/cpu_sse42.cpp | 5 + |
| cmake/checks/cpu_ssse3.cpp | 7 + |
| cmake/checks/fp16.cpp | 33 -- |
| cmake/templates/cv_cpu_config.h.in | 5 + |
| cmake/templates/cvconfig.h.in | 13 + |
| .../core/include/opencv2/core/cv_cpu_dispatch.h | 166 ++++++ |
| modules/core/include/opencv2/core/cv_cpu_helper.h | 133 +++++ |
| modules/core/include/opencv2/core/cvdef.h | 145 +---- |
| modules/core/include/opencv2/core/fast_math.hpp | 60 +- |
| modules/core/src/system.cpp | 301 ++++++++-- |
| modules/highgui/CMakeLists.txt | 2 +- |
| modules/imgproc/src/imgwarp.cpp | 4 +- |
| modules/objdetect/src/haar.cpp | 5 +- |
| 29 files changed, 1472 insertions(+), 418 deletions(-) |
| create mode 100644 cmake/OpenCVCompilerOptimizations.cmake |
| create mode 100644 cmake/checks/cpu_avx.cpp |
| create mode 100644 cmake/checks/cpu_avx2.cpp |
| create mode 100644 cmake/checks/cpu_avx512.cpp |
| create mode 100644 cmake/checks/cpu_fp16.cpp |
| create mode 100644 cmake/checks/cpu_popcnt.cpp |
| create mode 100644 cmake/checks/cpu_sse.cpp |
| create mode 100644 cmake/checks/cpu_sse2.cpp |
| create mode 100644 cmake/checks/cpu_sse3.cpp |
| create mode 100644 cmake/checks/cpu_sse41.cpp |
| create mode 100644 cmake/checks/cpu_sse42.cpp |
| create mode 100644 cmake/checks/cpu_ssse3.cpp |
| delete mode 100644 cmake/checks/fp16.cpp |
| create mode 100644 cmake/templates/cv_cpu_config.h.in |
| create mode 100644 modules/core/include/opencv2/core/cv_cpu_dispatch.h |
| create mode 100644 modules/core/include/opencv2/core/cv_cpu_helper.h |
| |
| diff --git a/CMakeLists.txt b/CMakeLists.txt |
| index cc45f6f..9c9971e 100644 |
| --- a/CMakeLists.txt |
| +++ b/CMakeLists.txt |
| @@ -85,6 +85,10 @@ if(POLICY CMP0042) |
| cmake_policy(SET CMP0042 NEW) |
| endif() |
| |
| +if(POLICY CMP0051) |
| + cmake_policy(SET CMP0051 NEW) |
| +endif() |
| + |
| include(cmake/OpenCVUtils.cmake) |
| |
| # must go before the project command |
| @@ -274,16 +278,6 @@ OCV_OPTION(ENABLE_COVERAGE "Enable coverage collection with GCov" |
| OCV_OPTION(ENABLE_OMIT_FRAME_POINTER "Enable -fomit-frame-pointer for GCC" ON IF CMAKE_COMPILER_IS_GNUCXX AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX) ) |
| OCV_OPTION(ENABLE_POWERPC "Enable PowerPC for GCC" ON IF (CMAKE_COMPILER_IS_GNUCXX AND CMAKE_SYSTEM_PROCESSOR MATCHES powerpc.*) ) |
| OCV_OPTION(ENABLE_FAST_MATH "Enable -ffast-math (not recommended for GCC 4.6.x)" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) ) |
| -OCV_OPTION(ENABLE_SSE "Enable SSE instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) |
| -OCV_OPTION(ENABLE_SSE2 "Enable SSE2 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) |
| -OCV_OPTION(ENABLE_SSE3 "Enable SSE3 instructions" ON IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) ) |
| -OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) |
| -OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX OR CV_ICC) AND (X86 OR X86_64)) ) |
| -OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) |
| -OCV_OPTION(ENABLE_POPCNT "Enable POPCNT instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) |
| -OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) |
| -OCV_OPTION(ENABLE_AVX2 "Enable AVX2 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) |
| -OCV_OPTION(ENABLE_FMA3 "Enable FMA3 instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) |
| OCV_OPTION(ENABLE_NEON "Enable NEON instructions" "${NEON}" IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) ) |
| OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND (ARM OR AARCH64 OR IOS) ) |
| OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF ) |
| @@ -292,6 +286,9 @@ OCV_OPTION(ANDROID_EXAMPLES_WITH_LIBS "Build binaries of Android examples with n |
| OCV_OPTION(ENABLE_IMPL_COLLECTION "Collect implementation data on function call" OFF ) |
| OCV_OPTION(ENABLE_INSTRUMENTATION "Instrument functions to collect calls trace and performance" OFF ) |
| OCV_OPTION(GENERATE_ABI_DESCRIPTOR "Generate XML file for abi_compliance_checker tool" OFF IF UNIX) |
| +OCV_OPTION(CV_ENABLE_INTRINSICS "Use intrinsic-based optimized code" ON ) |
| +OCV_OPTION(CV_DISABLE_OPTIMIZATION "Disable explicit optimized code (dispatched code/intrinsics/loop unrolling/etc)" OFF ) |
| + |
| |
| OCV_OPTION(DOWNLOAD_EXTERNAL_TEST_DATA "Download external test data (Python executable and OPENCV_TEST_DATA_PATH environment variable may be required)" OFF ) |
| |
| @@ -492,6 +489,9 @@ if(CMAKE_GENERATOR MATCHES "Makefiles|Ninja" AND "${CMAKE_BUILD_TYPE}" STREQUAL |
| set(CMAKE_BUILD_TYPE Release) |
| endif() |
| |
| +# --- Python Support --- |
| +include(cmake/OpenCVDetectPython.cmake) |
| + |
| include(cmake/OpenCVCompilerOptions.cmake) |
| |
| |
| @@ -569,9 +569,6 @@ else() |
| unset(DOXYGEN_FOUND CACHE) |
| endif() |
| |
| -# --- Python Support --- |
| -include(cmake/OpenCVDetectPython.cmake) |
| - |
| # --- Java Support --- |
| include(cmake/OpenCVDetectApacheAnt.cmake) |
| if(ANDROID) |
| @@ -860,6 +857,33 @@ if(NOT CMAKE_GENERATOR MATCHES "Xcode|Visual Studio") |
| status(" Configuration:" ${CMAKE_BUILD_TYPE}) |
| endif() |
| |
| + |
| +# ========================= CPU code generation mode ========================= |
| +status("") |
| +status(" CPU/HW features:") |
| +status(" Baseline:" "${CPU_BASELINE_FINAL}") |
| +if(NOT CPU_BASELINE STREQUAL CPU_BASELINE_FINAL) |
| + status(" requested:" "${CPU_BASELINE}") |
| +endif() |
| +if(CPU_BASELINE_REQUIRE) |
| + status(" required:" "${CPU_BASELINE_REQUIRE}") |
| +endif() |
| +if(CPU_BASELINE_DISABLE) |
| + status(" disabled:" "${CPU_BASELINE_DISABLE}") |
| +endif() |
| +if(CPU_DISPATCH_FINAL OR CPU_DISPATCH) |
| + status(" Dispatched code generation:" "${CPU_DISPATCH_FINAL}") |
| + if(NOT CPU_DISPATCH STREQUAL CPU_DISPATCH_FINAL) |
| + status(" requested:" "${CPU_DISPATCH}") |
| + endif() |
| + if(CPU_DISPATCH_REQUIRE) |
| + status(" required:" "${CPU_DISPATCH_REQUIRE}") |
| + endif() |
| + foreach(OPT ${CPU_DISPATCH_FINAL}) |
| + status(" ${OPT} (${CPU_${OPT}_USAGE_COUNT} files):" "+ ${CPU_DISPATCH_${OPT}_INCLUDED}") |
| + endforeach() |
| +endif() |
| + |
| # ========================== C/C++ options ========================== |
| if(CMAKE_CXX_COMPILER_VERSION) |
| set(OPENCV_COMPILER_STR "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1} (ver ${CMAKE_CXX_COMPILER_VERSION})") |
| diff --git a/cmake/OpenCVCompilerOptimizations.cmake b/cmake/OpenCVCompilerOptimizations.cmake |
| new file mode 100644 |
| index 0000000..b849f02 |
| --- /dev/null |
| +++ b/cmake/OpenCVCompilerOptimizations.cmake |
| @@ -0,0 +1,651 @@ |
| +# x86/x86-64 arch: |
| +# SSE / SSE2 (always available on 64-bit CPUs) |
| +# SSE3 / SSSE3 |
| +# SSE4_1 / SSE4_2 / POPCNT |
| +# AVX / AVX2 / AVX512 |
| +# FMA3 |
| + |
| +# CPU_{opt}_SUPPORTED=ON/OFF - compiler support (possibly with additional flag) |
| +# CPU_{opt}_IMPLIES=<list> |
| +# CPU_{opt}_FORCE=<list> - subset of "implies" list |
| +# CPU_{opt}_FLAGS_ON="" |
| +# CPU_{opt}_FEATURE_ALIAS - mapping to CV_CPU_* HWFeature enum |
| + |
| +# Input variables: |
| +# CPU_BASELINE=<list> - preferred list of baseline optimizations |
| +# CPU_DISPATCH=<list> - preferred list of dispatched optimizations |
| + |
| +# Advanced input variables: |
| +# CPU_BASELINE_REQUIRE=<list> - list of required baseline optimizations |
| +# CPU_DISPATCH_REQUIRE=<list> - list of required dispatched optimizations |
| +# CPU_BASELINE_DISABLE=<list> - list of disabled baseline optimizations |
| + |
| +# Output variables: |
| +# CPU_BASELINE_FINAL=<list> - final list of enabled compiler optimizations |
| +# CPU_DISPATCH_FINAL=<list> - final list of dispatched optimizations |
| +# |
| +# CPU_DISPATCH_FLAGS_${opt} - flags for source files compiled separately (_opt_avx2.cpp) |
| + |
| +set(CPU_ALL_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;POPCNT;AVX;FP16;AVX2;FMA3") # without AVX512 |
| +list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16) |
| +list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS) |
| + |
| +ocv_update(CPU_VFPV3_FEATURE_ALIAS "") |
| + |
| + |
| +set(HELP_CPU_BASELINE "Specify list of enabled baseline CPU optimizations") |
| +set(HELP_CPU_BASELINE_REQUIRE "Specify list of required baseline CPU optimizations") |
| +set(HELP_CPU_BASELINE_DISABLE "Specify list of forbidden baseline CPU optimizations") |
| +set(HELP_CPU_DISPATCH "Specify list of dispatched CPU optimizations") |
| +set(HELP_CPU_DISPATCH_REQUIRE "Specify list of required dispatched CPU optimizations") |
| + |
| +foreach(var CPU_BASELINE CPU_BASELINE_REQUIRE CPU_BASELINE_DISABLE CPU_DISPATCH CPU_DISPATCH_REQUIRE) |
| + if(DEFINED ${var}) |
| + string(REPLACE "," ";" _list "${${var}}") |
| + set(${var} "${_list}" CACHE STRING "${HELP_${var}}" FORCE) |
| + endif() |
| +endforeach() |
| + |
| +# Process legacy ENABLE_<opt> flags: a true value adds OPT to CPU_BASELINE_REQUIRE, a false value adds it to CPU_BASELINE_DISABLE; legacy_warn toggles the deprecation notice |
| +macro(ocv_optimization_process_obsolete_option legacy_flag OPT legacy_warn) |
| + if(DEFINED ${legacy_flag}) # legacy options that were never set are ignored |
| + if(${legacy_warn}) |
| + message(STATUS "WARNING: Option ${legacy_flag}='${${legacy_flag}}' is deprecated and should not be used anymore") |
| + message(STATUS " Behaviour of this option is not backward compatible") |
| + message(STATUS " Refer to 'CPU_BASELINE'/'CPU_DISPATCH' CMake options documentation") |
| + endif() |
| + if(${legacy_flag}) |
| + if(NOT ";${CPU_BASELINE_REQUIRE};" MATCHES ";${OPT};") # append only once |
| + set(CPU_BASELINE_REQUIRE "${CPU_BASELINE_REQUIRE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_REQUIRE}" FORCE) |
| + endif() |
| + else() |
| + if(NOT ";${CPU_BASELINE_DISABLE};" MATCHES ";${OPT};") # append only once |
| + set(CPU_BASELINE_DISABLE "${CPU_BASELINE_DISABLE};${OPT}" CACHE STRING "${HELP_CPU_BASELINE_DISABLE}" FORCE) |
| + endif() |
| + endif() |
| + endif() |
| +endmacro() |
| +ocv_optimization_process_obsolete_option(ENABLE_SSE SSE ON) |
| +ocv_optimization_process_obsolete_option(ENABLE_SSE2 SSE2 ON) |
| +ocv_optimization_process_obsolete_option(ENABLE_SSE3 SSE3 ON) |
| +ocv_optimization_process_obsolete_option(ENABLE_SSSE3 SSSE3 ON) |
| +ocv_optimization_process_obsolete_option(ENABLE_SSE41 SSE4_1 ON) |
| +ocv_optimization_process_obsolete_option(ENABLE_SSE42 SSE4_2 ON) |
| +ocv_optimization_process_obsolete_option(ENABLE_POPCNT POPCNT ON) |
| +ocv_optimization_process_obsolete_option(ENABLE_AVX AVX ON) |
| +ocv_optimization_process_obsolete_option(ENABLE_AVX2 AVX2 ON) |
| +ocv_optimization_process_obsolete_option(ENABLE_FMA3 FMA3 ON) |
| + |
| +ocv_optimization_process_obsolete_option(ENABLE_VFPV3 VFPV3 OFF) |
| +ocv_optimization_process_obsolete_option(ENABLE_NEON NEON OFF) |
| + |
| +# Set <resultvar> to 1 if check_opt is listed in ARGN or reachable from those entries through CPU_<opt>_IMPLIES, otherwise to 0 |
| +macro(ocv_is_optimization_in_list resultvar check_opt) |
| + set(__checked "") |
| + set(__queue ${ARGN}) |
| + set(${resultvar} 0) |
| + while(__queue AND NOT ${resultvar}) # each pass expands one more level of IMPLIES |
| + list(REMOVE_DUPLICATES __queue) |
| + set(__queue_current ${__queue}) |
| + set(__queue "") |
| + foreach(OPT ${__queue_current}) |
| + if("x${OPT}" STREQUAL "x${check_opt}") |
| + set(${resultvar} 1) |
| + break() |
| + elseif(NOT ";${__checked};" MATCHES ";${OPT};") # expand every optimization at most once |
| + list(APPEND __queue ${CPU_${OPT}_IMPLIES}) |
| + endif() |
| + list(APPEND __checked ${OPT}) |
| + endforeach() |
| + endwhile() |
| +endmacro() |
| +# Set <resultvar> to 1 if check_opt is listed in ARGN or reachable from those entries through CPU_<opt>_FORCE, otherwise to 0 |
| +macro(ocv_is_optimization_in_force_list resultvar check_opt) |
| + set(__checked "") |
| + set(__queue ${ARGN}) |
| + set(${resultvar} 0) |
| + while(__queue AND NOT ${resultvar}) # each pass expands one more level of FORCE |
| + list(REMOVE_DUPLICATES __queue) |
| + set(__queue_current ${__queue}) |
| + set(__queue "") |
| + foreach(OPT ${__queue_current}) |
| + if("x${OPT}" STREQUAL "x${check_opt}") # "x" prefix: operands can never be mistaken for variable names by if() |
| + set(${resultvar} 1) |
| + break() |
| + elseif(NOT ";${__checked};" MATCHES ";${OPT};") # expand every optimization at most once |
| + list(APPEND __queue ${CPU_${OPT}_FORCE}) |
| + endif() |
| + list(APPEND __checked ${OPT}) |
| + endforeach() |
| + endwhile() |
| +endmacro() |
| +# Append CPU_<OPT>_FLAGS_ON to the flag string <var>; when CPU_<OPT>_FLAGS_CONFLICT is set, flags matching that regex are removed from <var> first |
| +macro(ocv_append_optimization_flag var OPT) |
| + if(CPU_${OPT}_FLAGS_CONFLICT) |
| + string(REGEX REPLACE " ${CPU_${OPT}_FLAGS_CONFLICT}" "" ${var} " ${${var}}") |
| + string(REGEX REPLACE "^ +" "" ${var} "${${var}}") |
| + endif() |
| + set(${var} "${${var}} ${CPU_${OPT}_FLAGS_ON}") |
| +endmacro() |
| + |
| +# Support GCC -march=native or Intel Compiler -xHost flags; CPU_BASELINE_DETECT switches the baseline to compiler-probed detection |
| +if(";${CPU_BASELINE};" MATCHES ";NATIVE;" OR ";${CPU_BASELINE};" MATCHES ";HOST;") |
| + set(CPU_BASELINE_DETECT ON) |
| + set(_add_native_flag ON) # also request an explicit native/host compiler flag |
| +elseif(";${CPU_BASELINE};" MATCHES ";DETECT;") |
| + set(CPU_BASELINE_DETECT ON) |
| +elseif(" ${CMAKE_CXX_FLAGS} " MATCHES " -march=native | -xHost | /QxHost ") # user already passed such a flag |
| + if(DEFINED CPU_BASELINE) |
| + message(STATUS "CPU: Detected '-march=native' or '-xHost' compiler flag. Force CPU_BASELINE=DETECT.") |
| + endif() |
| + set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") |
| + set(CPU_BASELINE_DETECT ON) |
| +endif() |
| + |
| +if(X86 OR X86_64) |
| + ocv_update(CPU_KNOWN_OPTIMIZATIONS "SSE;SSE2;SSE3;SSSE3;SSE4_1;POPCNT;SSE4_2;FP16;FMA3;AVX;AVX2;AVX512") |
| + |
| + ocv_update(CPU_SSE_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse.cpp") |
| + ocv_update(CPU_SSE2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse2.cpp") |
| + ocv_update(CPU_SSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse3.cpp") |
| + ocv_update(CPU_SSSE3_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_ssse3.cpp") |
| + ocv_update(CPU_SSE4_1_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse41.cpp") |
| + ocv_update(CPU_SSE4_2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_sse42.cpp") |
| + ocv_update(CPU_POPCNT_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_popcnt.cpp") |
| + ocv_update(CPU_AVX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx.cpp") |
| + ocv_update(CPU_AVX2_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx2.cpp") |
| + ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp") |
| + ocv_update(CPU_AVX512_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_avx512.cpp") |
| + |
| + if(NOT OPENCV_CPU_OPT_IMPLIES_IGNORE) |
| + ocv_update(CPU_AVX512_IMPLIES "AVX2") |
| + ocv_update(CPU_AVX512_FORCE "") # Don't force other optimizations |
| + ocv_update(CPU_AVX2_IMPLIES "AVX;FMA3;FP16") |
| + ocv_update(CPU_FMA3_IMPLIES "AVX2") |
| + ocv_update(CPU_FMA3_FORCE "") # Don't force other optimizations |
| + ocv_update(CPU_FP16_IMPLIES "AVX") |
| + ocv_update(CPU_FP16_FORCE "") # Don't force other optimizations |
| + ocv_update(CPU_AVX_IMPLIES "SSE4_2") |
| + ocv_update(CPU_SSE4_2_IMPLIES "SSE4_1;POPCNT") |
| + ocv_update(CPU_POPCNT_IMPLIES "SSE4_1") |
| + ocv_update(CPU_POPCNT_FORCE "") # Don't force other optimizations |
| + ocv_update(CPU_SSE4_1_IMPLIES "SSE3;SSSE3") |
| + ocv_update(CPU_SSSE3_IMPLIES "SSE3") |
| + ocv_update(CPU_SSE3_IMPLIES "SSE2") |
| + ocv_update(CPU_SSE2_IMPLIES "SSE") |
| + endif() |
| + |
| + if(CV_ICC) |
| + macro(ocv_intel_compiler_optimization_option name unix_flags msvc_flags) |
| + ocv_update(CPU_${name}_FLAGS_NAME "${name}") |
| + if(MSVC) |
| + set(enable_flags "${msvc_flags}") |
| + set(flags_conflict "/arch:[^ ]+") |
| + else() |
| + set(enable_flags "${unix_flags}") |
| + set(flags_conflict "-msse[^ ]*|-mssse3|-mavx[^ ]*|-march[^ ]+") |
| + endif() |
| + ocv_update(CPU_${name}_FLAGS_ON "${enable_flags}") |
| + if(flags_conflict) |
| + ocv_update(CPU_${name}_FLAGS_CONFLICT "${flags_conflict}") |
| + endif() |
| + endmacro() |
| + ocv_intel_compiler_optimization_option(AVX2 "-march=core-avx2" "/arch:CORE-AVX2") |
| + ocv_intel_compiler_optimization_option(FP16 "-mavx" "/arch:AVX") |
| + ocv_intel_compiler_optimization_option(AVX "-mavx" "/arch:AVX") |
| + ocv_intel_compiler_optimization_option(FMA3 "" "") |
| + ocv_intel_compiler_optimization_option(POPCNT "" "") |
| + ocv_intel_compiler_optimization_option(SSE4_2 "-msse4.2" "/arch:SSE4.2") |
| + ocv_intel_compiler_optimization_option(SSE4_1 "-msse4.1" "/arch:SSE4.1") |
| + ocv_intel_compiler_optimization_option(SSE3 "-msse3" "/arch:SSE3") |
| + ocv_intel_compiler_optimization_option(SSSE3 "-mssse3" "/arch:SSSE3") |
| + ocv_intel_compiler_optimization_option(SSE2 "-msse2" "/arch:SSE2") |
| + if(NOT X86_64) # x64 compiler doesn't support /arch:sse |
| + ocv_intel_compiler_optimization_option(SSE "-msse" "/arch:SSE") |
| + endif() |
| + #ocv_intel_compiler_optimization_option(AVX512 "-march=core-avx512") |
| + elseif(CMAKE_COMPILER_IS_GNUCXX) |
| + ocv_update(CPU_AVX2_FLAGS_ON "-mavx2") |
| + ocv_update(CPU_FP16_FLAGS_ON "-mf16c") |
| + ocv_update(CPU_AVX_FLAGS_ON "-mavx") |
| + ocv_update(CPU_FMA3_FLAGS_ON "-mfma") |
| + ocv_update(CPU_POPCNT_FLAGS_ON "-mpopcnt") |
| + ocv_update(CPU_SSE4_2_FLAGS_ON "-msse4.2") |
| + ocv_update(CPU_SSE4_1_FLAGS_ON "-msse4.1") |
| + ocv_update(CPU_SSE3_FLAGS_ON "-msse3") |
| + ocv_update(CPU_SSSE3_FLAGS_ON "-mssse3") |
| + ocv_update(CPU_SSE2_FLAGS_ON "-msse2") |
| + ocv_update(CPU_SSE_FLAGS_ON "-msse") |
| + if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "5.0") |
| + ocv_update(CPU_AVX512_FLAGS_ON "-mavx512f -mavx512pf -mavx512er -mavx512cd -mavx512vl -mavx512bw -mavx512dq -mavx512ifma -mavx512vbmi") |
| + endif() |
| + elseif(MSVC) |
| + ocv_update(CPU_AVX2_FLAGS_ON "/arch:AVX2") |
| + ocv_update(CPU_AVX_FLAGS_ON "/arch:AVX") |
| + if(NOT MSVC64) |
| + # 64-bit MSVC compiler uses SSE/SSE2 by default |
| + ocv_update(CPU_SSE_FLAGS_ON "/arch:SSE") |
| + ocv_update(CPU_SSE_SUPPORTED ON) |
| + ocv_update(CPU_SSE2_FLAGS_ON "/arch:SSE2") |
| + ocv_update(CPU_SSE2_SUPPORTED ON) |
| + else() |
| + ocv_update(CPU_SSE_SUPPORTED ON) |
| + ocv_update(CPU_SSE2_SUPPORTED ON) |
| + endif() |
| + # Other instruction sets are supported by default since MSVC 2008 at least |
| + else() |
| + message(WARNING "TODO: Unsupported compiler") |
| + endif() |
| + |
| + if(NOT DEFINED CPU_DISPATCH) |
| + set(CPU_DISPATCH "SSE4_1;AVX;FP16;AVX2" CACHE STRING "${HELP_CPU_DISPATCH}") |
| + endif() |
| + |
| + if(NOT DEFINED CPU_BASELINE) |
| + if(X86_64) |
| + set(CPU_BASELINE "SSSE3" CACHE STRING "${HELP_CPU_BASELINE}") |
| + else() |
| + set(CPU_BASELINE "SSE2" CACHE STRING "${HELP_CPU_BASELINE}") |
| + endif() |
| + endif() |
| + |
| +elseif(ARM OR AARCH64) |
| + ocv_update(CPU_FP16_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp") |
| + if(NOT AARCH64) |
| + ocv_update(CPU_KNOWN_OPTIMIZATIONS "VFPV3;NEON;FP16") |
| + ocv_update(CPU_NEON_FLAGS_ON "-mfpu=neon") |
| + ocv_update(CPU_VFPV3_FLAGS_ON "-mfpu=vfpv3") |
| + ocv_update(CPU_FP16_FLAGS_ON "-mfpu=neon-fp16") |
| + set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") |
| + else() |
| + ocv_update(CPU_KNOWN_OPTIMIZATIONS "NEON;FP16") |
| + ocv_update(CPU_NEON_FLAGS_ON "") |
| + set(CPU_BASELINE "NEON" CACHE STRING "${HELP_CPU_BASELINE}") |
| + endif() |
| +endif() |
| + |
| +# Helper values for cmake-gui |
| +set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}") |
| +set(CPU_DISPATCH "" CACHE STRING "${HELP_CPU_DISPATCH}") |
| +set_property(CACHE CPU_BASELINE PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS}) |
| +set_property(CACHE CPU_DISPATCH PROPERTY STRINGS "" ${CPU_KNOWN_OPTIMIZATIONS}) |
| + |
| +set(CPU_BASELINE_FLAGS "") |
| + |
| +set(CPU_BASELINE_FINAL "") |
| +set(CPU_DISPATCH_FINAL "") |
| +# Probe compiler support for OPT and set CPU_<OPT>_SUPPORTED (skipped if already defined); in detect mode an OPT that builds with baseline flags alone is appended to CPU_BASELINE_FINAL |
| +macro(ocv_check_compiler_optimization OPT) |
| + if(NOT DEFINED CPU_${OPT}_SUPPORTED) |
| + if((DEFINED CPU_${OPT}_FLAGS_ON AND NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") OR CPU_${OPT}_TEST_FILE) |
| + set(_varname "") |
| + if(CPU_${OPT}_TEST_FILE) |
| + set(__available 0) |
| + if(CPU_BASELINE_DETECT) # detect mode: first try the test file with the baseline flags only |
| + set(_varname "HAVE_CPU_${OPT}_SUPPORT") |
| + ocv_check_compiler_flag(CXX "${CPU_BASELINE_FLAGS}" "${_varname}" "${CPU_${OPT}_TEST_FILE}") |
| + if(${_varname}) |
| + list(APPEND CPU_BASELINE_FINAL ${OPT}) |
| + set(__available 1) |
| + endif() |
| + endif() |
| + if(NOT __available) # otherwise retry with this optimization's own flags added |
| + if(NOT "x${CPU_${OPT}_FLAGS_NAME}" STREQUAL "x") |
| + set(_varname "HAVE_CPU_${CPU_${OPT}_FLAGS_NAME}") |
| + set(_compile_flags "${CPU_BASELINE_FLAGS}") |
| + ocv_append_optimization_flag(_compile_flags ${OPT}) |
| + ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}") |
| + elseif(NOT "x${CPU_${OPT}_FLAGS_ON}" STREQUAL "x") |
| + ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "" "${CPU_${OPT}_TEST_FILE}") |
| + else() |
| + set(_varname "HAVE_CPU_${OPT}_SUPPORT") |
| + set(_compile_flags "${CPU_BASELINE_FLAGS}") |
| + ocv_append_optimization_flag(_compile_flags ${OPT}) |
| + ocv_check_compiler_flag(CXX "${_compile_flags}" "${_varname}" "${CPU_${OPT}_TEST_FILE}") |
| + endif() |
| + endif() |
| + else() |
| + ocv_check_flag_support(CXX "${CPU_${OPT}_FLAGS_ON}" _varname "") |
| + endif() |
| + if(_varname AND ${_varname}) # _varname holds the name of the check-result variable |
| + set(CPU_${OPT}_SUPPORTED ON) |
| + elseif(NOT CPU_${OPT}_SUPPORTED) |
| + message(STATUS "${OPT} is not supported by C++ compiler") |
| + endif() |
| + else() |
| + set(CPU_${OPT}_SUPPORTED ON) # no flag and no test file to probe: treated as supported |
| + endif() |
| + endif() |
| +endmacro() |
| + |
| +foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS}) |
| + set(CPU_${OPT}_USAGE_COUNT 0 CACHE INTERNAL "" FORCE) |
| + if(NOT DEFINED CPU_${OPT}_FORCE) |
| + set(CPU_${OPT}_FORCE "${CPU_${OPT}_IMPLIES}") |
| + endif() |
| +endforeach() |
| +# Add the native/host tuning flag to CPU_BASELINE_FLAGS, but only when the corresponding compiler check succeeded |
| +if(_add_native_flag) |
| + set(_varname "HAVE_CPU_NATIVE_SUPPORT") |
| + ocv_check_compiler_flag(CXX "-march=native" "${_varname}" "") |
| + if(${_varname}) # test the check result, not the (always non-empty) result-variable name |
| + set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} -march=native") |
| + else() |
| + set(_varname "HAVE_CPU_HOST_SUPPORT") # fall back to the -xHost / /QxHost spelling |
| + if(MSVC) |
| + set(_flag "/QxHost") |
| + else() |
| + set(_flag "-xHost") |
| + endif() |
| + ocv_check_compiler_flag(CXX "${_flag}" "${_varname}" "") |
| + if(${_varname}) |
| + set(CPU_BASELINE_FLAGS "${CPU_BASELINE_FLAGS} ${_flag}") |
| + endif() |
| + endif() |
| +endif() |
| +# Classify every known optimization: probe compiler support, then route it to CPU_BASELINE_FINAL (extending CPU_BASELINE_FLAGS) or to CPU_DISPATCH_FINAL |
| +foreach(OPT ${CPU_KNOWN_OPTIMIZATIONS}) |
| + set(__is_disabled 0) |
| + foreach(OPT2 ${CPU_BASELINE_DISABLE}) # OPT is disabled if it is, or implies, a disabled optimization |
| + ocv_is_optimization_in_list(__is_disabled ${OPT2} ${OPT}) |
| + if(__is_disabled) |
| + break() |
| + endif() |
| + endforeach() |
| + if(__is_disabled) |
| + set(__is_from_baseline 0) |
| + else() |
| + ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_REQUIRE}) |
| + if(NOT __is_from_baseline) |
| + ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE}) |
| + endif() |
| + endif() |
| + ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH_REQUIRE}) |
| + if(NOT __is_from_dispatch) |
| + ocv_is_optimization_in_list(__is_from_dispatch ${OPT} ${CPU_DISPATCH}) |
| + endif() |
| + if(__is_from_dispatch OR __is_from_baseline OR CPU_BASELINE_DETECT) |
| + ocv_check_compiler_optimization(${OPT}) |
| + endif() |
| + if(CPU_BASELINE_DETECT AND NOT __is_from_baseline AND NOT __is_disabled) # the probe above may have added OPT to CPU_BASELINE_FINAL |
| + ocv_is_optimization_in_list(__is_from_baseline ${OPT} ${CPU_BASELINE_FINAL}) |
| + endif() |
| + if(CPU_${OPT}_SUPPORTED) |
| + if(";${CPU_DISPATCH};" MATCHES ";${OPT};" AND NOT __is_from_baseline) # dispatch only what is listed explicitly and not already baseline |
| + list(APPEND CPU_DISPATCH_FINAL ${OPT}) |
| + elseif(__is_from_baseline AND NOT CPU_BASELINE_DETECT) |
| + list(APPEND CPU_BASELINE_FINAL ${OPT}) |
| + ocv_append_optimization_flag(CPU_BASELINE_FLAGS ${OPT}) |
| + endif() |
| + endif() |
| +endforeach() |
| +# Report requested optimizations missing from the final lists: *_REQUIRE entries raise SEND_ERROR, plain CPU_BASELINE/CPU_DISPATCH entries only log a STATUS message |
| +foreach(OPT ${CPU_BASELINE_REQUIRE}) |
| + if(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") |
| + message(SEND_ERROR "Required baseline optimization is not supported: ${OPT} (CPU_BASELINE_REQUIRE=${CPU_BASELINE_REQUIRE})") |
| + endif() |
| +endforeach() |
| + |
| +foreach(OPT ${CPU_BASELINE}) |
| + if(OPT STREQUAL "DETECT" OR OPT STREQUAL "HOST" OR OPT STREQUAL "NATIVE") |
| + # nothing: these are mode keywords, not optimizations |
| + elseif(NOT ";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") |
| + message(STATUS "Optimization ${OPT} is not available, skipped") |
| + endif() |
| +endforeach() |
| + |
| +foreach(OPT ${CPU_DISPATCH_REQUIRE}) |
| + if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};") |
| + # OK |
| + elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") |
| + message(SEND_ERROR "Dispatched optimization ${OPT} is in baseline list (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})") |
| + else() |
| + message(SEND_ERROR "Required dispatch optimization is not supported: ${OPT} (CPU_DISPATCH_REQUIRE=${CPU_DISPATCH_REQUIRE})") |
| + endif() |
| +endforeach() |
| + |
| +foreach(OPT ${CPU_DISPATCH}) |
| + if(";${CPU_DISPATCH_FINAL};" MATCHES ";${OPT};") |
| + # OK |
| + elseif(";${CPU_BASELINE_FINAL};" MATCHES ";${OPT};") |
| + # OK: already part of the baseline |
| + else() |
| + message(STATUS "Dispatch optimization ${OPT} is not available, skipped") |
| + endif() |
| +endforeach() |
| + |
| +#message(STATUS "CPU_BASELINE_FINAL=${CPU_BASELINE_FINAL}") |
| +#message(STATUS "CPU_DISPATCH_FINAL=${CPU_DISPATCH_FINAL}") |
| + |
| +#if(CPU_DISPATCH_FINAL AND NOT PYTHON_DEFAULT_EXECUTABLE) |
| +# message(FATAL_ERROR "Python is required for CPU dispatched optimization support") |
| +#endif() |
| +# Verify and apply the baseline flags, then compute per-dispatch-mode CPU_DISPATCH_FLAGS_<opt>, CPU_DISPATCH_DEFINITIONS_<opt> and CPU_DISPATCH_<opt>_INCLUDED/_FORCED |
| +macro(ocv_compiler_optimization_options) |
| + set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${CPU_BASELINE_FLAGS}") |
| + if(NOT __flags STREQUAL CACHED_CPU_BASELINE_FLAGS) # flags differ from the cached ones: drop the previous check result |
| + set(CACHED_CPU_BASELINE_FLAGS "${__flags}" CACHE INTERNAL "" FORCE) |
| + ocv_clear_vars(HAVE_CPU_BASELINE_FLAGS) |
| + endif() |
| + ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_BASELINE_FLAGS) |
| + if(NOT HAVE_CPU_BASELINE_FLAGS) |
| + message(FATAL_ERROR "Compiler doesn't support baseline optimization flags: ${CPU_BASELINE_FLAGS}") |
| + endif() |
| + add_extra_compiler_option_force("${CPU_BASELINE_FLAGS}") |
| + |
| + foreach(OPT ${CPU_DISPATCH_FINAL}) |
| + set(__dispatch_flags "") |
| + set(__dispatch_definitions "") |
| + set(__dispatch_opts "") |
| + set(__dispatch_opts_force "") |
| + foreach(OPT2 ${CPU_KNOWN_OPTIMIZATIONS}) |
| + if(NOT CPU_${OPT2}_SUPPORTED) |
| + #continue() |
| + else() |
| + ocv_is_optimization_in_list(__is_from_baseline ${OPT2} ${CPU_BASELINE_FINAL}) |
| + if(NOT __is_from_baseline) # only optimizations beyond the baseline contribute to a dispatch mode |
| + ocv_is_optimization_in_list(__is_active ${OPT2} ${OPT}) |
| + if(__is_active) |
| + ocv_append_optimization_flag(__dispatch_flags ${OPT2}) |
| + list(APPEND __dispatch_definitions "CV_CPU_COMPILE_${OPT2}=1") |
| + list(APPEND __dispatch_opts "${OPT2}") |
| + endif() |
| + ocv_is_optimization_in_force_list(__is_force ${OPT2} ${OPT}) |
| + if(__is_force) |
| + list(APPEND __dispatch_opts_force "${OPT2}") |
| + endif() |
| + endif() |
| + endif() |
| + endforeach() |
| + set(__flags "${OPENCV_EXTRA_CXX_FLAGS} ${__dispatch_flags}") |
| + if(NOT __flags STREQUAL CACHED_CPU_DISPATCH_${OPT}_FLAGS) # same cache invalidation as for the baseline flags |
| + set(CACHED_CPU_DISPATCH_${OPT}_FLAGS "${__flags}" CACHE INTERNAL "" FORCE) |
| + ocv_clear_vars(HAVE_CPU_DISPATCH_FLAGS_${OPT}) |
| + endif() |
| + ocv_check_compiler_flag(CXX "${__flags}" HAVE_CPU_DISPATCH_FLAGS_${OPT}) |
| + if(NOT HAVE_CPU_DISPATCH_FLAGS_${OPT}) |
| + message(FATAL_ERROR "Compiler doesn't support optimization flags for ${OPT} dispatch mode: ${__dispatch_flags}") |
| + endif() |
| + set(CPU_DISPATCH_FLAGS_${OPT} "${__dispatch_flags}") |
| + set(CPU_DISPATCH_DEFINITIONS_${OPT} "${__dispatch_definitions}") |
| + set(CPU_DISPATCH_${OPT}_INCLUDED "${__dispatch_opts}") |
| + set(CPU_DISPATCH_${OPT}_FORCED "${__dispatch_opts_force}") |
| + endforeach() |
| + |
| + if(ENABLE_POWERPC) |
| + add_extra_compiler_option("-mcpu=G3 -mtune=G5") |
| + endif() |
| + if(ARM) |
| + add_extra_compiler_option("-mfp16-format=ieee") |
| + endif(ARM) |
| + if(ENABLE_NEON) |
| + add_extra_compiler_option("-mfpu=neon") |
| + endif() |
| + if(ENABLE_VFPV3 AND NOT ENABLE_NEON) |
| + add_extra_compiler_option("-mfpu=vfpv3") |
| + endif() |
| +endmacro() |
| +# Final floating-point tweaks: -mfpmath for 32-bit GCC x86 builds (non-Apple), /Oi for MSVC and /fp:fast for 32-bit MSVC x86 builds with SSE in the baseline |
| +macro(ocv_compiler_optimization_options_finalize) |
| + if(CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) |
| + if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4) |
| + if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)") |
| + add_extra_compiler_option(-mfpmath=sse) # !! important - be on the same wave with x64 compilers |
| + else() |
| + add_extra_compiler_option(-mfpmath=387) |
| + endif() |
| + endif() |
| + endif() |
| + |
| + if(MSVC) |
| + # Generate Intrinsic Functions |
| + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi") |
| + |
| + if((X86 OR X86_64) AND CMAKE_SIZEOF_VOID_P EQUAL 4 AND ";${CPU_BASELINE_FINAL};" MATCHES ";SSE;") |
| + set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers |
| + endif() |
| + endif(MSVC) |
| +endmacro() |
| +# Filter the source list named by SOURCES_VAR_NAME: '.opt_*.cpp' files are kept only if their suffix matches a baseline optimization or a forced optimization of a dispatch mode; dispatched files get that mode's flags and definitions |
| +macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME TARGET_BASE_NAME) |
| + set(__result "") |
| + set(__result_libs "") |
| + foreach(OPT ${CPU_DISPATCH_FINAL}) |
| + set(__result_${OPT} "") |
| + endforeach() |
| + foreach(fname ${${SOURCES_VAR_NAME}}) |
| + string(TOLOWER "${fname}" fname_LOWER) |
| + if(fname_LOWER MATCHES "[.]opt_.*[.]cpp$") |
| + if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS) |
| + message(STATUS "Excluding from source files list: ${fname}") |
| + #continue() |
| + else() |
| + set(__opt_found 0) |
| + foreach(OPT ${CPU_BASELINE_FINAL}) # baseline match: compiled with the regular flags |
| + string(TOLOWER "${OPT}" OPT_LOWER) |
| + if(fname_LOWER MATCHES "_${OPT_LOWER}[.]cpp$") |
| +#message("${fname} BASELINE-${OPT}") |
| + set(__opt_found 1) |
| + list(APPEND __result "${fname}") |
| + break() |
| + endif() |
| + endforeach() |
| + foreach(OPT ${CPU_DISPATCH_FINAL}) # dispatch match: first mode whose forced list contains the file suffix |
| + foreach(OPT2 ${CPU_DISPATCH_${OPT}_FORCED}) |
| + string(TOLOWER "${OPT2}" OPT2_LOWER) |
| + if(fname_LOWER MATCHES "_${OPT2_LOWER}[.]cpp$") |
| + list(APPEND __result_${OPT} "${fname}") |
| + math(EXPR CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}+1") |
| + set(CPU_${OPT}_USAGE_COUNT "${CPU_${OPT}_USAGE_COUNT}" CACHE INTERNAL "" FORCE) |
| +#message("${fname} ${OPT}") |
| +#message(" ${CPU_DISPATCH_${OPT}_INCLUDED}") |
| +#message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}") |
| +#message(" ${CPU_DISPATCH_FLAGS_${OPT}}") |
| + set(__opt_found 1) |
| + break() |
| + endif() |
| + endforeach() |
| + if(__opt_found) # also true after a baseline match, so such files skip the remaining dispatch modes |
| + set(__opt_found 1) |
| + break() |
| + endif() |
| + endforeach() |
| + if(NOT __opt_found) |
| + message(STATUS "Excluding from source files list: ${fname}") |
| + endif() |
| + endif() |
| + else() |
| + list(APPEND __result "${fname}") |
| + endif() |
| + endforeach() |
| + |
| + foreach(OPT ${CPU_DISPATCH_FINAL}) |
| + if(__result_${OPT}) |
| +#message("${OPT}: ${__result_${OPT}}") |
| + if(CMAKE_GENERATOR MATCHES "^Visual") |
| + # extra flags are added before common flags, so switching between optimizations doesn't work correctly |
| + # Also CMAKE_CXX_FLAGS doesn't work (it is directory-based, so add_subdirectory is required) |
| + add_library(${TARGET_BASE_NAME}_${OPT} OBJECT ${__result_${OPT}}) |
| + ocv_append_dependant_targets(${TARGET_BASE_NAME} ${TARGET_BASE_NAME}_${OPT}) |
| + set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}") |
| + set_target_properties(${TARGET_BASE_NAME}_${OPT} PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}") |
| + #list(APPEND __result_libs ${TARGET_BASE_NAME}_${OPT}) |
| + list(APPEND __result "$<TARGET_OBJECTS:${TARGET_BASE_NAME}_${OPT}>") |
| + else() |
| + foreach(fname ${__result_${OPT}}) |
| + set_source_files_properties("${fname}" PROPERTIES COMPILE_DEFINITIONS "${CPU_DISPATCH_DEFINITIONS_${OPT}}") |
| + set_source_files_properties("${fname}" PROPERTIES COMPILE_FLAGS "${CPU_DISPATCH_FLAGS_${OPT}}") |
| + endforeach() |
| + list(APPEND __result ${__result_${OPT}}) |
| + endif() |
| + endif() |
| + endforeach() |
| + set(${SOURCES_VAR_NAME} "${__result}") |
| + list(APPEND ${LIBS_VAR_NAME} ${__result_libs}) |
| +endmacro() |
| + |
| +# Fills the *_CONFIGMAKE variables consumed by cv_cpu_config.h.in and refreshes the autogenerated cv_cpu_helper.h. |
| +macro(ocv_compiler_optimization_fill_cpu_config) |
| + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "") |
| + foreach(OPT ${CPU_BASELINE_FINAL}) # one CV_CPU_COMPILE_* / CV_CPU_BASELINE_COMPILE_* pair per baseline feature |
| + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE} |
| +#define CV_CPU_COMPILE_${OPT} 1 |
| +#define CV_CPU_BASELINE_COMPILE_${OPT} 1 |
| +") |
| + endforeach() |
| + |
| + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE} |
| +#define CV_CPU_BASELINE_FEATURES 0 \\") |
| + foreach(OPT ${CPU_BASELINE_FINAL}) # extend the CV_CPU_BASELINE_FEATURES list; entries whose FEATURE_ALIAS is defined but empty are skipped |
| + if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x") |
| + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE} |
| + , CV_CPU_${OPT} \\") |
| + endif() |
| + endforeach() |
| + set(OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE}\n") |
| + |
| + set(__dispatch_modes "") |
| + foreach(OPT ${CPU_DISPATCH_FINAL}) |
| + list(APPEND __dispatch_modes ${CPU_DISPATCH_${OPT}_FORCE} ${OPT}) # NOTE(review): source files are matched against CPU_DISPATCH_<opt>_FORCED; confirm the _FORCE spelling is intended |
| + endforeach() |
| + list(REMOVE_DUPLICATES __dispatch_modes) |
| + set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "") |
| + foreach(OPT ${__dispatch_modes}) # one CV_CPU_DISPATCH_COMPILE_* define per collected dispatch mode |
| + set(OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE} |
| +#define CV_CPU_DISPATCH_COMPILE_${OPT} 1") |
| + endforeach() |
| + |
| + set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "// AUTOGENERATED, DO NOT EDIT\n") |
| + foreach(OPT ${CPU_ALL_OPTIMIZATIONS}) # build the expected text of cv_cpu_helper.h, one #if/#elif/#else group per feature |
| + if(NOT DEFINED CPU_${OPT}_FEATURE_ALIAS OR NOT "x${CPU_${OPT}_FEATURE_ALIAS}" STREQUAL "x") |
| + set(OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE} |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_${OPT} |
| +# define CV_CPU_HAS_SUPPORT_${OPT} 1 |
| +# define CV_CPU_CALL_${OPT}(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_${OPT} |
| +# define CV_CPU_HAS_SUPPORT_${OPT} (cv::checkHardwareSupport(CV_CPU_${OPT})) |
| +# define CV_CPU_CALL_${OPT}(...) if (CV_CPU_HAS_SUPPORT_${OPT}) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_${OPT} 0 |
| +# define CV_CPU_CALL_${OPT}(...) |
| +#endif |
| +") |
| + endif() |
| + endforeach() |
| + |
| + set(__file "${OpenCV_SOURCE_DIR}/modules/core/include/opencv2/core/cv_cpu_helper.h") # project root, not CMAKE_SOURCE_DIR, so the path stays valid when OpenCV is not the top-level project |
| + set(__content "") # macro scope: never compare against a stale value when the header does not exist yet |
| + if(EXISTS "${__file}") |
| + file(READ "${__file}" __content) |
| + endif() |
| + if(NOT __content STREQUAL OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE) # rewrite the header only when its text changed |
| + file(WRITE "${__file}" "${OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE}") |
| + message(WARNING "${__file} is updated") |
| + endif() |
| +endmacro() |
| + |
| +if(NOT CV_DISABLE_OPTIMIZATION AND NOT CV_ICC) # default for CV_ENABLE_UNROLLED: 1 unless CV_DISABLE_OPTIMIZATION or CV_ICC is set |
| + ocv_update(CV_ENABLE_UNROLLED 1) |
| +else() |
| + ocv_update(CV_ENABLE_UNROLLED 0) |
| +endif() |
| diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake |
| index 5bb0479..0eb68b6 100644 |
| --- a/cmake/OpenCVCompilerOptions.cmake |
| +++ b/cmake/OpenCVCompilerOptions.cmake |
| @@ -31,24 +31,21 @@ endif() |
| if(MINGW OR (X86 AND UNIX AND NOT APPLE)) |
| # mingw compiler is known to produce unstable SSE code with -O3 hence we are trying to use -O2 instead |
| if(CMAKE_COMPILER_IS_GNUCXX) |
| - foreach(flags CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) |
| - string(REPLACE "-O3" "-O2" ${flags} "${${flags}}") |
| - endforeach() |
| - endif() |
| - |
| - if(CMAKE_COMPILER_IS_GNUCC) |
| - foreach(flags CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG) |
| + foreach(flags |
| + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG |
| + CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_DEBUG) |
| string(REPLACE "-O3" "-O2" ${flags} "${${flags}}") |
| endforeach() |
| endif() |
| endif() |
| |
| if(MSVC) |
| - string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") |
| - string(REGEX REPLACE "^ *| * $" "" CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT}") |
| + string(STRIP "${CMAKE_CXX_FLAGS}" CMAKE_CXX_FLAGS) |
| + string(STRIP "${CMAKE_CXX_FLAGS_INIT}" CMAKE_CXX_FLAGS_INIT) |
| if(CMAKE_CXX_FLAGS STREQUAL CMAKE_CXX_FLAGS_INIT) |
| # override cmake default exception handling option |
| - string(REPLACE "/EHsc" "/EHa" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") |
| + string(REPLACE "/EHsc" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") |
| + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHa") |
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "Flags used by the compiler during all build types." FORCE) |
| endif() |
| endif() |
| @@ -63,9 +60,6 @@ set(OPENCV_EXTRA_EXE_LINKER_FLAGS_RELEASE "") |
| set(OPENCV_EXTRA_EXE_LINKER_FLAGS_DEBUG "") |
| |
| macro(add_extra_compiler_option option) |
| - if(CMAKE_BUILD_TYPE) |
| - set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE}) |
| - endif() |
| ocv_check_flag_support(CXX "${option}" _varname "${OPENCV_EXTRA_CXX_FLAGS} ${ARGN}") |
| if(${_varname}) |
| set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}") |
| @@ -77,6 +71,12 @@ macro(add_extra_compiler_option option) |
| endif() |
| endmacro() |
| |
| +macro(add_extra_compiler_option_force option) # appends <option> to the extra C and C++ flags directly, without the ocv_check_flag_support() probe used by add_extra_compiler_option() |
| + set(OPENCV_EXTRA_CXX_FLAGS "${OPENCV_EXTRA_CXX_FLAGS} ${option}") |
| + set(OPENCV_EXTRA_C_FLAGS "${OPENCV_EXTRA_C_FLAGS} ${option}") |
| +endmacro() |
| + |
| + |
| # Gets environment variable and puts its value to the corresponding preprocessor definition |
| # Useful for WINRT that has no access to environment variables |
| macro(add_env_definitions option) |
| @@ -102,7 +102,11 @@ if(MINGW) |
| endif() |
| |
| if(CV_ICC AND NOT ENABLE_FAST_MATH) |
| - add_extra_compiler_option("-fp-model precise") |
| + if(MSVC) |
| + add_extra_compiler_option("/fp:precise") |
| + else() |
| + add_extra_compiler_option("-fp-model precise") |
| + endif() |
| endif() |
| |
| if(CMAKE_COMPILER_IS_GNUCXX) |
| @@ -141,7 +145,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) |
| endif() |
| |
| # We need pthread's |
| - if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX)) |
| + if(UNIX AND NOT ANDROID AND NOT (APPLE AND CMAKE_COMPILER_IS_CLANGCXX)) # TODO |
| add_extra_compiler_option(-pthread) |
| endif() |
| |
| @@ -170,83 +174,6 @@ if(CMAKE_COMPILER_IS_GNUCXX) |
| if(ENABLE_FAST_MATH) |
| add_extra_compiler_option(-ffast-math) |
| endif() |
| - if(ENABLE_POWERPC) |
| - add_extra_compiler_option("-mcpu=G3 -mtune=G5") |
| - endif() |
| - if(ENABLE_SSE) |
| - add_extra_compiler_option(-msse) |
| - endif() |
| - if(ENABLE_SSE2) |
| - add_extra_compiler_option(-msse2) |
| - elseif(X86 OR X86_64) |
| - add_extra_compiler_option(-mno-sse2) |
| - endif() |
| - if(ARM) |
| - add_extra_compiler_option("-mfp16-format=ieee") |
| - endif(ARM) |
| - if(ENABLE_NEON) |
| - add_extra_compiler_option("-mfpu=neon") |
| - endif() |
| - if(ENABLE_VFPV3 AND NOT ENABLE_NEON) |
| - add_extra_compiler_option("-mfpu=vfpv3") |
| - endif() |
| - |
| - # SSE3 and further should be disabled under MingW because it generates compiler errors |
| - if(NOT MINGW) |
| - if(ENABLE_AVX) |
| - add_extra_compiler_option(-mavx) |
| - elseif(X86 OR X86_64) |
| - add_extra_compiler_option(-mno-avx) |
| - endif() |
| - if(ENABLE_AVX2) |
| - add_extra_compiler_option(-mavx2) |
| - |
| - if(ENABLE_FMA3) |
| - add_extra_compiler_option(-mfma) |
| - endif() |
| - endif() |
| - |
| - # GCC depresses SSEx instructions when -mavx is used. Instead, it generates new AVX instructions or AVX equivalence for all SSEx instructions when needed. |
| - if(NOT OPENCV_EXTRA_CXX_FLAGS MATCHES "-mavx") |
| - if(ENABLE_SSE3) |
| - add_extra_compiler_option(-msse3) |
| - elseif(X86 OR X86_64) |
| - add_extra_compiler_option(-mno-sse3) |
| - endif() |
| - |
| - if(ENABLE_SSSE3) |
| - add_extra_compiler_option(-mssse3) |
| - elseif(X86 OR X86_64) |
| - add_extra_compiler_option(-mno-ssse3) |
| - endif() |
| - |
| - if(ENABLE_SSE41) |
| - add_extra_compiler_option(-msse4.1) |
| - elseif(X86 OR X86_64) |
| - add_extra_compiler_option(-mno-sse4.1) |
| - endif() |
| - |
| - if(ENABLE_SSE42) |
| - add_extra_compiler_option(-msse4.2) |
| - elseif(X86 OR X86_64) |
| - add_extra_compiler_option(-mno-sse4.2) |
| - endif() |
| - |
| - if(ENABLE_POPCNT) |
| - add_extra_compiler_option(-mpopcnt) |
| - endif() |
| - endif() |
| - endif(NOT MINGW) |
| - |
| - if(X86 OR X86_64) |
| - if(NOT APPLE AND CMAKE_SIZEOF_VOID_P EQUAL 4) |
| - if(OPENCV_EXTRA_CXX_FLAGS MATCHES "-m(sse2|avx)") |
| - add_extra_compiler_option(-mfpmath=sse)# !! important - be on the same wave with x64 compilers |
| - else() |
| - add_extra_compiler_option(-mfpmath=387) |
| - endif() |
| - endif() |
| - endif() |
| |
| # Profiling? |
| if(ENABLE_PROFILING) |
| @@ -257,7 +184,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) |
| string(REPLACE "-fomit-frame-pointer" "" ${flags} "${${flags}}") |
| string(REPLACE "-ffunction-sections" "" ${flags} "${${flags}}") |
| endforeach() |
| - elseif(NOT APPLE AND NOT ANDROID) |
| + elseif(NOT ((IOS OR ANDROID) AND NOT BUILD_SHARED_LIBS)) |
| # Remove unreferenced functions: function level linking |
| add_extra_compiler_option(-ffunction-sections) |
| endif() |
| @@ -296,41 +223,6 @@ if(MSVC) |
| set(OPENCV_EXTRA_FLAGS_RELEASE "${OPENCV_EXTRA_FLAGS_RELEASE} /Zi") |
| endif() |
| |
| - if(ENABLE_AVX2 AND NOT MSVC_VERSION LESS 1800) |
| - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX2") |
| - endif() |
| - if(ENABLE_AVX AND NOT MSVC_VERSION LESS 1600 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") |
| - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:AVX") |
| - endif() |
| - |
| - if(ENABLE_SSE4_1 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") |
| - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE4.1") |
| - endif() |
| - |
| - if(ENABLE_SSE3 AND CV_ICC AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") |
| - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE3") |
| - endif() |
| - |
| - if(NOT MSVC64) |
| - # 64-bit MSVC compiler uses SSE/SSE2 by default |
| - if(ENABLE_SSE2 AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") |
| - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE2") |
| - endif() |
| - if(ENABLE_SSE AND NOT OPENCV_EXTRA_FLAGS MATCHES "/arch:") |
| - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /arch:SSE") |
| - endif() |
| - endif() |
| - |
| - if(ENABLE_SSE OR ENABLE_SSE2 OR ENABLE_SSE3 OR ENABLE_SSE4_1 OR ENABLE_AVX OR ENABLE_AVX2) |
| - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /Oi") |
| - endif() |
| - |
| - if(X86 OR X86_64) |
| - if(CMAKE_SIZEOF_VOID_P EQUAL 4 AND ENABLE_SSE2) |
| - set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /fp:fast") # !! important - be on the same wave with x64 compilers |
| - endif() |
| - endif() |
| - |
| if(OPENCV_WARNINGS_ARE_ERRORS) |
| set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS} /WX") |
| endif() |
| @@ -353,6 +245,16 @@ if(NOT BUILD_SHARED_LIBS AND CMAKE_COMPILER_IS_GNUCXX AND NOT ANDROID) |
| set(OPENCV_EXTRA_FLAGS "-fPIC ${OPENCV_EXTRA_FLAGS}") |
| endif() |
| |
| +include(cmake/OpenCVCompilerOptimizations.cmake) |
| + |
| +if(COMMAND ocv_compiler_optimization_options) |
| + ocv_compiler_optimization_options() |
| +endif() |
| + |
| +if(COMMAND ocv_compiler_optimization_options_finalize) |
| + ocv_compiler_optimization_options_finalize() |
| +endif() |
| + |
| # Add user supplied extra options (optimization, etc...) |
| # ========================================================== |
| set(OPENCV_EXTRA_FLAGS "${OPENCV_EXTRA_FLAGS}" CACHE INTERNAL "Extra compiler options") |
| @@ -370,6 +272,7 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) |
| add_extra_compiler_option(-fvisibility-inlines-hidden) |
| endif() |
| |
| +# TODO !!!!! |
| if(NOT OPENCV_FP16_DISABLE AND NOT IOS) |
| if(ARM AND ENABLE_NEON) |
| set(FP16_OPTION "-mfpu=neon-fp16") |
| @@ -378,7 +281,7 @@ if(NOT OPENCV_FP16_DISABLE AND NOT IOS) |
| endif() |
| try_compile(__VALID_FP16 |
| "${OpenCV_BINARY_DIR}" |
| - "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" |
| + "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp" |
| COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}" |
| OUTPUT_VARIABLE TRY_OUT |
| ) |
| diff --git a/cmake/OpenCVGenHeaders.cmake b/cmake/OpenCVGenHeaders.cmake |
| index 2988979..477b910 100644 |
| --- a/cmake/OpenCVGenHeaders.cmake |
| +++ b/cmake/OpenCVGenHeaders.cmake |
| @@ -3,6 +3,10 @@ configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CO |
| configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/cvconfig.h") |
| install(FILES "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2 COMPONENT dev) |
| |
| +# platform-specific config file |
| +ocv_compiler_optimization_fill_cpu_config() |
| +configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cv_cpu_config.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cv_cpu_config.h") |
| + |
| # ---------------------------------------------------------------------------- |
| # opencv_modules.hpp based on actual modules list |
| # ---------------------------------------------------------------------------- |
| diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake |
| index 742a287..3e98bf5 100644 |
| --- a/cmake/OpenCVModule.cmake |
| +++ b/cmake/OpenCVModule.cmake |
| @@ -65,6 +65,7 @@ foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MOD |
| unset(OPENCV_MODULE_${mod}_PRIVATE_OPT_DEPS CACHE) |
| unset(OPENCV_MODULE_${mod}_LINK_DEPS CACHE) |
| unset(OPENCV_MODULE_${mod}_WRAPPERS CACHE) |
| + unset(OPENCV_DEPENDANT_TARGETS_${mod} CACHE) |
| endforeach() |
| |
| # clean modules info which needs to be recalculated |
| @@ -641,6 +642,8 @@ macro(ocv_set_module_sources) |
| # use full paths for module to be independent from the module location |
| ocv_convert_to_full_paths(OPENCV_MODULE_${the_module}_HEADERS) |
| |
| + ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module}) |
| + |
| set(OPENCV_MODULE_${the_module}_HEADERS ${OPENCV_MODULE_${the_module}_HEADERS} CACHE INTERNAL "List of header files for ${the_module}") |
| set(OPENCV_MODULE_${the_module}_SOURCES ${OPENCV_MODULE_${the_module}_SOURCES} CACHE INTERNAL "List of source files for ${the_module}") |
| endmacro() |
| diff --git a/cmake/OpenCVPCHSupport.cmake b/cmake/OpenCVPCHSupport.cmake |
| index 90437cb..45968e7 100644 |
| --- a/cmake/OpenCVPCHSupport.cmake |
| +++ b/cmake/OpenCVPCHSupport.cmake |
| @@ -326,7 +326,10 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input) |
| |
| get_target_property(_sources ${_targetName} SOURCES) |
| foreach(src ${_sources}) |
| - if(NOT "${src}" MATCHES "\\.mm$") |
| + if(NOT "${src}" MATCHES "\\.mm$" |
| + AND NOT "${src}" MATCHES "\\.h$" AND NOT "${src}" MATCHES "\\.hpp$" # header files |
| + AND NOT "${src}" MATCHES "^[$]" # CMake generator expressions; a bracket is used because a quoted \$ unescapes to a bare end-of-line anchor |
| + ) |
| get_source_file_property(oldProps "${src}" COMPILE_FLAGS) |
| if(NOT oldProps) |
| set(newProperties "/Yu\"${_input}\" /FI\"${_input}\"") |
| diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake |
| index cdf257d..8a5ee28 100644 |
| --- a/cmake/OpenCVUtils.cmake |
| +++ b/cmake/OpenCVUtils.cmake |
| @@ -37,7 +37,11 @@ endmacro() |
| |
| macro(ocv_update VAR) |
| if(NOT DEFINED ${VAR}) |
| - set(${VAR} ${ARGN}) |
| + if("x${ARGN}" STREQUAL "x") |
| + set(${VAR} "") |
| + else() |
| + set(${VAR} ${ARGN}) |
| + endif() |
| else() |
| #ocv_debug_message("Preserve old value for ${VAR}: ${${VAR}}") |
| endif() |
| @@ -151,8 +155,15 @@ function(ocv_append_target_property target prop) |
| endif() |
| endfunction() |
| |
| +function(ocv_append_dependant_targets target) # records ARGN in the INTERNAL cache list OPENCV_DEPENDANT_TARGETS_<target> |
| + _ocv_fix_target(target) |
| + set(__cache_key "OPENCV_DEPENDANT_TARGETS_${target}") |
| + set(${__cache_key} "${${__cache_key}};${ARGN}" CACHE INTERNAL "" FORCE) |
| +endfunction() |
| + |
| # adds include directories in such way that directories from the OpenCV source tree go first |
| function(ocv_target_include_directories target) |
| + #ocv_debug_message("ocv_target_include_directories(${target} ${ARGN})") |
| _ocv_fix_target(target) |
| set(__params "") |
| if(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.0" AND |
| @@ -173,6 +184,11 @@ function(ocv_target_include_directories target) |
| else() |
| if(TARGET ${target}) |
| target_include_directories(${target} PRIVATE ${__params}) |
| + if(OPENCV_DEPENDANT_TARGETS_${target}) |
| + foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}}) |
| + target_include_directories(${t} PRIVATE ${__params}) |
| + endforeach() |
| + endif() |
| else() |
| set(__new_inc "${OCV_TARGET_INCLUDE_DIRS_${target}};${__params}") |
| set(OCV_TARGET_INCLUDE_DIRS_${target} "${__new_inc}" CACHE INTERNAL "") |
| @@ -205,8 +221,11 @@ set(OCV_COMPILER_FAIL_REGEX |
| ) |
| |
| MACRO(ocv_check_compiler_flag LANG FLAG RESULT) |
| + set(_fname "${ARGN}") |
| if(NOT DEFINED ${RESULT}) |
| - if("_${LANG}_" MATCHES "_CXX_") |
| + if(_fname) |
| + # nothing |
| + elseif("_${LANG}_" MATCHES "_CXX_") |
| set(_fname "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/src.cxx") |
| if("${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror " OR "${CMAKE_CXX_FLAGS} ${FLAG} " MATCHES "-Werror=unknown-pragmas ") |
| FILE(WRITE "${_fname}" "int main() { return 0; }\n") |
| @@ -231,7 +250,13 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT) |
| unset(_fname) |
| endif() |
| if(_fname) |
| - MESSAGE(STATUS "Performing Test ${RESULT}") |
| + if(NOT "x${ARGN}" STREQUAL "x") |
| + file(RELATIVE_PATH __msg "${CMAKE_SOURCE_DIR}" "${ARGN}") |
| + set(__msg " (check file: ${__msg})") |
| + else() |
| + set(__msg "") |
| + endif() |
| + MESSAGE(STATUS "Performing Test ${RESULT}${__msg}") |
| TRY_COMPILE(${RESULT} |
| "${CMAKE_BINARY_DIR}" |
| "${_fname}" |
| @@ -278,7 +303,11 @@ MACRO(ocv_check_compiler_flag LANG FLAG RESULT) |
| endif() |
| ENDMACRO() |
| |
| -macro(ocv_check_flag_support lang flag varname) |
| +macro(ocv_check_flag_support lang flag varname base_options) |
| + if(CMAKE_BUILD_TYPE) |
| + set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE}) |
| + endif() |
| + |
| if("_${lang}_" MATCHES "_CXX_") |
| set(_lang CXX) |
| elseif("_${lang}_" MATCHES "_C_") |
| @@ -293,7 +322,7 @@ macro(ocv_check_flag_support lang flag varname) |
| string(REGEX REPLACE "^(/|-)" "HAVE_${_lang}_" ${varname} "${${varname}}") |
| string(REGEX REPLACE " -|-|=| |\\." "_" ${varname} "${${varname}}") |
| |
| - ocv_check_compiler_flag("${_lang}" "${ARGN} ${flag}" ${${varname}}) |
| + ocv_check_compiler_flag("${_lang}" "${base_options} ${flag}" ${${varname}} ${ARGN}) |
| endmacro() |
| |
| # turns off warnings |
| @@ -327,7 +356,7 @@ macro(ocv_warnings_disable) |
| string(REPLACE "${warning}" "" ${var} "${${var}}") |
| string(REPLACE "-W" "-Wno-" warning "${warning}") |
| endif() |
| - ocv_check_flag_support(${var} "${warning}" _varname) |
| + ocv_check_flag_support(${var} "${warning}" _varname "") |
| if(${_varname}) |
| set(${var} "${${var}} ${warning}") |
| endif() |
| @@ -342,7 +371,7 @@ macro(ocv_warnings_disable) |
| else() |
| string(REPLACE "-wd" "-Qwd" warning "${warning}") |
| endif() |
| - ocv_check_flag_support(${var} "${warning}" _varname) |
| + ocv_check_flag_support(${var} "${warning}" _varname "") |
| if(${_varname}) |
| set(${var} "${${var}} ${warning}") |
| endif() |
| @@ -357,7 +386,7 @@ macro(ocv_warnings_disable) |
| endmacro() |
| |
| macro(add_apple_compiler_options the_module) |
| - ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS) |
| + ocv_check_flag_support(OBJCXX "-fobjc-exceptions" HAVE_OBJC_EXCEPTIONS "") |
| if(HAVE_OBJC_EXCEPTIONS) |
| foreach(source ${OPENCV_MODULE_${the_module}_SOURCES}) |
| if("${source}" MATCHES "\\.mm$") |
| @@ -892,6 +921,11 @@ function(_ocv_append_target_includes target) |
| if (TARGET ${target}_object) |
| target_include_directories(${target}_object PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}}) |
| endif() |
| + if(OPENCV_DEPENDANT_TARGETS_${target}) |
| + foreach(t ${OPENCV_DEPENDANT_TARGETS_${target}}) |
| + target_include_directories(${t} PRIVATE ${OCV_TARGET_INCLUDE_DIRS_${target}}) |
| + endforeach() |
| + endif() |
| unset(OCV_TARGET_INCLUDE_DIRS_${target} CACHE) |
| endif() |
| endfunction() |
| diff --git a/cmake/checks/cpu_avx.cpp b/cmake/checks/cpu_avx.cpp |
| new file mode 100644 |
| index 0000000..05536f4 |
| --- /dev/null |
| +++ b/cmake/checks/cpu_avx.cpp |
| @@ -0,0 +1,9 @@ |
| +#if !defined __AVX__ // MSVC supports this flag since MSVS 2013 |
| +#error "__AVX__ define is missing" |
| +#endif |
| +#include <immintrin.h> |
| +void test() |
| +{ |
| + __m256 a = _mm256_set1_ps(0.0f); |
| +} |
| +int main() { return 0; } |
| diff --git a/cmake/checks/cpu_avx2.cpp b/cmake/checks/cpu_avx2.cpp |
| new file mode 100644 |
| index 0000000..3ab1143 |
| --- /dev/null |
| +++ b/cmake/checks/cpu_avx2.cpp |
| @@ -0,0 +1,10 @@ |
| +#if !defined __AVX2__ // MSVC supports this flag since MSVS 2013 |
| +#error "__AVX2__ define is missing" |
| +#endif |
| +#include <immintrin.h> |
| +void test() |
| +{ |
| + int data[8] = {0,0,0,0, 0,0,0,0}; |
| + __m256i a = _mm256_loadu_si256((const __m256i *)data); |
| +} |
| +int main() { return 0; } |
| diff --git a/cmake/checks/cpu_avx512.cpp b/cmake/checks/cpu_avx512.cpp |
| new file mode 100644 |
| index 0000000..d0898ab |
| --- /dev/null |
| +++ b/cmake/checks/cpu_avx512.cpp |
| @@ -0,0 +1,10 @@ |
| +#if defined __AVX512__ || defined __AVX512F__ |
| +#include <immintrin.h> |
| +void test() |
| +{ |
| + __m512i zmm = _mm512_setzero_si512(); |
| +} |
| +#else |
| +#error "AVX512 is not supported" |
| +#endif |
| +int main() { return 0; } |
| diff --git a/cmake/checks/cpu_fp16.cpp b/cmake/checks/cpu_fp16.cpp |
| new file mode 100644 |
| index 0000000..6951f1c |
| --- /dev/null |
| +++ b/cmake/checks/cpu_fp16.cpp |
| @@ -0,0 +1,33 @@ |
| +#include <stdio.h> |
| + |
| +#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) || (defined __INTEL_COMPILER && defined __AVX__) |
| +#include <immintrin.h> |
| +int test() |
| +{ |
| + const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f }; |
| + short dst[8]; |
| + __m128 v_src = _mm_load_ps(src); |
| + __m128i v_dst = _mm_cvtps_ph(v_src, 0); |
| + _mm_storel_epi64((__m128i*)dst, v_dst); |
| + return (int)dst[0]; |
| +} |
| +#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__) |
| +#include "arm_neon.h" |
| +int test() |
| +{ |
| + const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f }; |
| + short dst[8]; |
| + float32x4_t v_src = *(float32x4_t*)src; |
| + float16x4_t v_dst = vcvt_f16_f32(v_src); |
| + *(float16x4_t*)dst = v_dst; |
| + return (int)dst[0]; |
| +} |
| +#else |
| +#error "FP16 is not supported" |
| +#endif |
| + |
| +int main() |
| +{ |
| + printf("%d\n", test()); |
| + return 0; |
| +} |
| diff --git a/cmake/checks/cpu_popcnt.cpp b/cmake/checks/cpu_popcnt.cpp |
| new file mode 100644 |
| index 0000000..f55c9f3 |
| --- /dev/null |
| +++ b/cmake/checks/cpu_popcnt.cpp |
| @@ -0,0 +1,8 @@ |
| +#include <nmmintrin.h> |
| +#ifndef _MSC_VER |
| +#include <popcntintrin.h> |
| +#endif |
| +int main() { |
| + int i = _mm_popcnt_u32(1); // 32-bit form: _mm_popcnt_u64 exists only on x86-64, so probing it rejected POPCNT on every 32-bit x86 build |
| + return 0; |
| +} |
| diff --git a/cmake/checks/cpu_sse.cpp b/cmake/checks/cpu_sse.cpp |
| new file mode 100644 |
| index 0000000..c6269ac |
| --- /dev/null |
| +++ b/cmake/checks/cpu_sse.cpp |
| @@ -0,0 +1,2 @@ |
| +#include <xmmintrin.h> |
| +int main() { return 0; } |
| diff --git a/cmake/checks/cpu_sse2.cpp b/cmake/checks/cpu_sse2.cpp |
| new file mode 100644 |
| index 0000000..68a69f8 |
| --- /dev/null |
| +++ b/cmake/checks/cpu_sse2.cpp |
| @@ -0,0 +1,2 @@ |
| +#include <emmintrin.h> |
| +int main() { return 0; } |
| diff --git a/cmake/checks/cpu_sse3.cpp b/cmake/checks/cpu_sse3.cpp |
| new file mode 100644 |
| index 0000000..98ce219 |
| --- /dev/null |
| +++ b/cmake/checks/cpu_sse3.cpp |
| @@ -0,0 +1,7 @@ |
| +#include <pmmintrin.h> |
| +int main() { |
| + __m128 u, v; |
| + u = _mm_set1_ps(0.0f); |
| + v = _mm_moveldup_ps(u); // SSE3 |
| + return 0; |
| +} |
| diff --git a/cmake/checks/cpu_sse41.cpp b/cmake/checks/cpu_sse41.cpp |
| new file mode 100644 |
| index 0000000..ddd835b |
| --- /dev/null |
| +++ b/cmake/checks/cpu_sse41.cpp |
| @@ -0,0 +1,6 @@ |
| +#include <smmintrin.h> |
| +int main() { |
| + __m128i a = _mm_setzero_si128(), b = _mm_setzero_si128(); |
| + __m128i c = _mm_packus_epi32(a, b); |
| + return 0; |
| +} |
| diff --git a/cmake/checks/cpu_sse42.cpp b/cmake/checks/cpu_sse42.cpp |
| new file mode 100644 |
| index 0000000..56f5665 |
| --- /dev/null |
| +++ b/cmake/checks/cpu_sse42.cpp |
| @@ -0,0 +1,5 @@ |
| +#include <nmmintrin.h> |
| +int main() { |
| + int i = _mm_popcnt_u32(1); // 32-bit form: _mm_popcnt_u64 exists only on x86-64, so probing it rejected SSE4.2 on every 32-bit x86 build |
| + return 0; |
| +} |
| diff --git a/cmake/checks/cpu_ssse3.cpp b/cmake/checks/cpu_ssse3.cpp |
| new file mode 100644 |
| index 0000000..e583199 |
| --- /dev/null |
| +++ b/cmake/checks/cpu_ssse3.cpp |
| @@ -0,0 +1,7 @@ |
| +#include <tmmintrin.h> |
| +const double v = 0; |
| +int main() { |
| + __m128i a = _mm_setzero_si128(); |
| + __m128i b = _mm_abs_epi32(a); |
| + return 0; |
| +} |
| diff --git a/cmake/checks/fp16.cpp b/cmake/checks/fp16.cpp |
| deleted file mode 100644 |
| index c77c844..0000000 |
| --- a/cmake/checks/fp16.cpp |
| +++ /dev/null |
| @@ -1,33 +0,0 @@ |
| -#include <stdio.h> |
| - |
| -#if defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700) |
| -#include <immintrin.h> |
| -int test() |
| -{ |
| - const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f }; |
| - short dst[8]; |
| - __m128 v_src = _mm_load_ps(src); |
| - __m128i v_dst = _mm_cvtps_ph(v_src, 0); |
| - _mm_storel_epi64((__m128i*)dst, v_dst); |
| - return (int)dst[0]; |
| -} |
| -#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__) |
| -#include "arm_neon.h" |
| -int test() |
| -{ |
| - const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f }; |
| - short dst[8]; |
| - float32x4_t v_src = *(float32x4_t*)src; |
| - float16x4_t v_dst = vcvt_f16_f32(v_src); |
| - *(float16x4_t*)dst = v_dst; |
| - return (int)dst[0]; |
| -} |
| -#else |
| -#error "FP16 is not supported" |
| -#endif |
| - |
| -int main() |
| -{ |
| - printf("%d\n", test()); |
| - return 0; |
| -} |
| diff --git a/cmake/templates/cv_cpu_config.h.in b/cmake/templates/cv_cpu_config.h.in |
| new file mode 100644 |
| index 0000000..27b2731 |
| --- /dev/null |
| +++ b/cmake/templates/cv_cpu_config.h.in |
| @@ -0,0 +1,5 @@ |
| +// OpenCV CPU baseline features |
| +@OPENCV_CPU_BASELINE_DEFINITIONS_CONFIGMAKE@ |
| + |
| +// OpenCV supported CPU dispatched features |
| +@OPENCV_CPU_DISPATCH_DEFINITIONS_CONFIGMAKE@ |
| diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in |
| index 05add9e..658d12c 100644 |
| --- a/cmake/templates/cvconfig.h.in |
| +++ b/cmake/templates/cvconfig.h.in |
| @@ -1,6 +1,15 @@ |
| +#ifndef OPENCV_CVCONFIG_H_INCLUDED |
| +#define OPENCV_CVCONFIG_H_INCLUDED |
| + |
| /* OpenCV compiled as static or dynamic libs */ |
| #cmakedefine BUILD_SHARED_LIBS |
| |
| +/* OpenCV intrinsics optimized code */ |
| +#cmakedefine CV_ENABLE_INTRINSICS |
| + |
| +/* OpenCV additional optimized code */ |
| +#cmakedefine CV_DISABLE_OPTIMIZATION |
| + |
| /* Compile for 'real' NVIDIA GPU architectures */ |
| #define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}" |
| |
| @@ -206,3 +215,7 @@ |
| |
| /* OpenVX */ |
| #cmakedefine HAVE_OPENVX |
| + |
| + |
| + |
| +#endif // OPENCV_CVCONFIG_H_INCLUDED |
| diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h |
| new file mode 100644 |
| index 0000000..9a8537f |
| --- /dev/null |
| +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h |
| @@ -0,0 +1,166 @@ |
| +// This file is part of OpenCV project. |
| +// It is subject to the license terms in the LICENSE file found in the top-level directory |
| +// of this distribution and at http://opencv.org/license.html. |
| + |
| +#if defined __OPENCV_BUILD \ |
| + |
| +#include "cv_cpu_config.h" |
| +#include "cv_cpu_helper.h" |
| + |
| +#if defined CV_ENABLE_INTRINSICS \ |
| + && !defined CV_DISABLE_OPTIMIZATION \ |
| + && !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */ \ |
| + |
| +#ifdef CV_CPU_COMPILE_SSE2 |
| +# include <emmintrin.h> |
| +# define CV_MMX 1 |
| +# define CV_SSE 1 |
| +# define CV_SSE2 1 |
| +#endif |
| +#ifdef CV_CPU_COMPILE_SSE3 |
| +# include <pmmintrin.h> |
| +# define CV_SSE3 1 |
| +#endif |
| +#ifdef CV_CPU_COMPILE_SSSE3 |
| +# include <tmmintrin.h> |
| +# define CV_SSSE3 1 |
| +#endif |
| +#ifdef CV_CPU_COMPILE_SSE4_1 |
| +# include <smmintrin.h> |
| +# define CV_SSE4_1 1 |
| +#endif |
| +#ifdef CV_CPU_COMPILE_SSE4_2 |
| +# include <nmmintrin.h> |
| +# define CV_SSE4_2 1 |
| +#endif |
| +#ifdef CV_CPU_COMPILE_POPCNT |
| +# ifdef _MSC_VER |
| +# include <nmmintrin.h> |
| +# if defined(_M_X64) |
| +# define CV_POPCNT_U64 _mm_popcnt_u64 |
| +# endif |
| +# define CV_POPCNT_U32 _mm_popcnt_u32 |
| +# else |
| +# include <popcntintrin.h> |
| +# if defined(__x86_64__) |
| +# define CV_POPCNT_U64 __builtin_popcountll |
| +# endif |
| +# define CV_POPCNT_U32 __builtin_popcount |
| +# endif |
| +# define CV_POPCNT 1 |
| +#endif |
| +#ifdef CV_CPU_COMPILE_AVX |
| +# include <immintrin.h> |
| +# define CV_AVX 1 |
| +#endif |
| +#ifdef CV_CPU_COMPILE_AVX2 |
| +# include <immintrin.h> |
| +# define CV_AVX2 1 |
| +#endif |
| +#ifdef CV_CPU_COMPILE_FMA3 |
| +# define CV_FMA3 1 |
| +#endif |
| + |
| +#if (defined WIN32 || defined _WIN32) && defined(_M_ARM) |
| +# include <Intrin.h> |
| +# include <arm_neon.h> |
| +# define CV_NEON 1 |
| +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) |
| +# include <arm_neon.h> |
| +# define CV_NEON 1 |
| +#endif |
| + |
| +#if defined(__ARM_NEON__) || defined(__aarch64__) |
| +# include <arm_neon.h> |
| +#endif |
| + |
| +#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__ |
| + |
| +#endif // __OPENCV_BUILD |
| + |
| + |
| + |
| +#if !defined __OPENCV_BUILD // Compatibility code |
| + |
| +#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) |
| +# include <emmintrin.h> |
| +# define CV_MMX 1 |
| +# define CV_SSE 1 |
| +# define CV_SSE2 1 |
| +#elif (defined WIN32 || defined _WIN32) && defined(_M_ARM) |
| +# include <Intrin.h> |
| +# include <arm_neon.h> |
| +# define CV_NEON 1 |
| +#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) |
| +# include <arm_neon.h> |
| +# define CV_NEON 1 |
| +#endif |
| + |
| +#endif // !__OPENCV_BUILD (Compatibility code) |
| + |
| + |
| + |
| +#ifndef CV_MMX /* fallbacks: each CV_<feature> macro not defined by now is defined as 0, so it can always be used in #if */ |
| +# define CV_MMX 0 |
| +#endif |
| +#ifndef CV_SSE |
| +# define CV_SSE 0 |
| +#endif |
| +#ifndef CV_SSE2 |
| +# define CV_SSE2 0 |
| +#endif |
| +#ifndef CV_SSE3 |
| +# define CV_SSE3 0 |
| +#endif |
| +#ifndef CV_SSSE3 |
| +# define CV_SSSE3 0 |
| +#endif |
| +#ifndef CV_SSE4_1 |
| +# define CV_SSE4_1 0 |
| +#endif |
| +#ifndef CV_SSE4_2 |
| +# define CV_SSE4_2 0 |
| +#endif |
| +#ifndef CV_POPCNT |
| +# define CV_POPCNT 0 |
| +#endif |
| +#ifndef CV_AVX |
| +# define CV_AVX 0 |
| +#endif |
| +#ifndef CV_AVX2 |
| +# define CV_AVX2 0 |
| +#endif |
| +#ifndef CV_FMA3 |
| +# define CV_FMA3 0 |
| +#endif |
| +#ifndef CV_AVX_512F |
| +# define CV_AVX_512F 0 |
| +#endif |
| +#ifndef CV_AVX_512BW |
| +# define CV_AVX_512BW 0 |
| +#endif |
| +#ifndef CV_AVX_512CD |
| +# define CV_AVX_512CD 0 |
| +#endif |
| +#ifndef CV_AVX_512DQ |
| +# define CV_AVX_512DQ 0 |
| +#endif |
| +#ifndef CV_AVX_512ER |
| +# define CV_AVX_512ER 0 |
| +#endif |
| +#ifndef CV_AVX_512IFMA512 |
| +# define CV_AVX_512IFMA512 0 |
| +#endif |
| +#ifndef CV_AVX_512PF |
| +# define CV_AVX_512PF 0 |
| +#endif |
| +#ifndef CV_AVX_512VBMI |
| +# define CV_AVX_512VBMI 0 |
| +#endif |
| +#ifndef CV_AVX_512VL |
| +# define CV_AVX_512VL 0 |
| +#endif |
| + |
| +#ifndef CV_NEON /* same fallback for the ARM NEON switch */ |
| +# define CV_NEON 0 |
| +#endif |
| diff --git a/modules/core/include/opencv2/core/cv_cpu_helper.h b/modules/core/include/opencv2/core/cv_cpu_helper.h |
| new file mode 100644 |
| index 0000000..cb755d6 |
| --- /dev/null |
| +++ b/modules/core/include/opencv2/core/cv_cpu_helper.h |
| @@ -0,0 +1,133 @@ |
| +// AUTOGENERATED, DO NOT EDIT |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE |
| +# define CV_CPU_HAS_SUPPORT_SSE 1 |
| +# define CV_CPU_CALL_SSE(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE |
| +# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE)) |
| +# define CV_CPU_CALL_SSE(...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_SSE 0 |
| +# define CV_CPU_CALL_SSE(...) |
| +#endif |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2 |
| +# define CV_CPU_HAS_SUPPORT_SSE2 1 |
| +# define CV_CPU_CALL_SSE2(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2 |
| +# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2)) |
| +# define CV_CPU_CALL_SSE2(...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_SSE2 0 |
| +# define CV_CPU_CALL_SSE2(...) |
| +#endif |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3 |
| +# define CV_CPU_HAS_SUPPORT_SSE3 1 |
| +# define CV_CPU_CALL_SSE3(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3 |
| +# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3)) |
| +# define CV_CPU_CALL_SSE3(...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_SSE3 0 |
| +# define CV_CPU_CALL_SSE3(...) |
| +#endif |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3 |
| +# define CV_CPU_HAS_SUPPORT_SSSE3 1 |
| +# define CV_CPU_CALL_SSSE3(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3 |
| +# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3)) |
| +# define CV_CPU_CALL_SSSE3(...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_SSSE3 0 |
| +# define CV_CPU_CALL_SSSE3(...) |
| +#endif |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1 |
| +# define CV_CPU_HAS_SUPPORT_SSE4_1 1 |
| +# define CV_CPU_CALL_SSE4_1(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1 |
| +# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1)) |
| +# define CV_CPU_CALL_SSE4_1(...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_SSE4_1 0 |
| +# define CV_CPU_CALL_SSE4_1(...) |
| +#endif |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2 |
| +# define CV_CPU_HAS_SUPPORT_SSE4_2 1 |
| +# define CV_CPU_CALL_SSE4_2(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2 |
| +# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2)) |
| +# define CV_CPU_CALL_SSE4_2(...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_SSE4_2 0 |
| +# define CV_CPU_CALL_SSE4_2(...) |
| +#endif |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT |
| +# define CV_CPU_HAS_SUPPORT_POPCNT 1 |
| +# define CV_CPU_CALL_POPCNT(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT |
| +# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT)) |
| +# define CV_CPU_CALL_POPCNT(...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_POPCNT 0 |
| +# define CV_CPU_CALL_POPCNT(...) |
| +#endif |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX |
| +# define CV_CPU_HAS_SUPPORT_AVX 1 |
| +# define CV_CPU_CALL_AVX(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX |
| +# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX)) |
| +# define CV_CPU_CALL_AVX(...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_AVX 0 |
| +# define CV_CPU_CALL_AVX(...) |
| +#endif |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16 |
| +# define CV_CPU_HAS_SUPPORT_FP16 1 |
| +# define CV_CPU_CALL_FP16(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16 |
| +# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16)) |
| +# define CV_CPU_CALL_FP16(...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_FP16 0 |
| +# define CV_CPU_CALL_FP16(...) |
| +#endif |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2 |
| +# define CV_CPU_HAS_SUPPORT_AVX2 1 |
| +# define CV_CPU_CALL_AVX2(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2 |
| +# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2)) |
| +# define CV_CPU_CALL_AVX2(...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_AVX2 0 |
| +# define CV_CPU_CALL_AVX2(...) |
| +#endif |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3 |
| +# define CV_CPU_HAS_SUPPORT_FMA3 1 |
| +# define CV_CPU_CALL_FMA3(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3 |
| +# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3)) |
| +# define CV_CPU_CALL_FMA3(...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_FMA3 0 |
| +# define CV_CPU_CALL_FMA3(...) |
| +#endif |
| + |
| +#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON |
| +# define CV_CPU_HAS_SUPPORT_NEON 1 |
| +# define CV_CPU_CALL_NEON(...) return __VA_ARGS__ |
| +#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON |
| +# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON)) |
| +# define CV_CPU_CALL_NEON(...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__ |
| +#else |
| +# define CV_CPU_HAS_SUPPORT_NEON 0 |
| +# define CV_CPU_CALL_NEON(...) |
| +#endif |
| diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h |
| index 699b166..0a46e02 100644 |
| --- a/modules/core/include/opencv2/core/cvdef.h |
| +++ b/modules/core/include/opencv2/core/cvdef.h |
| @@ -48,6 +48,10 @@ |
| //! @addtogroup core_utils |
| //! @{ |
| |
| +#ifdef __OPENCV_BUILD |
| +#include "cvconfig.h" |
| +#endif |
| + |
| #if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 |
| # define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ |
| #endif |
| @@ -59,10 +63,6 @@ |
| #undef abs |
| #undef Complex |
| |
| -#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300 |
| -# define _CRT_SECURE_NO_DEPRECATE /* to avoid multiple Visual Studio warnings */ |
| -#endif |
| - |
| #include <limits.h> |
| #include "opencv2/core/hal/interface.h" |
| |
| @@ -88,7 +88,7 @@ |
| # endif |
| #endif |
| |
| -#if defined CV_ICC && !defined CV_ENABLE_UNROLLED |
| +#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED) |
| # define CV_ENABLE_UNROLLED 0 |
| #else |
| # define CV_ENABLE_UNROLLED 1 |
| @@ -161,142 +161,9 @@ enum CpuFeatures { |
| CPU_NEON = 100 |
| }; |
| |
| -// do not include SSE/AVX/NEON headers for NVCC compiler |
| -#ifndef __CUDACC__ |
| - |
| -#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2) |
| -# include <emmintrin.h> |
| -# define CV_MMX 1 |
| -# define CV_SSE 1 |
| -# define CV_SSE2 1 |
| -# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) |
| -# include <pmmintrin.h> |
| -# define CV_SSE3 1 |
| -# endif |
| -# if defined __SSSE3__ || (defined _MSC_VER && _MSC_VER >= 1500) |
| -# include <tmmintrin.h> |
| -# define CV_SSSE3 1 |
| -# endif |
| -# if defined __SSE4_1__ || (defined _MSC_VER && _MSC_VER >= 1500) |
| -# include <smmintrin.h> |
| -# define CV_SSE4_1 1 |
| -# endif |
| -# if defined __SSE4_2__ || (defined _MSC_VER && _MSC_VER >= 1500) |
| -# include <nmmintrin.h> |
| -# define CV_SSE4_2 1 |
| -# endif |
| -# if defined __POPCNT__ || (defined _MSC_VER && _MSC_VER >= 1500) |
| -# ifdef _MSC_VER |
| -# include <nmmintrin.h> |
| -# else |
| -# include <popcntintrin.h> |
| -# endif |
| -# define CV_POPCNT 1 |
| -# endif |
| -# if defined __AVX__ || (defined _MSC_VER && _MSC_VER >= 1600 && 0) |
| -// MS Visual Studio 2010 (2012?) has no macro pre-defined to identify the use of /arch:AVX |
| -// See: http://connect.microsoft.com/VisualStudio/feedback/details/605858/arch-avx-should-define-a-predefined-macro-in-x64-and-set-a-unique-value-for-m-ix86-fp-in-win32 |
| -# include <immintrin.h> |
| -# define CV_AVX 1 |
| -# if defined(_XCR_XFEATURE_ENABLED_MASK) |
| -# define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK) |
| -# else |
| -# define __xgetbv() 0 |
| -# endif |
| -# endif |
| -# if defined __AVX2__ || (defined _MSC_VER && _MSC_VER >= 1800 && 0) |
| -# include <immintrin.h> |
| -# define CV_AVX2 1 |
| -# if defined __FMA__ |
| -# define CV_FMA3 1 |
| -# endif |
| -# endif |
| -#endif |
| - |
| -#if (defined WIN32 || defined _WIN32) && defined(_M_ARM) |
| -# include <Intrin.h> |
| -# include <arm_neon.h> |
| -# define CV_NEON 1 |
| -# define CPU_HAS_NEON_FEATURE (true) |
| -#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__)) |
| -# include <arm_neon.h> |
| -# define CV_NEON 1 |
| -#endif |
| - |
| -#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ |
| -# define CV_VFP 1 |
| -#endif |
| - |
| -#endif // __CUDACC__ |
| - |
| -#ifndef CV_POPCNT |
| -#define CV_POPCNT 0 |
| -#endif |
| -#ifndef CV_MMX |
| -# define CV_MMX 0 |
| -#endif |
| -#ifndef CV_SSE |
| -# define CV_SSE 0 |
| -#endif |
| -#ifndef CV_SSE2 |
| -# define CV_SSE2 0 |
| -#endif |
| -#ifndef CV_SSE3 |
| -# define CV_SSE3 0 |
| -#endif |
| -#ifndef CV_SSSE3 |
| -# define CV_SSSE3 0 |
| -#endif |
| -#ifndef CV_SSE4_1 |
| -# define CV_SSE4_1 0 |
| -#endif |
| -#ifndef CV_SSE4_2 |
| -# define CV_SSE4_2 0 |
| -#endif |
| -#ifndef CV_AVX |
| -# define CV_AVX 0 |
| -#endif |
| -#ifndef CV_AVX2 |
| -# define CV_AVX2 0 |
| -#endif |
| -#ifndef CV_FMA3 |
| -# define CV_FMA3 0 |
| -#endif |
| -#ifndef CV_AVX_512F |
| -# define CV_AVX_512F 0 |
| -#endif |
| -#ifndef CV_AVX_512BW |
| -# define CV_AVX_512BW 0 |
| -#endif |
| -#ifndef CV_AVX_512CD |
| -# define CV_AVX_512CD 0 |
| -#endif |
| -#ifndef CV_AVX_512DQ |
| -# define CV_AVX_512DQ 0 |
| -#endif |
| -#ifndef CV_AVX_512ER |
| -# define CV_AVX_512ER 0 |
| -#endif |
| -#ifndef CV_AVX_512IFMA512 |
| -# define CV_AVX_512IFMA512 0 |
| -#endif |
| -#ifndef CV_AVX_512PF |
| -# define CV_AVX_512PF 0 |
| -#endif |
| -#ifndef CV_AVX_512VBMI |
| -# define CV_AVX_512VBMI 0 |
| -#endif |
| -#ifndef CV_AVX_512VL |
| -# define CV_AVX_512VL 0 |
| -#endif |
| |
| -#ifndef CV_NEON |
| -# define CV_NEON 0 |
| -#endif |
| +#include "cv_cpu_dispatch.h" |
| |
| -#ifndef CV_VFP |
| -# define CV_VFP 0 |
| -#endif |
| |
| /* fundamental constants */ |
| #define CV_PI 3.1415926535897932384626433832795 |
| diff --git a/modules/core/include/opencv2/core/fast_math.hpp b/modules/core/include/opencv2/core/fast_math.hpp |
| index c76936a..31c1062 100644 |
| --- a/modules/core/include/opencv2/core/fast_math.hpp |
| +++ b/modules/core/include/opencv2/core/fast_math.hpp |
| @@ -47,6 +47,12 @@ |
| |
| #include "opencv2/core/cvdef.h" |
| |
| +#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ |
| + && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) |
| +#include <emmintrin.h> |
| +#endif |
| + |
| + |
| //! @addtogroup core_utils |
| //! @{ |
| |
| @@ -66,7 +72,7 @@ |
| # include "tegra_round.hpp" |
| #endif |
| |
| -#if CV_VFP |
| +#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ |
| // 1. general scheme |
| #define ARM_ROUND(_value, _asm_string) \ |
| int res; \ |
| @@ -82,7 +88,7 @@ |
| #endif |
| // 3. version for float |
| #define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]") |
| -#endif // CV_VFP |
| +#endif |
| |
| /** @brief Rounds floating-point number to the nearest integer |
| |
| @@ -93,7 +99,7 @@ CV_INLINE int |
| cvRound( double value ) |
| { |
| #if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ |
| - && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) |
| + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__) |
| __m128d t = _mm_set_sd( value ); |
| return _mm_cvtsd_si32(t); |
| #elif defined _MSC_VER && defined _M_IX86 |
| @@ -108,7 +114,7 @@ cvRound( double value ) |
| defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION |
| TEGRA_ROUND_DBL(value); |
| #elif defined CV_ICC || defined __GNUC__ |
| -# if CV_VFP |
| +# if defined ARM_ROUND_DBL |
| ARM_ROUND_DBL(value); |
| # else |
| return (int)lrint(value); |
| @@ -130,18 +136,8 @@ cvRound( double value ) |
| */ |
| CV_INLINE int cvFloor( double value ) |
| { |
| -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) |
| - __m128d t = _mm_set_sd( value ); |
| - int i = _mm_cvtsd_si32(t); |
| - return i - _mm_movemask_pd(_mm_cmplt_sd(t, _mm_cvtsi32_sd(t,i))); |
| -#elif defined __GNUC__ |
| int i = (int)value; |
| return i - (i > value); |
| -#else |
| - int i = cvRound(value); |
| - float diff = (float)(value - i); |
| - return i - (diff < 0); |
| -#endif |
| } |
| |
| /** @brief Rounds floating-point number to the nearest integer not smaller than the original. |
| @@ -153,18 +149,8 @@ CV_INLINE int cvFloor( double value ) |
| */ |
| CV_INLINE int cvCeil( double value ) |
| { |
| -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) |
| - __m128d t = _mm_set_sd( value ); |
| - int i = _mm_cvtsd_si32(t); |
| - return i + _mm_movemask_pd(_mm_cmplt_sd(_mm_cvtsi32_sd(t,i), t)); |
| -#elif defined __GNUC__ |
| int i = (int)value; |
| return i + (i < value); |
| -#else |
| - int i = cvRound(value); |
| - float diff = (float)(i - value); |
| - return i + (diff < 0); |
| -#endif |
| } |
| |
| /** @brief Determines if the argument is Not A Number. |
| @@ -200,8 +186,8 @@ CV_INLINE int cvIsInf( double value ) |
| /** @overload */ |
| CV_INLINE int cvRound(float value) |
| { |
| -#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ && \ |
| - defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) |
| +#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \ |
| + && defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__) |
| __m128 t = _mm_set_ss( value ); |
| return _mm_cvtss_si32(t); |
| #elif defined _MSC_VER && defined _M_IX86 |
| @@ -216,7 +202,7 @@ CV_INLINE int cvRound(float value) |
| defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION |
| TEGRA_ROUND_FLT(value); |
| #elif defined CV_ICC || defined __GNUC__ |
| -# if CV_VFP |
| +# if defined ARM_ROUND_FLT |
| ARM_ROUND_FLT(value); |
| # else |
| return (int)lrintf(value); |
| @@ -237,18 +223,8 @@ CV_INLINE int cvRound( int value ) |
| /** @overload */ |
| CV_INLINE int cvFloor( float value ) |
| { |
| -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__) |
| - __m128 t = _mm_set_ss( value ); |
| - int i = _mm_cvtss_si32(t); |
| - return i - _mm_movemask_ps(_mm_cmplt_ss(t, _mm_cvtsi32_ss(t,i))); |
| -#elif defined __GNUC__ |
| int i = (int)value; |
| return i - (i > value); |
| -#else |
| - int i = cvRound(value); |
| - float diff = (float)(value - i); |
| - return i - (diff < 0); |
| -#endif |
| } |
| |
| /** @overload */ |
| @@ -260,18 +236,8 @@ CV_INLINE int cvFloor( int value ) |
| /** @overload */ |
| CV_INLINE int cvCeil( float value ) |
| { |
| -#if (defined _MSC_VER && defined _M_X64 || (defined __GNUC__ && defined __SSE2__&& !defined __APPLE__)) && !defined(__CUDACC__) |
| - __m128 t = _mm_set_ss( value ); |
| - int i = _mm_cvtss_si32(t); |
| - return i + _mm_movemask_ps(_mm_cmplt_ss(_mm_cvtsi32_ss(t,i), t)); |
| -#elif defined __GNUC__ |
| int i = (int)value; |
| return i + (i < value); |
| -#else |
| - int i = cvRound(value); |
| - float diff = (float)(i - value); |
| - return i + (diff < 0); |
| -#endif |
| } |
| |
| /** @overload */ |
| diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp |
| index 3c8f39d..a983838 100644 |
| --- a/modules/core/src/system.cpp |
| +++ b/modules/core/src/system.cpp |
| @@ -237,24 +237,81 @@ void Exception::formatMessage() |
| msg = format("%s:%d: error: (%d) %s\n", file.c_str(), line, code, err.c_str()); |
| } |
| |
| +static const char* g_hwFeatureNames[CV_HARDWARE_MAX_FEATURE] = { NULL }; |
| + |
| +static const char* getHWFeatureName(int id) /* table name for feature `id`; NULL when the id is unnamed or outside [0, CV_HARDWARE_MAX_FEATURE) */ |
| +{ |
| + return (id >= 0 && id < CV_HARDWARE_MAX_FEATURE) ? g_hwFeatureNames[id] : NULL; /* lower bound added: a negative id must not index the table */ |
| +} |
| +static const char* getHWFeatureNameSafe(int id) /* like getHWFeatureName(), but never returns NULL */ |
| +{ |
| + if (const char* known = getHWFeatureName(id)) return known; |
| + return "Unknown feature"; /* placeholder for ids without a table entry */ |
| +} |
| + |
| struct HWFeatures |
| { |
| enum { MAX_FEATURE = CV_HARDWARE_MAX_FEATURE }; |
| |
| - HWFeatures(void) |
| + HWFeatures(bool run_initialize = false) /* clears every flag; with run_initialize=true also probes the hardware via initialize() */ |
| { |
| - memset( have, 0, sizeof(have) ); |
| - x86_family = 0; |
| + memset( have, 0, sizeof(have) ); /* clear the whole array: have[] holds MAX_FEATURE+1 entries, so counting MAX_FEATURE elements would leave the last one unset */ |
| + if (run_initialize) |
| + initialize(); |
| } |
| |
| - static HWFeatures initialize(void) |
| + static void initializeNames() /* (re)build g_hwFeatureNames: a printable name for each known CPU_* id, NULL for every other slot */ |
 {
| - HWFeatures f; |
| + for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++) |
| + { |
| + g_hwFeatureNames[i] = 0; /* start from "no name" so unlisted ids read back as NULL */ |
| + } |
| + g_hwFeatureNames[CPU_MMX] = "MMX"; /* these strings are also the tokens readSettings() matches against OPENCV_CPU_DISABLE */ |
| + g_hwFeatureNames[CPU_SSE] = "SSE"; |
| + g_hwFeatureNames[CPU_SSE2] = "SSE2"; |
| + g_hwFeatureNames[CPU_SSE3] = "SSE3"; |
| + g_hwFeatureNames[CPU_SSSE3] = "SSSE3"; |
| + g_hwFeatureNames[CPU_SSE4_1] = "SSE4.1"; |
| + g_hwFeatureNames[CPU_SSE4_2] = "SSE4.2"; |
| + g_hwFeatureNames[CPU_POPCNT] = "POPCNT"; |
| + g_hwFeatureNames[CPU_FP16] = "FP16"; |
| + g_hwFeatureNames[CPU_AVX] = "AVX"; |
| + g_hwFeatureNames[CPU_AVX2] = "AVX2"; |
| + g_hwFeatureNames[CPU_FMA3] = "FMA3"; |
| + |
| + g_hwFeatureNames[CPU_AVX_512F] = "AVX512F"; |
| + g_hwFeatureNames[CPU_AVX_512BW] = "AVX512BW"; |
| + g_hwFeatureNames[CPU_AVX_512CD] = "AVX512CD"; |
| + g_hwFeatureNames[CPU_AVX_512DQ] = "AVX512DQ"; |
| + g_hwFeatureNames[CPU_AVX_512ER] = "AVX512ER"; |
| + g_hwFeatureNames[CPU_AVX_512IFMA512] = "AVX512IFMA"; |
| + g_hwFeatureNames[CPU_AVX_512PF] = "AVX512PF"; |
| + g_hwFeatureNames[CPU_AVX_512VBMI] = "AVX512VBMI"; |
| + g_hwFeatureNames[CPU_AVX_512VL] = "AVX512VL"; |
| + |
| + g_hwFeatureNames[CPU_NEON] = "NEON"; |
| + } |
| + |
| + void initialize(void) |
| + { |
| +#ifndef WINRT |
| + if (getenv("OPENCV_DUMP_CONFIG")) |
| + { |
| + fprintf(stderr, "\nOpenCV build configuration is:\n%s\n", |
| + cv::getBuildInformation().c_str()); |
| + } |
| +#endif |
| + |
| + initializeNames(); |
| + |
| int cpuid_data[4] = { 0, 0, 0, 0 }; |
| + int cpuid_data_ex[4] = { 0, 0, 0, 0 }; |
| |
| #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64) |
| + #define OPENCV_HAVE_X86_CPUID 1 |
| __cpuid(cpuid_data, 1); |
| #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) |
| + #define OPENCV_HAVE_X86_CPUID 1 |
| #ifdef __x86_64__ |
| asm __volatile__ |
| ( |
| @@ -278,33 +335,36 @@ struct HWFeatures |
| #endif |
| #endif |
| |
| - f.x86_family = (cpuid_data[0] >> 8) & 15; |
| - if( f.x86_family >= 6 ) |
| + #ifdef OPENCV_HAVE_X86_CPUID |
| + int x86_family = (cpuid_data[0] >> 8) & 15; |
| + if( x86_family >= 6 ) |
| { |
| - f.have[CV_CPU_MMX] = (cpuid_data[3] & (1 << 23)) != 0; |
| - f.have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0; |
| - f.have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0; |
| - f.have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0; |
| - f.have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0; |
| - f.have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0; |
| - f.have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0; |
| - f.have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0; |
| - f.have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0; |
| - f.have[CV_CPU_AVX] = (((cpuid_data[2] & (1<<28)) != 0)&&((cpuid_data[2] & (1<<27)) != 0));//OS uses XSAVE_XRSTORE and CPU support AVX |
| - f.have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0; |
| + have[CV_CPU_MMX] = (cpuid_data[3] & (1<<23)) != 0; |
| + have[CV_CPU_SSE] = (cpuid_data[3] & (1<<25)) != 0; |
| + have[CV_CPU_SSE2] = (cpuid_data[3] & (1<<26)) != 0; |
| + have[CV_CPU_SSE3] = (cpuid_data[2] & (1<<0)) != 0; |
| + have[CV_CPU_SSSE3] = (cpuid_data[2] & (1<<9)) != 0; |
| + have[CV_CPU_FMA3] = (cpuid_data[2] & (1<<12)) != 0; |
| + have[CV_CPU_SSE4_1] = (cpuid_data[2] & (1<<19)) != 0; |
| + have[CV_CPU_SSE4_2] = (cpuid_data[2] & (1<<20)) != 0; |
| + have[CV_CPU_POPCNT] = (cpuid_data[2] & (1<<23)) != 0; |
| + have[CV_CPU_AVX] = (cpuid_data[2] & (1<<28)) != 0; |
| + have[CV_CPU_FP16] = (cpuid_data[2] & (1<<29)) != 0; |
| |
| // make the second call to the cpuid command in order to get |
| // information about extended features like AVX2 |
| #if defined _MSC_VER && (defined _M_IX86 || defined _M_X64) |
| - __cpuidex(cpuid_data, 7, 0); |
| + #define OPENCV_HAVE_X86_CPUID_EX 1 |
| + __cpuidex(cpuid_data_ex, 7, 0); |
| #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) |
| + #define OPENCV_HAVE_X86_CPUID_EX 1 |
| #ifdef __x86_64__ |
| asm __volatile__ |
| ( |
| "movl $7, %%eax\n\t" |
| "movl $0, %%ecx\n\t" |
| "cpuid\n\t" |
| - :[eax]"=a"(cpuid_data[0]),[ebx]"=b"(cpuid_data[1]),[ecx]"=c"(cpuid_data[2]),[edx]"=d"(cpuid_data[3]) |
| + :[eax]"=a"(cpuid_data_ex[0]),[ebx]"=b"(cpuid_data_ex[1]),[ecx]"=c"(cpuid_data_ex[2]),[edx]"=d"(cpuid_data_ex[3]) |
| : |
| : "cc" |
| ); |
| @@ -317,29 +377,76 @@ struct HWFeatures |
| "cpuid\n\t" |
| "movl %%ebx, %0\n\t" |
| "popl %%ebx\n\t" |
| - : "=r"(cpuid_data[1]), "=c"(cpuid_data[2]) |
| + : "=r"(cpuid_data_ex[1]), "=c"(cpuid_data_ex[2]) |
| : |
| : "cc" |
| ); |
| #endif |
| #endif |
| - f.have[CV_CPU_AVX2] = (cpuid_data[1] & (1<<5)) != 0; |
| - |
| - f.have[CV_CPU_AVX_512F] = (cpuid_data[1] & (1<<16)) != 0; |
| - f.have[CV_CPU_AVX_512DQ] = (cpuid_data[1] & (1<<17)) != 0; |
| - f.have[CV_CPU_AVX_512IFMA512] = (cpuid_data[1] & (1<<21)) != 0; |
| - f.have[CV_CPU_AVX_512PF] = (cpuid_data[1] & (1<<26)) != 0; |
| - f.have[CV_CPU_AVX_512ER] = (cpuid_data[1] & (1<<27)) != 0; |
| - f.have[CV_CPU_AVX_512CD] = (cpuid_data[1] & (1<<28)) != 0; |
| - f.have[CV_CPU_AVX_512BW] = (cpuid_data[1] & (1<<30)) != 0; |
| - f.have[CV_CPU_AVX_512VL] = (cpuid_data[1] & (1<<31)) != 0; |
| - f.have[CV_CPU_AVX_512VBMI] = (cpuid_data[2] & (1<<1)) != 0; |
| + |
| + #ifdef OPENCV_HAVE_X86_CPUID_EX |
| + have[CV_CPU_AVX2] = (cpuid_data_ex[1] & (1<<5)) != 0; |
| + |
| + have[CV_CPU_AVX_512F] = (cpuid_data_ex[1] & (1<<16)) != 0; |
| + have[CV_CPU_AVX_512DQ] = (cpuid_data_ex[1] & (1<<17)) != 0; |
| + have[CV_CPU_AVX_512IFMA512] = (cpuid_data_ex[1] & (1<<21)) != 0; |
| + have[CV_CPU_AVX_512PF] = (cpuid_data_ex[1] & (1<<26)) != 0; |
| + have[CV_CPU_AVX_512ER] = (cpuid_data_ex[1] & (1<<27)) != 0; |
| + have[CV_CPU_AVX_512CD] = (cpuid_data_ex[1] & (1<<28)) != 0; |
| + have[CV_CPU_AVX_512BW] = (cpuid_data_ex[1] & (1<<30)) != 0; |
| + have[CV_CPU_AVX_512VL] = (cpuid_data_ex[1] & (1<<31)) != 0; |
| + have[CV_CPU_AVX_512VBMI] = (cpuid_data_ex[2] & (1<<1)) != 0; |
| + #else |
| + CV_UNUSED(cpuid_data_ex); |
| + #endif |
| + |
| + bool have_AVX_OS_support = true; |
| + bool have_AVX512_OS_support = true; |
| + if (!(cpuid_data[2] & (1<<27))) |
| + have_AVX_OS_support = false; // OS uses XSAVE_XRSTORE and CPU support AVX |
| + else |
| + { |
| + int xcr0 = 0; |
| + #ifdef _XCR_XFEATURE_ENABLED_MASK // requires immintrin.h |
| + xcr0 = (int)_xgetbv(_XCR_XFEATURE_ENABLED_MASK); |
| + #elif defined __GNUC__ && (defined __i386__ || defined __x86_64__) |
| + __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx" ); |
| + #endif |
| + if ((xcr0 & 0x6) != 0x6) |
| + have_AVX_OS_support = false; // YMM registers |
| + if ((xcr0 & 0xe6) != 0xe6) |
| + have_AVX512_OS_support = false; // ZMM registers |
| + } |
| + |
| + if (!have_AVX_OS_support) |
| + { |
| + have[CV_CPU_AVX] = false; |
| + have[CV_CPU_FP16] = false; |
| + have[CV_CPU_AVX2] = false; |
| + have[CV_CPU_FMA3] = false; |
| + } |
| + if (!have_AVX_OS_support || !have_AVX512_OS_support) |
| + { |
| + have[CV_CPU_AVX_512F] = false; |
| + have[CV_CPU_AVX_512BW] = false; |
| + have[CV_CPU_AVX_512CD] = false; |
| + have[CV_CPU_AVX_512DQ] = false; |
| + have[CV_CPU_AVX_512ER] = false; |
| + have[CV_CPU_AVX_512IFMA512] = false; |
| + have[CV_CPU_AVX_512PF] = false; |
| + have[CV_CPU_AVX_512VBMI] = false; |
| + have[CV_CPU_AVX_512VL] = false; |
| + } |
| } |
| + #else |
| + CV_UNUSED(cpuid_data); |
| + CV_UNUSED(cpuid_data_ex); |
| + #endif // OPENCV_HAVE_X86_CPUID |
| |
| #if defined ANDROID || defined __linux__ |
| #ifdef __aarch64__ |
| - f.have[CV_CPU_NEON] = true; |
| - f.have[CV_CPU_FP16] = true; |
| + have[CV_CPU_NEON] = true; |
| + have[CV_CPU_FP16] = true; |
| #elif defined __arm__ |
| int cpufile = open("/proc/self/auxv", O_RDONLY); |
| |
| @@ -352,8 +459,8 @@ struct HWFeatures |
| { |
| if (auxv.a_type == AT_HWCAP) |
| { |
| - f.have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0; |
| - f.have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0; |
| + have[CV_CPU_NEON] = (auxv.a_un.a_val & 4096) != 0; |
| + have[CV_CPU_FP16] = (auxv.a_un.a_val & 2) != 0; |
| break; |
| } |
| } |
| @@ -363,21 +470,133 @@ struct HWFeatures |
| #endif |
| #elif (defined __clang__ || defined __APPLE__) |
| #if (defined __ARM_NEON__ || (defined __ARM_NEON && defined __aarch64__)) |
| - f.have[CV_CPU_NEON] = true; |
| + have[CV_CPU_NEON] = true; |
| #endif |
| #if (defined __ARM_FP && (((__ARM_FP & 0x2) != 0) && defined __ARM_NEON__)) |
| - f.have[CV_CPU_FP16] = true; |
| + have[CV_CPU_FP16] = true; |
| #endif |
| #endif |
| |
| - return f; |
| + int baseline_features[] = { CV_CPU_BASELINE_FEATURES }; |
| + if (!checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]))) |
| + { |
| + fprintf(stderr, "\n" |
| + "******************************************************************\n" |
| + "* FATAL ERROR: *\n" |
| + "* This OpenCV build doesn't support current CPU/HW configuration *\n" |
| + "* *\n" |
| + "* Use OPENCV_DUMP_CONFIG=1 environment variable for details *\n" |
| + "******************************************************************\n"); |
| + fprintf(stderr, "\nRequired baseline features:\n"); |
| + checkFeatures(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0]), true); |
| + CV_ErrorNoReturn(cv::Error::StsAssert, "Missing support for required CPU baseline features. Check OpenCV build configuration and required CPU/HW setup."); |
| + } |
| + |
| + readSettings(baseline_features, sizeof(baseline_features) / sizeof(baseline_features[0])); |
| + } |
| + |
| + /* Returns true only when every non-zero id in features[0..count) is flagged in have[]. */ |
| + /* With dump=true the status of each inspected id is also written to stderr. */ |
| + bool checkFeatures(const int* features, int count, bool dump = false) |
| + { |
| + bool allPresent = true; |
| + for (int idx = 0; idx < count; ++idx) |
| + { |
| + const int id = features[idx]; |
| + if (id == 0) |
| + continue; /* zero entries are not checked */ |
| + const bool present = have[id]; |
| + if (!present) |
| + allPresent = false; |
| + if (dump && present) |
| + fprintf(stderr, "%s - OK\n", getHWFeatureNameSafe(id)); |
| + if (dump && !present) |
| + fprintf(stderr, "%s - NOT AVAILABLE\n", getHWFeatureNameSafe(id)); |
| + } |
| + return allPresent; |
| + } |
| + |
| + static inline bool isSymbolSeparator(char c) /* true for the characters that delimit feature names: ',' ';' '-' */ |
| + { |
| + switch (c) { case ',': case ';': case '-': return true; default: return false; } |
| + } |
| + |
| + void readSettings(const int* baseline_features, int baseline_count) /* applies OPENCV_CPU_DISABLE: clears have[] for every feature name listed in that environment variable */ |
| + { |
| + bool dump = true; /* warnings below are always printed to stderr */ |
| + const char* disabled_features = |
| +#ifndef WINRT |
| + getenv("OPENCV_CPU_DISABLE"); |
| +#else |
| + NULL; |
| +#endif |
| + if (disabled_features && disabled_features[0] != 0) |
| + { |
| + const char* start = disabled_features; |
| + for (;;) |
| + { |
| + while (start[0] != 0 && isSymbolSeparator(start[0])) /* skip separators in front of the next token */ |
| + { |
| + start++; |
| + } |
| + if (start[0] == 0) |
| + break; |
| + const char* end = start; |
| + while (end[0] != 0 && !isSymbolSeparator(end[0])) /* advance to the separator or terminator that ends the token */ |
| + { |
| + end++; |
| + } |
| + if (end == start) /* defensive only: start sits on a non-separator character here, so end has moved at least once */ |
| + continue; |
| + cv::String feature(start, end); |
| + start = end; |
| + |
| + CV_Assert(feature.size() > 0); |
| + |
| + bool found = false; |
| + for (int i = 0; i < CV_HARDWARE_MAX_FEATURE; i++) /* look the token up in the name table */ |
| + { |
| + if (!g_hwFeatureNames[i]) continue; |
| + size_t len = strlen(g_hwFeatureNames[i]); |
| + if (len != feature.size()) continue; /* cheap length check before the full comparison */ |
| + if (feature.compare(g_hwFeatureNames[i]) == 0) |
| + { |
| + bool isBaseline = false; |
| + for (int k = 0; k < baseline_count; k++) |
| + { |
| + if (baseline_features[k] == i) |
| + { |
| + isBaseline = true; |
| + break; |
| + } |
| + } |
| + if (isBaseline) |
| + { |
| + if (dump) fprintf(stderr, "OPENCV: Trying to disable baseline CPU feature: '%s'. This has very limited effect, because code optimizations for this feature are executed unconditionally in the most cases.\n", getHWFeatureNameSafe(i)); |
| + } |
| + if (!have[i]) |
| + { |
| + if (dump) fprintf(stderr, "OPENCV: Trying to disable unavailable CPU feature on the current platform: '%s'.\n", getHWFeatureNameSafe(i)); |
| + } |
| + have[i] = false; /* the flag is cleared whether or not a warning was printed above */ |
| + |
| + found = true; |
| + break; |
| + } |
| + } |
| + if (!found) |
| + { |
| + if (dump) fprintf(stderr, "OPENCV: Trying to disable unknown CPU feature: '%s'.\n", feature.c_str()); |
| + } |
| + } |
| + } |
 }
| |
| - int x86_family; |
| bool have[MAX_FEATURE+1]; |
| }; |
| |
| -static HWFeatures featuresEnabled = HWFeatures::initialize(), featuresDisabled = HWFeatures(); |
| +static HWFeatures featuresEnabled(true), featuresDisabled = HWFeatures(false); /* featuresEnabled runs initialize() from its constructor; featuresDisabled keeps every flag cleared */ |
| static HWFeatures* currentFeatures = &featuresEnabled; |
| |
| bool checkHardwareSupport(int feature) |
| diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt |
| index eb56177..6d9c650 100644 |
| --- a/modules/highgui/CMakeLists.txt |
| +++ b/modules/highgui/CMakeLists.txt |
| @@ -65,7 +65,7 @@ elseif(HAVE_QT) |
| |
| list(APPEND HIGHGUI_LIBRARIES ${QT_LIBRARIES}) |
| list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_QT.cpp ${_MOC_OUTFILES} ${_RCC_OUTFILES}) |
| - ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag) |
| + ocv_check_flag_support(CXX -Wno-missing-declarations _have_flag "") |
| if(${_have_flag}) |
| set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations) |
| endif() |
| diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp |
| index 0fa5202..dcf2e44 100644 |
| --- a/modules/imgproc/src/imgwarp.cpp |
| +++ b/modules/imgproc/src/imgwarp.cpp |
| @@ -1649,7 +1649,7 @@ struct VResizeLanczos4 |
| { |
| CastOp castOp; |
| VecOp vecOp; |
| - int k, x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width); |
| + int x = vecOp((const uchar**)src, (uchar*)dst, (const uchar*)beta, width); |
| #if CV_ENABLE_UNROLLED |
| for( ; x <= width - 4; x += 4 ) |
| { |
| @@ -1657,7 +1657,7 @@ struct VResizeLanczos4 |
| const WT* S = src[0]; |
| WT s0 = S[x]*b, s1 = S[x+1]*b, s2 = S[x+2]*b, s3 = S[x+3]*b; |
| |
| - for( k = 1; k < 8; k++ ) |
| + for( int k = 1; k < 8; k++ ) |
| { |
| b = beta[k]; S = src[k]; |
| s0 += S[x]*b; s1 += S[x+1]*b; |
| diff --git a/modules/objdetect/src/haar.cpp b/modules/objdetect/src/haar.cpp |
| index 51843fa..bb37ee9 100644 |
| --- a/modules/objdetect/src/haar.cpp |
| +++ b/modules/objdetect/src/haar.cpp |
| @@ -824,10 +824,7 @@ cvRunHaarClassifierCascadeSum( const CvHaarClassifierCascade* _cascade, |
| CvPoint pt, double& stage_sum, int start_stage ) |
| { |
| #ifdef CV_HAAR_USE_AVX |
| - bool haveAVX = false; |
| - if(cv::checkHardwareSupport(CV_CPU_AVX)) |
| - if(__xgetbv()&0x6)// Check if the OS will save the YMM registers |
| - haveAVX = true; |
| + bool haveAVX = cv::checkHardwareSupport(CV_CPU_AVX); |
| #else |
| # ifdef CV_HAAR_USE_SSE |
| bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2); |
| -- |
| 2.7.4 |
| |