Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "GNU")
target_compile_options(${LIBRARY_NAME} PRIVATE -O3 -funroll-loops)
endif()

# Only apply SIMD flags if we are on a capable architecture (x86/x86_64).
# Only apply SIMD flags if we are on a capable architecture (x86/x86_64/arm/arm64).
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|i.86")
message(STATUS "x86/x86_64 architecture detected. Configuring SIMD instruction sets.")

Expand Down Expand Up @@ -102,6 +102,27 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64|i.86")
elseif(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "GNU")
set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_avx512.cpp" PROPERTIES COMPILE_OPTIONS "-mavx512f;-mavx512bw;-mavx512dq;-mavx512vl;-mavx512cd;-mfma")
endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm.*|ARM.*|aarch64")
    # ARM / ARM64 branch: only the SSE4 implementation file is added to the
    # target here; no AVX2/AVX-512 sources are compiled on this architecture.
    message(STATUS "arm/arm64 architecture detected. Configuring SIMD instruction sets.")

    target_sources(${LIBRARY_NAME} PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_sse4.cpp"
        "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_sse_base.h"
    )

    # Workaround for the missing x86 intrinsics header expected by the Vector
    # Class Library: provide an empty "x86intrin.h" from a generated include
    # directory inside the build tree so the #include resolves.
    # NOTE(review): presumably the toolchain ships no x86intrin.h of its own on
    # ARM and the real intrinsics come from elsewhere - confirm.
    set(GENERATED_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
    set(X86INTRIN_H_STUB "x86intrin.h")
    file(MAKE_DIRECTORY "${GENERATED_INCLUDE_DIR}")
    # file(TOUCH) refreshes the timestamp of an existing file, which would make
    # every re-configure invalidate objects that include the stub. Create it
    # only once.
    if(NOT EXISTS "${GENERATED_INCLUDE_DIR}/${X86INTRIN_H_STUB}")
        file(TOUCH "${GENERATED_INCLUDE_DIR}/${X86INTRIN_H_STUB}")
    endif()

    target_include_directories(${LIBRARY_NAME} PRIVATE
        "${CMAKE_CURRENT_SOURCE_DIR}/VCL2"
        "${GENERATED_INCLUDE_DIR}"
    )

    # Per-source options for the SSE4 translation unit. INSTRSET=6 is passed as
    # a preprocessor definition (NOTE(review): presumably the VCL
    # instruction-set selector - confirm against the VCL manual). The GNU-style
    # front end additionally silences narrowing-conversion diagnostics.
    if(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "MSVC")
        set_source_files_properties(
            "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_sse4.cpp"
            PROPERTIES COMPILE_OPTIONS "/DINSTRSET=6"
        )
    elseif(CMAKE_CXX_COMPILER_FRONTEND_VARIANT STREQUAL "GNU")
        set_source_files_properties(
            "${CMAKE_CURRENT_SOURCE_DIR}/src/flash3kyuu_deband_impl_sse4.cpp"
            PROPERTIES COMPILE_OPTIONS "-DINSTRSET=6;-Wno-narrowing"
        )
    endif()
else()
message(STATUS "Non-x86 architecture detected (${CMAKE_SYSTEM_PROCESSOR}). Skipping SIMD-specific source files.")
endif()
Expand Down
48 changes: 44 additions & 4 deletions src/impl_dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,36 +8,76 @@ const process_plane_impl_t* process_plane_impl_high_precision_no_dithering[] = {
process_plane_impl_c_high_no_dithering,
process_plane_impl_c_high_no_dithering,
process_plane_impl_c_high_no_dithering,
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
process_plane_impl_sse4_high_no_dithering,
process_plane_impl_avx2_high_no_dithering,
process_plane_impl_avx512_high_no_dithering
process_plane_impl_avx512_high_no_dithering,
#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
process_plane_impl_sse4_high_no_dithering,
nullptr,
nullptr,
#else
nullptr,
nullptr,
nullptr,
#endif
};

// Dispatch table for the high-precision, ordered-dithering code path.
// Slots 0-2 always point at the portable C implementation. Slots 3-5 hold the
// SSE4, AVX2 and AVX-512 implementations on x86; on ARM only the SSE4 slot is
// populated, and on any other architecture all three are nullptr.
// NOTE(review): the slot index is presumably the selected optimisation level,
// and callers must not dispatch through a nullptr slot - confirm at the use site.
const process_plane_impl_t* process_plane_impl_high_precision_ordered_dithering[] = {
    process_plane_impl_c_high_ordered_dithering,
    process_plane_impl_c_high_ordered_dithering,
    process_plane_impl_c_high_ordered_dithering,
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
    process_plane_impl_sse4_high_ordered_dithering,
    process_plane_impl_avx2_high_ordered_dithering,
    process_plane_impl_avx512_high_ordered_dithering,
// _M_ARM64 is what MSVC defines for 64-bit ARM targets; without it those
// builds would silently fall through to the all-nullptr branch below.
#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
    process_plane_impl_sse4_high_ordered_dithering,
    nullptr,
    nullptr,
#else
    nullptr,
    nullptr,
    nullptr,
#endif
};

// Dispatch table for the high-precision, Floyd-Steinberg-dithering code path.
// Slots 0-2 always point at the portable C implementation. Slots 3-5 hold the
// SSE4, AVX2 and AVX-512 implementations on x86; on ARM only the SSE4 slot is
// populated, and on any other architecture all three are nullptr.
// NOTE(review): the slot index is presumably the selected optimisation level,
// and callers must not dispatch through a nullptr slot - confirm at the use site.
const process_plane_impl_t* process_plane_impl_high_precision_floyd_steinberg_dithering[] = {
    process_plane_impl_c_high_floyd_steinberg_dithering,
    process_plane_impl_c_high_floyd_steinberg_dithering,
    process_plane_impl_c_high_floyd_steinberg_dithering,
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
    process_plane_impl_sse4_high_floyd_steinberg_dithering,
    process_plane_impl_avx2_high_floyd_steinberg_dithering,
    process_plane_impl_avx512_high_floyd_steinberg_dithering,
// _M_ARM64 is what MSVC defines for 64-bit ARM targets; without it those
// builds would silently fall through to the all-nullptr branch below.
#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
    process_plane_impl_sse4_high_floyd_steinberg_dithering,
    nullptr,
    nullptr,
#else
    nullptr,
    nullptr,
    nullptr,
#endif
};

// Dispatch table for the 16-bit interleaved code path.
// Slots 0-2 always point at the portable C implementation. Slots 3-5 hold the
// SSE4, AVX2 and AVX-512 implementations on x86; on ARM only the SSE4 slot is
// populated, and on any other architecture all three are nullptr.
// NOTE(review): the slot index is presumably the selected optimisation level,
// and callers must not dispatch through a nullptr slot - confirm at the use site.
const process_plane_impl_t* process_plane_impl_16bit_interleaved[] = {
    process_plane_impl_c_16bit_interleaved,
    process_plane_impl_c_16bit_interleaved,
    process_plane_impl_c_16bit_interleaved,
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
    process_plane_impl_sse4_16bit_interleaved,
    process_plane_impl_avx2_16bit_interleaved,
    process_plane_impl_avx512_16bit_interleaved,
// _M_ARM64 is what MSVC defines for 64-bit ARM targets; without it those
// builds would silently fall through to the all-nullptr branch below.
#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64)
    process_plane_impl_sse4_16bit_interleaved,
    nullptr,
    nullptr,
#else
    nullptr,
    nullptr,
    nullptr,
#endif
};


Expand Down