mirror of
https://github.com/orange-cpp/omath.git
synced 2026-02-13 07:03:25 +00:00
Merge pull request #73 from orange-cpp/featore/performance_tests
added performance folder
This commit is contained in:
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -1,3 +1,6 @@
|
|||||||
[submodule "extlibs/googletest"]
|
[submodule "extlibs/googletest"]
|
||||||
path = extlibs/googletest
|
path = extlibs/googletest
|
||||||
url = https://github.com/google/googletest.git
|
url = https://github.com/google/googletest.git
|
||||||
|
[submodule "extlibs/benchmark"]
|
||||||
|
path = extlibs/benchmark
|
||||||
|
url = https://github.com/google/benchmark.git
|
||||||
|
|||||||
1
.idea/vcs.xml
generated
1
.idea/vcs.xml
generated
@@ -2,6 +2,7 @@
|
|||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="VcsDirectoryMappings">
|
<component name="VcsDirectoryMappings">
|
||||||
<mapping directory="" vcs="Git" />
|
<mapping directory="" vcs="Git" />
|
||||||
|
<mapping directory="$PROJECT_DIR$/extlibs/benchmark" vcs="Git" />
|
||||||
<mapping directory="$PROJECT_DIR$/extlibs/googletest" vcs="Git" />
|
<mapping directory="$PROJECT_DIR$/extlibs/googletest" vcs="Git" />
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
||||||
@@ -6,6 +6,7 @@ include(CMakePackageConfigHelpers)
|
|||||||
|
|
||||||
|
|
||||||
option(OMATH_BUILD_TESTS "Build unit tests" ${PROJECT_IS_TOP_LEVEL})
|
option(OMATH_BUILD_TESTS "Build unit tests" ${PROJECT_IS_TOP_LEVEL})
|
||||||
|
option(OMATH_BUILD_BENCHMARK "Build benchmarks" ${PROJECT_IS_TOP_LEVEL})
|
||||||
option(OMATH_THREAT_WARNING_AS_ERROR "Set highest level of warnings and force compiler to treat them as errors" ON)
|
option(OMATH_THREAT_WARNING_AS_ERROR "Set highest level of warnings and force compiler to treat them as errors" ON)
|
||||||
option(OMATH_BUILD_AS_SHARED_LIBRARY "Build Omath as .so or .dll" OFF)
|
option(OMATH_BUILD_AS_SHARED_LIBRARY "Build Omath as .so or .dll" OFF)
|
||||||
option(OMATH_USE_AVX2 "Omath will use AVX2 to boost performance" ON)
|
option(OMATH_USE_AVX2 "Omath will use AVX2 to boost performance" ON)
|
||||||
@@ -16,9 +17,10 @@ option(OMATH_SUPRESS_SAFETY_CHECKS "Supress some safety checks in release build
|
|||||||
option(OMATH_USE_UNITY_BUILD "Will enable unity build to speed up compilation" OFF)
|
option(OMATH_USE_UNITY_BUILD "Will enable unity build to speed up compilation" OFF)
|
||||||
option(OMATH_ENABLE_LEGACY "Will enable legacy classes that MUST be used ONLY for backward compatibility" OFF)
|
option(OMATH_ENABLE_LEGACY "Will enable legacy classes that MUST be used ONLY for backward compatibility" OFF)
|
||||||
|
|
||||||
message(STATUS "[${PROJECT_NAME}]: Building on ${CMAKE_HOST_SYSTEM_NAME}")
|
message(STATUS "[${PROJECT_NAME}]: Building on ${CMAKE_HOST_SYSTEM_NAME}, compiler ${CMAKE_CXX_COMPILER_ID}")
|
||||||
message(STATUS "[${PROJECT_NAME}]: Warnings as errors ${OMATH_THREAT_WARNING_AS_ERROR}")
|
message(STATUS "[${PROJECT_NAME}]: Warnings as errors ${OMATH_THREAT_WARNING_AS_ERROR}")
|
||||||
message(STATUS "[${PROJECT_NAME}]: Build unit tests ${OMATH_BUILD_TESTS}")
|
message(STATUS "[${PROJECT_NAME}]: Build unit tests ${OMATH_BUILD_TESTS}")
|
||||||
|
message(STATUS "[${PROJECT_NAME}]: Build benchmark ${OMATH_BUILD_BENCHMARK}")
|
||||||
message(STATUS "[${PROJECT_NAME}]: As dynamic library ${OMATH_BUILD_AS_SHARED_LIBRARY}")
|
message(STATUS "[${PROJECT_NAME}]: As dynamic library ${OMATH_BUILD_AS_SHARED_LIBRARY}")
|
||||||
message(STATUS "[${PROJECT_NAME}]: Static C++ runtime ${OMATH_STATIC_MSVC_RUNTIME_LIBRARY}")
|
message(STATUS "[${PROJECT_NAME}]: Static C++ runtime ${OMATH_STATIC_MSVC_RUNTIME_LIBRARY}")
|
||||||
message(STATUS "[${PROJECT_NAME}]: CMake unity build ${OMATH_USE_UNITY_BUILD}")
|
message(STATUS "[${PROJECT_NAME}]: CMake unity build ${OMATH_USE_UNITY_BUILD}")
|
||||||
@@ -90,19 +92,25 @@ if (OMATH_STATIC_MSVC_RUNTIME_LIBRARY)
|
|||||||
)
|
)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
if (OMATH_USE_AVX2 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
||||||
target_compile_options(${PROJECT_NAME} PRIVATE -mavx2 -mfma)
|
target_compile_options(${PROJECT_NAME} PUBLIC -mavx2 -mavx -mfma)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_23)
|
target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_23)
|
||||||
|
|
||||||
|
if (OMATH_BUILD_TESTS OR OMATH_BUILD_BENCHMARK)
|
||||||
|
add_subdirectory(extlibs)
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (OMATH_BUILD_TESTS)
|
if (OMATH_BUILD_TESTS)
|
||||||
add_subdirectory(extlibs)
|
|
||||||
add_subdirectory(tests)
|
add_subdirectory(tests)
|
||||||
target_compile_definitions(${PROJECT_NAME} PUBLIC OMATH_BUILD_TESTS)
|
target_compile_definitions(${PROJECT_NAME} PUBLIC OMATH_BUILD_TESTS)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (OMATH_BUILD_BENCHMARK)
|
||||||
|
add_subdirectory(benchmark)
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (OMATH_BUILD_EXAMPLES)
|
if (OMATH_BUILD_EXAMPLES)
|
||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
endif ()
|
endif ()
|
||||||
|
|||||||
15
benchmark/CMakeLists.txt
Normal file
15
benchmark/CMakeLists.txt
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Build script for the omath micro-benchmark executable.
project(omath_benchmark)

# Collect every .cpp file below this directory; CONFIGURE_DEPENDS asks the
# build system to re-check the glob so newly added files get picked up.
file(GLOB_RECURSE omath_benchmark_sources
        CONFIGURE_DEPENDS
        "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")

add_executable(${PROJECT_NAME} ${omath_benchmark_sources})

# All artifacts of this target land in <top-level source dir>/out/<build type>.
set(omath_benchmark_output_dir "${CMAKE_SOURCE_DIR}/out/${CMAKE_BUILD_TYPE}")

set_target_properties(${PROJECT_NAME} PROPERTIES
        ARCHIVE_OUTPUT_DIRECTORY "${omath_benchmark_output_dir}"
        LIBRARY_OUTPUT_DIRECTORY "${omath_benchmark_output_dir}"
        RUNTIME_OUTPUT_DIRECTORY "${omath_benchmark_output_dir}")

# The benchmark sources are compiled as C++23, with no fallback to an older standard.
set_target_properties(${PROJECT_NAME} PROPERTIES
        CXX_STANDARD 23
        CXX_STANDARD_REQUIRED ON)

target_link_libraries(${PROJECT_NAME}
        PRIVATE
        benchmark::benchmark
        omath)
|
||||||
66
benchmark/benchmark_mat.cpp
Normal file
66
benchmark/benchmark_mat.cpp
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
//
|
||||||
|
// Created by Vlad on 9/17/2025.
|
||||||
|
//
|
||||||
|
#include <benchmark/benchmark.h>
|
||||||
|
|
||||||
|
#include <omath/omath.hpp>
|
||||||
|
#include <chrono>
|
||||||
|
using namespace omath;
|
||||||
|
|
||||||
|
|
||||||
|
// Benchmarks the product of two 128x128 column-major float matrices.
void mat_float_multiplication_col_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, float, MatStoreType::COLUMN_MAJOR>;
    MatType a;
    MatType b;
    a.set(3.f);
    b.set(7.f);

    for (auto _ : state)
    {
        auto product = a * b;
        // Assigning to std::ignore does not stop the optimiser from discarding
        // the whole multiplication; DoNotOptimize forces the result to be
        // materialised so the loop measures real work.
        benchmark::DoNotOptimize(product);
    }
}
|
||||||
|
// Benchmarks the product of two 128x128 row-major float matrices.
void mat_float_multiplication_row_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, float, MatStoreType::ROW_MAJOR>;
    MatType a;
    MatType b;
    a.set(3.f);
    b.set(7.f);

    for (auto _ : state)
    {
        auto product = a * b;
        // Assigning to std::ignore does not stop the optimiser from discarding
        // the whole multiplication; DoNotOptimize forces the result to be
        // materialised so the loop measures real work.
        benchmark::DoNotOptimize(product);
    }
}
|
||||||
|
|
||||||
|
// Benchmarks the product of two 128x128 row-major double matrices.
void mat_double_multiplication_row_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, double, MatStoreType::ROW_MAJOR>;
    MatType a;
    MatType b;
    // Double literals: the element type here is double, not float.
    a.set(3.0);
    b.set(7.0);

    for (auto _ : state)
    {
        auto product = a * b;
        // Assigning to std::ignore does not stop the optimiser from discarding
        // the whole multiplication; DoNotOptimize forces the result to be
        // materialised so the loop measures real work.
        benchmark::DoNotOptimize(product);
    }
}
|
||||||
|
|
||||||
|
// Benchmarks the product of two 128x128 column-major double matrices.
void mat_double_multiplication_col_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, double, MatStoreType::COLUMN_MAJOR>;
    MatType a;
    MatType b;
    // Double literals: the element type here is double, not float.
    a.set(3.0);
    b.set(7.0);

    for (auto _ : state)
    {
        auto product = a * b;
        // Assigning to std::ignore does not stop the optimiser from discarding
        // the whole multiplication; DoNotOptimize forces the result to be
        // materialised so the loop measures real work.
        benchmark::DoNotOptimize(product);
    }
}
|
||||||
|
|
||||||
|
// Register the four matrix-multiplication benchmarks; each one is pinned to a
// fixed count of 5000 iterations via Iterations(5000).
BENCHMARK(mat_float_multiplication_col_major)->Iterations(5000);
|
||||||
|
BENCHMARK(mat_float_multiplication_row_major)->Iterations(5000);
|
||||||
|
|
||||||
|
BENCHMARK(mat_double_multiplication_col_major)->Iterations(5000);
|
||||||
|
BENCHMARK(mat_double_multiplication_row_major)->Iterations(5000);
|
||||||
5
benchmark/main.cpp
Normal file
5
benchmark/main.cpp
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
//
|
||||||
|
// Created by Vlad on 9/17/2025.
|
||||||
|
//
|
||||||
|
#include <benchmark/benchmark.h>
|
||||||
|
// Google Benchmark macro; presumably supplies the program entry point that runs
// the registered benchmarks, since this file defines no main() itself — confirm
// against the library documentation.
BENCHMARK_MAIN();
|
||||||
@@ -1 +1,2 @@
|
|||||||
add_subdirectory(googletest)
|
add_subdirectory(googletest)
|
||||||
|
add_subdirectory(benchmark)
|
||||||
1
extlibs/benchmark
Submodule
1
extlibs/benchmark
Submodule
Submodule extlibs/benchmark added at 2948b6a2e6
@@ -10,7 +10,7 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
namespace omath
|
namespace omath
|
||||||
{
|
{
|
||||||
@@ -155,10 +155,17 @@ namespace omath
|
|||||||
constexpr Mat<Rows, OtherColumns, Type, StoreType>
|
constexpr Mat<Rows, OtherColumns, Type, StoreType>
|
||||||
operator*(const Mat<Columns, OtherColumns, Type, StoreType>& other) const
|
operator*(const Mat<Columns, OtherColumns, Type, StoreType>& other) const
|
||||||
{
|
{
|
||||||
|
#ifdef OMATH_USE_AVX2
|
||||||
|
if constexpr (StoreType == MatStoreType::ROW_MAJOR)
|
||||||
|
return avx_multiply_row_major(other);
|
||||||
|
if constexpr (StoreType == MatStoreType::COLUMN_MAJOR)
|
||||||
|
return avx_multiply_col_major(other);
|
||||||
|
#else
|
||||||
if constexpr (StoreType == MatStoreType::ROW_MAJOR)
|
if constexpr (StoreType == MatStoreType::ROW_MAJOR)
|
||||||
return cache_friendly_multiply_row_major(other);
|
return cache_friendly_multiply_row_major(other);
|
||||||
if constexpr (StoreType == MatStoreType::COLUMN_MAJOR)
|
if constexpr (StoreType == MatStoreType::COLUMN_MAJOR)
|
||||||
return cache_friendly_multiply_col_major(other);
|
return cache_friendly_multiply_col_major(other);
|
||||||
|
#endif
|
||||||
std::unreachable();
|
std::unreachable();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -391,6 +398,160 @@ namespace omath
|
|||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
#ifdef OMATH_USE_AVX2
|
||||||
|
template<size_t OtherColumns> [[nodiscard]]
|
||||||
|
constexpr Mat<Rows, OtherColumns, Type, MatStoreType::COLUMN_MAJOR>
|
||||||
|
avx_multiply_col_major(const Mat<Columns, OtherColumns, Type, MatStoreType::COLUMN_MAJOR>& other) const
|
||||||
|
{
|
||||||
|
Mat<Rows, OtherColumns, Type, MatStoreType::COLUMN_MAJOR> result;
|
||||||
|
|
||||||
|
const Type* this_mat_data = this->raw_array().data();
|
||||||
|
const Type* other_mat_data = other.raw_array().data();
|
||||||
|
Type* result_mat_data = result.raw_array().data();
|
||||||
|
|
||||||
|
if constexpr (std::is_same_v<Type, float>)
|
||||||
|
{
|
||||||
|
// ReSharper disable once CppTooWideScopeInitStatement
|
||||||
|
constexpr std::size_t vector_size = 8;
|
||||||
|
for (std::size_t j = 0; j < OtherColumns; ++j)
|
||||||
|
{
|
||||||
|
auto* c_col = reinterpret_cast<float*>(result_mat_data + j * Rows);
|
||||||
|
for (std::size_t k = 0; k < Columns; ++k)
|
||||||
|
{
|
||||||
|
const float bkj = reinterpret_cast<const float*>(other_mat_data)[k + j * Columns];
|
||||||
|
__m256 bkjv = _mm256_set1_ps(bkj);
|
||||||
|
|
||||||
|
const auto* a_col_k = reinterpret_cast<const float*>(this_mat_data + k * Rows);
|
||||||
|
|
||||||
|
std::size_t i = 0;
|
||||||
|
for (; i + vector_size <= Rows; i += vector_size)
|
||||||
|
{
|
||||||
|
__m256 cvec = _mm256_loadu_ps(c_col + i);
|
||||||
|
__m256 avec = _mm256_loadu_ps(a_col_k + i);
|
||||||
|
#if defined(__FMA__)
|
||||||
|
cvec = _mm256_fmadd_ps(avec, bkjv, cvec);
|
||||||
|
#else
|
||||||
|
cvec = _mm256_add_ps(cvec, _mm256_mul_ps(avec, bkjv));
|
||||||
|
#endif
|
||||||
|
_mm256_storeu_ps(c_col + i, cvec);
|
||||||
|
}
|
||||||
|
for (; i < Rows; ++i)
|
||||||
|
c_col[i] += a_col_k[i] * bkj;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (std::is_same_v<Type, double>)
|
||||||
|
{ // double
|
||||||
|
// ReSharper disable once CppTooWideScopeInitStatement
|
||||||
|
constexpr std::size_t vector_size = 4;
|
||||||
|
for (std::size_t j = 0; j < OtherColumns; ++j)
|
||||||
|
{
|
||||||
|
auto* c_col = reinterpret_cast<double*>(result_mat_data + j * Rows);
|
||||||
|
for (std::size_t k = 0; k < Columns; ++k)
|
||||||
|
{
|
||||||
|
const double bkj = reinterpret_cast<const double*>(other_mat_data)[k + j * Columns];
|
||||||
|
__m256d bkjv = _mm256_set1_pd(bkj);
|
||||||
|
|
||||||
|
const auto* a_col_k = reinterpret_cast<const double*>(this_mat_data + k * Rows);
|
||||||
|
|
||||||
|
std::size_t i = 0;
|
||||||
|
for (; i + vector_size <= Rows; i += vector_size)
|
||||||
|
{
|
||||||
|
__m256d cvec = _mm256_loadu_pd(c_col + i);
|
||||||
|
__m256d avec = _mm256_loadu_pd(a_col_k + i);
|
||||||
|
#if defined(__FMA__)
|
||||||
|
cvec = _mm256_fmadd_pd(avec, bkjv, cvec);
|
||||||
|
#else
|
||||||
|
cvec = _mm256_add_pd(cvec, _mm256_mul_pd(avec, bkjv));
|
||||||
|
#endif
|
||||||
|
_mm256_storeu_pd(c_col + i, cvec);
|
||||||
|
}
|
||||||
|
for (; i < Rows; ++i)
|
||||||
|
c_col[i] += a_col_k[i] * bkj;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
std::unreachable();
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<size_t OtherColumns> [[nodiscard]]
|
||||||
|
constexpr Mat<Rows, OtherColumns, Type, MatStoreType::ROW_MAJOR>
|
||||||
|
avx_multiply_row_major(const Mat<Columns, OtherColumns, Type, MatStoreType::ROW_MAJOR>& other) const
|
||||||
|
{
|
||||||
|
Mat<Rows, OtherColumns, Type, MatStoreType::ROW_MAJOR> result;
|
||||||
|
|
||||||
|
const Type* this_mat_data = this->raw_array().data();
|
||||||
|
const Type* other_mat_data = other.raw_array().data();
|
||||||
|
Type* result_mat_data = result.raw_array().data();
|
||||||
|
|
||||||
|
if constexpr (std::is_same_v<Type, float>)
|
||||||
|
{
|
||||||
|
// ReSharper disable once CppTooWideScopeInitStatement
|
||||||
|
constexpr std::size_t vector_size = 8;
|
||||||
|
for (std::size_t i = 0; i < Rows; ++i)
|
||||||
|
{
|
||||||
|
Type* c_row = result_mat_data + i * OtherColumns;
|
||||||
|
for (std::size_t k = 0; k < Columns; ++k)
|
||||||
|
{
|
||||||
|
const auto aik = static_cast<float>(this_mat_data[i * Columns + k]);
|
||||||
|
__m256 aikv = _mm256_set1_ps(aik);
|
||||||
|
const auto* b_row = reinterpret_cast<const float*>(other_mat_data + k * OtherColumns);
|
||||||
|
|
||||||
|
std::size_t j = 0;
|
||||||
|
for (; j + vector_size <= OtherColumns; j += vector_size)
|
||||||
|
{
|
||||||
|
__m256 cvec = _mm256_loadu_ps(c_row + j);
|
||||||
|
__m256 bvec = _mm256_loadu_ps(b_row + j);
|
||||||
|
#if defined(__FMA__)
|
||||||
|
cvec = _mm256_fmadd_ps(bvec, aikv, cvec);
|
||||||
|
#else
|
||||||
|
cvec = _mm256_add_ps(cvec, _mm256_mul_ps(bvec, aikv));
|
||||||
|
#endif
|
||||||
|
_mm256_storeu_ps(c_row + j, cvec);
|
||||||
|
}
|
||||||
|
for (; j < OtherColumns; ++j)
|
||||||
|
c_row[j] += aik * b_row[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (std::is_same_v<Type, double>)
|
||||||
|
{ // double
|
||||||
|
// ReSharper disable once CppTooWideScopeInitStatement
|
||||||
|
constexpr std::size_t vector_size = 4;
|
||||||
|
for (std::size_t i = 0; i < Rows; ++i)
|
||||||
|
{
|
||||||
|
Type* c_row = result_mat_data + i * OtherColumns;
|
||||||
|
for (std::size_t k = 0; k < Columns; ++k)
|
||||||
|
{
|
||||||
|
const auto aik = static_cast<double>(this_mat_data[i * Columns + k]);
|
||||||
|
__m256d aikv = _mm256_set1_pd(aik);
|
||||||
|
const auto* b_row = reinterpret_cast<const double*>(other_mat_data + k * OtherColumns);
|
||||||
|
|
||||||
|
std::size_t j = 0;
|
||||||
|
for (; j + vector_size <= OtherColumns; j += vector_size)
|
||||||
|
{
|
||||||
|
__m256d cvec = _mm256_loadu_pd(c_row + j);
|
||||||
|
__m256d bvec = _mm256_loadu_pd(b_row + j);
|
||||||
|
#if defined(__FMA__)
|
||||||
|
cvec = _mm256_fmadd_pd(bvec, aikv, cvec);
|
||||||
|
#else
|
||||||
|
cvec = _mm256_add_pd(cvec, _mm256_mul_pd(bvec, aikv));
|
||||||
|
#endif
|
||||||
|
_mm256_storeu_pd(c_row + j, cvec);
|
||||||
|
}
|
||||||
|
for (; j < OtherColumns; ++j)
|
||||||
|
c_row[j] += aik * b_row[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
std::unreachable();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class Type = float, MatStoreType St = MatStoreType::ROW_MAJOR> [[nodiscard]]
|
template<class Type = float, MatStoreType St = MatStoreType::ROW_MAJOR> [[nodiscard]]
|
||||||
|
|||||||
Reference in New Issue
Block a user