Compare commits

...

24 Commits

Author SHA1 Message Date
2ec0e2f93f Update CREDITS.md to include Billy O'Neal
Added acknowledgment for Billy O'Neal's contributions.
2025-09-18 06:08:49 +03:00
9170ffb1a9 Merge pull request #74 from BillyONeal/fmodf
Don't name std::fmodf.
2025-09-18 06:04:43 +03:00
e05f9ef5a9 Removes FMA check for matrix multiplication
Removes preprocessor check for FMA instructions in matrix multiplication functions.
This simplifies the code and relies on the compiler's ability to optimize the
code based on available hardware support. The assumption is that modern
compilers will automatically utilize FMA instructions if available, and fall
back to alternative implementations if not.
2025-09-18 06:02:37 +03:00
89bb4aa625 Guards AVX2 usage with a preprocessor definition
Ensures that AVX2 intrinsics are only included when the
OMATH_USE_AVX2 preprocessor definition is set. This prevents
compilation errors when AVX2 support is not available or
explicitly disabled.
2025-09-18 05:22:22 +03:00
Billy Robert O'Neal III
9b0845593d Don't name std::fmodf.
The C standard library function fmodf is not guaranteed to be in namespace std, and in fact is not with a default Ubuntu 24.04 installation, leading to the following compile error:

```console
Change Dir: '/vcpkg/buildtrees/vcpkg-ci-orange-math/x64-linux-dbg'

Run Build Command(s): /vcpkg/downloads/tools/ninja/1.12.1-linux/ninja -v -v -j33
[1/2] /usr/bin/c++ -DOMATH_SUPRESS_SAFETY_CHECKS -DOMATH_VERSION=\"3.5.0\" -isystem /vcpkg/installed/x64-linux/include -fPIC -g -std=gnu++23 -MD -MT CMakeFiles/main.dir/main.cpp.o -MF CMakeFiles/main.dir/main.cpp.o.d -o CMakeFiles/main.dir/main.cpp.o -c /vcpkg/scripts/test_ports/vcpkg-ci-orange-math/project/main.cpp
FAILED: CMakeFiles/main.dir/main.cpp.o
/usr/bin/c++ -DOMATH_SUPRESS_SAFETY_CHECKS -DOMATH_VERSION=\"3.5.0\" -isystem /vcpkg/installed/x64-linux/include -fPIC -g -std=gnu++23 -MD -MT CMakeFiles/main.dir/main.cpp.o -MF CMakeFiles/main.dir/main.cpp.o.d -o CMakeFiles/main.dir/main.cpp.o -c /vcpkg/scripts/test_ports/vcpkg-ci-orange-math/project/main.cpp
In file included from /vcpkg/installed/x64-linux/include/omath/omath.hpp:22,
                 from /vcpkg/scripts/test_ports/vcpkg-ci-orange-math/project/main.cpp:1:
/vcpkg/installed/x64-linux/include/omath/color.hpp: In member function ‘constexpr omath::Hsv omath::Color::to_hsv() const’:
/vcpkg/installed/x64-linux/include/omath/color.hpp:98:45: error: ‘fmodf’ is not a member of ‘std’; did you mean ‘modf’?
   98 |                 hsv_data.hue = 60.f * (std::fmodf(((green - blue) / delta), 6.f));
      |                                             ^~~~~
      |                                             modf
ninja: build stopped: subcommand failed.
```

Only the 'sufficient additional overloads' of `fmod` are guaranteed to be in `std`. Since this is clearly intended to call the (float, float) overload, explicitly cast `((green - blue) / delta)` (which is a `double`) to `float` and call the name in `std` as suggested by the diagnostic.
2025-09-17 19:15:10 -07:00
617ded2dd4 Merge pull request #73 from orange-cpp/featore/performance_tests
added performance folder
2025-09-17 20:53:11 +03:00
e882a224d2 fix 2025-09-17 20:50:30 +03:00
e04f6573c0 patch 2025-09-17 20:46:00 +03:00
791e3b2313 improved bench 2025-09-17 20:40:03 +03:00
26b56d757c fix 2025-09-17 20:25:22 +03:00
fbb77b9925 patch 2025-09-17 20:22:42 +03:00
7b671dbd90 added benchmark submodule 2025-09-17 20:14:33 +03:00
5875930f1a added benchmark 2025-09-17 19:56:50 +03:00
d773985822 added avx mutiplication 2025-09-17 19:47:29 +03:00
a2de6f8fae renamed folder 2025-09-17 18:07:28 +03:00
d71795006d added performance folder 2025-09-17 17:47:55 +03:00
561438d45c Merge pull request #72 from orange-cpp/feature/mat_refactor
Feature/mat refactor
2025-09-17 17:41:15 +03:00
874b028e86 removed unused var 2025-09-17 17:38:17 +03:00
68ec42d9ed added space 2025-09-17 17:33:05 +03:00
8aeb4667d7 decomposed mutiplication 2025-09-17 17:30:57 +03:00
565464f0cd forgot std 2025-09-17 17:23:02 +03:00
04b50d4545 Merge pull request #71 from orange-cpp/feature/mat_perf_boost
Improves matrix multiplication performance
2025-09-17 17:18:12 +03:00
e01d32fb22 Improves matrix multiplication performance
Optimizes matrix multiplication by specializing the algorithm
based on the matrix storage type (row-major or column-major).

This change significantly improves performance by leveraging
memory access patterns specific to each storage order.
2025-09-17 17:12:41 +03:00
a3a023a664 Add acknowledgment for AmbushedRaccoon's contribution 2025-09-16 16:58:00 +03:00
11 changed files with 294 additions and 18 deletions

5
.gitmodules vendored
View File

@@ -1,3 +1,6 @@
[submodule "extlibs/googletest"]
path = extlibs/googletest
url = https://github.com/google/googletest.git
url = https://github.com/google/googletest.git
[submodule "extlibs/benchmark"]
path = extlibs/benchmark
url = https://github.com/google/benchmark.git

1
.idea/vcs.xml generated
View File

@@ -2,6 +2,7 @@
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
<mapping directory="$PROJECT_DIR$/extlibs/benchmark" vcs="Git" />
<mapping directory="$PROJECT_DIR$/extlibs/googletest" vcs="Git" />
</component>
</project>

View File

@@ -6,6 +6,7 @@ include(CMakePackageConfigHelpers)
option(OMATH_BUILD_TESTS "Build unit tests" ${PROJECT_IS_TOP_LEVEL})
option(OMATH_BUILD_BENCHMARK "Build benchmarks" ${PROJECT_IS_TOP_LEVEL})
option(OMATH_THREAT_WARNING_AS_ERROR "Set highest level of warnings and force compiler to treat them as errors" ON)
option(OMATH_BUILD_AS_SHARED_LIBRARY "Build Omath as .so or .dll" OFF)
option(OMATH_USE_AVX2 "Omath will use AVX2 to boost performance" ON)
@@ -16,9 +17,10 @@ option(OMATH_SUPRESS_SAFETY_CHECKS "Supress some safety checks in release build
option(OMATH_USE_UNITY_BUILD "Will enable unity build to speed up compilation" OFF)
option(OMATH_ENABLE_LEGACY "Will enable legacy classes that MUST be used ONLY for backward compatibility" OFF)
message(STATUS "[${PROJECT_NAME}]: Building on ${CMAKE_HOST_SYSTEM_NAME}")
message(STATUS "[${PROJECT_NAME}]: Building on ${CMAKE_HOST_SYSTEM_NAME}, compiler ${CMAKE_CXX_COMPILER_ID}")
message(STATUS "[${PROJECT_NAME}]: Warnings as errors ${OMATH_THREAT_WARNING_AS_ERROR}")
message(STATUS "[${PROJECT_NAME}]: Build unit tests ${OMATH_BUILD_TESTS}")
message(STATUS "[${PROJECT_NAME}]: Build benchmark ${OMATH_BUILD_BENCHMARK}")
message(STATUS "[${PROJECT_NAME}]: As dynamic library ${OMATH_BUILD_AS_SHARED_LIBRARY}")
message(STATUS "[${PROJECT_NAME}]: Static C++ runtime ${OMATH_STATIC_MSVC_RUNTIME_LIBRARY}")
message(STATUS "[${PROJECT_NAME}]: CMake unity build ${OMATH_USE_UNITY_BUILD}")
@@ -90,19 +92,25 @@ if (OMATH_STATIC_MSVC_RUNTIME_LIBRARY)
)
endif ()
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
target_compile_options(${PROJECT_NAME} PRIVATE -mavx2 -mfma)
if (OMATH_USE_AVX2 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
target_compile_options(${PROJECT_NAME} PUBLIC -mavx2 -mavx -mfma)
endif ()
target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_23)
if (OMATH_BUILD_TESTS OR OMATH_BUILD_BENCHMARK)
add_subdirectory(extlibs)
endif ()
if (OMATH_BUILD_TESTS)
add_subdirectory(extlibs)
add_subdirectory(tests)
target_compile_definitions(${PROJECT_NAME} PUBLIC OMATH_BUILD_TESTS)
endif ()
if (OMATH_BUILD_BENCHMARK)
add_subdirectory(benchmark)
endif ()
if (OMATH_BUILD_EXAMPLES)
add_subdirectory(examples)
endif ()

View File

@@ -3,6 +3,8 @@
Thanks to everyone who made this possible, including:
- Saikari aka luadebug for VCPKG port and awesome new initial logo design.
- AmbushedRaccoon for telegram post about omath to boost repository activity.
- Billy O'Neal aka BillyONeal for fixing compilation issues due to C math library compatibility.
And a big hand to everyone else who has contributed in the past!

15
benchmark/CMakeLists.txt Normal file
View File

@@ -0,0 +1,15 @@
# Benchmark executable for omath, built from every .cpp file found under this directory.
project(omath_benchmark)
# CONFIGURE_DEPENDS asks CMake to re-check the glob at build time so newly added sources are noticed.
file(GLOB_RECURSE OMATH_BENCHMARK_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
add_executable(${PROJECT_NAME} ${OMATH_BENCHMARK_SOURCES})
# Route every artifact kind to <top-level source dir>/out/<build type> and require C++23.
# NOTE(review): CMAKE_BUILD_TYPE is usually empty with multi-config generators - confirm the
# resulting output path is the intended one there.
set_target_properties(${PROJECT_NAME} PROPERTIES
ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/out/${CMAKE_BUILD_TYPE}"
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/out/${CMAKE_BUILD_TYPE}"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/out/${CMAKE_BUILD_TYPE}"
CXX_STANDARD 23
CXX_STANDARD_REQUIRED ON)
# Link against the Google Benchmark target and the omath library target.
target_link_libraries(${PROJECT_NAME} PRIVATE benchmark::benchmark omath)

View File

@@ -0,0 +1,66 @@
//
// Created by Vlad on 9/17/2025.
//
#include <benchmark/benchmark.h>
#include <omath/omath.hpp>
#include <chrono>
using namespace omath;
// Benchmarks `a * b` for two 128x128 column-major float matrices.
//
// The operands are prepared once, outside the timed loop, via set(3.f) / set(7.f).
// Inside the loop the operands and the product are passed through
// benchmark::DoNotOptimize so the compiler can neither hoist the (loop-invariant)
// multiplication out of the loop nor discard it as dead code.
//
// @param state Google Benchmark state object that drives the timed loop.
void mat_float_multiplication_col_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, float, MatStoreType::COLUMN_MAJOR>;
    MatType a;
    MatType b;
    a.set(3.f);
    b.set(7.f);
    for (auto _ : state)
    {
        // Treat the inputs as possibly modified so each iteration recomputes the product.
        benchmark::DoNotOptimize(a);
        benchmark::DoNotOptimize(b);
        auto product = a * b;
        // Force the result to be materialized instead of being optimized away.
        benchmark::DoNotOptimize(product);
    }
}
// Benchmarks `a * b` for two 128x128 row-major float matrices.
//
// The operands are prepared once, outside the timed loop, via set(3.f) / set(7.f).
// Inside the loop the operands and the product are passed through
// benchmark::DoNotOptimize so the compiler can neither hoist the (loop-invariant)
// multiplication out of the loop nor discard it as dead code.
//
// @param state Google Benchmark state object that drives the timed loop.
void mat_float_multiplication_row_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, float, MatStoreType::ROW_MAJOR>;
    MatType a;
    MatType b;
    a.set(3.f);
    b.set(7.f);
    for (auto _ : state)
    {
        // Treat the inputs as possibly modified so each iteration recomputes the product.
        benchmark::DoNotOptimize(a);
        benchmark::DoNotOptimize(b);
        auto product = a * b;
        // Force the result to be materialized instead of being optimized away.
        benchmark::DoNotOptimize(product);
    }
}
// Benchmarks `a * b` for two 128x128 row-major double matrices.
//
// The operands are prepared once, outside the timed loop, via set(3.f) / set(7.f).
// Inside the loop the operands and the product are passed through
// benchmark::DoNotOptimize so the compiler can neither hoist the (loop-invariant)
// multiplication out of the loop nor discard it as dead code.
//
// @param state Google Benchmark state object that drives the timed loop.
void mat_double_multiplication_row_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, double, MatStoreType::ROW_MAJOR>;
    MatType a;
    MatType b;
    a.set(3.f);
    b.set(7.f);
    for (auto _ : state)
    {
        // Treat the inputs as possibly modified so each iteration recomputes the product.
        benchmark::DoNotOptimize(a);
        benchmark::DoNotOptimize(b);
        auto product = a * b;
        // Force the result to be materialized instead of being optimized away.
        benchmark::DoNotOptimize(product);
    }
}
// Benchmarks `a * b` for two 128x128 column-major double matrices.
//
// The operands are prepared once, outside the timed loop, via set(3.f) / set(7.f).
// Inside the loop the operands and the product are passed through
// benchmark::DoNotOptimize so the compiler can neither hoist the (loop-invariant)
// multiplication out of the loop nor discard it as dead code.
//
// @param state Google Benchmark state object that drives the timed loop.
void mat_double_multiplication_col_major(benchmark::State& state)
{
    using MatType = Mat<128, 128, double, MatStoreType::COLUMN_MAJOR>;
    MatType a;
    MatType b;
    a.set(3.f);
    b.set(7.f);
    for (auto _ : state)
    {
        // Treat the inputs as possibly modified so each iteration recomputes the product.
        benchmark::DoNotOptimize(a);
        benchmark::DoNotOptimize(b);
        auto product = a * b;
        // Force the result to be materialized instead of being optimized away.
        benchmark::DoNotOptimize(product);
    }
}
// Register the four matrix-multiplication benchmarks (float and double, each in
// column-major and row-major layout). Every benchmark is pinned to exactly 5000
// iterations instead of letting the framework choose the iteration count.
BENCHMARK(mat_float_multiplication_col_major)->Iterations(5000);
BENCHMARK(mat_float_multiplication_row_major)->Iterations(5000);
BENCHMARK(mat_double_multiplication_col_major)->Iterations(5000);
BENCHMARK(mat_double_multiplication_row_major)->Iterations(5000);

5
benchmark/main.cpp Normal file
View File

@@ -0,0 +1,5 @@
//
// Created by Vlad on 9/17/2025.
//
#include <benchmark/benchmark.h>
// Google Benchmark macro that provides the program entry point; the benchmarks
// themselves are registered with BENCHMARK(...) in the other source files.
BENCHMARK_MAIN();

View File

@@ -1 +1,2 @@
add_subdirectory(googletest)
add_subdirectory(googletest)
add_subdirectory(benchmark)

1
extlibs/benchmark Submodule

Submodule extlibs/benchmark added at 2948b6a2e6

View File

@@ -95,7 +95,7 @@ namespace omath
hsv_data.hue = 0.f;
else if (max == red)
hsv_data.hue = 60.f * (std::fmodf(((green - blue) / delta), 6.f));
hsv_data.hue = 60.f * (std::fmod(static_cast<float>((green - blue) / delta), 6.f));
else if (max == green)
hsv_data.hue = 60.f * (((blue - red) / delta) + 2.f);
else if (max == blue)

View File

@@ -11,6 +11,9 @@
#include <stdexcept>
#include <utility>
#ifdef OMATH_USE_AVX2
#include <immintrin.h>
#endif
namespace omath
{
@@ -155,17 +158,18 @@ namespace omath
constexpr Mat<Rows, OtherColumns, Type, StoreType>
operator*(const Mat<Columns, OtherColumns, Type, StoreType>& other) const
{
Mat<Rows, OtherColumns, Type, StoreType> result;
for (size_t i = 0; i < Rows; ++i)
for (size_t j = 0; j < OtherColumns; ++j)
{
Type sum = 0;
for (size_t k = 0; k < Columns; ++k)
sum += at(i, k) * other.at(k, j);
result.at(i, j) = sum;
}
return result;
#ifdef OMATH_USE_AVX2
if constexpr (StoreType == MatStoreType::ROW_MAJOR)
return avx_multiply_row_major(other);
if constexpr (StoreType == MatStoreType::COLUMN_MAJOR)
return avx_multiply_col_major(other);
#else
if constexpr (StoreType == MatStoreType::ROW_MAJOR)
return cache_friendly_multiply_row_major(other);
if constexpr (StoreType == MatStoreType::COLUMN_MAJOR)
return cache_friendly_multiply_col_major(other);
#endif
std::unreachable();
}
constexpr Mat& operator*=(const Type& f) noexcept
@@ -367,6 +371,176 @@ namespace omath
private:
std::array<Type, Rows * Columns> m_data;
// Scalar matrix product for row-major operands.
//
// Iterates in i-k-j order: for a fixed element (i, k) of this matrix the innermost
// loop advances j, i.e. it walks along row k of `other` and row i of the result.
//
// NOTE(review): the accumulation uses `+=` on a default-constructed result, so it
// assumes Mat's default constructor zero-fills its storage - confirm.
//
// @tparam OtherColumns Column count of `other` and of the result.
// @param other Right-hand operand (Columns x OtherColumns, row-major).
// @return The Rows x OtherColumns product.
template<size_t OtherColumns> [[nodiscard]]
constexpr Mat<Rows, OtherColumns, Type, MatStoreType::ROW_MAJOR>
cache_friendly_multiply_row_major(const Mat<Columns, OtherColumns, Type, MatStoreType::ROW_MAJOR>& other) const
{
    using ProductMat = Mat<Rows, OtherColumns, Type, MatStoreType::ROW_MAJOR>;

    ProductMat product;
    for (std::size_t row = 0; row != Rows; ++row)
    {
        for (std::size_t inner = 0; inner != Columns; ++inner)
        {
            // Read the left-hand element once and reuse it for the whole output row.
            const Type lhs_value = at(row, inner);
            for (std::size_t col = 0; col != OtherColumns; ++col)
            {
                product.at(row, col) += lhs_value * other.at(inner, col);
            }
        }
    }
    return product;
}
// Scalar matrix product for column-major operands.
//
// Iterates in j-k-i order: for a fixed element (k, j) of `other` the innermost
// loop advances i, i.e. it walks down column k of this matrix and column j of
// the result.
//
// NOTE(review): the accumulation uses `+=` on a default-constructed result, so it
// assumes Mat's default constructor zero-fills its storage - confirm.
//
// @tparam OtherColumns Column count of `other` and of the result.
// @param other Right-hand operand (Columns x OtherColumns, column-major).
// @return The Rows x OtherColumns product.
template<size_t OtherColumns> [[nodiscard]]
constexpr Mat<Rows, OtherColumns, Type, MatStoreType::COLUMN_MAJOR> cache_friendly_multiply_col_major(
        const Mat<Columns, OtherColumns, Type, MatStoreType::COLUMN_MAJOR>& other) const
{
    using ProductMat = Mat<Rows, OtherColumns, Type, MatStoreType::COLUMN_MAJOR>;

    ProductMat product;
    for (std::size_t col = 0; col != OtherColumns; ++col)
    {
        for (std::size_t inner = 0; inner != Columns; ++inner)
        {
            // Read the right-hand element once and reuse it for the whole output column.
            const Type rhs_value = other.at(inner, col);
            for (std::size_t row = 0; row != Rows; ++row)
            {
                product.at(row, col) += at(row, inner) * rhs_value;
            }
        }
    }
    return product;
}
#ifdef OMATH_USE_AVX2
// AVX2/FMA matrix product for column-major operands.
//
// For float and double element types the product is accumulated column by column:
// element (k, j) of `other` is broadcast into a 256-bit register and fused-multiply-added
// against column k of this matrix, 8 floats or 4 doubles at a time; a scalar tail loop
// handles the remaining rows. The `_mm256_fmadd_*` intrinsics need FMA support in
// addition to AVX.
//
// Two cases are delegated to cache_friendly_multiply_col_major instead:
//  * element types other than float/double, which have no vectorized kernel here
//    (previously this path ended in std::unreachable(), i.e. undefined behaviour);
//  * constant evaluation, where intrinsics cannot be executed, so the function
//    stays usable in constant expressions.
//
// NOTE(review): the kernels accumulate with `+=` into a default-constructed result,
// so they assume Mat's default constructor zero-fills its storage - confirm.
//
// @tparam OtherColumns Column count of `other` and of the result.
// @param other Right-hand operand (Columns x OtherColumns, column-major).
// @return The Rows x OtherColumns product.
template<size_t OtherColumns> [[nodiscard]]
constexpr Mat<Rows, OtherColumns, Type, MatStoreType::COLUMN_MAJOR>
avx_multiply_col_major(const Mat<Columns, OtherColumns, Type, MatStoreType::COLUMN_MAJOR>& other) const
{
    if constexpr (!std::is_same_v<Type, float> && !std::is_same_v<Type, double>)
    {
        return cache_friendly_multiply_col_major(other);
    }
    else
    {
        if (std::is_constant_evaluated())
            return cache_friendly_multiply_col_major(other);

        Mat<Rows, OtherColumns, Type, MatStoreType::COLUMN_MAJOR> result;

        const Type* this_mat_data = this->raw_array().data();
        const Type* other_mat_data = other.raw_array().data();
        Type* result_mat_data = result.raw_array().data();

        if constexpr (std::is_same_v<Type, float>)
        {
            // ReSharper disable once CppTooWideScopeInitStatement
            constexpr std::size_t vector_size = 8; // floats per 256-bit register
            for (std::size_t j = 0; j < OtherColumns; ++j)
            {
                float* c_col = result_mat_data + j * Rows;
                for (std::size_t k = 0; k < Columns; ++k)
                {
                    const float bkj = other_mat_data[k + j * Columns];
                    const __m256 bkjv = _mm256_set1_ps(bkj);

                    const float* a_col_k = this_mat_data + k * Rows;

                    std::size_t i = 0;
                    for (; i + vector_size <= Rows; i += vector_size)
                    {
                        __m256 cvec = _mm256_loadu_ps(c_col + i);
                        const __m256 avec = _mm256_loadu_ps(a_col_k + i);
                        cvec = _mm256_fmadd_ps(avec, bkjv, cvec);
                        _mm256_storeu_ps(c_col + i, cvec);
                    }
                    // Scalar tail for row counts that are not a multiple of the vector width.
                    for (; i < Rows; ++i)
                        c_col[i] += a_col_k[i] * bkj;
                }
            }
        }
        else
        { // double
            // ReSharper disable once CppTooWideScopeInitStatement
            constexpr std::size_t vector_size = 4; // doubles per 256-bit register
            for (std::size_t j = 0; j < OtherColumns; ++j)
            {
                double* c_col = result_mat_data + j * Rows;
                for (std::size_t k = 0; k < Columns; ++k)
                {
                    const double bkj = other_mat_data[k + j * Columns];
                    const __m256d bkjv = _mm256_set1_pd(bkj);

                    const double* a_col_k = this_mat_data + k * Rows;

                    std::size_t i = 0;
                    for (; i + vector_size <= Rows; i += vector_size)
                    {
                        __m256d cvec = _mm256_loadu_pd(c_col + i);
                        const __m256d avec = _mm256_loadu_pd(a_col_k + i);
                        cvec = _mm256_fmadd_pd(avec, bkjv, cvec);
                        _mm256_storeu_pd(c_col + i, cvec);
                    }
                    // Scalar tail for row counts that are not a multiple of the vector width.
                    for (; i < Rows; ++i)
                        c_col[i] += a_col_k[i] * bkj;
                }
            }
        }
        return result;
    }
}
// AVX2/FMA matrix product for row-major operands.
//
// For float and double element types the product is accumulated row by row:
// element (i, k) of this matrix is broadcast into a 256-bit register and
// fused-multiply-added against row k of `other`, 8 floats or 4 doubles at a time;
// a scalar tail loop handles the remaining columns. The `_mm256_fmadd_*` intrinsics
// need FMA support in addition to AVX.
//
// Two cases are delegated to cache_friendly_multiply_row_major instead:
//  * element types other than float/double, which have no vectorized kernel here
//    (previously the non-constexpr `else if` still instantiated the double kernel
//    for them and the call ended in std::unreachable());
//  * constant evaluation, where intrinsics cannot be executed, so the function
//    stays usable in constant expressions.
//
// NOTE(review): the kernels accumulate with `+=` into a default-constructed result,
// so they assume Mat's default constructor zero-fills its storage - confirm.
//
// @tparam OtherColumns Column count of `other` and of the result.
// @param other Right-hand operand (Columns x OtherColumns, row-major).
// @return The Rows x OtherColumns product.
template<size_t OtherColumns> [[nodiscard]]
constexpr Mat<Rows, OtherColumns, Type, MatStoreType::ROW_MAJOR>
avx_multiply_row_major(const Mat<Columns, OtherColumns, Type, MatStoreType::ROW_MAJOR>& other) const
{
    if constexpr (!std::is_same_v<Type, float> && !std::is_same_v<Type, double>)
    {
        return cache_friendly_multiply_row_major(other);
    }
    else
    {
        if (std::is_constant_evaluated())
            return cache_friendly_multiply_row_major(other);

        Mat<Rows, OtherColumns, Type, MatStoreType::ROW_MAJOR> result;

        const Type* this_mat_data = this->raw_array().data();
        const Type* other_mat_data = other.raw_array().data();
        Type* result_mat_data = result.raw_array().data();

        if constexpr (std::is_same_v<Type, float>)
        {
            // ReSharper disable once CppTooWideScopeInitStatement
            constexpr std::size_t vector_size = 8; // floats per 256-bit register
            for (std::size_t i = 0; i < Rows; ++i)
            {
                float* c_row = result_mat_data + i * OtherColumns;
                for (std::size_t k = 0; k < Columns; ++k)
                {
                    const float aik = this_mat_data[i * Columns + k];
                    const __m256 aikv = _mm256_set1_ps(aik);

                    const float* b_row = other_mat_data + k * OtherColumns;

                    std::size_t j = 0;
                    for (; j + vector_size <= OtherColumns; j += vector_size)
                    {
                        __m256 cvec = _mm256_loadu_ps(c_row + j);
                        const __m256 bvec = _mm256_loadu_ps(b_row + j);
                        cvec = _mm256_fmadd_ps(bvec, aikv, cvec);
                        _mm256_storeu_ps(c_row + j, cvec);
                    }
                    // Scalar tail for column counts that are not a multiple of the vector width.
                    for (; j < OtherColumns; ++j)
                        c_row[j] += aik * b_row[j];
                }
            }
        }
        else
        { // double
            // ReSharper disable once CppTooWideScopeInitStatement
            constexpr std::size_t vector_size = 4; // doubles per 256-bit register
            for (std::size_t i = 0; i < Rows; ++i)
            {
                double* c_row = result_mat_data + i * OtherColumns;
                for (std::size_t k = 0; k < Columns; ++k)
                {
                    const double aik = this_mat_data[i * Columns + k];
                    const __m256d aikv = _mm256_set1_pd(aik);

                    const double* b_row = other_mat_data + k * OtherColumns;

                    std::size_t j = 0;
                    for (; j + vector_size <= OtherColumns; j += vector_size)
                    {
                        __m256d cvec = _mm256_loadu_pd(c_row + j);
                        const __m256d bvec = _mm256_loadu_pd(b_row + j);
                        cvec = _mm256_fmadd_pd(bvec, aikv, cvec);
                        _mm256_storeu_pd(c_row + j, cvec);
                    }
                    // Scalar tail for column counts that are not a multiple of the vector width.
                    for (; j < OtherColumns; ++j)
                        c_row[j] += aik * b_row[j];
                }
            }
        }
        return result;
    }
}
#endif
};
template<class Type = float, MatStoreType St = MatStoreType::ROW_MAJOR> [[nodiscard]]