From e05f9ef5a97d6a093f45661345e79680ba535b72 Mon Sep 17 00:00:00 2001 From: Orange Date: Thu, 18 Sep 2025 06:02:37 +0300 Subject: [PATCH] Removes FMA check for matrix multiplication Removes preprocessor check for FMA instructions in matrix multiplication functions. This simplifies the code and relies on the compiler's ability to optimize the code based on available hardware support. The assumption is that modern compilers will automatically utilize FMA instructions if available, and fall back to alternative implementations if not. --- include/omath/linear_algebra/mat.hpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/include/omath/linear_algebra/mat.hpp b/include/omath/linear_algebra/mat.hpp index c5803d7..0c732a4 100644 --- a/include/omath/linear_algebra/mat.hpp +++ b/include/omath/linear_algebra/mat.hpp @@ -431,11 +431,7 @@ namespace omath { __m256 cvec = _mm256_loadu_ps(c_col + i); __m256 avec = _mm256_loadu_ps(a_col_k + i); -#if defined(__FMA__) cvec = _mm256_fmadd_ps(avec, bkjv, cvec); -#else - cvec = _mm256_add_ps(cvec, _mm256_mul_ps(avec, bkjv)); -#endif _mm256_storeu_ps(c_col + i, cvec); } for (; i < Rows; ++i) @@ -462,11 +458,7 @@ namespace omath { __m256d cvec = _mm256_loadu_pd(c_col + i); __m256d avec = _mm256_loadu_pd(a_col_k + i); -#if defined(__FMA__) cvec = _mm256_fmadd_pd(avec, bkjv, cvec); -#else - cvec = _mm256_add_pd(cvec, _mm256_mul_pd(avec, bkjv)); -#endif _mm256_storeu_pd(c_col + i, cvec); } for (; i < Rows; ++i) @@ -508,11 +500,8 @@ namespace omath { __m256 cvec = _mm256_loadu_ps(c_row + j); __m256 bvec = _mm256_loadu_ps(b_row + j); -#if defined(__FMA__) cvec = _mm256_fmadd_ps(bvec, aikv, cvec); -#else - cvec = _mm256_add_ps(cvec, _mm256_mul_ps(bvec, aikv)); -#endif + _mm256_storeu_ps(c_row + j, cvec); } for (; j < OtherColumns; ++j) @@ -538,11 +527,8 @@ namespace omath { __m256d cvec = _mm256_loadu_pd(c_row + j); __m256d bvec = _mm256_loadu_pd(b_row + j); -#if defined(__FMA__) cvec = _mm256_fmadd_pd(bvec, aikv, cvec); -#else - cvec = _mm256_add_pd(cvec, _mm256_mul_pd(bvec, aikv)); -#endif + _mm256_storeu_pd(c_row + j, cvec); } for (; j < OtherColumns; ++j)