Compare commits

...

9 Commits

Author SHA1 Message Date
7d194a5c36 Merge pull request #95 from orange-cpp/pathfinding_improvement
removed brackets
2025-10-30 05:44:00 +03:00
eec34c1efb removed brackets
Initial plan

Initial exploration and analysis complete

Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com>

Optimize performance: A* pathfinding, Vector3 hash, pattern scanner, AVX2 code, and serialization

Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com>

Add bounds check for navigation mesh serialization

Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com>

Document serialization limitation for large neighbor counts

Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com>

Add _codeql_build_dir to gitignore

Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com>

Removes codeql detected source root

Eliminates the automatically generated file that was causing issues.

This file was added by codeql and no longer needed.

revert
cleaned up gitignore

moved to anon namespace

Improves navigation mesh serialization and clamping

Ensures correct serialization of navigation meshes by clamping neighbor counts to prevent data corruption when exceeding uint16_t limits.

Updates data types to `std::uint8_t` and `std::size_t` for consistency.
Uses `std::copy_n` instead of `std::memcpy` for deserialization.
2025-10-30 05:38:58 +03:00
765d5e7216 fixed typo 2025-10-30 05:37:34 +03:00
7cb8a4de52 Clarifies release process for stable builds
Explains that changes merged into master won't reach stable users unless the `stable` tag is updated.
2025-10-28 01:21:41 +03:00
d2fbc286b1 Merge pull request #93 from orange-cpp/feaure/small_improvement
removed useless option
2025-10-28 01:19:21 +03:00
64385757af fix 2025-10-28 01:15:36 +03:00
ea7b812645 removed 2025-10-28 01:11:02 +03:00
b214cdf3a1 removed useless option 2025-10-28 01:09:47 +03:00
dccc12ee30 added some comments 2025-10-27 11:25:31 +03:00
12 changed files with 175 additions and 136 deletions

View File

@@ -56,7 +56,7 @@ SpaceBeforeParensOptions:
AfterIfMacros: true AfterIfMacros: true
AfterOverloadedOperator: false AfterOverloadedOperator: false
BeforeNonEmptyParentheses: false BeforeNonEmptyParentheses: false
SpaceBeforeRangeBasedForLoopColon: false SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyParentheses: false SpaceInEmptyParentheses: false
SpacesInCStyleCastParentheses: false SpacesInCStyleCastParentheses: false
SpacesInConditionalStatement: false SpacesInConditionalStatement: false

View File

@@ -40,11 +40,11 @@ jobs:
git clone https://github.com/microsoft/vcpkg "$VCPKG_ROOT" git clone https://github.com/microsoft/vcpkg "$VCPKG_ROOT"
- name: Configure (cmake --preset) - name: Configure (cmake --preset)
shell: bash shell: bash
run: cmake --preset linux-release -DOMATH_BUILD_TESTS=ON -DOMATH_BUILD_BENCHMARK=OFF -DOMATH_BUILD_VIA_VCPKG=ON -DVCPKG_MANIFEST_FEATURES="imgui;avx2;tests" run: cmake --preset linux-release-vcpkg -DOMATH_BUILD_TESTS=ON -DOMATH_BUILD_BENCHMARK=OFF -DVCPKG_MANIFEST_FEATURES="imgui;avx2;tests"
- name: Build - name: Build
shell: bash shell: bash
run: cmake --build cmake-build/build/linux-release --target unit_tests omath run: cmake --build cmake-build/build/linux-release-vcpkg --target unit_tests omath
- name: Run unit_tests - name: Run unit_tests
shell: bash shell: bash
@@ -75,11 +75,11 @@ jobs:
- name: Configure (cmake --preset) - name: Configure (cmake --preset)
shell: bash shell: bash
run: cmake --preset windows-release -DOMATH_BUILD_TESTS=ON -DOMATH_BUILD_BENCHMARK=OFF -DOMATH_BUILD_VIA_VCPKG=ON -DVCPKG_MANIFEST_FEATURES="imgui;avx2;tests" run: cmake --preset windows-release-vcpkg -DOMATH_BUILD_TESTS=ON -DOMATH_BUILD_BENCHMARK=OFF -DVCPKG_MANIFEST_FEATURES="imgui;avx2;tests"
- name: Build - name: Build
shell: bash shell: bash
run: cmake --build cmake-build/build/windows-release --target unit_tests omath run: cmake --build cmake-build/build/windows-release-vcpkg --target unit_tests omath
- name: Run unit_tests.exe - name: Run unit_tests.exe
shell: bash shell: bash

2
.idea/vcs.xml generated
View File

@@ -2,7 +2,5 @@
<project version="4"> <project version="4">
<component name="VcsDirectoryMappings"> <component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" /> <mapping directory="" vcs="Git" />
<mapping directory="$PROJECT_DIR$/extlibs/benchmark" vcs="Git" />
<mapping directory="$PROJECT_DIR$/extlibs/googletest" vcs="Git" />
</component> </component>
</project> </project>

View File

@@ -23,13 +23,8 @@ option(OMATH_SUPRESS_SAFETY_CHECKS "Supress some safety checks in release build
option(OMATH_USE_UNITY_BUILD "Will enable unity build to speed up compilation" OFF) option(OMATH_USE_UNITY_BUILD "Will enable unity build to speed up compilation" OFF)
option(OMATH_ENABLE_LEGACY "Will enable legacy classes that MUST be used ONLY for backward compatibility" ON) option(OMATH_ENABLE_LEGACY "Will enable legacy classes that MUST be used ONLY for backward compatibility" ON)
option(OMATH_BUILD_VIA_VCPKG "Will enable building using vcpkg, vcpkg will override some options that were set in vcpkg.json file, and search for dependencies using vcpkg" OFF)
if (OMATH_BUILD_VIA_VCPKG AND NOT CMAKE_TOOLCHAIN_FILE) if (VCPKG_MANIFEST_FEATURES)
message(FATAL_ERROR "[${PROJECT_NAME}] CMAKE_TOOLCHAIN_FILE IS EMPTY! Please set env variable called 'VCPKG_ROOT' to vcpkg root folder here is an example: 'C:/vcpkg' or '/home/user/vcpkg' ")
endif ()
if (OMATH_BUILD_VIA_VCPKG AND VCPKG_MANIFEST_FEATURES)
foreach (omath_feature IN LISTS VCPKG_MANIFEST_FEATURES) foreach (omath_feature IN LISTS VCPKG_MANIFEST_FEATURES)
if (omath_feature STREQUAL "imgui") if (omath_feature STREQUAL "imgui")
set(OMATH_IMGUI_INTEGRATION ON) set(OMATH_IMGUI_INTEGRATION ON)

View File

@@ -9,8 +9,6 @@
"installDir": "${sourceDir}/cmake-build/install/${presetName}", "installDir": "${sourceDir}/cmake-build/install/${presetName}",
"cacheVariables": { "cacheVariables": {
"CMAKE_CXX_COMPILER": "cl.exe", "CMAKE_CXX_COMPILER": "cl.exe",
"CMAKE_TOOLCHAIN_FILE": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
"VCPKG_INSTALLED_DIR": "${sourceDir}/cmake-build/vcpkg_installed",
"CMAKE_MAKE_PROGRAM": "Ninja" "CMAKE_MAKE_PROGRAM": "Ninja"
}, },
"condition": { "condition": {
@@ -19,6 +17,17 @@
"rhs": "Windows" "rhs": "Windows"
} }
}, },
{
"name": "windows-base-vcpkg",
"hidden": true,
"inherits": "windows-base",
"cacheVariables": {
"OMATH_BUILD_VIA_VCPKG": "ON",
"CMAKE_TOOLCHAIN_FILE": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
"VCPKG_INSTALLED_DIR": "${sourceDir}/cmake-build/vcpkg_installed",
"VCPKG_MANIFEST_FEATURES": "tests;imgui;avx2"
}
},
{ {
"name": "windows-debug", "name": "windows-debug",
"displayName": "Debug", "displayName": "Debug",
@@ -30,17 +39,15 @@
{ {
"name": "windows-debug-vcpkg", "name": "windows-debug-vcpkg",
"displayName": "Debug", "displayName": "Debug",
"inherits": "windows-base", "inherits": "windows-base-vcpkg",
"cacheVariables": { "cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug", "CMAKE_BUILD_TYPE": "Debug"
"OMATH_BUILD_VIA_VCPKG": "ON",
"VCPKG_MANIFEST_FEATURES": "tests;imgui;avx2"
} }
}, },
{ {
"name": "windows-release-vcpkg", "name": "windows-release-vcpkg",
"displayName": "Release", "displayName": "Release",
"inherits": "windows-base", "inherits": "windows-base-vcpkg",
"cacheVariables": { "cacheVariables": {
"CMAKE_BUILD_TYPE": "Release", "CMAKE_BUILD_TYPE": "Release",
"OMATH_BUILD_VIA_VCPKG": "ON" "OMATH_BUILD_VIA_VCPKG": "ON"
@@ -62,8 +69,6 @@
"installDir": "${sourceDir}/cmake-build/install/${presetName}", "installDir": "${sourceDir}/cmake-build/install/${presetName}",
"cacheVariables": { "cacheVariables": {
"CMAKE_CXX_COMPILER": "clang++", "CMAKE_CXX_COMPILER": "clang++",
"CMAKE_TOOLCHAIN_FILE": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
"VCPKG_INSTALLED_DIR": "${sourceDir}/cmake-build/vcpkg_installed",
"CMAKE_MAKE_PROGRAM": "ninja" "CMAKE_MAKE_PROGRAM": "ninja"
}, },
"condition": { "condition": {
@@ -72,6 +77,17 @@
"rhs": "Linux" "rhs": "Linux"
} }
}, },
{
"name": "linux-base-vcpkg",
"hidden": true,
"inherits": "linux-base",
"cacheVariables": {
"OMATH_BUILD_VIA_VCPKG": "ON",
"CMAKE_TOOLCHAIN_FILE": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
"VCPKG_INSTALLED_DIR": "${sourceDir}/cmake-build/vcpkg_installed",
"VCPKG_MANIFEST_FEATURES": "tests;imgui;avx2"
}
},
{ {
"name": "linux-debug", "name": "linux-debug",
"displayName": "Linux Debug", "displayName": "Linux Debug",
@@ -83,10 +99,9 @@
{ {
"name": "linux-debug-vcpkg", "name": "linux-debug-vcpkg",
"displayName": "Linux Debug", "displayName": "Linux Debug",
"inherits": "linux-base", "inherits": "linux-base-vcpkg",
"cacheVariables": { "cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug", "CMAKE_BUILD_TYPE": "Debug"
"OMATH_BUILD_VIA_VCPKG": "ON"
} }
}, },
{ {
@@ -100,10 +115,9 @@
{ {
"name": "linux-release-vcpkg", "name": "linux-release-vcpkg",
"displayName": "Linux Release", "displayName": "Linux Release",
"inherits": "linux-debug", "inherits": "linux-base-vcpkg",
"cacheVariables": { "cacheVariables": {
"CMAKE_BUILD_TYPE": "Release", "CMAKE_BUILD_TYPE": "Release"
"OMATH_BUILD_VIA_VCPKG": "ON"
} }
}, },
{ {
@@ -134,8 +148,7 @@
"displayName": "Darwin Debug", "displayName": "Darwin Debug",
"inherits": "darwin-base", "inherits": "darwin-base",
"cacheVariables": { "cacheVariables": {
"CMAKE_BUILD_TYPE": "Debug", "CMAKE_BUILD_TYPE": "Debug"
"OMATH_BUILD_VIA_VCPKG": "ON"
} }
}, },
{ {
@@ -151,8 +164,7 @@
"displayName": "Darwin Release", "displayName": "Darwin Release",
"inherits": "darwin-debug", "inherits": "darwin-debug",
"cacheVariables": { "cacheVariables": {
"CMAKE_BUILD_TYPE": "Release", "CMAKE_BUILD_TYPE": "Release"
"OMATH_BUILD_VIA_VCPKG": "ON"
} }
} }
] ]

View File

@@ -18,7 +18,7 @@ In order to send code back to the official OMath repository, you must first crea
account ([fork](https://help.github.com/articles/creating-a-pull-request-from-a-fork/)) and account ([fork](https://help.github.com/articles/creating-a-pull-request-from-a-fork/)) and
then [create a pull request](https://help.github.com/articles/creating-a-pull-request-from-a-fork/) back to OMath. then [create a pull request](https://help.github.com/articles/creating-a-pull-request-from-a-fork/) back to OMath.
OMath developement is performed on multiple branches. Changes are then pull requested into master. By default, changes OMath development is performed on multiple branches. Changes are then pull requested into master. By default, changes
merged into master will not roll out to stable build users unless the `stable` tag is updated. merged into master will not roll out to stable build users unless the `stable` tag is updated.
### 📜 Code-Style ### 📜 Code-Style

View File

@@ -11,9 +11,9 @@ set_target_properties(${PROJECT_NAME} PROPERTIES
CXX_STANDARD 23 CXX_STANDARD 23
CXX_STANDARD_REQUIRED ON) CXX_STANDARD_REQUIRED ON)
if (TARGET benchmark::benchmark) if (TARGET benchmark::benchmark) # Benchmark is being linked as submodule
target_link_libraries(${PROJECT_NAME} PRIVATE benchmark::benchmark omath) target_link_libraries(${PROJECT_NAME} PRIVATE benchmark::benchmark omath)
else() else()
find_package(benchmark CONFIG REQUIRED) find_package(benchmark CONFIG REQUIRED) # Benchmark is being linked as vcpkg package
target_link_libraries(${PROJECT_NAME} PRIVATE benchmark::benchmark omath) target_link_libraries(${PROJECT_NAME} PRIVATE benchmark::benchmark omath)
endif () endif ()

View File

@@ -22,9 +22,5 @@ namespace omath::pathfinding
static std::vector<Vector3<float>> static std::vector<Vector3<float>>
reconstruct_final_path(const std::unordered_map<Vector3<float>, PathNode>& closed_list, reconstruct_final_path(const std::unordered_map<Vector3<float>, PathNode>& closed_list,
const Vector3<float>& current) noexcept; const Vector3<float>& current) noexcept;
[[nodiscard]]
static auto get_perfect_node(const std::unordered_map<Vector3<float>, PathNode>& open_list,
const Vector3<float>& end_vertex) noexcept;
}; };
} // namespace omath::pathfinding } // namespace omath::pathfinding

View File

@@ -4,9 +4,25 @@
#include "omath/pathfinding/a_star.hpp" #include "omath/pathfinding/a_star.hpp"
#include <algorithm> #include <algorithm>
#include <optional> #include <optional>
#include <queue>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
namespace
{
struct OpenListNode final
{
omath::Vector3<float> position;
float f_cost;
[[nodiscard]]
bool operator>(const OpenListNode& other) const noexcept
{
return f_cost > other.f_cost;
}
};
}
namespace omath::pathfinding namespace omath::pathfinding
{ {
struct PathNode final struct PathNode final
@@ -37,23 +53,13 @@ namespace omath::pathfinding
std::ranges::reverse(path); std::ranges::reverse(path);
return path; return path;
} }
auto Astar::get_perfect_node(const std::unordered_map<Vector3<float>, PathNode>& open_list,
const Vector3<float>& end_vertex) noexcept
{
return std::ranges::min_element(open_list,
[&end_vertex](const auto& a, const auto& b)
{
const float fa = a.second.g_cost + a.first.distance_to(end_vertex);
const float fb = b.second.g_cost + b.first.distance_to(end_vertex);
return fa < fb;
});
}
std::vector<Vector3<float>> Astar::find_path(const Vector3<float>& start, const Vector3<float>& end, std::vector<Vector3<float>> Astar::find_path(const Vector3<float>& start, const Vector3<float>& end,
const NavigationMesh& nav_mesh) noexcept const NavigationMesh& nav_mesh) noexcept
{ {
std::unordered_map<Vector3<float>, PathNode> closed_list; std::unordered_map<Vector3<float>, PathNode> closed_list;
std::unordered_map<Vector3<float>, PathNode> open_list; std::unordered_map<Vector3<float>, PathNode> node_data;
std::priority_queue<OpenListNode, std::vector<OpenListNode>, std::greater<>> open_list;
auto maybe_start_vertex = nav_mesh.get_closest_vertex(start); auto maybe_start_vertex = nav_mesh.get_closest_vertex(start);
auto maybe_end_vertex = nav_mesh.get_closest_vertex(end); auto maybe_end_vertex = nav_mesh.get_closest_vertex(end);
@@ -64,20 +70,27 @@ namespace omath::pathfinding
const auto start_vertex = maybe_start_vertex.value(); const auto start_vertex = maybe_start_vertex.value();
const auto end_vertex = maybe_end_vertex.value(); const auto end_vertex = maybe_end_vertex.value();
open_list.emplace(start_vertex, PathNode{std::nullopt, 0.f}); node_data.emplace(start_vertex, PathNode{std::nullopt, 0.f});
open_list.push({start_vertex, start_vertex.distance_to(end_vertex)});
while (!open_list.empty()) while (!open_list.empty())
{ {
auto current_it = get_perfect_node(open_list, end_vertex); auto current = open_list.top().position;
open_list.pop();
const auto current = current_it->first; if (closed_list.contains(current))
const auto current_node = current_it->second; continue;
auto current_node_it = node_data.find(current);
if (current_node_it == node_data.end())
continue;
const auto current_node = current_node_it->second;
if (current == end_vertex) if (current == end_vertex)
return reconstruct_final_path(closed_list, current); return reconstruct_final_path(closed_list, current);
closed_list.emplace(current, current_node); closed_list.emplace(current, current_node);
open_list.erase(current_it);
for (const auto& neighbor: nav_mesh.get_neighbors(current)) for (const auto& neighbor: nav_mesh.get_neighbors(current))
{ {
@@ -86,11 +99,14 @@ namespace omath::pathfinding
const float tentative_g_cost = current_node.g_cost + neighbor.distance_to(current); const float tentative_g_cost = current_node.g_cost + neighbor.distance_to(current);
// ReSharper disable once CppTooWideScopeInitStatement auto node_it = node_data.find(neighbor);
const auto open_it = open_list.find(neighbor);
if (open_it == open_list.end() || tentative_g_cost < open_it->second.g_cost) if (node_it == node_data.end() || tentative_g_cost < node_it->second.g_cost)
open_list[neighbor] = PathNode{current, tentative_g_cost}; {
node_data[neighbor] = PathNode{current, tentative_g_cost};
const float f_cost = tentative_g_cost + neighbor.distance_to(end_vertex);
open_list.push({neighbor, f_cost});
}
} }
} }

View File

@@ -3,6 +3,8 @@
// //
#include "omath/pathfinding/navigation_mesh.hpp" #include "omath/pathfinding/navigation_mesh.hpp"
#include <algorithm> #include <algorithm>
#include <cstring>
#include <limits>
#include <stdexcept> #include <stdexcept>
namespace omath::pathfinding namespace omath::pathfinding
{ {
@@ -13,7 +15,7 @@ namespace omath::pathfinding
{ return a.first.distance_to(point) < b.first.distance_to(point); }); { return a.first.distance_to(point) < b.first.distance_to(point); });
if (res == m_vertex_map.cend()) if (res == m_vertex_map.cend())
return std::unexpected("Failed to get clossest point"); return std::unexpected("Failed to get closest point");
return res->first; return res->first;
} }
@@ -30,55 +32,68 @@ namespace omath::pathfinding
std::vector<uint8_t> NavigationMesh::serialize() const noexcept std::vector<uint8_t> NavigationMesh::serialize() const noexcept
{ {
auto dump_to_vector = []<typename T>(const T& t, std::vector<uint8_t>& vec) std::vector<std::uint8_t> raw;
{
for (size_t i = 0; i < sizeof(t); i++)
vec.push_back(*(reinterpret_cast<const uint8_t*>(&t) + i));
};
std::vector<uint8_t> raw; // Pre-calculate total size for better performance
std::size_t total_size = 0;
for (const auto& [vertex, neighbors] : m_vertex_map)
{
total_size += sizeof(vertex) + sizeof(std::uint16_t) + sizeof(Vector3<float>) * neighbors.size();
}
raw.reserve(total_size);
auto dump_to_vector = [&raw]<typename T>(const T& t)
{
const auto* byte_ptr = reinterpret_cast<const std::uint8_t*>(&t);
raw.insert(raw.end(), byte_ptr, byte_ptr + sizeof(T));
};
for (const auto& [vertex, neighbors] : m_vertex_map) for (const auto& [vertex, neighbors] : m_vertex_map)
{ {
const auto neighbors_count = neighbors.size(); // Clamp neighbors count to fit in uint16_t (prevents silent data corruption)
// NOTE: If neighbors.size() > 65535, only the first 65535 neighbors will be serialized.
// This is a limitation of the current serialization format using uint16_t for count.
const auto clamped_count =
std::min<std::size_t>(neighbors.size(), std::numeric_limits<std::uint16_t>::max());
const auto neighbors_count = static_cast<std::uint16_t>(clamped_count);
dump_to_vector(vertex, raw); dump_to_vector(vertex);
dump_to_vector(neighbors_count, raw); dump_to_vector(neighbors_count);
for (const auto& neighbor: neighbors) // Only serialize up to the clamped count
dump_to_vector(neighbor, raw); for (std::size_t i = 0; i < clamped_count; ++i)
dump_to_vector(neighbors[i]);
} }
return raw; return raw;
} }
void NavigationMesh::deserialize(const std::vector<uint8_t>& raw) noexcept void NavigationMesh::deserialize(const std::vector<uint8_t>& raw) noexcept
{ {
auto load_from_vector = [](const std::vector<uint8_t>& vec, size_t& offset, auto& value) auto load_from_vector = [](const std::vector<uint8_t>& vec, std::size_t& offset, auto& value)
{ {
if (offset + sizeof(value) > vec.size()) if (offset + sizeof(value) > vec.size())
{
throw std::runtime_error("Deserialize: Invalid input data size."); throw std::runtime_error("Deserialize: Invalid input data size.");
}
std::copy_n(vec.data() + offset, sizeof(value), reinterpret_cast<uint8_t*>(&value)); std::copy_n(vec.data() + offset, sizeof(value), reinterpret_cast<uint8_t*>(&value));
offset += sizeof(value); offset += sizeof(value);
}; };
m_vertex_map.clear(); m_vertex_map.clear();
size_t offset = 0; std::size_t offset = 0;
while (offset < raw.size()) while (offset < raw.size())
{ {
Vector3<float> vertex; Vector3<float> vertex;
load_from_vector(raw, offset, vertex); load_from_vector(raw, offset, vertex);
uint16_t neighbors_count; std::uint16_t neighbors_count;
load_from_vector(raw, offset, neighbors_count); load_from_vector(raw, offset, neighbors_count);
std::vector<Vector3<float>> neighbors; std::vector<Vector3<float>> neighbors;
neighbors.reserve(neighbors_count); neighbors.reserve(neighbors_count);
for (size_t i = 0; i < neighbors_count; ++i) for (std::size_t i = 0; i < neighbors_count; ++i)
{ {
Vector3<float> neighbor; Vector3<float> neighbor;
load_from_vector(raw, offset, neighbor); load_from_vector(raw, offset, neighbor);

View File

@@ -18,97 +18,104 @@ namespace omath::projectile_prediction
[[maybe_unused]] const Target& target) const [[maybe_unused]] const Target& target) const
{ {
#if defined(OMATH_USE_AVX2) && defined(__i386__) && defined(__x86_64__) #if defined(OMATH_USE_AVX2) && defined(__i386__) && defined(__x86_64__)
const float bulletGravity = m_gravityConstant * projectile.m_gravityScale; const float bullet_gravity = m_gravity_constant * projectile.m_gravity_scale;
const float v0 = projectile.m_launchSpeed; const float v0 = projectile.m_launch_speed;
const float v0Sqr = v0 * v0; const float v0_sqr = v0 * v0;
const Vector3 projOrigin = projectile.m_origin; const Vector3 proj_origin = projectile.m_origin;
constexpr int SIMD_FACTOR = 8; constexpr int SIMD_FACTOR = 8;
float currentTime = m_simulationTimeStep; float current_time = m_simulation_time_step;
for (; currentTime <= m_maximumSimulationTime; currentTime += m_simulationTimeStep * SIMD_FACTOR) for (; current_time <= m_maximum_simulation_time; current_time += m_simulation_time_step * SIMD_FACTOR)
{ {
const __m256 times const __m256 times
= _mm256_setr_ps(currentTime, currentTime + m_simulationTimeStep, = _mm256_setr_ps(current_time, current_time + m_simulation_time_step,
currentTime + m_simulationTimeStep * 2, currentTime + m_simulationTimeStep * 3, current_time + m_simulation_time_step * 2, current_time + m_simulation_time_step * 3,
currentTime + m_simulationTimeStep * 4, currentTime + m_simulationTimeStep * 5, current_time + m_simulation_time_step * 4, current_time + m_simulation_time_step * 5,
currentTime + m_simulationTimeStep * 6, currentTime + m_simulationTimeStep * 7); current_time + m_simulation_time_step * 6, current_time + m_simulation_time_step * 7);
const __m256 targetX const __m256 target_x
= _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.x), times, _mm256_set1_ps(target.m_origin.x)); = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.x), times, _mm256_set1_ps(target.m_origin.x));
const __m256 targetY const __m256 target_y
= _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.y), times, _mm256_set1_ps(target.m_origin.y)); = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.y), times, _mm256_set1_ps(target.m_origin.y));
const __m256 timesSq = _mm256_mul_ps(times, times); const __m256 times_sq = _mm256_mul_ps(times, times);
const __m256 targetZ = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.z), times, const __m256 target_z = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.z), times,
_mm256_fnmadd_ps(_mm256_set1_ps(0.5f * m_gravityConstant), timesSq, _mm256_fnmadd_ps(_mm256_set1_ps(0.5f * m_gravity_constant), times_sq,
_mm256_set1_ps(target.m_origin.z))); _mm256_set1_ps(target.m_origin.z)));
const __m256 deltaX = _mm256_sub_ps(targetX, _mm256_set1_ps(projOrigin.x)); const __m256 delta_x = _mm256_sub_ps(target_x, _mm256_set1_ps(proj_origin.x));
const __m256 deltaY = _mm256_sub_ps(targetY, _mm256_set1_ps(projOrigin.y)); const __m256 delta_y = _mm256_sub_ps(target_y, _mm256_set1_ps(proj_origin.y));
const __m256 deltaZ = _mm256_sub_ps(targetZ, _mm256_set1_ps(projOrigin.z)); const __m256 delta_z = _mm256_sub_ps(target_z, _mm256_set1_ps(proj_origin.z));
const __m256 dSqr = _mm256_add_ps(_mm256_mul_ps(deltaX, deltaX), _mm256_mul_ps(deltaY, deltaY)); const __m256 d_sqr = _mm256_add_ps(_mm256_mul_ps(delta_x, delta_x), _mm256_mul_ps(delta_y, delta_y));
const __m256 bgTimesSq = _mm256_mul_ps(_mm256_set1_ps(bulletGravity), timesSq); const __m256 bg_times_sq = _mm256_mul_ps(_mm256_set1_ps(bullet_gravity), times_sq);
const __m256 term = _mm256_add_ps(deltaZ, _mm256_mul_ps(_mm256_set1_ps(0.5f), bgTimesSq)); const __m256 term = _mm256_add_ps(delta_z, _mm256_mul_ps(_mm256_set1_ps(0.5f), bg_times_sq));
const __m256 termSq = _mm256_mul_ps(term, term); const __m256 term_sq = _mm256_mul_ps(term, term);
const __m256 numerator = _mm256_add_ps(dSqr, termSq); const __m256 numerator = _mm256_add_ps(d_sqr, term_sq);
const __m256 denominator = _mm256_add_ps(timesSq, _mm256_set1_ps(1e-8f)); // Avoid division by zero const __m256 denominator = _mm256_add_ps(times_sq, _mm256_set1_ps(1e-8f)); // Avoid division by zero
const __m256 requiredV0Sqr = _mm256_div_ps(numerator, denominator); const __m256 required_v0_sqr = _mm256_div_ps(numerator, denominator);
const __m256 v0SqrVec = _mm256_set1_ps(v0Sqr + 1e-3f); const __m256 v0_sqr_vec = _mm256_set1_ps(v0_sqr + 1e-3f);
const __m256 mask = _mm256_cmp_ps(requiredV0Sqr, v0SqrVec, _CMP_LE_OQ); const __m256 mask = _mm256_cmp_ps(required_v0_sqr, v0_sqr_vec, _CMP_LE_OQ);
const unsigned validMask = _mm256_movemask_ps(mask); const unsigned valid_mask = _mm256_movemask_ps(mask);
if (!validMask) if (!valid_mask)
continue; continue;
alignas(32) float validTimes[SIMD_FACTOR]; alignas(32) float valid_times[SIMD_FACTOR];
_mm256_store_ps(validTimes, times); _mm256_store_ps(valid_times, times);
for (int i = 0; i < SIMD_FACTOR; ++i) for (int i = 0; i < SIMD_FACTOR; ++i)
{ {
if (!(validMask & (1 << i))) if (!(valid_mask & (1 << i)))
continue; continue;
const float candidateTime = validTimes[i]; const float candidate_time = valid_times[i];
if (candidateTime > m_maximumSimulationTime) if (candidate_time > m_maximum_simulation_time)
continue; continue;
// Fine search around candidate time // Fine search around candidate time
for (float fineTime = candidateTime - m_simulationTimeStep * 2; for (float fine_time = candidate_time - m_simulation_time_step * 2;
fineTime <= candidateTime + m_simulationTimeStep * 2; fineTime += m_simulationTimeStep) fine_time <= candidate_time + m_simulation_time_step * 2; fine_time += m_simulation_time_step)
{ {
if (fineTime < 0) if (fine_time < 0)
continue; continue;
const Vector3 targetPos = target.PredictPosition(fineTime, m_gravityConstant); // Manually compute predicted target position to avoid adding method to Target
const auto pitch = CalculatePitch(projOrigin, targetPos, bulletGravity, v0, fineTime); Vector3 target_pos = target.m_origin + target.m_velocity * fine_time;
if (target.m_is_airborne)
target_pos.z -= 0.5f * m_gravity_constant * fine_time * fine_time;
const auto pitch = calculate_pitch(proj_origin, target_pos, bullet_gravity, v0, fine_time);
if (!pitch) if (!pitch)
continue; continue;
const Vector3 delta = targetPos - projOrigin; const Vector3 delta = target_pos - proj_origin;
const float d = std::sqrt(delta.x * delta.x + delta.y * delta.y); const float d = std::sqrt(delta.x * delta.x + delta.y * delta.y);
const float height = d * std::tan(angles::DegreesToRadians(*pitch)); const float height = d * std::tan(angles::degrees_to_radians(*pitch));
return Vector3(targetPos.x, targetPos.y, projOrigin.z + height); return Vector3(target_pos.x, target_pos.y, proj_origin.z + height);
} }
} }
} }
// Fallback scalar processing for remaining times // Fallback scalar processing for remaining times
for (; currentTime <= m_maximumSimulationTime; currentTime += m_simulationTimeStep) for (; current_time <= m_maximum_simulation_time; current_time += m_simulation_time_step)
{ {
const Vector3 targetPos = target.PredictPosition(currentTime, m_gravityConstant); Vector3 target_pos = target.m_origin + target.m_velocity * current_time;
const auto pitch = CalculatePitch(projOrigin, targetPos, bulletGravity, v0, currentTime); if (target.m_is_airborne)
target_pos.z -= 0.5f * m_gravity_constant * current_time * current_time;
const auto pitch = calculate_pitch(proj_origin, target_pos, bullet_gravity, v0, current_time);
if (!pitch) if (!pitch)
continue; continue;
const Vector3 delta = targetPos - projOrigin; const Vector3 delta = target_pos - proj_origin;
const float d = std::sqrt(delta.x * delta.x + delta.y * delta.y); const float d = std::sqrt(delta.x * delta.x + delta.y * delta.y);
const float height = d * std::tan(angles::DegreesToRadians(*pitch)); const float height = d * std::tan(angles::degrees_to_radians(*pitch));
return Vector3(targetPos.x, targetPos.y, projOrigin.z + height); return Vector3(target_pos.x, target_pos.y, proj_origin.z + height);
} }
return std::nullopt; return std::nullopt;
@@ -134,22 +141,22 @@ namespace omath::projectile_prediction
return std::nullopt; return std::nullopt;
const Vector3 delta = target_pos - proj_origin; const Vector3 delta = target_pos - proj_origin;
const float dSqr = delta.x * delta.x + delta.y * delta.y; const float d_sqr = delta.x * delta.x + delta.y * delta.y;
const float h = delta.z; const float h = delta.z;
const float term = h + 0.5f * bullet_gravity * time * time; const float term = h + 0.5f * bullet_gravity * time * time;
const float requiredV0Sqr = (dSqr + term * term) / (time * time); const float required_v0_sqr = (d_sqr + term * term) / (time * time);
const float v0Sqr = v0 * v0; const float v0_sqr = v0 * v0;
if (requiredV0Sqr > v0Sqr + 1e-3f) if (required_v0_sqr > v0_sqr + 1e-3f)
return std::nullopt; return std::nullopt;
if (dSqr == 0.0f) if (d_sqr == 0.0f)
return term >= 0.0f ? 90.0f : -90.0f; return term >= 0.0f ? 90.0f : -90.0f;
const float d = std::sqrt(dSqr); const float d = std::sqrt(d_sqr);
const float tanTheta = term / d; const float tan_theta = term / d;
return angles::RadiansToDegrees(std::atan(tanTheta)); return angles::radians_to_degrees(std::atan(tan_theta));
#else #else
throw std::runtime_error( throw std::runtime_error(
std::format("{} AVX2 feature is not enabled!", std::source_location::current().function_name())); std::format("{} AVX2 feature is not enabled!", std::source_location::current().function_name()));

View File

@@ -16,9 +16,9 @@ set_target_properties(${PROJECT_NAME} PROPERTIES
if (TARGET gtest) if (TARGET gtest) # GTest is being linked as submodule
target_link_libraries(${PROJECT_NAME} PRIVATE gtest gtest_main omath::omath) target_link_libraries(${PROJECT_NAME} PRIVATE gtest gtest_main omath::omath)
else() else() # GTest is being linked as vcpkg package
find_package(GTest CONFIG REQUIRED) find_package(GTest CONFIG REQUIRED)
target_link_libraries(${PROJECT_NAME} PRIVATE GTest::gtest GTest::gtest_main omath::omath) target_link_libraries(${PROJECT_NAME} PRIVATE GTest::gtest GTest::gtest_main omath::omath)
endif() endif()