Merge pull request #95 from orange-cpp/pathfinding_improvement

removed brackets
This commit is contained in:
2025-10-30 05:44:00 +03:00
committed by GitHub
5 changed files with 133 additions and 99 deletions

View File

@@ -56,7 +56,7 @@ SpaceBeforeParensOptions:
AfterIfMacros: true AfterIfMacros: true
AfterOverloadedOperator: false AfterOverloadedOperator: false
BeforeNonEmptyParentheses: false BeforeNonEmptyParentheses: false
SpaceBeforeRangeBasedForLoopColon: false SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyParentheses: false SpaceInEmptyParentheses: false
SpacesInCStyleCastParentheses: false SpacesInCStyleCastParentheses: false
SpacesInConditionalStatement: false SpacesInConditionalStatement: false

View File

@@ -22,9 +22,5 @@ namespace omath::pathfinding
static std::vector<Vector3<float>> static std::vector<Vector3<float>>
reconstruct_final_path(const std::unordered_map<Vector3<float>, PathNode>& closed_list, reconstruct_final_path(const std::unordered_map<Vector3<float>, PathNode>& closed_list,
const Vector3<float>& current) noexcept; const Vector3<float>& current) noexcept;
[[nodiscard]]
static auto get_perfect_node(const std::unordered_map<Vector3<float>, PathNode>& open_list,
const Vector3<float>& end_vertex) noexcept;
}; };
} // namespace omath::pathfinding } // namespace omath::pathfinding

View File

@@ -4,9 +4,25 @@
#include "omath/pathfinding/a_star.hpp" #include "omath/pathfinding/a_star.hpp"
#include <algorithm> #include <algorithm>
#include <optional> #include <optional>
#include <queue>
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
namespace
{
struct OpenListNode final
{
omath::Vector3<float> position;
float f_cost;
[[nodiscard]]
bool operator>(const OpenListNode& other) const noexcept
{
return f_cost > other.f_cost;
}
};
}
namespace omath::pathfinding namespace omath::pathfinding
{ {
struct PathNode final struct PathNode final
@@ -37,23 +53,13 @@ namespace omath::pathfinding
std::ranges::reverse(path); std::ranges::reverse(path);
return path; return path;
} }
auto Astar::get_perfect_node(const std::unordered_map<Vector3<float>, PathNode>& open_list,
const Vector3<float>& end_vertex) noexcept
{
return std::ranges::min_element(open_list,
[&end_vertex](const auto& a, const auto& b)
{
const float fa = a.second.g_cost + a.first.distance_to(end_vertex);
const float fb = b.second.g_cost + b.first.distance_to(end_vertex);
return fa < fb;
});
}
std::vector<Vector3<float>> Astar::find_path(const Vector3<float>& start, const Vector3<float>& end, std::vector<Vector3<float>> Astar::find_path(const Vector3<float>& start, const Vector3<float>& end,
const NavigationMesh& nav_mesh) noexcept const NavigationMesh& nav_mesh) noexcept
{ {
std::unordered_map<Vector3<float>, PathNode> closed_list; std::unordered_map<Vector3<float>, PathNode> closed_list;
std::unordered_map<Vector3<float>, PathNode> open_list; std::unordered_map<Vector3<float>, PathNode> node_data;
std::priority_queue<OpenListNode, std::vector<OpenListNode>, std::greater<>> open_list;
auto maybe_start_vertex = nav_mesh.get_closest_vertex(start); auto maybe_start_vertex = nav_mesh.get_closest_vertex(start);
auto maybe_end_vertex = nav_mesh.get_closest_vertex(end); auto maybe_end_vertex = nav_mesh.get_closest_vertex(end);
@@ -64,20 +70,27 @@ namespace omath::pathfinding
const auto start_vertex = maybe_start_vertex.value(); const auto start_vertex = maybe_start_vertex.value();
const auto end_vertex = maybe_end_vertex.value(); const auto end_vertex = maybe_end_vertex.value();
open_list.emplace(start_vertex, PathNode{std::nullopt, 0.f}); node_data.emplace(start_vertex, PathNode{std::nullopt, 0.f});
open_list.push({start_vertex, start_vertex.distance_to(end_vertex)});
while (!open_list.empty()) while (!open_list.empty())
{ {
auto current_it = get_perfect_node(open_list, end_vertex); auto current = open_list.top().position;
open_list.pop();
const auto current = current_it->first; if (closed_list.contains(current))
const auto current_node = current_it->second; continue;
auto current_node_it = node_data.find(current);
if (current_node_it == node_data.end())
continue;
const auto current_node = current_node_it->second;
if (current == end_vertex) if (current == end_vertex)
return reconstruct_final_path(closed_list, current); return reconstruct_final_path(closed_list, current);
closed_list.emplace(current, current_node); closed_list.emplace(current, current_node);
open_list.erase(current_it);
for (const auto& neighbor: nav_mesh.get_neighbors(current)) for (const auto& neighbor: nav_mesh.get_neighbors(current))
{ {
@@ -86,11 +99,14 @@ namespace omath::pathfinding
const float tentative_g_cost = current_node.g_cost + neighbor.distance_to(current); const float tentative_g_cost = current_node.g_cost + neighbor.distance_to(current);
// ReSharper disable once CppTooWideScopeInitStatement auto node_it = node_data.find(neighbor);
const auto open_it = open_list.find(neighbor);
if (open_it == open_list.end() || tentative_g_cost < open_it->second.g_cost) if (node_it == node_data.end() || tentative_g_cost < node_it->second.g_cost)
open_list[neighbor] = PathNode{current, tentative_g_cost}; {
node_data[neighbor] = PathNode{current, tentative_g_cost};
const float f_cost = tentative_g_cost + neighbor.distance_to(end_vertex);
open_list.push({neighbor, f_cost});
}
} }
} }

View File

@@ -3,6 +3,8 @@
// //
#include "omath/pathfinding/navigation_mesh.hpp" #include "omath/pathfinding/navigation_mesh.hpp"
#include <algorithm> #include <algorithm>
#include <cstring>
#include <limits>
#include <stdexcept> #include <stdexcept>
namespace omath::pathfinding namespace omath::pathfinding
{ {
@@ -30,55 +32,68 @@ namespace omath::pathfinding
std::vector<uint8_t> NavigationMesh::serialize() const noexcept std::vector<uint8_t> NavigationMesh::serialize() const noexcept
{ {
auto dump_to_vector = []<typename T>(const T& t, std::vector<uint8_t>& vec) std::vector<std::uint8_t> raw;
{
for (size_t i = 0; i < sizeof(t); i++)
vec.push_back(*(reinterpret_cast<const uint8_t*>(&t) + i));
};
std::vector<uint8_t> raw; // Pre-calculate total size for better performance
std::size_t total_size = 0;
for (const auto& [vertex, neighbors] : m_vertex_map)
{
total_size += sizeof(vertex) + sizeof(std::uint16_t) + sizeof(Vector3<float>) * neighbors.size();
}
raw.reserve(total_size);
auto dump_to_vector = [&raw]<typename T>(const T& t)
{
const auto* byte_ptr = reinterpret_cast<const std::uint8_t*>(&t);
raw.insert(raw.end(), byte_ptr, byte_ptr + sizeof(T));
};
for (const auto& [vertex, neighbors] : m_vertex_map) for (const auto& [vertex, neighbors] : m_vertex_map)
{ {
const auto neighbors_count = neighbors.size(); // Clamp neighbors count to fit in uint16_t (prevents silent data corruption)
// NOTE: If neighbors.size() > 65535, only the first 65535 neighbors will be serialized.
// This is a limitation of the current serialization format using uint16_t for count.
const auto clamped_count =
std::min<std::size_t>(neighbors.size(), std::numeric_limits<std::uint16_t>::max());
const auto neighbors_count = static_cast<std::uint16_t>(clamped_count);
dump_to_vector(vertex, raw); dump_to_vector(vertex);
dump_to_vector(neighbors_count, raw); dump_to_vector(neighbors_count);
for (const auto& neighbor: neighbors) // Only serialize up to the clamped count
dump_to_vector(neighbor, raw); for (std::size_t i = 0; i < clamped_count; ++i)
dump_to_vector(neighbors[i]);
} }
return raw; return raw;
} }
void NavigationMesh::deserialize(const std::vector<uint8_t>& raw) noexcept void NavigationMesh::deserialize(const std::vector<uint8_t>& raw) noexcept
{ {
auto load_from_vector = [](const std::vector<uint8_t>& vec, size_t& offset, auto& value) auto load_from_vector = [](const std::vector<uint8_t>& vec, std::size_t& offset, auto& value)
{ {
if (offset + sizeof(value) > vec.size()) if (offset + sizeof(value) > vec.size())
{
throw std::runtime_error("Deserialize: Invalid input data size."); throw std::runtime_error("Deserialize: Invalid input data size.");
}
std::copy_n(vec.data() + offset, sizeof(value), reinterpret_cast<uint8_t*>(&value)); std::copy_n(vec.data() + offset, sizeof(value), reinterpret_cast<uint8_t*>(&value));
offset += sizeof(value); offset += sizeof(value);
}; };
m_vertex_map.clear(); m_vertex_map.clear();
size_t offset = 0; std::size_t offset = 0;
while (offset < raw.size()) while (offset < raw.size())
{ {
Vector3<float> vertex; Vector3<float> vertex;
load_from_vector(raw, offset, vertex); load_from_vector(raw, offset, vertex);
uint16_t neighbors_count; std::uint16_t neighbors_count;
load_from_vector(raw, offset, neighbors_count); load_from_vector(raw, offset, neighbors_count);
std::vector<Vector3<float>> neighbors; std::vector<Vector3<float>> neighbors;
neighbors.reserve(neighbors_count); neighbors.reserve(neighbors_count);
for (size_t i = 0; i < neighbors_count; ++i) for (std::size_t i = 0; i < neighbors_count; ++i)
{ {
Vector3<float> neighbor; Vector3<float> neighbor;
load_from_vector(raw, offset, neighbor); load_from_vector(raw, offset, neighbor);

View File

@@ -18,97 +18,104 @@ namespace omath::projectile_prediction
[[maybe_unused]] const Target& target) const [[maybe_unused]] const Target& target) const
{ {
#if defined(OMATH_USE_AVX2) && defined(__i386__) && defined(__x86_64__) #if defined(OMATH_USE_AVX2) && defined(__i386__) && defined(__x86_64__)
const float bulletGravity = m_gravityConstant * projectile.m_gravityScale; const float bullet_gravity = m_gravity_constant * projectile.m_gravity_scale;
const float v0 = projectile.m_launchSpeed; const float v0 = projectile.m_launch_speed;
const float v0Sqr = v0 * v0; const float v0_sqr = v0 * v0;
const Vector3 projOrigin = projectile.m_origin; const Vector3 proj_origin = projectile.m_origin;
constexpr int SIMD_FACTOR = 8; constexpr int SIMD_FACTOR = 8;
float currentTime = m_simulationTimeStep; float current_time = m_simulation_time_step;
for (; currentTime <= m_maximumSimulationTime; currentTime += m_simulationTimeStep * SIMD_FACTOR) for (; current_time <= m_maximum_simulation_time; current_time += m_simulation_time_step * SIMD_FACTOR)
{ {
const __m256 times const __m256 times
= _mm256_setr_ps(currentTime, currentTime + m_simulationTimeStep, = _mm256_setr_ps(current_time, current_time + m_simulation_time_step,
currentTime + m_simulationTimeStep * 2, currentTime + m_simulationTimeStep * 3, current_time + m_simulation_time_step * 2, current_time + m_simulation_time_step * 3,
currentTime + m_simulationTimeStep * 4, currentTime + m_simulationTimeStep * 5, current_time + m_simulation_time_step * 4, current_time + m_simulation_time_step * 5,
currentTime + m_simulationTimeStep * 6, currentTime + m_simulationTimeStep * 7); current_time + m_simulation_time_step * 6, current_time + m_simulation_time_step * 7);
const __m256 targetX const __m256 target_x
= _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.x), times, _mm256_set1_ps(target.m_origin.x)); = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.x), times, _mm256_set1_ps(target.m_origin.x));
const __m256 targetY const __m256 target_y
= _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.y), times, _mm256_set1_ps(target.m_origin.y)); = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.y), times, _mm256_set1_ps(target.m_origin.y));
const __m256 timesSq = _mm256_mul_ps(times, times); const __m256 times_sq = _mm256_mul_ps(times, times);
const __m256 targetZ = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.z), times, const __m256 target_z = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.z), times,
_mm256_fnmadd_ps(_mm256_set1_ps(0.5f * m_gravityConstant), timesSq, _mm256_fnmadd_ps(_mm256_set1_ps(0.5f * m_gravity_constant), times_sq,
_mm256_set1_ps(target.m_origin.z))); _mm256_set1_ps(target.m_origin.z)));
const __m256 deltaX = _mm256_sub_ps(targetX, _mm256_set1_ps(projOrigin.x)); const __m256 delta_x = _mm256_sub_ps(target_x, _mm256_set1_ps(proj_origin.x));
const __m256 deltaY = _mm256_sub_ps(targetY, _mm256_set1_ps(projOrigin.y)); const __m256 delta_y = _mm256_sub_ps(target_y, _mm256_set1_ps(proj_origin.y));
const __m256 deltaZ = _mm256_sub_ps(targetZ, _mm256_set1_ps(projOrigin.z)); const __m256 delta_z = _mm256_sub_ps(target_z, _mm256_set1_ps(proj_origin.z));
const __m256 dSqr = _mm256_add_ps(_mm256_mul_ps(deltaX, deltaX), _mm256_mul_ps(deltaY, deltaY)); const __m256 d_sqr = _mm256_add_ps(_mm256_mul_ps(delta_x, delta_x), _mm256_mul_ps(delta_y, delta_y));
const __m256 bgTimesSq = _mm256_mul_ps(_mm256_set1_ps(bulletGravity), timesSq); const __m256 bg_times_sq = _mm256_mul_ps(_mm256_set1_ps(bullet_gravity), times_sq);
const __m256 term = _mm256_add_ps(deltaZ, _mm256_mul_ps(_mm256_set1_ps(0.5f), bgTimesSq)); const __m256 term = _mm256_add_ps(delta_z, _mm256_mul_ps(_mm256_set1_ps(0.5f), bg_times_sq));
const __m256 termSq = _mm256_mul_ps(term, term); const __m256 term_sq = _mm256_mul_ps(term, term);
const __m256 numerator = _mm256_add_ps(dSqr, termSq); const __m256 numerator = _mm256_add_ps(d_sqr, term_sq);
const __m256 denominator = _mm256_add_ps(timesSq, _mm256_set1_ps(1e-8f)); // Avoid division by zero const __m256 denominator = _mm256_add_ps(times_sq, _mm256_set1_ps(1e-8f)); // Avoid division by zero
const __m256 requiredV0Sqr = _mm256_div_ps(numerator, denominator); const __m256 required_v0_sqr = _mm256_div_ps(numerator, denominator);
const __m256 v0SqrVec = _mm256_set1_ps(v0Sqr + 1e-3f); const __m256 v0_sqr_vec = _mm256_set1_ps(v0_sqr + 1e-3f);
const __m256 mask = _mm256_cmp_ps(requiredV0Sqr, v0SqrVec, _CMP_LE_OQ); const __m256 mask = _mm256_cmp_ps(required_v0_sqr, v0_sqr_vec, _CMP_LE_OQ);
const unsigned validMask = _mm256_movemask_ps(mask); const unsigned valid_mask = _mm256_movemask_ps(mask);
if (!validMask) if (!valid_mask)
continue; continue;
alignas(32) float validTimes[SIMD_FACTOR]; alignas(32) float valid_times[SIMD_FACTOR];
_mm256_store_ps(validTimes, times); _mm256_store_ps(valid_times, times);
for (int i = 0; i < SIMD_FACTOR; ++i) for (int i = 0; i < SIMD_FACTOR; ++i)
{ {
if (!(validMask & (1 << i))) if (!(valid_mask & (1 << i)))
continue; continue;
const float candidateTime = validTimes[i]; const float candidate_time = valid_times[i];
if (candidateTime > m_maximumSimulationTime) if (candidate_time > m_maximum_simulation_time)
continue; continue;
// Fine search around candidate time // Fine search around candidate time
for (float fineTime = candidateTime - m_simulationTimeStep * 2; for (float fine_time = candidate_time - m_simulation_time_step * 2;
fineTime <= candidateTime + m_simulationTimeStep * 2; fineTime += m_simulationTimeStep) fine_time <= candidate_time + m_simulation_time_step * 2; fine_time += m_simulation_time_step)
{ {
if (fineTime < 0) if (fine_time < 0)
continue; continue;
const Vector3 targetPos = target.PredictPosition(fineTime, m_gravityConstant); // Manually compute predicted target position to avoid adding method to Target
const auto pitch = CalculatePitch(projOrigin, targetPos, bulletGravity, v0, fineTime); Vector3 target_pos = target.m_origin + target.m_velocity * fine_time;
if (target.m_is_airborne)
target_pos.z -= 0.5f * m_gravity_constant * fine_time * fine_time;
const auto pitch = calculate_pitch(proj_origin, target_pos, bullet_gravity, v0, fine_time);
if (!pitch) if (!pitch)
continue; continue;
const Vector3 delta = targetPos - projOrigin; const Vector3 delta = target_pos - proj_origin;
const float d = std::sqrt(delta.x * delta.x + delta.y * delta.y); const float d = std::sqrt(delta.x * delta.x + delta.y * delta.y);
const float height = d * std::tan(angles::DegreesToRadians(*pitch)); const float height = d * std::tan(angles::degrees_to_radians(*pitch));
return Vector3(targetPos.x, targetPos.y, projOrigin.z + height); return Vector3(target_pos.x, target_pos.y, proj_origin.z + height);
} }
} }
} }
// Fallback scalar processing for remaining times // Fallback scalar processing for remaining times
for (; currentTime <= m_maximumSimulationTime; currentTime += m_simulationTimeStep) for (; current_time <= m_maximum_simulation_time; current_time += m_simulation_time_step)
{ {
const Vector3 targetPos = target.PredictPosition(currentTime, m_gravityConstant); Vector3 target_pos = target.m_origin + target.m_velocity * current_time;
const auto pitch = CalculatePitch(projOrigin, targetPos, bulletGravity, v0, currentTime); if (target.m_is_airborne)
target_pos.z -= 0.5f * m_gravity_constant * current_time * current_time;
const auto pitch = calculate_pitch(proj_origin, target_pos, bullet_gravity, v0, current_time);
if (!pitch) if (!pitch)
continue; continue;
const Vector3 delta = targetPos - projOrigin; const Vector3 delta = target_pos - proj_origin;
const float d = std::sqrt(delta.x * delta.x + delta.y * delta.y); const float d = std::sqrt(delta.x * delta.x + delta.y * delta.y);
const float height = d * std::tan(angles::DegreesToRadians(*pitch)); const float height = d * std::tan(angles::degrees_to_radians(*pitch));
return Vector3(targetPos.x, targetPos.y, projOrigin.z + height); return Vector3(target_pos.x, target_pos.y, proj_origin.z + height);
} }
return std::nullopt; return std::nullopt;
@@ -134,22 +141,22 @@ namespace omath::projectile_prediction
return std::nullopt; return std::nullopt;
const Vector3 delta = target_pos - proj_origin; const Vector3 delta = target_pos - proj_origin;
const float dSqr = delta.x * delta.x + delta.y * delta.y; const float d_sqr = delta.x * delta.x + delta.y * delta.y;
const float h = delta.z; const float h = delta.z;
const float term = h + 0.5f * bullet_gravity * time * time; const float term = h + 0.5f * bullet_gravity * time * time;
const float requiredV0Sqr = (dSqr + term * term) / (time * time); const float required_v0_sqr = (d_sqr + term * term) / (time * time);
const float v0Sqr = v0 * v0; const float v0_sqr = v0 * v0;
if (requiredV0Sqr > v0Sqr + 1e-3f) if (required_v0_sqr > v0_sqr + 1e-3f)
return std::nullopt; return std::nullopt;
if (dSqr == 0.0f) if (d_sqr == 0.0f)
return term >= 0.0f ? 90.0f : -90.0f; return term >= 0.0f ? 90.0f : -90.0f;
const float d = std::sqrt(dSqr); const float d = std::sqrt(d_sqr);
const float tanTheta = term / d; const float tan_theta = term / d;
return angles::RadiansToDegrees(std::atan(tanTheta)); return angles::radians_to_degrees(std::atan(tan_theta));
#else #else
throw std::runtime_error( throw std::runtime_error(
std::format("{} AVX2 feature is not enabled!", std::source_location::current().function_name())); std::format("{} AVX2 feature is not enabled!", std::source_location::current().function_name()));