Add Mach-O pattern scanner (#144)

* Initial plan

* Add Mach-O pattern scanner implementation and unit tests

Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com>

* Add Mach-O pattern scanner

Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com>

* Remove CodeQL build artifacts from PR

Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com>
This commit is contained in:
Copilot
2026-02-04 17:30:20 +03:00
committed by GitHub
parent 510e3d7d1c
commit 775949887a
3 changed files with 726 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
//
// Created by Copilot on 04.02.2026.
//
#pragma once
#include <cstdint>
#include <filesystem>
#include <optional>
#include <string_view>
#include "section_scan_result.hpp"
namespace omath
{
class MachOPatternScanner final
{
public:
[[nodiscard]]
static std::optional<std::uintptr_t>
scan_for_pattern_in_loaded_module(const void* module_base_address, const std::string_view& pattern,
const std::string_view& target_section_name = "__text");
[[nodiscard]]
static std::optional<SectionScanResult>
scan_for_pattern_in_file(const std::filesystem::path& path_to_file, const std::string_view& pattern,
const std::string_view& target_section_name = "__text");
};
} // namespace omath

View File

@@ -0,0 +1,344 @@
//
// Created by Copilot on 04.02.2026.
//
#include "omath/utility/macho_pattern_scan.hpp"
#include "omath/utility/pattern_scan.hpp"
#include <cstring>
#include <fstream>
#include <variant>
#include <vector>
#pragma pack(push, 1)
namespace
{
// Mach-O magic numbers
constexpr std::uint32_t mh_magic_32 = 0xFEEDFACE;
constexpr std::uint32_t mh_magic_64 = 0xFEEDFACF;
constexpr std::uint32_t mh_cigam_32 = 0xCEFAEDFE; // Byte-swapped 32-bit
constexpr std::uint32_t mh_cigam_64 = 0xCFFAEDFE; // Byte-swapped 64-bit
// Load command types
constexpr std::uint32_t lc_segment = 0x1;
constexpr std::uint32_t lc_segment_64 = 0x19;
// Mach-O header for 32-bit
struct MachHeader32 final
{
std::uint32_t magic;
std::uint32_t cputype;
std::uint32_t cpusubtype;
std::uint32_t filetype;
std::uint32_t ncmds;
std::uint32_t sizeofcmds;
std::uint32_t flags;
};
// Mach-O header for 64-bit
struct MachHeader64 final
{
std::uint32_t magic;
std::uint32_t cputype;
std::uint32_t cpusubtype;
std::uint32_t filetype;
std::uint32_t ncmds;
std::uint32_t sizeofcmds;
std::uint32_t flags;
std::uint32_t reserved;
};
// Load command header
struct LoadCommand final
{
std::uint32_t cmd;
std::uint32_t cmdsize;
};
// Segment command for 32-bit
struct SegmentCommand32 final
{
std::uint32_t cmd;
std::uint32_t cmdsize;
char segname[16];
std::uint32_t vmaddr;
std::uint32_t vmsize;
std::uint32_t fileoff;
std::uint32_t filesize;
std::uint32_t maxprot;
std::uint32_t initprot;
std::uint32_t nsects;
std::uint32_t flags;
};
// Segment command for 64-bit
struct SegmentCommand64 final
{
std::uint32_t cmd;
std::uint32_t cmdsize;
char segname[16];
std::uint64_t vmaddr;
std::uint64_t vmsize;
std::uint64_t fileoff;
std::uint64_t filesize;
std::uint32_t maxprot;
std::uint32_t initprot;
std::uint32_t nsects;
std::uint32_t flags;
};
// Section for 32-bit
struct Section32 final
{
char sectname[16];
char segname[16];
std::uint32_t addr;
std::uint32_t size;
std::uint32_t offset;
std::uint32_t align;
std::uint32_t reloff;
std::uint32_t nreloc;
std::uint32_t flags;
std::uint32_t reserved1;
std::uint32_t reserved2;
};
// Section for 64-bit
struct Section64 final
{
char sectname[16];
char segname[16];
std::uint64_t addr;
std::uint64_t size;
std::uint32_t offset;
std::uint32_t align;
std::uint32_t reloff;
std::uint32_t nreloc;
std::uint32_t flags;
std::uint32_t reserved1;
std::uint32_t reserved2;
std::uint32_t reserved3;
};
#pragma pack(pop)
enum class MachOArch : std::int8_t
{
x32,
x64,
};
struct ExtractedSection final
{
std::uintptr_t virtual_base_addr{};
std::uintptr_t raw_base_addr{};
std::vector<std::byte> data;
};
[[nodiscard]]
std::optional<MachOArch> get_macho_arch(std::fstream& file)
{
std::uint32_t magic{};
const std::streampos backup_pos = file.tellg();
file.seekg(0, std::ios_base::beg);
file.read(reinterpret_cast<char*>(&magic), sizeof(magic));
file.seekg(backup_pos, std::ios_base::beg);
if (magic == mh_magic_64 || magic == mh_cigam_64)
return MachOArch::x64;
if (magic == mh_magic_32 || magic == mh_cigam_32)
return MachOArch::x32;
return std::nullopt;
}
[[nodiscard]]
bool is_macho_file(std::fstream& file)
{
return get_macho_arch(file).has_value();
}
[[nodiscard]]
std::string_view get_section_name(const char* sectname)
{
// Mach-O section names are fixed 16-byte arrays, not necessarily null-terminated
return std::string_view(sectname, std::min(std::strlen(sectname), std::size_t{16}));
}
template<typename HeaderType, typename SegmentType, typename SectionType, std::uint32_t segment_cmd>
std::optional<ExtractedSection> extract_section_impl(std::fstream& file, const std::string_view& section_name)
{
HeaderType header{};
file.seekg(0, std::ios_base::beg);
if (!file.read(reinterpret_cast<char*>(&header), sizeof(header))) [[unlikely]]
return std::nullopt;
std::streamoff cmd_offset = sizeof(header);
for (std::uint32_t i = 0; i < header.ncmds; ++i)
{
LoadCommand lc{};
file.seekg(cmd_offset, std::ios_base::beg);
if (!file.read(reinterpret_cast<char*>(&lc), sizeof(lc))) [[unlikely]]
return std::nullopt;
if (lc.cmd == segment_cmd)
{
SegmentType segment{};
file.seekg(cmd_offset, std::ios_base::beg);
if (!file.read(reinterpret_cast<char*>(&segment), sizeof(segment))) [[unlikely]]
return std::nullopt;
std::streamoff sect_offset = cmd_offset + static_cast<std::streamoff>(sizeof(segment));
for (std::uint32_t j = 0; j < segment.nsects; ++j)
{
SectionType section{};
file.seekg(sect_offset, std::ios_base::beg);
if (!file.read(reinterpret_cast<char*>(&section), sizeof(section))) [[unlikely]]
return std::nullopt;
if (get_section_name(section.sectname) == section_name)
{
ExtractedSection out;
out.virtual_base_addr = static_cast<std::uintptr_t>(section.addr);
out.raw_base_addr = static_cast<std::uintptr_t>(section.offset);
out.data.resize(static_cast<std::size_t>(section.size));
file.seekg(static_cast<std::streamoff>(section.offset), std::ios_base::beg);
if (!file.read(reinterpret_cast<char*>(out.data.data()),
static_cast<std::streamsize>(out.data.size()))) [[unlikely]]
return std::nullopt;
return out;
}
sect_offset += static_cast<std::streamoff>(sizeof(section));
}
}
cmd_offset += static_cast<std::streamoff>(lc.cmdsize);
}
return std::nullopt;
}
[[nodiscard]]
std::optional<ExtractedSection> get_macho_section_by_name(const std::filesystem::path& path,
const std::string_view& section_name)
{
std::fstream file(path, std::ios::binary | std::ios::in);
if (!file.is_open()) [[unlikely]]
return std::nullopt;
if (!is_macho_file(file)) [[unlikely]]
return std::nullopt;
const auto arch = get_macho_arch(file);
if (!arch.has_value()) [[unlikely]]
return std::nullopt;
if (arch.value() == MachOArch::x64)
return extract_section_impl<MachHeader64, SegmentCommand64, Section64, lc_segment_64>(file, section_name);
else
return extract_section_impl<MachHeader32, SegmentCommand32, Section32, lc_segment>(file, section_name);
}
template<typename HeaderType, typename SegmentType, typename SectionType, std::uint32_t segment_cmd>
std::optional<std::uintptr_t> scan_in_module_impl(const std::byte* base, const std::string_view pattern,
const std::string_view target_section_name)
{
const auto* header = reinterpret_cast<const HeaderType*>(base);
std::size_t cmd_offset = sizeof(HeaderType);
for (std::uint32_t i = 0; i < header->ncmds; ++i)
{
const auto* lc = reinterpret_cast<const LoadCommand*>(base + cmd_offset);
if (lc->cmd == segment_cmd)
{
const auto* segment = reinterpret_cast<const SegmentType*>(base + cmd_offset);
std::size_t sect_offset = cmd_offset + sizeof(SegmentType);
for (std::uint32_t j = 0; j < segment->nsects; ++j)
{
const auto* section = reinterpret_cast<const SectionType*>(base + sect_offset);
if (get_section_name(section->sectname) == target_section_name && section->size > 0)
{
const auto* section_begin = base + static_cast<std::size_t>(section->addr);
const auto* section_end = section_begin + static_cast<std::size_t>(section->size);
const auto scan_result =
omath::PatternScanner::scan_for_pattern(section_begin, section_end, pattern);
if (scan_result != section_end)
return reinterpret_cast<std::uintptr_t>(scan_result);
}
sect_offset += sizeof(SectionType);
}
}
cmd_offset += lc->cmdsize;
}
return std::nullopt;
}
} // namespace
namespace omath
{
std::optional<std::uintptr_t>
MachOPatternScanner::scan_for_pattern_in_loaded_module(const void* module_base_address,
const std::string_view& pattern,
const std::string_view& target_section_name)
{
if (module_base_address == nullptr) [[unlikely]]
return std::nullopt;
const auto* base = static_cast<const std::byte*>(module_base_address);
// Read magic to determine architecture
std::uint32_t magic{};
std::memcpy(&magic, base, sizeof(magic));
if (magic == mh_magic_64 || magic == mh_cigam_64)
return scan_in_module_impl<MachHeader64, SegmentCommand64, Section64, lc_segment_64>(
base, pattern, target_section_name);
if (magic == mh_magic_32 || magic == mh_cigam_32)
return scan_in_module_impl<MachHeader32, SegmentCommand32, Section32, lc_segment>(base, pattern,
target_section_name);
return std::nullopt;
}
std::optional<SectionScanResult>
MachOPatternScanner::scan_for_pattern_in_file(const std::filesystem::path& path_to_file,
const std::string_view& pattern,
const std::string_view& target_section_name)
{
const auto macho_section = get_macho_section_by_name(path_to_file, target_section_name);
if (!macho_section.has_value()) [[unlikely]]
return std::nullopt;
const auto scan_result =
PatternScanner::scan_for_pattern(macho_section->data.cbegin(), macho_section->data.cend(), pattern);
if (scan_result == macho_section->data.cend())
return std::nullopt;
const auto offset = std::distance(macho_section->data.begin(), scan_result);
return SectionScanResult{.virtual_base_addr = macho_section->virtual_base_addr,
.raw_base_addr = macho_section->raw_base_addr,
.target_offset = offset};
}
} // namespace omath

View File

@@ -0,0 +1,357 @@
//
// Created by Copilot on 04.02.2026.
//
// Unit tests for MachOPatternScanner
#include <gtest/gtest.h>
#include <omath/utility/macho_pattern_scan.hpp>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <vector>
using namespace omath;
namespace
{
// Mach-O magic numbers
constexpr std::uint32_t mh_magic_64 = 0xFEEDFACF;
constexpr std::uint32_t mh_magic_32 = 0xFEEDFACE;
constexpr std::uint32_t lc_segment = 0x1;
constexpr std::uint32_t lc_segment_64 = 0x19;
#pragma pack(push, 1)
struct MachHeader64
{
std::uint32_t magic;
std::uint32_t cputype;
std::uint32_t cpusubtype;
std::uint32_t filetype;
std::uint32_t ncmds;
std::uint32_t sizeofcmds;
std::uint32_t flags;
std::uint32_t reserved;
};
struct MachHeader32
{
std::uint32_t magic;
std::uint32_t cputype;
std::uint32_t cpusubtype;
std::uint32_t filetype;
std::uint32_t ncmds;
std::uint32_t sizeofcmds;
std::uint32_t flags;
};
struct SegmentCommand64
{
std::uint32_t cmd;
std::uint32_t cmdsize;
char segname[16];
std::uint64_t vmaddr;
std::uint64_t vmsize;
std::uint64_t fileoff;
std::uint64_t filesize;
std::uint32_t maxprot;
std::uint32_t initprot;
std::uint32_t nsects;
std::uint32_t flags;
};
struct SegmentCommand32
{
std::uint32_t cmd;
std::uint32_t cmdsize;
char segname[16];
std::uint32_t vmaddr;
std::uint32_t vmsize;
std::uint32_t fileoff;
std::uint32_t filesize;
std::uint32_t maxprot;
std::uint32_t initprot;
std::uint32_t nsects;
std::uint32_t flags;
};
struct Section64
{
char sectname[16];
char segname[16];
std::uint64_t addr;
std::uint64_t size;
std::uint32_t offset;
std::uint32_t align;
std::uint32_t reloff;
std::uint32_t nreloc;
std::uint32_t flags;
std::uint32_t reserved1;
std::uint32_t reserved2;
std::uint32_t reserved3;
};
struct Section32
{
char sectname[16];
char segname[16];
std::uint32_t addr;
std::uint32_t size;
std::uint32_t offset;
std::uint32_t align;
std::uint32_t reloff;
std::uint32_t nreloc;
std::uint32_t flags;
std::uint32_t reserved1;
std::uint32_t reserved2;
};
#pragma pack(pop)
// Helper function to create a minimal 64-bit Mach-O file with a __text section
bool write_minimal_macho64_file(const std::string& path, const std::vector<std::uint8_t>& section_bytes)
{
std::ofstream f(path, std::ios::binary);
if (!f.is_open())
return false;
// Calculate sizes
constexpr std::size_t header_size = sizeof(MachHeader64);
constexpr std::size_t segment_size = sizeof(SegmentCommand64);
constexpr std::size_t section_size = sizeof(Section64);
constexpr std::size_t load_cmd_size = segment_size + section_size;
// Section data will start after headers
const std::size_t section_offset = header_size + load_cmd_size;
// Create Mach-O header
MachHeader64 header{};
header.magic = mh_magic_64;
header.cputype = 0x01000007; // CPU_TYPE_X86_64
header.cpusubtype = 0x3; // CPU_SUBTYPE_X86_64_ALL
header.filetype = 0x2; // MH_EXECUTE
header.ncmds = 1;
header.sizeofcmds = static_cast<std::uint32_t>(load_cmd_size);
header.flags = 0;
header.reserved = 0;
f.write(reinterpret_cast<const char*>(&header), sizeof(header));
// Create segment command
SegmentCommand64 segment{};
segment.cmd = lc_segment_64;
segment.cmdsize = static_cast<std::uint32_t>(load_cmd_size);
std::strncpy(segment.segname, "__TEXT", 16);
segment.vmaddr = 0x100000000;
segment.vmsize = section_bytes.size();
segment.fileoff = section_offset;
segment.filesize = section_bytes.size();
segment.maxprot = 7; // VM_PROT_ALL
segment.initprot = 5; // VM_PROT_READ | VM_PROT_EXECUTE
segment.nsects = 1;
segment.flags = 0;
f.write(reinterpret_cast<const char*>(&segment), sizeof(segment));
// Create section
Section64 section{};
std::strncpy(section.sectname, "__text", 16);
std::strncpy(section.segname, "__TEXT", 16);
section.addr = 0x100000000;
section.size = section_bytes.size();
section.offset = static_cast<std::uint32_t>(section_offset);
section.align = 0;
section.reloff = 0;
section.nreloc = 0;
section.flags = 0;
section.reserved1 = 0;
section.reserved2 = 0;
section.reserved3 = 0;
f.write(reinterpret_cast<const char*>(&section), sizeof(section));
// Write section data
f.write(reinterpret_cast<const char*>(section_bytes.data()), static_cast<std::streamsize>(section_bytes.size()));
f.close();
return true;
}
// Helper function to create a minimal 32-bit Mach-O file with a __text section
bool write_minimal_macho32_file(const std::string& path, const std::vector<std::uint8_t>& section_bytes)
{
std::ofstream f(path, std::ios::binary);
if (!f.is_open())
return false;
// Calculate sizes
constexpr std::size_t header_size = sizeof(MachHeader32);
constexpr std::size_t segment_size = sizeof(SegmentCommand32);
constexpr std::size_t section_size = sizeof(Section32);
constexpr std::size_t load_cmd_size = segment_size + section_size;
// Section data will start after headers
const std::size_t section_offset = header_size + load_cmd_size;
// Create Mach-O header
MachHeader32 header{};
header.magic = mh_magic_32;
header.cputype = 0x7; // CPU_TYPE_X86
header.cpusubtype = 0x3; // CPU_SUBTYPE_X86_ALL
header.filetype = 0x2; // MH_EXECUTE
header.ncmds = 1;
header.sizeofcmds = static_cast<std::uint32_t>(load_cmd_size);
header.flags = 0;
f.write(reinterpret_cast<const char*>(&header), sizeof(header));
// Create segment command
SegmentCommand32 segment{};
segment.cmd = lc_segment;
segment.cmdsize = static_cast<std::uint32_t>(load_cmd_size);
std::strncpy(segment.segname, "__TEXT", 16);
segment.vmaddr = 0x1000;
segment.vmsize = static_cast<std::uint32_t>(section_bytes.size());
segment.fileoff = static_cast<std::uint32_t>(section_offset);
segment.filesize = static_cast<std::uint32_t>(section_bytes.size());
segment.maxprot = 7; // VM_PROT_ALL
segment.initprot = 5; // VM_PROT_READ | VM_PROT_EXECUTE
segment.nsects = 1;
segment.flags = 0;
f.write(reinterpret_cast<const char*>(&segment), sizeof(segment));
// Create section
Section32 section{};
std::strncpy(section.sectname, "__text", 16);
std::strncpy(section.segname, "__TEXT", 16);
section.addr = 0x1000;
section.size = static_cast<std::uint32_t>(section_bytes.size());
section.offset = static_cast<std::uint32_t>(section_offset);
section.align = 0;
section.reloff = 0;
section.nreloc = 0;
section.flags = 0;
section.reserved1 = 0;
section.reserved2 = 0;
f.write(reinterpret_cast<const char*>(&section), sizeof(section));
// Write section data
f.write(reinterpret_cast<const char*>(section_bytes.data()), static_cast<std::streamsize>(section_bytes.size()));
f.close();
return true;
}
} // namespace
// Test scanning for a pattern that exists in a 64-bit Mach-O file
TEST(unit_test_macho_pattern_scan_file, ScanFindsPattern64)
{
constexpr std::string_view path = "./test_minimal_macho64.bin";
const std::vector<std::uint8_t> bytes = {0x55, 0x48, 0x89, 0xE5, 0x90, 0x90}; // push rbp; mov rbp, rsp; nop; nop
ASSERT_TRUE(write_minimal_macho64_file(path.data(), bytes));
const auto res = MachOPatternScanner::scan_for_pattern_in_file(path, "55 48 89 E5", "__text");
EXPECT_TRUE(res.has_value());
if (res.has_value())
{
EXPECT_EQ(res->target_offset, 0);
}
}
// Test scanning for a pattern that exists in a 32-bit Mach-O file
TEST(unit_test_macho_pattern_scan_file, ScanFindsPattern32)
{
constexpr std::string_view path = "./test_minimal_macho32.bin";
const std::vector<std::uint8_t> bytes = {0x55, 0x89, 0xE5, 0x90, 0x90}; // push ebp; mov ebp, esp; nop; nop
ASSERT_TRUE(write_minimal_macho32_file(path.data(), bytes));
const auto res = MachOPatternScanner::scan_for_pattern_in_file(path, "55 89 E5", "__text");
EXPECT_TRUE(res.has_value());
if (res.has_value())
{
EXPECT_EQ(res->target_offset, 0);
}
}
// Test scanning for a pattern that does not exist
TEST(unit_test_macho_pattern_scan_file, ScanMissingPattern)
{
constexpr std::string_view path = "./test_minimal_macho_missing.bin";
const std::vector<std::uint8_t> bytes = {0x00, 0x01, 0x02, 0x03};
ASSERT_TRUE(write_minimal_macho64_file(path.data(), bytes));
const auto res = MachOPatternScanner::scan_for_pattern_in_file(path, "FF EE DD", "__text");
EXPECT_FALSE(res.has_value());
}
// Test scanning for a pattern at a non-zero offset
TEST(unit_test_macho_pattern_scan_file, ScanPatternAtOffset)
{
constexpr std::string_view path = "./test_minimal_macho_offset.bin";
const std::vector<std::uint8_t> bytes = {0x90, 0x90, 0x90, 0x55, 0x48, 0x89, 0xE5}; // nops then pattern
ASSERT_TRUE(write_minimal_macho64_file(path.data(), bytes));
const auto res = MachOPatternScanner::scan_for_pattern_in_file(path, "55 48 89 E5", "__text");
EXPECT_TRUE(res.has_value());
if (res.has_value())
{
EXPECT_EQ(res->target_offset, 3);
}
}
// Test scanning with wildcards
TEST(unit_test_macho_pattern_scan_file, ScanWithWildcard)
{
constexpr std::string_view path = "./test_minimal_macho_wildcard.bin";
const std::vector<std::uint8_t> bytes = {0x55, 0x48, 0x89, 0xE5, 0x90};
ASSERT_TRUE(write_minimal_macho64_file(path.data(), bytes));
const auto res = MachOPatternScanner::scan_for_pattern_in_file(path, "55 ? 89 E5", "__text");
EXPECT_TRUE(res.has_value());
}
// Test scanning a non-existent file
TEST(unit_test_macho_pattern_scan_file, ScanNonExistentFile)
{
const auto res = MachOPatternScanner::scan_for_pattern_in_file("/non/existent/file.bin", "55 48", "__text");
EXPECT_FALSE(res.has_value());
}
// Test scanning an invalid (non-Mach-O) file
TEST(unit_test_macho_pattern_scan_file, ScanInvalidFile)
{
constexpr std::string_view path = "./test_invalid_macho.bin";
std::ofstream f(path.data(), std::ios::binary);
const std::vector<std::uint8_t> garbage = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05};
f.write(reinterpret_cast<const char*>(garbage.data()), static_cast<std::streamsize>(garbage.size()));
f.close();
const auto res = MachOPatternScanner::scan_for_pattern_in_file(path, "55 48", "__text");
EXPECT_FALSE(res.has_value());
}
// Test scanning for a non-existent section
TEST(unit_test_macho_pattern_scan_file, ScanNonExistentSection)
{
constexpr std::string_view path = "./test_minimal_macho_nosect.bin";
const std::vector<std::uint8_t> bytes = {0x55, 0x48, 0x89, 0xE5};
ASSERT_TRUE(write_minimal_macho64_file(path.data(), bytes));
const auto res = MachOPatternScanner::scan_for_pattern_in_file(path, "55 48", "__nonexistent");
EXPECT_FALSE(res.has_value());
}
// Test scanning with null module base address
TEST(unit_test_macho_pattern_scan_loaded, ScanNullModule)
{
const auto res = MachOPatternScanner::scan_for_pattern_in_loaded_module(nullptr, "55 48", "__text");
EXPECT_FALSE(res.has_value());
}
// Test scanning in loaded module with invalid magic
TEST(unit_test_macho_pattern_scan_loaded, ScanInvalidMagic)
{
std::vector<std::uint8_t> invalid_data(256, 0x00);
const auto res = MachOPatternScanner::scan_for_pattern_in_loaded_module(invalid_data.data(), "55 48", "__text");
EXPECT_FALSE(res.has_value());
}