From 775949887a4cf0c903068051ce3274992984891c Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Wed, 4 Feb 2026 17:30:20 +0300 Subject: [PATCH] Add Mach-O pattern scanner (#144) * Initial plan * Add Mach-O pattern scanner implementation and unit tests Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com> * Add Mach-O pattern scanner Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com> * Remove CodeQL build artifacts from PR Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com> --- include/omath/utility/macho_pattern_scan.hpp | 25 ++ source/utility/macho_pattern_scan.cpp | 344 ++++++++++++++++++ tests/general/unit_test_macho_scanner.cpp | 357 +++++++++++++++++++ 3 files changed, 726 insertions(+) create mode 100644 include/omath/utility/macho_pattern_scan.hpp create mode 100644 source/utility/macho_pattern_scan.cpp create mode 100644 tests/general/unit_test_macho_scanner.cpp diff --git a/include/omath/utility/macho_pattern_scan.hpp b/include/omath/utility/macho_pattern_scan.hpp new file mode 100644 index 0000000..a4bc9a4 --- /dev/null +++ b/include/omath/utility/macho_pattern_scan.hpp @@ -0,0 +1,25 @@ +// +// Created by Copilot on 04.02.2026. 
+//
+#pragma once
+#include <cstdint>
+#include <filesystem>
+#include <optional>
+#include <string_view>
+#include "section_scan_result.hpp"
+namespace omath
+{
+    class MachOPatternScanner final
+    {
+    public:
+        [[nodiscard]]
+        static std::optional<std::uintptr_t>
+        scan_for_pattern_in_loaded_module(const void* module_base_address, const std::string_view& pattern,
+                                          const std::string_view& target_section_name = "__text");
+
+        [[nodiscard]]
+        static std::optional<SectionScanResult>
+        scan_for_pattern_in_file(const std::filesystem::path& path_to_file, const std::string_view& pattern,
+                                 const std::string_view& target_section_name = "__text");
+    };
+} // namespace omath
diff --git a/source/utility/macho_pattern_scan.cpp b/source/utility/macho_pattern_scan.cpp
new file mode 100644
index 0000000..e4d1ef9
--- /dev/null
+++ b/source/utility/macho_pattern_scan.cpp
@@ -0,0 +1,442 @@
+//
+// Created by Copilot on 04.02.2026.
+//
+#include "omath/utility/macho_pattern_scan.hpp"
+#include "omath/utility/pattern_scan.hpp"
+#include <cstddef>
+#include <cstring>
+#include <fstream>
+#include <iterator>
+#include <vector>
+
+#pragma pack(push, 1)
+
+namespace
+{
+    // Mach-O magic numbers as they read when the image uses the host byte order.
+    // Byte-swapped images (0xCEFAEDFE / 0xCFFAEDFE) are rejected on purpose: every
+    // header field would need swapping before use, which this parser does not do.
+    constexpr std::uint32_t mh_magic_32 = 0xFEEDFACE;
+    constexpr std::uint32_t mh_magic_64 = 0xFEEDFACF;
+
+    // Load command types
+    constexpr std::uint32_t lc_segment = 0x1;
+    constexpr std::uint32_t lc_segment_64 = 0x19;
+
+    // Width of the fixed-size segment and section name fields
+    constexpr std::size_t macho_name_size = 16;
+
+    // Mach-O header for 32-bit
+    struct MachHeader32 final
+    {
+        std::uint32_t magic;
+        std::uint32_t cputype;
+        std::uint32_t cpusubtype;
+        std::uint32_t filetype;
+        std::uint32_t ncmds;
+        std::uint32_t sizeofcmds;
+        std::uint32_t flags;
+    };
+
+    // Mach-O header for 64-bit
+    struct MachHeader64 final
+    {
+        std::uint32_t magic;
+        std::uint32_t cputype;
+        std::uint32_t cpusubtype;
+        std::uint32_t filetype;
+        std::uint32_t ncmds;
+        std::uint32_t sizeofcmds;
+        std::uint32_t flags;
+        std::uint32_t reserved;
+    };
+
+    // Load command header
+    struct LoadCommand final
+    {
+        std::uint32_t cmd;
+        std::uint32_t cmdsize;
+    };
+
+    // Segment command for 32-bit
+    struct SegmentCommand32 final
+    {
+        std::uint32_t cmd;
+        std::uint32_t cmdsize;
+        char segname[16];
+        std::uint32_t vmaddr;
+        std::uint32_t vmsize;
+        std::uint32_t fileoff;
+        std::uint32_t filesize;
+        std::uint32_t maxprot;
+        std::uint32_t initprot;
+        std::uint32_t nsects;
+        std::uint32_t flags;
+    };
+
+    // Segment command for 64-bit
+    struct SegmentCommand64 final
+    {
+        std::uint32_t cmd;
+        std::uint32_t cmdsize;
+        char segname[16];
+        std::uint64_t vmaddr;
+        std::uint64_t vmsize;
+        std::uint64_t fileoff;
+        std::uint64_t filesize;
+        std::uint32_t maxprot;
+        std::uint32_t initprot;
+        std::uint32_t nsects;
+        std::uint32_t flags;
+    };
+
+    // Section for 32-bit
+    struct Section32 final
+    {
+        char sectname[16];
+        char segname[16];
+        std::uint32_t addr;
+        std::uint32_t size;
+        std::uint32_t offset;
+        std::uint32_t align;
+        std::uint32_t reloff;
+        std::uint32_t nreloc;
+        std::uint32_t flags;
+        std::uint32_t reserved1;
+        std::uint32_t reserved2;
+    };
+
+    // Section for 64-bit
+    struct Section64 final
+    {
+        char sectname[16];
+        char segname[16];
+        std::uint64_t addr;
+        std::uint64_t size;
+        std::uint32_t offset;
+        std::uint32_t align;
+        std::uint32_t reloff;
+        std::uint32_t nreloc;
+        std::uint32_t flags;
+        std::uint32_t reserved1;
+        std::uint32_t reserved2;
+        std::uint32_t reserved3;
+    };
+
+#pragma pack(pop)
+
+    // The structures are read straight from disk and memory, so their sizes
+    // have to match the Mach-O layout exactly.
+    static_assert(sizeof(MachHeader32) == 28);
+    static_assert(sizeof(MachHeader64) == 32);
+    static_assert(sizeof(LoadCommand) == 8);
+    static_assert(sizeof(SegmentCommand32) == 56);
+    static_assert(sizeof(SegmentCommand64) == 72);
+    static_assert(sizeof(Section32) == 68);
+    static_assert(sizeof(Section64) == 80);
+
+    enum class MachOArch : std::int8_t
+    {
+        x32,
+        x64,
+    };
+
+    struct ExtractedSection final
+    {
+        std::uintptr_t virtual_base_addr{};
+        std::uintptr_t raw_base_addr{};
+        std::vector<std::byte> data;
+    };
+
+    // Classifies the stream by its leading magic number and restores the read position.
+    // Returns std::nullopt when the magic cannot be read or is not a host-endian Mach-O magic.
+    [[nodiscard]]
+    std::optional<MachOArch> get_macho_arch(std::fstream& file)
+    {
+        std::uint32_t magic{};
+        const std::streampos backup_pos = file.tellg();
+
+        file.seekg(0, std::ios_base::beg);
+        const bool magic_read = static_cast<bool>(file.read(reinterpret_cast<char*>(&magic), sizeof(magic)));
+        // A short read sets failbit, which would turn the restoring seek into a no-op
+        file.clear();
+        file.seekg(backup_pos);
+
+        if (!magic_read) [[unlikely]]
+            return std::nullopt;
+
+        if (magic == mh_magic_64)
+            return MachOArch::x64;
+
+        if (magic == mh_magic_32)
+            return MachOArch::x32;
+
+        return std::nullopt;
+    }
+
+    // Turns a fixed 16-byte Mach-O name field into a view. A name that fills the whole
+    // field carries no terminator, so the search for '\0' is bounded by the field width.
+    [[nodiscard]]
+    std::string_view get_section_name(const char (&name)[macho_name_size])
+    {
+        const void* terminator = std::memchr(name, '\0', macho_name_size);
+
+        if (terminator == nullptr)
+            return {name, macho_name_size};
+
+        return {name, static_cast<std::size_t>(static_cast<const char*>(terminator) - name)};
+    }
+
+    // Walks the load commands of a Mach-O file and returns the contents of the first
+    // section called section_name. Offsets and sizes taken from the file are validated
+    // before use, so a truncated or malformed file yields std::nullopt instead of a
+    // stalled loop or an oversized allocation.
+    template<class HeaderType, class SegmentType, class SectionType, std::uint32_t segment_cmd>
+    std::optional<ExtractedSection> extract_section_impl(std::fstream& file, const std::string_view& section_name)
+    {
+        file.seekg(0, std::ios_base::end);
+        const std::streamoff file_size = file.tellg();
+        if (file_size < 0) [[unlikely]]
+            return std::nullopt;
+
+        HeaderType header{};
+        file.seekg(0, std::ios_base::beg);
+        if (!file.read(reinterpret_cast<char*>(&header), sizeof(header))) [[unlikely]]
+            return std::nullopt;
+
+        std::streamoff cmd_offset = sizeof(header);
+        const std::streamoff cmds_end = cmd_offset + static_cast<std::streamoff>(header.sizeofcmds);
+
+        for (std::uint32_t i = 0; i < header.ncmds; ++i)
+        {
+            LoadCommand lc{};
+            file.seekg(cmd_offset, std::ios_base::beg);
+            if (!file.read(reinterpret_cast<char*>(&lc), sizeof(lc))) [[unlikely]]
+                return std::nullopt;
+
+            // A command must cover its own header and end inside the command area
+            const auto cmd_size = static_cast<std::streamoff>(lc.cmdsize);
+            if (lc.cmdsize < sizeof(LoadCommand) || cmd_size > cmds_end - cmd_offset) [[unlikely]]
+                return std::nullopt;
+
+            if (lc.cmd == segment_cmd)
+            {
+                if (lc.cmdsize < sizeof(SegmentType)) [[unlikely]]
+                    return std::nullopt;
+
+                SegmentType segment{};
+                file.seekg(cmd_offset, std::ios_base::beg);
+                if (!file.read(reinterpret_cast<char*>(&segment), sizeof(segment))) [[unlikely]]
+                    return std::nullopt;
+
+                // The section headers have to fit into the remainder of this command
+                if (segment.nsects > (lc.cmdsize - sizeof(SegmentType)) / sizeof(SectionType)) [[unlikely]]
+                    return std::nullopt;
+
+                std::streamoff sect_offset = cmd_offset + static_cast<std::streamoff>(sizeof(segment));
+
+                for (std::uint32_t j = 0; j < segment.nsects; ++j)
+                {
+                    SectionType section{};
+                    file.seekg(sect_offset, std::ios_base::beg);
+                    if (!file.read(reinterpret_cast<char*>(&section), sizeof(section))) [[unlikely]]
+                        return std::nullopt;
+
+                    if (get_section_name(section.sectname) == section_name)
+                    {
+                        // Refuse a section that claims more data than the file holds
+                        const auto data_offset = static_cast<std::uint64_t>(section.offset);
+                        const auto data_size = static_cast<std::uint64_t>(section.size);
+                        const auto available = static_cast<std::uint64_t>(file_size);
+                        if (data_offset > available || data_size > available - data_offset) [[unlikely]]
+                            return std::nullopt;
+
+                        ExtractedSection out;
+                        out.virtual_base_addr = static_cast<std::uintptr_t>(section.addr);
+                        out.raw_base_addr = static_cast<std::uintptr_t>(section.offset);
+                        out.data.resize(static_cast<std::size_t>(section.size));
+
+                        file.seekg(static_cast<std::streamoff>(section.offset), std::ios_base::beg);
+                        if (!file.read(reinterpret_cast<char*>(out.data.data()),
+                                       static_cast<std::streamsize>(out.data.size()))) [[unlikely]]
+                            return std::nullopt;
+
+                        return out;
+                    }
+
+                    sect_offset += static_cast<std::streamoff>(sizeof(section));
+                }
+            }
+
+            cmd_offset += cmd_size;
+        }
+
+        return std::nullopt;
+    }
+
+    // Opens path and extracts the named section with the parser matching the file's word size
+    [[nodiscard]]
+    std::optional<ExtractedSection> get_macho_section_by_name(const std::filesystem::path& path,
+                                                              const std::string_view& section_name)
+    {
+        std::fstream file(path, std::ios::binary | std::ios::in);
+
+        if (!file.is_open()) [[unlikely]]
+            return std::nullopt;
+
+        const auto arch = get_macho_arch(file);
+
+        if (!arch.has_value()) [[unlikely]]
+            return std::nullopt;
+
+        if (arch.value() == MachOArch::x64)
+            return extract_section_impl<MachHeader64, SegmentCommand64, Section64, lc_segment_64>(file, section_name);
+
+        return extract_section_impl<MachHeader32, SegmentCommand32, Section32, lc_segment>(file, section_name);
+    }
+
+    // Calls visitor(segment) for every segment load command of a mapped image until the
+    // visitor returns true. Iteration also stops when a command is too small to make
+    // progress or would leave the area announced by sizeofcmds.
+    template<class HeaderType, class SegmentType, std::uint32_t segment_cmd, class Visitor>
+    void visit_segments(const std::byte* base, Visitor&& visitor)
+    {
+        const auto* header = reinterpret_cast<const HeaderType*>(base);
+        const std::size_t cmds_end = sizeof(HeaderType) + header->sizeofcmds;
+        std::size_t cmd_offset = sizeof(HeaderType);
+
+        for (std::uint32_t i = 0; i < header->ncmds; ++i)
+        {
+            if (cmd_offset + sizeof(LoadCommand) > cmds_end)
+                return;
+
+            const auto* lc = reinterpret_cast<const LoadCommand*>(base + cmd_offset);
+
+            if (lc->cmdsize < sizeof(LoadCommand) || lc->cmdsize > cmds_end - cmd_offset)
+                return;
+
+            if (lc->cmd == segment_cmd && lc->cmdsize >= sizeof(SegmentType)
+                && visitor(*reinterpret_cast<const SegmentType*>(base + cmd_offset)))
+                return;
+
+            cmd_offset += lc->cmdsize;
+        }
+    }
+
+    // Scans the named section of an image that is already mapped at base.
+    // Section addresses in the load commands are link-time VM addresses rather than
+    // offsets from the header. The header is mapped at the vmaddr of __TEXT, so a
+    // section lives at base + (section.addr - __TEXT.vmaddr).
+    template<class HeaderType, class SegmentType, class SectionType, std::uint32_t segment_cmd>
+    std::optional<std::uintptr_t> scan_in_module_impl(const std::byte* base, const std::string_view pattern,
+                                                      const std::string_view target_section_name)
+    {
+        std::optional<std::uint64_t> image_vmaddr;
+        visit_segments<HeaderType, SegmentType, segment_cmd>(
+                base,
+                [&image_vmaddr](const SegmentType& segment)
+                {
+                    if (get_section_name(segment.segname) != "__TEXT")
+                        return false;
+
+                    image_vmaddr = segment.vmaddr;
+                    return true;
+                });
+
+        if (!image_vmaddr.has_value()) [[unlikely]]
+            return std::nullopt;
+
+        std::optional<std::uintptr_t> match;
+        visit_segments<HeaderType, SegmentType, segment_cmd>(
+                base,
+                [&](const SegmentType& segment)
+                {
+                    const std::size_t max_sections = (segment.cmdsize - sizeof(SegmentType)) / sizeof(SectionType);
+                    const auto* sections = reinterpret_cast<const SectionType*>(&segment + 1);
+
+                    for (std::uint32_t j = 0; j < segment.nsects && j < max_sections; ++j)
+                    {
+                        const SectionType& section = sections[j];
+
+                        if (get_section_name(section.sectname) != target_section_name || section.size == 0)
+                            continue;
+
+                        // A section below the image base cannot belong to this mapping
+                        if (section.addr < *image_vmaddr) [[unlikely]]
+                            continue;
+
+                        const auto* section_begin = base + static_cast<std::size_t>(section.addr - *image_vmaddr);
+                        const auto* section_end = section_begin + static_cast<std::size_t>(section.size);
+
+                        const auto scan_result =
+                                omath::PatternScanner::scan_for_pattern(section_begin, section_end, pattern);
+
+                        if (scan_result != section_end)
+                        {
+                            match = reinterpret_cast<std::uintptr_t>(scan_result);
+                            return true;
+                        }
+                    }
+
+                    return false;
+                });
+
+        return match;
+    }
+
+} // namespace
+
+namespace omath
+{
+    // Scans target_section_name of the image mapped at module_base_address and
+    // returns the address of the first match, or std::nullopt when nothing matches.
+    std::optional<std::uintptr_t>
+    MachOPatternScanner::scan_for_pattern_in_loaded_module(const void* module_base_address,
+                                                           const std::string_view& pattern,
+                                                           const std::string_view& target_section_name)
+    {
+        if (module_base_address == nullptr) [[unlikely]]
+            return std::nullopt;
+
+        const auto* base = static_cast<const std::byte*>(module_base_address);
+
+        // A mapped image always uses the host byte order, so only the native magics apply
+        std::uint32_t magic{};
+        std::memcpy(&magic, base, sizeof(magic));
+
+        if (magic == mh_magic_64)
+            return scan_in_module_impl<MachHeader64, SegmentCommand64, Section64, lc_segment_64>(
+                    base, pattern, target_section_name);
+
+        if (magic == mh_magic_32)
+            return scan_in_module_impl<MachHeader32, SegmentCommand32, Section32, lc_segment>(base, pattern,
+                                                                                              target_section_name);
+
+        return std::nullopt;
+    }
+
+    // Scans target_section_name of the Mach-O file at path_to_file and reports the
+    // match as an offset into that section, or std::nullopt when nothing matches.
+    std::optional<SectionScanResult>
+    MachOPatternScanner::scan_for_pattern_in_file(const std::filesystem::path& path_to_file,
+                                                  const std::string_view& pattern,
+                                                  const std::string_view& target_section_name)
+    {
+        const auto macho_section = get_macho_section_by_name(path_to_file, target_section_name);
+
+        if (!macho_section.has_value()) [[unlikely]]
+            return std::nullopt;
+
+        const auto scan_result =
+                PatternScanner::scan_for_pattern(macho_section->data.cbegin(), macho_section->data.cend(), pattern);
+
+        if (scan_result == macho_section->data.cend())
+            return std::nullopt;
+
+        const auto offset = std::distance(macho_section->data.begin(), scan_result);
+
+        return SectionScanResult{.virtual_base_addr = macho_section->virtual_base_addr,
+                                 .raw_base_addr = macho_section->raw_base_addr,
+                                 .target_offset = offset};
+    }
+} // namespace omath
diff --git a/tests/general/unit_test_macho_scanner.cpp b/tests/general/unit_test_macho_scanner.cpp
new file mode 100644
index 0000000..0beada0
--- /dev/null
+++ b/tests/general/unit_test_macho_scanner.cpp
@@ -0,0 +1,357 @@
+//
+// Created by Copilot on 04.02.2026.
+//
+// Unit tests for MachOPatternScanner
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <gtest/gtest.h>
+#include <omath/utility/macho_pattern_scan.hpp>
+#include <vector>
+
+using namespace omath;
+
+namespace
+{
+    // Mach-O magic numbers
+    constexpr std::uint32_t mh_magic_64 = 0xFEEDFACF;
+    constexpr std::uint32_t mh_magic_32 = 0xFEEDFACE;
+    constexpr std::uint32_t lc_segment = 0x1;
+    constexpr std::uint32_t lc_segment_64 = 0x19;
+
+#pragma pack(push, 1)
+    struct MachHeader64
+    {
+        std::uint32_t magic;
+        std::uint32_t cputype;
+        std::uint32_t cpusubtype;
+        std::uint32_t filetype;
+        std::uint32_t ncmds;
+        std::uint32_t sizeofcmds;
+        std::uint32_t flags;
+        std::uint32_t reserved;
+    };
+
+    struct MachHeader32
+    {
+        std::uint32_t magic;
+        std::uint32_t cputype;
+        std::uint32_t cpusubtype;
+        std::uint32_t filetype;
+        std::uint32_t ncmds;
+        std::uint32_t sizeofcmds;
+        std::uint32_t flags;
+    };
+
+    struct SegmentCommand64
+    {
+        std::uint32_t cmd;
+        std::uint32_t cmdsize;
+        char segname[16];
+        std::uint64_t vmaddr;
+        std::uint64_t vmsize;
+        std::uint64_t fileoff;
+        std::uint64_t filesize;
+        std::uint32_t maxprot;
+        std::uint32_t initprot;
+        std::uint32_t nsects;
+        std::uint32_t flags;
+    };
+
+    struct SegmentCommand32
+    {
+        std::uint32_t cmd;
+        std::uint32_t cmdsize;
+        char segname[16];
+        std::uint32_t vmaddr;
+        std::uint32_t vmsize;
+        std::uint32_t fileoff;
+        std::uint32_t filesize;
+        std::uint32_t maxprot;
+        std::uint32_t initprot;
+        std::uint32_t nsects;
+        std::uint32_t flags;
+    };
+
+    struct Section64
+    {
+        char sectname[16];
+        char segname[16];
+        std::uint64_t addr;
+        std::uint64_t size;
+        std::uint32_t offset;
+        std::uint32_t align;
+        std::uint32_t reloff;
+        std::uint32_t nreloc;
+        std::uint32_t flags;
+        std::uint32_t reserved1;
+        std::uint32_t reserved2;
+        std::uint32_t reserved3;
+    };
+
+    struct Section32
+    {
+        char sectname[16];
+        char segname[16];
+        std::uint32_t addr;
+        std::uint32_t size;
+        std::uint32_t offset;
+        std::uint32_t align;
+        std::uint32_t reloff;
+        std::uint32_t nreloc;
+        std::uint32_t flags;
+        std::uint32_t reserved1;
+        std::uint32_t reserved2;
+    };
+#pragma pack(pop)
+
+    // Deletes the generated fixture when the test ends, whatever its outcome,
+    // so repeated runs do not leave files behind in the working directory
+    struct ScopedTestFile
+    {
+        std::string_view path;
+
+        ~ScopedTestFile() { std::remove(path.data()); }
+    };
+
+    // Writes a minimal 64-bit Mach-O file with one __text section; returns false if any write failed
+    bool write_minimal_macho64_file(const std::string& path, const std::vector<std::uint8_t>& section_bytes)
+    {
+        std::ofstream f(path, std::ios::binary);
+        if (!f.is_open())
+            return false;
+
+        // Calculate sizes
+        constexpr std::size_t header_size = sizeof(MachHeader64);
+        constexpr std::size_t segment_size = sizeof(SegmentCommand64);
+        constexpr std::size_t section_size = sizeof(Section64);
+        constexpr std::size_t load_cmd_size = segment_size + section_size;
+
+        // Section data will start after headers
+        const std::size_t section_offset = header_size + load_cmd_size;
+
+        // Create Mach-O header
+        MachHeader64 header{};
+        header.magic = mh_magic_64;
+        header.cputype = 0x01000007; // CPU_TYPE_X86_64
+        header.cpusubtype = 0x3; // CPU_SUBTYPE_X86_64_ALL
+        header.filetype = 0x2; // MH_EXECUTE
+        header.ncmds = 1;
+        header.sizeofcmds = static_cast<std::uint32_t>(load_cmd_size);
+        header.flags = 0;
+        header.reserved = 0;
+
+        f.write(reinterpret_cast<const char*>(&header), sizeof(header));
+
+        // Create segment command
+        SegmentCommand64 segment{};
+        segment.cmd = lc_segment_64;
+        segment.cmdsize = static_cast<std::uint32_t>(load_cmd_size);
+        std::strncpy(segment.segname, "__TEXT", 16);
+        segment.vmaddr = 0x100000000;
+        segment.vmsize = section_bytes.size();
+        segment.fileoff = section_offset;
+        segment.filesize = section_bytes.size();
+        segment.maxprot = 7; // VM_PROT_ALL
+        segment.initprot = 5; // VM_PROT_READ | VM_PROT_EXECUTE
+        segment.nsects = 1;
+        segment.flags = 0;
+
+        f.write(reinterpret_cast<const char*>(&segment), sizeof(segment));
+
+        // Create section
+        Section64 section{};
+        std::strncpy(section.sectname, "__text", 16);
+        std::strncpy(section.segname, "__TEXT", 16);
+        section.addr = 0x100000000;
+        section.size = section_bytes.size();
+        section.offset = static_cast<std::uint32_t>(section_offset);
+        section.align = 0;
+        section.reloff = 0;
+        section.nreloc = 0;
+        section.flags = 0;
+        section.reserved1 = 0;
+        section.reserved2 = 0;
+        section.reserved3 = 0;
+
+        f.write(reinterpret_cast<const char*>(&section), sizeof(section));
+
+        // Write section data
+        f.write(reinterpret_cast<const char*>(section_bytes.data()), static_cast<std::streamsize>(section_bytes.size()));
+
+        return f.flush().good();
+    }
+
+    // Writes a minimal 32-bit Mach-O file with one __text section; returns false if any write failed
+    bool write_minimal_macho32_file(const std::string& path, const std::vector<std::uint8_t>& section_bytes)
+    {
+        std::ofstream f(path, std::ios::binary);
+        if (!f.is_open())
+            return false;
+
+        // Calculate sizes
+        constexpr std::size_t header_size = sizeof(MachHeader32);
+        constexpr std::size_t segment_size = sizeof(SegmentCommand32);
+        constexpr std::size_t section_size = sizeof(Section32);
+        constexpr std::size_t load_cmd_size = segment_size + section_size;
+
+        // Section data will start after headers
+        const std::size_t section_offset = header_size + load_cmd_size;
+
+        // Create Mach-O header
+        MachHeader32 header{};
+        header.magic = mh_magic_32;
+        header.cputype = 0x7; // CPU_TYPE_X86
+        header.cpusubtype = 0x3; // CPU_SUBTYPE_X86_ALL
+        header.filetype = 0x2; // MH_EXECUTE
+        header.ncmds = 1;
+        header.sizeofcmds = static_cast<std::uint32_t>(load_cmd_size);
+        header.flags = 0;
+
+        f.write(reinterpret_cast<const char*>(&header), sizeof(header));
+
+        // Create segment command
+        SegmentCommand32 segment{};
+        segment.cmd = lc_segment;
+        segment.cmdsize = static_cast<std::uint32_t>(load_cmd_size);
+        std::strncpy(segment.segname, "__TEXT", 16);
+        segment.vmaddr = 0x1000;
+        segment.vmsize = static_cast<std::uint32_t>(section_bytes.size());
+        segment.fileoff = static_cast<std::uint32_t>(section_offset);
+        segment.filesize = static_cast<std::uint32_t>(section_bytes.size());
+        segment.maxprot = 7; // VM_PROT_ALL
+        segment.initprot = 5; // VM_PROT_READ | VM_PROT_EXECUTE
+        segment.nsects = 1;
+        segment.flags = 0;
+
+        f.write(reinterpret_cast<const char*>(&segment), sizeof(segment));
+
+        // Create section
+        Section32 section{};
+        std::strncpy(section.sectname, "__text", 16);
+        std::strncpy(section.segname, "__TEXT", 16);
+        section.addr = 0x1000;
+        section.size = static_cast<std::uint32_t>(section_bytes.size());
+        section.offset = static_cast<std::uint32_t>(section_offset);
+        section.align = 0;
+        section.reloff = 0;
+        section.nreloc = 0;
+        section.flags = 0;
+        section.reserved1 = 0;
+        section.reserved2 = 0;
+
+        f.write(reinterpret_cast<const char*>(&section), sizeof(section));
+
+        // Write section data
+        f.write(reinterpret_cast<const char*>(section_bytes.data()), static_cast<std::streamsize>(section_bytes.size()));
+
+        return f.flush().good();
+    }
+
+} // namespace
+
+// Test scanning for a pattern that exists in a 64-bit Mach-O file
+TEST(unit_test_macho_pattern_scan_file, ScanFindsPattern64)
+{
+    const ScopedTestFile file{"./test_minimal_macho64.bin"};
+    const std::vector<std::uint8_t> bytes = {0x55, 0x48, 0x89, 0xE5, 0x90, 0x90}; // push rbp; mov rbp, rsp; nop; nop
+    ASSERT_TRUE(write_minimal_macho64_file(file.path.data(), bytes));
+
+    const auto res = MachOPatternScanner::scan_for_pattern_in_file(file.path, "55 48 89 E5", "__text");
+    ASSERT_TRUE(res.has_value());
+    EXPECT_EQ(res->target_offset, 0);
+}
+
+// Test scanning for a pattern that exists in a 32-bit Mach-O file
+TEST(unit_test_macho_pattern_scan_file, ScanFindsPattern32)
+{
+    const ScopedTestFile file{"./test_minimal_macho32.bin"};
+    const std::vector<std::uint8_t> bytes = {0x55, 0x89, 0xE5, 0x90, 0x90}; // push ebp; mov ebp, esp; nop; nop
+    ASSERT_TRUE(write_minimal_macho32_file(file.path.data(), bytes));
+
+    const auto res = MachOPatternScanner::scan_for_pattern_in_file(file.path, "55 89 E5", "__text");
+    ASSERT_TRUE(res.has_value());
+    EXPECT_EQ(res->target_offset, 0);
+}
+
+// Test scanning for a pattern that does not exist
+TEST(unit_test_macho_pattern_scan_file, ScanMissingPattern)
+{
+    const ScopedTestFile file{"./test_minimal_macho_missing.bin"};
+    const std::vector<std::uint8_t> bytes = {0x00, 0x01, 0x02, 0x03};
+    ASSERT_TRUE(write_minimal_macho64_file(file.path.data(), bytes));
+
+    const auto res = MachOPatternScanner::scan_for_pattern_in_file(file.path, "FF EE DD", "__text");
+    EXPECT_FALSE(res.has_value());
+}
+
+// Test scanning for a pattern at a non-zero offset
+TEST(unit_test_macho_pattern_scan_file, ScanPatternAtOffset)
+{
+    const ScopedTestFile file{"./test_minimal_macho_offset.bin"};
+    const std::vector<std::uint8_t> bytes = {0x90, 0x90, 0x90, 0x55, 0x48, 0x89, 0xE5}; // nops then pattern
+    ASSERT_TRUE(write_minimal_macho64_file(file.path.data(), bytes));
+
+    const auto res = MachOPatternScanner::scan_for_pattern_in_file(file.path, "55 48 89 E5", "__text");
+    ASSERT_TRUE(res.has_value());
+    EXPECT_EQ(res->target_offset, 3);
+}
+
+// Test scanning with wildcards
+TEST(unit_test_macho_pattern_scan_file, ScanWithWildcard)
+{
+    const ScopedTestFile file{"./test_minimal_macho_wildcard.bin"};
+    const std::vector<std::uint8_t> bytes = {0x55, 0x48, 0x89, 0xE5, 0x90};
+    ASSERT_TRUE(write_minimal_macho64_file(file.path.data(), bytes));
+
+    const auto res = MachOPatternScanner::scan_for_pattern_in_file(file.path, "55 ? 89 E5", "__text");
+    EXPECT_TRUE(res.has_value());
+}
+
+// Test scanning a non-existent file
+TEST(unit_test_macho_pattern_scan_file, ScanNonExistentFile)
+{
+    const auto res = MachOPatternScanner::scan_for_pattern_in_file("/non/existent/file.bin", "55 48", "__text");
+    EXPECT_FALSE(res.has_value());
+}
+
+// Test scanning an invalid (non-Mach-O) file
+TEST(unit_test_macho_pattern_scan_file, ScanInvalidFile)
+{
+    const ScopedTestFile file{"./test_invalid_macho.bin"};
+    std::ofstream f(file.path.data(), std::ios::binary);
+    const std::vector<std::uint8_t> garbage = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05};
+    f.write(reinterpret_cast<const char*>(garbage.data()), static_cast<std::streamsize>(garbage.size()));
+    f.close();
+    ASSERT_FALSE(f.fail());
+
+    const auto res = MachOPatternScanner::scan_for_pattern_in_file(file.path, "55 48", "__text");
+    EXPECT_FALSE(res.has_value());
+}
+
+// Test scanning for a non-existent section
+TEST(unit_test_macho_pattern_scan_file, ScanNonExistentSection)
+{
+    const ScopedTestFile file{"./test_minimal_macho_nosect.bin"};
+    const std::vector<std::uint8_t> bytes = {0x55, 0x48, 0x89, 0xE5};
+    ASSERT_TRUE(write_minimal_macho64_file(file.path.data(), bytes));
+
+    const auto res = MachOPatternScanner::scan_for_pattern_in_file(file.path, "55 48", "__nonexistent");
+    EXPECT_FALSE(res.has_value());
+}
+
+// Test scanning with null module base address
+TEST(unit_test_macho_pattern_scan_loaded, ScanNullModule)
+{
+    const auto res = MachOPatternScanner::scan_for_pattern_in_loaded_module(nullptr, "55 48", "__text");
+    EXPECT_FALSE(res.has_value());
+}
+
+// Test scanning in loaded module with invalid magic
+TEST(unit_test_macho_pattern_scan_loaded, ScanInvalidMagic)
+{
+    std::vector<std::uint8_t> invalid_data(256, 0x00);
+    const auto res = MachOPatternScanner::scan_for_pattern_in_loaded_module(invalid_data.data(), "55 48", "__text");
+    EXPECT_FALSE(res.has_value());
+}