Initial pass at implementing the symbol input file mechanism

This commit is contained in:
Mr-Wiseguy 2024-05-14 10:27:42 -04:00
parent d4fab15fcc
commit cf2f16ed71
5 changed files with 337 additions and 73 deletions

View file

@ -623,7 +623,6 @@ std::vector<T> toml_to_vec(const toml::value& branch_targets_data) {
} }
bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig& out) { bool read_config(const std::filesystem::path& config_path, RSPRecompilerConfig& out) {
std::ifstream config_file {config_path};
RSPRecompilerConfig ret{}; RSPRecompilerConfig ret{};
try { try {

View file

@ -63,6 +63,8 @@ namespace RecompPort {
bool single_file_output; bool single_file_output;
bool use_absolute_symbols; bool use_absolute_symbols;
std::filesystem::path elf_path; std::filesystem::path elf_path;
std::filesystem::path symbols_file_path;
std::filesystem::path rom_file_path;
std::filesystem::path output_func_path; std::filesystem::path output_func_path;
std::filesystem::path relocatable_sections_path; std::filesystem::path relocatable_sections_path;
std::vector<std::string> stubbed_funcs; std::vector<std::string> stubbed_funcs;
@ -130,7 +132,6 @@ namespace RecompPort {
uint32_t symbol_index; uint32_t symbol_index;
uint32_t target_section; uint32_t target_section;
RelocType type; RelocType type;
bool needs_relocation;
}; };
struct Section { struct Section {
@ -175,6 +176,10 @@ namespace RecompPort {
rom.reserve(8 * 1024 * 1024); rom.reserve(8 * 1024 * 1024);
executable_section_count = 0; executable_section_count = 0;
} }
static bool from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector<uint8_t>&& rom, Context& out);
Context() = default;
}; };
bool analyze_function(const Context& context, const Function& function, const std::vector<rabbitizer::InstructionCpu>& instructions, FunctionStats& stats); bool analyze_function(const Context& context, const Function& function, const std::vector<rabbitizer::InstructionCpu>& instructions, FunctionStats& stats);

View file

@ -242,7 +242,15 @@ RecompPort::Config::Config(const char* path) {
else { else {
has_entrypoint = false; has_entrypoint = false;
} }
if (input_data.contains("elf_path")) {
elf_path = concat_if_not_empty(basedir, toml::find<std::string>(input_data, "elf_path")); elf_path = concat_if_not_empty(basedir, toml::find<std::string>(input_data, "elf_path"));
}
if (input_data.contains("symbols_file_path")) {
symbols_file_path = concat_if_not_empty(basedir, toml::find<std::string>(input_data, "symbols_file_path"));
}
if (input_data.contains("rom_file_path")) {
rom_file_path = concat_if_not_empty(basedir, toml::find<std::string>(input_data, "rom_file_path"));
}
output_func_path = concat_if_not_empty(basedir, toml::find<std::string>(input_data, "output_func_path")); output_func_path = concat_if_not_empty(basedir, toml::find<std::string>(input_data, "output_func_path"));
relocatable_sections_path = concat_if_not_empty(basedir, toml::find_or<std::string>(input_data, "relocatable_sections_path", "")); relocatable_sections_path = concat_if_not_empty(basedir, toml::find_or<std::string>(input_data, "relocatable_sections_path", ""));
uses_mips3_float_mode = toml::find_or<bool>(input_data, "uses_mips3_float_mode", false); uses_mips3_float_mode = toml::find_or<bool>(input_data, "uses_mips3_float_mode", false);
@ -295,3 +303,147 @@ RecompPort::Config::Config(const char* path) {
// No errors occurred, so mark this config file as good. // No errors occurred, so mark this config file as good.
bad = false; bad = false;
} }
// Lookup table from the textual relocation type names accepted in a symbol file
// (e.g. "R_MIPS_HI16") to the corresponding RecompPort::RelocType enumerator.
// Consulted by reloc_type_from_name.
const std::unordered_map<std::string, RecompPort::RelocType> reloc_type_name_map {
{ "R_MIPS_NONE", RecompPort::RelocType::R_MIPS_NONE },
{ "R_MIPS_16", RecompPort::RelocType::R_MIPS_16 },
{ "R_MIPS_32", RecompPort::RelocType::R_MIPS_32 },
{ "R_MIPS_REL32", RecompPort::RelocType::R_MIPS_REL32 },
{ "R_MIPS_26", RecompPort::RelocType::R_MIPS_26 },
{ "R_MIPS_HI16", RecompPort::RelocType::R_MIPS_HI16 },
{ "R_MIPS_LO16", RecompPort::RelocType::R_MIPS_LO16 },
{ "R_MIPS_GPREL16", RecompPort::RelocType::R_MIPS_GPREL16 },
};
// Translates a relocation type name into its RecompPort::RelocType value.
// Names that are not present in reloc_type_name_map yield R_MIPS_NONE.
RecompPort::RelocType reloc_type_from_name(const std::string& reloc_type_name) {
    const auto entry = reloc_type_name_map.find(reloc_type_name);
    return entry == reloc_type_name_map.end()
        ? RecompPort::RelocType::R_MIPS_NONE
        : entry->second;
}
bool RecompPort::Context::from_symbol_file(const std::filesystem::path& symbol_file_path, std::vector<uint8_t>&& rom, RecompPort::Context& out) {
RecompPort::Context ret{};
try {
const toml::value config_data = toml::parse(symbol_file_path);
const toml::value config_sections_value = toml::find_or<toml::value>(config_data, "section", toml::value{});
if (config_sections_value.type() != toml::value_t::array) {
return false;
}
const toml::array config_sections = config_sections_value.as_array();
ret.section_functions.resize(config_sections.size());
for (const toml::value& section_value : config_sections) {
size_t section_index = ret.sections.size();
Section& section = ret.sections.emplace_back(Section{});
section.rom_addr = toml::find<uint32_t>(section_value, "rom");
section.ram_addr = toml::find<uint32_t>(section_value, "vram");
section.size = toml::find<uint32_t>(section_value, "size");
section.name = toml::find<toml::string>(section_value, "name");
section.executable = true;
const toml::array& functions = toml::find<toml::array>(section_value, "functions");
// Read functions for the section.
for (const toml::value& function_value : functions) {
size_t function_index = ret.functions.size();
Function cur_func{};
cur_func.name = toml::find<std::string>(function_value, "name");
cur_func.vram = toml::find<uint32_t>(function_value, "vram");
cur_func.rom = cur_func.vram - section.ram_addr + section.rom_addr;
cur_func.section_index = section_index;
uint32_t func_size = toml::find<uint32_t>(function_value, "size");
if (cur_func.vram & 0b11) {
// Function isn't word aligned in vram.
throw value_error(toml::detail::format_underline(
std::string{ std::source_location::current().function_name() } + ": function's vram address isn't word aligned!", {
{function_value.location(), ""}
}), function_value.location());
}
if (cur_func.rom & 0b11) {
// Function isn't word aligned in rom.
throw value_error(toml::detail::format_underline(
std::string{ std::source_location::current().function_name() } + ": function's rom address isn't word aligned!", {
{function_value.location(), ""}
}), function_value.location());
}
if (cur_func.rom + func_size > rom.size()) {
// Function is out of bounds of the provided rom.
throw value_error(toml::detail::format_underline(
std::string{ std::source_location::current().function_name() } + ": function is out of bounds of the provided rom!", {
{function_value.location(), ""}
}), function_value.location());
}
// Get the function's words from the rom.
cur_func.words.reserve(func_size / sizeof(uint32_t));
for (size_t rom_addr = cur_func.rom; rom_addr < cur_func.rom + func_size; rom_addr += sizeof(uint32_t)) {
cur_func.words.push_back(*reinterpret_cast<const uint32_t*>(rom.data() + rom_addr));
}
section.function_addrs.push_back(cur_func.vram);
ret.functions_by_name[cur_func.name] = function_index;
ret.functions_by_vram[cur_func.vram].push_back(function_index);
ret.section_functions[section_index].push_back(function_index);
ret.functions.emplace_back(std::move(cur_func));
}
// Check if relocs exist for the section and read them if so.
const toml::value& relocs_value = toml::find_or<toml::value>(section_value, "relocs", toml::value{});
if (relocs_value.type() == toml::value_t::array) {
// Mark the section as relocatable, since it has relocs.
section.relocatable = true;
// Read relocs for the section.
for (const toml::value& reloc_value : relocs_value.as_array()) {
size_t reloc_index = ret.functions.size();
Reloc cur_reloc{};
cur_reloc.address = toml::find<uint32_t>(reloc_value, "vram");
cur_reloc.target_address = toml::find<uint32_t>(reloc_value, "target_vram");
cur_reloc.symbol_index = (uint32_t)-1;
cur_reloc.target_section = section_index;
const std::string& reloc_type = toml::find<std::string>(reloc_value, "type");
cur_reloc.type = reloc_type_from_name(reloc_type);
section.relocs.emplace_back(std::move(cur_reloc));
}
}
else {
section.relocatable = false;
}
}
}
catch (const toml::syntax_error& err) {
fmt::print(stderr, "Syntax error in config file on line {}, full error:\n{}\n", err.location().line(), err.what());
return false;
}
catch (const toml::type_error& err) {
fmt::print(stderr, "Incorrect type in config file on line {}, full error:\n{}\n", err.location().line(), err.what());
return false;
}
catch (const value_error& err) {
fmt::print(stderr, "Invalid value in config file on line {}, full error:\n{}\n", err.location().line(), err.what());
return false;
}
catch (const std::out_of_range& err) {
fmt::print(stderr, "Missing value in config file, full error:\n{}\n", err.what());
return false;
}
ret.rom = std::move(rom);
out = std::move(ret);
return true;
}

View file

@ -965,7 +965,6 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co
reloc_out.address = rel_offset; reloc_out.address = rel_offset;
reloc_out.symbol_index = rel_symbol; reloc_out.symbol_index = rel_symbol;
reloc_out.type = static_cast<RecompPort::RelocType>(rel_type); reloc_out.type = static_cast<RecompPort::RelocType>(rel_type);
reloc_out.needs_relocation = false;
std::string rel_symbol_name; std::string rel_symbol_name;
ELFIO::Elf64_Addr rel_symbol_value; ELFIO::Elf64_Addr rel_symbol_value;
@ -980,12 +979,6 @@ ELFIO::section* read_sections(RecompPort::Context& context, const RecompPort::Co
reloc_out.target_section = rel_symbol_section_index; reloc_out.target_section = rel_symbol_section_index;
bool rel_needs_relocation = false;
if (rel_symbol_section_index < context.sections.size()) {
rel_needs_relocation = context.sections[rel_symbol_section_index].relocatable;
}
// Reloc pairing, see MIPS System V ABI documentation page 4-18 (https://refspecs.linuxfoundation.org/elf/mipsabi.pdf) // Reloc pairing, see MIPS System V ABI documentation page 4-18 (https://refspecs.linuxfoundation.org/elf/mipsabi.pdf)
if (reloc_out.type == RecompPort::RelocType::R_MIPS_LO16) { if (reloc_out.type == RecompPort::RelocType::R_MIPS_LO16) {
if (prev_hi) { if (prev_hi) {
@ -1159,6 +1152,79 @@ bool recompile_single_function(const RecompPort::Context& context, const RecompP
return true; return true;
} }
// Textual names of the relocation types, indexed by the numeric value of
// RecompPort::RelocType (dump_context indexes it with static_cast<int>(reloc.type)).
// The spellings must match the names the symbol file parser accepts, so they carry
// no padding or trailing whitespace.
std::vector<std::string> reloc_names {
    "R_MIPS_NONE",
    "R_MIPS_16",
    "R_MIPS_32",
    "R_MIPS_REL32",
    "R_MIPS_26",
    "R_MIPS_HI16",
    "R_MIPS_LO16",
    "R_MIPS_GPREL16",
};
// Writes the sections, relocs and functions of `context` to `path` as a TOML symbol file.
//
// Only sections that contain at least one function are written. For each such section
// the name, rom address, vram address and size are emitted, followed by a `relocs` array
// (only if the section has any relocs) and a `functions` array. Only R_MIPS_HI16 and
// R_MIPS_LO16 relocs that target the section itself or its bss section are listed.
//
// If the output file cannot be opened, an error is printed to stderr and nothing is written.
void dump_context(const RecompPort::Context& context, const std::filesystem::path& path) {
    std::ofstream context_file {path};

    if (!context_file.good()) {
        fmt::print(stderr, "Failed to open {} for writing the context dump\n", path.string());
        return;
    }

    // The provenance header describes the whole file, so it is written once
    // rather than once per section.
    fmt::print(context_file, "# Autogenerated from an ELF via N64Recomp\n");

    for (size_t section_index = 0; section_index < context.sections.size(); section_index++) {
        const RecompPort::Section& section = context.sections[section_index];
        const std::vector<ELFIO::Elf_Xword>& section_funcs = context.section_functions[section_index];

        // Sections without functions carry nothing worth dumping.
        if (section_funcs.empty()) {
            continue;
        }

        fmt::print(context_file,
            "[[section]]\n"
            "name = \"{}\"\n"
            "rom = 0x{:08X}\n"
            "vram = 0x{:08X}\n"
            "size = 0x{:X}\n"
            "\n",
            section.name, section.rom_addr, section.ram_addr, section.size);

        if (!section.relocs.empty()) {
            fmt::print(context_file, "relocs = [\n");

            for (const RecompPort::Reloc& reloc : section.relocs) {
                if (reloc.target_section == section_index || reloc.target_section == section.bss_section_index) {
                    // TODO allow MIPS32 relocs for TLB mapping support.
                    if (reloc.type == RecompPort::RelocType::R_MIPS_HI16 || reloc.type == RecompPort::RelocType::R_MIPS_LO16) {
                        fmt::print(context_file, "    {{ type = \"{}\", vram = 0x{:08X}, target_vram = 0x{:08X} }},\n",
                            reloc_names[static_cast<int>(reloc.type)], reloc.address, reloc.target_address);
                    }
                }
            }

            fmt::print(context_file, "]\n\n");
        }

        fmt::print(context_file, "functions = [\n");

        for (const ELFIO::Elf_Xword& function_index : section_funcs) {
            const RecompPort::Function& func = context.functions[function_index];
            // The size is written in bytes, derived from the number of stored words.
            fmt::print(context_file, "    {{ name = \"{}\", vram = 0x{:08X}, size = 0x{:X} }},\n",
                func.name, func.vram, func.words.size() * sizeof(func.words[0]));
        }

        fmt::print(context_file, "]\n\n");
    }
}
// Reads the entire file at `path` in binary mode and returns its bytes.
//
// Returns an empty vector if the file cannot be opened, its size cannot be
// determined, it is empty, or fewer bytes than expected could be read. Callers
// therefore treat an empty result as a load failure.
static std::vector<uint8_t> read_file(const std::filesystem::path& path) {
    std::vector<uint8_t> ret;

    std::ifstream file{ path, std::ios::binary };
    if (!file.good()) {
        return ret;
    }

    file.seekg(0, std::ios::end);
    // tellg reports -1 on failure; passing that to resize would request an enormous buffer.
    const std::streamoff file_size = file.tellg();
    if (file_size <= 0) {
        return ret;
    }
    file.seekg(0, std::ios::beg);

    ret.resize(static_cast<std::size_t>(file_size));
    file.read(reinterpret_cast<char*>(ret.data()), static_cast<std::streamsize>(ret.size()));

    // A short read would otherwise leave zero-filled bytes that look like real data.
    if (file.gcount() != static_cast<std::streamsize>(ret.size())) {
        ret.clear();
    }

    return ret;
}
int main(int argc, char** argv) { int main(int argc, char** argv) {
auto exit_failure = [] (const std::string& error_str) { auto exit_failure = [] (const std::string& error_str) {
fmt::vprint(stderr, error_str, fmt::make_format_args()); fmt::vprint(stderr, error_str, fmt::make_format_args());
@ -1177,7 +1243,6 @@ int main(int argc, char** argv) {
exit_failure(fmt::format("Failed to load config file: {}\n", config_path)); exit_failure(fmt::format("Failed to load config file: {}\n", config_path));
} }
ELFIO::elfio elf_file;
RabbitizerConfig_Cfg.pseudos.pseudoMove = false; RabbitizerConfig_Cfg.pseudos.pseudoMove = false;
RabbitizerConfig_Cfg.pseudos.pseudoBeqz = false; RabbitizerConfig_Cfg.pseudos.pseudoBeqz = false;
RabbitizerConfig_Cfg.pseudos.pseudoBnez = false; RabbitizerConfig_Cfg.pseudos.pseudoBnez = false;
@ -1195,6 +1260,16 @@ int main(int argc, char** argv) {
std::unordered_set<std::string> relocatable_sections{}; std::unordered_set<std::string> relocatable_sections{};
relocatable_sections.insert(relocatable_sections_ordered.begin(), relocatable_sections_ordered.end()); relocatable_sections.insert(relocatable_sections_ordered.begin(), relocatable_sections_ordered.end());
RecompPort::Context context{};
if (!config.elf_path.empty() && !config.symbols_file_path.empty()) {
exit_failure("Config file cannot provide both an elf and a symbols file\n");
}
// Build a context from the provided elf file.
if (!config.elf_path.empty()) {
ELFIO::elfio elf_file;
if (!elf_file.load(config.elf_path.string())) { if (!elf_file.load(config.elf_path.string())) {
exit_failure("Failed to load provided elf file\n"); exit_failure("Failed to load provided elf file\n");
} }
@ -1207,7 +1282,7 @@ int main(int argc, char** argv) {
exit_failure("Incorrect endianness\n"); exit_failure("Incorrect endianness\n");
} }
RecompPort::Context context{ elf_file }; context = { elf_file };
context.relocatable_sections = std::move(relocatable_sections); context.relocatable_sections = std::move(relocatable_sections);
// Read all of the sections in the elf and look for the symbol table section // Read all of the sections in the elf and look for the symbol table section
@ -1221,13 +1296,7 @@ int main(int argc, char** argv) {
exit_failure("No symbol table section found\n"); exit_failure("No symbol table section found\n");
} }
// Functions that weren't declared properly and thus have no size in the elf // Manually sized functions
//context.manually_sized_funcs.emplace("guMtxF2L", 0x64);
//context.manually_sized_funcs.emplace("guScaleF", 0x48);
//context.manually_sized_funcs.emplace("guTranslateF", 0x48);
//context.manually_sized_funcs.emplace("guMtxIdentF", 0x48);
//context.manually_sized_funcs.emplace("sqrtf", 0x8);
//context.manually_sized_funcs.emplace("guMtxIdent", 0x4C);
for (const auto& func_size : config.manual_func_sizes) { for (const auto& func_size : config.manual_func_sizes) {
context.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes); context.manually_sized_funcs.emplace(func_size.func_name, func_size.size_bytes);
} }
@ -1241,6 +1310,40 @@ int main(int argc, char** argv) {
if (config.has_entrypoint && !found_entrypoint_func) { if (config.has_entrypoint && !found_entrypoint_func) {
exit_failure("Could not find entrypoint function\n"); exit_failure("Could not find entrypoint function\n");
} }
}
// Build a context from the provided symbols file.
else if (!config.symbols_file_path.empty()) {
if (config.rom_file_path.empty()) {
exit_failure("A ROM file must be provided when using a symbols file\n");
}
std::vector<uint8_t> rom = read_file(config.rom_file_path);
if (rom.empty()) {
exit_failure("Failed to load ROM file: " + config.rom_file_path.string() + "\n");
}
if (!RecompPort::Context::from_symbol_file(config.symbols_file_path, std::move(rom), context)) {
exit_failure("Failed to load symbols file\n");
}
for (RecompPort::Function& func : context.functions) {
if (reimplemented_funcs.contains(func.name)) {
func.reimplemented = true;
func.name = func.name + "_recomp";
func.ignored = true;
} else if (ignored_funcs.contains(func.name)) {
func.name = func.name + "_recomp";
func.ignored = true;
} else if (renamed_funcs.contains(func.name)) {
func.name = func.name + "_recomp";
func.ignored = false;
}
}
}
else {
exit_failure("Config file must provide either an elf or a symbols file\n");
}
fmt::print("Function count: {}\n", context.functions.size()); fmt::print("Function count: {}\n", context.functions.size());
@ -1259,6 +1362,11 @@ int main(int argc, char** argv) {
std::vector<std::vector<uint32_t>> static_funcs_by_section{ context.sections.size() }; std::vector<std::vector<uint32_t>> static_funcs_by_section{ context.sections.size() };
// TODO expose a way to dump the context from the command line. Make sure not to rename functions when doing so.
//fmt::print("Dumping context\n");
//dump_context(context, "dump.toml");
//return 0;
fmt::print("Working dir: {}\n", std::filesystem::current_path().string()); fmt::print("Working dir: {}\n", std::filesystem::current_path().string());
// Stub out any functions specified in the config file. // Stub out any functions specified in the config file.