WIP overlay support and some libultra function implementations for other games

This commit is contained in:
Mr-Wiseguy 2023-01-12 23:39:49 -05:00
parent 0af9d489b3
commit c6de2b6189
17 changed files with 13096 additions and 214 deletions

View file

@ -6,6 +6,8 @@
#include <cstdint>
#include <vector>
#include <unordered_map>
#include <span>
#include "elfio/elfio.hpp"
#ifdef _MSC_VER
inline uint32_t byteswap(uint32_t val) {
@ -29,26 +31,57 @@ namespace RecompPort {
std::vector<uint32_t> entries;
};
struct AbsoluteJump {
uint32_t jump_target;
uint32_t instruction_vram;
};
struct Function {
uint32_t vram;
uint32_t rom;
const std::span<const uint32_t> words;
std::string name;
ELFIO::Elf_Half section_index;
bool ignored;
bool reimplemented;
};
struct Section {
ELFIO::Elf_Xword rom_addr;
ELFIO::Elf64_Addr ram_addr;
ELFIO::Elf_Xword size;
std::vector<uint32_t> function_addrs;
std::string name;
bool executable;
};
struct FunctionStats {
std::vector<JumpTable> jump_tables;
std::vector<AbsoluteJump> absolute_jumps;
};
struct Context {
std::vector<RecompPort::Function> functions;
// ROM address of each section
std::vector<Section> sections;
std::vector<Function> functions;
std::unordered_map<uint32_t, std::vector<size_t>> functions_by_vram;
std::vector<uint8_t> rom;
// A list of the list of each function (by index in `functions`) in a given section
std::vector<std::vector<size_t>> section_functions;
int executable_section_count;
Context(const ELFIO::elfio& elf_file) {
sections.resize(elf_file.sections.size());
section_functions.resize(elf_file.sections.size());
functions.reserve(1024);
functions_by_vram.reserve(1024);
rom.reserve(8 * 1024 * 1024);
executable_section_count = 0;
}
};
bool analyze_function(const Context& context, const Function& function, const std::vector<rabbitizer::InstructionCpu>& instructions, FunctionStats& stats);
bool recompile_function(const Context& context, const Function& func, std::string_view output_path);
bool recompile_function(const Context& context, const Function& func, std::string_view output_path, std::span<std::vector<uint32_t>> static_funcs);
}
#endif

View file

@ -4,6 +4,8 @@
#include <stdint.h>
#include <math.h>
#include <assert.h>
#include <setjmp.h>
#include <malloc.h>
#if 0 // treat GPRs as 32-bit, should be better codegen
typedef uint32_t gpr;
@ -129,6 +131,19 @@ static inline gpr do_lwl(uint8_t* rdram, gpr offset, gpr reg) {
#define TRUNC_W_D(val) \
((int32_t)(val))
#define TRUNC_L_S(val) \
((int64_t)(val))
#define TRUNC_L_D(val) \
((int64_t)(val))
// TODO rounding mode
#define CVT_W_S(val) \
((int32_t)(val))
#define CVT_W_D(val) \
((int32_t)(val))
#define NAN_CHECK(val) \
assert(val == val)
@ -172,6 +187,38 @@ recomp_func_t* get_function(uint32_t vram);
#define LOOKUP_FUNC(val) \
get_function(val)
// For the Mario Party games (not working)
//// This has to be in this file so it can be inlined
//struct jmp_buf_storage {
// jmp_buf buffer;
//};
//
//struct RecompJmpBuf {
// int32_t owner;
// struct jmp_buf_storage* storage;
// uint64_t magic;
//};
//
//// Randomly generated constant
//#define SETJMP_MAGIC 0xe17afdfa939a437bu
//
//int32_t osGetThreadEx(void);
//
//#define setjmp_recomp(rdram, ctx) { \
// struct RecompJmpBuf* buf = (struct RecompJmpBuf*)(&rdram[(uint64_t)ctx->r4 - 0xFFFFFFFF80000000]); \
// \
// /* Check if this jump buffer was previously set up */ \
// if (buf->magic == SETJMP_MAGIC) { \
// /* If so, free the old jmp_buf */ \
// free(buf->storage); \
// } \
// \
// buf->magic = SETJMP_MAGIC; \
// buf->owner = osGetThreadEx(); \
// buf->storage = (struct jmp_buf_storage*)calloc(1, sizeof(struct jmp_buf_storage)); \
// ctx->r2 = setjmp(buf->storage->buffer); \
//}
#ifdef __cplusplus
}
#endif

21
sections.h Normal file
View file

@ -0,0 +1,21 @@
#ifndef __SECTIONS_H__
#define __SECTIONS_H__
#include <stdint.h>
#define ARRLEN(x) (sizeof(x) / sizeof((x)[0]))
typedef struct {
void* func;
uint32_t offset;
} FuncEntry;
typedef struct {
uint32_t rom_addr;
uint32_t ram_addr;
uint32_t size;
FuncEntry *funcs;
size_t num_funcs;
} SectionTableEntry;
#endif

View file

@ -19,37 +19,38 @@ struct RegState {
bool valid_addiu;
bool valid_addend;
// For tracking a register that has been loaded from RAM
uint32_t loaded_lw_vram;
uint32_t loaded_addu_vram;
uint32_t loaded_address;
uint8_t loaded_addend_reg;
bool valid_loaded;
uint32_t loaded_lw_vram;
uint32_t loaded_addu_vram;
uint32_t loaded_address;
uint8_t loaded_addend_reg;
bool valid_loaded;
RegState() = default;
RegState() = default;
void invalidate() {
prev_lui = 0;
prev_addiu_vram = 0;
prev_addu_vram = 0;
prev_addend_reg = 0;
void invalidate() {
prev_lui = 0;
prev_addiu_vram = 0;
prev_addu_vram = 0;
prev_addend_reg = 0;
valid_lui = false;
valid_addiu = false;
valid_addend = false;
valid_lui = false;
valid_addiu = false;
valid_addend = false;
loaded_lw_vram = 0;
loaded_addu_vram = 0;
loaded_address = 0;
loaded_addend_reg = 0;
loaded_lw_vram = 0;
loaded_addu_vram = 0;
loaded_address = 0;
loaded_addend_reg = 0;
valid_loaded = false;
}
valid_loaded = false;
}
};
using InstrId = rabbitizer::InstrId::UniqueId;
using RegId = rabbitizer::Registers::Cpu::GprO32;
bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPort::Function& func, RecompPort::FunctionStats& stats,
RegState reg_states[32]) {
RegState reg_states[32], std::vector<RegState>& stack_states) {
// Temporary register state for tracking the register being operated on
RegState temp{};
@ -117,11 +118,45 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo
case InstrId::cpu_or:
check_move();
break;
case InstrId::cpu_sw:
// If this is a store to the stack, copy the state of rt into the stack at the given offset
if (base == (int)RegId::GPR_O32_sp) {
if ((imm & 0b11) != 0) {
fmt::print(stderr, "Invalid alignment on offset for sw to stack: {}\n", (int16_t)imm);
return false;
}
if (((int16_t)imm) < 0) {
fmt::print(stderr, "Negative offset for sw to stack: {}\n", (int16_t)imm);
return false;
}
size_t stack_offset = imm / 4;
if (stack_offset >= stack_states.size()) {
stack_states.resize(stack_offset + 1);
}
stack_states[stack_offset] = reg_states[rt];
}
break;
case InstrId::cpu_lw:
// rt has been completely overwritten, so invalidate it
temp.invalidate();
// If this is a load from the stack, copy the state of the stack at the given offset to rt
if (base == (int)RegId::GPR_O32_sp) {
if ((imm & 0b11) != 0) {
fmt::print(stderr, "Invalid alignment on offset for lw from stack: {}\n", (int16_t)imm);
return false;
}
if (((int16_t)imm) < 0) {
fmt::print(stderr, "Negative offset for lw from stack: {}\n", (int16_t)imm);
return false;
}
size_t stack_offset = imm / 4;
if (stack_offset >= stack_states.size()) {
stack_states.resize(stack_offset + 1);
}
temp = stack_states[stack_offset];
}
// If the base register has a valid lui state and a valid addend before this, then this may be a load from a jump table
if (reg_states[base].valid_lui && reg_states[base].valid_addend) {
else if (reg_states[base].valid_lui && reg_states[base].valid_addend) {
// Exactly one of the lw and the base reg should have a valid lo16 value
bool nonzero_immediate = imm != 0;
if (nonzero_immediate != reg_states[base].valid_addiu) {
@ -158,6 +193,12 @@ bool analyze_instruction(const rabbitizer::InstructionCpu& instr, const RecompPo
instr.getVram(),
std::vector<uint32_t>{}
);
} else if (reg_states[rs].valid_lui && reg_states[rs].valid_addiu && !reg_states[rs].valid_addend && !reg_states[rs].valid_loaded) {
uint32_t address = reg_states[rs].prev_addiu_vram + reg_states[rs].prev_lui;
stats.absolute_jumps.emplace_back(
address,
instr.getVram()
);
} else {
// Inconclusive analysis
fmt::print(stderr, "Failed to to find jump table for `jr {}` at 0x{:08X} in {}\n", RabbitizerRegister_getNameGpr(rs), instr.getVram(), func.name);
@ -180,11 +221,12 @@ bool RecompPort::analyze_function(const RecompPort::Context& context, const Reco
const std::vector<rabbitizer::InstructionCpu>& instructions, RecompPort::FunctionStats& stats) {
// Create a state to track each register (r0 won't be used)
RegState reg_states[32] {};
std::vector<RegState> stack_states{};
// Look for jump tables
// A linear search through the func won't be accurate due to not taking control flow into account, but it'll work for finding jtables
for (const auto& instr : instructions) {
if (!analyze_instruction(instr, func, stats, reg_states)) {
if (!analyze_instruction(instr, func, stats, reg_states, stack_states)) {
return false;
}
}

View file

@ -10,12 +10,114 @@
#include "fmt/ostream.h"
#include "recomp_port.h"
#include "main.h"
#include <set>
std::unordered_set<std::string> reimplemented_funcs{
// OS initialize functions
"__osInitialize_common",
"osInitialize",
// Audio interface functions
"osAiGetLength",
"osAiGetStatus",
"osAiSetFrequency",
"osAiSetNextBuffer",
// Video interface functions
"osViSetYScale",
"osCreateViManager",
"osViBlack",
"osViSetSpecialFeatures",
"osViGetCurrentFramebuffer",
"osViGetNextFramebuffer",
"osViSwapBuffer",
"osViSetMode",
"osViSetEvent",
// RDP functions
"osDpSetNextBuffer",
// RSP functions
"osSpTaskLoad",
"osSpTaskStartGo",
"osSpTaskYield",
"osSpTaskYielded",
"__osSpSetPc",
// Controller functions
"osContInit",
"osContStartReadData",
"osContGetReadData",
"osContSetCh",
// EEPROM functions
"osEepromProbe",
"osEepromWrite",
"osEepromLongWrite",
"osEepromRead",
"osEepromLongRead",
// Rumble functions
"__osMotorAccess",
"osMotorInit",
"osMotorStart",
"osMotorStop",
// Parallel interface (cartridge, DMA, etc.) functions
"osCartRomInit",
"osCreatePiManager",
"osPiStartDma",
"osEPiStartDma",
"osPiGetStatus",
"osEPiRawStartDma",
// Threading functions
"osCreateThread",
"osStartThread",
"osStopThread",
"osDestroyThread",
"osSetThreadPri",
"osGetThreadPri",
"osGetThreadId",
// Message Queue functions
"osCreateMesgQueue",
"osRecvMesg",
"osSendMesg",
"osJamMesg",
"osSetEventMesg",
// Timer functions
"osGetTime",
// interrupt functions
"osSetIntMask",
"__osDisableInt",
"__osRestoreInt",
// TLB functions
"osVirtualToPhysical",
// Coprocessor 0/1 functions
"osGetCount",
"__osSetFpcCsr",
// Cache funcs
"osInvalDCache",
"osInvalICache",
"osWritebackDCache",
"osWritebackDCacheAll",
// Debug functions
"__checkHardware_msp",
"__checkHardware_kmc",
"__checkHardware_isv",
"__osInitialize_msp",
"__osInitialize_kmc",
"__osInitialize_isv",
"__osRdbSend",
// libgcc math routines (these throw off the recompiler)
"__udivdi3",
"__divdi3",
"__umoddi3",
// ido math routines
"__ull_div",
"__ll_div",
"__ll_mul",
"__ull_rem",
"__ull_to_d",
"__ull_to_f",
};
std::unordered_set<std::string> ignored_funcs {
// OS initialize functions
"__createSpeedParam",
"__osInitialize_common",
"__osInitialize_autodetect",
"osInitialize",
// Audio interface functions
"osAiGetLength",
@ -86,6 +188,17 @@ std::unordered_set<std::string> ignored_funcs {
"osMotorInit",
"osMotorStart",
"osMotorStop",
"__osMotorAccess",
"_MakeMotorData",
// Pack functions
"__osCheckId",
"__osCheckPackId",
"__osGetId",
"__osPfsRWInode",
"__osRepairPackId",
"__osPfsSelectBank",
"__osCheckPackId",
"ramromMain",
// PFS functions
"osPfsAllocateFile",
"osPfsChecker",
@ -112,6 +225,8 @@ std::unordered_set<std::string> ignored_funcs {
"__osPfsRequestData",
"__osPfsRequestOneChannel",
"__osPfsCreateAccessQueue",
"__osPfsCheckRamArea",
"__osPfsGetNextPage",
// Low level serial interface functions
"__osSiDeviceBusy",
"__osSiGetStatus",
@ -236,15 +351,148 @@ std::unordered_set<std::string> ignored_funcs {
"gspF3DEX2_fifoTextStart",
"gspS2DEX2_fifoTextStart",
"gspL3DEX2_fifoTextStart",
// Debug functions
"msp_proutSyncPrintf",
"__osInitialize_msp",
"__checkHardware_msp",
"kmc_proutSyncPrintf",
"__osInitialize_kmc",
"__checkHardware_kmc",
"isPrintfInit",
"is_proutSyncPrintf",
"__osInitialize_isv",
"__checkHardware_isv",
"__isExpJP",
"__isExp",
"__osRdbSend",
"__rmonSendData",
"__rmonWriteMem",
"__rmonReadWordAt",
"__rmonWriteWordTo",
"__rmonWriteMem",
"__rmonSetSRegs",
"__rmonSetVRegs",
"__rmonStopThread",
"__rmonGetThreadStatus",
"__rmonGetVRegs",
"__rmonHitSpBreak",
"__rmonRunThread",
"__rmonClearBreak",
"__rmonGetBranchTarget",
"__rmonGetSRegs",
"__rmonSetBreak",
"__rmonReadMem",
"__rmonRunThread",
"__rmonCopyWords",
"__rmonExecute",
"__rmonGetExceptionStatus",
"__rmonGetExeName",
"__rmonGetFRegisters",
"__rmonGetGRegisters",
"__rmonGetRegionCount",
"__rmonGetRegions",
"__rmonGetRegisterContents",
"__rmonGetTCB",
"__rmonHitBreak",
"__rmonHitCpuFault",
"__rmonIdleRCP",
"__rmonInit",
"__rmonIOflush",
"__rmonIOhandler",
"__rmonIOputw",
"__rmonListBreak",
"__rmonListProcesses",
"__rmonListThreads",
"__rmonLoadProgram",
"__rmonMaskIdleThreadInts",
"__rmonMemcpy",
"__rmonPanic",
"__rmonRCPrunning",
"__rmonRunRCP",
"__rmonSendFault",
"__rmonSendHeader",
"__rmonSendReply",
"__rmonSetComm",
"__rmonSetFault",
"__rmonSetFRegisters",
"__rmonSetGRegisters",
"__rmonSetSingleStep",
"__rmonStepRCP",
"__rmonStopUserThreads",
"__rmonThreadStatus",
"__rmon",
"__rmonRunThread",
"rmonFindFaultedThreads",
"rmonMain",
"rmonPrintf",
"rmonGetRcpRegister",
"kdebugserver",
"send",
// libgcc math routines (these throw off the recompiler)
"__muldi3",
"__divdi3",
"__udivdi3",
"__umoddi3",
// ido math routines
"__ll_div",
"__ll_lshift",
"__ll_mod",
"__ll_mul",
"__ll_rem",
"__ll_rshift",
"__ull_div",
"__ull_divremi",
"__ull_rem",
"__ull_rshift",
"__d_to_ll",
"__f_to_ll",
"__d_to_ull",
"__f_to_ull",
"__ll_to_d",
"__ll_to_f",
"__ull_to_d",
"__ull_to_f",
// Setjmp/longjmp for mario party
"setjmp",
"longjmp"
// 64-bit functions for banjo
"func_8025C29C",
"func_8025C240",
"func_8025C288",
};
std::unordered_set<std::string> renamed_funcs{
"sincosf",
"sinf",
"cosf",
"sqrt",
"sqrtf",
"memcpy",
"memset",
"strchr",
"strlen",
"sprintf",
"bzero",
"bcopy",
"bcmp",
"setjmp",
"longjmp",
"ldiv",
"lldiv",
"ceil",
"ceilf",
"floor",
"floorf",
"fmodf",
"lround",
"lroundf",
"nearbyint",
"nearbyintf",
"round",
"roundf",
"trunc",
"truncf",
"vsprintf"
};
// Functions that weren't declared properly and thus have no size in the elf
@ -257,6 +505,177 @@ std::unordered_map<std::string, size_t> unsized_funcs{
{ "guMtxIdent", 0x4C },
};
bool read_symbols(RecompPort::Context& context, const ELFIO::elfio& elf_file, ELFIO::section* symtab_section, uint32_t entrypoint) {
bool found_entrypoint_func = false;
ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section };
fmt::print("Num symbols: {}\n", symbols.get_symbols_num());
for (int sym_index = 0; sym_index < symbols.get_symbols_num(); sym_index++) {
std::string name;
ELFIO::Elf64_Addr value;
ELFIO::Elf_Xword size;
unsigned char bind;
unsigned char type;
ELFIO::Elf_Half section_index;
unsigned char other;
bool ignored = false;
bool reimplemented = false;
// Read symbol properties
symbols.get_symbol(sym_index, name, value, size, bind, type,
section_index, other);
if (section_index >= context.sections.size()) {
continue;
}
// Check if this symbol is the entrypoint
if (value == entrypoint && type == ELFIO::STT_FUNC) {
found_entrypoint_func = true;
size = 0x50; // dummy size for entrypoints, should cover them all
name = "recomp_entrypoint";
}
// Check if this symbol is unsized and if so populate its size from the unsized_funcs map
if (size == 0) {
auto size_find = unsized_funcs.find(name);
if (size_find != unsized_funcs.end()) {
size = size_find->second;
type = ELFIO::STT_FUNC;
}
}
if (reimplemented_funcs.contains(name)) {
reimplemented = true;
name = name + "_recomp";
ignored = true;
} else if (ignored_funcs.contains(name)) {
name = name + "_recomp";
ignored = true;
}
auto& section = context.sections[section_index];
// Check if this symbol is a function or has no type (like a regular glabel would)
// Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls
if (ignored || type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE || type == ELFIO::STT_OBJECT) {
if (renamed_funcs.contains(name)) {
name = name + "_recomp";
ignored = false;
}
if (section_index < context.sections.size()) {
auto section_offset = value - elf_file.sections[section_index]->get_address();
const uint32_t* words = reinterpret_cast<const uint32_t*>(elf_file.sections[section_index]->get_data() + section_offset);
uint32_t vram = static_cast<uint32_t>(value);
uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0;
uint32_t rom_address = static_cast<uint32_t>(section_offset + section.rom_addr);
section.function_addrs.push_back(vram);
context.functions_by_vram[vram].push_back(context.functions.size());
// Find the entrypoint by rom address in case it doesn't have vram as its value
if (rom_address == 0x1000) {
vram = entrypoint;
found_entrypoint_func = true;
name = "recomp_entrypoint";
if (size == 0) {
num_instructions = 0x50 / 4;
}
}
if (num_instructions > 0) {
context.section_functions[section_index].push_back(context.functions.size());
}
context.functions.emplace_back(
vram,
rom_address,
std::span{ words, num_instructions },
std::move(name),
section_index,
ignored,
reimplemented
);
} else {
uint32_t vram = static_cast<uint32_t>(value);
section.function_addrs.push_back(vram);
context.functions_by_vram[vram].push_back(context.functions.size());
context.functions.emplace_back(
vram,
0,
std::span<const uint32_t>{},
std::move(name),
section_index,
ignored,
reimplemented
);
}
}
}
return found_entrypoint_func;
}
ELFIO::section* read_sections(RecompPort::Context& context, const ELFIO::elfio& elf_file) {
ELFIO::section* symtab_section = nullptr;
// Iterate over every section to record rom addresses and find the symbol table
fmt::print("Sections\n");
for (const std::unique_ptr<ELFIO::section>& section : elf_file.sections) {
auto& section_out = context.sections[section->get_index()];
//fmt::print(" {}: {} @ 0x{:08X}, 0x{:08X}\n", section->get_index(), section->get_name(), section->get_address(), context.rom.size());
// Set the rom address of this section to the current accumulated ROM size
section_out.rom_addr = context.rom.size();
section_out.ram_addr = section->get_address();
section_out.size = section->get_size();
// If this section isn't bss (SHT_NOBITS) and ends up in the rom (SHF_ALLOC), copy this section into the rom
if (section->get_type() != ELFIO::SHT_NOBITS && section->get_flags() & ELFIO::SHF_ALLOC) {
size_t cur_rom_size = context.rom.size();
context.rom.resize(context.rom.size() + section->get_size());
std::copy(section->get_data(), section->get_data() + section->get_size(), &context.rom[cur_rom_size]);
}
// Check if this section is the symbol table and record it if so
if (section->get_type() == ELFIO::SHT_SYMTAB) {
symtab_section = section.get();
}
// Check if this section is marked as executable, which means it has code in it
if (section->get_flags() & ELFIO::SHF_EXECINSTR) {
section_out.executable = true;
context.executable_section_count++;
}
section_out.name = section->get_name();
}
return symtab_section;
}
template<typename Iterator, typename Pred, typename Operation> void
for_each_if(Iterator begin, Iterator end, Pred p, Operation op) {
for (; begin != end; begin++) {
if (p(*begin)) {
op(*begin);
}
}
}
void analyze_sections(RecompPort::Context& context, const ELFIO::elfio& elf_file) {
std::vector<RecompPort::Section*> executable_sections{};
executable_sections.reserve(context.executable_section_count);
for_each_if(context.sections.begin(), context.sections.end(),
[](const RecompPort::Section& section) {
return section.executable && section.rom_addr >= 0x1000;
},
[&](RecompPort::Section& section) {
executable_sections.push_back(&section);
}
);
std::sort(executable_sections.begin(), executable_sections.end(),
[](const RecompPort::Section* a, const RecompPort::Section* b) {
return a->ram_addr < b->ram_addr;
}
);
}
int main(int argc, char** argv) {
if (argc != 3) {
fmt::print("Usage: {} [input elf file] [entrypoint RAM address]\n", argv[0]);
@ -267,6 +686,7 @@ int main(int argc, char** argv) {
RabbitizerConfig_Cfg.pseudos.pseudoMove = false;
RabbitizerConfig_Cfg.pseudos.pseudoBeqz = false;
RabbitizerConfig_Cfg.pseudos.pseudoBnez = false;
RabbitizerConfig_Cfg.pseudos.pseudoNot = false;
auto exit_failure = [] (const std::string& error_str) {
fmt::print(stderr, error_str);
@ -293,113 +713,21 @@ int main(int argc, char** argv) {
exit_failure("Incorrect endianness\n");
}
// Pointer to the symbol table section
ELFIO::section* symtab_section = nullptr;
// ROM address of each section
std::vector<ELFIO::Elf_Xword> section_rom_addrs{};
RecompPort::Context context{ elf_file };
RecompPort::Context context{};
section_rom_addrs.resize(elf_file.sections.size());
context.functions.reserve(1024);
context.rom.reserve(8 * 1024 * 1024);
// Read all of the sections in the elf and look for the symbol table section
ELFIO::section* symtab_section = read_sections(context, elf_file);
// Iterate over every section to record rom addresses and find the symbol table
fmt::print("Sections\n");
for (const std::unique_ptr<ELFIO::section>& section : elf_file.sections) {
//fmt::print(" {}: {} @ 0x{:08X}, 0x{:08X}\n", section->get_index(), section->get_name(), section->get_address(), context.rom.size());
// Set the rom address of this section to the current accumulated ROM size
section_rom_addrs[section->get_index()] = context.rom.size();
// If this section isn't bss (SHT_NOBITS) and ends up in the rom (SHF_ALLOC), copy this section into the rom
if (section->get_type() != ELFIO::SHT_NOBITS && section->get_flags() & ELFIO::SHF_ALLOC) {
size_t cur_rom_size = context.rom.size();
context.rom.resize(context.rom.size() + section->get_size());
std::copy(section->get_data(), section->get_data() + section->get_size(), &context.rom[cur_rom_size]);
}
// Check if this section is the symbol table and record it if so
if (section->get_type() == ELFIO::SHT_SYMTAB) {
symtab_section = section.get();
}
}
// Search the sections to see if any are overlays or TLB-mapped
analyze_sections(context, elf_file);
// If no symbol table was found then exit
if (symtab_section == nullptr) {
exit_failure("No symbol table section found\n");
}
ELFIO::symbol_section_accessor symbols{ elf_file, symtab_section };
fmt::print("Num symbols: {}\n", symbols.get_symbols_num());
bool found_entrypoint_func = false;
for (int sym_index = 0; sym_index < symbols.get_symbols_num(); sym_index++) {
std::string name;
ELFIO::Elf64_Addr value;
ELFIO::Elf_Xword size;
unsigned char bind;
unsigned char type;
ELFIO::Elf_Half section_index;
unsigned char other;
bool ignored = false;
// Read symbol properties
symbols.get_symbol(sym_index, name, value, size, bind, type,
section_index, other);
// Check if this symbol is unsized and if so populate its size from the unsized_funcs map
if (size == 0) {
if (value == entrypoint && type == ELFIO::STT_FUNC) {
found_entrypoint_func = true;
size = 0x50; // dummy size for entrypoints, should cover them all
name = "recomp_entrypoint";
} else {
auto size_find = unsized_funcs.find(name);
if (size_find != unsized_funcs.end()) {
size = size_find->second;
type = ELFIO::STT_FUNC;
}
}
}
if (ignored_funcs.contains(name)) {
name = name + "_recomp";
ignored = true;
}
// Check if this symbol is a function or has no type (like a regular glabel would)
// Symbols with no type have a dummy entry created so that their symbol can be looked up for function calls
if (ignored || type == ELFIO::STT_FUNC || type == ELFIO::STT_NOTYPE || type == ELFIO::STT_OBJECT) {
if (renamed_funcs.contains(name)) {
name = "_" + name;
ignored = false;
}
if (section_index < section_rom_addrs.size()) {
auto section_rom_addr = section_rom_addrs[section_index];
auto section_offset = value - elf_file.sections[section_index]->get_address();
const uint32_t* words = reinterpret_cast<const uint32_t*>(elf_file.sections[section_index]->get_data() + section_offset);
uint32_t vram = static_cast<uint32_t>(value);
uint32_t num_instructions = type == ELFIO::STT_FUNC ? size / 4 : 0;
context.functions_by_vram[vram].push_back(context.functions.size());
context.functions.emplace_back(
vram,
static_cast<uint32_t>(section_offset + section_rom_addr),
std::span{ words, num_instructions },
std::move(name),
ignored
);
} else {
uint32_t vram = static_cast<uint32_t>(value);
context.functions_by_vram[vram].push_back(context.functions.size());
context.functions.emplace_back(
vram,
0,
std::span<const uint32_t>{},
std::move(name),
ignored
);
}
}
}
// Read all of the symbols in the elf and look for the entrypoint function
bool found_entrypoint_func = read_symbols(context, elf_file, symtab_section, entrypoint);
if (!found_entrypoint_func) {
exit_failure("Could not find entrypoint function\n");
@ -427,6 +755,10 @@ int main(int argc, char** argv) {
"\n"
);
std::vector<std::vector<uint32_t>> static_funcs_by_section{ context.sections.size() };
std::string output_dir = "test/funcs/";
//#pragma omp parallel for
for (size_t i = 0; i < context.functions.size(); i++) {
const auto& func = context.functions[i];
@ -436,7 +768,73 @@ int main(int argc, char** argv) {
"void {}(uint8_t* restrict rdram, recomp_context* restrict ctx);\n", func.name);
fmt::print(func_lookup_file,
" {{ 0x{:08X}u, {} }},\n", func.vram, func.name);
if (RecompPort::recompile_function(context, func, "test/funcs/" + func.name + ".c") == false) {
if (RecompPort::recompile_function(context, func, output_dir + "ignore.txt"/*func.name + ".c"*/, static_funcs_by_section) == false) {
func_lookup_file.clear();
fmt::print(stderr, "Error recompiling {}\n", func.name);
std::exit(EXIT_FAILURE);
}
} else if (func.reimplemented) {
fmt::print(func_header_file,
"void {}(uint8_t* restrict rdram, recomp_context* restrict ctx);\n", func.name);
fmt::print(func_lookup_file,
" {{ 0x{:08X}u, {} }},\n", func.vram, func.name);
}
}
for (size_t section_index = 0; section_index < context.sections.size(); section_index++) {
auto& section = context.sections[section_index];
auto& section_funcs = section.function_addrs;
// Sort the section's functions
std::sort(section_funcs.begin(), section_funcs.end());
// Sort and deduplicate the static functions via a set
std::set<uint32_t> statics_set{ static_funcs_by_section[section_index].begin(), static_funcs_by_section[section_index].end() };
std::vector<uint32_t> section_statics{};
section_statics.assign(statics_set.begin(), statics_set.end());
size_t closest_func_index = 0;
for (size_t static_func_index = 0; static_func_index < section_statics.size(); static_func_index++) {
uint32_t static_func_addr = section_statics[static_func_index];
// Search for the closest function
while (section_funcs[closest_func_index] < static_func_addr && closest_func_index < section_funcs.size()) {
closest_func_index++;
}
// Determine the end of this static function
uint32_t cur_func_end = static_cast<uint32_t>(section.size + section.ram_addr);
// Check if there's a nonstatic function after this one
if (closest_func_index < section_funcs.size()) {
// If so, use that function's address as the end of this one
cur_func_end = section_funcs[closest_func_index];
}
uint32_t next_static_index = static_func_index + 1;
// Check if there's a known static function after this one
if (next_static_index < section_statics.size()) {
// If so, check if it's before the current end address
if (section_statics[next_static_index] < cur_func_end) {
cur_func_end = section_statics[next_static_index];
}
}
uint32_t rom_addr = static_cast<uint32_t>(static_func_addr - section.ram_addr + section.rom_addr);
const uint32_t* func_rom_start = reinterpret_cast<const uint32_t*>(context.rom.data() + rom_addr);
RecompPort::Function func {
static_func_addr,
rom_addr,
std::span{ func_rom_start, (cur_func_end - static_func_addr) / sizeof(uint32_t) },
fmt::format("static_{}_{:08X}", section_index, static_func_addr),
static_cast<ELFIO::Elf_Half>(section_index),
false
};
fmt::print(func_header_file,
"void {}(uint8_t* restrict rdram, recomp_context* restrict ctx);\n", func.name);
fmt::print(func_lookup_file,
" {{ 0x{:08X}u, {} }},\n", func.vram, func.name);
if (RecompPort::recompile_function(context, func, output_dir + func.name + ".c", static_funcs_by_section) == false) {
func_lookup_file.clear();
fmt::print(stderr, "Error recompiling {}\n", func.name);
std::exit(EXIT_FAILURE);
@ -453,7 +851,7 @@ int main(int argc, char** argv) {
"const char* get_rom_name() {{ return \"{}\"; }}\n"
"\n",
entrypoint,
std::filesystem::path{ elf_name }.replace_extension(".z64").string()
std::filesystem::path{ elf_name }.filename().replace_extension(".z64").string()
);
fmt::print(func_header_file,
@ -463,5 +861,50 @@ int main(int argc, char** argv) {
"#endif\n"
);
{
std::ofstream overlay_file(output_dir + "recomp_overlays.c");
std::string section_load_table = "SectionTableEntry sections[] = {\n";
fmt::print(overlay_file,
"#include \"recomp.h\"\n"
"#include \"funcs.h\"\n"
"#include \"sections.h\"\n"
"\n"
);
for (size_t section_index = 0; section_index < context.sections.size(); section_index++) {
const auto& section = context.sections[section_index];
const auto& section_funcs = context.section_functions[section_index];
if (!section_funcs.empty()) {
std::string_view section_name_trimmed{ section.name };
while (section_name_trimmed[0] == '.') {
section_name_trimmed.remove_prefix(1);
}
std::string section_funcs_array_name = fmt::format("section_{}_{}_funcs", section_index, section_name_trimmed);
section_load_table += fmt::format(" {{ .rom_addr = 0x{0:08X}, .ram_addr = 0x{1:08X}, .size = 0x{2:08X}, .funcs = {3}, .num_funcs = ARRLEN({3}) }},\n",
section.rom_addr, section.ram_addr, section.size, section_funcs_array_name);
fmt::print(overlay_file, "FuncEntry {}[] = {{\n", section_funcs_array_name);
for (size_t func_index : section_funcs) {
const auto& func = context.functions[func_index];
if (func.reimplemented || (!func.name.empty() && !func.ignored && func.words.size() != 0)) {
fmt::print(overlay_file, " {{ .func = {}, .offset = 0x{:08x} }},\n", func.name, func.rom - section.rom_addr);
}
}
fmt::print(overlay_file, "}};\n");
}
}
section_load_table += "};\n";
fmt::print(overlay_file, "{}", section_load_table);
}
return 0;
}

View file

@ -17,7 +17,7 @@ std::string_view ctx_gpr_prefix(int reg) {
return "";
}
bool process_instruction(const RecompPort::Context& context, const RecompPort::Function& func, const RecompPort::FunctionStats& stats, const std::unordered_set<uint32_t>& skipped_insns, size_t instr_index, const std::vector<rabbitizer::InstructionCpu>& instructions, std::ofstream& output_file, bool indent, bool emit_link_branch, int link_branch_index, bool& needs_link_branch, bool& is_branch_likely) {
bool process_instruction(const RecompPort::Context& context, const RecompPort::Function& func, const RecompPort::FunctionStats& stats, const std::unordered_set<uint32_t>& skipped_insns, size_t instr_index, const std::vector<rabbitizer::InstructionCpu>& instructions, std::ofstream& output_file, bool indent, bool emit_link_branch, int link_branch_index, bool& needs_link_branch, bool& is_branch_likely, std::span<std::vector<uint32_t>> static_funcs_out) {
const auto& instr = instructions[instr_index];
needs_link_branch = false;
is_branch_likely = false;
@ -56,7 +56,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
if (instr_index < instructions.size() - 1) {
bool dummy_needs_link_branch;
bool dummy_is_branch_likely;
process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely);
process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely, static_funcs_out);
}
print_indent();
fmt::print(output_file, fmt_str, args...);
@ -72,7 +72,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
if (instr_index < instructions.size() - 1) {
bool dummy_needs_link_branch;
bool dummy_is_branch_likely;
process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, true, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely);
process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, true, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely, static_funcs_out);
}
fmt::print(output_file, " ");
fmt::print(output_file, fmt_str, args...);
@ -106,6 +106,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_lui:
print_line("{}{} = S32({:#X} << 16)", ctx_gpr_prefix(rt), rt, imm);
break;
case InstrId::cpu_add:
case InstrId::cpu_addu:
{
// Check if this addu belongs to a jump table load
@ -125,6 +126,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
print_line("{}{} = {}{} + {}{}", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
case InstrId::cpu_negu: // pseudo instruction for subu x, 0, y
case InstrId::cpu_sub:
case InstrId::cpu_subu:
print_line("{}{} = SUB32({}{}, {}{})", ctx_gpr_prefix(rd), rd, ctx_gpr_prefix(rs), rs, ctx_gpr_prefix(rt), rt);
break;
@ -265,51 +267,62 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
{
uint32_t target_func_vram = instr.getBranchVramGeneric();
const auto matching_funcs_find = context.functions_by_vram.find(target_func_vram);
if (matching_funcs_find == context.functions_by_vram.end()) {
fmt::print(stderr, "No function found for jal target: 0x{:08X}\n", target_func_vram);
return false;
}
const auto& matching_funcs_vec = matching_funcs_find->second;
size_t real_func_index;
bool ambiguous;
// If there is more than one corresponding function, look for any that have a nonzero size
if (matching_funcs_vec.size() > 1) {
size_t nonzero_func_index = (size_t)-1;
bool found_nonzero_func = false;
for (size_t cur_func_index : matching_funcs_vec) {
const auto& cur_func = context.functions[cur_func_index];
if (cur_func.words.size() != 0) {
if (found_nonzero_func) {
ambiguous = true;
break;
}
found_nonzero_func = true;
nonzero_func_index = cur_func_index;
}
}
if (nonzero_func_index == (size_t)-1) {
fmt::print(stderr, "[Warn] Potential jal resolution ambiguity\n");
std::string jal_target_name;
// TODO the current section should be prioritized if the target jal is in its vram even if a function isn't known (i.e. static)
if (matching_funcs_find != context.functions_by_vram.end()) {
// If we found matches for the target function by vram,
const auto& matching_funcs_vec = matching_funcs_find->second;
size_t real_func_index;
bool ambiguous;
// If there is more than one corresponding function, look for any that have a nonzero size
if (matching_funcs_vec.size() > 1) {
size_t nonzero_func_index = (size_t)-1;
bool found_nonzero_func = false;
for (size_t cur_func_index : matching_funcs_vec) {
fmt::print(stderr, " {}\n", context.functions[cur_func_index].name);
const auto& cur_func = context.functions[cur_func_index];
if (cur_func.words.size() != 0) {
if (found_nonzero_func) {
ambiguous = true;
break;
}
found_nonzero_func = true;
nonzero_func_index = cur_func_index;
}
}
nonzero_func_index = 0;
if (nonzero_func_index == (size_t)-1) {
fmt::print(stderr, "[Warn] Potential jal resolution ambiguity\n");
for (size_t cur_func_index : matching_funcs_vec) {
fmt::print(stderr, " {}\n", context.functions[cur_func_index].name);
}
nonzero_func_index = 0;
}
real_func_index = nonzero_func_index;
ambiguous = false;
} else {
real_func_index = matching_funcs_vec.front();
ambiguous = false;
}
real_func_index = nonzero_func_index;
ambiguous = false;
if (ambiguous) {
fmt::print(stderr, "Ambiguous jal target: 0x{:08X}\n", target_func_vram);
for (size_t cur_func_index : matching_funcs_vec) {
const auto& cur_func = context.functions[cur_func_index];
fmt::print(stderr, " {}\n", cur_func.name);
}
return false;
}
jal_target_name = context.functions[real_func_index].name;
} else {
real_func_index = matching_funcs_vec.front();
ambiguous = false;
}
if (ambiguous) {
fmt::print(stderr, "Ambiguous jal target: 0x{:08X}\n", target_func_vram);
for (size_t cur_func_index : matching_funcs_vec) {
const auto& cur_func = context.functions[cur_func_index];
fmt::print(stderr, " {}\n", cur_func.name);
const auto& section = context.sections[func.section_index];
if (target_func_vram >= section.ram_addr && target_func_vram < section.ram_addr + section.size) {
jal_target_name = fmt::format("static_{}_{:08X}", func.section_index, target_func_vram);
static_funcs_out[func.section_index].push_back(target_func_vram);
} else {
fmt::print(stderr, "No function found for jal target: 0x{:08X}\n", target_func_vram);
return false;
}
return false;
}
needs_link_branch = true;
print_unconditional_branch("{}(rdram, ctx)", context.functions[real_func_index].name);
print_unconditional_branch("{}(rdram, ctx)", jal_target_name);
break;
}
case InstrId::cpu_jalr:
@ -335,26 +348,43 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) {
print_unconditional_branch("return");
} else {
auto find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(),
auto jtbl_find_result = std::find_if(stats.jump_tables.begin(), stats.jump_tables.end(),
[instr_vram](const RecompPort::JumpTable& jtbl) {
return jtbl.jr_vram == instr_vram;
});
if (find_result == stats.jump_tables.end()) {
fmt::print(stderr, "No jump table found for jr at 0x{:08X}\n", instr_vram);
}
const RecompPort::JumpTable& cur_jtbl = *find_result;
bool dummy_needs_link_branch, dummy_is_branch_likely;
process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely);
print_indent();
fmt::print(output_file, "switch (jr_addend_{:08X} >> 2) {{\n", cur_jtbl.jr_vram);
for (size_t entry_index = 0; entry_index < cur_jtbl.entries.size(); entry_index++) {
if (jtbl_find_result != stats.jump_tables.end()) {
const RecompPort::JumpTable& cur_jtbl = *jtbl_find_result;
bool dummy_needs_link_branch, dummy_is_branch_likely;
process_instruction(context, func, stats, skipped_insns, instr_index + 1, instructions, output_file, false, false, link_branch_index, dummy_needs_link_branch, dummy_is_branch_likely, static_funcs_out);
print_indent();
print_line("case {}: goto L_{:08X}; break", entry_index, cur_jtbl.entries[entry_index]);
fmt::print(output_file, "switch (jr_addend_{:08X} >> 2) {{\n", cur_jtbl.jr_vram);
for (size_t entry_index = 0; entry_index < cur_jtbl.entries.size(); entry_index++) {
print_indent();
print_line("case {}: goto L_{:08X}; break", entry_index, cur_jtbl.entries[entry_index]);
}
print_indent();
print_line("default: switch_error(__func__, 0x{:08X}, 0x{:08X})", instr_vram, cur_jtbl.vram);
print_indent();
fmt::print(output_file, "}}\n");
break;
}
print_indent();
print_line("default: switch_error(__func__, 0x{:08X}, 0x{:08X})", instr_vram, cur_jtbl.vram);
print_indent();
fmt::print(output_file, "}}\n");
auto jump_find_result = std::find_if(stats.absolute_jumps.begin(), stats.absolute_jumps.end(),
[instr_vram](const RecompPort::AbsoluteJump& jump) {
return jump.instruction_vram == instr_vram;
});
if (jump_find_result != stats.absolute_jumps.end()) {
needs_link_branch = true;
print_unconditional_branch("LOOKUP_FUNC({})(rdram, ctx)", (uint64_t)(int32_t)jump_find_result->jump_target);
// jr doesn't link so it acts like a tail call, meaning we should return directly after the jump returns
print_line("return");
break;
}
fmt::print(stderr, "No jump table found for jr at 0x{:08X}\n", instr_vram);
}
break;
case InstrId::cpu_bnel:
@ -429,6 +459,15 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
print_line("{}{} = ctx->f{}.u32h", ctx_gpr_prefix(rt), rt, fs - 1);
}
break;
//case InstrId::cpu_dmfc1:
// if ((fs & 1) == 0) {
// // even fpr
// print_line("{}{} = ctx->f{}.u64", ctx_gpr_prefix(rt), rt, fs);
// } else {
// fmt::print(stderr, "Invalid operand for dmfc1: f{}\n", fs);
// return false;
// }
// break;
case InstrId::cpu_lwc1:
if ((ft & 1) == 0) {
// even fpr
@ -751,6 +790,100 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
return false;
}
break;
//case InstrId::cpu_trunc_l_s:
// if ((fd & 1) == 0 && (fs & 1) == 0) {
// // even fpr
// print_line("ctx->f{}.u64 = TRUNC_L_S(ctx->f{}.fl)", fd, fs);
// } else {
// fmt::print(stderr, "Invalid operand(s) for trunc.l.s: f{} f{}\n", fd, fs);
// return false;
// }
// break;
//case InstrId::cpu_trunc_l_d:
// if ((fd & 1) == 0 && (fs & 1) == 0) {
// // even fpr
// print_line("ctx->f{}.u64 = TRUNC_L_D(ctx->f{}.d)", fd, fs);
// } else {
// fmt::print(stderr, "Invalid operand(s) for trunc.l.d: f{} f{}\n", fd, fs);
// return false;
// }
// break;
// TODO rounding modes
case InstrId::cpu_ctc1:
case InstrId::cpu_cfc1:
break;
case InstrId::cpu_cvt_w_s:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("ctx->f{}.u32l = CVT_W_S(ctx->f{}.fl)", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for cvt.w.s: f{} f{}\n", fd, fs);
return false;
}
break;
case InstrId::cpu_cvt_w_d:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("ctx->f{}.u32l = CVT_W_D(ctx->f{}.d)", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for cvt.w.d: f{} f{}\n", fd, fs);
return false;
}
break;
case InstrId::cpu_round_w_s:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("ctx->f{}.u32l = lroundf(ctx->f{}.fl)", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for round.w.s: f{} f{}\n", fd, fs);
return false;
}
break;
case InstrId::cpu_round_w_d:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("ctx->f{}.u32l = lround(ctx->f{}.d)", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for round.w.d: f{} f{}\n", fd, fs);
return false;
}
break;
case InstrId::cpu_ceil_w_s:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("ctx->f{}.u32l = S32(ceilf(ctx->f{}.fl))", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for ceil.w.s: f{} f{}\n", fd, fs);
return false;
}
break;
case InstrId::cpu_ceil_w_d:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("ctx->f{}.u32l = S32(ceil(ctx->f{}.d))", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for ceil.w.d: f{} f{}\n", fd, fs);
return false;
}
break;
case InstrId::cpu_floor_w_s:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("ctx->f{}.u32l = S32(floorf(ctx->f{}.fl))", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for floor.w.s: f{} f{}\n", fd, fs);
return false;
}
break;
case InstrId::cpu_floor_w_d:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("ctx->f{}.u32l = S32(floor(ctx->f{}.d))", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for floor.w.d: f{} f{}\n", fd, fs);
return false;
}
break;
default:
fmt::print(stderr, "Unhandled instruction: {}\n", instr.getOpcodeName());
return false;
@ -763,7 +896,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
return true;
}
bool RecompPort::recompile_function(const RecompPort::Context& context, const RecompPort::Function& func, std::string_view output_path) {
bool RecompPort::recompile_function(const RecompPort::Context& context, const RecompPort::Function& func, std::string_view output_path, std::span<std::vector<uint32_t>> static_funcs_out) {
//fmt::print("Recompiling {}\n", func.name);
std::vector<rabbitizer::InstructionCpu> instructions;
@ -835,7 +968,7 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re
++cur_label;
}
// Process the current instruction and check for errors
if (process_instruction(context, func, stats, skipped_insns, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, needs_link_branch, is_branch_likely) == false) {
if (process_instruction(context, func, stats, skipped_insns, instr_index, instructions, output_file, false, needs_link_branch, num_link_branches, needs_link_branch, is_branch_likely, static_funcs_out) == false) {
fmt::print(stderr, "Error in recompilation, clearing {}\n", output_path);
output_file.clear();
return false;
@ -856,7 +989,7 @@ bool RecompPort::recompile_function(const RecompPort::Context& context, const Re
}
// Terminate the function
fmt::print(output_file, "}}\n");
fmt::print(output_file, ";}}\n");
return true;
}

View file

@ -150,7 +150,7 @@ XCOPY "$(ProjectDir)Lib\SDL2-2.24.0\lib\$(Platform)\SDL2.dll" "$(TargetDir)" /S
<ClCompile Include="funcs\lookup.cpp" />
<ClCompile Include="portultra\events.cpp" />
<ClCompile Include="portultra\ultrainit.cpp" />
<ClCompile Include="portultra\main.c" />
<ClCompile Include="portultra\port_main.c" />
<ClCompile Include="portultra\mesgqueue.cpp" />
<ClCompile Include="portultra\scheduler.cpp" />
<ClCompile Include="portultra\task_pthreads.cpp" />
@ -162,6 +162,7 @@ XCOPY "$(ProjectDir)Lib\SDL2-2.24.0\lib\$(Platform)\SDL2.dll" "$(TargetDir)" /S
<ClCompile Include="src\dp.cpp" />
<ClCompile Include="src\eep.cpp" />
<ClCompile Include="portultra\misc_ultra.cpp" />
<ClCompile Include="src\math_routines.cpp" />
<ClCompile Include="src\pi.cpp" />
<ClCompile Include="src\portultra_translation.cpp" />
<ClCompile Include="src\recomp.cpp" />

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,5 @@
#include "ultra64.h"
extern uint64_t start_time;
#define K0BASE 0x80000000
#define K1BASE 0xA0000000
#define K2BASE 0xC0000000

View file

@ -1,5 +1,6 @@
#include <cstdio>
#include <thread>
#include <cassert>
#include "ultra64.h"
#include "multilibultra.hpp"
@ -102,6 +103,14 @@ extern "C" void osCreateThread(RDRAM_ARG PTR(OSThread) t_, OSId id, PTR(thread_f
t->context->host_thread = std::thread{_thread_func, PASS_RDRAM t_, entrypoint, arg};
}
extern "C" void osStopThread(RDRAM_ARG PTR(OSThread) t_) {
assert(false);
}
extern "C" void osDestroyThread(RDRAM_ARG PTR(OSThread) t_) {
assert(false);
}
extern "C" void osSetThreadPri(RDRAM_ARG PTR(OSThread) t, OSPri pri) {
if (t == NULLPTR) {
t = thread_self;
@ -120,6 +129,20 @@ extern "C" void osSetThreadPri(RDRAM_ARG PTR(OSThread) t, OSPri pri) {
}
}
extern "C" OSPri osGetThreadPri(RDRAM_ARG PTR(OSThread) t) {
if (t == NULLPTR) {
t = thread_self;
}
return TO_PTR(OSThread, t)->priority;
}
extern "C" OSId osGetThreadId(RDRAM_ARG PTR(OSThread) t) {
if (t == NULLPTR) {
t = thread_self;
}
return TO_PTR(OSThread, t)->id;
}
// TODO yield thread, need a stable priority queue in the scheduler
void Multilibultra::set_self_paused(RDRAM_ARG1) {

View file

@ -149,7 +149,11 @@ typedef void (thread_func_t)(PTR(void));
void osCreateThread(RDRAM_ARG PTR(OSThread) t, OSId id, PTR(thread_func_t) entry, PTR(void) arg, PTR(void) sp, OSPri p);
void osStartThread(RDRAM_ARG PTR(OSThread) t);
void osStopThread(RDRAM_ARG PTR(OSThread) t);
void osDestroyThread(RDRAM_ARG PTR(OSThread) t);
void osSetThreadPri(RDRAM_ARG PTR(OSThread) t, OSPri pri);
OSPri osGetThreadPri(RDRAM_ARG PTR(OSThread) thread);
OSId osGetThreadId(RDRAM_ARG PTR(OSThread) t);
s32 MQ_GET_COUNT(RDRAM_ARG PTR(OSMesgQueue));
s32 MQ_IS_EMPTY(RDRAM_ARG PTR(OSMesgQueue));

View file

@ -54,6 +54,14 @@ extern "C" void osContGetReadData_recomp(uint8_t* restrict rdram, recomp_context
MEM_B(4, pad) = 0;
}
extern "C" void osContSetCh_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
ctx->r2 = 0;
}
extern "C" void __osMotorAccess_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
}
extern "C" void osMotorInit_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
;
}

View file

@ -0,0 +1,80 @@
#include "../portultra/multilibultra.hpp"
#include "recomp.h"
extern "C" void __udivdi3_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
uint64_t a = (ctx->r4 << 32) | (ctx->r5 << 0);
uint64_t b = (ctx->r6 << 32) | (ctx->r7 << 0);
uint64_t ret = a / b;
ctx->r2 = (int32_t)(ret >> 32);
ctx->r3 = (int32_t)(ret >> 0);
}
extern "C" void __divdi3_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
int64_t a = (ctx->r4 << 32) | (ctx->r5 << 0);
int64_t b = (ctx->r6 << 32) | (ctx->r7 << 0);
int64_t ret = a / b;
ctx->r2 = (int32_t)(ret >> 32);
ctx->r3 = (int32_t)(ret >> 0);
}
extern "C" void __umoddi3_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
uint64_t a = (ctx->r4 << 32) | (ctx->r5 << 0);
uint64_t b = (ctx->r6 << 32) | (ctx->r7 << 0);
uint64_t ret = a % b;
ctx->r2 = (int32_t)(ret >> 32);
ctx->r3 = (int32_t)(ret >> 0);
}
extern "C" void __ull_div_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
uint64_t a = (ctx->r4 << 32) | (ctx->r5 << 0);
uint64_t b = (ctx->r6 << 32) | (ctx->r7 << 0);
uint64_t ret = a / b;
ctx->r2 = (int32_t)(ret >> 32);
ctx->r3 = (int32_t)(ret >> 0);
}
extern "C" void __ll_div_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
int64_t a = (ctx->r4 << 32) | (ctx->r5 << 0);
int64_t b = (ctx->r6 << 32) | (ctx->r7 << 0);
int64_t ret = a / b;
ctx->r2 = (int32_t)(ret >> 32);
ctx->r3 = (int32_t)(ret >> 0);
}
extern "C" void __ll_mul_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
uint64_t a = (ctx->r4 << 32) | (ctx->r5 << 0);
uint64_t b = (ctx->r6 << 32) | (ctx->r7 << 0);
uint64_t ret = a * b;
ctx->r2 = (int32_t)(ret >> 32);
ctx->r3 = (int32_t)(ret >> 0);
}
extern "C" void __ull_rem_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
uint64_t a = (ctx->r4 << 32) | (ctx->r5 << 0);
uint64_t b = (ctx->r6 << 32) | (ctx->r7 << 0);
uint64_t ret = a % b;
ctx->r2 = (int32_t)(ret >> 32);
ctx->r3 = (int32_t)(ret >> 0);
}
extern "C" void __ull_to_d_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
uint64_t a = (ctx->r4 << 32) | (ctx->r5 << 0);
double ret = (double)a;
ctx->f0.d = ret;
}
extern "C" void __ull_to_f_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
uint64_t a = (ctx->r4 << 32) | (ctx->r5 << 0);
float ret = (float)a;
ctx->f0.fl = ret;
}

View file

@ -1,4 +1,6 @@
#include <memory>
#include "../portultra/ultra64.h"
#include "../portultra/multilibultra.hpp"
#include "recomp.h"
extern "C" void osInitialize_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
@ -10,40 +12,56 @@ extern "C" void __osInitialize_common_recomp(uint8_t * restrict rdram, recomp_co
}
extern "C" void osCreateThread_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
osCreateThread(rdram, (uint32_t)ctx->r4, (OSId)ctx->r5, (uint32_t)ctx->r6, (uint32_t)ctx->r7,
(uint32_t)MEM_W(0x10, ctx->r29), (OSPri)MEM_W(0x14, ctx->r29));
osCreateThread(rdram, (int32_t)ctx->r4, (OSId)ctx->r5, (int32_t)ctx->r6, (int32_t)ctx->r7,
(int32_t)MEM_W(0x10, ctx->r29), (OSPri)MEM_W(0x14, ctx->r29));
}
extern "C" void osStartThread_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
osStartThread(rdram, (uint32_t)ctx->r4);
osStartThread(rdram, (int32_t)ctx->r4);
}
extern "C" void osStopThread_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
osStopThread(rdram, (int32_t)ctx->r4);
}
extern "C" void osDestroyThread_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
osDestroyThread(rdram, (int32_t)ctx->r4);
}
extern "C" void osSetThreadPri_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
osSetThreadPri(rdram, (uint32_t)ctx->r4, (OSPri)ctx->r5);
osSetThreadPri(rdram, (int32_t)ctx->r4, (OSPri)ctx->r5);
}
extern "C" void osGetThreadPri_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
ctx->r2 = osGetThreadPri(rdram, (int32_t)ctx->r4);
}
extern "C" void osGetThreadId_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
ctx->r2 = osGetThreadId(rdram, (int32_t)ctx->r4);
}
extern "C" void osCreateMesgQueue_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
osCreateMesgQueue(rdram, (uint32_t)ctx->r4, (uint32_t)ctx->r5, (s32)ctx->r6);
osCreateMesgQueue(rdram, (int32_t)ctx->r4, (int32_t)ctx->r5, (s32)ctx->r6);
}
extern "C" void osRecvMesg_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
ctx->r2 = osRecvMesg(rdram, (uint32_t)ctx->r4, (uint32_t)ctx->r5, (s32)ctx->r6);
ctx->r2 = osRecvMesg(rdram, (int32_t)ctx->r4, (int32_t)ctx->r5, (s32)ctx->r6);
}
extern "C" void osSendMesg_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
ctx->r2 = osSendMesg(rdram, (uint32_t)ctx->r4, (OSMesg)ctx->r5, (s32)ctx->r6);
ctx->r2 = osSendMesg(rdram, (int32_t)ctx->r4, (OSMesg)ctx->r5, (s32)ctx->r6);
}
extern "C" void osJamMesg_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
ctx->r2 = osJamMesg(rdram, (uint32_t)ctx->r4, (OSMesg)ctx->r5, (s32)ctx->r6);
ctx->r2 = osJamMesg(rdram, (int32_t)ctx->r4, (OSMesg)ctx->r5, (s32)ctx->r6);
}
extern "C" void osSetEventMesg_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
osSetEventMesg(rdram, (OSEvent)ctx->r4, (uint32_t)ctx->r5, (OSMesg)ctx->r6);
osSetEventMesg(rdram, (OSEvent)ctx->r4, (int32_t)ctx->r5, (OSMesg)ctx->r6);
}
extern "C" void osViSetEvent_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
osViSetEvent(rdram, (uint32_t)ctx->r4, (OSMesg)ctx->r5, (u32)ctx->r6);
osViSetEvent(rdram, (int32_t)ctx->r4, (OSMesg)ctx->r5, (u32)ctx->r6);
}
extern "C" void osGetCount_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
@ -91,3 +109,77 @@ extern "C" void __osRestoreInt_recomp(uint8_t * restrict rdram, recomp_context *
extern "C" void __osSetFpcCsr_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
ctx->r2 = 0;
}
extern "C" void __checkHardware_msp_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
ctx->r2 = 0;
}
extern "C" void __checkHardware_kmc_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
ctx->r2 = 0;
}
extern "C" void __checkHardware_isv_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
ctx->r2 = 0;
}
extern "C" void __osInitialize_msp_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
}
extern "C" void __osInitialize_kmc_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
}
extern "C" void __osInitialize_isv_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
}
extern "C" void __osRdbSend_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
gpr buf = ctx->r4;
size_t size = ctx->r5;
u32 type = (u32)ctx->r6;
std::unique_ptr<char[]> to_print = std::make_unique<char[]>(size);
for (size_t i = 0; i < size; i++) {
to_print[i] = MEM_B(i, buf);
}
to_print[size] = '\x00';
fwrite(to_print.get(), 1, size, stdout);
ctx->r2 = size;
}
// For the Mario Party games (not working)
//extern "C" void longjmp_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
// RecompJmpBuf* buf = TO_PTR(RecompJmpBuf, ctx->r4);
//
// // Check if this is a buffer that was set up with setjmp
// if (buf->magic == SETJMP_MAGIC) {
// // If so, longjmp to it
// // Setjmp/longjmp does not work across threads, so verify that this buffer was made by this thread
// assert(buf->owner == Multilibultra::this_thread());
// longjmp(buf->storage->buffer, ctx->r5);
// } else {
// // Otherwise, check if it was one built manually by the game with $ra pointing to a function
// gpr sp = MEM_W(0, ctx->r4);
// gpr ra = MEM_W(4, ctx->r4);
// ctx->r29 = sp;
// recomp_func_t* target = LOOKUP_FUNC(ra);
// if (target == nullptr) {
// fprintf(stderr, "Failed to find function for manual longjmp\n");
// std::quick_exit(EXIT_FAILURE);
// }
// target(rdram, ctx);
//
// // TODO kill this thread if the target function returns
// assert(false);
// }
//}
//
//#undef setjmp_recomp
//extern "C" void setjmp_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
// fprintf(stderr, "Program called setjmp_recomp\n");
// std::quick_exit(EXIT_FAILURE);
//}
//
//extern "C" int32_t osGetThreadEx(void) {
// return Multilibultra::this_thread();
//}

View file

@ -66,8 +66,6 @@ void do_rom_read(uint8_t* rdram, gpr ram_address, uint32_t dev_address, size_t n
std::unique_ptr<uint8_t[]> rom;
size_t rom_size;
uint64_t start_time;
// Recomp generation functions
extern "C" void recomp_entrypoint(uint8_t * restrict rdram, recomp_context * restrict ctx);
gpr get_entrypoint_address();
@ -117,18 +115,6 @@ int main(int argc, char **argv) {
func_map[funcs[i].first] = funcs[i].second;
}
// TODO move this to a more appropriate place
#ifdef _WIN32
{
SYSTEMTIME st;
FILETIME ft;
GetSystemTime(&st);
SystemTimeToFileTime(&st, &ft);
start_time = ((uint64_t)ft.dwHighDateTime << 32) + ft.dwLowDateTime;
}
#endif
// Set up stack pointer
context.r29 = 0xFFFFFFFF803FFFF0u;

View file

@ -1,6 +1,10 @@
#include "../portultra/multilibultra.hpp"
#include "recomp.h"
extern "C" void osViSetYScale_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
;
}
extern "C" void osCreateViManager_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
;
}