Implemented SP, DP, and SI events

Inject a thread pause into infinite loops to allow the idle thread to
yield for event processing
Removed all preemption usage from the scheduler
This commit is contained in:
Mr-Wiseguy 2022-11-18 16:12:39 -05:00
parent b94fe6f5fb
commit 39b67c8468
20 changed files with 449 additions and 113 deletions

View file

@ -3,6 +3,7 @@
#include <stdint.h>
#include <math.h>
#include <assert.h>
#if 0 // treat GPRs as 32-bit, should be better codegen
typedef uint32_t gpr;
@ -22,9 +23,6 @@ typedef uint64_t gpr;
#define SUB32(a, b) \
((gpr)(int32_t)((a) - (b)))
#define MEM_D(offset, reg) \
(*(int64_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF)))
#define MEM_W(offset, reg) \
(*(int32_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF)))
@ -40,6 +38,22 @@ typedef uint64_t gpr;
#define MEM_BU(offset, reg) \
(*(uint8_t*)(rdram + ((((reg) + (offset)) ^ 3) & 0x3FFFFFF)))
// Stores the 64-bit value `val` at rdram address (reg + offset) as two 32-bit words.
// The upper 32 bits go to the word at +0 and the lower 32 bits to the word at +4,
// which is the same layout load_doubleword() reads back. `val` is widened to 64 bits
// before shifting so the shift by 32 is well-defined for any integer argument type.
#define SD(val, offset, reg) { \
    *(uint32_t*)(rdram + ((((reg) + (offset) + 4)) & 0x3FFFFFF)) = (uint32_t)((uint64_t)(val) >> 0); \
    *(uint32_t*)(rdram + ((((reg) + (offset) + 0)) & 0x3FFFFFF)) = (uint32_t)((uint64_t)(val) >> 32); \
}
// Loads a 64-bit value from rdram at address (reg + offset) as two 32-bit words:
// the word at +0 supplies the upper 32 bits and the word at +4 the lower 32 bits.
// Arguments are forwarded to MEM_W in its own (offset, reg) order.
static inline uint64_t load_doubleword(uint8_t* rdram, gpr reg, gpr offset) {
    uint64_t hi = (uint64_t)(uint32_t)MEM_W(offset + 0, reg);
    uint64_t lo = (uint64_t)(uint32_t)MEM_W(offset + 4, reg);
    return (hi << 32) | (lo << 0);
}

// Doubleword load counterpart of the MEM_* accessors; expects `rdram` to be in scope.
#define LD(offset, reg) \
    load_doubleword(rdram, reg, offset)
// TODO proper lwl/lwr/swl/swr
#define MEM_WL(offset, reg) \
(*(int32_t*)(rdram + ((((reg) + (offset))) & 0x3FFFFFF)))
@ -86,6 +100,9 @@ typedef uint64_t gpr;
#define TRUNC_W_D(val) \
((int32_t)(val))
// Asserts that `val` is not NaN (a NaN is the only value that compares unequal to itself).
// The argument is parenthesized so that operator expressions (e.g. a conditional) are
// compared as a whole. It is evaluated twice, so it must not have side effects.
#define NAN_CHECK(val) \
    assert((val) == (val))
typedef union {
double d;
struct {

View file

@ -173,6 +173,7 @@ std::unordered_set<std::string> ignored_funcs {
"osStartTimer",
"osSetTimer",
"osStopTimer",
"osGetTime",
"__osInsertTimer",
"__osTimerInterrupt",
"__osTimerServicesInit",
@ -353,8 +354,8 @@ int main(int argc, char** argv) {
fmt::print("Function count: {}\n", context.functions.size());
std::ofstream func_lookup_file{ "out/funcs/lookup.cpp" };
std::ofstream func_header_file{ "out/funcs/funcs.h" };
std::ofstream func_lookup_file{ "test/funcs/lookup.cpp" };
std::ofstream func_header_file{ "test/funcs/funcs.h" };
fmt::print(func_lookup_file,
"#include <utility>\n"
@ -381,7 +382,7 @@ int main(int argc, char** argv) {
"void {}(uint8_t* restrict rdram, recomp_context* restrict ctx);\n", func.name);
fmt::print(func_lookup_file,
" {{ 0x{:08X}u, {} }},\n", func.vram, func.name);
if (RecompPort::recompile_function(context, func, "out/funcs/" + func.name + ".c") == false) {
if (RecompPort::recompile_function(context, func, "test/funcs/" + func.name + ".c") == false) {
func_lookup_file.clear();
fmt::print(stderr, "Error recompiling {}\n", func.name);
std::exit(EXIT_FAILURE);

View file

@ -318,7 +318,13 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
break;
case InstrId::cpu_j:
case InstrId::cpu_b:
print_unconditional_branch("goto L_{:08X}", (uint32_t)instr.getBranchVramGeneric());
{
uint32_t branch_target = instr.getBranchVramGeneric();
if (branch_target == instr_vram) {
print_line("void pause_self(uint8_t *rdram); pause_self(rdram)");
}
print_unconditional_branch("goto L_{:08X}", branch_target);
}
break;
case InstrId::cpu_jr:
if (rs == (int)rabbitizer::Registers::Cpu::GprO32::GPR_O32_ra) {
@ -422,6 +428,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
if ((ft & 1) == 0) {
// even fpr
print_line("ctx->f{}.u32l = MEM_W({:#X}, {}{})", ft, (int16_t)imm, ctx_gpr_prefix(base), base);
print_line("NAN_CHECK(ctx->f{}.fl)", ft);
} else {
// odd fpr
print_line("ctx->f{}.u32h = MEM_W({:#X}, {}{})", ft - 1, (int16_t)imm, ctx_gpr_prefix(base), base);
@ -429,7 +436,8 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
break;
case InstrId::cpu_ldc1:
if ((ft & 1) == 0) {
print_line("ctx->f{}.u64 = MEM_D({:#X}, {}{})", ft, (int16_t)imm, ctx_gpr_prefix(base), base);
print_line("ctx->f{}.u64 = LD({:#X}, {}{})", ft, (int16_t)imm, ctx_gpr_prefix(base), base);
print_line("NAN_CHECK(ctx->f{}.d)", ft);
} else {
fmt::print(stderr, "Invalid operand for ldc1: f{}\n", ft);
return false;
@ -446,7 +454,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
break;
case InstrId::cpu_sdc1:
if ((ft & 1) == 0) {
print_line("MEM_D({:#X}, {}{}) = ctx->f{}.u64", (int16_t)imm, ctx_gpr_prefix(base), base, ft);
print_line("SD(ctx->f{}.u64, {:#X}, {}{})", ft, (int16_t)imm, ctx_gpr_prefix(base), base);
} else {
fmt::print(stderr, "Invalid operand for sdc1: f{}\n", ft);
return false;
@ -525,6 +533,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_mov_s:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.fl)", fs);
print_line("ctx->f{}.fl = ctx->f{}.fl", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for mov.s: f{} f{}\n", fd, fs);
@ -534,6 +543,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_mov_d:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.d)", fs);
print_line("ctx->f{}.d = ctx->f{}.d", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for mov.d: f{} f{}\n", fd, fs);
@ -543,6 +553,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_neg_s:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.fl)", fs);
print_line("ctx->f{}.fl = -ctx->f{}.fl", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for neg.s: f{} f{}\n", fd, fs);
@ -552,6 +563,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_neg_d:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.d)", fs);
print_line("ctx->f{}.d = -ctx->f{}.d", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for neg.d: f{} f{}\n", fd, fs);
@ -561,6 +573,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_abs_s:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.fl)", fs);
print_line("ctx->f{}.fl = fabsf(ctx->f{}.fl)", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for abs.s: f{} f{}\n", fd, fs);
@ -570,6 +583,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_abs_d:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.d)", fs);
print_line("ctx->f{}.d = fabs(ctx->f{}.d)", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for abs.d: f{} f{}\n", fd, fs);
@ -579,6 +593,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_sqrt_s:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.fl)", fs);
print_line("ctx->f{}.fl = sqrtf(ctx->f{}.fl)", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for sqrt.s: f{} f{}\n", fd, fs);
@ -588,6 +603,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_sqrt_d:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.d)", fs);
print_line("ctx->f{}.d = sqrt(ctx->f{}.d)", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for sqrt.d: f{} f{}\n", fd, fs);
@ -597,6 +613,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_add_s:
if ((fd & 1) == 0 && (fs & 1) == 0 && (ft & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.fl); NAN_CHECK(ctx->f{}.fl)", fs, ft);
print_line("ctx->f{}.fl = ctx->f{}.fl + ctx->f{}.fl", fd, fs, ft);
} else {
fmt::print(stderr, "Invalid operand(s) for add.s: f{} f{} f{}\n", fd, fs, ft);
@ -606,6 +623,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_add_d:
if ((fd & 1) == 0 && (fs & 1) == 0 && (ft & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.d); NAN_CHECK(ctx->f{}.d)", fs, ft);
print_line("ctx->f{}.d = ctx->f{}.d + ctx->f{}.d", fd, fs, ft);
} else {
fmt::print(stderr, "Invalid operand(s) for add.d: f{} f{} f{}\n", fd, fs, ft);
@ -615,6 +633,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_sub_s:
if ((fd & 1) == 0 && (fs & 1) == 0 && (ft & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.fl); NAN_CHECK(ctx->f{}.fl)", fs, ft);
print_line("ctx->f{}.fl = ctx->f{}.fl - ctx->f{}.fl", fd, fs, ft);
} else {
fmt::print(stderr, "Invalid operand(s) for sub.s: f{} f{} f{}\n", fd, fs, ft);
@ -624,6 +643,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_sub_d:
if ((fd & 1) == 0 && (fs & 1) == 0 && (ft & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.d); NAN_CHECK(ctx->f{}.d)", fs, ft);
print_line("ctx->f{}.d = ctx->f{}.d - ctx->f{}.d", fd, fs, ft);
} else {
fmt::print(stderr, "Invalid operand(s) for sub.d: f{} f{} f{}\n", fd, fs, ft);
@ -633,6 +653,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_mul_s:
if ((fd & 1) == 0 && (fs & 1) == 0 && (ft & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.fl); NAN_CHECK(ctx->f{}.fl)", fs, ft);
print_line("ctx->f{}.fl = MUL_S(ctx->f{}.fl, ctx->f{}.fl)", fd, fs, ft);
} else {
fmt::print(stderr, "Invalid operand(s) for mul.s: f{} f{} f{}\n", fd, fs, ft);
@ -642,6 +663,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_mul_d:
if ((fd & 1) == 0 && (fs & 1) == 0 && (ft & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.d); NAN_CHECK(ctx->f{}.d)", fs, ft);
print_line("ctx->f{}.d = MUL_D(ctx->f{}.d, ctx->f{}.d)", fd, fs, ft);
} else {
fmt::print(stderr, "Invalid operand(s) for mul.d: f{} f{} f{}\n", fd, fs, ft);
@ -651,6 +673,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_div_s:
if ((fd & 1) == 0 && (fs & 1) == 0 && (ft & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.fl); NAN_CHECK(ctx->f{}.fl)", fs, ft);
print_line("ctx->f{}.fl = DIV_S(ctx->f{}.fl, ctx->f{}.fl)", fd, fs, ft);
} else {
fmt::print(stderr, "Invalid operand(s) for div.s: f{} f{} f{}\n", fd, fs, ft);
@ -660,6 +683,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_div_d:
if ((fd & 1) == 0 && (fs & 1) == 0 && (ft & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.d); NAN_CHECK(ctx->f{}.d)", fs, ft);
print_line("ctx->f{}.d = DIV_D(ctx->f{}.d, ctx->f{}.d)", fd, fs, ft);
} else {
fmt::print(stderr, "Invalid operand(s) for div.d: f{} f{} f{}\n", fd, fs, ft);
@ -687,6 +711,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_cvt_d_s:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.fl)", fs);
print_line("ctx->f{}.d = CVT_D_S(ctx->f{}.fl)", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for cvt.d.s: f{} f{}\n", fd, fs);
@ -696,6 +721,7 @@ bool process_instruction(const RecompPort::Context& context, const RecompPort::F
case InstrId::cpu_cvt_s_d:
if ((fd & 1) == 0 && (fs & 1) == 0) {
// even fpr
print_line("NAN_CHECK(ctx->f{}.d)", fs);
print_line("ctx->f{}.fl = CVT_S_D(ctx->f{}.d)", fd, fs);
} else {
fmt::print(stderr, "Invalid operand(s) for cvt.s.d: f{} f{}\n", fd, fs);

View file

@ -1861,7 +1861,6 @@
<ClCompile Include="funcs\obj_update_sleeping.c" />
<ClCompile Include="funcs\osCreateScheduler.c" />
<ClCompile Include="funcs\osDriveRomInit.c" />
<ClCompile Include="funcs\osGetTime.c" />
<ClCompile Include="funcs\osScAddClient.c" />
<ClCompile Include="funcs\osScGetCmdQ.c" />
<ClCompile Include="funcs\osScRemoveClient.c" />
@ -1998,6 +1997,7 @@
<ClCompile Include="funcs\__sinf.c" />
<ClCompile Include="funcs\__udivdi3.c" />
<ClCompile Include="funcs\__udivmoddi4.c" />
<ClCompile Include="portultra\events.cpp" />
<ClCompile Include="portultra\init.cpp" />
<ClCompile Include="portultra\main.c" />
<ClCompile Include="portultra\mesgqueue.cpp" />

View file

@ -5529,9 +5529,6 @@
<ClCompile Include="funcs\osDriveRomInit.c">
<Filter>Funcs</Filter>
</ClCompile>
<ClCompile Include="funcs\osGetTime.c">
<Filter>Funcs</Filter>
</ClCompile>
<ClCompile Include="funcs\osScAddClient.c">
<Filter>Funcs</Filter>
</ClCompile>
@ -5700,6 +5697,9 @@
<ClCompile Include="funcs\lookup.cpp">
<Filter>Funcs</Filter>
</ClCompile>
<ClCompile Include="portultra\events.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="portultra\platform_specific.h">

201
test/portultra/events.cpp Normal file
View file

@ -0,0 +1,201 @@
#include <thread>
#include <atomic>
#include <chrono>
#include <cinttypes>
#include "ultra64.h"
#include "multilibultra.hpp"
#include "recomp.h"
// File-local state shared between the game-facing event API in this file and the
// host-side event threads. Each sub-struct holds the message queue and message
// registered for one event source (via osSetEventMesg / osViSetEvent) together with
// a std::thread slot; only the vi and sp threads are started by init_events here.
static struct {
// VI event: vi_thread_func sends `msg` to `mq` once every `retrace_count` retraces.
struct {
std::thread thread;
PTR(OSMesgQueue) mq = NULLPTR;
OSMesg msg = (OSMesg)0;
int retrace_count = 1;
} vi;
// SP event: `task` is the copy of the most recently submitted task and `task_queued`
// is the handshake flag between submit_rsp_task (sets it) and gfx_thread_func (clears it).
struct {
std::thread thread;
PTR(OSMesgQueue) mq = NULLPTR;
OSMesg msg = (OSMesg)0;
OSTask task;
std::atomic_flag task_queued;
} sp;
// DP event registration; signalled by dp_complete.
struct {
std::thread thread;
PTR(OSMesgQueue) mq = NULLPTR;
OSMesg msg = (OSMesg)0;
} dp;
// AI event registration; stored by osSetEventMesg but not signalled anywhere in this file.
struct {
std::thread thread;
PTR(OSMesgQueue) mq = NULLPTR;
OSMesg msg = (OSMesg)0;
} ai;
// SI event registration; signalled by send_si_message.
// NOTE(review): `read_queued` is not referenced in the visible code — confirm whether it is still needed.
struct {
std::thread thread;
PTR(OSMesgQueue) mq = NULLPTR;
OSMesg msg = (OSMesg)0;
std::atomic_flag read_queued;
} si;
// The same message queue may be used for multiple events, so share a mutex for all of them
std::mutex message_mutex;
// Base pointer of emulated RDRAM, recorded by init_events.
uint8_t* rdram;
// Reference time point for osGetCount/osGetTime and VI pacing; assigned in vi_thread_func.
// NOTE(review): system_clock may jump if the wall clock is adjusted — consider whether
// steady_clock would be more appropriate.
std::chrono::system_clock::time_point start;
} events_context{};
// Registers the message queue and message that should be posted when the given event
// fires. Only the SP, DP, AI and SI events are recorded here; registrations for any
// other event id are ignored.
extern "C" void osSetEventMesg(RDRAM_ARG OSEvent event_id, PTR(OSMesgQueue) mq_, OSMesg msg) {
    // Registrations are guarded by the same mutex the event senders hold.
    std::lock_guard lock{ events_context.message_mutex };

    switch (event_id) {
        case OS_EVENT_SP:
            events_context.sp.msg = msg;
            events_context.sp.mq = mq_;
            break;
        case OS_EVENT_DP:
            events_context.dp.msg = msg;
            events_context.dp.mq = mq_;
            break;
        case OS_EVENT_AI:
            events_context.ai.msg = msg;
            events_context.ai.mq = mq_;
            break;
        case OS_EVENT_SI:
            events_context.si.msg = msg;
            events_context.si.mq = mq_;
            break;
        default:
            // No state is kept for the remaining events in this file.
            break;
    }
}
// Registers the queue/message pair that receives VI retrace notifications and how many
// retraces should elapse between notifications.
extern "C" void osViSetEvent(RDRAM_ARG PTR(OSMesgQueue) mq_, OSMesg msg, u32 retrace_count) {
    auto& vi_state = events_context.vi;

    // Hold the shared message mutex while the registration is being replaced.
    const std::lock_guard<std::mutex> guard(events_context.message_mutex);
    vi_state.retrace_count = retrace_count;
    vi_state.msg = msg;
    vi_state.mq = mq_;
}
// Number of N64 CPU counter ticks that elapse in one millisecond
constexpr uint32_t counter_per_ms = 46'875;

// Converts a host clock duration into N64 CPU counter ticks.
// The duration is truncated to whole microseconds and the conversion is done purely in
// 64-bit integer arithmetic: (micros * ticks-per-ms) / (micros-per-ms) = ticks.
// The intermediate product only overflows after roughly 12.47 years of run time.
uint64_t duration_to_count(std::chrono::system_clock::duration duration) {
    using std::chrono::microseconds;
    const uint64_t elapsed_us = std::chrono::duration_cast<microseconds>(duration).count();
    return (elapsed_us * counter_per_ms) / 1000;
}
// Returns the low 32 bits of the emulated CPU counter, measured from events_context.start.
extern "C" u32 osGetCount() {
    const auto elapsed = std::chrono::system_clock::now() - events_context.start;
    // Truncation is intentional: the counter is expected to wrap around.
    return static_cast<uint32_t>(duration_to_count(elapsed));
}
// Returns the full (untruncated) emulated counter value measured from events_context.start.
extern "C" OSTime osGetTime() {
    const auto now = std::chrono::system_clock::now();
    return duration_to_count(now - events_context.start);
}
// Body of the VI thread. Records the start time, then loops forever: sleeps until the
// next 60 Hz retrace boundary, and every `retrace_count` retraces posts the registered
// VI message (non-blocking) to the registered queue, if one has been set.
void vi_thread_func() {
    using namespace std::chrono_literals;

    events_context.start = std::chrono::system_clock::now();
    uint64_t total_vis = 0;
    int remaining_retraces = events_context.vi.retrace_count;

    while (true) {
        // Determine the next VI time (more accurate than adding 16ms each VI interrupt)
        auto next = events_context.start + (total_vis * 1000000us) / 60;
        //if (next > std::chrono::system_clock::now()) {
        //    printf("Sleeping for %" PRIu64 " us to get from %" PRIu64 " us to %" PRIu64 " us \n",
        //        (next - std::chrono::system_clock::now()) / 1us,
        //        (std::chrono::system_clock::now() - events_context.start) / 1us,
        //        (next - events_context.start) / 1us);
        //} else {
        //    printf("No need to sleep\n");
        //}
        std::this_thread::sleep_until(next);

        // Calculate how many VIs have passed
        uint64_t new_total_vis = ((std::chrono::system_clock::now() - events_context.start) * 60 / 1000ms) + 1;
        if (new_total_vis > total_vis + 1) {
            // The skipped-frame count is unsigned, so it is printed with PRIu64.
            printf("Skipped %" PRIu64 " frames in VI interrupt thread!\n", new_total_vis - total_vis - 1);
        }
        total_vis = new_total_vis;

        remaining_retraces--;
        if (remaining_retraces == 0) {
            std::lock_guard lock{ events_context.message_mutex };
            // Reload the countdown under the lock so a concurrent osViSetEvent is observed.
            remaining_retraces = events_context.vi.retrace_count;

            // `rdram` must be a local with this name for PASS_RDRAM.
            uint8_t* rdram = events_context.rdram;
            if (events_context.vi.mq != NULLPTR) {
                if (osSendMesg(PASS_RDRAM events_context.vi.mq, events_context.vi.msg, OS_MESG_NOBLOCK) == -1) {
                    //printf("Game skipped a VI frame!\n");
                }
            }
        }
    }
}
void sp_complete() {
uint8_t* rdram = events_context.rdram;
std::lock_guard lock{ events_context.message_mutex };
osSendMesg(PASS_RDRAM events_context.sp.mq, events_context.sp.msg, OS_MESG_NOBLOCK);
}
void dp_complete() {
uint8_t* rdram = events_context.rdram;
std::lock_guard lock{ events_context.message_mutex };
osSendMesg(PASS_RDRAM events_context.dp.mq, events_context.dp.msg, OS_MESG_NOBLOCK);
}
// Body of the SP thread. Loops forever: waits for submit_rsp_task to queue a task, takes
// a private copy, releases the slot so another task can be queued, then dispatches on the
// task type. Graphics tasks signal both SP and DP completion, audio tasks signal SP
// completion only, and any other type terminates the process.
void gfx_thread_func() {
    while (true) {
        // Wait for a sp task to be queued
        events_context.sp.task_queued.wait(false);

        // Grab the task and inform the game that it's free to queue up a new task
        OSTask current_task = events_context.sp.task;
        events_context.sp.task_queued.clear();
        events_context.sp.task_queued.notify_all();

        // Process the task. This must be a comparison: an assignment here would overwrite
        // the type and route every task down the graphics path.
        if (current_task.t.type == M_GFXTASK) {
            // TODO interface with RT64 here

            // (TODO let RT64 do this) Tell the game that the RSP and RDP tasks are complete
            sp_complete();
            dp_complete();
        } else if (current_task.t.type == M_AUDTASK) {
            sp_complete();
        } else {
            fprintf(stderr, "Unknown task type: %" PRIu32 "\n", current_task.t.type);
            std::exit(EXIT_FAILURE);
        }
    }
}
// Hands a task over to the SP thread. Blocks while a previously submitted task is still
// waiting to be picked up, then stores a copy of the new task and wakes the SP thread.
void Multilibultra::submit_rsp_task(RDRAM_ARG PTR(OSTask) task_) {
    auto& sp_state = events_context.sp;
    const OSTask* incoming = TO_PTR(OSTask, task_);

    // Wait for the sp thread to clear the old task
    sp_state.task_queued.wait(true);

    // Copy the whole task rather than keeping a pointer to it, since that's what osSpTaskLoad does
    sp_state.task = *incoming;

    // Publish the task and wake anything waiting on the flag.
    sp_state.task_queued.test_and_set();
    sp_state.task_queued.notify_all();
}
void Multilibultra::send_si_message() {
uint8_t* rdram = events_context.rdram;
osSendMesg(PASS_RDRAM events_context.si.mq, events_context.si.msg, OS_MESG_NOBLOCK);
}
// Records the rdram base pointer and launches the VI and SP event threads.
void Multilibultra::init_events(uint8_t* rdram) {
    // The threads read events_context.rdram, so it is stored before they are started.
    events_context.rdram = rdram;

    events_context.vi.thread = std::thread(vi_thread_func);
    events_context.sp.thread = std::thread(gfx_thread_func);
}

View file

@ -1,6 +1,11 @@
#include "ultra64.h"
#include "multilibultra.hpp"
void Multilibultra::preinit(uint8_t* rdram) {
Multilibultra::set_main_thread();
Multilibultra::init_events(rdram);
}
extern "C" void osInitialize() {
Multilibultra::init_scheduler();
Multilibultra::native_init();

View file

@ -49,6 +49,12 @@ bool thread_queue_empty(RDRAM_ARG PTR(OSThread)* queue) {
extern "C" s32 osSendMesg(RDRAM_ARG PTR(OSMesgQueue) mq_, OSMesg msg, s32 flags) {
OSMesgQueue *mq = TO_PTR(OSMesgQueue, mq_);
// Prevent accidentally blocking anything that isn't a game thread
if (!Multilibultra::is_game_thread()) {
flags = OS_MESG_NOBLOCK;
}
Multilibultra::disable_preemption();
if (flags == OS_MESG_NOBLOCK) {
@ -81,9 +87,13 @@ extern "C" s32 osSendMesg(RDRAM_ARG PTR(OSMesgQueue) mq_, OSMesg msg, s32 flags)
Multilibultra::enable_preemption();
if (to_run) {
debug_printf("[Message Queue] Thread %d is unblocked\n", to_run->id);
OSThread* self = TO_PTR(OSThread, Multilibultra::this_thread());
if (to_run->priority > self->priority) {
Multilibultra::swap_to_thread(PASS_RDRAM to_run);
if (Multilibultra::is_game_thread()) {
OSThread* self = TO_PTR(OSThread, Multilibultra::this_thread());
if (to_run->priority > self->priority) {
Multilibultra::swap_to_thread(PASS_RDRAM to_run);
} else {
Multilibultra::schedule_running_thread(to_run);
}
} else {
Multilibultra::schedule_running_thread(to_run);
}
@ -182,7 +192,3 @@ extern "C" s32 osRecvMesg(RDRAM_ARG PTR(OSMesgQueue) mq_, PTR(OSMesg) msg_, s32
}
return 0;
}
extern "C" void osSetEventMesg(RDRAM_ARG OSEvent, PTR(OSMesgQueue), OSMesg) {
}

View file

@ -4,6 +4,7 @@
#include <thread>
#include <atomic>
#include <mutex>
#include <algorithm>
#include "ultra64.h"
#include "platform_specific.h"
@ -16,8 +17,10 @@ struct UltraThreadContext {
namespace Multilibultra {
void preinit(uint8_t* rdram);
void native_init();
void init_scheduler();
void init_events(uint8_t* rdram);
void native_thread_init(OSThread *t);
void set_self_paused(RDRAM_ARG1);
void wait_for_resumed(RDRAM_ARG1);
@ -36,6 +39,9 @@ void enable_preemption();
void notify_scheduler();
void reprioritize_thread(OSThread *t, OSPri pri);
void set_main_thread();
bool is_game_thread();
void submit_rsp_task(RDRAM_ARG PTR(OSTask) task);
void send_si_message();
class preemption_guard {
public:
@ -47,7 +53,9 @@ private:
} // namespace Multilibultra
#define debug_printf(...) printf(__VA_ARGS__);
//#define debug_printf(...)
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define debug_printf(...)
//#define debug_printf(...) printf(__VA_ARGS__);
#endif

View file

@ -113,20 +113,26 @@ void handle_scheduler_notifications() {
}
void swap_running_thread(thread_queue_t& running_thread_queue, OSThread*& cur_running_thread) {
OSThread* new_running_thread = running_thread_queue.top();
if (cur_running_thread != new_running_thread) {
if (cur_running_thread && cur_running_thread->state == OSThreadState::RUNNING) {
debug_printf("[Scheduler] Switching execution from thread %d (%d) to thread %d (%d)\n",
cur_running_thread->id, cur_running_thread->priority,
new_running_thread->id, new_running_thread->priority);
Multilibultra::pause_thread_impl(cur_running_thread);
} else {
debug_printf("[Scheduler] Switching execution to thread %d (%d)\n", new_running_thread->id, new_running_thread->priority);
if (running_thread_queue.size() > 0) {
OSThread* new_running_thread = running_thread_queue.top();
if (cur_running_thread != new_running_thread) {
if (cur_running_thread && cur_running_thread->state == OSThreadState::RUNNING) {
debug_printf("[Scheduler] Need to wait for thread %d to pause itself\n", cur_running_thread->id);
return;
//debug_printf("[Scheduler] Switching execution from thread %d (%d) to thread %d (%d)\n",
// cur_running_thread->id, cur_running_thread->priority,
// new_running_thread->id, new_running_thread->priority);
//Multilibultra::pause_thread_impl(cur_running_thread);
} else {
debug_printf("[Scheduler] Switching execution to thread %d (%d)\n", new_running_thread->id, new_running_thread->priority);
}
Multilibultra::resume_thread_impl(new_running_thread);
cur_running_thread = new_running_thread;
} else if (cur_running_thread && cur_running_thread->state != OSThreadState::RUNNING) {
Multilibultra::resume_thread_impl(cur_running_thread);
}
Multilibultra::resume_thread_impl(new_running_thread);
cur_running_thread = new_running_thread;
} else if (cur_running_thread && cur_running_thread->state != OSThreadState::RUNNING) {
Multilibultra::resume_thread_impl(cur_running_thread);
} else {
cur_running_thread = nullptr;
}
}
@ -240,12 +246,19 @@ void cleanup_thread(OSThread *t) {
void disable_preemption() {
scheduler_context.premption_mutex.lock();
scheduler_context.can_preempt = false;
if (Multilibultra::is_game_thread()) {
scheduler_context.can_preempt = false;
}
}
void enable_preemption() {
scheduler_context.can_preempt = true;
if (Multilibultra::is_game_thread()) {
scheduler_context.can_preempt = true;
}
#pragma warning(push)
#pragma warning( disable : 26110)
scheduler_context.premption_mutex.unlock();
#pragma warning( pop )
}
// lock's constructor is called first, so can_preempt is set after locking
@ -266,3 +279,8 @@ void notify_scheduler() {
}
}
// C-linkage entry point that forwards to Multilibultra::pause_self. The recompiler emits
// a call to this symbol before a branch that targets its own address, so that such a
// loop pauses the calling thread instead of spinning.
extern "C" void pause_self(uint8_t* rdram) {
Multilibultra::pause_self(rdram);
}

View file

@ -21,7 +21,7 @@ void Multilibultra::pause_thread_native_impl(OSThread *t) {
SuspendThread(t->context->host_thread.native_handle());
// Perform a synchronous action to ensure that the thread is suspended
// see: https://devblogs.microsoft.com/oldnewthing/20150205-00/?p=44743
CONTEXT threadContext;
CONTEXT threadContext{};
GetThreadContext(t->context->host_thread.native_handle(), &threadContext);
}

View file

@ -7,12 +7,19 @@
extern "C" void bootproc();
thread_local bool is_main_thread = false;
// Whether this thread is part of the game (i.e. the start thread or one spawned by osCreateThread)
thread_local bool is_game_thread = false;
thread_local PTR(OSThread) thread_self = NULLPTR;
void Multilibultra::set_main_thread() {
::is_game_thread = true;
is_main_thread = true;
}
// Returns the calling thread's thread_local `::is_game_thread` flag, i.e. whether this
// thread is the start thread or one spawned by osCreateThread.
bool Multilibultra::is_game_thread() {
return ::is_game_thread;
}
#if 0
int main(int argc, char** argv) {
Multilibultra::set_main_thread();
@ -31,6 +38,7 @@ static void _thread_func(RDRAM_ARG PTR(OSThread) self_, PTR(thread_func_t) entry
OSThread *self = TO_PTR(OSThread, self_);
debug_printf("[Thread] Thread created: %d\n", self->id);
thread_self = self_;
is_game_thread = true;
// Perform any necessary native thread initialization.
Multilibultra::native_thread_init(self);
@ -54,23 +62,24 @@ static void _thread_func(RDRAM_ARG PTR(OSThread) self_, PTR(thread_func_t) entry
Multilibultra::cleanup_thread(self);
}
extern "C" void osStartThread(RDRAM_ARG PTR(OSThread) t) {
debug_printf("[os] Start Thread %d\n", TO_PTR(OSThread, t)->id);
extern "C" void osStartThread(RDRAM_ARG PTR(OSThread) t_) {
OSThread* t = TO_PTR(OSThread, t_);
debug_printf("[os] Start Thread %d\n", t->id);
// Wait until the thread is initialized to indicate that it's ready to be started.
TO_PTR(OSThread, t)->context->initialized.wait(false);
// Wait until the thread is initialized to indicate that it's ready to be started.
t->context->initialized.wait(false);
debug_printf("[os] Thread %d is ready to be started\n", TO_PTR(OSThread, t)->id);
debug_printf("[os] Thread %d is ready to be started\n", t->id);
if (thread_self && (TO_PTR(OSThread, t)->priority > TO_PTR(OSThread, thread_self)->priority)) {
Multilibultra::swap_to_thread(PASS_RDRAM TO_PTR(OSThread, t));
if (thread_self && (t->priority > TO_PTR(OSThread, thread_self)->priority)) {
Multilibultra::swap_to_thread(PASS_RDRAM t);
} else {
Multilibultra::schedule_running_thread(TO_PTR(OSThread, t));
Multilibultra::schedule_running_thread(t);
}
// The main thread "becomes" the first thread started, so join on it and exit after it completes.
if (is_main_thread) {
TO_PTR(OSThread, t)->context->host_thread.join();
t->context->host_thread.join();
std::exit(EXIT_SUCCESS);
}
}
@ -132,10 +141,12 @@ void Multilibultra::pause_thread_impl(OSThread* t) {
}
void Multilibultra::resume_thread_impl(OSThread *t) {
if (t->state == OSThreadState::PREEMPTED) {
Multilibultra::resume_thread_native_impl(t);
}
t->state = OSThreadState::RUNNING;
t->context->running.store(true);
t->context->running.notify_all();
Multilibultra::resume_thread_native_impl(t);
}
PTR(OSThread) Multilibultra::this_thread() {

View file

@ -45,6 +45,28 @@ typedef uint8_t u8;
typedef s32 OSPri;
typedef s32 OSId;
typedef u64 OSTime;
#define OS_EVENT_SW1 0 /* CPU SW1 interrupt */
#define OS_EVENT_SW2 1 /* CPU SW2 interrupt */
#define OS_EVENT_CART 2 /* Cartridge interrupt: used by rmon */
#define OS_EVENT_COUNTER 3 /* Counter int: used by VI/Timer Mgr */
#define OS_EVENT_SP 4 /* SP task done interrupt */
#define OS_EVENT_SI 5 /* SI (controller) interrupt */
#define OS_EVENT_AI 6 /* AI interrupt */
#define OS_EVENT_VI 7 /* VI interrupt: used by VI/Timer Mgr */
#define OS_EVENT_PI 8 /* PI interrupt: used by PI Manager */
#define OS_EVENT_DP 9 /* DP full sync interrupt */
#define OS_EVENT_CPU_BREAK 10 /* CPU breakpoint: used by rmon */
#define OS_EVENT_SP_BREAK 11 /* SP breakpoint: used by rmon */
#define OS_EVENT_FAULT 12 /* CPU fault event: used by rmon */
#define OS_EVENT_THREADSTATUS 13 /* CPU thread status: used by rmon */
#define OS_EVENT_PRENMI 14 /* Pre NMI interrupt */
#define M_GFXTASK 1
#define M_AUDTASK 2
#define M_VIDTASK 3
/////////////
// Structs //
/////////////
@ -73,24 +95,6 @@ typedef struct OSThread_t {
typedef u32 OSEvent;
typedef PTR(void) OSMesg;
// This union holds C++ members along with a padding array. Those members are guarded by an ifdef for C++
// so that they don't cause compilation errors in C. The padding array reserves the necessary space to
// hold the atomic members in C and a static assert is used to ensure that the union is large enough.
// typedef union UltraQueueContext {
// u64 pad[1];
// #ifdef __cplusplus
// struct {
// } atomics;
// // Construct pad instead of the atomics, which get constructed in-place in osCreateMesgQueue
// UltraQueueContext() : pad{} {}
// #endif
// } UltraQueueContext;
// #ifdef __cplusplus
// static_assert(sizeof(UltraQueueContext::pad) == sizeof(UltraQueueContext),
// "UltraQueueContext does not have enough padding to hold C++ members!");
// #endif
typedef struct OSMesgQueue {
PTR(OSThread) blocked_on_recv; /* Linked list of threads blocked on receiving from this queue */
PTR(OSThread) blocked_on_send; /* Linked list of threads blocked on sending to this queue */
@ -100,6 +104,37 @@ typedef struct OSMesgQueue {
PTR(OSMesg) msg; /* Pointer to circular buffer to store messages */
} OSMesgQueue;
// Field layout of a task descriptor submitted through Multilibultra::submit_rsp_task.
// In the visible code only `type` is inspected (compared against M_GFXTASK / M_AUDTASK);
// the remaining fields are carried along when the task is copied.
// NOTE(review): field names follow libultra's OSTask_t — confirm sizes/offsets against the SDK header.
typedef struct {
u32 type;
u32 flags;
PTR(u64) ucode_boot;
u32 ucode_boot_size;
PTR(u64) ucode;
u32 ucode_size;
PTR(u64) ucode_data;
u32 ucode_data_size;
PTR(u64) dram_stack;
u32 dram_stack_size;
PTR(u64) output_buff;
PTR(u64) output_buff_size;
PTR(u64) data_ptr;
u32 data_size;
PTR(u64) yield_data_ptr;
u32 yield_data_size;
} OSTask_t;
// Wrapper union around OSTask_t; the int64_t member exists only to force the union's alignment.
typedef union {
OSTask_t t;
int64_t force_structure_alignment;
} OSTask;
///////////////
// Functions //
///////////////
@ -125,6 +160,9 @@ s32 osSendMesg(RDRAM_ARG PTR(OSMesgQueue), OSMesg, s32);
s32 osJamMesg(RDRAM_ARG PTR(OSMesgQueue), OSMesg, s32);
s32 osRecvMesg(RDRAM_ARG PTR(OSMesgQueue), PTR(OSMesg), s32);
void osSetEventMesg(RDRAM_ARG OSEvent, PTR(OSMesgQueue), OSMesg);
void osViSetEvent(RDRAM_ARG PTR(OSMesgQueue), OSMesg, u32);
u32 osGetCount();
OSTime osGetTime();
#ifdef __cplusplus
} // extern "C"

View file

@ -1,3 +1,4 @@
#include "../portultra/multilibultra.hpp"
#include "recomp.h"
extern "C" void osContInit_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
@ -5,7 +6,7 @@ extern "C" void osContInit_recomp(uint8_t* restrict rdram, recomp_context* restr
}
extern "C" void osContStartReadData_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
;
Multilibultra::send_si_message();
}
extern "C" void osContGetReadData_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {

View file

@ -30,28 +30,6 @@ extern "C" void osWritebackDCacheAll_recomp(uint8_t* restrict rdram, recomp_cont
;
}
// Ticks per second
constexpr uint32_t counter_rate = 46'875'000;
extern "C" void osGetCount_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
// TODO move this to a more appropriate place
int32_t count = 0;
#ifdef _WIN32
SYSTEMTIME st;
FILETIME ft;
GetSystemTime(&st);
SystemTimeToFileTime(&st, &ft);
uint64_t cur_time = ((uint64_t)ft.dwHighDateTime << 32) + ft.dwLowDateTime;
uint64_t delta_100ns = cur_time - start_time;
count = (delta_100ns * counter_rate) / (1'000'000'000 / 100);
#endif
ctx->r2 = count;
;
}
extern "C" void osSetIntMask_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
;
}

View file

@ -1,6 +1,7 @@
#include <memory>
#include "recomp.h"
#include "../portultra/ultra64.h"
#include "../portultra/multilibultra.hpp"
extern std::unique_ptr<uint8_t[]> rom;
extern size_t rom_size;
@ -15,6 +16,15 @@ extern "C" void osCreatePiManager_recomp(uint8_t* restrict rdram, recomp_context
constexpr uint32_t rom_base = 0xB0000000;
// Copies `num_bytes` bytes from the ROM image into emulated RDRAM one byte at a time,
// going through MEM_B so each byte lands where the byte accessor expects it.
// `dev_address` may be given with or without the rom_base bits set.
void do_rom_read(uint8_t* rdram, uint32_t ram_address, uint32_t dev_address, size_t num_bytes) {
    // TODO use word copies when possible
    const uint8_t* src = rom.get() + ((dev_address | rom_base) - rom_base);
    for (size_t offset = 0; offset < num_bytes; ++offset) {
        MEM_B(offset, ram_address) = src[offset];
    }
}
extern "C" void osPiStartDma_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
uint32_t mb = ctx->r4;
uint32_t pri = ctx->r5;
@ -25,11 +35,13 @@ extern "C" void osPiStartDma_recomp(uint8_t* restrict rdram, recomp_context* res
uint32_t mq_ = MEM_W(0x18, ctx->r29);
OSMesgQueue* mq = TO_PTR(OSMesgQueue, mq_);
printf("[pi] DMA from 0x%08X into 0x%08X of size 0x%08X\n", devAddr, dramAddr, size);
debug_printf("[pi] DMA from 0x%08X into 0x%08X of size 0x%08X\n", devAddr, dramAddr, size);
// TODO asynchronous transfer (will require preemption in the scheduler)
// TODO this won't handle unaligned DMA
memcpy(rdram + (dramAddr & 0x3FFFFFF), rom.get() + (devAddr | rom_base) - rom_base, size);
do_rom_read(rdram, dramAddr, devAddr, size);
//memcpy(rdram + (dramAddr & 0x3FFFFFF), rom.get() + (devAddr | rom_base) - rom_base, num_bytes);
// Send a message to the mq to indicate that the transfer completed
osSendMesg(rdram, mq_, 0, OS_MESG_NOBLOCK);

View file

@ -6,13 +6,11 @@ extern "C" void osInitialize_recomp(uint8_t * restrict rdram, recomp_context * r
}
extern "C" void osCreateThread_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
//printf("Creating thread 0x%08X\n", (uint32_t)ctx->r4);
osCreateThread(rdram, (uint32_t)ctx->r4, (OSId)ctx->r5, (uint32_t)ctx->r6, (uint32_t)ctx->r7,
(uint32_t)MEM_W(0x10, ctx->r29), (OSPri)MEM_W(0x14, ctx->r29));
}
// Recompiled entry point for osStartThread.
// Forwards the low 32 bits of r4 to osStartThread.
extern "C" void osStartThread_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
    const uint32_t thread_ptr = (uint32_t)ctx->r4;
    osStartThread(rdram, thread_ptr);
}
@ -39,3 +37,17 @@ extern "C" void osJamMesg_recomp(uint8_t* restrict rdram, recomp_context* restri
extern "C" void osSetEventMesg_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
osSetEventMesg(rdram, (OSEvent)ctx->r4, (uint32_t)ctx->r5, (OSMesg)ctx->r6);
}
extern "C" void osViSetEvent_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
osViSetEvent(rdram, (uint32_t)ctx->r4, (OSMesg)ctx->r5, (u32)ctx->r6);
}
// Recompiled entry point for osGetCount.
// Stores the value returned by osGetCount() in r2; rdram is not used.
extern "C" void osGetCount_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
    const auto count = osGetCount();
    ctx->r2 = count;
}
// Recompiled entry point for osGetTime.
// Splits the 64-bit value returned by osGetTime() across two registers: the upper 32 bits go
// to r2 and the lower 32 bits go to r3.
// NOTE(review): the upper half is cast through uint32_t while the lower half is cast through
// int32_t, so the two halves may be extended differently when stored -- confirm this is intended.
extern "C" void osGetTime_recomp(uint8_t * restrict rdram, recomp_context * restrict ctx) {
    const uint64_t time = osGetTime();
    const uint32_t upper = (uint32_t)(time >> 32);
    const int32_t lower = (int32_t)time;

    ctx->r2 = upper;
    ctx->r3 = lower;
}

View file

@ -20,11 +20,6 @@ constexpr uint32_t byteswap(uint32_t val) {
}
#endif
// Debug placeholder: reports that it was reached on stdout and then terminates the process
// with a failure status. Neither argument is used.
void test_func(uint8_t* restrict rdram, recomp_context* restrict ctx) {
    fputs("in test_func\n", stdout);
    exit(EXIT_FAILURE);
}
extern std::pair<uint32_t, recomp_func_t*> funcs[];
extern const size_t num_funcs;
@ -61,11 +56,13 @@ extern "C" void do_break(uint32_t vram) {
void run_thread_function(uint8_t* rdram, uint32_t addr, uint32_t sp, uint32_t arg) {
recomp_context ctx{};
ctx.r29 = sp;
ctx.r4 = arg;
recomp_func_t* func = get_function(addr);
func(rdram, &ctx);
}
extern "C" void game_init(uint8_t* restrict rdram, recomp_context* restrict ctx);
void do_rom_read(uint8_t* rdram, uint32_t ram_address, uint32_t dev_address, size_t num_bytes);
std::unique_ptr<uint8_t[]> rom;
size_t rom_size;
@ -100,15 +97,15 @@ int main(int argc, char **argv) {
}
// Byteswap the rom
for (size_t rom_addr = 0; rom_addr < rom_size; rom_addr += 4) {
uint32_t word = *reinterpret_cast<uint32_t*>(rom.get() + rom_addr);
word = byteswap(word);
*reinterpret_cast<uint32_t*>(rom.get() + rom_addr) = word;
}
//for (size_t rom_addr = 0; rom_addr < rom_size; rom_addr += 4) {
// uint32_t word = *reinterpret_cast<uint32_t*>(rom.get() + rom_addr);
// word = byteswap(word);
// *reinterpret_cast<uint32_t*>(rom.get() + rom_addr) = word;
//}
// Get entrypoint from ROM
// TODO fix this for other IPL3 versions
uint32_t entrypoint = *reinterpret_cast<uint32_t*>(rom.get() + 0x8);
uint32_t entrypoint = byteswap(*reinterpret_cast<uint32_t*>(rom.get() + 0x8));
// Allocate rdram_buffer
std::unique_ptr<uint8_t[]> rdram_buffer = std::make_unique<uint8_t[]>(8 * 1024 * 1024);
@ -116,7 +113,8 @@ int main(int argc, char **argv) {
recomp_context context{};
// Initial 1MB DMA
std::copy_n(rom.get() + 0x1000, 0x100000, rdram_buffer.get() + entrypoint - 0x80000000);
do_rom_read(rdram_buffer.get(), entrypoint, 0x1000, 0x100000);
//std::copy_n(rom.get() + 0x1000, 0x100000, rdram_buffer.get() + entrypoint - 0x80000000);
// Initialize function address map
for (size_t i = 0; i < num_funcs; i++) {
@ -157,13 +155,13 @@ int main(int argc, char **argv) {
// TODO run the entrypoint instead
memset(rdram_buffer.get() + 0XAF860, 0, 0xC00A0u - 0XAF860);
printf("[Recomp] Starting\n");
debug_printf("[Recomp] Starting\n");
Multilibultra::set_main_thread();
Multilibultra::preinit(rdram_buffer.get());
game_init(rdram_buffer.get(), &context);
printf("[Recomp] Quitting\n");
debug_printf("[Recomp] Quitting\n");
return EXIT_SUCCESS;
}

View file

@ -1,4 +1,5 @@
#include <cstdio>
#include "../portultra/multilibultra.hpp"
#include "recomp.h"
extern "C" void osSpTaskLoad_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
@ -6,7 +7,14 @@ extern "C" void osSpTaskLoad_recomp(uint8_t* restrict rdram, recomp_context* res
}
extern "C" void osSpTaskStartGo_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
printf("[sp] osSpTaskStartGo(0x%08X)\n", (uint32_t)ctx->r4);
//printf("[sp] osSpTaskStartGo(0x%08X)\n", (uint32_t)ctx->r4);
OSTask* task = TO_PTR(OSTask, ctx->r4);
if (task->t.type == M_GFXTASK) {
printf("[sp] Gfx task: %08X\n", (uint32_t)ctx->r4);
} else if (task->t.type == M_AUDTASK) {
printf("[sp] Audio task: %08X\n", (uint32_t)ctx->r4);
}
Multilibultra::submit_rsp_task(rdram, ctx->r4);
}
extern "C" void osSpTaskYield_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {

View file

@ -27,7 +27,3 @@ extern "C" void osViSwapBuffer_recomp(uint8_t* restrict rdram, recomp_context* r
// Recompiled entry point for osViSetMode.
// Currently a stub: it performs no work and leaves both rdram and ctx untouched.
extern "C" void osViSetMode_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
    // Intentionally empty.
}
// Stub version of the osViSetEvent entry point.
// Performs no work and leaves both rdram and ctx untouched.
extern "C" void osViSetEvent_recomp(uint8_t* restrict rdram, recomp_context* restrict ctx) {
    // Intentionally empty.
}