From 998d2e6349ad5d0e158b7802071f3c4c56b36d41 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Sun, 6 Aug 2023 11:23:27 +0200 Subject: [PATCH] Update exception handler and ps1-bare-metal headers --- src/app/app.cpp | 11 +- src/app/app.hpp | 30 +-- src/cartio.cpp | 20 +- src/gpu.cpp | 13 +- src/ide.cpp | 7 +- src/ps1/cop0gte.h | 444 +++++++++++++++++++++++++++++++++++++++++ src/ps1/gpucmd.h | 28 +++ src/ps1/registers.h | 2 +- src/ps1/system.c | 28 +-- src/ps1/system.h | 69 +++---- src/ps1/system.s | 106 ++++------ src/ps1/unhandledexc.c | 4 +- src/util.cpp | 6 +- 13 files changed, 599 insertions(+), 169 deletions(-) create mode 100644 src/ps1/cop0gte.h diff --git a/src/app/app.cpp b/src/app/app.cpp index 219aead..7c4182e 100644 --- a/src/app/app.cpp +++ b/src/app/app.cpp @@ -39,7 +39,7 @@ void App::_unloadCartData(void) { void App::_setupWorker(bool (App::*func)(void)) { LOG("restarting worker, func=0x%08x", func); - auto mask = setInterruptMask(0); + auto enable = disableInterrupts(); _workerStatus.reset(); _workerFunction = func; @@ -49,8 +49,8 @@ void App::_setupWorker(bool (App::*func)(void)) { &_workerThread, util::forcedCast(&App::_worker), this, &_workerStack[(WORKER_STACK_SIZE - 1) & ~7] ); - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); } void App::_setupInterrupts(void) { @@ -58,7 +58,8 @@ void App::_setupInterrupts(void) { util::forcedCast(&App::_interruptHandler), this ); - setInterruptMask(1 << IRQ_VBLANK); + IRQ_MASK = 1 << IRQ_VSYNC; + enableInterrupts(); } /* Worker functions */ @@ -552,7 +553,7 @@ void App::_worker(void) { } void App::_interruptHandler(void) { - if (acknowledgeInterrupt(IRQ_VBLANK)) { + if (acknowledgeInterrupt(IRQ_VSYNC)) { _ctx->tick(); if (_workerStatus.status != WORKER_REBOOT) diff --git a/src/app/app.hpp b/src/app/app.hpp index 9e69cb9..957e0a4 100644 --- a/src/app/app.hpp +++ b/src/app/app.hpp @@ -47,38 +47,38 @@ public: nextScreen = nullptr; } inline void update(int part, int total, const char *text = 
nullptr) { - auto mask = setInterruptMask(0); + auto enable = disableInterrupts(); status = WORKER_BUSY; progress = part; progressTotal = total; if (text) message = text; - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); } inline void setStatus(WorkerStatusType value) { - auto mask = setInterruptMask(0); - status = value; + auto enable = disableInterrupts(); + status = value; - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); } inline void setNextScreen(ui::Screen &next, bool goBack = false) { - auto mask = setInterruptMask(0); + auto enable = disableInterrupts(); _nextGoBack = goBack; _nextScreen = &next; - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); } inline void finish(void) { - auto mask = setInterruptMask(0); - status = _nextGoBack ? WORKER_NEXT_BACK : WORKER_NEXT; - nextScreen = _nextScreen; + auto enable = disableInterrupts(); + status = _nextGoBack ? WORKER_NEXT_BACK : WORKER_NEXT; + nextScreen = _nextScreen; - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); } }; diff --git a/src/cartio.cpp b/src/cartio.cpp index 8d468f2..783b390 100644 --- a/src/cartio.cpp +++ b/src/cartio.cpp @@ -118,11 +118,11 @@ static constexpr int _ZS01_SEND_DELAY = 30000; static constexpr int _ZS01_PACKET_DELAY = 30000; DriverError CartDriver::readSystemID(void) { - auto mask = setInterruptMask(0); + auto enable = disableInterrupts(); if (!io::dsDIOReset()) { - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); LOG("no 1-wire device found"); return DS2401_NO_RESP; @@ -134,8 +134,8 @@ DriverError CartDriver::readSystemID(void) { for (int i = 0; i < 8; i++) _dump.systemID.data[i] = io::dsDIOReadByte(); - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); if (!_dump.systemID.validateDSCRC()) return DS2401_ID_ERROR; @@ -144,11 +144,11 @@ DriverError CartDriver::readSystemID(void) { } DriverError X76Driver::readCartID(void) { - auto mask = 
setInterruptMask(0); + auto enable = disableInterrupts(); if (!io::dsCartReset()) { - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); LOG("no 1-wire device found"); return DS2401_NO_RESP; @@ -160,8 +160,8 @@ DriverError X76Driver::readCartID(void) { for (int i = 0; i < 8; i++) _dump.cartID.data[i] = io::dsCartReadByte(); - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); if (!_dump.cartID.validateDSCRC()) return DS2401_ID_ERROR; diff --git a/src/gpu.cpp b/src/gpu.cpp index 9f7f38f..615614d 100644 --- a/src/gpu.cpp +++ b/src/gpu.cpp @@ -26,8 +26,8 @@ size_t upload(const RectWH &rect, const void *data, bool wait) { if (!waitForDMATransfer(DMA_GPU, _DMA_TIMEOUT)) return 0; - auto mask = setInterruptMask(0); - GPU_GP1 = gp1_dmaRequestMode(GP1_DREQ_NONE); + auto enable = disableInterrupts(); + GPU_GP1 = gp1_dmaRequestMode(GP1_DREQ_NONE); while (!(GPU_GP1 & GP1_STAT_CMD_READY)) __asm__ volatile(""); @@ -46,7 +46,8 @@ size_t upload(const RectWH &rect, const void *data, bool wait) { DMA_BCR (DMA_GPU) = _DMA_CHUNK_SIZE | (length << 16); DMA_CHCR(DMA_GPU) = DMA_CHCR_WRITE | DMA_CHCR_MODE_SLICE | DMA_CHCR_ENABLE; - setInterruptMask(mask); + if (enable) + enableInterrupts(); if (wait) waitForDMATransfer(DMA_GPU, _DMA_TIMEOUT); @@ -107,7 +108,7 @@ void Context::flip(void) { void Context::setResolution( VideoMode mode, int _width, int _height, bool sideBySide ) { - auto mask = setInterruptMask(0); + auto enable = disableInterrupts(); width = _width; height = _height; @@ -126,8 +127,8 @@ void Context::setResolution( _currentBuffer = 0; _applyResolution(mode); - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); } uint32_t *Context::newPacket(size_t length) { diff --git a/src/ide.cpp b/src/ide.cpp index bab9076..09b7dab 100644 --- a/src/ide.cpp +++ b/src/ide.cpp @@ -122,7 +122,7 @@ DeviceError Device::_waitForStatus(uint8_t mask, uint8_t value, int timeout) { return NO_ERROR; delayMicroseconds(1); - if 
(acknowledgeInterrupt(IRQ_VBLANK)) + if (acknowledgeInterrupt(IRQ_VSYNC)) io::clearWatchdog(); } @@ -167,7 +167,6 @@ DeviceError Device::_transferPIO(void *data, size_t length, bool write) { return NO_ERROR; } -// FIXME: DMA transfers are completely broken currently DeviceError Device::_transferDMA(void *data, size_t length, bool write) { length /= 4; @@ -222,7 +221,7 @@ DeviceError Device::enumerate(void) { if (error) return error; - error = _transferPIO(&block, sizeof(IdentifyBlock)); + error = _transferDMA(&block, sizeof(IdentifyBlock)); if (error) return error; @@ -243,7 +242,7 @@ DeviceError Device::enumerate(void) { if (error) return error; - error = _transferPIO(&block, sizeof(IdentifyBlock)); + error = _transferDMA(&block, sizeof(IdentifyBlock)); if (error) return error; diff --git a/src/ps1/cop0gte.h b/src/ps1/cop0gte.h new file mode 100644 index 0000000..a124f6e --- /dev/null +++ b/src/ps1/cop0gte.h @@ -0,0 +1,444 @@ +/* + * ps1-bare-metal - (C) 2023 spicyjpeg + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#pragma once + +#include + +#define COP0_GET(reg, output) \ + __asm__ volatile("mfc0 %0, $%1;" : "=r"(output) : "i"(reg)) +#define COP0_SET(reg, input) \ + __asm__ volatile("mtc0 %0, $%1;" :: "r"(input), "i"(reg)) + +#define GTE_GET(reg, output) \ + __asm__ volatile("mfc2 %0, $%1;" : "=r"(output) : "i"(reg)) +#define GTE_SET(reg, input) \ + __asm__ volatile("mtc2 %0, $%1;" :: "r"(input), "i"(reg)) + +#define GTE_GETC(reg, output) \ + __asm__ volatile("cfc2 %0, $%1;" : "=r"(output) : "i"(reg)) +#define GTE_SETC(reg, input) \ + __asm__ volatile("ctc2 %0, $%1;" :: "r"(input), "i"(reg)) + +#define GTE_LOAD(reg, offset, ptr) \ + __asm__ volatile("lwc2 $%0, %1(%2);" :: "i"(reg), "i"(offset), "r"(ptr)) +#define GTE_STORE(reg, offset, ptr) \ + __asm__ volatile("swc2 $%0, %1(%2);" :: "i"(reg), "i"(offset), "r"(ptr) : "memory") + +/* Coprocessor 0 */ + +typedef enum { + COP0_BPC = 3, // Breakpoint program counter + COP0_BDA = 5, // Breakpoint data address + COP0_DCIC = 7, // Breakpoint control + COP0_BADVADDR = 8, // Bad virtual address + COP0_BDAM = 9, // Breakpoint data address bitmask + COP0_BPCM = 11, // Breakpoint program counter bitmask + COP0_SR = 12, // Status register + COP0_CAUSE = 13, // Exception cause + COP0_EPC = 14, // Exception program counter + COP0_PRID = 15 // Processor identifier +} COP0Register; + +typedef enum { + COP0_CAUSE_EXC_BITMASK = 31 << 2, + COP0_CAUSE_EXC_INT = 0 << 2, // Interrupt + COP0_CAUSE_EXC_AdEL = 4 << 2, // Load address error + COP0_CAUSE_EXC_AdES = 5 << 2, // Store address error + COP0_CAUSE_EXC_IBE = 6 << 2, // Instruction bus error + COP0_CAUSE_EXC_DBE = 7 << 2, // Data bus error + COP0_CAUSE_EXC_SYS = 8 << 2, // Syscall + COP0_CAUSE_EXC_BP = 9 << 2, // Breakpoint or break instruction + COP0_CAUSE_EXC_RI = 10 << 2, // Reserved instruction + COP0_CAUSE_EXC_CpU = 11 << 2, // Coprocessor unusable + COP0_CAUSE_EXC_Ov = 12 << 2, // Arithmetic overflow + COP0_CAUSE_Ip0 = 1 << 8, // IRQ 0 pending (software interrupt) + 
COP0_CAUSE_Ip1 = 1 << 9, // IRQ 1 pending (software interrupt) + COP0_CAUSE_Ip2 = 1 << 10, // IRQ 2 pending (hardware interrupt) + COP0_CAUSE_CE_BITMASK = 3 << 28, + COP0_CAUSE_BD = 1 << 30 // Exception occurred in delay slot +} COP0CauseFlag; + +typedef enum { + COP0_SR_IEc = 1 << 0, // Current interrupt enable + COP0_SR_KUc = 1 << 1, // Current privilege level + COP0_SR_IEp = 1 << 2, // Previous interrupt enable + COP0_SR_KUp = 1 << 3, // Previous privilege level + COP0_SR_IEo = 1 << 4, // Old interrupt enable + COP0_SR_KUo = 1 << 5, // Old privilege level + COP0_SR_Im0 = 1 << 8, // IRQ mask 0 (software interrupt) + COP0_SR_Im1 = 1 << 9, // IRQ mask 1 (software interrupt) + COP0_SR_Im2 = 1 << 10, // IRQ mask 2 (hardware interrupt) + COP0_SR_Isc = 1 << 16, // Isolate cache + COP0_SR_BEV = 1 << 22, // Boot exception vector location + COP0_SR_CU0 = 1 << 28, // Coprocessor 0 privilege level + COP0_SR_CU2 = 1 << 30 // Coprocessor 2 enable +} COP0StatusFlag; + +#define SETTER(reg, type) \ + static inline type cop0_get##reg(void) { \ + type value; \ + COP0_GET(COP0_##reg, value); \ + return value; \ + } \ + static inline void cop0_set##reg(type value) { \ + COP0_SET(COP0_##reg, value); \ + } \ + +SETTER(BPC, void *) +SETTER(BDA, void *) +SETTER(DCIC, uint32_t) +SETTER(BDAM, uint32_t) +SETTER(BPCM, uint32_t) +SETTER(SR, uint32_t) + +#undef SETTER + +#define GETTER(reg, type) \ + static inline type cop0_get##reg(void) { \ + type value; \ + COP0_GET(COP0_##reg, value); \ + return value; \ + } \ + +GETTER(BADVADDR, void *) +GETTER(CAUSE, uint32_t) +GETTER(EPC, void *) + +#undef GETTER + +/* GTE commands */ + +typedef enum { + GTE_CMD_BITMASK = 63 << 0, + GTE_CMD_RTPS = 1 << 0, // Perspective transformation (1 vertex) + GTE_CMD_NCLIP = 6 << 0, // Normal clipping + GTE_CMD_OP = 12 << 0, // Outer product + GTE_CMD_DPCS = 16 << 0, // Depth cue (1 vertex) + GTE_CMD_INTPL = 17 << 0, // Depth cue with vector + GTE_CMD_MVMVA = 18 << 0, // Matrix-vector multiplication + GTE_CMD_NCDS 
= 19 << 0, // Normal color depth (1 vertex) + GTE_CMD_CDP = 20 << 0, // Color depth cue + GTE_CMD_NCDT = 22 << 0, // Normal color depth (3 vertices) + GTE_CMD_NCCS = 27 << 0, // Normal color color (1 vertex) + GTE_CMD_CC = 28 << 0, // Color color + GTE_CMD_NCS = 30 << 0, // Normal color (1 vertex) + GTE_CMD_NCT = 32 << 0, // Normal color (3 vertices) + GTE_CMD_SQR = 40 << 0, // Square of vector + GTE_CMD_DCPL = 41 << 0, // Depth cue with light + GTE_CMD_DPCT = 42 << 0, // Depth cue (3 vertices) + GTE_CMD_AVSZ3 = 45 << 0, // Average Z value (3 vertices) + GTE_CMD_AVSZ4 = 46 << 0, // Average Z value (4 vertices) + GTE_CMD_RTPT = 48 << 0, // Perspective transformation (3 vertices) + GTE_CMD_GPF = 61 << 0, // Linear interpolation + GTE_CMD_GPL = 62 << 0, // Linear interpolation with base + GTE_CMD_NCCT = 63 << 0, // Normal color color (3 vertices) + GTE_LM = 1 << 10, // Saturate IR to 0x0000-0x7fff + GTE_CV_BITMASK = 3 << 13, + GTE_CV_TR = 0 << 13, // Use TR as translation vector for MVMVA + GTE_CV_BK = 1 << 13, // Use BK as translation vector for MVMVA + GTE_CV_FC = 2 << 13, // Use FC as translation vector for MVMVA + GTE_CV_NONE = 3 << 13, // Skip translation for MVMVA + GTE_V_BITMASK = 3 << 15, + GTE_V_V0 = 0 << 15, // Use V0 as operand for MVMVA + GTE_V_V1 = 1 << 15, // Use V1 as operand for MVMVA + GTE_V_V2 = 2 << 15, // Use V2 as operand for MVMVA + GTE_V_IR = 3 << 15, // Use IR as operand for MVMVA + GTE_MX_BITMASK = 3 << 17, + GTE_MX_RT = 0 << 17, // Use rotation matrix as operand for MVMVA + GTE_MX_LLM = 1 << 17, // Use light matrix as operand for MVMVA + GTE_MX_LCM = 2 << 17, // Use light color matrix as operand for MVMVA + GTE_SF = 1 << 19 // Shift results by 12 bits +} GTECommandFlag; + +typedef struct { + int16_t x, y, z; + uint8_t _padding[2]; +} GTEVector16; + +typedef struct { + int32_t x, y, z; +} GTEVector32; + +typedef struct { + int16_t values[3][3]; + uint8_t _padding[2]; +} GTEMatrix; + +#define gte_command(cmd) \ + __asm__ volatile("nop; nop; 
cop2 %0;" :: "i"(cmd)) + +/* GTE control registers */ + +typedef enum { + GTE_RT11RT12 = 0, // Rotation matrix + GTE_RT13RT21 = 1, // Rotation matrix + GTE_RT22RT23 = 2, // Rotation matrix + GTE_RT31RT32 = 3, // Rotation matrix + GTE_RT33 = 4, // Rotation matrix + GTE_TRX = 5, // Translation vector + GTE_TRY = 6, // Translation vector + GTE_TRZ = 7, // Translation vector + GTE_L11L12 = 8, // Light matrix + GTE_L13L21 = 9, // Light matrix + GTE_L22L23 = 10, // Light matrix + GTE_L31L32 = 11, // Light matrix + GTE_L33 = 12, // Light matrix + GTE_RBK = 13, // Background color + GTE_GBK = 14, // Background color + GTE_BBK = 15, // Background color + GTE_LC11LC12 = 16, // Light color matrix + GTE_LC13LC21 = 17, // Light color matrix + GTE_LC22LC23 = 18, // Light color matrix + GTE_LC31LC32 = 19, // Light color matrix + GTE_LC33 = 20, // Light color matrix + GTE_RFC = 21, // Far color + GTE_GFC = 22, // Far color + GTE_BFC = 23, // Far color + GTE_OFX = 24, // Screen coordinate offset + GTE_OFY = 25, // Screen coordinate offset + GTE_H = 26, // Projection plane distance + GTE_DQA = 27, // Depth cue scale factor + GTE_DQB = 28, // Depth cue base + GTE_ZSF3 = 29, // Average Z scale factor + GTE_ZSF4 = 30, // Average Z scale factor + GTE_FLAG = 31 // Error/overflow flags +} GTEControlRegister; + +typedef enum { + GTE_FLAG_IR0_SATURATED = 1 << 12, + GTE_FLAG_SY2_SATURATED = 1 << 13, + GTE_FLAG_SX2_SATURATED = 1 << 14, + GTE_FLAG_MAC0_UNDERFLOW = 1 << 15, + GTE_FLAG_MAC0_OVERFLOW = 1 << 16, + GTE_FLAG_DIVIDE_OVERFLOW = 1 << 17, + GTE_FLAG_Z_SATURATED = 1 << 18, + GTE_FLAG_B_SATURATED = 1 << 19, + GTE_FLAG_G_SATURATED = 1 << 20, + GTE_FLAG_R_SATURATED = 1 << 21, + GTE_FLAG_IR3_SATURATED = 1 << 22, + GTE_FLAG_IR2_SATURATED = 1 << 23, + GTE_FLAG_IR1_SATURATED = 1 << 24, + GTE_FLAG_MAC3_UNDERFLOW = 1 << 25, + GTE_FLAG_MAC2_UNDERFLOW = 1 << 26, + GTE_FLAG_MAC1_UNDERFLOW = 1 << 27, + GTE_FLAG_MAC3_OVERFLOW = 1 << 28, + GTE_FLAG_MAC2_OVERFLOW = 1 << 29, + GTE_FLAG_MAC1_OVERFLOW = 1 
<< 30, + GTE_FLAG_ERROR = 1 << 31 +} GTEStatusFlag; + +#define VECTOR32_SETTER(regA, regB, regC, name) \ + static inline void gte_set##name(int x, int y, int z) { \ + GTE_SETC(GTE_##regA, x); \ + GTE_SETC(GTE_##regB, y); \ + GTE_SETC(GTE_##regC, z); \ + } + +VECTOR32_SETTER(TRX, TRY, TRZ, TranslationVector) +VECTOR32_SETTER(RBK, GBK, BBK, BackgroundColor) +VECTOR32_SETTER(RFC, GFC, BFC, FarColor) + +#undef VECTOR32_SETTER + +#define MATRIX_SETTER(reg, name) \ + static inline void gte_set##name( \ + int16_t v11, int16_t v12, int16_t v13, \ + int16_t v21, int16_t v22, int16_t v23, \ + int16_t v31, int16_t v32, int16_t v33 \ + ) { \ + uint32_t value; \ + value = ((uint32_t) v11 & 0xffff) | ((uint32_t) v12 << 16); \ + GTE_SETC(GTE_##reg##11##reg##12, value); \ + value = ((uint32_t) v13 & 0xffff) | ((uint32_t) v21 << 16); \ + GTE_SETC(GTE_##reg##13##reg##21, value); \ + value = ((uint32_t) v22 & 0xffff) | ((uint32_t) v23 << 16); \ + GTE_SETC(GTE_##reg##22##reg##23, value); \ + value = ((uint32_t) v31 & 0xffff) | ((uint32_t) v32 << 16); \ + GTE_SETC(GTE_##reg##31##reg##32, value); \ + GTE_SETC(GTE_##reg##33, v33); \ + } \ + static inline void gte_load##name(const GTEMatrix *input) { \ + uint32_t value; \ + value = ((const uint32_t *) input)[0]; \ + GTE_SETC(GTE_##reg##11##reg##12, value); \ + value = ((const uint32_t *) input)[1]; \ + GTE_SETC(GTE_##reg##13##reg##21, value); \ + value = ((const uint32_t *) input)[2]; \ + GTE_SETC(GTE_##reg##22##reg##23, value); \ + value = ((const uint32_t *) input)[3]; \ + GTE_SETC(GTE_##reg##31##reg##32, value); \ + value = ((const uint32_t *) input)[4]; \ + GTE_SETC(GTE_##reg##33, value); \ + } + +MATRIX_SETTER(RT, RotationMatrix) +MATRIX_SETTER(L, LightMatrix) +MATRIX_SETTER(LC, LightColorMatrix) + +#undef MATRIX_SETTER + +static inline void gte_setXYOrigin(int x, int y) { + GTE_SETC(GTE_OFX, x << 16); + GTE_SETC(GTE_OFY, y << 16); +} +static inline void gte_setFieldOfView(int value) { + GTE_SETC(GTE_H, value); +} +static inline void 
gte_setDepthCueFactor(int base, int scale) { + GTE_SETC(GTE_DQA, scale); + GTE_SETC(GTE_DQB, base); +} +static inline void gte_setZScaleFactor(unsigned int scale) { + unsigned int z3 = scale / 3, z4 = scale / 4; + + GTE_SETC(GTE_ZSF3, z3); + GTE_SETC(GTE_ZSF4, z4); +} + +/* GTE data registers */ + +typedef enum { + GTE_VXY0 = 0, // Input vector 0 + GTE_VZ0 = 1, // Input vector 0 + GTE_VXY1 = 2, // Input vector 1 + GTE_VZ1 = 3, // Input vector 1 + GTE_VXY2 = 4, // Input vector 2 + GTE_VZ2 = 5, // Input vector 2 + GTE_RGBC = 6, // Input color and GPU command + GTE_OTZ = 7, // Average Z value output + GTE_IR0 = 8, // Scalar accumulator + GTE_IR1 = 9, // Vector accumulator + GTE_IR2 = 10, // Vector accumulator + GTE_IR3 = 11, // Vector accumulator + GTE_SXY0 = 12, // X/Y coordinate output FIFO + GTE_SXY1 = 13, // X/Y coordinate output FIFO + GTE_SXY2 = 14, // X/Y coordinate output FIFO + GTE_SXYP = 15, // X/Y coordinate output FIFO + GTE_SZ0 = 16, // Z coordinate output FIFO + GTE_SZ1 = 17, // Z coordinate output FIFO + GTE_SZ2 = 18, // Z coordinate output FIFO + GTE_SZ3 = 19, // Z coordinate output FIFO + GTE_RGB0 = 20, // Color and GPU command output FIFO + GTE_RGB1 = 21, // Color and GPU command output FIFO + GTE_RGB2 = 22, // Color and GPU command output FIFO + GTE_MAC0 = 24, // Extended scalar accumulator + GTE_MAC1 = 25, // Extended vector accumulator + GTE_MAC2 = 26, // Extended vector accumulator + GTE_MAC3 = 27, // Extended vector accumulator + GTE_IRGB = 28, // RGB conversion input + GTE_ORGB = 29, // RGB conversion output + GTE_LZCS = 30, // Leading zero count input + GTE_LZCR = 31 // Leading zero count output +} GTEDataRegister; + +#define VECTOR16_SETTER(regA, regB, name) \ + static inline void gte_set##name(int16_t x, int16_t y, int16_t z) { \ + uint32_t xy = ((uint32_t) x & 0xffff) | ((uint32_t) y << 16); \ + GTE_SET(GTE_##regA, xy); \ + GTE_SET(GTE_##regB, z); \ + } \ + static inline void gte_load##name(const GTEVector16 *input) { \ + 
GTE_LOAD(GTE_##regA, 0, input); \ + GTE_LOAD(GTE_##regB, 4, input); \ + } + +VECTOR16_SETTER(VXY0, VZ0, V0) +VECTOR16_SETTER(VXY1, VZ1, V1) +VECTOR16_SETTER(VXY2, VZ2, V2) + +#undef VECTOR16_SETTER + +static inline void gte_loadV012(const GTEVector16 *input) { + GTE_LOAD(GTE_VXY0, 0, input); + GTE_LOAD(GTE_VZ0, 4, input); + GTE_LOAD(GTE_VXY1, 8, input); + GTE_LOAD(GTE_VZ1, 12, input); + GTE_LOAD(GTE_VXY2, 16, input); + GTE_LOAD(GTE_VZ2, 20, input); +} +static inline void gte_setColumnVectors( + int16_t v11, int16_t v12, int16_t v13, + int16_t v21, int16_t v22, int16_t v23, + int16_t v31, int16_t v32, int16_t v33 +) { + uint32_t value; + value = ((uint32_t) v11 & 0xffff) | ((uint32_t) v21 << 16); + GTE_SET(GTE_VXY0, value); + GTE_SET(GTE_VZ0, v31); + value = ((uint32_t) v12 & 0xffff) | ((uint32_t) v22 << 16); + GTE_SET(GTE_VXY1, value); + GTE_SET(GTE_VZ1, v32); + value = ((uint32_t) v13 & 0xffff) | ((uint32_t) v23 << 16); + GTE_SET(GTE_VXY2, value); + GTE_SET(GTE_VZ2, v33); +} + +#define SETTER(reg, type) \ + static inline type gte_get##reg(void) { \ + type value; \ + GTE_GET(GTE_##reg, value); \ + return value; \ + } \ + static inline void gte_set##reg(type value) { \ + GTE_SET(GTE_##reg, value); \ + } \ + static inline void gte_load##reg(const type *input) { \ + GTE_LOAD(GTE_##reg, 0, input); \ + } \ + static inline void gte_store##reg(type *output) { \ + GTE_STORE(GTE_##reg, 0, output); \ + } + +SETTER(RGBC, uint32_t) +SETTER(OTZ, int) +SETTER(IR0, int) +SETTER(IR1, int) +SETTER(IR2, int) +SETTER(IR3, int) +SETTER(SXY0, uint32_t) +SETTER(SXY1, uint32_t) +SETTER(SXY2, uint32_t) +SETTER(SZ0, int) +SETTER(SZ1, int) +SETTER(SZ2, int) +SETTER(SZ3, int) +SETTER(RGB0, uint32_t) +SETTER(RGB1, uint32_t) +SETTER(RGB2, uint32_t) +SETTER(MAC0, int) +SETTER(MAC1, int) +SETTER(MAC2, int) +SETTER(MAC3, int) +SETTER(LZCS, uint32_t) +SETTER(LZCR, int) + +#undef SETTER + +static inline void gte_storeSXY012(uint32_t *output) { + GTE_STORE(GTE_SXY0, 0, output); + GTE_STORE(GTE_SXY1, 
4, output); + GTE_STORE(GTE_SXY2, 8, output); +} + +#undef COP0_GET +#undef COP0_SET +#undef GTE_GET +#undef GTE_SET +#undef GTE_GETC +#undef GTE_SETC +#undef GTE_LOAD +#undef GTE_STORE diff --git a/src/ps1/gpucmd.h b/src/ps1/gpucmd.h index ce11e5d..b4f31c3 100644 --- a/src/ps1/gpucmd.h +++ b/src/ps1/gpucmd.h @@ -285,6 +285,34 @@ typedef enum { GP1_CMD_GET_INFO = 16 << 24 } GP1Command; +DEF32 gp1_clockMultiplierH(GP1HorizontalRes horizontalRes) { + switch (horizontalRes) { + case GP1_HRES_256: + return 10; + case GP1_HRES_320: + return 8; + case GP1_HRES_368: + return 7; + case GP1_HRES_512: + return 5; + case GP1_HRES_640: + return 4; + default: + return 0; + } +} + +DEF32 gp1_clockDividerV(GP1VerticalRes verticalRes) { + switch (verticalRes) { + case GP1_VRES_256: + return 1; + case GP1_VRES_512: + return 2; + default: + return 0; + } +} + DEF32 gp1_resetGPU(void) { return GP1_CMD_RESET_GPU; } diff --git a/src/ps1/registers.h b/src/ps1/registers.h index 680e9e8..0925194 100644 --- a/src/ps1/registers.h +++ b/src/ps1/registers.h @@ -148,7 +148,7 @@ typedef enum { /* IRQ controller */ typedef enum { - IRQ_VBLANK = 0, + IRQ_VSYNC = 0, IRQ_GPU = 1, IRQ_CDROM = 2, IRQ_DMA = 3, diff --git a/src/ps1/system.c b/src/ps1/system.c index 1d6b30f..1552bb3 100644 --- a/src/ps1/system.c +++ b/src/ps1/system.c @@ -17,6 +17,7 @@ #include #include #include +#include "ps1/cop0gte.h" #include "ps1/registers.h" #include "ps1/system.h" @@ -49,8 +50,7 @@ void installExceptionHandler(void) { DMA_DICR = DMA_DICR_CH_STAT_BITMASK; // Ensure interrupts and the GTE are enabled at the COP0 side. - uint32_t sr = SR_IEc | SR_Im2 | SR_CU0 | SR_CU2; - __asm__ volatile("mtc0 %0, $12;" :: "r"(sr)); + cop0_setSR(COP0_SR_IEc | COP0_SR_Im2 | COP0_SR_CU0 | COP0_SR_CU2); // Grab a direct pointer to the BIOS function to flush the instruction // cache. 
This is the only function that must always run from the BIOS ROM @@ -68,7 +68,7 @@ void installExceptionHandler(void) { } void setInterruptHandler(ArgFunction func, void *arg) { - setInterruptMask(0); + disableInterrupts(); interruptHandler = func; interruptHandlerArg = arg; @@ -79,15 +79,15 @@ void flushCache(void) { //if (!_flushCache) //_flushCache = BIOS_API_TABLE[0x44]; - uint32_t mask = setInterruptMask(0); + bool enable = disableInterrupts(); _flushCache(); - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); } void softReset(void) { - setInterruptMask(0); + disableInterrupts(); BIOS_ENTRY_POINT(); } @@ -146,17 +146,3 @@ void switchThread(Thread *thread) { nextThread = thread; atomic_signal_fence(memory_order_release); } - -void switchThreadImmediate(Thread *thread) { - if (!thread) - thread = &_mainThread; - - nextThread = thread; - atomic_signal_fence(memory_order_release); - - // Execute a syscall to force the switch to happen. Note that the syscall - // handler will take a different path if $a0 is zero (see system.s), but - // that will never happen here since the check above is ensuring $a0 (i.e. - // the first argument) will always be a valid pointer. 
- __asm__ volatile("syscall 0;" :: "r"(thread) : "a0", "memory"); -} diff --git a/src/ps1/system.h b/src/ps1/system.h index 6cbbbdf..1b2a0a3 100644 --- a/src/ps1/system.h +++ b/src/ps1/system.h @@ -18,35 +18,9 @@ #include #include +#include "ps1/cop0gte.h" #include "ps1/registers.h" -typedef enum { - CAUSE_INT = 0, // Interrupt - CAUSE_AdEL = 4, // Load address error - CAUSE_AdES = 5, // Store address error - CAUSE_IBE = 6, // Instruction bus error - CAUSE_DBE = 7, // Data bus error - CAUSE_SYS = 8, // Syscall - CAUSE_BP = 9, // Breakpoint or break instruction - CAUSE_RI = 10, // Reserved instruction - CAUSE_CpU = 11, // Coprocessor unusable - CAUSE_Ov = 12 // Arithmetic overflow -} ExceptionCause; - -typedef enum { - SR_IEc = 1 << 0, // Current interrupt enable - SR_KUc = 1 << 1, // Current privilege level - SR_IEp = 1 << 2, // Previous interrupt enable - SR_KUp = 1 << 3, // Previous privilege level - SR_IEo = 1 << 4, // Old interrupt enable - SR_KUo = 1 << 5, // Old privilege level - SR_Im0 = 1 << 8, // IRQ mask 0 (software interrupt) - SR_Im1 = 1 << 9, // IRQ mask 1 (software interrupt) - SR_Im2 = 1 << 10, // IRQ mask 2 (hardware interrupt) - SR_CU0 = 1 << 28, // Coprocessor 0 privilege level - SR_CU2 = 1 << 30 // Coprocessor 2 enable -} SRFlag; - typedef struct { uint32_t pc, at, v0, v1, a0, a1, a2, a3; uint32_t t0, t1, t2, t3, t4, t5, t6, t7; @@ -74,17 +48,28 @@ extern Thread *currentThread; extern Thread *nextThread; /** - * @brief Disables interrupts temporarily, then sets the IRQ_MASK register to - * the specified value (which should be a bitfield of (1 << IRQ_*) flags) and - * returns its previous value. Must *not* be used in IRQ handlers. + * @brief Enables all interrupts at the COP0 side (without altering the IRQ_MASK + * register). If any IRQs occurred and were not acknowledged while interrupts + * were disabled, any callback set using setInterruptHandler() will be invoked + * immediately. 
*/ -static inline uint32_t setInterruptMask(uint32_t mask) { - register uint32_t v0 __asm__("v0"); - register uint32_t a0 __asm__("a0") = 0; - register uint32_t a1 __asm__("a1") = mask; +static inline void enableInterrupts(void) { + cop0_setSR(cop0_getSR() | COP0_SR_IEc); +} - __asm__ volatile("syscall 0;" : "=r"(v0) : "r"(a0), "r"(a1) : "memory"); - return v0; +/** + * @brief Disables all interrupts at the COP0 side (without altering the + * IRQ_MASK register). This function is not atomic, but can be used safely as + * long as no other code is manipulating the COP0 SR register while interrupts + * are enabled. + * + * @return True if interrupts were previously enabled, false otherwise + */ +static inline bool disableInterrupts(void) { + uint32_t sr = cop0_getSR(); + + cop0_setSR(sr & ~COP0_SR_IEc); + return (sr & COP0_SR_IEc); } /** @@ -130,7 +115,7 @@ void installExceptionHandler(void); * - it must return quickly, as IRQs fired while the exception handler is * running may otherwise be missed. * - * Interrupts must be re-enabled manually using setInterruptMask() after setting + * Interrupts must be re-enabled manually using enableInterrupts() after setting * a new handler. * * @param func @@ -139,7 +124,8 @@ void installExceptionHandler(void); void setInterruptHandler(ArgFunction func, void *arg); /** - * @brief Clears the instruction cache. Must *not* be used in IRQ handlers. + * @brief Temporarily disables interrupts, then calls the BIOS function to clear + * the instruction cache. */ void flushCache(void); @@ -217,7 +203,12 @@ void switchThread(Thread *thread); * * @param thread Pointer to new thread or NULL for main thread */ -void switchThreadImmediate(Thread *thread); +static inline void switchThreadImmediate(Thread *thread) { + switchThread(thread); + + // Execute a syscall to force the switch to happen. 
+ __asm__ volatile("syscall 0;" ::: "memory"); +} #ifdef __cplusplus } diff --git a/src/ps1/system.s b/src/ps1/system.s index dedba12..c58e7f6 100644 --- a/src/ps1/system.s +++ b/src/ps1/system.s @@ -20,10 +20,6 @@ .set CAUSE, $13 .set EPC, $14 -.set IO_BASE, 0xbf80 -.set IRQ_STAT, 0x1070 -.set IRQ_MASK, 0x1074 - .section .text._exceptionVector, "ax", @progbits .global _exceptionVector .type _exceptionVector, @function @@ -31,55 +27,27 @@ _exceptionVector: # This tiny stub is going to be relocated to the address the CPU jumps to # when an exception occurs (0x80000080) at runtime, overriding the default - # one installed by the BIOS. + # one installed by the BIOS. We're going to fetch a pointer to the current + # thread, grab the EPC (i.e. the address of the instruction that was being + # executed before the exception occurred) and jump to the exception handler. + # NOTE: we can't use any registers other than $k0 and $k1 here, as doing so + # would destroy their contents and corrupt the current thread's state. lui $k0, %hi(currentThread) - j _exceptionHandler lw $k0, %lo(currentThread)($k0) - nop + j _exceptionHandler + mfc0 $k1, EPC .section .text._exceptionHandler, "ax", @progbits .global _exceptionHandler .type _exceptionHandler, @function _exceptionHandler: - # We're going to need at least 3 registers to store a pointer to the current - # thread, the return pointer (EPC) and the state of the CAUSE register - # respectively. $k0 and $k1 are always available to the exception handler, - # so only $at needs to be saved. - nop + # Save the full state of the thread in order to make sure the interrupt + # handler callback (invoked later on) can use any register. The state of + # $hi/$lo is saved after all other registers in order to let the multiplier + # finish any ongoing calculation. sw $at, 0x04($k0) - mfc0 $at, CAUSE - - # Check CAUSE bits 2-6 to determine what triggered the exception. 
If it was - # caused by a syscall, increment EPC so it won't be executed again; - # furthermore, if the first argument passed to the syscall was zero, take an - # alternate, shorter code path (as it is the syscall to set IRQ_MASK). - li $k1, 8 << 2 # if (((CAUSE >> 2) & 0x1f) != 8) goto notFastSyscall - andi $at, 0x1f << 2 - bne $at, $k1, .LnotFastSyscall - mfc0 $k1, EPC - - bnez $a0, .LnotFastSyscall # if (arg0) goto notFastSyscall - addiu $k1, 4 # EPC++ - -.LfastSyscall: - # Save the current value of IRQ_MASK then set it "atomically" (as interrupts - # are disabled while the exception handler runs), then restore $at and - # return immediately without the overhead of saving and restoring the whole - # thread. - lui $at, IO_BASE - lhu $v0, IRQ_MASK($at) # returnValue = IRQ_MASK - lw $at, 0x04($k0) - sh $a1, IRQ_MASK($at) # IRQ_MASK = arg1 - - jr $k1 - rfe - -.LnotFastSyscall: - # If the fast path couldn't be taken, save the full state of the thread. The - # state of $hi/$lo is saved after all other registers in order to let the - # multiplier finish any ongoing calculation. sw $v0, 0x08($k0) sw $v1, 0x0c($k0) sw $a0, 0x10($k0) @@ -114,41 +82,53 @@ _exceptionHandler: sw $v0, 0x78($k0) sw $v1, 0x7c($k0) - # Check again if the CAUSE code is either 0 (IRQ) or 8 (syscall). If not - # call _unhandledException(), which will then display information about the - # exception and lock up. - lui $v0, %hi(interruptHandler) - andi $v1, $at, 0x17 << 2 # if (!(((CAUSE >> 2) & 0x1f) % 8)) goto irqOrSyscall - beqz $v1, .LirqOrSyscall - lw $v0, %lo(interruptHandler)($v0) + # Check bits 2-6 of the CAUSE register to determine what triggered the + # exception. If it was caused by a syscall, increment EPC to make sure + # returning to the thread won't trigger another syscall. 
+ mfc0 $v0, CAUSE + lui $v1, %hi(interruptHandler) + + andi $v0, 0x1f << 2 # if (((CAUSE >> 2) & 0x1f) == 0) goto checkForGTEInst + beqz $v0, .LcheckForGTEInst + li $at, 8 << 2 # if (((CAUSE >> 2) & 0x1f) == 8) goto applyIncrement + beq $v0, $at, .LapplyIncrement + lw $v1, %lo(interruptHandler)($v1) .LotherException: + # If the exception was triggered by neither a syscall nor an interrupt, call + # _unhandledException(), which will then display information about the + # exception and lock up. + sw $k1, 0x00($k0) + mfc0 $a1, BADV # _unhandledException((CAUSE >> 2) & 0x1f, BADV) - srl $a0, $at, 2 + srl $a0, $v0, 2 jal _unhandledException addiu $sp, -8 b .Lreturn addiu $sp, 8 -.LirqOrSyscall: - # Otherwise, check if the interrupted instruction was a GTE opcode and - # increment EPC to avoid executing it again (as with syscalls). This is a - # workaround for a hardware bug. - lw $v1, 0($k1) - li $at, 0x25 # if ((*EPC >> 25) == 0x25) EPC++ - srl $v1, 25 - bne $v1, $at, .LskipIncrement - lui $a0, %hi(interruptHandlerArg) +.LcheckForGTEInst: + # If the exception was caused by an interrupt, check if the interrupted + # instruction was a GTE opcode and increment EPC to avoid executing it again + # if that is the case. This is a workaround for a hardware bug. + lw $v0, 0($k1) # if ((*EPC >> 25) == 0x25) EPC++ + li $at, 0x25 + srl $v0, 25 + bne $v0, $at, .LskipIncrement + lw $v1, %lo(interruptHandler)($v1) +.LapplyIncrement: addiu $k1, 4 .LskipIncrement: - lw $a0, %lo(interruptHandlerArg)($a0) + # Save the modified EPC and dispatch any pending interrupts. The handler + # will temporarily use the current thread's stack. sw $k1, 0x00($k0) - # Dispatch any pending interrupts. 
- jalr $v0 # interruptHandler(interruptHandlerArg) + lui $a0, %hi(interruptHandlerArg) + lw $a0, %lo(interruptHandlerArg)($a0) + jalr $v1 # interruptHandler(interruptHandlerArg) addiu $sp, -8 addiu $sp, 8 diff --git a/src/ps1/unhandledexc.c b/src/ps1/unhandledexc.c index 794d75b..b753d81 100644 --- a/src/ps1/unhandledexc.c +++ b/src/ps1/unhandledexc.c @@ -43,9 +43,9 @@ static const char _registerNames[] = "t8" "t9" "gp" "sp" "fp" "ra" "hi" "lo"; #endif -void _unhandledException(ExceptionCause cause, uint32_t badv) { +void _unhandledException(int cause, uint32_t badv) { #ifndef NDEBUG - if ((cause == CAUSE_AdEL) || (cause == CAUSE_AdES)) + if (cause <= 5) printf("Exception: %s (%08x)\nRegister dump:\n", _causeNames[cause - 4], badv); else printf("Exception: %s\nRegister dump:\n", _causeNames[cause - 4]); diff --git a/src/util.cpp b/src/util.cpp index 9d2f020..b489ba1 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -47,7 +47,7 @@ void Logger::clear(void) { } void Logger::log(const char *format, ...) { - auto mask = setInterruptMask(0); + auto enable = disableInterrupts(); size_t tail = _tail; va_list ap; @@ -60,8 +60,8 @@ void Logger::log(const char *format, ...) { if (enableSyslog) puts(_lines[tail]); - if (mask) - setInterruptMask(mask); + if (enable) + enableInterrupts(); } /* CRC calculation */