From 7e7db5b8902f2b2f2c9ecdbb9e8e31464bfa9c42 Mon Sep 17 00:00:00 2001 From: spicyjpeg Date: Mon, 25 Dec 2023 15:04:13 +0100 Subject: [PATCH] Fix delay function, logs in 480i mode, clangd config --- .clangd | 9 +++++ .gitignore | 1 + src/cartdata.cpp | 1 + src/libc/crt0.c | 2 +- src/ps1/cop0gte.h | 18 ++++----- src/ps1/registers.h | 6 +-- src/ps1/system.c | 24 +++--------- src/ps1/system.h | 25 ++++++++---- src/ps1/system.s | 95 +++++++++++++++++++++++++++++++++++++++++++++ src/util.cpp | 8 ++-- src/util.hpp | 2 +- 11 files changed, 149 insertions(+), 42 deletions(-) create mode 100644 .clangd diff --git a/.clangd b/.clangd new file mode 100644 index 0000000..a98af58 --- /dev/null +++ b/.clangd @@ -0,0 +1,9 @@ +# As clang/clangd's MIPS-I support is still experimental, some minor changes to +# the GCC arguments it picks up from CMake are required in order to prevent it +# from erroring out. Additionally, specifying the target architecture manually +# fixes some edge cases (such as CMake emitting 8.3 format paths on Windows and +# breaking clangd's target autodetection). + +CompileFlags: + Add: [ --target=mipsel-none-elf, -march=mips1 ] + Remove: [ -march, -mno-llsc, -mdivide-breaks ] diff --git a/.gitignore b/.gitignore index e51f112..9f0a1d9 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ desktop.ini # Do not include any built or cached files. build/ +.cache/ __pycache__/ *.pyc *.pyo diff --git a/src/cartdata.cpp b/src/cartdata.cpp index 8b3fef0..b0740dc 100644 --- a/src/cartdata.cpp +++ b/src/cartdata.cpp @@ -336,6 +336,7 @@ Parser *newCartParser(Dump &dump, FormatType formatType, uint8_t flags) { Parser *newCartParser(Dump &dump) { // Try all formats from the most complex one to the simplest. + //for (auto &format : _KNOWN_FORMATS) { for (int i = util::countOf(_KNOWN_FORMATS) - 1; i >= 0; i--) { auto &format = _KNOWN_FORMATS[i]; Parser *parser = newCartParser(dump, format.format, format.flags); diff --git a/src/libc/crt0.c b/src/libc/crt0.c index 0fde2fd..b70f1a5 100644 --- a/src/libc/crt0.c +++ b/src/libc/crt0.c @@ -56,7 +56,7 @@ int _start(int argc, const char **argv) { // Set $gp to point to the middle of the .sdata/.sbss sections, ensuring // variables placed in those sections can be quickly accessed. See the // linker script for more details. - __asm__ volatile("la $gp, _gp;"); + __asm__ volatile("la $gp, _gp\n"); // Set all uninitialized variables to zero by clearing the BSS section. __builtin_memset(_bssStart, 0, _bssEnd - _bssStart); diff --git a/src/ps1/cop0gte.h b/src/ps1/cop0gte.h index a124f6e..3fe96c5 100644 --- a/src/ps1/cop0gte.h +++ b/src/ps1/cop0gte.h @@ -19,24 +19,24 @@ #include #define COP0_GET(reg, output) \ - __asm__ volatile("mfc0 %0, $%1;" : "=r"(output) : "i"(reg)) + __asm__ volatile("mfc0 %0, $%1\n" : "=r"(output) : "i"(reg)) #define COP0_SET(reg, input) \ - __asm__ volatile("mtc0 %0, $%1;" :: "r"(input), "i"(reg)) + __asm__ volatile("mtc0 %0, $%1\n" :: "r"(input), "i"(reg)) #define GTE_GET(reg, output) \ - __asm__ volatile("mfc2 %0, $%1;" : "=r"(output) : "i"(reg)) + __asm__ volatile("mfc2 %0, $%1\n" : "=r"(output) : "i"(reg)) #define GTE_SET(reg, input) \ - __asm__ volatile("mtc2 %0, $%1;" :: "r"(input), "i"(reg)) + __asm__ volatile("mtc2 %0, $%1\n" :: "r"(input), "i"(reg)) #define GTE_GETC(reg, output) \ - __asm__ volatile("cfc2 %0, $%1;" : "=r"(output) : "i"(reg)) + __asm__ volatile("cfc2 %0, $%1\n" : "=r"(output) : "i"(reg)) #define GTE_SETC(reg, input) \ - __asm__ volatile("ctc2 %0, $%1;" :: "r"(input), "i"(reg)) + __asm__ volatile("ctc2 %0, $%1\n" :: "r"(input), "i"(reg)) #define GTE_LOAD(reg, offset, ptr) \ - __asm__ volatile("lwc2 $%0, %1(%2);" :: "i"(reg), "i"(offset), "r"(ptr)) + __asm__ volatile("lwc2 $%0, %1(%2)\n" :: "i"(reg), "i"(offset), "r"(ptr)) #define GTE_STORE(reg, offset, ptr) \ - __asm__ volatile("swc2 $%0, %1(%2);" :: "i"(reg), "i"(offset), "r"(ptr) : "memory") + __asm__ volatile("swc2 $%0, %1(%2)\n" :: "i"(reg), "i"(offset), "r"(ptr) : "memory") /* Coprocessor 0 */ @@ -179,7 +179,7 @@ typedef struct { } GTEMatrix; #define gte_command(cmd) \ - __asm__ volatile("nop; nop; cop2 %0;" :: "i"(cmd)) + __asm__ volatile("nop\n" "nop\n" "cop2 %0\n" :: "i"(cmd)) /* GTE control registers */ diff --git a/src/ps1/registers.h b/src/ps1/registers.h index c5f1819..ac53408 100644 --- a/src/ps1/registers.h +++ b/src/ps1/registers.h @@ -241,9 +241,9 @@ typedef enum { TIMER_CTRL_OVERFLOWED = 1 << 12 } TimerControlFlag; -#define TIMER_VALUE(N) _MMIO32((IO_BASE | 0x100) + (16 * (N))) -#define TIMER_CTRL(N) _MMIO32((IO_BASE | 0x104) + (16 * (N))) -#define TIMER_RELOAD(N) _MMIO32((IO_BASE | 0x108) + (16 * (N))) +#define TIMER_VALUE(N) _MMIO16((IO_BASE | 0x100) + (16 * (N))) +#define TIMER_CTRL(N) _MMIO16((IO_BASE | 0x104) + (16 * (N))) +#define TIMER_RELOAD(N) _MMIO16((IO_BASE | 0x108) + (16 * (N))) /* CD-ROM drive */ diff --git a/src/ps1/system.c b/src/ps1/system.c index 1552bb3..d2d47ae 100644 --- a/src/ps1/system.c +++ b/src/ps1/system.c @@ -89,22 +89,10 @@ void flushCache(void) { void softReset(void) { disableInterrupts(); BIOS_ENTRY_POINT(); + __builtin_unreachable(); } -/* IRQ acknowledgement and blocking delay */ - -void delayMicroseconds(int us) { - // 1 us = 33.8688 cycles = 17 loop iterations (2 cycles per iteration) - us *= (F_CPU + 1000000) / 2000000; - - __asm__ volatile( - ".set noreorder;" - "bgtz %0, .;" - "addiu %0, -1;" - ".set reorder;" - : "=r"(us) : "r"(us) - ); -} +/* IRQ acknowledgement */ bool acknowledgeInterrupt(IRQChannel irq) { if (IRQ_STAT & (1 << irq)) { @@ -116,22 +104,22 @@ bool acknowledgeInterrupt(IRQChannel irq) { } bool waitForInterrupt(IRQChannel irq, int timeout) { - for (; timeout > 0; timeout--) { + for (; timeout > 0; timeout -= 10) { if (acknowledgeInterrupt(irq)) return true; - delayMicroseconds(1); + delayMicroseconds(10); } return false; } bool waitForDMATransfer(DMAChannel dma, int timeout) { - for (; timeout > 0; timeout--) { + for (; timeout > 0; timeout -= 10) { if (!(DMA_CHCR(dma) & DMA_CHCR_ENABLE)) return true; - delayMicroseconds(1); + delayMicroseconds(10); } return false; diff --git a/src/ps1/system.h b/src/ps1/system.h index 1b2a0a3..f106ea6 100644 --- a/src/ps1/system.h +++ b/src/ps1/system.h @@ -136,13 +136,24 @@ void softReset(void); /** * @brief Blocks for (roughly) the specified number of microseconds. This - * function does not rely on a hardware timer, so interrupts may throw off - * timings if not explicitly disabled prior to calling delayMicroseconds(). + * function will reset hardware timer 2 and use it for timing. Disabling + * interrupts prior to calling delayMicroseconds() is highly recommended to + * prevent jitter, but not strictly necessary unless the interrupt handler + * accesses timer 2. * * @param time */ void delayMicroseconds(int time); +/** + * @brief Blocks for (roughly) the specified number of microseconds. This + * function does not rely on a hardware timer, so interrupts may throw off + * timings if not explicitly disabled prior to calling delayMicrosecondsBusy(). + * + * @param time + */ +void delayMicrosecondsBusy(int time); + /** * @brief Checks if the specified interrupt was fired but not yet acknowledged; * if so, acknowledges it and returns true. This function can be used in a @@ -162,9 +173,9 @@ bool acknowledgeInterrupt(IRQChannel irq); /** * @brief Waits for the specified interrupt to be fired for up to the specified - * number of microseconds. This function will work with interrupts that are not - * explicitly enabled in the IRQ_MASK register, but will *not* work with - * interrupts that have been enabled if any callback set using + * number of microseconds (with 10 us granularity). This function will work with + * interrupts that are not explicitly enabled in the IRQ_MASK register, but will + * *not* work with interrupts that have been enabled if any callback set using * setInterruptHandler() acknowledges them. * * @param irq @@ -175,7 +186,7 @@ bool waitForInterrupt(IRQChannel irq, int timeout); /** * @brief Waits for the specified DMA channel to finish any ongoing transfer for - * up to the specified number of microseconds. + * up to the specified number of microseconds (with 10 us granularity). * * @param dma * @param timeout @@ -207,7 +218,7 @@ static inline void switchThreadImmediate(Thread *thread) { switchThread(thread); // Execute a syscall to force the switch to happen. - __asm__ volatile("syscall 0;" ::: "memory"); + __asm__ volatile("syscall 0\n" ::: "memory"); } #ifdef __cplusplus diff --git a/src/ps1/system.s b/src/ps1/system.s index c58e7f6..efbdc2e 100644 --- a/src/ps1/system.s +++ b/src/ps1/system.s @@ -15,6 +15,8 @@ .set noreorder .set noat +## Exception handler + .set BADV, $8 .set SR, $12 .set CAUSE, $13 @@ -179,3 +181,96 @@ _exceptionHandler: jr $k1 rfe + +## Delay functions + +.set IO_BASE, 0xbf801000 + +.set TIMER2_VALUE, IO_BASE | 0x120 +.set TIMER2_CTRL, IO_BASE | 0x124 +.set TIMER2_RELOAD, IO_BASE | 0x128 + +.section .text.delayMicroseconds, "ax", @progbits +.global delayMicroseconds +.type delayMicroseconds, @function + +delayMicroseconds: + # Calculate the approximate number of CPU cycles that need to be burned, + # assuming a 33.8688 MHz clock (1 us = 33.8688 = ~33.875 cycles). + sll $a1, $a0, 8 # cycles = ((us * 271) + 4) / 8 + sll $a2, $a0, 4 + addu $a1, $a2 + subu $a1, $a0 + addiu $a1, 4 + sra $a0, $a1, 3 + + # Compensate for the overhead of calculating the cycle count, entering the + # loop and returning. + addiu $a0, -(6 + 1 + 2 + 3 + 2) + + # Reset timer 2 to its default setting of counting system clock edges. + lui $v1, %hi(IO_BASE) + sh $0, %lo(TIMER2_CTRL)($v1) # TIMER2_CTRL = 0 + + # Wait for up to 0xff00 cycles at a time (resetting the timer and waiting + # for it to count up each time), as the counter is only 16 bits wide. We + # have to wait 0xff00 cycles rather than 0x10000 since the counter wraps + # around rather than saturating on overflow. + li $a1, 0xff00 + slt $v0, $a1, $a0 + #beqz $v0, .LshortDelay + #nop + beqz $v0, .LskipLongDelay + +.LlongDelay: # for (; cycles > 0xff00; cycles -= 0xff00) + sh $0, %lo(TIMER2_VALUE)($v1) # TIMER2_VALUE = 0 + li $v0, 0 + +.LlongDelayLoop: # while (TIMER2_VALUE < 0xff00); + nop + slt $v0, $v0, $a1 + bnez $v0, .LlongDelayLoop + lhu $v0, %lo(TIMER2_VALUE)($v1) + + slt $v0, $a1, $a0 + bnez $v0, .LlongDelay + subu $a0, $a1 + +.LshortDelay: + # Run the last busy loop once less than 0xff00 cycles are remaining. + sh $0, %lo(TIMER2_VALUE)($v1) # TIMER2_VALUE = 0 +.LskipLongDelay: + li $v0, 0 + +.LshortDelayLoop: # while (TIMER2_VALUE < cycles); + nop + slt $v0, $v0, $a0 + bnez $v0, .LshortDelayLoop + lhu $v0, %lo(TIMER2_VALUE)($v1) + + jr $ra + nop + +.section .text.delayMicrosecondsBusy, "ax", @progbits +.global delayMicrosecondsBusy +.type delayMicrosecondsBusy, @function + +delayMicrosecondsBusy: + # Calculate the approximate number of CPU cycles that need to be burned, + # assuming a 33.8688 MHz clock (1 us = 33.8688 = ~33.875 cycles). + sll $a1, $a0, 8 # cycles = ((us * 271) + 4) / 8 + sll $a2, $a0, 4 + addu $a1, $a2 + subu $a1, $a0 + addiu $a1, 4 + sra $a0, $a1, 3 + + # Compensate for the overhead of calculating the cycle count and returning. + addiu $a0, -(6 + 1 + 2) + +.Lloop: # while (cycles > 0) cycles -= 2 + bgtz $a0, .Lloop + addiu $a0, -2 + + jr $ra + nop diff --git a/src/util.cpp b/src/util.cpp index b489ba1..24b0d25 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -42,8 +42,8 @@ Logger::Logger(void) } void Logger::clear(void) { - for (int i = 0; i < MAX_LOG_LINES; i++) - _lines[i][0] = 0; + for (auto line : _lines) + line[0] = 0; } void Logger::log(const char *format, ...) { @@ -114,8 +114,10 @@ uint32_t zipCRC32(const uint8_t *data, size_t length, uint32_t crc) { crc = ~crc; for (; length; length--) { + uint32_t temp = crc; + crc >>= 8; - crc ^= table[(crc ^ *(data++)) & 0xff]; + crc ^= table[(temp ^ *(data++)) & 0xff]; } return ~crc; diff --git a/src/util.hpp b/src/util.hpp index 7c278e1..1765de4 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -224,7 +224,7 @@ public: /* Logger (basically a ring buffer of lines) */ static constexpr int MAX_LOG_LINE_LENGTH = 128; -static constexpr int MAX_LOG_LINES = 32; +static constexpr int MAX_LOG_LINES = 64; class Logger { private: