diff options
author | David Guillen Fandos | 2021-03-08 18:44:03 +0100 |
---|---|---|
committer | David Guillen Fandos | 2021-03-08 18:44:03 +0100 |
commit | 56dc6ecb70e6fc76d32d6a7194acb273b76bfe0e (patch) | |
tree | cf3b14a8d1bc593248398d0c544251ea8987b40e /psp | |
parent | 02e35339ee89f92d346d290c24497bbbae59ea79 (diff) | |
download | picogpsp-56dc6ecb70e6fc76d32d6a7194acb273b76bfe0e.tar.gz picogpsp-56dc6ecb70e6fc76d32d6a7194acb273b76bfe0e.tar.bz2 picogpsp-56dc6ecb70e6fc76d32d6a7194acb273b76bfe0e.zip |
Remove libco
This removes libco and all the usages of it (+pthreads).
Rewired all dynarecs and the interpreter to return after every frame so that
libretro can process events. This required making the dynarecs re-entrant.
Dynarecs were updated to check for a new frame on every update (IRQ, cycle
exhaustion, I/O write, etc.). The performance impact of doing so should
be minimal (and is definitely outweighed by the gains from removing libco). While at it,
fixed small issues to get a bit more perf: arm dynarec was not idling
correctly, mips was using stack when not needed, etc.
Tested on PSP (mips), OGA (armv7), Linux (x86 and interpreter). Not
tested on Android though.
Diffstat (limited to 'psp')
-rw-r--r-- | psp/mips_stub.S | 90 |
1 files changed, 65 insertions, 25 deletions
diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 420f602..c89a5b1 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -51,7 +51,6 @@ .global memory_map_write .global reg -.extern reg .extern spsr # MIPS register layout: @@ -117,7 +116,8 @@ .equ CPU_MODE, (29 * 4) .equ CPU_HALT_STATE, (30 * 4) .equ CHANGED_PC_STATUS, (31 * 4) -.equ GP_SAVE, (32 * 4) +.equ COMPLETED_FRAME, (32 * 4) +.equ GP_SAVE, (33 * 4) .equ SUPERVISOR_LR, (reg_mode + (3 * (7 * 4)) + (6 * 4)) .equ SUPERVISOR_SPSR, (spsr + (3 * 4)) @@ -206,28 +206,52 @@ .balign 64 +# This gets called every time the cycle counter runs out +# (checked at every branch/jump) mips_update_gba: sw $4, REG_PC($16) # current PC = $4 - addiu $sp, $sp, -4 # make room on the stack - sw $ra,($sp) # save return address + sw $ra, REG_SAVE2($16) # save return addr collapse_flags # update cpsr save_registers # save registers jal update_gba # process the next event sw $0, CHANGED_PC_STATUS($16) - lw $ra, ($sp) # restore return address - addiu $sp, $sp, 4 # fix stack + lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame + bne $1, $0, return_to_main # Return to main thread now + + addu $17, $2, $0 # $17 = new cycle count (ret value) + + lw $ra, REG_SAVE2($16) # restore return address lw $1, CHANGED_PC_STATUS($16) bne $1, $0, lookup_pc - addu $17, $2, $0 # $17 = new cycle count (delay slot) + nop restore_registers jr $ra # if not, go back to caller nop + +# Loads the main context and returns to it. +# ARM regs must be saved before branching here +return_to_main: + lw $28, GP_SAVE($16) # Restore previous state + lw $s0, 0($sp) + lw $s1, 4($sp) + lw $s2, 8($sp) + lw $s3, 12($sp) + lw $s4, 16($sp) + lw $s5, 20($sp) + lw $s6, 24($sp) + lw $s7, 28($sp) + lw $fp, 32($sp) + lw $ra, 36($sp) + jr $ra # Return to main + add $sp, $sp, 48 # Restore stack pointer (delay slot) + + # Perform an indirect branch. 
# $4: GBA address to branch to @@ -2059,8 +2083,7 @@ execute_store_io_u8: region_check 4, patch_store_u8 andi $5, $5, 0xFF # make value 8bit andi $4, $4, 0x3FF # wrap around address - addiu $sp, $sp, -4 # make room on the stack for $ra - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) # preserve $ra save_registers jal write_io_register8 # write the value out @@ -2160,8 +2183,7 @@ execute_store_io_u16: region_check 4, patch_store_u16 andi $5, $5, 0xFFFF # make value 16bit andi $4, $4, 0x3FE # wrap around/align address - addiu $sp, $sp, -4 # make room on the stack for $ra - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) # preserve $ra save_registers jal write_io_register16 # write the value out @@ -2263,8 +2285,7 @@ execute_store_io_u32: region_check 4, patch_store_u32 nop andi $4, $4, 0x3FC # wrap around/align address - addiu $sp, $sp, -4 # make room on the stack for $ra - sw $ra, ($sp) + sw $ra, REG_SAVE3($16) # preserve $ra save_registers jal write_io_register32 # write the value out @@ -2482,6 +2503,10 @@ write_io_epilogue: alert_loop: jal update_gba # process the next event nop + + lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame + bne $1, $0, return_to_main # Return to main thread now + lw $1, CPU_HALT_STATE($16) # check if CPU is sleeping bne $1, $0, alert_loop # see if it hasn't changed nop @@ -2490,21 +2515,20 @@ alert_loop: lw $4, REG_PC($16) # $4 = new PC j lookup_pc - addiu $sp, $sp, 4 # fix the stack (delay slot) + nop irq_alert: restore_registers j lookup_pc # PC has changed, get a new one - addiu $sp, $sp, 4 # fix the stack + nop no_alert: restore_registers - lw $ra, ($sp) # restore return address + lw $ra, REG_SAVE3($16) # restore return jr $ra # we can return - addiu $sp, $sp, 4 # fix the stack + nop smc_dma: - addiu $sp, $sp, 4 # fix the stack jal flush_translation_cache_ram # flush translation cache nop j lookup_pc @@ -2740,16 +2764,32 @@ ror_zero_shift: # $4: cycle counter argument execute_arm_translate: - addu $17, $4, $0 # load cycle counter 
register + add $sp, $sp, -48 # Store the main thread context + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + sw $s4, 16($sp) + sw $s5, 20($sp) + sw $s6, 24($sp) + sw $s7, 28($sp) + sw $fp, 32($sp) + sw $ra, 36($sp) + + lui $16, %hi(reg) # load reg address into base reg + addiu $16, %lo(reg) + + sw $28, GP_SAVE($16) - lui $4, %hi(arm_reg) # load arm_reg address into $4 - addiu $4, %lo(arm_reg) - - jal move_reg # update reg to point to arm_reg - addu $16, $4, $0 # copy address of arm_reg into $16 + addu $17, $4, $0 # load cycle counter register extract_flags # load flag variables + # CPU might be sleeping, do not wake ip up! + lw $1, CPU_HALT_STATE($16) # check if CPU is sleeping + bne $1, $0, alert_loop # see if it hasn't changed + + lw $1, REG_CPSR($16) and $1, $1, 0x20 # see if Thumb bit is set in flags bne $1, $0, 1f @@ -2810,7 +2850,7 @@ memory_map_read: # This must be between memory_map_read and memory_map_write because it's used # to calculate their addresses elsewhere in this file. -arm_reg: +reg: .space 0x100 memory_map_write: |