From 4d6467383217647e3fbc58ab9213a31c0f3bd8c9 Mon Sep 17 00:00:00 2001 From: notaz Date: Tue, 2 Apr 2013 04:03:25 +0300 Subject: drc/psx_gpu: handle more calling conventions --- libpcsxcore/new_dynarec/assem_arm.c | 26 ++++++++++++-------- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 37 ++++++++++++++++------------- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 8fe88fd..45edd65 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -34,6 +34,12 @@ char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); #endif +#ifndef __MACH__ +#define CALLER_SAVE_REGS 0x100f +#else +#define CALLER_SAVE_REGS 0x120f +#endif + extern int cycle_count; extern int last_count; extern int pcaddr; @@ -2627,13 +2633,13 @@ static void restore_regs_all(u_int reglist) // Save registers before function call static void save_regs(u_int reglist) { - reglist&=0x100f; // only save the caller-save registers, r0-r3, r12 + reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12 save_regs_all(reglist); } // Restore registers after function call static void restore_regs(u_int reglist) { - reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12 + reglist&=CALLER_SAVE_REGS; restore_regs_all(reglist); } @@ -4518,7 +4524,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) for(hr=0;hr<HOST_REGS;hr++) { if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr; } - reglist=reglist_full&0x100f; + reglist=reglist_full&CALLER_SAVE_REGS; if (gte_handlers[c2op]!=NULL) { need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works @@ -4536,7 +4542,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) int v = (source[i] >> 15) & 3; int cv = (source[i] >> 13) & 3; int mx = (source[i] >> 17) & 3; - reglist=reglist_full&0x10ff; // +{r4-r7} + reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7} c2op_prologue(c2op,reglist); /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */ if(v<3) @@ -5414,7 +5420,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) 
assert(m2h>=0); assert(m1l>=0); assert(m2l>=0); - save_regs(0x100f); + save_regs(CALLER_SAVE_REGS); if(m1l!=0) emit_mov(m1l,0); if(m1h==0) emit_readword((int)&dynarec_local,1); else if(m1h>1) emit_mov(m1h,1); @@ -5423,7 +5429,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3); else if(m2h>3) emit_mov(m2h,3); emit_call((int)&multu64); - restore_regs(0x100f); + restore_regs(CALLER_SAVE_REGS); signed char hih=get_reg(i_regs->regmap,HIREG|64); signed char hil=get_reg(i_regs->regmap,HIREG); signed char loh=get_reg(i_regs->regmap,LOREG|64); @@ -5494,7 +5500,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) assert(d2h>=0); assert(d1l>=0); assert(d2l>=0); - save_regs(0x100f); + save_regs(CALLER_SAVE_REGS); if(d1l!=0) emit_mov(d1l,0); if(d1h==0) emit_readword((int)&dynarec_local,1); else if(d1h>1) emit_mov(d1h,1); @@ -5503,7 +5509,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3); else if(d2h>3) emit_mov(d2h,3); emit_call((int)&div64); - restore_regs(0x100f); + restore_regs(CALLER_SAVE_REGS); signed char hih=get_reg(i_regs->regmap,HIREG|64); signed char hil=get_reg(i_regs->regmap,HIREG); signed char loh=get_reg(i_regs->regmap,LOREG|64); @@ -5527,7 +5533,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) assert(d2h>=0); assert(d1l>=0); assert(d2l>=0); - save_regs(0x100f); + save_regs(CALLER_SAVE_REGS); if(d1l!=0) emit_mov(d1l,0); if(d1h==0) emit_readword((int)&dynarec_local,1); else if(d1h>1) emit_mov(d1h,1); @@ -5536,7 +5542,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3); else if(d2h>3) emit_mov(d2h,3); emit_call((int)&divu64); - restore_regs(0x100f); + restore_regs(CALLER_SAVE_REGS); signed char hih=get_reg(i_regs->regmap,HIREG|64); signed char hil=get_reg(i_regs->regmap,HIREG); signed char loh=get_reg(i_regs->regmap,LOREG|64); diff --git 
a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 63252b0..efb065d 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -205,6 +205,8 @@ #define JT_OP(x...) x #define JTE(start, target) target +#define EXTRA_UNSAVED_REGS + #else #define function(name) \ @@ -220,6 +222,9 @@ #define JT_OP(x...) #define JTE(start, target) (target - start) +// r7 is preserved, but add it for EABI alignment.. +#define EXTRA_UNSAVED_REGS r7, r9, + #define flush_render_block_buffer _flush_render_block_buffer #define setup_sprite_untextured_simple _setup_sprite_untextured_simple #define update_texture_8bpp_cache _update_texture_8bpp_cache @@ -1588,9 +1593,9 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ - stmdb sp!, { r0 - r3, r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ bl flush_render_block_buffer; \ - ldmia sp!, { r0 - r3, r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ \ vpop { uvrg_dx4 }; \ vpop { texture_mask }; \ @@ -1785,9 +1790,9 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ - stmdb sp!, { r0 - r3, r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ bl flush_render_block_buffer; \ - ldmia sp!, { r0 - r3, r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ \ vpop { uvrg_dx4 }; \ vpop { texture_mask }; \ @@ -1901,9 +1906,9 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect) 2: vpush { colors } - stmdb sp!, { r0 - r3, r12, r14 } + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } vpop { colors } @@ -2316,9 +2321,9 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ /* 
TODO: Load from psx_gpu instead of saving/restoring these */\ vpush { rg_dx4 }; \ \ - stmdb sp!, { r0 - r3, r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ bl flush_render_block_buffer; \ - ldmia sp!, { r0 - r3, r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ \ vpop { rg_dx4 }; \ \ @@ -2809,11 +2814,11 @@ function(texture_blocks_8bpp) ldmia sp!, { r3 - r11, pc } 1: - stmdb sp!, { r1 - r2, r12 } + stmdb sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 } bl update_texture_8bpp_cache - ldmia sp!, { r1 - r2, r12 } + ldmia sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 } bal 0b @@ -4479,9 +4484,9 @@ function(render_block_fill_body) setup_sprite_flush_blocks: vpush { q1 - q5 } - stmdb sp!, { r0 - r3, r12, r14 } + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } vpop { q1 - q5 } @@ -4496,9 +4501,9 @@ setup_sprite_update_texture_4bpp_cache: setup_sprite_update_texture_8bpp_cache: - stmdb sp!, { r0 - r3, r14 } + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r14 } bl update_texture_8bpp_cache - ldmia sp!, { r0 - r3, pc } + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS pc } #define setup_sprite_tiled_initialize_4bpp() \ @@ -5422,9 +5427,9 @@ function(texture_sprite_blocks_8bpp) setup_sprites_16bpp_flush: vpush { d0 - d3 } - stmdb sp!, { r0 - r3, r12, r14 } + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } vpop { d0 - d3 } -- cgit v1.2.3