about | summary | refs | log | tree | commit | diff
path: root/plugins
diff options
context:
space:
mode:
author notaz 2013-04-01 03:03:52 +0300
committer notaz 2013-04-01 18:33:15 +0300
commit ed0fd81dfdb63fe5941b9010ace353719168d3ae (patch)
tree 15ff980a0268f7707a1bb9961345f42810189983 /plugins
parent cd2306936bb0e14363d91aeb0fb29167eb94dd2a (diff)
download pcsx_rearmed-ed0fd81dfdb63fe5941b9010ace353719168d3ae.tar.gz
pcsx_rearmed-ed0fd81dfdb63fe5941b9010ace353719168d3ae.tar.bz2
pcsx_rearmed-ed0fd81dfdb63fe5941b9010ace353719168d3ae.zip
psx_gpu: convert to UAL, load everything from context
Diffstat (limited to 'plugins')
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu.c 1
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu.h 4
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S 68
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h 1
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c 1
5 files changed, 33 insertions, 42 deletions
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c
index f52e842..e113f06 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c
@@ -5056,6 +5056,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512);
initialize_reciprocal_table();
+ psx_gpu->reciprocal_table_ptr = reciprocal_table;
// 00 01 10 11
// 00 0 4 1 5
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h
index 846658c..1eaa99a 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.h
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h
@@ -180,6 +180,8 @@ typedef struct
u16 clut_settings;
u16 texture_settings;
+ u32 *reciprocal_table_ptr;
+
// enhancement stuff
u16 *enhancement_buf_ptr;
u16 *enhancement_current_buf_ptr;
@@ -192,7 +194,7 @@ typedef struct
// Align up to 64 byte boundary to keep the upcoming buffers cache line
// aligned, also make reachable with single immediate addition
- u8 reserved_a[164];
+ u8 reserved_a[160];
// 8KB
block_struct blocks[MAX_BLOCKS_PER_ROW];
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
index d8fb153..8df7aca 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@ -31,6 +31,8 @@
#define edge_data_right_mask_offset 4
#define edge_data_y_offset 6
+.syntax unified
+.text
#define psx_gpu r0
#define v_a r1
@@ -192,18 +194,6 @@
.align 4
-/* FIXME: users of this should be in psx_gpu instead */
-#ifndef __PIC__
-#define load_pointer(register, pointer) \
- movw register, :lower16:pointer; \
- movt register, :upper16:pointer; \
-
-#else
-#define load_pointer(register, pointer) \
- ldr register, =pointer \
-
-#endif
-
#define function(name) \
.global name; \
name: \
@@ -576,7 +566,7 @@ function(compute_all_gradients)
vld1.32 { uvrg }, [ temp ]; \
add temp, psx_gpu, #psx_gpu_uvrg_dy_offset; \
vld1.32 { uvrg_dy }, [ temp ]; \
- load_pointer(reciprocal_table_ptr, reciprocal_table); \
+ ldr reciprocal_table_ptr, [ psx_gpu, #psx_gpu_reciprocal_table_ptr_offset ]; \
\
vmov.u32 c_0x01, #0x01 \
@@ -624,7 +614,7 @@ function(compute_all_gradients)
#define height_b_alt r12
#define compute_edge_delta_x3(start_c, height_a, height_b) \
- vmov.u32 heights, height_a, height_b; \
+ vmov heights, height_a, height_b; \
ldr temp, [ reciprocal_table_ptr, height_a, lsl #2 ]; \
vmov.u32 edge_shifts[0], temp; \
ldr temp, [ reciprocal_table_ptr, height_b, lsl #2 ]; \
@@ -884,7 +874,7 @@ function(compute_all_gradients)
add temp, temp, #(1 << 16); \
add y_a, temp, #2; \
add y_a, y_a, #(2 << 16); \
- vmov.u32 y_x4, temp, y_a; \
+ vmov y_x4, temp, y_a; \
\
setup_spans_adjust_edges_alternate_##alternate_active(left_index, \
right_index); \
@@ -939,7 +929,7 @@ function(compute_all_gradients)
sub temp, temp, #(1 << 16); \
sub y_a, temp, #2; \
sub y_a, y_a, #(2 << 16); \
- vmov.u32 y_x4, temp, y_a; \
+ vmov y_x4, temp, y_a; \
\
vaddw.s32 edges_xy, edges_xy, edges_dx_dy; \
\
@@ -970,7 +960,7 @@ function(compute_all_gradients)
sub height, y_a, y_c; \
\
vdup.u32 x_starts, x_a; \
- vmov.u32 x_ends, x_c, x_b; \
+ vmov x_ends, x_c, x_b; \
\
compute_edge_delta_x3(x_b, height_major, height_minor_a); \
setup_spans_up(major, minor, minor, yes); \
@@ -982,8 +972,6 @@ function(setup_spans_up_left)
function(setup_spans_up_right)
setup_spans_up_up(right, left)
-.pool
-
#define setup_spans_down_down(minor, major) \
setup_spans_prologue(); \
sub height_minor_a, y_b, y_a; \
@@ -991,7 +979,7 @@ function(setup_spans_up_right)
sub height, y_c, y_a; \
\
vdup.u32 x_starts, x_a; \
- vmov.u32 x_ends, x_c, x_b; \
+ vmov x_ends, x_c, x_b; \
\
compute_edge_delta_x3(x_b, height_major, height_minor_a); \
setup_spans_down(major, minor, minor, yes); \
@@ -1014,7 +1002,7 @@ function(setup_spans_down_right)
function(setup_spans_up_a)
setup_spans_prologue()
- vmov.u32 x_starts, x_a, x_b
+ vmov x_starts, x_a, x_b
vdup.u32 x_ends, x_c
setup_spans_up_flat()
@@ -1023,7 +1011,7 @@ function(setup_spans_up_b)
setup_spans_prologue()
vdup.u32 x_starts, x_a
- vmov.u32 x_ends, x_b, x_c
+ vmov x_ends, x_b, x_c
setup_spans_up_flat()
@@ -1037,7 +1025,7 @@ function(setup_spans_up_b)
function(setup_spans_down_a)
setup_spans_prologue()
- vmov.u32 x_starts, x_a, x_b
+ vmov x_starts, x_a, x_b
vdup.u32 x_ends, x_c
setup_spans_down_flat()
@@ -1046,7 +1034,7 @@ function(setup_spans_down_b)
setup_spans_prologue()
vdup.u32 x_starts, x_a
- vmov.u32 x_ends, x_b, x_c
+ vmov x_ends, x_b, x_c
setup_spans_down_flat()
@@ -1077,13 +1065,13 @@ function(setup_spans_up_down)
sub height_minor_b, y_c, y_a
sub height_major, y_c, y_b
- vmov.u32 x_starts, x_a, x_c
+ vmov x_starts, x_a, x_c
vdup.u32 x_ends, x_b
compute_edge_delta_x3(x_a, height_minor_a, height_major)
mov temp, #0
- vmov.u32 height_increment, temp, height_minor_b
+ vmov height_increment, temp, height_minor_b
vmlal.s32 edges_xy, edges_dx_dy, height_increment
vmov edges_xy_b_left, edge_alt_low, edge_alt_high
@@ -1120,7 +1108,7 @@ function(setup_spans_up_down)
sub temp, temp, #(1 << 16)
sub y_a, temp, #2
sub y_a, y_a, #(2 << 16)
- vmov.u32 y_x4, temp, y_a
+ vmov y_x4, temp, y_a
vaddw.s32 edges_xy, edges_xy, edges_dx_dy
@@ -1170,7 +1158,7 @@ function(setup_spans_up_down)
add temp, temp, #(1 << 16)
add y_a, temp, #2
add y_a, y_a, #(2 << 16)
- vmov.u32 y_x4, temp, y_a
+ vmov y_x4, temp, y_a
setup_spans_adjust_edges_alternate_no(left, right)
@@ -1204,8 +1192,6 @@ function(setup_spans_up_down)
bne 2b
bal 1b
-.pool
-
#undef span_uvrg_offset
#undef span_edge_data
#undef span_b_offset
@@ -1936,7 +1922,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
vdup.u16 colors, color
add span_edge_data, psx_gpu, #psx_gpu_span_edge_data_offset
- orr color, color, lsl #16
+ orr color, color, color, lsl #16
0:
@@ -1978,7 +1964,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
moveq right_mask, right_mask, lsr #2
tst right_mask, #0x1
- streqh color, [ fb_ptr ]
+ strheq color, [ fb_ptr ]
1:
add span_edge_data, span_edge_data, #8
@@ -2690,7 +2676,7 @@ function(texture_blocks_4bpp)
orr pixels_a, pixels_a, pixel_3, lsl #24
orr pixels_b, pixels_b, pixel_7, lsl #24
- vmov.u32 texels, pixels_a, pixels_b
+ vmov texels, pixels_a, pixels_b
vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels
vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels
@@ -4751,7 +4737,7 @@ setup_sprite_update_texture_8bpp_cache:
mov fb_ptr_advance_column, #32; \
vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
\
- sub fb_ptr_advance_column, height, lsl #11; \
+ sub fb_ptr_advance_column, fb_ptr_advance_column, height, lsl #11; \
vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \
#define setup_sprite_setup_right_draw_mask_fb_ptr() \
@@ -5095,7 +5081,7 @@ setup_sprite_update_texture_8bpp_cache:
mov fb_ptr_advance_column, #32 * 2; \
vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \
vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \
- sub fb_ptr_advance_column, height, lsl #11 + 1; \
+ sub fb_ptr_advance_column, fb_ptr_advance_column, height, lsl #11 + 1; \
vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \
vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \
@@ -5428,7 +5414,7 @@ function(setup_sprite_16bpp)
add texture_offset_base, u, u
add width_rounded, width, #7
- add texture_offset_base, v, lsl #11
+ add texture_offset_base, texture_offset_base, v, lsl #11
mov left_mask_bits, #0xFF
ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
@@ -5443,7 +5429,7 @@ function(setup_sprite_16bpp)
and right_width, width_rounded, #0x7
mvn left_mask_bits, left_mask_bits, lsl left_offset
- add texture_mask, texture_mask_height, lsl #11
+ add texture_mask, texture_mask, texture_mask_height, lsl #11
mov block_width, width_rounded, lsr #3
mov right_mask_bits, right_mask_bits, lsl right_width
@@ -5590,7 +5576,7 @@ function(setup_sprite_16bpp_4x)
add texture_offset_base, u, u
add width_rounded, width, #7
- add texture_offset_base, v, lsl #11
+ add texture_offset_base, texture_offset_base, v, lsl #11
movw left_mask_bits, #0xFFFF
ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
@@ -5609,7 +5595,7 @@ function(setup_sprite_16bpp_4x)
lsl right_width, #1
- add texture_mask, texture_mask_height, lsl #11
+ add texture_mask, texture_mask, texture_mask_height, lsl #11
mov block_width, width_rounded, lsr #3
mov right_mask_bits, right_mask_bits, lsl right_width
@@ -5760,7 +5746,7 @@ function(setup_sprite_untextured)
ldrh r12, [ psx_gpu, #psx_gpu_render_state_offset ]
tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \
| RENDER_FLAGS_BLEND)
- ldreqb r12, [ psx_gpu, #psx_gpu_render_mode_offset ]
+ ldrbeq r12, [ psx_gpu, #psx_gpu_render_mode_offset ]
tsteq r12, #RENDER_INTERLACE_ENABLED
beq setup_sprite_untextured_simple
@@ -6081,7 +6067,7 @@ function(scale2x_tiles8)
mov r14, r2
add r0, #1024*2*2
add r4, #1024*2
- sub r0, r2, lsl #4+1
+ sub r0, r0, r2, lsl #4+1
mov r1, r4
add r12, r0, #1024*2
bgt 0b
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
index 1307891..5460e40 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
@@ -48,6 +48,7 @@
#define psx_gpu_offset_y_offset 0x102
#define psx_gpu_clut_settings_offset 0x104
#define psx_gpu_texture_settings_offset 0x106
+#define psx_gpu_reciprocal_table_ptr_offset 0x108
#define psx_gpu_blocks_offset 0x200
#define psx_gpu_span_uvrg_offset_offset 0x2200
#define psx_gpu_span_edge_data_offset 0x4200
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
index 5adfb75..b1de121 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
@@ -73,6 +73,7 @@ int main()
WRITE_OFFSET(f, offset_y);
WRITE_OFFSET(f, clut_settings);
WRITE_OFFSET(f, texture_settings);
+ WRITE_OFFSET(f, reciprocal_table_ptr);
WRITE_OFFSET(f, blocks);
WRITE_OFFSET(f, span_uvrg_offset);
WRITE_OFFSET(f, span_edge_data);