about | summary | refs | log | tree | commit | diff
path: root/plugins/gpu_neon
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/gpu_neon')
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu.c          |  2
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 45
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu_main.c     | 11
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c    | 52
4 files changed, 67 insertions, 43 deletions
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c
index 75deb30..76bfb15 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c
@@ -4352,8 +4352,8 @@ void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
u32 width, u32 height, u32 pitch)
{
u16 *vram_ptr = psx_gpu->vram_ptr + x + (y * 1024);
- u32 mask_msb = psx_gpu->mask_msb;
u32 draw_x, draw_y;
+ u32 mask_msb = psx_gpu->mask_msb;
if((width == 0) || (height == 0))
return;
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
index 0dc7ece..973a8b3 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@ -4363,9 +4363,11 @@ function(warmup)
#define num_unaligned r2
#define num_width r6
-#undef colors
+#undef colors_a
+#undef colors_b
-#define colors q0
+#define colors_a q0
+#define colors_b q1
.align 3
@@ -4386,51 +4388,26 @@ function(render_block_fill_body)
orr color, color_r, color_g, lsl #5
orr color, color, color_b, lsl #10
- add left_unaligned, x, #0x7
+ vdup.u16 colors_a, color
- bic left_unaligned, left_unaligned, #0x7
- vdup.u16 colors, color
-
- sub left_unaligned, left_unaligned, x
+ vmov colors_b, colors_a
mov pitch, #2048
-
sub pitch, pitch, width, lsl #1
- sub width, width, left_unaligned
-
- and right_unaligned, width, #0x7
- bic width, width, #0x7
0:
- mov num_width, width, lsr #3
-
- movs num_unaligned, left_unaligned
- beq 2f
+ mov num_width, width, lsr #4
1:
- strh color, [ vram_ptr ], #2
-
- subs num_unaligned, num_unaligned, #1
- bne 1b
+ vst1.u32 { colors_a, colors_b }, [ vram_ptr, :128 ]!
- 2:
- vst1.u32 { colors }, [ vram_ptr, :128 ]!
subs num_width, num_width, #1
- bne 2b
-
- movs num_unaligned, right_unaligned
- beq 4f
-
- 3:
- strh color, [ vram_ptr ], #2
-
- subs num_unaligned, num_unaligned, #1
- bne 3b
+ bne 1b
- 4:
add vram_ptr, vram_ptr, pitch
subs height, height, #1
bne 0b
-
+
+ 1:
ldmia sp!, { r4 - r6, pc }
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
index f1f7944..6c17b0a 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
@@ -174,7 +174,7 @@ int main(int argc, char *argv[])
MAP_SHARED | 0xA0000000, fbdev_handle, 0));
vram_ptr += 64;
- initialize_psx_gpu(psx_gpu, vram_ptr + 64);
+ initialize_psx_gpu(psx_gpu, vram_ptr);
#else
initialize_psx_gpu(psx_gpu, _vram + 64);
#endif
@@ -204,14 +204,15 @@ int main(int argc, char *argv[])
gpu_parse(psx_gpu, list, size);
flush_render_block_buffer(psx_gpu);
- printf("%-64s: ", argv[1]);
#ifdef NEON_BUILD
u32 cycles_elapsed = get_counter() - cycles;
- printf("%d\n", cycles_elapsed);
+ printf("%-64s: %d\n", argv[1], cycles_elapsed);
+#else
+ printf("%-64s: ", argv[1]);
#endif
-#if 0
+#if 1
u32 i;
for(i = 0; i < 1024 * 512; i++)
@@ -238,7 +239,7 @@ int main(int argc, char *argv[])
}
#endif
-#if 1
+#if 0
printf("\n");
printf(" %d pixels, %d pixel blocks, %d spans\n"
" (%lf pixels per block, %lf pixels per span),\n"
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
index 7fee2eb..4f3dd12 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
@@ -210,16 +210,61 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
s16 *list_s16 = (void *)list;
current_command = *list >> 24;
command_length = command_lengths[current_command];
-
+
switch(current_command)
{
case 0x00:
break;
case 0x02:
- render_block_fill(psx_gpu, list[0] & 0xFFFFFF, list_s16[2] & 0x3FF,
- list_s16[3] & 0x1FF, list_s16[4] & 0x3FF, list_s16[5] & 0x1FF);
+ {
+ u32 x = list_s16[2] & 0x3FF;
+ u32 y = list_s16[3] & 0x1FF;
+ u32 width = list_s16[4] & 0x3FF;
+ u32 height = list_s16[5] & 0x1FF;
+ u32 color = list[0] & 0xFFFFFF;
+
+ x &= ~0xF;
+ width = ((width + 0xF) & ~0xF);
+
+ if((x + width) > 1024)
+ {
+ u32 width_a = 1024 - x;
+ u32 width_b = width - width_a;
+
+ if((y + height) > 512)
+ {
+ u32 height_a = 512 - y;
+ u32 height_b = height - height_a;
+
+ render_block_fill(psx_gpu, color, x, y, width_a, height_a);
+ render_block_fill(psx_gpu, color, 0, y, width_b, height_a);
+ render_block_fill(psx_gpu, color, x, 0, width_a, height_b);
+ render_block_fill(psx_gpu, color, 0, 0, width_b, height_b);
+ }
+ else
+ {
+ render_block_fill(psx_gpu, color, x, y, width_a, height);
+ render_block_fill(psx_gpu, color, 0, y, width_b, height);
+ }
+ }
+ else
+ {
+ if((y + height) > 512)
+ {
+ u32 height_a = 512 - y;
+ u32 height_b = height - height_a;
+
+ render_block_fill(psx_gpu, color, x, y, width, height_a);
+ render_block_fill(psx_gpu, color, x, 0, width, height_b);
+ }
+ else
+ {
+ render_block_fill(psx_gpu, color, x, y, width, height);
+ }
+ }
break;
+ }
case 0x20 ... 0x23:
{
@@ -567,6 +612,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
case 0xE1:
set_texture(psx_gpu, list[0] & 0x1FF);
+
if(list[0] & (1 << 9))
psx_gpu->render_state_base |= RENDER_STATE_DITHER;
else