aboutsummaryrefslogtreecommitdiff
path: root/plugins/gpu_neon
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/gpu_neon')
-rw-r--r--plugins/gpu_neon/psx_gpu/psx_gpu.c46
-rw-r--r--plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S63
2 files changed, 50 insertions, 59 deletions
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c
index 76bfb15..7c1503b 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c
@@ -4322,30 +4322,48 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
flush_render_block_buffer(psx_gpu);
invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1);
-#ifndef NEON_BUILD
u32 r = color & 0xFF;
u32 g = (color >> 8) & 0xFF;
u32 b = (color >> 16) & 0xFF;
- u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
+ u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
+ psx_gpu->mask_msb;
+ u32 color_32bpp = color_16bpp | (color_16bpp << 16);
- u16 *vram_ptr = psx_gpu->vram_ptr + x + (y * 1024);
- u32 draw_x, draw_y;
+ u32 *vram_ptr = (u32 *)(psx_gpu->vram_ptr + x + (y * 1024));
- for(draw_y = 0; draw_y < height; draw_y++)
+ u32 pitch = 512 - (width / 2);
+ u32 num_width;
+
+ if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED)
{
- for(draw_x = 0; draw_x < width; draw_x++)
+ pitch += 512;
+ height /= 2;
+
+ if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD)
+ vram_ptr += 512;
+ }
+
+ while(height)
+ {
+ num_width = width;
+ while(num_width)
{
- vram_ptr[draw_x] = color_16bpp;
+ vram_ptr[0] = color_32bpp;
+ vram_ptr[1] = color_32bpp;
+ vram_ptr[2] = color_32bpp;
+ vram_ptr[3] = color_32bpp;
+ vram_ptr[4] = color_32bpp;
+ vram_ptr[5] = color_32bpp;
+ vram_ptr[6] = color_32bpp;
+ vram_ptr[7] = color_32bpp;
+
+ vram_ptr += 8;
+ num_width -= 16;
}
- vram_ptr += 1024;
+ vram_ptr += pitch;
+ height--;
}
-#else
- void render_block_fill_body(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
- u32 width, u32 height);
-
- render_block_fill_body(psx_gpu, color, x, y, width, height);
-#endif
}
void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
index 973a8b3..11a11b1 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@ -4337,31 +4337,20 @@ function(warmup)
bx lr
+#undef vram_ptr
#undef color
-#undef y
+#undef width
#undef height
-
-#define psx_gpu r0
-#define color r1
-#define x r2
-#define y r3
+#undef pitch
#define vram_ptr r0
-#define width r3
-#define height r12
-
-#define parameter_width_offset 0
-#define parameter_height_offset 4
+#define color r1
+#define width r2
+#define height r3
-#define color_r r14
-#define color_g r4
-#define color_b r5
+#define pitch r1
-#define left_unaligned r14
-#define right_unaligned r4
-#define pitch r5
-#define num_unaligned r2
-#define num_width r6
+#define num_width r12
#undef colors_a
#undef colors_b
@@ -4372,44 +4361,28 @@ function(warmup)
.align 3
function(render_block_fill_body)
- ldr vram_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
- ldr height, [ sp, #parameter_height_offset ]
-
- add vram_ptr, vram_ptr, y, lsl #11
- ldr width, [ sp, #parameter_width_offset ]
-
- add vram_ptr, vram_ptr, x, lsl #1
- stmdb sp!, { r4 - r6, r14 }
-
- ubfx color_r, color, #3, #5
- ubfx color_g, color, #11, #5
-
- ubfx color_b, color, #19, #5
- orr color, color_r, color_g, lsl #5
-
- orr color, color, color_b, lsl #10
vdup.u16 colors_a, color
+ mov pitch, #2048
vmov colors_b, colors_a
- mov pitch, #2048
sub pitch, pitch, width, lsl #1
- 0:
- mov num_width, width, lsr #4
+ mov num_width, width
- 1:
- vst1.u32 { colors_a, colors_b }, [ vram_ptr, :128 ]!
+ 0:
+ vst1.u32 { colors_a, colors_b }, [ vram_ptr, :256 ]!
- subs num_width, num_width, #1
- bne 1b
+ subs num_width, num_width, #2
+ bne 0b
add vram_ptr, vram_ptr, pitch
+ mov num_width, width
+
subs height, height, #1
bne 0b
-
- 1:
- ldmia sp!, { r4 - r6, pc }
+ bx lr
+
#undef x
#undef y