aboutsummaryrefslogtreecommitdiff
path: root/plugins/gpu_neon
diff options
context:
space:
mode:
author notaz 2012-10-27 22:14:16 +0300
committer notaz 2012-10-27 22:14:16 +0300
commit f0931e56b2428fe5e0f6b4d7d6d0f41462cfc551 (patch)
tree a8a10a33b219f24b0e18fe48f153297d50f8cc1c /plugins/gpu_neon
parent a8be0debff95f9b56af7c4c19eaacee782a09e28 (diff)
download pcsx_rearmed-f0931e56b2428fe5e0f6b4d7d6d0f41462cfc551.tar.gz
pcsx_rearmed-f0931e56b2428fe5e0f6b4d7d6d0f41462cfc551.tar.bz2
pcsx_rearmed-f0931e56b2428fe5e0f6b4d7d6d0f41462cfc551.zip
psx_gpu: implement setup_sprite_untextured in asm
Diffstat (limited to 'plugins/gpu_neon')
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu.c 74
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S 143
2 files changed, 215 insertions, 2 deletions
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c
index ce72af5..485ef27 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c
@@ -3885,6 +3885,11 @@ void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
s32 width, s32 height, u32 color);
+void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
+ s32 v, s32 width, s32 height, u32 color); /* hands off to the _simple variant when no mask-evaluate, modulate-texels or blend flag is set in render_state */
+void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y,
+ s32 u, s32 v, s32 width, s32 height, u32 color); /* solid-color fill written directly through vram_out_ptr */
+
#ifndef NEON_BUILD
setup_sprite_tiled_builder(4bpp,);
setup_sprite_tiled_builder(8bpp,);
@@ -4013,11 +4018,16 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
}
}
-#endif
-
void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
{
+ if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE |
+ RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0)
+ {
+ setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color);
+ return;
+ }
+
u32 right_width = ((width - 1) & 0x7) + 1;
u32 right_mask_bits = (0xFF << right_width);
u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x;
@@ -4083,6 +4093,66 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
}
}
+#endif
+
+void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y,
+ s32 u, s32 v, s32 width, s32 height, u32 color)
+{ /* Fill a width x height rectangle at (x, y) with one solid color, writing VRAM directly; u and v are unused. */
+  u32 r = color & 0xFF;
+  u32 g = (color >> 8) & 0xFF;
+  u32 b = (color >> 16) & 0xFF;
+  u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
+   psx_gpu->mask_msb; /* top 5 bits of each 8-bit channel, OR'd with psx_gpu->mask_msb */
+  u32 color_32bpp = color_16bpp | (color_16bpp << 16); /* same pixel in both halves: one 32-bit store writes two pixels */
+
+  u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024); /* rows are 1024 pixels apart */
+  u32 *vram_ptr;
+
+  u32 num_width;
+  /* Flush any queued blocks before writing VRAM directly so earlier primitives are not rendered on top of this fill; the old test (num_blocks > MAX_BLOCKS) never flushed a partly filled buffer. */
+  if(psx_gpu->num_blocks)
+  {
+    flush_render_block_buffer(psx_gpu);
+  }
+
+  while(height) /* NOTE(review): assumes width > 0 and height > 0 -- confirm against callers */
+  {
+    num_width = width;
+
+    vram_ptr = (void *)vram_ptr16;
+    if((long)vram_ptr16 & 2)
+    {
+      *vram_ptr16 = color_32bpp; /* lone leading pixel so the 32-bit stores below are 4-byte aligned */
+      vram_ptr = (void *)(vram_ptr16 + 1);
+      num_width--;
+    }
+
+    while(num_width >= 4 * 2) /* unrolled: 8 pixels per pass */
+    {
+      vram_ptr[0] = color_32bpp;
+      vram_ptr[1] = color_32bpp;
+      vram_ptr[2] = color_32bpp;
+      vram_ptr[3] = color_32bpp;
+
+      vram_ptr += 4;
+      num_width -= 4 * 2;
+    }
+
+    while(num_width >= 2) /* remaining pixel pairs */
+    {
+      *vram_ptr++ = color_32bpp;
+      num_width -= 2;
+    }
+
+    if(num_width > 0)
+    {
+      *(u16 *)vram_ptr = color_32bpp; /* odd trailing pixel */
+    }
+
+    vram_ptr16 += 1024;
+    height--;
+  }
+}
#define setup_sprite_blocks_switch_textured(texture_mode) \
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
index 103483a..085e11b 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@ -17,6 +17,10 @@
#define MAX_BLOCKS 64
#define MAX_BLOCKS_PER_ROW 128
+#define RENDER_STATE_MASK_EVALUATE 0x20 /* NOTE(review): render_state bits repeated here for the assembler; presumably they must match the C-side definitions -- confirm */
+#define RENDER_FLAGS_MODULATE_TEXELS 0x1
+#define RENDER_FLAGS_BLEND 0x2
+
#include "psx_gpu_offsets.h"
#define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4)
@@ -5687,6 +5691,145 @@ function(setup_sprite_16bpp_4x)
ldmia sp!, { r4 - r11, pc }
+#undef width
+#undef right_width
+#undef right_mask_bits
+#undef color
+#undef height
+#undef blocks_remaining
+#undef colors
+#undef right_mask
+#undef test_mask
+#undef draw_mask
+/* Register aliases used by setup_sprite_untextured. */
+#define psx_gpu r0
+#define x r1
+#define y r2
+#define width r3 /* loaded from the stack by the function, overwriting the incoming r3 */
+#define right_width r5
+#define right_mask_bits r6
+#define fb_ptr r7
+#define color r8
+#define height r9
+#define fb_ptr_pitch r12
+
+// referenced by setup_sprites_16bpp_flush
+#define num_blocks r4
+#define block r5 /* shares r5 with right_width, which is no longer needed once block is computed */
+#define block_width r11
+/* r1, r2 and r8 are reused for the color channels once x, y and the packed color have been consumed */
+#define color_r r1
+#define color_g r2
+#define color_b r8
+#define blocks_remaining r6 /* shares r6 with right_mask_bits */
+/* NEON registers */
+#define colors q0
+#define right_mask q1
+#define test_mask q2
+#define draw_mask q2 /* shares q2 with test_mask */
+#define draw_mask_bits_fb_ptr d6 /* lane 1 receives fb_ptr; lane 0 is only ever zeroed */
+/* setup_sprite_untextured(psx_gpu, x, y, u, v, width, height, color): for every sprite row, */
+/* appends 64-byte block entries (8 pixels each) holding a mask, the fill color and the target fb_ptr. */
+.align 3
+
+function(setup_sprite_untextured)
+ ldrh r12, [ psx_gpu, #psx_gpu_render_state_offset ]
+ tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \
+ | RENDER_FLAGS_BLEND)
+ beq setup_sprite_untextured_simple /* none of the three flags set: tail-branch with registers and stack untouched */
+/* block-queueing path: save r4-r11 and lr (9 words, 36 bytes) */
+ stmdb sp!, { r4 - r11, r14 }
+/* after the push the stack arguments sit at: width sp+40, height sp+44, color sp+48 */
+ ldr width, [ sp, #40 ]
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
+
+ ldr height, [ sp, #44 ]
+ add fb_ptr, fb_ptr, y, lsl #11 /* + y * 2048 bytes (1024 16-bit pixels per row) */
+
+ add fb_ptr, fb_ptr, x, lsl #1 /* + x * 2 bytes */
+ sub right_width, width, #1
+
+ ldr color, [ sp, #48 ]
+ and right_width, #7
+
+ add block_width, width, #7
+ add right_width, #1 /* right_width = ((width - 1) & 7) + 1 */
+
+ lsr block_width, #3 /* block_width = (width + 7) / 8 */
+ mov right_mask_bits, #0xff
+
+ sub fb_ptr_pitch, block_width, #1
+ lsl right_mask_bits, right_width /* right_mask_bits = 0xff << right_width */
+
+ lsl fb_ptr_pitch, #3+1
+ ubfx color_r, color, #3, #5 /* bits 3-7 of color */
+
+ rsb fb_ptr_pitch, #1024*2 /* fb_ptr_pitch = 2048 - (block_width - 1) * 16 bytes */
+ ubfx color_g, color, #11, #5 /* bits 11-15 of color */
+
+ vld1.u32 { test_mask }, [ psx_gpu, :128 ] /* reads the first 16 bytes of *psx_gpu */
+ ubfx color_b, color, #19, #5 /* bits 19-23; overwrites r8, after r and g were extracted */
+
+ vdup.u16 right_mask, right_mask_bits
+ orr color, color_r, color_b, lsl #10
+
+ ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+ orr color, color, color_g, lsl #5 /* color = r | (g << 5) | (b << 10) */
+
+ vtst.u16 right_mask, right_mask, test_mask /* lane becomes 0xffff where (right_mask_bits & test_mask lane) != 0 */
+ add block, psx_gpu, #psx_gpu_blocks_offset
+
+ vdup.u16 colors, color
+ add block, block, num_blocks, lsl #6 /* block entries are 64 bytes apart */
+
+/* one pass of this loop per sprite row */
+setup_sprite_untextured_height_loop:
+ add num_blocks, block_width
+ sub blocks_remaining, block_width, #1
+
+ cmp num_blocks, #MAX_BLOCKS
+ blgt setup_sprites_16bpp_flush /* NOTE(review): flush routine is outside this view; presumably it resets num_blocks and block -- confirm */
+
+ cmp blocks_remaining, #0
+ ble 1f /* single-block row: only the right-edge block is written */
+
+ vmov.u8 draw_mask, #0 /* zero_mask */
+ vmov.u8 draw_mask_bits_fb_ptr, #0
+/* full 8-pixel blocks: zero mask at +0, colors at +16, d6 (lane 1 = fb_ptr) at +40 */
+ 0:
+ vst1.u32 { draw_mask }, [ block, :128 ]!
+ subs blocks_remaining, #1
+
+ vst1.u32 { colors }, [ block, :128 ]
+ add block, block, #24 /* 16 + 24 = offset 40 within the entry */
+
+ vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr
+ vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ]
+
+ add block, block, #24 /* offset 64: next block entry */
+ add fb_ptr, #8*2 /* advance 8 pixels */
+ bgt 0b /* flags come from the subs above; nothing in between alters them */
+/* last block of the row: same layout, but masked with right_mask */
+ 1:
+ vst1.u32 { right_mask }, [ block, :128 ]!
+ subs height, #1
+
+ vst1.u32 { colors }, [ block, :128 ]
+ add block, block, #24
+
+ vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr /* NOTE(review): lane 0 of d6 is not zeroed on this path when block_width is 1 -- confirm it is unused */
+ vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ]
+
+ add block, block, #24
+ add fb_ptr, fb_ptr_pitch /* lands 2048 bytes past the start of the current row */
+
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+ bgt setup_sprite_untextured_height_loop /* flags still from subs height */
+
+ ldmia sp!, { r4 - r11, pc }
+
+
+
#undef texture_page_ptr
#undef vram_ptr
#undef dirty_textures_mask