about | summary | refs | log | tree | commit | diff
path: root/plugins/gpu_neon/psx_gpu
diff options
context:
space:
mode:
author: Exophase, 2011-12-23 02:47:19 +0200
committer: notaz, 2011-12-23 02:47:26 +0200
commit: 3867c6efed8d1cd6cd40f07cd46876f59da8912f (patch)
tree: c6c7edcb38c17ab2affb67aeae79350bf9cd03ae /plugins/gpu_neon/psx_gpu
parent: de35a4641acce078961c2e748e1b6da6fe8d8b70 (diff)
download: pcsx_rearmed-3867c6efed8d1cd6cd40f07cd46876f59da8912f.tar.gz
pcsx_rearmed-3867c6efed8d1cd6cd40f07cd46876f59da8912f.tar.bz2
pcsx_rearmed-3867c6efed8d1cd6cd40f07cd46876f59da8912f.zip
psx_gpu: texture cache fix, updates
Diffstat (limited to 'plugins/gpu_neon/psx_gpu')
-rw-r--r--  plugins/gpu_neon/psx_gpu/psx_gpu.c           122
-rw-r--r--  plugins/gpu_neon/psx_gpu/psx_gpu.h            10
-rw-r--r--  plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S  113
-rw-r--r--  plugins/gpu_neon/psx_gpu/psx_gpu_main.c       67
-rw-r--r--  plugins/gpu_neon/psx_gpu/psx_gpu_parse.c      46
5 files changed, 218 insertions, 140 deletions
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c
index 84848f8..2acfedc 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c
@@ -20,7 +20,6 @@
u32 span_pixels = 0;
u32 span_pixel_blocks = 0;
-u32 span_pixel_blocks_unaligned = 0;
u32 spans = 0;
u32 triangles = 0;
u32 sprites = 0;
@@ -39,9 +38,6 @@ u32 texel_blocks_8bpp = 0;
u32 texel_blocks_16bpp = 0;
u32 texel_blocks_untextured = 0;
u32 blend_blocks = 0;
-u32 untextured_pixels = 0;
-u32 blend_pixels = 0;
-u32 transparent_pixels = 0;
u32 render_buffer_flushes = 0;
u32 state_changes = 0;
u32 left_split_triangles = 0;
@@ -49,8 +45,7 @@ u32 flat_triangles = 0;
u32 clipped_triangles = 0;
u32 zero_block_spans = 0;
u32 texture_cache_loads = 0;
-u32 false_modulated_triangles = 0;
-u32 false_modulated_sprites = 0;
+u32 false_modulated_blocks = 0;
u32 reciprocal_table[512];
@@ -241,6 +236,7 @@ u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1,
{
u32 mask = texture_region_mask(x1, y1, x2, y2) &
psx_gpu->viewport_mask;
+
psx_gpu->dirty_textures_4bpp_mask |= mask;
psx_gpu->dirty_textures_8bpp_mask |= mask;
psx_gpu->dirty_textures_8bpp_alternate_mask |= mask;
@@ -257,7 +253,7 @@ void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,
void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu)
{
u32 current_texture_page = psx_gpu->current_texture_page;
- u8 *texture_page_ptr = psx_gpu->texture_page_ptr;
+ u8 *texture_page_ptr = psx_gpu->texture_page_base;
u16 *vram_ptr = psx_gpu->vram_ptr;
u32 texel_block;
@@ -319,7 +315,7 @@ void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu)
void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,
u32 texture_page)
{
- u16 *texture_page_ptr = psx_gpu->texture_page_ptr;
+ u16 *texture_page_ptr = psx_gpu->texture_page_base;
u16 *vram_ptr = psx_gpu->vram_ptr;
u32 tile_x, tile_y;
@@ -413,8 +409,10 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
render_block_handler->shade_blocks(psx_gpu);
render_block_handler->blend_blocks(psx_gpu);
+#ifdef PROFILE
span_pixel_blocks += psx_gpu->num_blocks;
render_buffer_flushes++;
+#endif
psx_gpu->num_blocks = 0;
}
@@ -1748,6 +1746,8 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
} \
#define setup_blocks_add_blocks_direct() \
+ texel_blocks_untextured += span_num_blocks; \
+ span_pixel_blocks += span_num_blocks \
#define setup_blocks_builder(shading, texturing, dithering, sw, target) \
@@ -1787,7 +1787,6 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \
s32 pixel_span = span_num_blocks * 8; \
pixel_span -= __builtin_popcount(span_edge_data->right_mask & 0xFF); \
span_pixels += pixel_span; \
- span_pixel_blocks_unaligned += (pixel_span + 7) / 8; \
\
span_num_blocks--; \
while(span_num_blocks) \
@@ -2017,16 +2016,29 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu)
} \
-#define shade_blocks_textured_modulated_shaded_primitive_load() \
+#define shade_blocks_textured_false_modulated_check_dithered(target) \
+
+#define shade_blocks_textured_false_modulated_check_undithered(target) \
+ if(psx_gpu->triangle_color == 0x808080) \
+ { \
+ \
+ shade_blocks_textured_unmodulated_##target(psx_gpu); \
+ false_modulated_blocks += num_blocks; \
+ return; \
+ } \
+
+
+#define shade_blocks_textured_modulated_shaded_primitive_load(dithering, \
+ target) \
-#define shade_blocks_textured_modulated_unshaded_primitive_load() \
+#define shade_blocks_textured_modulated_unshaded_primitive_load(dithering, \
+ target) \
{ \
u32 color = psx_gpu->triangle_color; \
dup_8x8b(colors_r, color); \
dup_8x8b(colors_g, color >> 8); \
dup_8x8b(colors_b, color >> 16); \
- if(psx_gpu->triangle_color == 0x808080) \
- false_modulated_triangles++; \
+ shade_blocks_textured_false_modulated_check_##dithering(target); \
} \
#define shade_blocks_textured_modulated_shaded_block_load() \
@@ -2091,7 +2103,8 @@ void shade_blocks_##shading##_textured_modulated_##dithering##_##target( \
\
dup_8x16b(d128_0x8000, 0x8000); \
\
- shade_blocks_textured_modulated_##shading##_primitive_load(); \
+ shade_blocks_textured_modulated_##shading##_primitive_load(dithering, \
+ target); \
\
while(num_blocks) \
{ \
@@ -2157,6 +2170,9 @@ void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct
void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct
*psx_gpu);
+void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu);
+void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu);
+
#ifndef NEON_BUILD
shade_blocks_textured_modulated_builder(shaded, dithered, direct);
@@ -2204,8 +2220,38 @@ void shade_blocks_textured_unmodulated_##target(psx_gpu_struct *psx_gpu) \
} \
} \
-void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu);
-void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu);
+#define shade_blocks_textured_unmodulated_dithered_builder(target) \
+void shade_blocks_textured_unmodulated_dithered_##target(psx_gpu_struct \
+ *psx_gpu) \
+{ \
+ block_struct *block = psx_gpu->blocks; \
+ u32 num_blocks = psx_gpu->num_blocks; \
+ vec_8x16u draw_mask; \
+ vec_8x16u test_mask = psx_gpu->test_mask; \
+ u32 draw_mask_bits; \
+ \
+ vec_8x16u pixels; \
+ shade_blocks_load_msb_mask_##target(); \
+ \
+ while(num_blocks) \
+ { \
+ vec_8x16u zero_mask; \
+ \
+ draw_mask_bits = block->draw_mask_bits; \
+ dup_8x16b(draw_mask, draw_mask_bits); \
+ tst_8x16b(draw_mask, draw_mask, test_mask); \
+ \
+ pixels = block->texels; \
+ \
+ cmpeqz_8x16b(zero_mask, pixels); \
+ or_8x16b(zero_mask, draw_mask, zero_mask); \
+ \
+ shade_blocks_store_##target(zero_mask, pixels); \
+ \
+ num_blocks--; \
+ block++; \
+ } \
+} \
#ifndef NEON_BUILD
@@ -2773,11 +2819,15 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y);
+#ifdef PROFILE
triangles++;
+#endif
if(triangle_area == 0)
{
+#ifdef PROFILE
trivial_rejects++;
+#endif
return;
}
@@ -2797,7 +2847,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
if((y_bottom - y_top) >= 512)
{
+#ifdef PROFILE
trivial_rejects++;
+#endif
return;
}
@@ -2821,14 +2873,18 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
if((c->x - a->x) >= 1024)
{
+#ifdef PROFILE
trivial_rejects++;
+#endif
return;
}
if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x,
y_bottom) == 0)
{
+#ifdef PROFILE
trivial_rejects++;
+#endif
return;
}
@@ -2922,7 +2978,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
break;
}
+#ifdef PROFILE
spans += psx_gpu->num_spans;
+#endif
u32 render_state = flags &
(RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |
@@ -2934,7 +2992,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
{
psx_gpu->render_state = render_state;
flush_render_block_buffer(psx_gpu);
+#ifdef PROFILE
state_changes++;
+#endif
}
psx_gpu->primitive_type = PRIMITIVE_TYPE_TRIANGLE;
@@ -3369,7 +3429,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
u32 left_offset = u & 0x7;
u32 width_rounded = width + left_offset + 7;
- u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - left_offset);
+ u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (s32)(x - left_offset);
u32 right_width = width_rounded & 0x7;
u32 block_width = width_rounded / 8;
u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8);
@@ -3519,7 +3579,10 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
{
blocks_remaining = block_width - 1;
num_blocks += block_width;
+
+#ifdef PROFILE
sprite_blocks += block_width;
+#endif
if(num_blocks > MAX_BLOCKS)
{
@@ -3654,6 +3717,10 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
s32 x_right = x + width - 1;
s32 y_bottom = y + height - 1;
+#ifdef PROFILE
+ sprites++;
+#endif
+
if(invalidate_texture_cache_region_viewport(psx_gpu, x, y, x_right,
y_bottom) == 0)
{
@@ -3685,10 +3752,10 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
if((width <= 0) || (height <= 0))
return;
- sprites++;
-
+#ifdef PROFILE
span_pixels += width * height;
spans += height;
+#endif
u32 render_state = flags &
(RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |
@@ -3701,7 +3768,9 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
{
psx_gpu->render_state = render_state;
flush_render_block_buffer(psx_gpu);
+#ifdef PROFILE
state_changes++;
+#endif
}
psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE;
@@ -4032,7 +4101,9 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags,
u32 control_mask;
+#ifdef PROFILE
lines++;
+#endif
if(vertex_a->x >= vertex_b->x)
{
@@ -4276,8 +4347,6 @@ void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1);
- //printf("copy for %d, %d\n", width, height);
-
for(draw_y = 0; draw_y < height; draw_y++)
{
for(draw_x = 0; draw_x < width; draw_x++)
@@ -4329,15 +4398,6 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
psx_gpu->test_mask = test_mask;
- psx_gpu->pixel_count_mode = 0;
- psx_gpu->pixel_compare_mode = 0;
-
- psx_gpu->vram_pixel_counts_a = malloc(sizeof(u8) * 1024 * 512);
- psx_gpu->vram_pixel_counts_b = malloc(sizeof(u8) * 1024 * 512);
- memset(psx_gpu->vram_pixel_counts_a, 0, sizeof(u8) * 1024 * 512);
- memset(psx_gpu->vram_pixel_counts_b, 0, sizeof(u8) * 1024 * 512);
- psx_gpu->compare_vram = malloc(sizeof(u16) * 1024 * 512);
-
psx_gpu->dirty_textures_4bpp_mask = 0xFFFFFFFF;
psx_gpu->dirty_textures_8bpp_mask = 0xFFFFFFFF;
psx_gpu->dirty_textures_8bpp_alternate_mask = 0xFFFFFFFF;
@@ -4354,6 +4414,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
psx_gpu->vram_ptr = vram;
+ psx_gpu->texture_page_base = psx_gpu->vram_ptr;
psx_gpu->texture_page_ptr = psx_gpu->vram_ptr;
psx_gpu->clut_ptr = psx_gpu->vram_ptr;
@@ -4374,7 +4435,6 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
// d1: (2 3 6 7): y0
// d2: (4 5 6 7): x0 ^ y0
-
psx_gpu->dither_table[0] = dither_table_row(-4, 0, -3, 1);
psx_gpu->dither_table[1] = dither_table_row(2, -2, 3, -1);
psx_gpu->dither_table[2] = dither_table_row(-3, 1, -4, 0);
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h
index 49425ce..137dda9 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.h
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h
@@ -130,12 +130,11 @@ typedef struct
u32 dirty_textures_8bpp_alternate_mask;
u32 triangle_color;
- u32 primitive_color;
-
u32 dither_table[4];
struct render_block_handler_struct *render_block_handler;
void *texture_page_ptr;
+ void *texture_page_base;
u16 *clut_ptr;
u16 *vram_ptr;
@@ -189,13 +188,6 @@ typedef struct
u8 texture_4bpp_cache[32][256 * 256];
u8 texture_8bpp_even_cache[16][256 * 256];
u8 texture_8bpp_odd_cache[16][256 * 256];
-
- u32 pixel_count_mode;
- u32 pixel_compare_mode;
-
- u8 *vram_pixel_counts_a;
- u8 *vram_pixel_counts_b;
- u16 *compare_vram;
} psx_gpu_struct;
typedef struct __attribute__((aligned(16)))
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
index 54605b8..fd99798 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@ -38,10 +38,10 @@
#define psx_gpu_dirty_textures_8bpp_mask_offset 172
#define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 176
#define psx_gpu_triangle_color_offset 180
-#define psx_gpu_primitive_color_offset 184
-#define psx_gpu_dither_table_offset 188
-#define psx_gpu_render_block_handler_offset 204
-#define psx_gpu_texture_page_ptr_offset 208
+#define psx_gpu_dither_table_offset 184
+#define psx_gpu_render_block_handler_offset 200
+#define psx_gpu_texture_page_ptr_offset 204
+#define psx_gpu_texture_page_base_offset 208
#define psx_gpu_clut_ptr_offset 212
#define psx_gpu_vram_ptr_offset 216
@@ -1955,6 +1955,8 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
vdup.u16 colors, color
add span_edge_data, psx_gpu, #psx_gpu_span_edge_data_offset
+ orr color, color, lsl #16
+
0:
ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ]
@@ -1981,12 +1983,21 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
3:
ldrb right_mask, [ span_edge_data, #edge_data_right_mask_offset ]
- eor right_mask, right_mask, #0xFF
- 4:
- strh color, [ fb_ptr ], #2
- movs right_mask, right_mask, lsr #1
- bne 4b
+ cmp right_mask, #0x0
+ beq 5f
+
+ tst right_mask, #0xF
+ streq color, [ fb_ptr ], #4
+ moveq right_mask, right_mask, lsr #4
+ streq color, [ fb_ptr ], #4
+
+ tst right_mask, #0x3
+ streq color, [ fb_ptr ], #4
+ moveq right_mask, right_mask, lsr #2
+
+ tst right_mask, #0x1
+ streqh color, [ fb_ptr ]
1:
add span_edge_data, span_edge_data, #8
@@ -1997,6 +2008,9 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
ldmia sp!, { r4 - r11, pc }
+ 5:
+ vst1.u32 { colors }, [ fb_ptr ]
+ bal 1b
#undef c_64
@@ -2337,6 +2351,7 @@ setup_blocks_shaded_untextured_indirect_builder(dithered)
#define draw_mask q0
#define pixels_low d16
+#define pixels_high d17
@@ -2500,23 +2515,67 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \
3: \
setup_blocks_shaded_untextured_dither_a_##dithering(); \
\
- ldrb right_mask, [ span_edge_data, #edge_data_right_mask_offset ]; \
+ ldrh right_mask, [ span_edge_data, #edge_data_right_mask_offset ]; \
setup_blocks_shaded_untextured_dither_b_##dithering(); \
\
vshr.u8 r_whole_8, r_whole_8, #3; \
+ rbit right_mask, right_mask; \
vmov pixels, msb_mask; \
vbic.u8 gb_whole_8, gb_whole_8, d128_0x7; \
- eor right_mask, right_mask, #0xFF; \
+ clz right_mask, right_mask; \
\
vmlal.u8 pixels, r_whole_8, d64_1; \
vmlal.u8 pixels, g_whole_8, d64_4; \
vmlal.u8 pixels, b_whole_8, d64_128; \
\
+ ldr pc, [ pc, right_mask, lsl #2 ]; \
+ nop; \
+ nop; \
+ .word 4f; \
+ .word 5f; \
+ .word 6f; \
+ .word 7f; \
+ .word 8f; \
+ .word 9f; \
+ .word 10f; \
+ .word 11f; \
+ \
4: \
- vst1.u16 { pixels_low[0] }, [ fb_ptr ]!; \
- vext.16 pixels, pixels, #1; \
- movs right_mask, right_mask, lsr #1; \
- bne 4b; \
+ vst1.u16 { pixels_low[0] }, [ fb_ptr ]; \
+ bal 1f; \
+ \
+ 5: \
+ vst1.u32 { pixels_low[0] }, [ fb_ptr ]; \
+ bal 1f; \
+ \
+ 6: \
+ vst1.u32 { pixels_low[0] }, [ fb_ptr ]!; \
+ vst1.u16 { pixels_low[2] }, [ fb_ptr ]; \
+ bal 1f; \
+ \
+ 7: \
+ vst1.u32 { pixels_low }, [ fb_ptr ]; \
+ bal 1f; \
+ \
+ 8: \
+ vst1.u32 { pixels_low }, [ fb_ptr ]!; \
+ vst1.u16 { pixels_high[0] }, [ fb_ptr ]; \
+ bal 1f; \
+ \
+ 9: \
+ vst1.u32 { pixels_low }, [ fb_ptr ]!; \
+ vst1.u32 { pixels_high[0] }, [ fb_ptr ]!; \
+ bal 1f; \
+ \
+ 10: \
+ vst1.u32 { pixels_low }, [ fb_ptr ]!; \
+ vst1.u32 { pixels_high[0] }, [ fb_ptr ]!; \
+ vst1.u16 { pixels_high[2] }, [ fb_ptr ]; \
+ bal 1f; \
+ \
+ 11: \
+ vst1.u32 { pixels }, [ fb_ptr ]; \
+ bal 1f; \
\
1: \
add span_uvrg_offset, span_uvrg_offset, #16; \
@@ -2957,6 +3016,8 @@ function(texture_blocks_16bpp)
#define psx_gpu r0
#define num_blocks r1
#define color_ptr r2
+#define colors_scalar r2
+#define colors_scalar_compare r3
#define mask_msb_ptr r2
#define block_ptr_load_a r0
@@ -3013,9 +3074,21 @@ function(texture_blocks_16bpp)
add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \
vld1.u16 { msb_mask_low[], msb_mask_high[] }, [ mask_msb_ptr, :16 ] \
-#define shade_blocks_textured_modulated_prologue_shaded() \
-#define shade_blocks_textured_modulated_prologue_unshaded() \
+#define shade_blocks_textured_modulated_prologue_shaded(dithering, target) \
+
+#define shade_blocks_textured_false_modulation_check_undithered(target) \
+ ldr colors_scalar, [ psx_gpu, #psx_gpu_triangle_color_offset ]; \
+ movw colors_scalar_compare, #0x8080; \
+ \
+ movt colors_scalar_compare, #0x80; \
+ cmp colors_scalar, colors_scalar_compare; \
+ beq shade_blocks_textured_unmodulated_##target \
+
+#define shade_blocks_textured_false_modulation_check_dithered(target) \
+
+#define shade_blocks_textured_modulated_prologue_unshaded(dithering, target) \
+ shade_blocks_textured_false_modulation_check_##dithering(target); \
add color_ptr, psx_gpu, #psx_gpu_triangle_color_offset; \
vld1.u32 { colors_r[] }, [ color_ptr, :32 ]; \
vdup.u8 colors_g, colors_r[1]; \
@@ -3086,13 +3159,13 @@ function(texture_blocks_16bpp)
.align 3; \
\
function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \
+ shade_blocks_textured_modulated_prologue_##shading(dithering, target); \
stmdb sp!, { r4 - r5, lr }; \
ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]; \
\
vld1.u32 { test_mask }, [ psx_gpu, :128 ]; \
\
shade_blocks_textured_modulated_prologue_##target(); \
- shade_blocks_textured_modulated_prologue_##shading(); \
\
add block_ptr_load_a, psx_gpu, #psx_gpu_blocks_offset; \
mov c_32, #32; \
@@ -5271,7 +5344,7 @@ function(update_texture_4bpp_cache)
ldrb current_texture_page, [ psx_gpu, #psx_gpu_current_texture_page_offset ]
- ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ]
+ ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_base_offset ]
ldr vram_ptr_a, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
and current_texture_page_x, current_texture_page, #0xF
@@ -5375,7 +5448,7 @@ function(update_texture_8bpp_cache_slice)
ldrb current_texture_page, [ psx_gpu, #psx_gpu_current_texture_page_offset ]
ldr vram_ptr_a, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
- ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ]
+ ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_base_offset ]
mov tile_y, #16
and texture_page_x, texture_page, #0xF
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
index 1eadc79..0f85604 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c
@@ -20,7 +20,6 @@
extern u32 span_pixels;
extern u32 span_pixel_blocks;
-extern u32 span_pixel_blocks_unaligned;
extern u32 spans;
extern u32 triangles;
extern u32 sprites;
@@ -38,9 +37,6 @@ extern u32 texel_blocks_8bpp;
extern u32 texel_blocks_16bpp;
extern u32 texel_blocks_untextured;
extern u32 blend_blocks;
-extern u32 untextured_pixels;
-extern u32 blend_pixels;
-extern u32 transparent_pixels;
extern u32 render_buffer_flushes;
extern u32 state_changes;
extern u32 trivial_rejects;
@@ -49,8 +45,7 @@ extern u32 flat_triangles;
extern u32 clipped_triangles;
extern u32 zero_block_spans;
extern u32 texture_cache_loads;
-extern u32 false_modulated_triangles;
-extern u32 false_modulated_sprites;
+extern u32 false_modulated_blocks;
static u32 mismatches;
@@ -64,7 +59,7 @@ typedef struct
static gpu_dump_struct state;
psx_gpu_struct __attribute__((aligned(256))) _psx_gpu;
-u16 __attribute__((aligned(256))) _vram[1024 * 512];
+u16 __attribute__((aligned(256))) _vram[(1024 * 512) + 1024];
#define percent_of(numerator, denominator) \
((((double)(numerator)) / (denominator)) * 100.0) \
@@ -81,7 +76,6 @@ void clear_stats(void)
lines = 0;
span_pixels = 0;
span_pixel_blocks = 0;
- span_pixel_blocks_unaligned = 0;
spans = 0;
texels_4bpp = 0;
texels_8bpp = 0;
@@ -91,9 +85,6 @@ void clear_stats(void)
texel_blocks_8bpp = 0;
texel_blocks_16bpp = 0;
blend_blocks = 0;
- untextured_pixels = 0;
- blend_pixels = 0;
- transparent_pixels = 0;
render_buffer_flushes = 0;
state_changes = 0;
trivial_rejects = 0;
@@ -102,8 +93,7 @@ void clear_stats(void)
clipped_triangles = 0;
zero_block_spans = 0;
texture_cache_loads = 0;
- false_modulated_triangles = 0;
- false_modulated_sprites = 0;
+ false_modulated_blocks = 0;
}
void update_screen(psx_gpu_struct *psx_gpu, SDL_Surface *screen)
@@ -165,7 +155,7 @@ int main(int argc, char *argv[])
size = ftell(list_file);
fseek(list_file, 0, SEEK_SET);
//size = 0;
-
+
list = malloc(size);
fread(list, 1, size, list_file);
fclose(list_file);
@@ -175,44 +165,26 @@ int main(int argc, char *argv[])
SDL_Init(SDL_INIT_EVERYTHING);
screen = SDL_SetVideoMode(1024, 512, 32, 0);
}
-
- initialize_psx_gpu(psx_gpu, _vram);
#ifdef NEON_BUILD
system("ofbset -fb /dev/fb1 -mem 6291456 -en 0");
u32 fbdev_handle = open("/dev/fb1", O_RDWR);
- psx_gpu->vram_ptr = (mmap((void *)0x50000000, 1024 * 1024 * 2, PROT_READ | PROT_WRITE,
+ u16 *vram_ptr =
+ vram_ptr = (mmap((void *)0x50000000, 1024 * 1024 * 2, PROT_READ | PROT_WRITE,
MAP_SHARED | 0xA0000000, fbdev_handle, 0));
- psx_gpu->vram_ptr += 64;
-#endif
-
+ vram_ptr += 64;
+ initialize_psx_gpu(psx_gpu, vram_ptr + 64);
+#else
+ initialize_psx_gpu(psx_gpu, _vram + 64);
+#endif
#ifdef NEON_BUILD
//triangle_benchmark(psx_gpu);
//return 0;
#endif
-#ifdef FULL_COMPARE_MODE
- psx_gpu->pixel_count_mode = 1;
- psx_gpu->pixel_compare_mode = 0;
memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2);
- //render_block_fill(psx_gpu, 0, 0, 0, 1024, 512);
- gpu_parse(psx_gpu, list, size);
-
- psx_gpu->pixel_count_mode = 0;
- psx_gpu->pixel_compare_mode = 1;
- memcpy(psx_gpu->compare_vram, state.vram, 1024 * 512 * 2);
- memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2);
- //render_block_fill(psx_gpu, 0, 0, 0, 1024, 512);
- clear_stats();
- gpu_parse(psx_gpu, list, size);
- flush_render_block_buffer(psx_gpu);
-#else
- memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2);
-
- psx_gpu->pixel_count_mode = 0;
- psx_gpu->pixel_compare_mode = 0;
clear_stats();
@@ -232,7 +204,7 @@ int main(int argc, char *argv[])
gpu_parse(psx_gpu, list, size);
flush_render_block_buffer(psx_gpu);
- printf("%s: ", argv[1]);
+ printf("%-64s: ", argv[1]);
#ifdef NEON_BUILD
u32 cycles_elapsed = get_counter() - cycles;
@@ -265,17 +237,14 @@ int main(int argc, char *argv[])
}
}
#endif
-#endif
#if 0
printf("\n");
- printf(" %d pixels, %d pixel blocks (%d unaligned), %d spans\n"
- " (%lf pixels per block (%lf unaligned, r %lf), %lf pixels per span),\n"
+ printf(" %d pixels, %d pixel blocks, %d spans\n"
+ " (%lf pixels per block, %lf pixels per span),\n"
" %lf blocks per span (%lf per non-zero span), %lf overdraw)\n\n",
- span_pixels, span_pixel_blocks, span_pixel_blocks_unaligned, spans,
+ span_pixels, span_pixel_blocks, spans,
(double)span_pixels / span_pixel_blocks,
- (double)span_pixels / span_pixel_blocks_unaligned,
- (double)span_pixel_blocks / span_pixel_blocks_unaligned,
(double)span_pixels / spans,
(double)span_pixel_blocks / spans,
(double)span_pixel_blocks / (spans - zero_block_spans),
@@ -283,10 +252,10 @@ int main(int argc, char *argv[])
((psx_gpu->viewport_end_x - psx_gpu->viewport_start_x) *
(psx_gpu->viewport_end_y - psx_gpu->viewport_start_y)));
- printf(" %d triangles (%d false modulated)\n"
+ printf(" %d triangles\n"
" (%d trivial rejects, %lf%% flat, %lf%% left split, %lf%% clipped)\n"
" (%lf pixels per triangle, %lf rows per triangle)\n\n",
- triangles, false_modulated_triangles, trivial_rejects,
+ triangles, trivial_rejects,
percent_of(flat_triangles, triangles),
percent_of(left_split_triangles, triangles),
percent_of(clipped_triangles, triangles),
@@ -306,6 +275,8 @@ int main(int argc, char *argv[])
percent_of(sprite_blocks, span_pixel_blocks));
printf(" %7d blended blocks (%lf%%)\n", blend_blocks,
percent_of(blend_blocks, span_pixel_blocks));
+ printf(" %7d false-mod blocks (%lf%%)\n", false_modulated_blocks,
+ percent_of(false_modulated_blocks, span_pixel_blocks));
printf("\n");
printf(" %lf blocks per render buffer flush\n", (double)span_pixel_blocks /
render_buffer_flushes);
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
index f6143ee..fc9f3fb 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
@@ -38,61 +38,53 @@ const u8 command_lengths[256] =
void update_texture_ptr(psx_gpu_struct *psx_gpu)
{
+ u8 *texture_base;
u8 *texture_ptr;
switch((psx_gpu->render_state_base >> 8) & 0x3)
{
default:
case TEXTURE_MODE_4BPP:
-#ifdef TEXTURE_CACHE_4BPP
- texture_ptr = psx_gpu->texture_4bpp_cache[psx_gpu->current_texture_page];
+ texture_base = psx_gpu->texture_4bpp_cache[psx_gpu->current_texture_page];
+
+ texture_ptr = texture_base;
texture_ptr += psx_gpu->texture_window_x & 0xF;
texture_ptr += (psx_gpu->texture_window_y & 0xF) << 4;
texture_ptr += (psx_gpu->texture_window_x >> 4) << 8;
texture_ptr += (psx_gpu->texture_window_y >> 4) << 12;
-#else
- texture_ptr = (u8 *)(psx_gpu->vram_ptr);
- texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128;
- texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048;
- texture_ptr += psx_gpu->texture_window_x / 2;
- texture_ptr += (psx_gpu->texture_window_y) * 2048;
-#endif
break;
case TEXTURE_MODE_8BPP:
-#ifdef TEXTURE_CACHE_8BPP
if(psx_gpu->current_texture_page & 0x1)
{
- texture_ptr =
+ texture_base =
psx_gpu->texture_8bpp_odd_cache[psx_gpu->current_texture_page >> 1];
}
else
{
- texture_ptr =
+ texture_base =
psx_gpu->texture_8bpp_even_cache[psx_gpu->current_texture_page >> 1];
}
+ texture_ptr = texture_base;
+ texture_ptr += psx_gpu->texture_window_x & 0xF;
texture_ptr += (psx_gpu->texture_window_y & 0xF) << 4;
texture_ptr += (psx_gpu->texture_window_x >> 4) << 8;
texture_ptr += (psx_gpu->texture_window_y >> 4) << 12;
-#else
- texture_ptr = (u8 *)(psx_gpu->vram_ptr);
- texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128;
- texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048;
- texture_ptr += psx_gpu->texture_window_x;
- texture_ptr += (psx_gpu->texture_window_y) * 2048;
-#endif
break;
case TEXTURE_MODE_16BPP:
- texture_ptr = (u8 *)(psx_gpu->vram_ptr);
- texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128;
- texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048;
+ texture_base = (u8 *)(psx_gpu->vram_ptr);
+ texture_base += (psx_gpu->current_texture_page & 0xF) * 128;
+ texture_base += ((psx_gpu->current_texture_page >> 4) * 256) * 2048;
+
+ texture_ptr = texture_base;
texture_ptr += psx_gpu->texture_window_x * 2;
texture_ptr += (psx_gpu->texture_window_y) * 2048;
break;
}
+ psx_gpu->texture_page_base = texture_base;
psx_gpu->texture_page_ptr = texture_ptr;
}
@@ -447,8 +439,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
u32 width = list_s16[4] & 0x3FF;
u32 height = list_s16[5] & 0x1FF;
- psx_gpu->primitive_color = list[0] & 0xFFFFFF;
-
render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]);
break;
}
@@ -461,7 +451,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
u32 width = list_s16[6] & 0x3FF;
u32 height = list_s16[7] & 0x1FF;
- psx_gpu->primitive_color = list[0] & 0xFFFFFF;
set_clut(psx_gpu, list_s16[5]);
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height,
@@ -477,8 +466,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
s32 x = list_s16[2] + psx_gpu->offset_x;
s32 y = list_s16[3] + psx_gpu->offset_y;
- psx_gpu->primitive_color = list[0] & 0xFFFFFF;
-
render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]);
break;
}
@@ -491,8 +478,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
s32 x = list_s16[2] + psx_gpu->offset_x;
s32 y = list_s16[3] + psx_gpu->offset_y;
- psx_gpu->primitive_color = list[0] & 0xFFFFFF;
-
render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]);
break;
}
@@ -506,7 +491,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
s32 y = list_s16[3] + psx_gpu->offset_y;
u32 uv = list_s16[4];
- psx_gpu->primitive_color = list[0] & 0xFFFFFF;
set_clut(psx_gpu, list_s16[5]);
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8,
@@ -522,7 +506,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
s32 x = list_s16[2] + psx_gpu->offset_x;
s32 y = list_s16[3] + psx_gpu->offset_y;
- psx_gpu->primitive_color = list[0] & 0xFFFFFF;
render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]);
break;
}
@@ -536,7 +519,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
s32 y = list_s16[3] + psx_gpu->offset_y;
u32 uv = list_s16[4];
- psx_gpu->primitive_color = list[0] & 0xFFFFFF;
set_clut(psx_gpu, list_s16[5]);
render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16,