From 7d5140f5d608cfe3f69cc4d75e78c8c2b30b7d1a Mon Sep 17 00:00:00 2001 From: Exophase Date: Sun, 7 Oct 2012 20:13:22 +0300 Subject: psx_gpu: increase reciprocal accuracy fixes 448 height issue --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 12 ++++++------ plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 68996c1..98aacc3 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -854,7 +854,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ dup_2x32b(edge_shifts, edge_shift); \ sub_2x32b(heights_b, heights, c_0x01); \ - shr_2x32b(height_reciprocals, edge_shifts, 12); \ + shr_2x32b(height_reciprocals, edge_shifts, 10); \ \ mla_2x32b(heights_b, x_starts, heights); \ bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \ @@ -883,8 +883,8 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, sub_2x32b(widths, x_ends, x_starts); \ width_alt = x_c - start_c; \ \ - shr_2x32b(height_reciprocals, edge_shifts, 12); \ - height_reciprocal_alt = edge_shift_alt >> 12; \ + shr_2x32b(height_reciprocals, edge_shifts, 10); \ + height_reciprocal_alt = edge_shift_alt >> 10; \ \ bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \ edge_shift_alt &= 0x1F; \ @@ -4526,12 +4526,12 @@ void initialize_reciprocal_table(void) { shift = __builtin_clz(height); height_normalized = height << shift; - height_reciprocal = ((1ULL << 50) + (height_normalized - 1)) / + height_reciprocal = ((1ULL << 52) + (height_normalized - 1)) / height_normalized; - shift = 32 - (50 - shift); + shift = 32 - (52 - shift); - reciprocal_table[height] = (height_reciprocal << 12) | shift; + reciprocal_table[height] = (height_reciprocal << 10) | shift; } } diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S 
b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 294685a..3331d5d 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -657,7 +657,7 @@ function(compute_all_gradients) \ vdup.u32 edge_shifts, temp; \ vsub.u32 heights_b, heights, c_0x01; \ - vshr.u32 height_reciprocals, edge_shifts, #12; \ + vshr.u32 height_reciprocals, edge_shifts, #10; \ \ vmla.s32 heights_b, x_starts, heights; \ vbic.u16 edge_shifts, #0xE0; \ @@ -682,8 +682,8 @@ function(compute_all_gradients) vsub.u32 heights_b, heights, c_0x01; \ sub height_b_alt, height_minor_b, #1; \ \ - vshr.u32 height_reciprocals, edge_shifts, #12; \ - lsr height_reciprocal_alt, edge_shift_alt, #12; \ + vshr.u32 height_reciprocals, edge_shifts, #10; \ + lsr height_reciprocal_alt, edge_shift_alt, #10; \ \ vmla.s32 heights_b, x_starts, heights; \ mla height_b_alt, height_minor_b, start_c, height_b_alt; \ -- cgit v1.2.3 From cb88320b4ddbfd8c1714f9a6cba31543a585a8cd Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 11 Aug 2012 18:34:13 +0300 Subject: psx_gpu: add a tool to generate asm offsets --- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 62 +---------------- plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h | 55 +++++++++++++++ plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c | 84 +++++++++++++++++++++++ 3 files changed, 142 insertions(+), 59 deletions(-) create mode 100644 plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h create mode 100644 plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 3331d5d..79d5466 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -16,65 +16,9 @@ #define MAX_BLOCKS 64 #define MAX_BLOCKS_PER_ROW 128 -#define psx_gpu_test_mask_offset 0 -#define psx_gpu_uvrg_offset 16 -#define psx_gpu_uvrg_dx_offset 32 -#define psx_gpu_uvrg_dy_offset 48 -#define 
psx_gpu_u_block_span_offset 64 -#define psx_gpu_v_block_span_offset 80 -#define psx_gpu_r_block_span_offset 96 -#define psx_gpu_g_block_span_offset 112 -#define psx_gpu_b_block_span_offset 128 - -#define psx_gpu_b_dx_offset 132 - -#define psx_gpu_b_offset 144 -#define psx_gpu_b_dy_offset 148 -#define psx_gpu_triangle_area_offset 152 -#define psx_gpu_texture_window_settings_offset 156 -#define psx_gpu_current_texture_mask_offset 160 -#define psx_gpu_viewport_mask_offset 164 -#define psx_gpu_dirty_textures_4bpp_mask_offset 168 -#define psx_gpu_dirty_textures_8bpp_mask_offset 172 -#define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 176 -#define psx_gpu_triangle_color_offset 180 -#define psx_gpu_dither_table_offset 184 -#define psx_gpu_render_block_handler_offset 200 -#define psx_gpu_texture_page_ptr_offset 204 -#define psx_gpu_texture_page_base_offset 208 -#define psx_gpu_clut_ptr_offset 212 -#define psx_gpu_vram_ptr_offset 216 - -#define psx_gpu_render_state_base_offset 220 -#define psx_gpu_render_state_offset 222 -#define psx_gpu_num_spans_offset 224 -#define psx_gpu_num_blocks_offset 226 -#define psx_gpu_offset_x_offset 228 -#define psx_gpu_offset_y_offset 230 -#define psx_gpu_clut_settings_offset 232 -#define psx_gpu_texture_settings_offset 234 -#define psx_gpu_viewport_start_x_offset 236 -#define psx_gpu_viewport_start_y_offset 238 -#define psx_gpu_viewport_end_x_offset 240 -#define psx_gpu_viewport_end_y_offset 242 -#define psx_gpu_mask_msb_offset 244 - -#define psx_gpu_triangle_winding_offset 246 -#define psx_gpu_display_area_draw_enable_offset 247 -#define psx_gpu_current_texture_page_offset 248 -#define psx_gpu_last_8bpp_texture_page_offset 249 -#define psx_gpu_texture_mask_width_offset 250 -#define psx_gpu_texture_mask_height_offset 251 -#define psx_gpu_texture_window_x_offset 252 -#define psx_gpu_texture_window_y_offset 253 -#define psx_gpu_primitive_type_offset 254 - -#define psx_gpu_reserved_a_offset 255 - -#define psx_gpu_blocks_offset 0x0100 
-#define psx_gpu_span_uvrg_offset_offset 0x2100 -#define psx_gpu_span_edge_data_offset 0x4100 -#define psx_gpu_span_b_offset_offset 0x5100 +#include "psx_gpu_offsets.h" + +#define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4) #define edge_data_left_x_offset 0 #define edge_data_num_blocks_offset 2 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h new file mode 100644 index 0000000..a47d965 --- /dev/null +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h @@ -0,0 +1,55 @@ +#define psx_gpu_test_mask_offset 0x0 +#define psx_gpu_uvrg_offset 0x10 +#define psx_gpu_uvrg_dx_offset 0x20 +#define psx_gpu_uvrg_dy_offset 0x30 +#define psx_gpu_u_block_span_offset 0x40 +#define psx_gpu_v_block_span_offset 0x50 +#define psx_gpu_r_block_span_offset 0x60 +#define psx_gpu_g_block_span_offset 0x70 +#define psx_gpu_b_block_span_offset 0x80 +#define psx_gpu_b_offset 0x90 +#define psx_gpu_b_dy_offset 0x94 +#define psx_gpu_triangle_area_offset 0x98 +#define psx_gpu_texture_window_settings_offset 0x9c +#define psx_gpu_current_texture_mask_offset 0xa0 +#define psx_gpu_viewport_mask_offset 0xa4 +#define psx_gpu_dirty_textures_4bpp_mask_offset 0xa8 +#define psx_gpu_dirty_textures_8bpp_mask_offset 0xac +#define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 0xb0 +#define psx_gpu_triangle_color_offset 0xb4 +#define psx_gpu_dither_table_offset 0xb8 +#define psx_gpu_render_block_handler_offset 0xc8 +#define psx_gpu_texture_page_ptr_offset 0xcc +#define psx_gpu_texture_page_base_offset 0xd0 +#define psx_gpu_clut_ptr_offset 0xd4 +#define psx_gpu_vram_ptr_offset 0xd8 +#define psx_gpu_render_state_base_offset 0xdc +#define psx_gpu_render_state_offset 0xde +#define psx_gpu_num_spans_offset 0xe0 +#define psx_gpu_num_blocks_offset 0xe2 +#define psx_gpu_offset_x_offset 0xe4 +#define psx_gpu_offset_y_offset 0xe6 +#define psx_gpu_clut_settings_offset 0xe8 +#define psx_gpu_texture_settings_offset 0xea +#define psx_gpu_viewport_start_x_offset 0xec 
+#define psx_gpu_viewport_start_y_offset 0xee +#define psx_gpu_viewport_end_x_offset 0xf0 +#define psx_gpu_viewport_end_y_offset 0xf2 +#define psx_gpu_mask_msb_offset 0xf4 +#define psx_gpu_triangle_winding_offset 0xf6 +#define psx_gpu_display_area_draw_enable_offset 0xf7 +#define psx_gpu_current_texture_page_offset 0xf8 +#define psx_gpu_last_8bpp_texture_page_offset 0xf9 +#define psx_gpu_texture_mask_width_offset 0xfa +#define psx_gpu_texture_mask_height_offset 0xfb +#define psx_gpu_texture_window_x_offset 0xfc +#define psx_gpu_texture_window_y_offset 0xfd +#define psx_gpu_primitive_type_offset 0xfe +#define psx_gpu_interlace_mode_offset 0xff +#define psx_gpu_blocks_offset 0x100 +#define psx_gpu_span_uvrg_offset_offset 0x2100 +#define psx_gpu_span_edge_data_offset 0x4100 +#define psx_gpu_span_b_offset_offset 0x5100 +#define psx_gpu_texture_4bpp_cache_offset 0x5900 +#define psx_gpu_texture_8bpp_even_cache_offset 0x205900 +#define psx_gpu_texture_8bpp_odd_cache_offset 0x305900 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c new file mode 100644 index 0000000..2275f59 --- /dev/null +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c @@ -0,0 +1,84 @@ +#include <stdio.h> +#include <stddef.h> + +#include "common.h" + +#define WRITE_OFFSET(f, member) \ + fprintf(f, "#define %-50s0x%x\n", \ + "psx_gpu_" #member "_offset", \ + offsetof(psx_gpu_struct, member)); + +int main() +{ + FILE *f; + + if (sizeof(f) != 4) { + fprintf(stderr, "bad pointer size\n"); + return 1; + } + + f = fopen("psx_gpu_offsets.h", "w"); + if (f == NULL) { + perror("fopen"); + return 1; + } + + WRITE_OFFSET(f, test_mask); + WRITE_OFFSET(f, uvrg); + WRITE_OFFSET(f, uvrg_dx); + WRITE_OFFSET(f, uvrg_dy); + WRITE_OFFSET(f, u_block_span); + WRITE_OFFSET(f, v_block_span); + WRITE_OFFSET(f, r_block_span); + WRITE_OFFSET(f, g_block_span); + WRITE_OFFSET(f, b_block_span); + WRITE_OFFSET(f, b); + WRITE_OFFSET(f, b_dy); + WRITE_OFFSET(f, triangle_area); + 
WRITE_OFFSET(f, texture_window_settings); + WRITE_OFFSET(f, current_texture_mask); + WRITE_OFFSET(f, viewport_mask); + WRITE_OFFSET(f, dirty_textures_4bpp_mask); + WRITE_OFFSET(f, dirty_textures_8bpp_mask); + WRITE_OFFSET(f, dirty_textures_8bpp_alternate_mask); + WRITE_OFFSET(f, triangle_color); + WRITE_OFFSET(f, dither_table); + WRITE_OFFSET(f, render_block_handler); + WRITE_OFFSET(f, texture_page_ptr); + WRITE_OFFSET(f, texture_page_base); + WRITE_OFFSET(f, clut_ptr); + WRITE_OFFSET(f, vram_ptr); + WRITE_OFFSET(f, render_state_base); + WRITE_OFFSET(f, render_state); + WRITE_OFFSET(f, num_spans); + WRITE_OFFSET(f, num_blocks); + WRITE_OFFSET(f, offset_x); + WRITE_OFFSET(f, offset_y); + WRITE_OFFSET(f, clut_settings); + WRITE_OFFSET(f, texture_settings); + WRITE_OFFSET(f, viewport_start_x); + WRITE_OFFSET(f, viewport_start_y); + WRITE_OFFSET(f, viewport_end_x); + WRITE_OFFSET(f, viewport_end_y); + WRITE_OFFSET(f, mask_msb); + WRITE_OFFSET(f, triangle_winding); + WRITE_OFFSET(f, display_area_draw_enable); + WRITE_OFFSET(f, current_texture_page); + WRITE_OFFSET(f, last_8bpp_texture_page); + WRITE_OFFSET(f, texture_mask_width); + WRITE_OFFSET(f, texture_mask_height); + WRITE_OFFSET(f, texture_window_x); + WRITE_OFFSET(f, texture_window_y); + WRITE_OFFSET(f, primitive_type); + WRITE_OFFSET(f, interlace_mode); + WRITE_OFFSET(f, blocks); + WRITE_OFFSET(f, span_uvrg_offset); + WRITE_OFFSET(f, span_edge_data); + WRITE_OFFSET(f, span_b_offset); + WRITE_OFFSET(f, texture_4bpp_cache); + WRITE_OFFSET(f, texture_8bpp_even_cache); + WRITE_OFFSET(f, texture_8bpp_odd_cache); + fclose(f); + + return 0; +} -- cgit v1.2.3 From c1817bd9249ee616cf9545a57136d6dd3669ce34 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 13 Aug 2012 00:03:43 +0300 Subject: psx_gpu: add enhanced triangle rendering --- plugins/gpu_neon/psx_gpu/common.h | 2 + plugins/gpu_neon/psx_gpu/psx_gpu.c | 53 +- plugins/gpu_neon/psx_gpu/psx_gpu.h | 14 +- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 16 +- 
plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h | 61 +- plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c | 1 + plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 685 ++++++++++++++++++++-- 7 files changed, 738 insertions(+), 94 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/common.h b/plugins/gpu_neon/psx_gpu/common.h index f299f79..d5cf3e9 100644 --- a/plugins/gpu_neon/psx_gpu/common.h +++ b/plugins/gpu_neon/psx_gpu/common.h @@ -18,5 +18,7 @@ typedef unsigned long long int u64; #include "vector_ops.h" #include "psx_gpu.h" +#define unlikely(x) __builtin_expect((x), 0) + #endif diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 98aacc3..0c1c78d 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -47,7 +47,8 @@ u32 zero_block_spans = 0; u32 texture_cache_loads = 0; u32 false_modulated_blocks = 0; -u32 reciprocal_table[512]; +/* double size for enhancement */ +u32 reciprocal_table[512 * 2]; typedef s32 fixed_type; @@ -1872,7 +1873,7 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \ if(span_num_blocks) \ { \ y = span_edge_data->y; \ - fb_ptr = psx_gpu->vram_ptr + span_edge_data->left_x + (y * 1024); \ + fb_ptr = psx_gpu->vram_out_ptr + span_edge_data->left_x + (y * 1024); \ \ setup_blocks_span_initialize_##shading##_##texturing(); \ setup_blocks_span_initialize_##dithering(texturing); \ @@ -2905,8 +2906,8 @@ char *render_block_flag_strings[] = (triangle_y_direction_##direction_c << 4) | \ (triangle_winding_##winding << 6)) \ -void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, - u32 flags) +static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + vertex_struct *vertexes_out[3]) { s32 y_top, y_bottom; s32 triangle_area; @@ -2927,7 +2928,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } if(b->y < 
a->y) @@ -2949,7 +2950,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } if(triangle_area < 0) @@ -2975,7 +2976,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x, @@ -2984,13 +2985,28 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } - psx_gpu->num_spans = 0; psx_gpu->triangle_area = triangle_area; psx_gpu->triangle_winding = triangle_winding; + vertexes_out[0] = a; + vertexes_out[1] = b; + vertexes_out[2] = c; + + return 1; +} + +static void render_triangle_p(psx_gpu_struct *psx_gpu, + vertex_struct *vertex_ptrs[3], u32 flags) +{ + psx_gpu->num_spans = 0; + + vertex_struct *a = vertex_ptrs[0]; + vertex_struct *b = vertex_ptrs[1]; + vertex_struct *c = vertex_ptrs[2]; + s32 y_delta_a = b->y - a->y; s32 y_delta_b = c->y - b->y; s32 y_delta_c = c->y - a->y; @@ -3002,7 +3018,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, compute_all_gradients(psx_gpu, a, b, c); switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) | - (triangle_winding << 6)) + (psx_gpu->triangle_winding << 6)) { triangle_case(up, up, up, negative): triangle_case(up, up, flat, negative): @@ -3126,6 +3142,14 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, (psx_gpu); } +void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + u32 flags) +{ + vertex_struct *vertex_ptrs[3]; + if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) + render_triangle_p(psx_gpu, vertex_ptrs, flags); +} + void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu); @@ -4245,7 +4269,7 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, flags &= ~RENDER_FLAGS_TEXTURE_MAP; - vram_ptr = 
psx_gpu->vram_ptr + (y_a * 1024) + x_a; + vram_ptr = psx_gpu->vram_out_ptr + (y_a * 1024) + x_a; control_mask = 0x0; @@ -4435,7 +4459,6 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, if((width == 0) || (height == 0)) return; - flush_render_block_buffer(psx_gpu); invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1); u32 r = color & 0xFF; @@ -4445,7 +4468,7 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, psx_gpu->mask_msb; u32 color_32bpp = color_16bpp | (color_16bpp << 16); - u32 *vram_ptr = (u32 *)(psx_gpu->vram_ptr + x + (y * 1024)); + u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024)); u32 pitch = 512 - (width / 2); u32 num_width; @@ -4522,7 +4545,8 @@ void initialize_reciprocal_table(void) u32 height_reciprocal; s32 shift; - for(height = 1; height < 512; height++) + for(height = 1; height < sizeof(reciprocal_table) + / sizeof(reciprocal_table[0]); height++) { shift = __builtin_clz(height); height_normalized = height << shift; @@ -4561,6 +4585,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->num_blocks = 0; psx_gpu->vram_ptr = vram; + psx_gpu->vram_out_ptr = vram; psx_gpu->texture_page_base = psx_gpu->vram_ptr; psx_gpu->texture_page_ptr = psx_gpu->vram_ptr; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 53a8717..7ed5622 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -122,7 +122,7 @@ typedef struct vec_4x32u g_block_span; vec_4x32u b_block_span; - // 72 bytes + // 76 bytes u32 b; u32 b_dy; @@ -143,6 +143,7 @@ typedef struct void *texture_page_base; u16 *clut_ptr; u16 *vram_ptr; + u16 *vram_out_ptr; // 26 bytes u16 render_state_base; @@ -180,9 +181,16 @@ typedef struct u8 primitive_type; u8 interlace_mode; + // enhancement stuff + u16 *enhancement_buf_ptr; + s16 saved_viewport_start_x; + s16 saved_viewport_start_y; + s16 saved_viewport_end_x; + s16 
saved_viewport_end_y; + // Align up to 64 byte boundary to keep the upcoming buffers cache line - // aligned - //u8 reserved_a[0]; + // aligned, also make reachable with single immediate addition + u8 reserved_a[240]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 79d5466..6393e15 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -1337,7 +1337,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -1604,7 +1604,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -1799,7 +1799,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect) ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ] ldrh y, [ span_edge_data, #edge_data_y_offset ] - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] cmp span_num_blocks, #0 beq 1f @@ -1919,7 +1919,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct) ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ] ldrh y, [ span_edge_data, #edge_data_y_offset ] - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] cmp span_num_blocks, #0 beq 1f @@ -2106,7 +2106,7 @@ 
function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -2346,7 +2346,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -4804,7 +4804,7 @@ function(setup_sprite_##texture_mode) \ and offset_u, u, #0xF; \ \ ldr width, [ sp, #40 ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ ldr height, [ sp, #44 ]; \ add fb_ptr, fb_ptr, y, lsl #11; \ @@ -5052,7 +5052,7 @@ setup_sprites_16bpp_flush_row: function(setup_sprite_16bpp) stmdb sp!, { r4 - r11, r14 } - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] ldr v, [ sp, #36 ] add fb_ptr, fb_ptr, y, lsl #11 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h index a47d965..2e18174 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h @@ -23,33 +23,34 @@ #define psx_gpu_texture_page_base_offset 0xd0 #define psx_gpu_clut_ptr_offset 0xd4 #define psx_gpu_vram_ptr_offset 0xd8 -#define psx_gpu_render_state_base_offset 0xdc -#define psx_gpu_render_state_offset 0xde -#define psx_gpu_num_spans_offset 0xe0 -#define psx_gpu_num_blocks_offset 0xe2 -#define psx_gpu_offset_x_offset 0xe4 -#define psx_gpu_offset_y_offset 0xe6 -#define psx_gpu_clut_settings_offset 0xe8 -#define psx_gpu_texture_settings_offset 0xea -#define 
psx_gpu_viewport_start_x_offset 0xec -#define psx_gpu_viewport_start_y_offset 0xee -#define psx_gpu_viewport_end_x_offset 0xf0 -#define psx_gpu_viewport_end_y_offset 0xf2 -#define psx_gpu_mask_msb_offset 0xf4 -#define psx_gpu_triangle_winding_offset 0xf6 -#define psx_gpu_display_area_draw_enable_offset 0xf7 -#define psx_gpu_current_texture_page_offset 0xf8 -#define psx_gpu_last_8bpp_texture_page_offset 0xf9 -#define psx_gpu_texture_mask_width_offset 0xfa -#define psx_gpu_texture_mask_height_offset 0xfb -#define psx_gpu_texture_window_x_offset 0xfc -#define psx_gpu_texture_window_y_offset 0xfd -#define psx_gpu_primitive_type_offset 0xfe -#define psx_gpu_interlace_mode_offset 0xff -#define psx_gpu_blocks_offset 0x100 -#define psx_gpu_span_uvrg_offset_offset 0x2100 -#define psx_gpu_span_edge_data_offset 0x4100 -#define psx_gpu_span_b_offset_offset 0x5100 -#define psx_gpu_texture_4bpp_cache_offset 0x5900 -#define psx_gpu_texture_8bpp_even_cache_offset 0x205900 -#define psx_gpu_texture_8bpp_odd_cache_offset 0x305900 +#define psx_gpu_vram_out_ptr_offset 0xdc +#define psx_gpu_render_state_base_offset 0xe0 +#define psx_gpu_render_state_offset 0xe2 +#define psx_gpu_num_spans_offset 0xe4 +#define psx_gpu_num_blocks_offset 0xe6 +#define psx_gpu_offset_x_offset 0xe8 +#define psx_gpu_offset_y_offset 0xea +#define psx_gpu_clut_settings_offset 0xec +#define psx_gpu_texture_settings_offset 0xee +#define psx_gpu_viewport_start_x_offset 0xf0 +#define psx_gpu_viewport_start_y_offset 0xf2 +#define psx_gpu_viewport_end_x_offset 0xf4 +#define psx_gpu_viewport_end_y_offset 0xf6 +#define psx_gpu_mask_msb_offset 0xf8 +#define psx_gpu_triangle_winding_offset 0xfa +#define psx_gpu_display_area_draw_enable_offset 0xfb +#define psx_gpu_current_texture_page_offset 0xfc +#define psx_gpu_last_8bpp_texture_page_offset 0xfd +#define psx_gpu_texture_mask_width_offset 0xfe +#define psx_gpu_texture_mask_height_offset 0xff +#define psx_gpu_texture_window_x_offset 0x100 +#define 
psx_gpu_texture_window_y_offset 0x101 +#define psx_gpu_primitive_type_offset 0x102 +#define psx_gpu_interlace_mode_offset 0x103 +#define psx_gpu_blocks_offset 0x200 +#define psx_gpu_span_uvrg_offset_offset 0x2200 +#define psx_gpu_span_edge_data_offset 0x4200 +#define psx_gpu_span_b_offset_offset 0x5200 +#define psx_gpu_texture_4bpp_cache_offset 0x5a00 +#define psx_gpu_texture_8bpp_even_cache_offset 0x205a00 +#define psx_gpu_texture_8bpp_odd_cache_offset 0x305a00 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c index 2275f59..d81f8aa 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c @@ -48,6 +48,7 @@ int main() WRITE_OFFSET(f, texture_page_base); WRITE_OFFSET(f, clut_ptr); WRITE_OFFSET(f, vram_ptr); + WRITE_OFFSET(f, vram_out_ptr); WRITE_OFFSET(f, render_state_base); WRITE_OFFSET(f, render_state); WRITE_OFFSET(f, num_spans); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 920c638..26715c6 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -152,6 +152,52 @@ void set_triangle_color(psx_gpu_struct *psx_gpu, u32 triangle_color) } } +static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y, + u32 width, u32 height, u32 color) +{ + x &= ~0xF; + width = ((width + 0xF) & ~0xF); + + flush_render_block_buffer(psx_gpu); + + if(unlikely((x + width) > 1024)) + { + u32 width_a = 1024 - x; + u32 width_b = width - width_a; + + if(unlikely((y + height) > 512)) + { + u32 height_a = 512 - y; + u32 height_b = height - height_a; + + render_block_fill(psx_gpu, color, x, y, width_a, height_a); + render_block_fill(psx_gpu, color, 0, y, width_b, height_a); + render_block_fill(psx_gpu, color, x, 0, width_a, height_b); + render_block_fill(psx_gpu, color, 0, 0, width_b, height_b); + } + else + { + render_block_fill(psx_gpu, color, x, y, width_a, height); 
+ render_block_fill(psx_gpu, color, 0, y, width_b, height); + } + } + else + { + if(unlikely((y + height) > 512)) + { + u32 height_a = 512 - y; + u32 height_b = height - height_a; + + render_block_fill(psx_gpu, color, x, y, width, height_a); + render_block_fill(psx_gpu, color, x, 0, width, height_b); + } + else + { + render_block_fill(psx_gpu, color, x, y, width, height); + } + } +} + #define sign_extend_12bit(value) \ (((s32)((value) << 20)) >> 20) \ @@ -235,45 +281,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) u32 height = list_s16[5] & 0x1FF; u32 color = list[0] & 0xFFFFFF; - x &= ~0xF; - width = ((width + 0xF) & ~0xF); - - if((x + width) > 1024) - { - u32 width_a = 1024 - x; - u32 width_b = width - width_a; - - if((y + height) > 512) - { - u32 height_a = 512 - y; - u32 height_b = height - height_a; - - render_block_fill(psx_gpu, color, x, y, width_a, height_a); - render_block_fill(psx_gpu, color, 0, y, width_b, height_a); - render_block_fill(psx_gpu, color, x, 0, width_a, height_b); - render_block_fill(psx_gpu, color, 0, 0, width_b, height_b); - } - else - { - render_block_fill(psx_gpu, color, x, y, width_a, height); - render_block_fill(psx_gpu, color, 0, y, width_b, height); - } - } - else - { - if((y + height) > 512) - { - u32 height_a = 512 - y; - u32 height_b = height - height_a; - - render_block_fill(psx_gpu, color, x, y, width, height_a); - render_block_fill(psx_gpu, color, x, 0, width, height_b); - } - else - { - render_block_fill(psx_gpu, color, x, y, width, height); - } - } + do_fill(psx_gpu, x, y, width, height, color); break; } @@ -741,3 +749,602 @@ breakloop: return list - list_start; } +#define enhancement_disable() { \ + psx_gpu->vram_out_ptr = psx_gpu->vram_ptr; \ + psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x; \ + psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y; \ + psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x; \ + psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y; 
\ +} + +#define enhancement_enable() { \ + psx_gpu->vram_out_ptr = psx_gpu->enhancement_buf_ptr; \ + psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x * 2; \ + psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2; \ + psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2; \ + psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2; \ +} + +#define shift_vertices3(v) { \ + v[0]->x *= 2; \ + v[0]->y *= 2; \ + v[1]->x *= 2; \ + v[1]->y *= 2; \ + v[2]->x *= 2; \ + v[2]->y *= 2; \ +} + +#define unshift_vertices3(v) { \ + v[0]->x /= 2; \ + v[0]->y /= 2; \ + v[1]->x /= 2; \ + v[1]->y /= 2; \ + v[2]->x /= 2; \ + v[2]->y /= 2; \ +} + +#define shift_triangle_area() \ + psx_gpu->triangle_area *= 4 + +static int disable_main_render; + +static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, + vertex_struct *vertexes, u32 current_command) +{ + vertex_struct *vertex_ptrs[3]; + + if (!prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) + return; + + if (!disable_main_render) + render_triangle_p(psx_gpu, vertex_ptrs, current_command); + + enhancement_enable(); + shift_vertices3(vertex_ptrs); + shift_triangle_area(); + render_triangle_p(psx_gpu, vertex_ptrs, current_command); +} + +static void do_quad_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + u32 current_command) +{ + vertex_struct *vertex_ptrs[3]; + + if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) { + if (!disable_main_render) + render_triangle_p(psx_gpu, vertex_ptrs, current_command); + + enhancement_enable(); + shift_vertices3(vertex_ptrs); + shift_triangle_area(); + render_triangle_p(psx_gpu, vertex_ptrs, current_command); + unshift_vertices3(vertex_ptrs); + } + enhancement_disable(); + if (prepare_triangle(psx_gpu, &vertexes[1], vertex_ptrs)) { + if (!disable_main_render) + render_triangle_p(psx_gpu, vertex_ptrs, current_command); + + enhancement_enable(); + shift_vertices3(vertex_ptrs); + shift_triangle_area(); + render_triangle_p(psx_gpu, vertex_ptrs, current_command); 
+ } +} + +u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) +{ + u32 current_command = 0, command_length; + + u32 *list_start = list; + u32 *list_end = list + (size / 4); + + psx_gpu->saved_viewport_start_x = psx_gpu->viewport_start_x; + psx_gpu->saved_viewport_start_y = psx_gpu->viewport_start_y; + psx_gpu->saved_viewport_end_x = psx_gpu->viewport_end_x; + psx_gpu->saved_viewport_end_y = psx_gpu->viewport_end_y; + + for(; list < list_end; list += 1 + command_length) + { + s16 *list_s16 = (void *)list; + current_command = *list >> 24; + command_length = command_lengths[current_command]; + if (list + 1 + command_length > list_end) { + current_command = (u32)-1; + break; + } + + enhancement_disable(); + + switch(current_command) + { + case 0x00: + break; + + case 0x02: + { + u32 x = list_s16[2] & 0x3FF; + u32 y = list_s16[3] & 0x1FF; + u32 width = list_s16[4] & 0x3FF; + u32 height = list_s16[5] & 0x1FF; + u32 color = list[0] & 0xFFFFFF; + + do_fill(psx_gpu, x, y, width, height, color); + + psx_gpu->vram_out_ptr = psx_gpu->enhancement_buf_ptr; + x *= 2; + y *= 2; + width *= 2; + height *= 2; + if (width > 1024) + width = 1024; + render_block_fill(psx_gpu, color, x, y, width, height); + break; + } + + case 0x20 ... 0x23: + { + set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); + + get_vertex_data_xy(0, 2); + get_vertex_data_xy(1, 4); + get_vertex_data_xy(2, 6); + + do_triangle_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x24 ... 0x27: + { + set_clut(psx_gpu, list_s16[5]); + set_texture(psx_gpu, list_s16[9]); + set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); + + get_vertex_data_xy_uv(0, 2); + get_vertex_data_xy_uv(1, 6); + get_vertex_data_xy_uv(2, 10); + + do_triangle_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x28 ... 
0x2B: + { + set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); + + get_vertex_data_xy(0, 2); + get_vertex_data_xy(1, 4); + get_vertex_data_xy(2, 6); + get_vertex_data_xy(3, 8); + + do_quad_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x2C ... 0x2F: + { + set_clut(psx_gpu, list_s16[5]); + set_texture(psx_gpu, list_s16[9]); + set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); + + get_vertex_data_xy_uv(0, 2); + get_vertex_data_xy_uv(1, 6); + get_vertex_data_xy_uv(2, 10); + get_vertex_data_xy_uv(3, 14); + + do_quad_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x30 ... 0x33: + { + get_vertex_data_xy_rgb(0, 0); + get_vertex_data_xy_rgb(1, 4); + get_vertex_data_xy_rgb(2, 8); + + do_triangle_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x34: + case 0x35: + case 0x36: + case 0x37: + { + set_clut(psx_gpu, list_s16[5]); + set_texture(psx_gpu, list_s16[11]); + + get_vertex_data_xy_uv_rgb(0, 0); + get_vertex_data_xy_uv_rgb(1, 6); + get_vertex_data_xy_uv_rgb(2, 12); + + do_triangle_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: + { + get_vertex_data_xy_rgb(0, 0); + get_vertex_data_xy_rgb(1, 4); + get_vertex_data_xy_rgb(2, 8); + get_vertex_data_xy_rgb(3, 12); + + do_quad_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: + { + set_clut(psx_gpu, list_s16[5]); + set_texture(psx_gpu, list_s16[11]); + + get_vertex_data_xy_uv_rgb(0, 0); + get_vertex_data_xy_uv_rgb(1, 6); + get_vertex_data_xy_uv_rgb(2, 12); + get_vertex_data_xy_uv_rgb(3, 18); + + do_quad_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x40 ... 
0x47: + { + vertexes[0].x = list_s16[2] + psx_gpu->offset_x; + vertexes[0].y = list_s16[3] + psx_gpu->offset_y; + vertexes[1].x = list_s16[4] + psx_gpu->offset_x; + vertexes[1].y = list_s16[5] + psx_gpu->offset_y; + + render_line(psx_gpu, vertexes, current_command, list[0]); + break; + } + + case 0x48 ... 0x4F: + { + u32 num_vertexes = 1; + u32 *list_position = &(list[2]); + u32 xy = list[1]; + + vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; + vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; + + xy = *list_position; + while(1) + { + vertexes[0] = vertexes[1]; + + vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; + vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; + + render_line(psx_gpu, vertexes, current_command, list[0]); + + list_position++; + num_vertexes++; + + if(list_position >= list_end) + break; + + xy = *list_position; + if((xy & 0xF000F000) == 0x50005000) + break; + } + + command_length += (num_vertexes - 2); + break; + } + + case 0x50 ... 0x57: + { + vertexes[0].r = list[0] & 0xFF; + vertexes[0].g = (list[0] >> 8) & 0xFF; + vertexes[0].b = (list[0] >> 16) & 0xFF; + vertexes[0].x = list_s16[2] + psx_gpu->offset_x; + vertexes[0].y = list_s16[3] + psx_gpu->offset_y; + + vertexes[1].r = list[2] & 0xFF; + vertexes[1].g = (list[2] >> 8) & 0xFF; + vertexes[1].b = (list[2] >> 16) & 0xFF; + vertexes[1].x = list_s16[6] + psx_gpu->offset_x; + vertexes[1].y = list_s16[7] + psx_gpu->offset_y; + + render_line(psx_gpu, vertexes, current_command, 0); + break; + } + + case 0x58 ... 
0x5F: + { + u32 num_vertexes = 1; + u32 *list_position = &(list[2]); + u32 color = list[0]; + u32 xy = list[1]; + + vertexes[1].r = color & 0xFF; + vertexes[1].g = (color >> 8) & 0xFF; + vertexes[1].b = (color >> 16) & 0xFF; + vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; + vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; + + color = list_position[0]; + while(1) + { + xy = list_position[1]; + + vertexes[0] = vertexes[1]; + + vertexes[1].r = color & 0xFF; + vertexes[1].g = (color >> 8) & 0xFF; + vertexes[1].b = (color >> 16) & 0xFF; + vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; + vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; + + render_line(psx_gpu, vertexes, current_command, 0); + + list_position += 2; + num_vertexes++; + + if(list_position >= list_end) + break; + + color = list_position[0]; + if((color & 0xF000F000) == 0x50005000) + break; + } + + command_length += ((num_vertexes - 2) * 2); + break; + } + + case 0x60 ... 0x63: + { + u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + u32 width = list_s16[4] & 0x3FF; + u32 height = list_s16[5] & 0x1FF; + + render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); + break; + } + + case 0x64 ... 
0x67: + { + u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + u32 uv = list_s16[4]; + u32 width = list_s16[6] & 0x3FF; + u32 height = list_s16[7] & 0x1FF; + + set_clut(psx_gpu, list_s16[5]); + + render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height, + current_command, list[0]); + break; + } + + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + { + s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + + render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); + break; + } + + case 0x70: + case 0x71: + case 0x72: + case 0x73: + { + s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + + render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); + break; + } + + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + u32 uv = list_s16[4]; + + set_clut(psx_gpu, list_s16[5]); + + render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8, + current_command, list[0]); + break; + } + + case 0x78: + case 0x79: + case 0x7A: + case 0x7B: + { + s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + + render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); + break; + } + + case 0x7C: + case 0x7D: + case 0x7E: + case 0x7F: + { + s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + u32 uv = list_s16[4]; + + set_clut(psx_gpu, list_s16[5]); + + render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16, + current_command, list[0]); + break; + } + + case 0x80: // vid -> vid + render_block_move(psx_gpu, list_s16[2] & 0x3FF, list_s16[3] & 0x1FF, + 
list_s16[4] & 0x3FF, list_s16[5] & 0x1FF, + ((list_s16[6] - 1) & 0x3FF) + 1, ((list_s16[7] - 1) & 0x1FF) + 1); + break; + +#ifdef PCSX + case 0xA0: // sys -> vid + case 0xC0: // vid -> sys + goto breakloop; +#else + case 0xA0: // sys -> vid + { + u32 load_x = list_s16[2] & 0x3FF; + u32 load_y = list_s16[3] & 0x1FF; + u32 load_width = list_s16[4] & 0x3FF; + u32 load_height = list_s16[5] & 0x1FF; + u32 load_size = load_width * load_height; + + command_length += load_size / 2; + + if(load_size & 1) + command_length++; + + render_block_copy(psx_gpu, (u16 *)&(list_s16[6]), load_x, load_y, + load_width, load_height, load_width); + break; + } + + case 0xC0: // vid -> sys + break; +#endif + + case 0xE1: + set_texture(psx_gpu, list[0] & 0x1FF); + + if(list[0] & (1 << 9)) + psx_gpu->render_state_base |= RENDER_STATE_DITHER; + else + psx_gpu->render_state_base &= ~RENDER_STATE_DITHER; + + psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1; + SET_Ex(1, list[0]); + break; + + case 0xE2: + { + // TODO: Clean + u32 texture_window_settings = list[0]; + u32 tmp, x, y, w, h; + + if(texture_window_settings != psx_gpu->texture_window_settings) + { + tmp = (texture_window_settings & 0x1F) | 0x20; + for(w = 8; (tmp & 1) == 0; tmp >>= 1, w <<= 1); + + tmp = ((texture_window_settings >> 5) & 0x1f) | 0x20; + for (h = 8; (tmp & 1) == 0; tmp >>= 1, h <<= 1); + + tmp = 32 - (w >> 3); + x = ((texture_window_settings >> 10) & tmp) << 3; + + tmp = 32 - (h >> 3); + y = ((texture_window_settings >> 15) & tmp) << 3; + + flush_render_block_buffer(psx_gpu); + + psx_gpu->texture_window_settings = texture_window_settings; + psx_gpu->texture_window_x = x; + psx_gpu->texture_window_y = y; + psx_gpu->texture_mask_width = w - 1; + psx_gpu->texture_mask_height = h - 1; + + update_texture_ptr(psx_gpu); + } + SET_Ex(2, list[0]); + break; + } + + case 0xE3: + psx_gpu->viewport_start_x = list[0] & 0x3FF; + psx_gpu->viewport_start_y = (list[0] >> 10) & 0x1FF; + psx_gpu->saved_viewport_start_x = 
psx_gpu->viewport_start_x; + psx_gpu->saved_viewport_start_y = psx_gpu->viewport_start_y; + +#ifdef TEXTURE_CACHE_4BPP + psx_gpu->viewport_mask = + texture_region_mask(psx_gpu->viewport_start_x, + psx_gpu->viewport_start_y, psx_gpu->viewport_end_x, + psx_gpu->viewport_end_y); +#endif + SET_Ex(3, list[0]); + break; + + case 0xE4: + psx_gpu->viewport_end_x = list[0] & 0x3FF; + psx_gpu->viewport_end_y = (list[0] >> 10) & 0x1FF; + psx_gpu->saved_viewport_end_x = psx_gpu->viewport_end_x; + psx_gpu->saved_viewport_end_y = psx_gpu->viewport_end_y; + +#ifdef TEXTURE_CACHE_4BPP + psx_gpu->viewport_mask = + texture_region_mask(psx_gpu->viewport_start_x, + psx_gpu->viewport_start_y, psx_gpu->viewport_end_x, + psx_gpu->viewport_end_y); +#endif + SET_Ex(4, list[0]); + break; + + case 0xE5: + { + s32 offset_x = list[0] << 21; + s32 offset_y = list[0] << 10; + psx_gpu->offset_x = offset_x >> 21; + psx_gpu->offset_y = offset_y >> 21; + + SET_Ex(5, list[0]); + break; + } + + case 0xE6: + { + u32 mask_settings = list[0]; + u16 mask_msb = mask_settings << 15; + + if(list[0] & 0x2) + psx_gpu->render_state_base |= RENDER_STATE_MASK_EVALUATE; + else + psx_gpu->render_state_base &= ~RENDER_STATE_MASK_EVALUATE; + + if(mask_msb != psx_gpu->mask_msb) + { + flush_render_block_buffer(psx_gpu); + psx_gpu->mask_msb = mask_msb; + } + + SET_Ex(6, list[0]); + break; + } + + default: + break; + } + } + +#ifdef PCSX +breakloop: +#endif +enhancement_disable(); + if (last_command != NULL) + *last_command = current_command; + return list - list_start; +} + +// vim:shiftwidth=2:expandtab -- cgit v1.2.3 From f1359c5758c2e745b1cbec63e21445fa65f7cafe Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 13 Aug 2012 02:53:21 +0300 Subject: psx_gpu: switch enhancement to 2048 width otherwise games that position framebuffers horizontally corrupt the display. 
--- plugins/gpu_neon/psx_gpu/psx_gpu.c | 63 ++++++++++++++++++++--- plugins/gpu_neon/psx_gpu/psx_gpu.h | 7 +-- plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 9 ++-- 5 files changed, 68 insertions(+), 15 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 0c1c78d..1385e2e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -454,7 +454,7 @@ void setup_blocks_shaded_untextured_undithered_unswizzled_indirect( void flush_render_block_buffer(psx_gpu_struct *psx_gpu) { - if((psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) && + if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) && (psx_gpu->primitive_type == PRIMITIVE_TYPE_SPRITE)) { u32 num_blocks_dest = 0; @@ -464,7 +464,7 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu) u16 *vram_ptr = psx_gpu->vram_ptr; u32 i; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD) + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) { for(i = 0; i < psx_gpu->num_blocks; i++) { @@ -3097,11 +3097,11 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu, spans += psx_gpu->num_spans; #endif - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) + if(unlikely(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED)) { u32 i; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD) + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) { for(i = 0; i < psx_gpu->num_spans; i++) { @@ -3118,6 +3118,14 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu, } } } + if(psx_gpu->render_mode & RENDER_DOUBLE_MODE) + { + u32 i; + for(i = 0; i < psx_gpu->num_spans; i++) + { + psx_gpu->span_edge_data[i].y *= 2; + } + } u32 render_state = flags & (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | @@ -4473,12 +4481,12 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, u32 pitch = 512 - (width / 
2); u32 num_width; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) + if(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) { pitch += 512; height /= 2; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD) + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) vram_ptr += 512; } @@ -4505,6 +4513,47 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, } } +void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, + u32 width, u32 height) +{ + if((width == 0) || (height == 0)) + return; + + u32 r = color & 0xFF; + u32 g = (color >> 8) & 0xFF; + u32 b = (color >> 16) & 0xFF; + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | + psx_gpu->mask_msb; + u32 color_32bpp = color_16bpp | (color_16bpp << 16); + + u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 2048)); + + u32 pitch = 2048 / 2 - (width / 2); + u32 num_width; + + while(height) + { + num_width = width; + while(num_width) + { + vram_ptr[0] = color_32bpp; + vram_ptr[1] = color_32bpp; + vram_ptr[2] = color_32bpp; + vram_ptr[3] = color_32bpp; + vram_ptr[4] = color_32bpp; + vram_ptr[5] = color_32bpp; + vram_ptr[6] = color_32bpp; + vram_ptr[7] = color_32bpp; + + vram_ptr += 8; + num_width -= 16; + } + + vram_ptr += pitch; + height--; + } +} + void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, u32 width, u32 height, u32 pitch) { @@ -4598,7 +4647,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->texture_mask_width = 0xFF; psx_gpu->texture_mask_height = 0xFF; - psx_gpu->interlace_mode = 0; + psx_gpu->render_mode = 0; memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 7ed5622..71b99cd 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -56,8 +56,9 @@ typedef enum typedef enum { RENDER_INTERLACE_ENABLED = 0x1, - RENDER_INTERLACE_ODD = 0x2 -} render_interlace_enum; + 
RENDER_INTERLACE_ODD = 0x2, + RENDER_DOUBLE_MODE = 0x4, +} render_mode_enum; typedef struct { @@ -179,7 +180,7 @@ typedef struct u8 texture_window_y; u8 primitive_type; - u8 interlace_mode; + u8 render_mode; // enhancement stuff u16 *enhancement_buf_ptr; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h index 2e18174..7ebf7db 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h @@ -46,7 +46,7 @@ #define psx_gpu_texture_window_x_offset 0x100 #define psx_gpu_texture_window_y_offset 0x101 #define psx_gpu_primitive_type_offset 0x102 -#define psx_gpu_interlace_mode_offset 0x103 +#define psx_gpu_render_mode_offset 0x103 #define psx_gpu_blocks_offset 0x200 #define psx_gpu_span_uvrg_offset_offset 0x2200 #define psx_gpu_span_edge_data_offset 0x4200 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c index d81f8aa..ff74f34 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c @@ -71,7 +71,7 @@ int main() WRITE_OFFSET(f, texture_window_x); WRITE_OFFSET(f, texture_window_y); WRITE_OFFSET(f, primitive_type); - WRITE_OFFSET(f, interlace_mode); + WRITE_OFFSET(f, render_mode); WRITE_OFFSET(f, blocks); WRITE_OFFSET(f, span_uvrg_offset); WRITE_OFFSET(f, span_edge_data); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 26715c6..d3616bd 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -755,6 +755,7 @@ breakloop: psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y; \ psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x; \ psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y; \ + psx_gpu->render_mode &= ~RENDER_DOUBLE_MODE; \ } #define enhancement_enable() { \ @@ -763,6 +764,7 @@ breakloop: psx_gpu->viewport_start_y = 
psx_gpu->saved_viewport_start_y * 2; \ psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2; \ psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2; \ + psx_gpu->render_mode |= RENDER_DOUBLE_MODE; \ } #define shift_vertices3(v) { \ @@ -869,6 +871,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c u32 height = list_s16[5] & 0x1FF; u32 color = list[0] & 0xFFFFFF; + x &= ~0xF; + width = ((width + 0xF) & ~0xF); + do_fill(psx_gpu, x, y, width, height, color); psx_gpu->vram_out_ptr = psx_gpu->enhancement_buf_ptr; @@ -876,9 +881,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c y *= 2; width *= 2; height *= 2; - if (width > 1024) - width = 1024; - render_block_fill(psx_gpu, color, x, y, width, height); + render_block_fill_enh(psx_gpu, color, x, y, width, height); break; } -- cgit v1.2.3 From f3492649a0ced3b4e81f6df6e93c81746fcad54b Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 15 Aug 2012 19:39:48 +0300 Subject: psx_gpu: move masking to set_texture --- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index d3616bd..61e7de5 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -92,6 +92,7 @@ void update_texture_ptr(psx_gpu_struct *psx_gpu) void set_texture(psx_gpu_struct *psx_gpu, u32 texture_settings) { + texture_settings &= 0x1FF; if(psx_gpu->texture_settings != texture_settings) { u32 new_texture_page = texture_settings & 0x1F; @@ -634,7 +635,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) #endif case 0xE1: - set_texture(psx_gpu, list[0] & 0x1FF); + set_texture(psx_gpu, list[0]); if(list[0] & (1 << 9)) psx_gpu->render_state_base |= RENDER_STATE_DITHER; @@ -1230,7 +1231,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct 
*psx_gpu, u32 *list, u32 size, u32 *last_c #endif case 0xE1: - set_texture(psx_gpu, list[0] & 0x1FF); + set_texture(psx_gpu, list[0]); if(list[0] & (1 << 9)) psx_gpu->render_state_base |= RENDER_STATE_DITHER; -- cgit v1.2.3 From 24c742fccede19c9baecc36008f4d8ac4f9a51fa Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 15 Aug 2012 19:35:52 +0300 Subject: psx_gpu: do enhaced sprites with triangles --- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 88 +++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 7 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 61e7de5..2e1c0b8 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -835,6 +835,71 @@ static void do_quad_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, } } +#define fill_vertex(i, x_, y_, u_, v_, rgb_) \ + vertexes[i].x = x_; \ + vertexes[i].y = y_; \ + vertexes[i].u = u_; \ + vertexes[i].v = v_; \ + vertexes[i].r = rgb_; \ + vertexes[i].g = (rgb_) >> 8; \ + vertexes[i].b = (rgb_) >> 16 + +static void do_esprite_in_triangles(psx_gpu_struct *psx_gpu, int x, int y, + u32 u, u32 v, u32 w, u32 h, u32 cmd_rgb) +{ + vertex_struct *vertex_ptrs[3]; + u32 flags = (cmd_rgb >> 24); + u32 color = cmd_rgb & 0xffffff; + u32 render_state_base_saved = psx_gpu->render_state_base; + int x1, y1; + u8 u1, v1; + + flags &= + (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | + RENDER_FLAGS_TEXTURE_MAP); + + set_triangle_color(psx_gpu, color); + if(color == 0x808080) + flags |= RENDER_FLAGS_MODULATE_TEXELS; + + psx_gpu->render_state_base &= ~RENDER_STATE_DITHER; + enhancement_enable(); + + x1 = x + w; + y1 = y + h; + u1 = u + w; + v1 = v + h; + // FIXME.. 
+ if (u1 < u) u1 = 0xff; + if (v1 < v) v1 = 0xff; + + // 0-2 + // |/ + // 1 + fill_vertex(0, x, y, u, v, color); + fill_vertex(1, x, y1, u, v1, color); + fill_vertex(2, x1, y, u1, v, color); + if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) { + shift_vertices3(vertex_ptrs); + shift_triangle_area(); + render_triangle_p(psx_gpu, vertex_ptrs, flags); + } + + // 0 + // /| + // 1-2 + fill_vertex(0, x1, y, u1, v, color); + fill_vertex(1, x, y1, u, v1, color); + fill_vertex(2, x1, y1, u1, v1, color); + if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) { + shift_vertices3(vertex_ptrs); + shift_triangle_area(); + render_triangle_p(psx_gpu, vertex_ptrs, flags); + } + + psx_gpu->render_state_base = render_state_base_saved; +} + u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) { u32 current_command = 0, command_length; @@ -1111,6 +1176,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c u32 height = list_s16[5] & 0x1FF; render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); + do_esprite_in_triangles(psx_gpu, x, y, 0, 0, width, height, list[0]); break; } @@ -1118,14 +1184,16 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c { u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); - u32 uv = list_s16[4]; + u8 u = list_s16[4]; + u8 v = list_s16[4] >> 8; u32 width = list_s16[6] & 0x3FF; u32 height = list_s16[7] & 0x1FF; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height, + render_sprite(psx_gpu, x, y, u, v, width, height, current_command, list[0]); + do_esprite_in_triangles(psx_gpu, x, y, u, v, width, height, list[0]); break; } @@ -1138,6 +1206,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 1, 
1, current_command, list[0]); + do_esprite_in_triangles(psx_gpu, x, y, 0, 0, 1, 1, list[0]); break; } @@ -1150,6 +1219,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); + do_esprite_in_triangles(psx_gpu, x, y, 0, 0, 8, 8, list[0]); break; } @@ -1160,12 +1230,14 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); - u32 uv = list_s16[4]; + u8 u = list_s16[4]; + u8 v = list_s16[4] >> 8; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8, + render_sprite(psx_gpu, x, y, u, v, 8, 8, current_command, list[0]); + do_esprite_in_triangles(psx_gpu, x, y, u, v, 8, 8, list[0]); break; } @@ -1178,6 +1250,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); + do_esprite_in_triangles(psx_gpu, x, y, 0, 0, 16, 16, list[0]); break; } @@ -1188,12 +1261,13 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c { s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); - u32 uv = list_s16[4]; + u8 u = list_s16[4]; + u8 v = list_s16[4] >> 8; set_clut(psx_gpu, list_s16[5]); - render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16, - current_command, list[0]); + render_sprite(psx_gpu, x, y, u, v, 16, 16, current_command, list[0]); + do_esprite_in_triangles(psx_gpu, x, y, u, v, 16, 16, list[0]); break; } -- cgit v1.2.3 From c6063f8985c69362a89a12111f393229ab65d05f Mon Sep 17 00:00:00 2001 From: notaz Date: Wed, 15 Aug 2012 23:52:38 +0300 Subject: psx_gpu: use different 
uvrgb phase for enhancement --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 9 ++-- plugins/gpu_neon/psx_gpu/psx_gpu.h | 19 ++++---- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 11 +++-- plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h | 59 ++++++++++++----------- plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c | 9 ++-- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 2 + 6 files changed, 59 insertions(+), 50 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 1385e2e..28ebcf5 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -567,7 +567,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, vec_4x32u uvrg_base; vec_4x32u b_base; - vec_4x32u const_0x8000; + vec_4x32u uvrgb_phase; vec_4x16s d0_a_d3_c, d0_b, d0_c; vec_4x16s d1_a, d1_b, d1_c_d2_a; @@ -596,12 +596,12 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, setup_gradient_calculation_input(1, b); setup_gradient_calculation_input(2, c); - dup_4x32b(const_0x8000, 0x8000); + dup_4x32b(uvrgb_phase, psx_gpu->uvrgb_phase); shl_long_4x16b(uvrg_base, x0_a_y0_c, 16); shl_long_4x16b(b_base, x0_b, 16); - add_4x32b(uvrg_base, uvrg_base, const_0x8000); - add_4x32b(b_base, b_base, const_0x8000); + add_4x32b(uvrg_base, uvrg_base, uvrgb_phase); + add_4x32b(b_base, b_base, uvrgb_phase); // Can probably pair these, but it'll require careful register allocation sub_4x16b(d0_a_d3_c, x1_a_y1_c, x0_a_y0_c); @@ -4632,6 +4632,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->render_state = 0; psx_gpu->render_state_base = 0; psx_gpu->num_blocks = 0; + psx_gpu->uvrgb_phase = 0x8000; psx_gpu->vram_ptr = vram; psx_gpu->vram_out_ptr = vram; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 71b99cd..7252dc2 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -123,7 +123,6 @@ 
typedef struct vec_4x32u g_block_span; vec_4x32u b_block_span; - // 76 bytes u32 b; u32 b_dy; @@ -139,6 +138,8 @@ typedef struct u32 triangle_color; u32 dither_table[4]; + u32 uvrgb_phase; + struct render_block_handler_struct *render_block_handler; void *texture_page_ptr; void *texture_page_base; @@ -146,19 +147,12 @@ typedef struct u16 *vram_ptr; u16 *vram_out_ptr; - // 26 bytes u16 render_state_base; u16 render_state; u16 num_spans; u16 num_blocks; - s16 offset_x; - s16 offset_y; - - u16 clut_settings; - u16 texture_settings; - s16 viewport_start_x; s16 viewport_start_y; s16 viewport_end_x; @@ -166,7 +160,6 @@ typedef struct u16 mask_msb; - // 8 bytes u8 triangle_winding; u8 display_area_draw_enable; @@ -182,6 +175,12 @@ typedef struct u8 primitive_type; u8 render_mode; + s16 offset_x; + s16 offset_y; + + u16 clut_settings; + u16 texture_settings; + // enhancement stuff u16 *enhancement_buf_ptr; s16 saved_viewport_start_x; @@ -191,7 +190,7 @@ typedef struct // Align up to 64 byte boundary to keep the upcoming buffers cache line // aligned, also make reachable with single immediate addition - u8 reserved_a[240]; + u8 reserved_a[236]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 6393e15..3239412 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -182,6 +182,7 @@ #define uvrg_dx3l d6 #define uvrg_dx3h d7 +#define uvrgb_phase q13 .align 4 @@ -313,11 +314,16 @@ function(compute_all_gradients) vmull.s16 ga_uvrg_y, d0_b, d1_b rsbmi ga_bx, ga_bx, #0 + @ r12 = psx_gpu->uvrgb_phase + ldr r12, [ psx_gpu, #psx_gpu_uvrgb_phase_offset ] + vmlsl.s16 ga_uvrg_y, d2_b, d3_b movs gs_by, ga_by, asr #31 vshr.u64 d0, d30, #22 - mov b_base, b0, lsl #16 + add b_base, r12, b0, lsl #16 + + vdup.u32 uvrgb_phase, r12 rsbmi ga_by, ga_by, #0 vclt.s32 gs_uvrg_x, ga_uvrg_x, #0 @ gs_uvrg_x = ga_uvrg_x < 0 @@ -326,7 +332,6 @@ 
function(compute_all_gradients) ldrb r12, [ psx_gpu, #psx_gpu_triangle_winding_offset ] vclt.s32 gs_uvrg_y, ga_uvrg_y, #0 @ gs_uvrg_y = ga_uvrg_y < 0 - add b_base, b_base, #0x8000 rsb r12, r12, #0 @ r12 = -(triangle->winding) vdup.u32 w_mask, r12 @ w_mask = { -w, -w, -w, -w } @@ -335,7 +340,7 @@ function(compute_all_gradients) vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16 vdup.u32 r_shift, r14 @ r_shift = { shift, shift, shift, shift } - vorr.u32 uvrg_base, #0x8000 + vadd.u32 uvrg_base, uvrgb_phase vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x) vmov area_r_s, s0 @ area_r_s = triangle_reciprocal diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h index 7ebf7db..1307891 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h @@ -18,35 +18,36 @@ #define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 0xb0 #define psx_gpu_triangle_color_offset 0xb4 #define psx_gpu_dither_table_offset 0xb8 -#define psx_gpu_render_block_handler_offset 0xc8 -#define psx_gpu_texture_page_ptr_offset 0xcc -#define psx_gpu_texture_page_base_offset 0xd0 -#define psx_gpu_clut_ptr_offset 0xd4 -#define psx_gpu_vram_ptr_offset 0xd8 -#define psx_gpu_vram_out_ptr_offset 0xdc -#define psx_gpu_render_state_base_offset 0xe0 -#define psx_gpu_render_state_offset 0xe2 -#define psx_gpu_num_spans_offset 0xe4 -#define psx_gpu_num_blocks_offset 0xe6 -#define psx_gpu_offset_x_offset 0xe8 -#define psx_gpu_offset_y_offset 0xea -#define psx_gpu_clut_settings_offset 0xec -#define psx_gpu_texture_settings_offset 0xee -#define psx_gpu_viewport_start_x_offset 0xf0 -#define psx_gpu_viewport_start_y_offset 0xf2 -#define psx_gpu_viewport_end_x_offset 0xf4 -#define psx_gpu_viewport_end_y_offset 0xf6 -#define psx_gpu_mask_msb_offset 0xf8 -#define psx_gpu_triangle_winding_offset 0xfa -#define psx_gpu_display_area_draw_enable_offset 0xfb -#define psx_gpu_current_texture_page_offset 0xfc -#define 
psx_gpu_last_8bpp_texture_page_offset 0xfd -#define psx_gpu_texture_mask_width_offset 0xfe -#define psx_gpu_texture_mask_height_offset 0xff -#define psx_gpu_texture_window_x_offset 0x100 -#define psx_gpu_texture_window_y_offset 0x101 -#define psx_gpu_primitive_type_offset 0x102 -#define psx_gpu_render_mode_offset 0x103 +#define psx_gpu_uvrgb_phase_offset 0xc8 +#define psx_gpu_render_block_handler_offset 0xcc +#define psx_gpu_texture_page_ptr_offset 0xd0 +#define psx_gpu_texture_page_base_offset 0xd4 +#define psx_gpu_clut_ptr_offset 0xd8 +#define psx_gpu_vram_ptr_offset 0xdc +#define psx_gpu_vram_out_ptr_offset 0xe0 +#define psx_gpu_render_state_base_offset 0xe4 +#define psx_gpu_render_state_offset 0xe6 +#define psx_gpu_num_spans_offset 0xe8 +#define psx_gpu_num_blocks_offset 0xea +#define psx_gpu_viewport_start_x_offset 0xec +#define psx_gpu_viewport_start_y_offset 0xee +#define psx_gpu_viewport_end_x_offset 0xf0 +#define psx_gpu_viewport_end_y_offset 0xf2 +#define psx_gpu_mask_msb_offset 0xf4 +#define psx_gpu_triangle_winding_offset 0xf6 +#define psx_gpu_display_area_draw_enable_offset 0xf7 +#define psx_gpu_current_texture_page_offset 0xf8 +#define psx_gpu_last_8bpp_texture_page_offset 0xf9 +#define psx_gpu_texture_mask_width_offset 0xfa +#define psx_gpu_texture_mask_height_offset 0xfb +#define psx_gpu_texture_window_x_offset 0xfc +#define psx_gpu_texture_window_y_offset 0xfd +#define psx_gpu_primitive_type_offset 0xfe +#define psx_gpu_render_mode_offset 0xff +#define psx_gpu_offset_x_offset 0x100 +#define psx_gpu_offset_y_offset 0x102 +#define psx_gpu_clut_settings_offset 0x104 +#define psx_gpu_texture_settings_offset 0x106 #define psx_gpu_blocks_offset 0x200 #define psx_gpu_span_uvrg_offset_offset 0x2200 #define psx_gpu_span_edge_data_offset 0x4200 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c index ff74f34..5adfb75 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c +++ 
b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c @@ -43,6 +43,7 @@ int main() WRITE_OFFSET(f, dirty_textures_8bpp_alternate_mask); WRITE_OFFSET(f, triangle_color); WRITE_OFFSET(f, dither_table); + WRITE_OFFSET(f, uvrgb_phase); WRITE_OFFSET(f, render_block_handler); WRITE_OFFSET(f, texture_page_ptr); WRITE_OFFSET(f, texture_page_base); @@ -53,10 +54,6 @@ int main() WRITE_OFFSET(f, render_state); WRITE_OFFSET(f, num_spans); WRITE_OFFSET(f, num_blocks); - WRITE_OFFSET(f, offset_x); - WRITE_OFFSET(f, offset_y); - WRITE_OFFSET(f, clut_settings); - WRITE_OFFSET(f, texture_settings); WRITE_OFFSET(f, viewport_start_x); WRITE_OFFSET(f, viewport_start_y); WRITE_OFFSET(f, viewport_end_x); @@ -72,6 +69,10 @@ int main() WRITE_OFFSET(f, texture_window_y); WRITE_OFFSET(f, primitive_type); WRITE_OFFSET(f, render_mode); + WRITE_OFFSET(f, offset_x); + WRITE_OFFSET(f, offset_y); + WRITE_OFFSET(f, clut_settings); + WRITE_OFFSET(f, texture_settings); WRITE_OFFSET(f, blocks); WRITE_OFFSET(f, span_uvrg_offset); WRITE_OFFSET(f, span_edge_data); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 2e1c0b8..1c449ea 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -757,6 +757,7 @@ breakloop: psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x; \ psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y; \ psx_gpu->render_mode &= ~RENDER_DOUBLE_MODE; \ + psx_gpu->uvrgb_phase = 0x8000; \ } #define enhancement_enable() { \ @@ -766,6 +767,7 @@ breakloop: psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2; \ psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2; \ psx_gpu->render_mode |= RENDER_DOUBLE_MODE; \ + psx_gpu->uvrgb_phase = 0x1000; \ } #define shift_vertices3(v) { \ -- cgit v1.2.3 From e929dec505f8d3692248fe0d42c84a37c994ad39 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 18 Aug 2012 02:37:49 +0300 Subject: psx_gpu: switch to 1024 width again. 
--- plugins/gpu_neon/psx_gpu/psx_gpu.c | 17 +++++++---------- plugins/gpu_neon/psx_gpu/psx_gpu.h | 5 +++-- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 17 +++++++++++++---- 3 files changed, 23 insertions(+), 16 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 28ebcf5..092125b 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -3118,14 +3118,6 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu, } } } - if(psx_gpu->render_mode & RENDER_DOUBLE_MODE) - { - u32 i; - for(i = 0; i < psx_gpu->num_spans; i++) - { - psx_gpu->span_edge_data[i].y *= 2; - } - } u32 render_state = flags & (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | @@ -4519,6 +4511,9 @@ void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, if((width == 0) || (height == 0)) return; + if(width > 1024) + width = 1024; + u32 r = color & 0xFF; u32 g = (color >> 8) & 0xFF; u32 b = (color >> 16) & 0xFF; @@ -4526,9 +4521,9 @@ void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, psx_gpu->mask_msb; u32 color_32bpp = color_16bpp | (color_16bpp << 16); - u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 2048)); + u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024)); - u32 pitch = 2048 / 2 - (width / 2); + u32 pitch = 1024 / 2 - (width / 2); u32 num_width; while(height) @@ -4671,6 +4666,8 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->dither_table[3] = dither_table_row(3, -1, 2, -2); psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; + + psx_gpu->enhancement_x_threshold = 256; } u64 get_us(void) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 7252dc2..fc5b566 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -57,7 +57,6 @@ typedef enum { RENDER_INTERLACE_ENABLED = 0x1, RENDER_INTERLACE_ODD = 0x2, - 
RENDER_DOUBLE_MODE = 0x4, } render_mode_enum; typedef struct @@ -183,6 +182,8 @@ typedef struct // enhancement stuff u16 *enhancement_buf_ptr; + u16 *enhancement_current_buf_ptr; + u32 enhancement_x_threshold; s16 saved_viewport_start_x; s16 saved_viewport_start_y; s16 saved_viewport_end_x; @@ -190,7 +191,7 @@ typedef struct // Align up to 64 byte boundary to keep the upcoming buffers cache line // aligned, also make reachable with single immediate addition - u8 reserved_a[236]; + u8 reserved_a[228]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 1c449ea..54f5614 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -750,23 +750,30 @@ breakloop: return list - list_start; } +#define select_enhancement_buf(psx_gpu) { \ + u32 _x, _b; \ + _x = psx_gpu->saved_viewport_start_x + 8; \ + for (_b = 0; _x >= psx_gpu->enhancement_x_threshold; _b++) \ + _x -= psx_gpu->enhancement_x_threshold; \ + psx_gpu->enhancement_current_buf_ptr = \ + psx_gpu->enhancement_buf_ptr + _b * 1024 * 1024; \ +} + #define enhancement_disable() { \ psx_gpu->vram_out_ptr = psx_gpu->vram_ptr; \ psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x; \ psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y; \ psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x; \ psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y; \ - psx_gpu->render_mode &= ~RENDER_DOUBLE_MODE; \ psx_gpu->uvrgb_phase = 0x8000; \ } #define enhancement_enable() { \ - psx_gpu->vram_out_ptr = psx_gpu->enhancement_buf_ptr; \ + psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; \ psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x * 2; \ psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2; \ psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2; \ psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2; \ - psx_gpu->render_mode 
|= RENDER_DOUBLE_MODE; \ psx_gpu->uvrgb_phase = 0x1000; \ } @@ -913,6 +920,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c psx_gpu->saved_viewport_start_y = psx_gpu->viewport_start_y; psx_gpu->saved_viewport_end_x = psx_gpu->viewport_end_x; psx_gpu->saved_viewport_end_y = psx_gpu->viewport_end_y; + select_enhancement_buf(psx_gpu); for(; list < list_end; list += 1 + command_length) { @@ -944,7 +952,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c do_fill(psx_gpu, x, y, width, height, color); - psx_gpu->vram_out_ptr = psx_gpu->enhancement_buf_ptr; + psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; x *= 2; y *= 2; width *= 2; @@ -1357,6 +1365,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c psx_gpu->viewport_start_y = (list[0] >> 10) & 0x1FF; psx_gpu->saved_viewport_start_x = psx_gpu->viewport_start_x; psx_gpu->saved_viewport_start_y = psx_gpu->viewport_start_y; + select_enhancement_buf(psx_gpu); #ifdef TEXTURE_CACHE_4BPP psx_gpu->viewport_mask = -- cgit v1.2.3 From 3b3dee71d84bbbb376548d794b7a11cd38833cf0 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 18 Aug 2012 18:25:12 +0300 Subject: psx_gpu: do enhanced lines --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 20 ++++++++++++-------- plugins/gpu_neon/psx_gpu/psx_gpu.h | 2 +- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 26 ++++++++++++++++++-------- 3 files changed, 31 insertions(+), 17 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 092125b..9b5a64d 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -4179,9 +4179,6 @@ do \ { \ delta_y *= -1; \ \ - if(delta_y >= 512) \ - return; \ - \ if(delta_x > delta_y) \ { \ draw_line_span_horizontal(decrement, shading, blending, dithering, \ @@ -4195,9 +4192,6 @@ do \ } \ else \ { \ - if(delta_y >= 512) \ - return; \ - \ 
if(delta_x > delta_y) \ { \ draw_line_span_horizontal(increment, shading, blending, dithering, \ @@ -4212,7 +4206,7 @@ do \ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, - u32 color) + u32 color, int double_resolution) { s32 color_r, color_g, color_b; u32 triangle_winding = 0; @@ -4264,9 +4258,19 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, delta_x = x_b - x_a; delta_y = y_b - y_a; - if(delta_x >= 1024) + if(delta_x >= 1024 || delta_y >= 512 || delta_y <= -512) return; + if(double_resolution) + { + x_a *= 2; + x_b *= 2; + y_a *= 2; + y_b *= 2; + delta_x *= 2; + delta_y *= 2; + } + flags &= ~RENDER_FLAGS_TEXTURE_MAP; vram_ptr = psx_gpu->vram_out_ptr + (y_a * 1024) + x_a; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index fc5b566..f8547f3 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -233,7 +233,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, s32 width, s32 height, u32 flags, u32 color); void render_line(psx_gpu_struct *gpu, vertex_struct *vertexes, u32 flags, - u32 color); + u32 color, int double_resolution); u32 texture_region_mask(s32 x1, s32 y1, s32 x2, s32 y2); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 54f5614..86a816e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -408,7 +408,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].x = list_s16[4] + psx_gpu->offset_x; vertexes[1].y = list_s16[5] + psx_gpu->offset_y; - render_line(psx_gpu, vertexes, current_command, list[0]); + render_line(psx_gpu, vertexes, current_command, list[0], 0); break; } @@ -429,7 +429,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].x = 
(xy & 0xFFFF) + psx_gpu->offset_x; vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; - render_line(psx_gpu, vertexes, current_command, list[0]); + render_line(psx_gpu, vertexes, current_command, list[0], 0); list_position++; num_vertexes++; @@ -460,7 +460,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].x = list_s16[6] + psx_gpu->offset_x; vertexes[1].y = list_s16[7] + psx_gpu->offset_y; - render_line(psx_gpu, vertexes, current_command, 0); + render_line(psx_gpu, vertexes, current_command, 0, 0); break; } @@ -490,7 +490,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; - render_line(psx_gpu, vertexes, current_command, 0); + render_line(psx_gpu, vertexes, current_command, 0, 0); list_position += 2; num_vertexes++; @@ -1079,7 +1079,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c vertexes[1].x = list_s16[4] + psx_gpu->offset_x; vertexes[1].y = list_s16[5] + psx_gpu->offset_y; - render_line(psx_gpu, vertexes, current_command, list[0]); + render_line(psx_gpu, vertexes, current_command, list[0], 0); + enhancement_enable(); + render_line(psx_gpu, vertexes, current_command, list[0], 1); break; } @@ -1100,7 +1102,10 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; - render_line(psx_gpu, vertexes, current_command, list[0]); + enhancement_disable(); + render_line(psx_gpu, vertexes, current_command, list[0], 0); + enhancement_enable(); + render_line(psx_gpu, vertexes, current_command, list[0], 1); list_position++; num_vertexes++; @@ -1131,7 +1136,9 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c vertexes[1].x = list_s16[6] + psx_gpu->offset_x; vertexes[1].y = list_s16[7] + psx_gpu->offset_y; - 
render_line(psx_gpu, vertexes, current_command, 0); + render_line(psx_gpu, vertexes, current_command, 0, 0); + enhancement_enable(); + render_line(psx_gpu, vertexes, current_command, 0, 1); break; } @@ -1161,7 +1168,10 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; - render_line(psx_gpu, vertexes, current_command, 0); + enhancement_disable(); + render_line(psx_gpu, vertexes, current_command, 0, 0); + enhancement_enable(); + render_line(psx_gpu, vertexes, current_command, 0, 1); list_position += 2; num_vertexes++; -- cgit v1.2.3 From 50f9355a2338111d940ed408f52fe1defe4df23e Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 19 Aug 2012 00:37:50 +0300 Subject: psx_gpu: start handling vram loads/moves for enhancement --- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 35 ++++++++++++++++++++++ plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 45 ++++++++++++----------------- 2 files changed, 54 insertions(+), 26 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 3239412..87a14f6 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -5410,3 +5410,38 @@ function(update_texture_8bpp_cache_slice) vpop { q0 - q3 } ldmia sp!, { r4 - r11, pc } + +/* void scale2x_tiles8(void *dst, const void *src, int w8, int h) */ +function(scale2x_tiles8) + push { r4, r14 } + + mov r4, r1 + add r12, r0, #1024*2 + mov r14, r2 + +0: + vld1.u16 { q0 }, [ r1, :128 ]! + vld1.u16 { q2 }, [ r1, :128 ]! + vmov q1, q0 + vmov q3, q2 + vzip.16 q0, q1 + vzip.16 q2, q3 + subs r14, #2 + vst1.u16 { q0, q1 }, [ r0, :128 ]! + vst1.u16 { q0, q1 }, [ r12, :128 ]! + blt 1f + vst1.u16 { q2, q3 }, [ r0, :128 ]! + vst1.u16 { q2, q3 }, [ r12, :128 ]! 
+ bgt 0b +1: + subs r3, #1 + mov r14, r2 + add r0, #1024*2*2 + add r4, #1024*2 + sub r0, r2, lsl #4+1 + mov r1, r4 + add r12, r0, #1024*2 + bgt 0b + nop + + pop { r4, pc } diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 86a816e..af82d7e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -798,6 +798,8 @@ breakloop: #define shift_triangle_area() \ psx_gpu->triangle_area *= 4 +extern void scale2x_tiles8(void *dst, const void *src, int w8, int h); + static int disable_main_render; static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, @@ -1292,37 +1294,28 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c } case 0x80: // vid -> vid - render_block_move(psx_gpu, list_s16[2] & 0x3FF, list_s16[3] & 0x1FF, - list_s16[4] & 0x3FF, list_s16[5] & 0x1FF, - ((list_s16[6] - 1) & 0x3FF) + 1, ((list_s16[7] - 1) & 0x1FF) + 1); + { + u32 sx = list_s16[2] & 0x3FF; + u32 sy = list_s16[3] & 0x1FF; + u32 dx = list_s16[4] & 0x3FF; + u32 dy = list_s16[5] & 0x1FF; + u32 w = ((list_s16[6] - 1) & 0x3FF) + 1; + u32 h = ((list_s16[7] - 1) & 0x1FF) + 1; + + render_block_move(psx_gpu, sx, sy, dx, dy, w, h); + if (dy + h > 512) + h = 512 - dy; + sx = sx & ~7; // FIXME? 
+ dx = dx * 2 & ~7; + dy *= 2; + scale2x_tiles8(psx_gpu->enhancement_buf_ptr + dy * 1024 + dx, + psx_gpu->vram_ptr + sy * 1024 + sx, w / 8, h); break; + } -#ifdef PCSX case 0xA0: // sys -> vid case 0xC0: // vid -> sys goto breakloop; -#else - case 0xA0: // sys -> vid - { - u32 load_x = list_s16[2] & 0x3FF; - u32 load_y = list_s16[3] & 0x1FF; - u32 load_width = list_s16[4] & 0x3FF; - u32 load_height = list_s16[5] & 0x1FF; - u32 load_size = load_width * load_height; - - command_length += load_size / 2; - - if(load_size & 1) - command_length++; - - render_block_copy(psx_gpu, (u16 *)&(list_s16[6]), load_x, load_y, - load_width, load_height, load_width); - break; - } - - case 0xC0: // vid -> sys - break; -#endif case 0xE1: set_texture(psx_gpu, list[0]); -- cgit v1.2.3 From 06bc35c833797ce9f6f3287abf954f037bb12319 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 27 Aug 2012 02:04:01 +0300 Subject: various enhancement tweaks --- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 58 ++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 17 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index af82d7e..f920c73 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -601,12 +601,22 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) break; } - case 0x80: // vid -> vid - render_block_move(psx_gpu, list_s16[2] & 0x3FF, list_s16[3] & 0x1FF, - list_s16[4] & 0x3FF, list_s16[5] & 0x1FF, - ((list_s16[6] - 1) & 0x3FF) + 1, ((list_s16[7] - 1) & 0x1FF) + 1); - break; - + case 0x80: // vid -> vid + { + u32 sx = list_s16[2] & 0x3FF; + u32 sy = list_s16[3] & 0x1FF; + u32 dx = list_s16[4] & 0x3FF; + u32 dy = list_s16[5] & 0x1FF; + u32 w = ((list_s16[6] - 1) & 0x3FF) + 1; + u32 h = ((list_s16[7] - 1) & 0x1FF) + 1; + + if (sx == dx && sy == dy && psx_gpu->mask_msb == 0) + break; + + render_block_move(psx_gpu, 
sx, sy, dx, dy, w, h); + break; + } + #ifdef PCSX case 0xA0: // sys -> vid case 0xC0: // vid -> sys @@ -750,15 +760,18 @@ breakloop: return list - list_start; } -#define select_enhancement_buf(psx_gpu) { \ - u32 _x, _b; \ - _x = psx_gpu->saved_viewport_start_x + 8; \ - for (_b = 0; _x >= psx_gpu->enhancement_x_threshold; _b++) \ - _x -= psx_gpu->enhancement_x_threshold; \ - psx_gpu->enhancement_current_buf_ptr = \ - psx_gpu->enhancement_buf_ptr + _b * 1024 * 1024; \ +static void *select_enhancement_buf_ptr(psx_gpu_struct *psx_gpu, u32 x) +{ + u32 b; + for (b = 0; x >= psx_gpu->enhancement_x_threshold; b++) + x -= psx_gpu->enhancement_x_threshold; + return psx_gpu->enhancement_buf_ptr + b * 1024 * 1024; } +#define select_enhancement_buf(psx_gpu) \ + psx_gpu->enhancement_current_buf_ptr = \ + select_enhancement_buf_ptr(psx_gpu, psx_gpu->saved_viewport_start_x + 8) + #define enhancement_disable() { \ psx_gpu->vram_out_ptr = psx_gpu->vram_ptr; \ psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x; \ @@ -772,8 +785,8 @@ breakloop: psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; \ psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x * 2; \ psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2; \ - psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2; \ - psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2; \ + psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2 + 1; \ + psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2 + 1; \ psx_gpu->uvrgb_phase = 0x1000; \ } @@ -800,6 +813,11 @@ breakloop: extern void scale2x_tiles8(void *dst, const void *src, int w8, int h); +#ifndef NEON_BUILD +// TODO? 
+void scale2x_tiles8(void *dst, const void *src, int w8, int h) {} +#endif + static int disable_main_render; static void do_triangle_enhanced(psx_gpu_struct *psx_gpu, @@ -1301,6 +1319,10 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c u32 dy = list_s16[5] & 0x1FF; u32 w = ((list_s16[6] - 1) & 0x3FF) + 1; u32 h = ((list_s16[7] - 1) & 0x1FF) + 1; + u16 *buf; + + if (sx == dx && sy == dy && psx_gpu->mask_msb == 0) + break; render_block_move(psx_gpu, sx, sy, dx, dy, w, h); if (dy + h > 512) @@ -1308,8 +1330,10 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c sx = sx & ~7; // FIXME? dx = dx * 2 & ~7; dy *= 2; - scale2x_tiles8(psx_gpu->enhancement_buf_ptr + dy * 1024 + dx, - psx_gpu->vram_ptr + sy * 1024 + sx, w / 8, h); + w = (w + 7) / 8; + buf = select_enhancement_buf_ptr(psx_gpu, dx / 2); + scale2x_tiles8(buf + dy * 1024 + dx, + psx_gpu->vram_ptr + sy * 1024 + sx, w, h); break; } -- cgit v1.2.3 From 05e2e0c6e20a335c9ce86d22a2ae1ba0f5bd2865 Mon Sep 17 00:00:00 2001 From: Exophase Date: Sat, 13 Oct 2012 22:50:43 +0300 Subject: psx_gpu: 2x sprite renderer --- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 881 ++++++++++++++++++++++++++++++++++ plugins/gpu_neon/psx_gpu/vector_ops.h | 4 + 2 files changed, 885 insertions(+) create mode 100644 plugins/gpu_neon/psx_gpu/psx_gpu_4x.c (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c new file mode 100644 index 0000000..5e87bea --- /dev/null +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -0,0 +1,881 @@ +#define setup_sprite_tiled_initialize_4bpp_4x() \ + u16 *clut_ptr = psx_gpu->clut_ptr; \ + vec_8x16u clut_a, clut_b; \ + vec_16x8u clut_low, clut_high; \ + \ + load_8x16b(clut_a, clut_ptr); \ + load_8x16b(clut_b, clut_ptr + 8); \ + unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \ + + +#define setup_sprite_tiled_initialize_8bpp_4x() \ + + +#define 
setup_sprite_tile_fetch_texel_block_8bpp_4x(offset) \ + texture_block_ptr = psx_gpu->texture_page_ptr + \ + ((texture_offset + offset) & texture_mask); \ + \ + load_64b(texels, texture_block_ptr) \ + + +#define setup_sprite_tile_setup_block_yes_4x(side, offset, texture_mode) \ + +#define setup_sprite_tile_setup_block_no_4x(side, offset, texture_mode) \ + +#define setup_sprite_tile_add_blocks_4x(tile_num_blocks) \ + num_blocks += tile_num_blocks * 4; \ + sprite_blocks += tile_num_blocks * 4; \ + \ + if(num_blocks > MAX_BLOCKS) \ + { \ + flush_render_block_buffer(psx_gpu); \ + num_blocks = tile_num_blocks * 4; \ + block = psx_gpu->blocks; \ + } \ + +#define setup_sprite_tile_full_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels, pixels_wide; \ + setup_sprite_tile_add_blocks_4x(sub_tile_height * 2); \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp_4x(8); \ + 
tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 24; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_half_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels, pixels_wide; \ + setup_sprite_tile_add_blocks(sub_tile_height); \ + u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = 
fb_ptr + 8; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + + +#define setup_sprite_tile_full_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks_4x(sub_tile_height * 2); \ + vec_16x8u texels_wide; \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp_4x(8); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24 + 1024; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24 + 1024; \ + block++; \ + \ + fb_ptr += 
2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_half_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks_4x(sub_tile_height * 2); \ + vec_16x8u texels_wide; \ + u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8 + 1024; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + + +#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \ + texture_offset = texture_offset_base + 8; \ + fb_ptr += 16 \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \ + fb_ptr -= 16 \ + +#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \ + 
+#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \ + + +#define setup_sprite_tile_column_height_single_4x(edge_mode, edge, \ + texture_mode) \ +do \ +{ \ + sub_tile_height = column_data; \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##_4x(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##_4x(edge); \ +} while(0) \ + +#define setup_sprite_tile_column_height_multi_4x(edge_mode, edge, \ + texture_mode) \ +do \ +{ \ + u32 tiles_remaining = column_data >> 16; \ + sub_tile_height = column_data & 0xFF; \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##_4x(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ + tiles_remaining -= 1; \ + \ + while(tiles_remaining) \ + { \ + sub_tile_height = 16; \ + setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ + tiles_remaining--; \ + } \ + \ + sub_tile_height = (column_data >> 8) & 0xFF; \ + setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##_4x(edge); \ +} while(0) \ + + +#define setup_sprite_column_data_single_4x() \ + column_data = height \ + +#define setup_sprite_column_data_multi_4x() \ + column_data = 16 - offset_v; \ + column_data |= ((height_rounded & 0xF) + 1) << 8; \ + column_data |= (tile_height - 1) << 16 \ + + +#define setup_sprite_tile_column_width_single_4x(texture_mode, multi_height, \ + edge_mode, edge) \ +{ \ + setup_sprite_column_data_##multi_height##_4x(); \ + left_mask_bits = left_block_mask | right_block_mask; \ + right_mask_bits = left_mask_bits >> 16; \ + \ + setup_sprite_tile_column_height_##multi_height##_4x(edge_mode, edge, \ + texture_mode); \ +} \ + +#define setup_sprite_tiled_advance_column_4x() \ + texture_offset_base += 0x100; \ + if((texture_offset_base & 0xF00) == 0) \ + texture_offset_base -= (0x100 + 0xF00) \ + +#define setup_sprite_tile_column_width_multi_4x(texture_mode, multi_height, \ + 
left_mode, right_mode) \ +{ \ + setup_sprite_column_data_##multi_height##_4x(); \ + s32 fb_ptr_advance_column = 32 - (2048 * height); \ + \ + tile_width -= 2; \ + left_mask_bits = left_block_mask; \ + right_mask_bits = left_mask_bits >> 16; \ + \ + setup_sprite_tile_column_height_##multi_height##_4x(left_mode, right, \ + texture_mode); \ + fb_ptr += fb_ptr_advance_column; \ + \ + left_mask_bits = 0x00; \ + right_mask_bits = 0x00; \ + \ + while(tile_width) \ + { \ + setup_sprite_tiled_advance_column_4x(); \ + setup_sprite_tile_column_height_##multi_height##_4x(full, none, \ + texture_mode); \ + fb_ptr += fb_ptr_advance_column; \ + tile_width--; \ + } \ + \ + left_mask_bits = right_block_mask; \ + right_mask_bits = left_mask_bits >> 16; \ + \ + setup_sprite_tiled_advance_column(); \ + setup_sprite_tile_column_height_##multi_height##_4x(right_mode, left, \ + texture_mode); \ +} \ + + +#define setup_sprite_tiled_builder_4x(texture_mode) \ +void setup_sprite_##texture_mode##_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ + s32 u, s32 v, s32 width, s32 height, u32 color) \ +{ \ + s32 offset_u = u & 0xF; \ + s32 offset_v = v & 0xF; \ + \ + s32 width_rounded = offset_u + width + 15; \ + s32 height_rounded = offset_v + height + 15; \ + s32 tile_height = height_rounded / 16; \ + s32 tile_width = width_rounded / 16; \ + u32 offset_u_right = width_rounded & 0xF; \ + \ + u32 left_block_mask = ~(0xFFFFFFFF << (offset_u * 2)); \ + u32 right_block_mask = 0xFFFFFFFE << (offset_u_right * 2); \ + \ + u32 left_mask_bits; \ + u32 right_mask_bits; \ + \ + u32 sub_tile_height; \ + u32 column_data; \ + \ + u32 texture_mask = (psx_gpu->texture_mask_width & 0xF) | \ + ((psx_gpu->texture_mask_height & 0xF) << 4) | \ + ((psx_gpu->texture_mask_width >> 4) << 8) | \ + ((psx_gpu->texture_mask_height >> 4) << 12); \ + u32 texture_offset = ((v & 0xF) << 4) | ((u & 0xF0) << 4) | \ + ((v & 0xF0) << 8); \ + u32 texture_offset_base = texture_offset; \ + u32 control_mask; \ + \ + u16 *fb_ptr = 
psx_gpu->vram_ptr + (y * 1024) + (x - offset_u); \ + u32 num_blocks = psx_gpu->num_blocks; \ + block_struct *block = psx_gpu->blocks + num_blocks; \ + \ + u16 *texture_block_ptr; \ + vec_8x8u texels; \ + \ + setup_sprite_tiled_initialize_##texture_mode##_4x(); \ + \ + control_mask = tile_width == 1; \ + control_mask |= (tile_height == 1) << 1; \ + control_mask |= ((left_block_mask & 0xFFFF) == 0xFFFF) << 2; \ + control_mask |= (((right_block_mask >> 8) & 0xFFFF) == 0xFFFF) << 3; \ + \ + sprites_##texture_mode++; \ + \ + switch(control_mask) \ + { \ + default: \ + case 0x0: \ + setup_sprite_tile_column_width_multi_4x(texture_mode, multi, full, \ + full); \ + break; \ + \ + case 0x1: \ + setup_sprite_tile_column_width_single_4x(texture_mode, multi, full, \ + none); \ + break; \ + \ + case 0x2: \ + setup_sprite_tile_column_width_multi_4x(texture_mode, single, full, \ + full); \ + break; \ + \ + case 0x3: \ + setup_sprite_tile_column_width_single_4x(texture_mode, single, full, \ + none); \ + break; \ + \ + case 0x4: \ + setup_sprite_tile_column_width_multi_4x(texture_mode, multi, half, \ + full); \ + break; \ + \ + case 0x5: \ + setup_sprite_tile_column_width_single_4x(texture_mode, multi, half, \ + right); \ + break; \ + \ + case 0x6: \ + setup_sprite_tile_column_width_multi_4x(texture_mode, single, half, \ + full); \ + break; \ + \ + case 0x7: \ + setup_sprite_tile_column_width_single_4x(texture_mode, single, half, \ + right); \ + break; \ + \ + case 0x8: \ + setup_sprite_tile_column_width_multi_4x(texture_mode, multi, full, \ + half); \ + break; \ + \ + case 0x9: \ + setup_sprite_tile_column_width_single_4x(texture_mode, multi, half, \ + left); \ + break; \ + \ + case 0xA: \ + setup_sprite_tile_column_width_multi_4x(texture_mode, single, full, \ + half); \ + break; \ + \ + case 0xB: \ + setup_sprite_tile_column_width_single_4x(texture_mode, single, half, \ + left); \ + break; \ + \ + case 0xC: \ + setup_sprite_tile_column_width_multi_4x(texture_mode, multi, half, \ 
+ half); \ + break; \ + \ + case 0xE: \ + setup_sprite_tile_column_width_multi_4x(texture_mode, single, half, \ + half); \ + break; \ + } \ +} \ + + +void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); + +//#ifndef NEON_BUILD +#if 1 +setup_sprite_tiled_builder_4x(4bpp); +setup_sprite_tiled_builder_4x(8bpp); + +void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color) +{ + u32 left_offset = u & 0x7; + u32 width_rounded = width + left_offset + 7; + + u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (s32)(x - left_offset); + u32 right_width = width_rounded & 0x7; + u32 block_width = width_rounded / 8; + u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8); + + u32 left_mask_bits = ~(0xFFFF << (left_offset * 2)); + u32 right_mask_bits = 0xFE << (right_width * 2); + + u32 texture_offset_base = u + (v * 1024); + u32 texture_mask = + psx_gpu->texture_mask_width | (psx_gpu->texture_mask_height * 1024); + + u32 blocks_remaining; + u32 num_blocks = psx_gpu->num_blocks; + block_struct *block = psx_gpu->blocks + num_blocks; + + u16 *texture_page_ptr = psx_gpu->texture_page_ptr; + u16 *texture_block_ptr; + + texture_offset_base &= ~0x7; + + sprites_16bpp++; + + if(block_width == 1) + { + u32 mask_bits = left_mask_bits | right_mask_bits; + u32 mask_bits_a = mask_bits & 0xFF; + u32 mask_bits_b = mask_bits >> 8; + + vec_8x16u texels; + vec_8x16u texels_wide; + + while(height) + { + num_blocks += 4; + sprite_blocks += 4; + + if(num_blocks > MAX_BLOCKS) + { + flush_render_block_buffer(psx_gpu); + num_blocks = 4; + block = psx_gpu->blocks; + } + + texture_block_ptr = + texture_page_ptr + (texture_offset_base & texture_mask); + + 
load_128b(texels, texture_block_ptr); + + zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + block->texels = texels_wide; + block->draw_mask_bits = mask_bits_a; + block->fb_ptr = fb_ptr; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = mask_bits_a; + block->fb_ptr = fb_ptr + 1024; + block++; + + zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + block->texels = texels_wide; + block->draw_mask_bits = mask_bits_b; + block->fb_ptr = fb_ptr + 8; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = mask_bits_b; + block->fb_ptr = fb_ptr + 8 + 1024; + block++; + + texture_offset_base += 1024; + fb_ptr += 2048; + + height--; + psx_gpu->num_blocks = num_blocks; + } + } + else + { + u32 texture_offset; + + u32 left_mask_bits_a = left_mask_bits & 0xFF; + u32 left_mask_bits_b = left_mask_bits >> 8; + u32 right_mask_bits_a = right_mask_bits & 0xFF; + u32 right_mask_bits_b = right_mask_bits >> 8; + + vec_8x16u texels; + vec_8x16u texels_wide; + + while(height) + { + blocks_remaining = block_width - 2; + num_blocks += block_width * 4; + sprite_blocks += block_width * 4; + + if(num_blocks > MAX_BLOCKS) + { + flush_render_block_buffer(psx_gpu); + num_blocks = block_width; + block = psx_gpu->blocks; + } + + texture_offset = texture_offset_base; + texture_offset_base += 1024; + + texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); + + load_128b(texels, texture_block_ptr); + + zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + block->texels = texels_wide; + block->draw_mask_bits = left_mask_bits_a; + block->fb_ptr = fb_ptr; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = left_mask_bits_a; + block->fb_ptr = fb_ptr + 1024; + block++; + + zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + block->texels = texels_wide; + block->draw_mask_bits = left_mask_bits_b; + block->fb_ptr = fb_ptr + 8; + block++; + + block->texels 
= texels_wide; + block->draw_mask_bits = left_mask_bits_b; + block->fb_ptr = fb_ptr + 8 + 1024; + block++; + + texture_offset += 8; + fb_ptr += 16; + + while(blocks_remaining) + { + texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); + load_128b(texels, texture_block_ptr); + + zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + block->texels = texels_wide; + block->draw_mask_bits = 0; + block->fb_ptr = fb_ptr; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = 0; + block->fb_ptr = fb_ptr + 1024; + block++; + + zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + block->texels = texels_wide; + block->draw_mask_bits = 0; + block->fb_ptr = fb_ptr + 8; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = 0; + block->fb_ptr = fb_ptr + 8 + 1024; + block++; + + texture_offset += 8; + fb_ptr += 8; + + blocks_remaining--; + } + + texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); + load_128b(texels, texture_block_ptr); + + zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + block->texels = texels_wide; + block->draw_mask_bits = right_mask_bits_a; + block->fb_ptr = fb_ptr; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = right_mask_bits_a; + block->fb_ptr = fb_ptr + 1024; + block++; + + zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + block->texels = texels_wide; + block->draw_mask_bits = right_mask_bits_b; + block->fb_ptr = fb_ptr + 8; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = right_mask_bits_b; + block->fb_ptr = fb_ptr + 8 + 1024; + block++; + + fb_ptr += fb_ptr_pitch; + + height--; + psx_gpu->num_blocks = num_blocks; + } + } +} + +#endif + +#define setup_sprite_blocks_switch_textured_4x(texture_mode) \ + setup_sprite_##texture_mode##_4x \ + +#define setup_sprite_blocks_switch_untextured_4x(texture_mode) \ + setup_sprite_untextured \ + +#define 
setup_sprite_blocks_switch_4x(texturing, texture_mode) \ + setup_sprite_blocks_switch_##texturing##_4x(texture_mode) \ + + +#define render_sprite_blocks_switch_block_modulation_4x(texture_mode, \ + blend_mode, mask_evaluate, shading, dithering, texturing, blending, \ + modulation) \ +{ \ + setup_sprite_blocks_switch_4x(texturing, texture_mode), \ + texture_sprite_blocks_switch_##texturing(texture_mode), \ + shade_blocks_switch(unshaded, texturing, modulation, undithered, blending, \ + mask_evaluate), \ + blend_blocks_switch(texturing, blending, blend_mode, mask_evaluate) \ +} \ + +#define render_sprite_blocks_switch_block_blending_4x(texture_mode, \ + blend_mode, mask_evaluate, shading, dithering, texturing, blending) \ + render_sprite_blocks_switch_block_modulation_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithering, texturing, blending, modulated), \ + render_sprite_blocks_switch_block_modulation_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithering, texturing, blending, unmodulated) \ + +#define render_sprite_blocks_switch_block_texturing_4x(texture_mode, \ + blend_mode, mask_evaluate, shading, dithering, texturing) \ + render_sprite_blocks_switch_block_blending_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithering, texturing, unblended), \ + render_sprite_blocks_switch_block_blending_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithering, texturing, blended) \ + +#define render_sprite_blocks_switch_block_dithering_4x(texture_mode, \ + blend_mode, mask_evaluate, shading, dithering) \ + render_sprite_blocks_switch_block_texturing_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithering, untextured), \ + render_sprite_blocks_switch_block_texturing_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithering, textured) \ + +#define render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \ + mask_evaluate, shading) \ + render_sprite_blocks_switch_block_dithering_4x(texture_mode, 
blend_mode, \ + mask_evaluate, shading, undithered), \ + render_sprite_blocks_switch_block_dithering_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithered) \ + +#define render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, \ + blend_mode, mask_evaluate) \ + render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \ + mask_evaluate, unshaded), \ + render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \ + mask_evaluate, shaded) \ + +#define render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, \ + blend_mode) \ + render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, blend_mode, \ + off), \ + render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, blend_mode, \ + on) \ + +#define render_sprite_blocks_switch_block_texture_mode_4x(texture_mode) \ + render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, average), \ + render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, add), \ + render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, subtract), \ + render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, add_fourth) \ + +#define render_sprite_blocks_switch_block_4x() \ + render_sprite_blocks_switch_block_texture_mode_4x(4bpp), \ + render_sprite_blocks_switch_block_texture_mode_4x(8bpp), \ + render_sprite_blocks_switch_block_texture_mode_4x(16bpp), \ + render_sprite_blocks_switch_block_texture_mode_4x(4bpp) \ + + +render_block_handler_struct render_sprite_block_handlers_4x[] = +{ + render_sprite_blocks_switch_block_4x() +}; + + +void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, + s32 width, s32 height, u32 flags, u32 color) +{ + x *= 2; + y *= 2; + + s32 x_right = x + width - 1; + s32 y_bottom = y + height - 1; + +#ifdef PROFILE + sprites++; +#endif + + if(x < psx_gpu->viewport_start_x) + { + u32 clip = psx_gpu->viewport_start_x - x; + x += clip; + u += clip; + width -= clip; + } + + if(y < psx_gpu->viewport_start_y) + { + s32 clip = 
psx_gpu->viewport_start_y - y; + y += clip; + v += clip; + height -= clip; + } + + if(x_right > psx_gpu->viewport_end_x) + width -= x_right - psx_gpu->viewport_end_x; + + if(y_bottom > psx_gpu->viewport_end_y) + height -= y_bottom - psx_gpu->viewport_end_y; + + if((width <= 0) || (height <= 0)) + return; + +#ifdef PROFILE + span_pixels += width * height; + spans += height; +#endif + + u32 render_state = flags & + (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | + RENDER_FLAGS_TEXTURE_MAP); + render_state |= + (psx_gpu->render_state_base & ~RENDER_STATE_DITHER); + + if((psx_gpu->render_state != render_state) || + (psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)) + { + psx_gpu->render_state = render_state; + flush_render_block_buffer(psx_gpu); +#ifdef PROFILE + state_changes++; +#endif + } + + psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE; + + color &= 0xFFFFFF; + + if(psx_gpu->triangle_color != color) + { + flush_render_block_buffer(psx_gpu); + psx_gpu->triangle_color = color; + } + + if(color == 0x808080) + render_state |= RENDER_FLAGS_MODULATE_TEXELS; + + render_block_handler_struct *render_block_handler = + &(render_sprite_block_handlers_4x[render_state]); + psx_gpu->render_block_handler = render_block_handler; + + ((setup_sprite_function_type *)render_block_handler->setup_blocks) + (psx_gpu, x, y, u, v, width, height, color); +} + diff --git a/plugins/gpu_neon/psx_gpu/vector_ops.h b/plugins/gpu_neon/psx_gpu/vector_ops.h index c11955d..b58b190 100644 --- a/plugins/gpu_neon/psx_gpu/vector_ops.h +++ b/plugins/gpu_neon/psx_gpu/vector_ops.h @@ -394,6 +394,10 @@ build_vector_types(s); foreach_element(8, (dest).e[_i] = \ (u8)(source_a).e[_i] | ((u8)(source_b).e[_i] << 8)) \ +#define zip_4x32b(dest, source_a, source_b) \ + foreach_element(4, (dest).e[_i] = \ + (u8)(source_a).e[_i] | ((u8)(source_b).e[_i] << 16)) \ + #define zip_2x64b(dest, source_a, source_b) \ foreach_element(2, (dest).e[_i] = \ (u64)(source_a).e[_i] | ((u64)(source_b).e[_i] << 32)) \ -- cgit 
v1.2.3 From fc6cef7d739c850a10bca2a19855654aa78897a8 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 13 Oct 2012 23:05:42 +0300 Subject: psx_gpu: 2x sprite fixes and integration --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 5 +-- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 60 ++++++++++++++++++-------------- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 38 ++++++++++++++------ plugins/gpu_neon/psx_gpu/vector_ops.h | 2 +- 4 files changed, 64 insertions(+), 41 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 9b5a64d..2d552aa 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -3574,7 +3574,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, u32 left_offset = u & 0x7; u32 width_rounded = width + left_offset + 7; - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (s32)(x - left_offset); + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (s32)(x - left_offset); u32 right_width = width_rounded & 0x7; u32 block_width = width_rounded / 8; u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8); @@ -3696,7 +3696,7 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, { u32 right_width = ((width - 1) & 0x7) + 1; u32 right_mask_bits = (0xFF << right_width); - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + x; + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x; u32 block_width = (width + 7) / 8; u32 fb_ptr_pitch = 1024 - ((block_width - 1) * 8); u32 blocks_remaining; @@ -4736,3 +4736,4 @@ void triangle_benchmark(psx_gpu_struct *psx_gpu) #endif +#include "psx_gpu_4x.c" diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index 5e87bea..f8afcf1 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -50,7 +50,7 @@ tbl_16(texels_high, texels, clut_high); \ zip_8x16b(pixels, texels_low, texels_high); \ \ - zip_4x32b(vector_cast(vec_4x32u, 
pixels_wide), pixels.low, pixels.low); \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ block->texels = pixels_wide; \ block->draw_mask_bits = left_mask_bits_a; \ block->fb_ptr = fb_ptr; \ @@ -77,7 +77,7 @@ tbl_16(texels_high, texels, clut_high); \ zip_8x16b(pixels, texels_low, texels_high); \ \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ block->texels = pixels_wide; \ block->draw_mask_bits = right_mask_bits_a; \ block->fb_ptr = fb_ptr + 16; \ @@ -111,7 +111,7 @@ { \ vec_8x8u texels_low, texels_high; \ vec_8x16u pixels, pixels_wide; \ - setup_sprite_tile_add_blocks(sub_tile_height); \ + setup_sprite_tile_add_blocks_4x(sub_tile_height); \ u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ \ @@ -122,7 +122,7 @@ tbl_16(texels_high, texels, clut_high); \ zip_8x16b(pixels, texels_low, texels_high); \ \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ block->texels = pixels_wide; \ block->draw_mask_bits = edge##_mask_bits_a; \ block->fb_ptr = fb_ptr; \ @@ -195,12 +195,12 @@ \ block->r = texels_wide.low; \ block->draw_mask_bits = right_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ block++; \ \ block->r = texels_wide.high; \ block->draw_mask_bits = right_mask_bits_b; \ - block->fb_ptr = fb_ptr + 24 + 1024; \ + block->fb_ptr = fb_ptr + 24; \ block++; \ \ block->r = texels_wide.high; \ @@ -218,7 +218,7 @@ #define setup_sprite_tile_half_8bpp_4x(edge) \ { \ - setup_sprite_tile_add_blocks_4x(sub_tile_height * 2); \ + setup_sprite_tile_add_blocks_4x(sub_tile_height); \ vec_16x8u texels_wide; \ u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ @@ -387,7 +387,7 @@ void 
setup_sprite_##texture_mode##_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ u32 offset_u_right = width_rounded & 0xF; \ \ u32 left_block_mask = ~(0xFFFFFFFF << (offset_u * 2)); \ - u32 right_block_mask = 0xFFFFFFFE << (offset_u_right * 2); \ + u32 right_block_mask = 0xFFFFFFFC << (offset_u_right * 2); \ \ u32 left_mask_bits; \ u32 right_mask_bits; \ @@ -404,7 +404,7 @@ void setup_sprite_##texture_mode##_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ u32 texture_offset_base = texture_offset; \ u32 control_mask; \ \ - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - offset_u); \ + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u * 2); \ u32 num_blocks = psx_gpu->num_blocks; \ block_struct *block = psx_gpu->blocks + num_blocks; \ \ @@ -416,7 +416,7 @@ void setup_sprite_##texture_mode##_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ control_mask = tile_width == 1; \ control_mask |= (tile_height == 1) << 1; \ control_mask |= ((left_block_mask & 0xFFFF) == 0xFFFF) << 2; \ - control_mask |= (((right_block_mask >> 8) & 0xFFFF) == 0xFFFF) << 3; \ + control_mask |= (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) << 3; \ \ sprites_##texture_mode++; \ \ @@ -514,13 +514,13 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, u32 left_offset = u & 0x7; u32 width_rounded = width + left_offset + 7; - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (s32)(x - left_offset); + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (s32)(x - left_offset * 2); u32 right_width = width_rounded & 0x7; u32 block_width = width_rounded / 8; - u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8); + u32 fb_ptr_pitch = (2048 + 16) - (block_width * 16); u32 left_mask_bits = ~(0xFFFF << (left_offset * 2)); - u32 right_mask_bits = 0xFE << (right_width * 2); + u32 right_mask_bits = 0xFFFC << (right_width * 2); u32 texture_offset_base = u + (v * 1024); u32 texture_mask = @@ -563,7 +563,7 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, 
load_128b(texels, texture_block_ptr); - zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); block->texels = texels_wide; block->draw_mask_bits = mask_bits_a; block->fb_ptr = fb_ptr; @@ -574,7 +574,7 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, block->fb_ptr = fb_ptr + 1024; block++; - zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); block->texels = texels_wide; block->draw_mask_bits = mask_bits_b; block->fb_ptr = fb_ptr + 8; @@ -613,7 +613,7 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, if(num_blocks > MAX_BLOCKS) { flush_render_block_buffer(psx_gpu); - num_blocks = block_width; + num_blocks = block_width * 4; block = psx_gpu->blocks; } @@ -624,7 +624,7 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, load_128b(texels, texture_block_ptr); - zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); block->texels = texels_wide; block->draw_mask_bits = left_mask_bits_a; block->fb_ptr = fb_ptr; @@ -635,7 +635,7 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, block->fb_ptr = fb_ptr + 1024; block++; - zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); block->texels = texels_wide; block->draw_mask_bits = left_mask_bits_b; block->fb_ptr = fb_ptr + 8; @@ -654,7 +654,7 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); load_128b(texels, texture_block_ptr); - zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, 
texels.low); block->texels = texels_wide; block->draw_mask_bits = 0; block->fb_ptr = fb_ptr; @@ -665,7 +665,7 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, block->fb_ptr = fb_ptr + 1024; block++; - zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); block->texels = texels_wide; block->draw_mask_bits = 0; block->fb_ptr = fb_ptr + 8; @@ -677,7 +677,7 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, block++; texture_offset += 8; - fb_ptr += 8; + fb_ptr += 16; blocks_remaining--; } @@ -685,7 +685,7 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); load_128b(texels, texture_block_ptr); - zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); block->texels = texels_wide; block->draw_mask_bits = right_mask_bits_a; block->fb_ptr = fb_ptr; @@ -696,7 +696,7 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, block->fb_ptr = fb_ptr + 1024; block++; - zip4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); block->texels = texels_wide; block->draw_mask_bits = right_mask_bits_b; block->fb_ptr = fb_ptr + 8; @@ -717,11 +717,17 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, #endif +static void setup_sprite_untextured_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, + s32 u, s32 v, s32 width, s32 height, u32 color) +{ + setup_sprite_untextured(psx_gpu, x, y, u, v, width * 2, height * 2, color); +} + #define setup_sprite_blocks_switch_textured_4x(texture_mode) \ setup_sprite_##texture_mode##_4x \ #define setup_sprite_blocks_switch_untextured_4x(texture_mode) \ - setup_sprite_untextured \ + 
setup_sprite_untextured_4x \ #define setup_sprite_blocks_switch_4x(texturing, texture_mode) \ setup_sprite_blocks_switch_##texturing##_4x(texture_mode) \ @@ -802,9 +808,6 @@ render_block_handler_struct render_sprite_block_handlers_4x[] = void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, s32 width, s32 height, u32 flags, u32 color) { - x *= 2; - y *= 2; - s32 x_right = x + width - 1; s32 y_bottom = y + height - 1; @@ -837,6 +840,9 @@ void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, if((width <= 0) || (height <= 0)) return; + x *= 2; + y *= 2; + #ifdef PROFILE span_pixels += width * height; spans += height; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index f920c73..4260bc7 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -760,6 +760,8 @@ breakloop: return list - list_start; } +#ifdef PCSX + static void *select_enhancement_buf_ptr(psx_gpu_struct *psx_gpu, u32 x) { u32 b; @@ -864,6 +866,8 @@ static void do_quad_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, } } +#if 0 + #define fill_vertex(i, x_, y_, u_, v_, rgb_) \ vertexes[i].x = x_; \ vertexes[i].y = y_; \ @@ -873,7 +877,7 @@ static void do_quad_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, vertexes[i].g = (rgb_) >> 8; \ vertexes[i].b = (rgb_) >> 16 -static void do_esprite_in_triangles(psx_gpu_struct *psx_gpu, int x, int y, +static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y, u32 u, u32 v, u32 w, u32 h, u32 cmd_rgb) { vertex_struct *vertex_ptrs[3]; @@ -928,6 +932,17 @@ static void do_esprite_in_triangles(psx_gpu_struct *psx_gpu, int x, int y, psx_gpu->render_state_base = render_state_base_saved; } +#else +static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y, + u32 u, u32 v, u32 w, u32 h, u32 cmd_rgb) +{ + u32 flags = (cmd_rgb >> 24); + u32 color = cmd_rgb & 0xffffff; + + psx_gpu->vram_out_ptr = 
psx_gpu->enhancement_current_buf_ptr; + render_sprite_4x(psx_gpu, x, y, u, v, w, h, flags, color); +} +#endif u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) { @@ -1216,7 +1231,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c u32 height = list_s16[5] & 0x1FF; render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); - do_esprite_in_triangles(psx_gpu, x, y, 0, 0, width, height, list[0]); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); break; } @@ -1233,7 +1248,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c render_sprite(psx_gpu, x, y, u, v, width, height, current_command, list[0]); - do_esprite_in_triangles(psx_gpu, x, y, u, v, width, height, list[0]); + do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); break; } @@ -1246,7 +1261,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); - do_esprite_in_triangles(psx_gpu, x, y, 0, 0, 1, 1, list[0]); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]); break; } @@ -1259,7 +1274,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); - do_esprite_in_triangles(psx_gpu, x, y, 0, 0, 8, 8, list[0]); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]); break; } @@ -1277,7 +1292,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c render_sprite(psx_gpu, x, y, u, v, 8, 8, current_command, list[0]); - do_esprite_in_triangles(psx_gpu, x, y, u, v, 8, 8, list[0]); + do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]); break; } @@ -1290,7 +1305,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 
size, u32 *last_c s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); - do_esprite_in_triangles(psx_gpu, x, y, 0, 0, 16, 16, list[0]); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]); break; } @@ -1307,7 +1322,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c set_clut(psx_gpu, list_s16[5]); render_sprite(psx_gpu, x, y, u, v, 16, 16, current_command, list[0]); - do_esprite_in_triangles(psx_gpu, x, y, u, v, 16, 16, list[0]); + do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); break; } @@ -1454,13 +1469,14 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c } } -#ifdef PCSX + enhancement_disable(); + breakloop: -#endif -enhancement_disable(); if (last_command != NULL) *last_command = current_command; return list - list_start; } +#endif /* PCSX */ + // vim:shiftwidth=2:expandtab diff --git a/plugins/gpu_neon/psx_gpu/vector_ops.h b/plugins/gpu_neon/psx_gpu/vector_ops.h index b58b190..c91e7d9 100644 --- a/plugins/gpu_neon/psx_gpu/vector_ops.h +++ b/plugins/gpu_neon/psx_gpu/vector_ops.h @@ -396,7 +396,7 @@ build_vector_types(s); #define zip_4x32b(dest, source_a, source_b) \ foreach_element(4, (dest).e[_i] = \ - (u8)(source_a).e[_i] | ((u8)(source_b).e[_i] << 16)) \ + (u16)(source_a).e[_i] | ((u16)(source_b).e[_i] << 16)) \ #define zip_2x64b(dest, source_a, source_b) \ foreach_element(2, (dest).e[_i] = \ -- cgit v1.2.3 From 59d15d23d97d4347d8046057013f8979db0914f0 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 21 Oct 2012 02:42:03 +0300 Subject: psx_gpu: consolidate C code, implement exnhancement asm --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 410 ++++++++++++++-- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 511 +------------------- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 698 +++++++++++++++++++++++----- 3 files changed, 947 insertions(+), 672 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git 
a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 2d552aa..3de2ece 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -3185,14 +3185,17 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #endif -#define setup_sprite_tiled_initialize_4bpp() \ +#define setup_sprite_tiled_initialize_4bpp_clut() \ u16 *clut_ptr = psx_gpu->clut_ptr; \ vec_8x16u clut_a, clut_b; \ vec_16x8u clut_low, clut_high; \ \ load_8x16b(clut_a, clut_ptr); \ load_8x16b(clut_b, clut_ptr + 8); \ - unzip_16x8b(clut_low, clut_high, clut_a, clut_b); \ + unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \ + +#define setup_sprite_tiled_initialize_4bpp() \ + setup_sprite_tiled_initialize_4bpp_clut(); \ \ if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask) \ update_texture_4bpp_cache(psx_gpu) \ @@ -3209,10 +3212,6 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) load_64b(texels, texture_block_ptr) \ -#define setup_sprite_tile_setup_block_yes(side, offset, texture_mode) \ - -#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \ - #define setup_sprite_tile_add_blocks(tile_num_blocks) \ num_blocks += tile_num_blocks; \ sprite_blocks += tile_num_blocks; \ @@ -3358,34 +3357,36 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #define setup_sprite_tile_column_edge_post_adjust_full(edge) \ -#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \ + x4mode) \ do \ { \ sub_tile_height = column_data; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + 
setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \ } while(0) \ -#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \ + x4mode) \ do \ { \ u32 tiles_remaining = column_data >> 16; \ sub_tile_height = column_data & 0xFF; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ tiles_remaining -= 1; \ \ while(tiles_remaining) \ { \ sub_tile_height = 16; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ tiles_remaining--; \ } \ \ sub_tile_height = (column_data >> 8) & 0xFF; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \ } while(0) \ @@ -3398,15 +3399,18 @@ do \ column_data |= (tile_height - 1) << 16 \ +#define RIGHT_MASK_BIT_SHIFT 8 +#define RIGHT_MASK_BIT_SHIFT_4x 16 + #define setup_sprite_tile_column_width_single(texture_mode, multi_height, \ - edge_mode, edge) \ + edge_mode, edge, x4mode) \ { \ setup_sprite_column_data_##multi_height(); \ left_mask_bits = left_block_mask | right_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \ - texture_mode); \ + texture_mode, x4mode); \ } \ #define setup_sprite_tiled_advance_column() \ @@ -3414,18 +3418,22 @@ do \ if((texture_offset_base & 0xF00) == 0) \ texture_offset_base -= (0x100 + 0xF00) \ +#define FB_PTR_MULTIPLIER 1 +#define FB_PTR_MULTIPLIER_4x 2 + #define 
setup_sprite_tile_column_width_multi(texture_mode, multi_height, \ - left_mode, right_mode) \ + left_mode, right_mode, x4mode) \ { \ setup_sprite_column_data_##multi_height(); \ - s32 fb_ptr_advance_column = 16 - (1024 * height); \ + s32 fb_ptr_advance_column = (16 - (1024 * height)) \ + * FB_PTR_MULTIPLIER##x4mode; \ \ tile_width -= 2; \ left_mask_bits = left_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tile_column_height_##multi_height(left_mode, right, \ - texture_mode); \ + texture_mode, x4mode); \ fb_ptr += fb_ptr_advance_column; \ \ left_mask_bits = 0x00; \ @@ -3434,22 +3442,297 @@ do \ while(tile_width) \ { \ setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height(full, none, texture_mode); \ + setup_sprite_tile_column_height_##multi_height(full, none, \ + texture_mode, x4mode); \ fb_ptr += fb_ptr_advance_column; \ tile_width--; \ } \ \ left_mask_bits = right_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tiled_advance_column(); \ setup_sprite_tile_column_height_##multi_height(right_mode, left, \ - texture_mode); \ + texture_mode, x4mode); \ +} \ + + +/* 4x stuff */ +#define setup_sprite_tiled_initialize_4bpp_4x() \ + setup_sprite_tiled_initialize_4bpp_clut() \ + +#define setup_sprite_tiled_initialize_8bpp_4x() \ + + +#define setup_sprite_tile_full_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels, pixels_wide; \ + setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + tbl_16(texels_low, texels, clut_low); \ + 
tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 24; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ } \ +#define setup_sprite_tile_half_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels, pixels_wide; \ + setup_sprite_tile_add_blocks(sub_tile_height * 4); \ + u32 edge##_mask_bits_a = 
edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ -#define setup_sprite_tiled_builder(texture_mode) \ -void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ + +#define setup_sprite_tile_full_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \ + vec_16x8u texels_wide; \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + 
block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24 + 1024; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_half_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks(sub_tile_height * 4); \ + vec_16x8u texels_wide; \ + u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8 + 1024; \ + block++; \ + \ + 
fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + + +#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \ + texture_offset = texture_offset_base + 8; \ + fb_ptr += 16 \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \ + fb_ptr -= 16 \ + +#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \ + + +#define setup_sprite_offset_u_adjust() \ + +#define setup_sprite_comapre_left_block_mask() \ + ((left_block_mask & 0xFF) == 0xFF) \ + +#define setup_sprite_comapre_right_block_mask() \ + (((right_block_mask >> 8) & 0xFF) == 0xFF) \ + + +#define setup_sprite_offset_u_adjust_4x() \ + offset_u *= 2; \ + offset_u_right = offset_u_right * 2 + 1 \ + +#define setup_sprite_comapre_left_block_mask_4x() \ + ((left_block_mask & 0xFFFF) == 0xFFFF) \ + +#define setup_sprite_comapre_right_block_mask_4x() \ + (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) \ + + +#define setup_sprite_tiled_builder(texture_mode, x4mode) \ +void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\ s32 u, s32 v, s32 width, s32 height, u32 color) \ { \ s32 offset_u = u & 0xF; \ @@ -3461,8 +3744,10 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ s32 tile_width = width_rounded / 16; \ u32 offset_u_right = width_rounded & 0xF; \ \ - u32 left_block_mask = ~(0xFFFF << 
offset_u); \ - u32 right_block_mask = 0xFFFE << offset_u_right; \ + setup_sprite_offset_u_adjust##x4mode(); \ + \ + u32 left_block_mask = ~(0xFFFFFFFF << offset_u); \ + u32 right_block_mask = 0xFFFFFFFE << offset_u_right; \ \ u32 left_mask_bits; \ u32 right_mask_bits; \ @@ -3479,19 +3764,19 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ u32 texture_offset_base = texture_offset; \ u32 control_mask; \ \ - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - offset_u); \ + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u); \ u32 num_blocks = psx_gpu->num_blocks; \ block_struct *block = psx_gpu->blocks + num_blocks; \ \ u16 *texture_block_ptr; \ vec_8x8u texels; \ \ - setup_sprite_tiled_initialize_##texture_mode(); \ + setup_sprite_tiled_initialize_##texture_mode##x4mode(); \ \ control_mask = tile_width == 1; \ control_mask |= (tile_height == 1) << 1; \ - control_mask |= ((left_block_mask & 0xFF) == 0xFF) << 2; \ - control_mask |= (((right_block_mask >> 8) & 0xFF) == 0xFF) << 3; \ + control_mask |= setup_sprite_comapre_left_block_mask##x4mode() << 2; \ + control_mask |= setup_sprite_comapre_right_block_mask##x4mode() << 3; \ \ sprites_##texture_mode++; \ \ @@ -3499,64 +3784,77 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ { \ default: \ case 0x0: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \ + x4mode); \ break; \ \ case 0x1: \ - setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \ + setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \ + x4mode); \ break; \ \ case 0x2: \ - setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \ + setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \ + x4mode); \ break; \ \ case 0x3: \ - setup_sprite_tile_column_width_single(texture_mode, single, full, none); \ + 
setup_sprite_tile_column_width_single(texture_mode, single, full, none, \ + x4mode); \ break; \ \ case 0x4: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \ + x4mode); \ break; \ \ case 0x5: \ - setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \ + setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \ + x4mode); \ break; \ \ case 0x6: \ - setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \ + setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \ + x4mode); \ break; \ \ case 0x7: \ - setup_sprite_tile_column_width_single(texture_mode, single, half, right);\ + setup_sprite_tile_column_width_single(texture_mode, single, half, right, \ + x4mode); \ break; \ \ case 0x8: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \ + x4mode); \ break; \ \ case 0x9: \ - setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \ + setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \ + x4mode); \ break; \ \ case 0xA: \ - setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \ + setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \ + x4mode); \ break; \ \ case 0xB: \ - setup_sprite_tile_column_width_single(texture_mode, single, half, left); \ + setup_sprite_tile_column_width_single(texture_mode, single, half, left, \ + x4mode); \ break; \ \ case 0xC: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \ + x4mode); \ break; \ \ case 0xE: \ - setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \ + setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \ + x4mode); \ break; \ } \ } \ - void 
setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, @@ -3564,9 +3862,19 @@ void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); +void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); + #ifndef NEON_BUILD -setup_sprite_tiled_builder(4bpp); -setup_sprite_tiled_builder(8bpp); +setup_sprite_tiled_builder(4bpp,); +setup_sprite_tiled_builder(8bpp,); + +setup_sprite_tiled_builder(4bpp,_4x); +setup_sprite_tiled_builder(8bpp,_4x); void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index f8afcf1..19c4a9e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -1,513 +1,4 @@ -#define setup_sprite_tiled_initialize_4bpp_4x() \ - u16 *clut_ptr = psx_gpu->clut_ptr; \ - vec_8x16u clut_a, clut_b; \ - vec_16x8u clut_low, clut_high; \ - \ - load_8x16b(clut_a, clut_ptr); \ - load_8x16b(clut_b, clut_ptr + 8); \ - unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \ - - -#define setup_sprite_tiled_initialize_8bpp_4x() \ - - -#define setup_sprite_tile_fetch_texel_block_8bpp_4x(offset) \ - texture_block_ptr = psx_gpu->texture_page_ptr + \ - ((texture_offset + offset) & texture_mask); \ - \ - load_64b(texels, texture_block_ptr) \ - - -#define setup_sprite_tile_setup_block_yes_4x(side, offset, texture_mode) \ - -#define 
setup_sprite_tile_setup_block_no_4x(side, offset, texture_mode) \ - -#define setup_sprite_tile_add_blocks_4x(tile_num_blocks) \ - num_blocks += tile_num_blocks * 4; \ - sprite_blocks += tile_num_blocks * 4; \ - \ - if(num_blocks > MAX_BLOCKS) \ - { \ - flush_render_block_buffer(psx_gpu); \ - num_blocks = tile_num_blocks * 4; \ - block = psx_gpu->blocks; \ - } \ - -#define setup_sprite_tile_full_4bpp_4x(edge) \ -{ \ - vec_8x8u texels_low, texels_high; \ - vec_8x16u pixels, pixels_wide; \ - setup_sprite_tile_add_blocks_4x(sub_tile_height * 2); \ - u32 left_mask_bits_a = left_mask_bits & 0xFF; \ - u32 left_mask_bits_b = left_mask_bits >> 8; \ - u32 right_mask_bits_a = right_mask_bits & 0xFF; \ - u32 right_mask_bits_b = right_mask_bits >> 8; \ - \ - while(sub_tile_height) \ - { \ - setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ - tbl_16(texels_low, texels, clut_low); \ - tbl_16(texels_high, texels, clut_high); \ - zip_8x16b(pixels, texels_low, texels_high); \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ - block->texels = pixels_wide; \ - block->draw_mask_bits = left_mask_bits_a; \ - block->fb_ptr = fb_ptr; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = left_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024; \ - block++; \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ - block->texels = pixels_wide; \ - block->draw_mask_bits = left_mask_bits_b; \ - block->fb_ptr = fb_ptr + 8; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = left_mask_bits_b; \ - block->fb_ptr = fb_ptr + 1024 + 8; \ - block++; \ - \ - setup_sprite_tile_fetch_texel_block_8bpp_4x(8); \ - tbl_16(texels_low, texels, clut_low); \ - tbl_16(texels_high, texels, clut_high); \ - zip_8x16b(pixels, texels_low, texels_high); \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ - block->texels = pixels_wide; \ - block->draw_mask_bits = right_mask_bits_a; \ - 
block->fb_ptr = fb_ptr + 16; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = right_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024 + 16; \ - block++; \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ - block->texels = pixels_wide; \ - block->draw_mask_bits = right_mask_bits_b; \ - block->fb_ptr = fb_ptr + 24; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = right_mask_bits_b; \ - block->fb_ptr = fb_ptr + 1024 + 24; \ - block++; \ - \ - fb_ptr += 2048; \ - texture_offset += 0x10; \ - sub_tile_height--; \ - } \ - texture_offset += 0xF00; \ - psx_gpu->num_blocks = num_blocks; \ -} \ - -#define setup_sprite_tile_half_4bpp_4x(edge) \ -{ \ - vec_8x8u texels_low, texels_high; \ - vec_8x16u pixels, pixels_wide; \ - setup_sprite_tile_add_blocks_4x(sub_tile_height); \ - u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ - u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ - \ - while(sub_tile_height) \ - { \ - setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ - tbl_16(texels_low, texels, clut_low); \ - tbl_16(texels_high, texels, clut_high); \ - zip_8x16b(pixels, texels_low, texels_high); \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ - block->texels = pixels_wide; \ - block->draw_mask_bits = edge##_mask_bits_a; \ - block->fb_ptr = fb_ptr; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = edge##_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024; \ - block++; \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ - block->texels = pixels_wide; \ - block->draw_mask_bits = edge##_mask_bits_b; \ - block->fb_ptr = fb_ptr + 8; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = edge##_mask_bits_b; \ - block->fb_ptr = fb_ptr + 1024 + 8; \ - block++; \ - \ - fb_ptr += 2048; \ - texture_offset += 0x10; \ - sub_tile_height--; \ - } \ - texture_offset += 0xF00; \ - 
psx_gpu->num_blocks = num_blocks; \ -} \ - - -#define setup_sprite_tile_full_8bpp_4x(edge) \ -{ \ - setup_sprite_tile_add_blocks_4x(sub_tile_height * 2); \ - vec_16x8u texels_wide; \ - u32 left_mask_bits_a = left_mask_bits & 0xFF; \ - u32 left_mask_bits_b = left_mask_bits >> 8; \ - u32 right_mask_bits_a = right_mask_bits & 0xFF; \ - u32 right_mask_bits_b = right_mask_bits >> 8; \ - \ - while(sub_tile_height) \ - { \ - setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ - zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ - block->r = texels_wide.low; \ - block->draw_mask_bits = left_mask_bits_a; \ - block->fb_ptr = fb_ptr; \ - block++; \ - \ - block->r = texels_wide.low; \ - block->draw_mask_bits = left_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = left_mask_bits_b; \ - block->fb_ptr = fb_ptr + 8; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = left_mask_bits_b; \ - block->fb_ptr = fb_ptr + 1024 + 8; \ - block++; \ - \ - setup_sprite_tile_fetch_texel_block_8bpp_4x(8); \ - zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ - block->r = texels_wide.low; \ - block->draw_mask_bits = right_mask_bits_a; \ - block->fb_ptr = fb_ptr + 16; \ - block++; \ - \ - block->r = texels_wide.low; \ - block->draw_mask_bits = right_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024 + 16; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = right_mask_bits_b; \ - block->fb_ptr = fb_ptr + 24; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = right_mask_bits_b; \ - block->fb_ptr = fb_ptr + 24 + 1024; \ - block++; \ - \ - fb_ptr += 2048; \ - texture_offset += 0x10; \ - sub_tile_height--; \ - } \ - texture_offset += 0xF00; \ - psx_gpu->num_blocks = num_blocks; \ -} \ - -#define setup_sprite_tile_half_8bpp_4x(edge) \ -{ \ - setup_sprite_tile_add_blocks_4x(sub_tile_height); \ - vec_16x8u texels_wide; \ - u32 
edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ - u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ - \ - while(sub_tile_height) \ - { \ - setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ - zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ - block->r = texels_wide.low; \ - block->draw_mask_bits = edge##_mask_bits_a; \ - block->fb_ptr = fb_ptr; \ - block++; \ - \ - block->r = texels_wide.low; \ - block->draw_mask_bits = edge##_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = edge##_mask_bits_b; \ - block->fb_ptr = fb_ptr + 8; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = edge##_mask_bits_b; \ - block->fb_ptr = fb_ptr + 8 + 1024; \ - block++; \ - \ - fb_ptr += 2048; \ - texture_offset += 0x10; \ - sub_tile_height--; \ - } \ - texture_offset += 0xF00; \ - psx_gpu->num_blocks = num_blocks; \ -} \ - - -#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \ - texture_offset = texture_offset_base + 8; \ - fb_ptr += 16 \ - -#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \ - texture_offset = texture_offset_base \ - -#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \ - setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \ - -#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \ - texture_offset = texture_offset_base \ - -#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \ - fb_ptr -= 16 \ - -#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \ - -#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \ - setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \ - -#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \ - - -#define setup_sprite_tile_column_height_single_4x(edge_mode, edge, \ - texture_mode) \ -do \ -{ \ - sub_tile_height = column_data; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode##_4x(edge); \ - 
setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode##_4x(edge); \ -} while(0) \ - -#define setup_sprite_tile_column_height_multi_4x(edge_mode, edge, \ - texture_mode) \ -do \ -{ \ - u32 tiles_remaining = column_data >> 16; \ - sub_tile_height = column_data & 0xFF; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode##_4x(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ - tiles_remaining -= 1; \ - \ - while(tiles_remaining) \ - { \ - sub_tile_height = 16; \ - setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ - tiles_remaining--; \ - } \ - \ - sub_tile_height = (column_data >> 8) & 0xFF; \ - setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode##_4x(edge); \ -} while(0) \ - - -#define setup_sprite_column_data_single_4x() \ - column_data = height \ - -#define setup_sprite_column_data_multi_4x() \ - column_data = 16 - offset_v; \ - column_data |= ((height_rounded & 0xF) + 1) << 8; \ - column_data |= (tile_height - 1) << 16 \ - - -#define setup_sprite_tile_column_width_single_4x(texture_mode, multi_height, \ - edge_mode, edge) \ -{ \ - setup_sprite_column_data_##multi_height##_4x(); \ - left_mask_bits = left_block_mask | right_block_mask; \ - right_mask_bits = left_mask_bits >> 16; \ - \ - setup_sprite_tile_column_height_##multi_height##_4x(edge_mode, edge, \ - texture_mode); \ -} \ - -#define setup_sprite_tiled_advance_column_4x() \ - texture_offset_base += 0x100; \ - if((texture_offset_base & 0xF00) == 0) \ - texture_offset_base -= (0x100 + 0xF00) \ - -#define setup_sprite_tile_column_width_multi_4x(texture_mode, multi_height, \ - left_mode, right_mode) \ -{ \ - setup_sprite_column_data_##multi_height##_4x(); \ - s32 fb_ptr_advance_column = 32 - (2048 * height); \ - \ - tile_width -= 2; \ - left_mask_bits = left_block_mask; \ - right_mask_bits = left_mask_bits >> 16; \ - \ - 
setup_sprite_tile_column_height_##multi_height##_4x(left_mode, right, \ - texture_mode); \ - fb_ptr += fb_ptr_advance_column; \ - \ - left_mask_bits = 0x00; \ - right_mask_bits = 0x00; \ - \ - while(tile_width) \ - { \ - setup_sprite_tiled_advance_column_4x(); \ - setup_sprite_tile_column_height_##multi_height##_4x(full, none, \ - texture_mode); \ - fb_ptr += fb_ptr_advance_column; \ - tile_width--; \ - } \ - \ - left_mask_bits = right_block_mask; \ - right_mask_bits = left_mask_bits >> 16; \ - \ - setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height##_4x(right_mode, left, \ - texture_mode); \ -} \ - - -#define setup_sprite_tiled_builder_4x(texture_mode) \ -void setup_sprite_##texture_mode##_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ - s32 u, s32 v, s32 width, s32 height, u32 color) \ -{ \ - s32 offset_u = u & 0xF; \ - s32 offset_v = v & 0xF; \ - \ - s32 width_rounded = offset_u + width + 15; \ - s32 height_rounded = offset_v + height + 15; \ - s32 tile_height = height_rounded / 16; \ - s32 tile_width = width_rounded / 16; \ - u32 offset_u_right = width_rounded & 0xF; \ - \ - u32 left_block_mask = ~(0xFFFFFFFF << (offset_u * 2)); \ - u32 right_block_mask = 0xFFFFFFFC << (offset_u_right * 2); \ - \ - u32 left_mask_bits; \ - u32 right_mask_bits; \ - \ - u32 sub_tile_height; \ - u32 column_data; \ - \ - u32 texture_mask = (psx_gpu->texture_mask_width & 0xF) | \ - ((psx_gpu->texture_mask_height & 0xF) << 4) | \ - ((psx_gpu->texture_mask_width >> 4) << 8) | \ - ((psx_gpu->texture_mask_height >> 4) << 12); \ - u32 texture_offset = ((v & 0xF) << 4) | ((u & 0xF0) << 4) | \ - ((v & 0xF0) << 8); \ - u32 texture_offset_base = texture_offset; \ - u32 control_mask; \ - \ - u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u * 2); \ - u32 num_blocks = psx_gpu->num_blocks; \ - block_struct *block = psx_gpu->blocks + num_blocks; \ - \ - u16 *texture_block_ptr; \ - vec_8x8u texels; \ - \ - 
setup_sprite_tiled_initialize_##texture_mode##_4x(); \ - \ - control_mask = tile_width == 1; \ - control_mask |= (tile_height == 1) << 1; \ - control_mask |= ((left_block_mask & 0xFFFF) == 0xFFFF) << 2; \ - control_mask |= (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) << 3; \ - \ - sprites_##texture_mode++; \ - \ - switch(control_mask) \ - { \ - default: \ - case 0x0: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, multi, full, \ - full); \ - break; \ - \ - case 0x1: \ - setup_sprite_tile_column_width_single_4x(texture_mode, multi, full, \ - none); \ - break; \ - \ - case 0x2: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, single, full, \ - full); \ - break; \ - \ - case 0x3: \ - setup_sprite_tile_column_width_single_4x(texture_mode, single, full, \ - none); \ - break; \ - \ - case 0x4: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, multi, half, \ - full); \ - break; \ - \ - case 0x5: \ - setup_sprite_tile_column_width_single_4x(texture_mode, multi, half, \ - right); \ - break; \ - \ - case 0x6: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, single, half, \ - full); \ - break; \ - \ - case 0x7: \ - setup_sprite_tile_column_width_single_4x(texture_mode, single, half, \ - right); \ - break; \ - \ - case 0x8: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, multi, full, \ - half); \ - break; \ - \ - case 0x9: \ - setup_sprite_tile_column_width_single_4x(texture_mode, multi, half, \ - left); \ - break; \ - \ - case 0xA: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, single, full, \ - half); \ - break; \ - \ - case 0xB: \ - setup_sprite_tile_column_width_single_4x(texture_mode, single, half, \ - left); \ - break; \ - \ - case 0xC: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, multi, half, \ - half); \ - break; \ - \ - case 0xE: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, single, half, \ - half); \ - break; \ - } \ -} \ - - -void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, 
s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); - -//#ifndef NEON_BUILD -#if 1 -setup_sprite_tiled_builder_4x(4bpp); -setup_sprite_tiled_builder_4x(8bpp); - +#ifndef NEON_BUILD void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 87a14f6..103483a 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -1,5 +1,6 @@ /* * Copyright (C) 2011 Gilead Kutnick "Exophase" + * Copyright (C) 2012 Gražvydas Ignotas "notaz" * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -3188,6 +3189,7 @@ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ shade_blocks_textured_modulated_load_bdm_##shading(); \ vshrn.u16 texels_b, texels, #7; \ \ + pld [ block_ptr_load_a ]; \ vmovn.u16 texels_r, texels; \ vmlal.u8 pixels, pixels_r_low, d64_1; \ \ @@ -4405,6 +4407,12 @@ function(render_block_fill_body) #define draw_mask_fb_ptr_left d2 #define draw_mask_fb_ptr_right d3 +#define draw_mask_fb_ptr_left_a d2 +#define draw_mask_fb_ptr_left_b d3 +#define draw_mask_fb_ptr_right_a d10 +#define draw_mask_fb_ptr_right_b d11 +#define draw_masks_fb_ptrs2 q5 + #define clut_low_a d4 #define clut_low_b d5 #define clut_high_a d6 @@ -4416,37 +4424,24 @@ function(render_block_fill_body) #define clut_a q2 #define clut_b q3 -#define texels_low d10 -#define texels_high d11 - - -setup_sprite_flush_blocks_single: - vpush { q1 - q4 } - - stmdb sp!, { r0 - r3, r12, r14 } - bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } - - vpop { 
q1 - q4 } - - add block, psx_gpu, #psx_gpu_blocks_offset +#define texels_low d12 +#define texels_high d13 - mov num_blocks, sub_tile_height - bx lr +#define texels_wide_low d14 +#define texels_wide_high d15 +#define texels_wide q7 -setup_sprite_flush_blocks_double: - vpush { q1 - q4 } +setup_sprite_flush_blocks: + vpush { q1 - q5 } stmdb sp!, { r0 - r3, r12, r14 } bl flush_render_block_buffer ldmia sp!, { r0 - r3, r12, r14 } - vpop { q1 - q4 } + vpop { q1 - q5 } add block, psx_gpu, #psx_gpu_blocks_offset - - mov num_blocks, sub_tile_height, lsl #1 bx lr @@ -4484,8 +4479,6 @@ setup_sprite_update_texture_8bpp_cache: blne setup_sprite_update_texture_8bpp_cache \ -#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \ - #define setup_sprite_block_count_single() \ sub_tile_height \ @@ -4496,7 +4489,8 @@ setup_sprite_update_texture_8bpp_cache: add num_blocks, num_blocks, setup_sprite_block_count_##type(); \ cmp num_blocks, #MAX_BLOCKS; \ \ - blgt setup_sprite_flush_blocks_##type \ + movgt num_blocks, setup_sprite_block_count_##type(); \ + blgt setup_sprite_flush_blocks \ #define setup_sprite_tile_full_4bpp(edge) \ @@ -4678,31 +4672,33 @@ setup_sprite_update_texture_8bpp_cache: #define setup_sprite_tile_column_edge_post_adjust_full(edge) \ -#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \ + x4mode) \ mov sub_tile_height, column_data; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge) \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge) \ -#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \ +#define 
setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \ + x4mode) \ and sub_tile_height, column_data, #0xFF; \ mov tiles_remaining, column_data, lsr #16; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ \ subs tiles_remaining, tiles_remaining, #1; \ beq 2f; \ \ 3: \ mov sub_tile_height, #16; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ subs tiles_remaining, tiles_remaining, #1; \ bne 3b; \ \ 2: \ uxtb sub_tile_height, column_data, ror #8; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge) \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge) \ #define setup_sprite_column_data_single() \ @@ -4721,17 +4717,30 @@ setup_sprite_update_texture_8bpp_cache: \ orr column_data, column_data, height_rounded, lsl #8 \ -#define setup_sprite_tile_column_width_single(texture_mode, multi_height, \ - edge_mode, edge) \ - setup_sprite_##texture_mode##_single_##multi_height##_##edge_mode##_##edge: \ +#define setup_sprite_setup_left_draw_mask_fb_ptr() \ + vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ + vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \ + +#define setup_sprite_setup_left_draw_mask_fb_ptr_advance_column() \ + mov fb_ptr_advance_column, #32; \ + vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ + \ + sub fb_ptr_advance_column, height, lsl #11; \ + vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \ + +#define setup_sprite_setup_right_draw_mask_fb_ptr() \ + vdup.u8 draw_mask_fb_ptr_left, block_masks[4]; \ + vdup.u8 draw_mask_fb_ptr_right, block_masks[5] \ + +#define setup_sprite_tile_column_width_single(tm, multi_height, 
edge_mode, \ + edge, x4mode) \ + setup_sprite_##tm##_single_##multi_height##_##edge_mode##_##edge##x4mode: \ setup_sprite_column_data_##multi_height(); \ vext.32 block_masks_shifted, block_masks, block_masks, #1; \ vorr.u32 block_masks, block_masks, block_masks_shifted; \ - vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ - vdup.u8 draw_mask_fb_ptr_right, block_masks[1]; \ + setup_sprite_setup_left_draw_mask_fb_ptr##x4mode(); \ \ - setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \ - texture_mode); \ + setup_sprite_tile_column_height_##multi_height(edge_mode, edge, tm, x4mode); \ ldmia sp!, { r4 - r11, pc } \ #define setup_sprite_tiled_advance_column() \ @@ -4740,39 +4749,335 @@ setup_sprite_update_texture_8bpp_cache: subeq texture_offset_base, texture_offset_base, #(0x100 + 0xF00) \ #define setup_sprite_tile_column_width_multi(tm, multi_height, left_mode, \ - right_mode) \ - setup_sprite_##tm##_multi_##multi_height##_##left_mode##_##right_mode: \ + right_mode, x4mode) \ + setup_sprite_##tm##_multi_##multi_height##_##left_mode##_##right_mode##x4mode:\ setup_sprite_column_data_##multi_height(); \ - mov fb_ptr_advance_column, #32; \ \ - sub fb_ptr_advance_column, height, lsl #11; \ - vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ + setup_sprite_setup_left_draw_mask_fb_ptr_advance_column##x4mode(); \ \ - vdup.u8 draw_mask_fb_ptr_right, block_masks[1]; \ - setup_sprite_tile_column_height_##multi_height(left_mode, right, tm); \ + setup_sprite_tile_column_height_##multi_height(left_mode, right, tm, x4mode);\ \ subs tile_width, tile_width, #2; \ add fb_ptr, fb_ptr, fb_ptr_advance_column; \ \ - vmov.u8 draw_masks_fb_ptrs, #0; \ beq 1f; \ \ + vmov.u8 draw_masks_fb_ptrs, #0; \ + vmov.u8 draw_masks_fb_ptrs2, #0; \ + \ 0: \ setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height(full, none, tm); \ + setup_sprite_tile_column_height_##multi_height(full, none, tm, x4mode); \ add fb_ptr, fb_ptr, fb_ptr_advance_column; \ subs 
tile_width, tile_width, #1; \ bne 0b; \ \ 1: \ - vdup.u8 draw_mask_fb_ptr_left, block_masks[4]; \ - vdup.u8 draw_mask_fb_ptr_right, block_masks[5]; \ + setup_sprite_setup_right_draw_mask_fb_ptr##x4mode(); \ \ setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height(right_mode, left, tm); \ + setup_sprite_tile_column_height_##multi_height(right_mode, left, tm, x4mode);\ ldmia sp!, { r4 - r11, pc } \ +#define setup_sprite_offset_u_adjust() \ + +#define setup_sprite_get_left_block_mask() \ + and left_block_mask, left_block_mask, #0xFF \ + +#define setup_sprite_compare_left_block_mask() \ + cmp left_block_mask, #0xFF \ + +#define setup_sprite_get_right_block_mask() \ + uxtb right_block_mask, right_block_mask, ror #8 \ + +#define setup_sprite_compare_right_block_mask() \ + cmp right_block_mask, #0xFF \ + + + +/* 4x stuff */ +#define fb_ptr2 column_data + +#define setup_sprite_offset_u_adjust_4x() \ + sub fb_ptr, fb_ptr, offset_u, lsl #1; \ + lsl offset_u_right, #1; \ + lsl offset_u, #1; \ + add offset_u_right, #1 \ + +#define setup_sprite_get_left_block_mask_4x() \ + sxth left_block_mask, left_block_mask \ + +#define setup_sprite_compare_left_block_mask_4x() \ + cmp left_block_mask, #0xFFFFFFFF \ + +#define setup_sprite_get_right_block_mask_4x() \ + sxth right_block_mask, right_block_mask, ror #16 \ + +#define setup_sprite_compare_right_block_mask_4x() \ + cmp right_block_mask, #0xFFFFFFFF \ + + +#define widen_texels_16bpp(texels_) \ + vmov texels_wide_low, texels_; \ + vmov texels_wide_high, texels_; \ + vzip.16 texels_wide_low, texels_wide_high \ + +#define widen_texels_8bpp(texels_) \ + vmov texels_wide_low, texels_; \ + vmov texels_wide_high, texels_; \ + vzip.8 texels_wide_low, texels_wide_high \ + +#define write_block_16bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) \ + vst1.u32 { texels_ }, [ block_, :128 ]; \ + add block_, block_, #40; \ + \ + vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; \ + vst1.u32 { draw_mask_fb_ptr_ }, [ block_, 
:64 ]; \ + add block_, block_, #24 \ + +/* assumes 16-byte offset already added to block_ */ +#define write_block_8bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) \ + vst1.u32 { texels_ }, [ block_, :64 ]; \ + add block_, block_, #24; \ + \ + vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; \ + vst1.u32 { draw_mask_fb_ptr_ }, [ block_, :64 ]; \ + add block_, block_, #40 \ + +#define do_texture_block_16bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \ + draw_mask_fb_ptr_b_) \ + widen_texels_16bpp(texels_low); \ + add fb_ptr_tmp, fb_ptr, #1024*2; \ + \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr); \ + \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); \ + widen_texels_16bpp(texels_high); \ + \ + add fb_ptr_tmp, fb_ptr, #8*2; \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); \ + \ + add fb_ptr_tmp, fb_ptr_tmp, #1024*2; \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) \ + +#define do_texture_block_8bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \ + draw_mask_fb_ptr_b_) \ + widen_texels_8bpp(texels); \ + add fb_ptr_tmp, fb_ptr, #1024*2; \ + \ + write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr); \ + write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); \ + \ + add fb_ptr_tmp, fb_ptr, #8*2; \ + write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); \ + \ + add fb_ptr_tmp, fb_ptr_tmp, #1024*2; \ + write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) \ + + +#define setup_sprite_tiled_initialize_4bpp_4x() \ + ldr clut_ptr, [ psx_gpu, #psx_gpu_clut_ptr_offset ]; \ + vld1.u32 { clut_a, clut_b }, [ clut_ptr, :128 ]; \ + \ + vuzp.u8 clut_a, clut_b \ + +#define setup_sprite_tiled_initialize_8bpp_4x() \ + + +#define setup_sprite_block_count_single_4x() \ + sub_tile_height, lsl #2 \ + +#define setup_sprite_block_count_double_4x() \ + sub_tile_height, lsl #(1+2) \ + +#define setup_sprite_tile_full_4bpp_4x(edge) \ + 
setup_sprite_tile_add_blocks(double_4x); \ + str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ + add texture_block_ptr, texture_offset, #8; \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + \ + and texture_block_ptr, texture_block_ptr, texture_mask; \ + vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ + \ + vzip.8 texels_low, texels_high; \ + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \ + draw_mask_fb_ptr_left_b); \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + add fb_ptr, fb_ptr, #16*2; \ + \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + \ + pld [ fb_ptr ]; \ + vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ + \ + vzip.8 texels_low, texels_high; \ + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \ + draw_mask_fb_ptr_right_b); \ + \ + add texture_offset, texture_offset, #0x10; \ + add fb_ptr, fb_ptr, #(2048 - 16) * 2; \ + \ + subs sub_tile_height, sub_tile_height, #1; \ + bne 4b; \ + \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_half_4bpp_4x(edge) \ + setup_sprite_tile_add_blocks(single_4x); \ + str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + \ + vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ + add texture_offset, 
texture_offset, #0x10; \ + \ + vzip.8 texels_low, texels_high; \ + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ + draw_mask_fb_ptr_##edge##_b); \ + \ + add fb_ptr, fb_ptr, #2048 * 2; \ + subs sub_tile_height, sub_tile_height, #1; \ + \ + bne 4b; \ + \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_full_8bpp_4x(edge) \ + setup_sprite_tile_add_blocks(double_4x); \ + add block, block, #16; \ + str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ + add texture_block_ptr, texture_offset, #8; \ + do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \ + draw_mask_fb_ptr_left_b); \ + \ + and texture_block_ptr, texture_block_ptr, texture_mask; \ + \ + add fb_ptr, fb_ptr, #16*2; \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + pld [ fb_ptr ]; \ + \ + do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \ + draw_mask_fb_ptr_right_b); \ + \ + add texture_offset, texture_offset, #0x10; \ + add fb_ptr, fb_ptr, #(2048 - 16) * 2; \ + \ + subs sub_tile_height, sub_tile_height, #1; \ + bne 4b; \ + \ + sub block, block, #16; \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_half_8bpp_4x(edge) \ + setup_sprite_tile_add_blocks(single_4x); \ + add block, block, #16; \ + str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ 
texture_block_ptr, :64 ]; \ + \ + do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ + draw_mask_fb_ptr_##edge##_b); \ + \ + add texture_offset, texture_offset, #0x10; \ + add fb_ptr, fb_ptr, #2048 * 2; \ + \ + subs sub_tile_height, sub_tile_height, #1; \ + bne 4b; \ + \ + sub block, block, #16; \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \ + add texture_offset, texture_offset_base, #8; \ + add fb_ptr, fb_ptr, #16 * 2 \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \ + mov texture_offset, texture_offset_base \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \ + mov texture_offset, texture_offset_base \ + +#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \ + sub fb_ptr, fb_ptr, #16 * 2 \ + +#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \ + + +#define setup_sprite_setup_left_draw_mask_fb_ptr_4x() \ + vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \ + vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \ + vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \ + vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \ + +#define setup_sprite_setup_left_draw_mask_fb_ptr_advance_column_4x() \ + mov fb_ptr_advance_column, #32 * 2; \ + vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \ + vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \ + sub fb_ptr_advance_column, height, lsl #11 + 1; \ + vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \ + vdup.u8 draw_mask_fb_ptr_right_b, 
block_masks[3] \ + +#define setup_sprite_setup_right_draw_mask_fb_ptr_4x() \ + vdup.u8 draw_mask_fb_ptr_left_a, block_masks[4]; \ + vdup.u8 draw_mask_fb_ptr_left_b, block_masks[5]; \ + vdup.u8 draw_mask_fb_ptr_right_a, block_masks[6]; \ + vdup.u8 draw_mask_fb_ptr_right_b, block_masks[7] \ + + // r0: psx_gpu // r1: x // r2: y @@ -4782,28 +5087,42 @@ setup_sprite_update_texture_8bpp_cache: // [ sp + 8 ]: height // [ sp + 12 ]: color (unused) -#define setup_sprite_tiled_builder(texture_mode) \ - \ -setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \ -setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \ -setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \ -setup_sprite_tile_column_width_single(texture_mode, single, full, none); \ -setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \ -setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \ -setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \ -setup_sprite_tile_column_width_single(texture_mode, single, half, right); \ -setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \ -setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \ -setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \ -setup_sprite_tile_column_width_single(texture_mode, single, half, left); \ -setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \ -setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \ +#define setup_sprite_tiled_builder(texture_mode, x4mode) \ + \ +setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, single, full, none, \ + x4mode); \ 
+setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, single, half, right, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, single, half, left, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \ + x4mode); \ \ .align 4; \ \ -function(setup_sprite_##texture_mode) \ +function(setup_sprite_##texture_mode##x4mode) \ stmdb sp!, { r4 - r11, r14 }; \ - setup_sprite_tiled_initialize_##texture_mode(); \ + setup_sprite_tiled_initialize_##texture_mode##x4mode(); \ \ ldr v, [ sp, #36 ]; \ and offset_u, u, #0xF; \ @@ -4832,11 +5151,13 @@ function(setup_sprite_##texture_mode) \ \ /* texture_offset_base = VH-UH-UL-00 */\ bfi texture_offset_base, u, #4, #8; \ - movw right_block_mask, #0xFFFE; \ + mov right_block_mask, #0xFFFFFFFE; \ + \ + setup_sprite_offset_u_adjust##x4mode(); \ \ /* texture_offset_base = VH-UH-VL-00 */\ bfi texture_offset_base, v, #4, #4; \ - movw left_block_mask, #0xFFFF; \ + mov left_block_mask, #0xFFFFFFFF; \ \ mov tile_height, height_rounded, lsr #4; \ mvn left_block_mask, left_block_mask, lsl offset_u; \ @@ -4856,16 +5177,16 @@ function(setup_sprite_##texture_mode) \ \ /* texture_mask = HH-WH-HL-WL */\ bfi texture_mask, texture_mask_rev, #8, #4; \ - and left_block_mask, left_block_mask, #0xFF; \ + setup_sprite_get_left_block_mask##x4mode(); \ \ mov control_mask, #0; \ - cmp left_block_mask, #0xFF; \ + 
setup_sprite_compare_left_block_mask##x4mode(); \ \ - uxtb right_block_mask, right_block_mask, ror #8; \ + setup_sprite_get_right_block_mask##x4mode(); \ orreq control_mask, control_mask, #0x4; \ \ ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]; \ - cmp right_block_mask, #0xFF; \ + setup_sprite_compare_right_block_mask##x4mode(); \ \ orreq control_mask, control_mask, #0x8; \ cmp tile_width, #1; \ @@ -4880,25 +5201,31 @@ function(setup_sprite_##texture_mode) \ ldr pc, [ pc, control_mask, lsl #2 ]; \ nop; \ \ - .word setup_sprite_##texture_mode##_multi_multi_full_full; \ - .word setup_sprite_##texture_mode##_single_multi_full_none; \ - .word setup_sprite_##texture_mode##_multi_single_full_full; \ - .word setup_sprite_##texture_mode##_single_single_full_none; \ - .word setup_sprite_##texture_mode##_multi_multi_half_full; \ - .word setup_sprite_##texture_mode##_single_multi_half_right; \ - .word setup_sprite_##texture_mode##_multi_single_half_full; \ - .word setup_sprite_##texture_mode##_single_single_half_right; \ - .word setup_sprite_##texture_mode##_multi_multi_full_half; \ - .word setup_sprite_##texture_mode##_single_multi_half_left; \ - .word setup_sprite_##texture_mode##_multi_single_full_half; \ - .word setup_sprite_##texture_mode##_single_single_half_left; \ - .word setup_sprite_##texture_mode##_multi_multi_half_half; \ + .word setup_sprite_##texture_mode##_multi_multi_full_full##x4mode; \ + .word setup_sprite_##texture_mode##_single_multi_full_none##x4mode; \ + .word setup_sprite_##texture_mode##_multi_single_full_full##x4mode; \ + .word setup_sprite_##texture_mode##_single_single_full_none##x4mode; \ + .word setup_sprite_##texture_mode##_multi_multi_half_full##x4mode; \ + .word setup_sprite_##texture_mode##_single_multi_half_right##x4mode; \ + .word setup_sprite_##texture_mode##_multi_single_half_full##x4mode; \ + .word setup_sprite_##texture_mode##_single_single_half_right##x4mode; \ + .word 
setup_sprite_##texture_mode##_multi_multi_full_half##x4mode; \ + .word setup_sprite_##texture_mode##_single_multi_half_left##x4mode; \ + .word setup_sprite_##texture_mode##_multi_single_full_half##x4mode; \ + .word setup_sprite_##texture_mode##_single_single_half_left##x4mode; \ + .word setup_sprite_##texture_mode##_multi_multi_half_half##x4mode; \ .word 0x00000000; \ - .word setup_sprite_##texture_mode##_multi_single_half_half \ + .word setup_sprite_##texture_mode##_multi_single_half_half##x4mode; \ + + +setup_sprite_tiled_builder(4bpp,); +setup_sprite_tiled_builder(8bpp,); +#undef draw_mask_fb_ptr_left +#undef draw_mask_fb_ptr_right -setup_sprite_tiled_builder(4bpp); -setup_sprite_tiled_builder(8bpp); +setup_sprite_tiled_builder(4bpp, _4x); +setup_sprite_tiled_builder(8bpp, _4x); #undef block_ptr @@ -4987,6 +5314,12 @@ function(texture_sprite_blocks_8bpp) #undef texture_mask #undef num_blocks #undef texture_offset +#undef texels_low +#undef texels_high +#undef texels_wide_low +#undef texels_wide_high +#undef texels_wide +#undef fb_ptr2 #define psx_gpu r0 #define x r1 @@ -4998,6 +5331,7 @@ function(texture_sprite_blocks_8bpp) #define left_offset r8 #define width_rounded r9 #define right_width r10 + #define block_width r11 #define texture_offset_base r1 @@ -5008,6 +5342,7 @@ function(texture_sprite_blocks_8bpp) #define fb_ptr r7 #define texture_offset r8 #define blocks_remaining r9 +#define fb_ptr2 r10 #define fb_ptr_pitch r12 #define texture_block_ptr r14 @@ -5026,29 +5361,23 @@ function(texture_sprite_blocks_8bpp) #define draw_mask_fb_ptr d2 #define texels q2 +#define draw_mask_fb_ptr_a d2 +#define draw_mask_fb_ptr_b d3 +#define texels_low d4 +#define texels_high d5 +#define texels_wide_low d6 +#define texels_wide_high d7 +#define texels_wide q3 -setup_sprites_16bpp_flush_single: - vpush { d0 - d2 } - - stmdb sp!, { r0 - r3, r12, r14 } - bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } - - vpop { d0 - d2 } - - add block, psx_gpu, 
#psx_gpu_blocks_offset - mov num_blocks, #1 - - bx lr -setup_sprites_16bpp_flush_row: - vpush { d0 - d2 } +setup_sprites_16bpp_flush: + vpush { d0 - d3 } stmdb sp!, { r0 - r3, r12, r14 } bl flush_render_block_buffer ldmia sp!, { r0 - r3, r12, r14 } - vpop { d0 - d2 } + vpop { d0 - d3 } add block, psx_gpu, #psx_gpu_blocks_offset mov num_blocks, block_width @@ -5113,7 +5442,7 @@ function(setup_sprite_16bpp) 1: add num_blocks, num_blocks, #1 cmp num_blocks, #MAX_BLOCKS - blgt setup_sprites_16bpp_flush_single + blgt setup_sprites_16bpp_flush and texture_block_ptr, texture_offset_base, texture_mask subs height, height, #1 @@ -5142,7 +5471,7 @@ function(setup_sprite_16bpp) mov texture_offset, texture_offset_base cmp num_blocks, #MAX_BLOCKS - blgt setup_sprites_16bpp_flush_row + blgt setup_sprites_16bpp_flush add texture_offset_base, texture_offset_base, #2048 and texture_block_ptr, texture_offset, texture_mask @@ -5213,6 +5542,151 @@ function(setup_sprite_16bpp) ldmia sp!, { r4 - r11, pc } +// 4x version +// FIXME: duplicate code with normal version :( +#undef draw_mask_fb_ptr + +function(setup_sprite_16bpp_4x) + stmdb sp!, { r4 - r11, r14 } + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] + + ldr v, [ sp, #36 ] + add fb_ptr, fb_ptr, y, lsl #11 + + ldr width, [ sp, #40 ] + add fb_ptr, fb_ptr, x, lsl #1 + + ldr height, [ sp, #44 ] + and left_offset, u, #0x7 + + add texture_offset_base, u, u + add width_rounded, width, #7 + + add texture_offset_base, v, lsl #11 + movw left_mask_bits, #0xFFFF + + ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ] + add width_rounded, width_rounded, left_offset + + lsl left_offset, #1 + + ldrb texture_mask_height, [ psx_gpu, #psx_gpu_texture_mask_height_offset ] + sub fb_ptr, fb_ptr, left_offset, lsl #1 + + add texture_mask, texture_mask_width, texture_mask_width + movw right_mask_bits, #0xFFFC + + and right_width, width_rounded, #0x7 + mvn left_mask_bits, left_mask_bits, lsl left_offset + + lsl right_width, #1 + 
+ add texture_mask, texture_mask_height, lsl #11 + mov block_width, width_rounded, lsr #3 + + mov right_mask_bits, right_mask_bits, lsl right_width + movw fb_ptr_pitch, #(2048 + 16) * 2 + + sub fb_ptr_pitch, fb_ptr_pitch, block_width, lsl #4+1 + vmov block_masks, left_mask_bits, right_mask_bits + + ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + add block, psx_gpu, #psx_gpu_blocks_offset + + bic texture_offset_base, texture_offset_base, #0xF + cmp block_width, #1 + + ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ] + add block, block, num_blocks, lsl #6 + + lsl block_width, #2 + bne 0f + + vext.32 block_masks_shifted, block_masks, block_masks, #1 + vorr.u32 block_masks, block_masks, block_masks_shifted + vdup.u8 draw_mask_fb_ptr_a, block_masks[0] + vdup.u8 draw_mask_fb_ptr_b, block_masks[1] + + 1: + add num_blocks, num_blocks, block_width + cmp num_blocks, #MAX_BLOCKS + blgt setup_sprites_16bpp_flush + + and texture_block_ptr, texture_offset_base, texture_mask + subs height, height, #1 + + add texture_block_ptr, texture_page_ptr, texture_block_ptr + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + + add texture_offset_base, texture_offset_base, #2048 + add fb_ptr, fb_ptr, #2048*2 + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + bne 1b + + ldmia sp!, { r4 - r11, pc } + + 0: + add num_blocks, num_blocks, block_width + mov texture_offset, texture_offset_base + + vdup.u8 draw_mask_fb_ptr_a, block_masks[0] // left_mask_bits + vdup.u8 draw_mask_fb_ptr_b, block_masks[1] + + cmp num_blocks, #MAX_BLOCKS + blgt setup_sprites_16bpp_flush + + add texture_offset_base, texture_offset_base, #2048 + and texture_block_ptr, texture_offset, texture_mask + + add texture_block_ptr, texture_page_ptr, texture_block_ptr + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + + subs 
blocks_remaining, block_width, #2*4 + add texture_offset, texture_offset, #16 + + vmov.u8 draw_mask_fb_ptr_a, #0 + vmov.u8 draw_mask_fb_ptr_b, #0 + + add fb_ptr, fb_ptr, #16*2 + beq 2f + + 1: + and texture_block_ptr, texture_offset, texture_mask + subs blocks_remaining, blocks_remaining, #4 + + add texture_block_ptr, texture_page_ptr, texture_block_ptr + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + add texture_offset, texture_offset, #16 + + add fb_ptr, fb_ptr, #16*2 + bgt 1b + + 2: + vdup.u8 draw_mask_fb_ptr_a, block_masks[4] // right_mask_bits + vdup.u8 draw_mask_fb_ptr_b, block_masks[5] + + and texture_block_ptr, texture_offset, texture_mask + add texture_block_ptr, texture_page_ptr, texture_block_ptr + + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + subs height, height, #1 + + add fb_ptr, fb_ptr, fb_ptr_pitch + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + + bne 0b + + ldmia sp!, { r4 - r11, pc } + + #undef texture_page_ptr #undef vram_ptr #undef dirty_textures_mask @@ -5445,3 +5919,5 @@ function(scale2x_tiles8) nop pop { r4, pc } + +// vim:filetype=armasm -- cgit v1.2.3 From c111e8f8fb8a0d3bd7b05c743a48d942e107cc79 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 21 Oct 2012 20:06:51 +0300 Subject: psx_gpu: change reciprocal table again otherwise something overflows somewhere and causes num_block overflow. Also added debug code to catch that issue early.
--- plugins/gpu_neon/psx_gpu/psx_gpu.c | 20 ++++++++++++++++++-- plugins/gpu_neon/psx_gpu/tests/Makefile | 2 +- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 3de2ece..ce72af5 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -767,6 +767,21 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b) \ +#ifndef NDEBUG +#define setup_spans_debug_check(span_edge_data_element) \ + if (&span_edge_data_element - psx_gpu->span_edge_data < psx_gpu->num_spans) \ + { \ + if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \ + *(int *)0 = 1; \ + if(span_edge_data_element.y > 2048) \ + *(int *)0 = 1; \ + } \ + +#else +#define setup_spans_debug_check(span_edge_data_element) \ + +#endif + #define setup_spans_prologue_alternate_yes() \ vec_2x64s alternate_x; \ vec_2x64s alternate_dx_dy; \ @@ -1070,6 +1085,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, span_edge_data[i].num_blocks = left_right_x_16.high.e[i]; \ span_edge_data[i].right_mask = span_shift.e[i]; \ span_edge_data[i].y = y_x4.e[i]; \ + setup_spans_debug_check(span_edge_data[i]); \ } \ \ span_edge_data += 4; \ @@ -4906,10 +4922,10 @@ void initialize_reciprocal_table(void) { shift = __builtin_clz(height); height_normalized = height << shift; - height_reciprocal = ((1ULL << 52) + (height_normalized - 1)) / + height_reciprocal = ((1ULL << 51) + (height_normalized - 1)) / height_normalized; - shift = 32 - (52 - shift); + shift = 32 - (51 - shift); reciprocal_table[height] = (height_reciprocal << 10) | shift; } diff --git a/plugins/gpu_neon/psx_gpu/tests/Makefile b/plugins/gpu_neon/psx_gpu/tests/Makefile index 210f44d..58cca29 100644 --- a/plugins/gpu_neon/psx_gpu/tests/Makefile +++ b/plugins/gpu_neon/psx_gpu/tests/Makefile @@ -13,7 +13,7 @@ ASFLAGS 
= $(CFLAGS) OBJ += psx_gpu_arm_neon.o endif ifndef DEBUG -CFLAGS += -O2 -fno-strict-aliasing +CFLAGS += -O2 -DNDEBUG -fno-strict-aliasing endif OBJ += psx_gpu.o psx_gpu_parse.o psx_gpu_main.o -- cgit v1.2.3 From 7956599fa5f666016f71870d9889748c97839041 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 22 Oct 2012 01:42:56 +0300 Subject: psx_gpu: select buffers differently this handles weird drawing areas better --- plugins/gpu_neon/psx_gpu/psx_gpu.h | 3 ++- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 6 +++++ plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 40 ++++++++++++++++++++++++-------- 3 files changed, 38 insertions(+), 11 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index f8547f3..846658c 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -188,10 +188,11 @@ typedef struct s16 saved_viewport_start_y; s16 saved_viewport_end_x; s16 saved_viewport_end_y; + u8 enhancement_buf_by_x16[64]; // Align up to 64 byte boundary to keep the upcoming buffers cache line // aligned, also make reachable with single immediate addition - u8 reserved_a[228]; + u8 reserved_a[164]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index 19c4a9e..83c6680 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -1,3 +1,7 @@ +#define select_enhancement_buf_ptr(psx_gpu, x) \ + ((psx_gpu)->enhancement_buf_ptr + \ + ((psx_gpu)->enhancement_buf_by_x16[(x) / 16] << 20)) + #ifndef NEON_BUILD void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) @@ -331,6 +335,8 @@ void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, if((width <= 0) || (height <= 0)) return; + psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x); + x *= 2; y *= 2; diff --git 
a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 4260bc7..0536613 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -762,17 +762,26 @@ breakloop: #ifdef PCSX -static void *select_enhancement_buf_ptr(psx_gpu_struct *psx_gpu, u32 x) +static void update_enhancement_buf_table(psx_gpu_struct *psx_gpu) { - u32 b; - for (b = 0; x >= psx_gpu->enhancement_x_threshold; b++) - x -= psx_gpu->enhancement_x_threshold; - return psx_gpu->enhancement_buf_ptr + b * 1024 * 1024; + u32 b, x, s; + + b = 0; + s = psx_gpu->enhancement_x_threshold; + for (x = 0; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) + { + if (x * 16 >= s - 15) + { + s += psx_gpu->enhancement_x_threshold; + b++; + } + psx_gpu->enhancement_buf_by_x16[x] = b; + } } #define select_enhancement_buf(psx_gpu) \ psx_gpu->enhancement_current_buf_ptr = \ - select_enhancement_buf_ptr(psx_gpu, psx_gpu->saved_viewport_start_x + 8) + select_enhancement_buf_ptr(psx_gpu, psx_gpu->saved_viewport_start_x) #define enhancement_disable() { \ psx_gpu->vram_out_ptr = psx_gpu->vram_ptr; \ @@ -939,12 +948,12 @@ static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y, u32 flags = (cmd_rgb >> 24); u32 color = cmd_rgb & 0xffffff; - psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; render_sprite_4x(psx_gpu, x, y, u, v, w, h, flags, color); } #endif -u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) +u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, + u32 *last_command) { u32 current_command = 0, command_length; @@ -987,7 +996,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c do_fill(psx_gpu, x, y, width, height, color); - psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; + psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x); x *= 2; y *= 2; width *= 2; @@ -1403,10 +1412,20 @@ u32 
gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c } case 0xE3: + { + u32 d; psx_gpu->viewport_start_x = list[0] & 0x3FF; psx_gpu->viewport_start_y = (list[0] >> 10) & 0x1FF; psx_gpu->saved_viewport_start_x = psx_gpu->viewport_start_x; psx_gpu->saved_viewport_start_y = psx_gpu->viewport_start_y; + + d = psx_gpu->enhancement_x_threshold - psx_gpu->viewport_start_x; + if(unlikely(0 < d && d <= 8)) + { + // Grandia hack.. + psx_gpu->enhancement_x_threshold = psx_gpu->viewport_start_x; + update_enhancement_buf_table(psx_gpu); + } select_enhancement_buf(psx_gpu); #ifdef TEXTURE_CACHE_4BPP @@ -1417,7 +1436,8 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_c #endif SET_Ex(3, list[0]); break; - + } + case 0xE4: psx_gpu->viewport_end_x = list[0] & 0x3FF; psx_gpu->viewport_end_y = (list[0] >> 10) & 0x1FF; -- cgit v1.2.3 From f0931e56b2428fe5e0f6b4d7d6d0f41462cfc551 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 27 Oct 2012 22:14:16 +0300 Subject: psx_gpu: implement setup_sprite_untextured in asm --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 74 +++++++++++++- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 143 ++++++++++++++++++++++++++++ 2 files changed, 215 insertions(+), 2 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index ce72af5..485ef27 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -3885,6 +3885,11 @@ void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); +void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color); +void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, + s32 u, s32 v, s32 width, s32 height, u32 color); + #ifndef NEON_BUILD 
setup_sprite_tiled_builder(4bpp,); setup_sprite_tiled_builder(8bpp,); @@ -4013,11 +4018,16 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } } -#endif - void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { + if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | + RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0) + { + setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); + return; + } + u32 right_width = ((width - 1) & 0x7) + 1; u32 right_mask_bits = (0xFF << right_width); u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x; @@ -4083,6 +4093,66 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } } +#endif + +void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, + s32 u, s32 v, s32 width, s32 height, u32 color) +{ + u32 r = color & 0xFF; + u32 g = (color >> 8) & 0xFF; + u32 b = (color >> 16) & 0xFF; + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | + psx_gpu->mask_msb; + u32 color_32bpp = color_16bpp | (color_16bpp << 16); + + u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024); + u32 *vram_ptr; + + u32 num_width; + + if(psx_gpu->num_blocks > MAX_BLOCKS) + { + flush_render_block_buffer(psx_gpu); + } + + while(height) + { + num_width = width; + + vram_ptr = (void *)vram_ptr16; + if((long)vram_ptr16 & 2) + { + *vram_ptr16 = color_32bpp; + vram_ptr = (void *)(vram_ptr16 + 1); + num_width--; + } + + while(num_width >= 4 * 2) + { + vram_ptr[0] = color_32bpp; + vram_ptr[1] = color_32bpp; + vram_ptr[2] = color_32bpp; + vram_ptr[3] = color_32bpp; + + vram_ptr += 4; + num_width -= 4 * 2; + } + + while(num_width >= 2) + { + *vram_ptr++ = color_32bpp; + num_width -= 2; + } + + if(num_width > 0) + { + *(u16 *)vram_ptr = color_32bpp; + } + + vram_ptr16 += 1024; + height--; + } +} #define setup_sprite_blocks_switch_textured(texture_mode) \ diff --git 
a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 103483a..085e11b 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -17,6 +17,10 @@ #define MAX_BLOCKS 64 #define MAX_BLOCKS_PER_ROW 128 +#define RENDER_STATE_MASK_EVALUATE 0x20 +#define RENDER_FLAGS_MODULATE_TEXELS 0x1 +#define RENDER_FLAGS_BLEND 0x2 + #include "psx_gpu_offsets.h" #define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4) @@ -5687,6 +5691,145 @@ function(setup_sprite_16bpp_4x) ldmia sp!, { r4 - r11, pc } +#undef width +#undef right_width +#undef right_mask_bits +#undef color +#undef height +#undef blocks_remaining +#undef colors +#undef right_mask +#undef test_mask +#undef draw_mask + +#define psx_gpu r0 +#define x r1 +#define y r2 +#define width r3 +#define right_width r5 +#define right_mask_bits r6 +#define fb_ptr r7 +#define color r8 +#define height r9 +#define fb_ptr_pitch r12 + +// referenced by setup_sprites_16bpp_flush +#define num_blocks r4 +#define block r5 +#define block_width r11 + +#define color_r r1 +#define color_g r2 +#define color_b r8 +#define blocks_remaining r6 + +#define colors q0 +#define right_mask q1 +#define test_mask q2 +#define draw_mask q2 +#define draw_mask_bits_fb_ptr d6 + + +.align 3 + +function(setup_sprite_untextured) + ldrh r12, [ psx_gpu, #psx_gpu_render_state_offset ] + tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \ + | RENDER_FLAGS_BLEND) + beq setup_sprite_untextured_simple + + stmdb sp!, { r4 - r11, r14 } + + ldr width, [ sp, #40 ] + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] + + ldr height, [ sp, #44 ] + add fb_ptr, fb_ptr, y, lsl #11 + + add fb_ptr, fb_ptr, x, lsl #1 + sub right_width, width, #1 + + ldr color, [ sp, #48 ] + and right_width, #7 + + add block_width, width, #7 + add right_width, #1 + + lsr block_width, #3 + mov right_mask_bits, #0xff + + sub fb_ptr_pitch, block_width, #1 + lsl right_mask_bits, 
right_width + + lsl fb_ptr_pitch, #3+1 + ubfx color_r, color, #3, #5 + + rsb fb_ptr_pitch, #1024*2 + ubfx color_g, color, #11, #5 + + vld1.u32 { test_mask }, [ psx_gpu, :128 ] + ubfx color_b, color, #19, #5 + + vdup.u16 right_mask, right_mask_bits + orr color, color_r, color_b, lsl #10 + + ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + orr color, color, color_g, lsl #5 + + vtst.u16 right_mask, right_mask, test_mask + add block, psx_gpu, #psx_gpu_blocks_offset + + vdup.u16 colors, color + add block, block, num_blocks, lsl #6 + + +setup_sprite_untextured_height_loop: + add num_blocks, block_width + sub blocks_remaining, block_width, #1 + + cmp num_blocks, #MAX_BLOCKS + blgt setup_sprites_16bpp_flush + + cmp blocks_remaining, #0 + ble 1f + + vmov.u8 draw_mask, #0 /* zero_mask */ + vmov.u8 draw_mask_bits_fb_ptr, #0 + + 0: + vst1.u32 { draw_mask }, [ block, :128 ]! + subs blocks_remaining, #1 + + vst1.u32 { colors }, [ block, :128 ] + add block, block, #24 + + vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr + vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ] + + add block, block, #24 + add fb_ptr, #8*2 + bgt 0b + + 1: + vst1.u32 { right_mask }, [ block, :128 ]! 
+ subs height, #1 + + vst1.u32 { colors }, [ block, :128 ] + add block, block, #24 + + vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr + vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ] + + add block, block, #24 + add fb_ptr, fb_ptr_pitch + + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + bgt setup_sprite_untextured_height_loop + + ldmia sp!, { r4 - r11, pc } + + + #undef texture_page_ptr #undef vram_ptr #undef dirty_textures_mask -- cgit v1.2.3 From 8438c3c78159bd3986560e30bfe97b7bb91f8cc4 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 27 Oct 2012 22:18:36 +0300 Subject: psx_gpu: add some preloads seem to be helpful for enhancement --- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 085e11b..a2bfa5b 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -3392,10 +3392,12 @@ function(shade_blocks_textured_unmodulated_direct) [ draw_mask_bits_ptr, :16 ], c_64 vbif.u16 fb_pixels, pixels, draw_mask_combined - vld1.u32 { pixels }, [ block_ptr_load, :128 ], c_64 - sub fb_ptr_cmp, fb_ptr_next, fb_ptr + pld [ fb_ptr_next, #64 ] + add fb_ptr_cmp, fb_ptr_cmp, #14 + vld1.u32 { pixels }, [ block_ptr_load, :128 ], c_64 + cmp fb_ptr_cmp, #28 bls 4f @@ -3754,11 +3756,15 @@ function(blend_blocks_textured_add_##mask_evaluate) \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ vand.u16 pixels_mg, pixels, d128_0x83E0; \ \ - vbit.u16 blend_pixels, fb_pixels, draw_mask; \ - vld1.u32 { draw_mask }, [ draw_mask_ptr, :128 ], c_64; \ + sub fb_ptr_cmp, fb_ptr_next, fb_ptr; \ + pld [ fb_ptr_next, #64 ]; \ \ sub fb_ptr_cmp, fb_ptr_next, fb_ptr; \ + vbit.u16 blend_pixels, fb_pixels, draw_mask; \ + \ add fb_ptr_cmp, fb_ptr_cmp, #14; \ + vld1.u32 { draw_mask }, [ draw_mask_ptr, :128 ], c_64; \ + \ cmp 
fb_ptr_cmp, #28; \ bls 2f; \ \ @@ -4917,12 +4923,12 @@ setup_sprite_update_texture_8bpp_cache: draw_mask_fb_ptr_left_b); \ \ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ - add fb_ptr, fb_ptr, #16*2; \ + pld [ fb_ptr, #2048 ]; \ \ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ - vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + add fb_ptr, fb_ptr, #16*2; \ \ - pld [ fb_ptr ]; \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ \ vzip.8 texels_low, texels_high; \ @@ -4961,9 +4967,10 @@ setup_sprite_update_texture_8bpp_cache: do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ draw_mask_fb_ptr_##edge##_b); \ \ + pld [ fb_ptr, #2048 ]; \ add fb_ptr, fb_ptr, #2048 * 2; \ - subs sub_tile_height, sub_tile_height, #1; \ \ + subs sub_tile_height, sub_tile_height, #1; \ bne 4b; \ \ ldr column_data, [sp], #8; /* fb_ptr2 */ \ @@ -4987,13 +4994,13 @@ setup_sprite_update_texture_8bpp_cache: do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \ draw_mask_fb_ptr_left_b); \ \ + pld [ fb_ptr, #2048 ]; \ and texture_block_ptr, texture_block_ptr, texture_mask; \ \ add fb_ptr, fb_ptr, #16*2; \ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ \ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ - pld [ fb_ptr ]; \ \ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \ draw_mask_fb_ptr_right_b); \ @@ -5022,6 +5029,7 @@ setup_sprite_update_texture_8bpp_cache: add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ \ + pld [ fb_ptr, #2048 ]; \ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ draw_mask_fb_ptr_##edge##_b); \ \ -- cgit v1.2.3 From b7569147823a8fc5a9de98e5d491da906e119296 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 28 Oct 2012 16:32:24 +0200 Subject: psx_gpu: workaround overflow crash rare corner case --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 15 ++++++++++++--- 
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 13 +++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 485ef27..2cba878 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -769,13 +769,18 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, #ifndef NDEBUG #define setup_spans_debug_check(span_edge_data_element) \ - if (&span_edge_data_element - psx_gpu->span_edge_data < psx_gpu->num_spans) \ +{ \ + u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \ + if (_num_spans > MAX_SPANS) \ + *(int *)0 = 1; \ + if (_num_spans < psx_gpu->num_spans) \ { \ if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \ *(int *)0 = 1; \ if(span_edge_data_element.y > 2048) \ *(int *)0 = 1; \ } \ +} \ #else #define setup_spans_debug_check(span_edge_data_element) \ @@ -1423,12 +1428,16 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, y_x4.e[3] = y_a + 3; setup_spans_adjust_edges_alternate_no(index_left, index_right); + // FIXME: overflow corner case + if(psx_gpu->num_spans + height_minor_b == MAX_SPANS) + height_minor_b &= ~3; + psx_gpu->num_spans += height_minor_b; - do + while(height_minor_b > 0) { setup_spans_set_x4(none, down, no); height_minor_b -= 4; - } while(height_minor_b > 0); + } } left_split_triangles++; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index a2bfa5b..4e1e403 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -1175,6 +1175,10 @@ function(setup_spans_up_down) ldrh temp, [ psx_gpu, #psx_gpu_num_spans_offset ] add temp, temp, height_minor_b + + cmp temp, #MAX_SPANS + beq 5f + strh temp, [ psx_gpu, #psx_gpu_num_spans_offset ] 2: @@ -1190,6 +1194,15 @@ function(setup_spans_up_down) setup_spans_prologue_b() bal 4b 
+ 5: + // FIXME: overflow corner case + sub temp, temp, height_minor_b + bics height_minor_b, #3 + add temp, temp, height_minor_b + strh temp, [ psx_gpu, #psx_gpu_num_spans_offset ] + bne 2b + bal 1b + .pool #undef span_uvrg_offset -- cgit v1.2.3 From 77e34391a6b3c8ae59768a941037451b7c81169f Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 1 Nov 2012 19:03:27 +0200 Subject: psx_gpu: rework enhancement buffer selection --- plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 121 ++++++++++++++++++++++++------- 1 file changed, 93 insertions(+), 28 deletions(-) (limited to 'plugins/gpu_neon/psx_gpu') diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 0536613..67da86e 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -688,11 +688,21 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) } SET_Ex(2, list[0]); break; - } + } + + case 0xE3: + { + s16 viewport_start_x = list[0] & 0x3FF; + s16 viewport_start_y = (list[0] >> 10) & 0x1FF; + + if(viewport_start_x == psx_gpu->viewport_start_x && + viewport_start_y == psx_gpu->viewport_start_y) + { + break; + } - case 0xE3: - psx_gpu->viewport_start_x = list[0] & 0x3FF; - psx_gpu->viewport_start_y = (list[0] >> 10) & 0x1FF; + psx_gpu->viewport_start_x = viewport_start_x; + psx_gpu->viewport_start_y = viewport_start_y; #ifdef TEXTURE_CACHE_4BPP psx_gpu->viewport_mask = @@ -700,12 +710,23 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) psx_gpu->viewport_start_y, psx_gpu->viewport_end_x, psx_gpu->viewport_end_y); #endif - SET_Ex(3, list[0]); - break; - - case 0xE4: - psx_gpu->viewport_end_x = list[0] & 0x3FF; - psx_gpu->viewport_end_y = (list[0] >> 10) & 0x1FF; + SET_Ex(3, list[0]); + break; + } + + case 0xE4: + { + s16 viewport_end_x = list[0] & 0x3FF; + s16 viewport_end_y = (list[0] >> 10) & 0x1FF; + + if(viewport_end_x == psx_gpu->viewport_end_x && + viewport_end_y == 
psx_gpu->viewport_end_y) + { + break; + } + + psx_gpu->viewport_end_x = viewport_end_x; + psx_gpu->viewport_end_y = viewport_end_y; #ifdef TEXTURE_CACHE_4BPP psx_gpu->viewport_mask = @@ -713,10 +734,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) psx_gpu->viewport_start_y, psx_gpu->viewport_end_x, psx_gpu->viewport_end_y); #endif - SET_Ex(4, list[0]); - break; + SET_Ex(4, list[0]); + break; + } - case 0xE5: + case 0xE5: { s32 offset_x = list[0] << 21; s32 offset_y = list[0] << 10; @@ -762,7 +784,9 @@ breakloop: #ifdef PCSX -static void update_enhancement_buf_table(psx_gpu_struct *psx_gpu) +#define ENH_BUF_TABLE_STEP (1024 / sizeof(psx_gpu->enhancement_buf_by_x16)) + +static void update_enhancement_buf_table_from_hres(psx_gpu_struct *psx_gpu) { u32 b, x, s; @@ -770,7 +794,7 @@ static void update_enhancement_buf_table(psx_gpu_struct *psx_gpu) s = psx_gpu->enhancement_x_threshold; for (x = 0; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) { - if (x * 16 >= s - 15) + if (b < 3 && x * ENH_BUF_TABLE_STEP >= s - ENH_BUF_TABLE_STEP - 1) { s += psx_gpu->enhancement_x_threshold; b++; @@ -779,6 +803,20 @@ static void update_enhancement_buf_table(psx_gpu_struct *psx_gpu) } } +static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, + u32 x0, u32 len) +{ + u32 x, b; + + for (x = x0, b = 0; x >= len; b++) + x -= len; + if (b > 3) + b = 3; + + memset(psx_gpu->enhancement_buf_by_x16 + x0 / ENH_BUF_TABLE_STEP, + b, (len + ENH_BUF_TABLE_STEP - 1) / ENH_BUF_TABLE_STEP); +} + #define select_enhancement_buf(psx_gpu) \ psx_gpu->enhancement_current_buf_ptr = \ select_enhancement_buf_ptr(psx_gpu, psx_gpu->saved_viewport_start_x) @@ -1413,18 +1451,25 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, case 0xE3: { + s16 viewport_start_x = list[0] & 0x3FF; + s16 viewport_start_y = (list[0] >> 10) & 0x1FF; u32 d; - psx_gpu->viewport_start_x = list[0] & 0x3FF; - psx_gpu->viewport_start_y = (list[0] >> 10) & 0x1FF; - 
psx_gpu->saved_viewport_start_x = psx_gpu->viewport_start_x; - psx_gpu->saved_viewport_start_y = psx_gpu->viewport_start_y; - d = psx_gpu->enhancement_x_threshold - psx_gpu->viewport_start_x; - if(unlikely(0 < d && d <= 8)) + if(viewport_start_x == psx_gpu->viewport_start_x && + viewport_start_y == psx_gpu->viewport_start_y) + { + break; + } + psx_gpu->viewport_start_x = viewport_start_x; + psx_gpu->viewport_start_y = viewport_start_y; + psx_gpu->saved_viewport_start_x = viewport_start_x; + psx_gpu->saved_viewport_start_y = viewport_start_y; + + d = (u32)psx_gpu->viewport_end_x - (u32)viewport_start_x + 1; + if((u32)psx_gpu->enhancement_x_threshold - d <= 16) { - // Grandia hack.. - psx_gpu->enhancement_x_threshold = psx_gpu->viewport_start_x; - update_enhancement_buf_table(psx_gpu); + update_enhancement_buf_table_from_x(psx_gpu, + viewport_start_x, d); } select_enhancement_buf(psx_gpu); @@ -1439,10 +1484,29 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, } case 0xE4: - psx_gpu->viewport_end_x = list[0] & 0x3FF; - psx_gpu->viewport_end_y = (list[0] >> 10) & 0x1FF; - psx_gpu->saved_viewport_end_x = psx_gpu->viewport_end_x; - psx_gpu->saved_viewport_end_y = psx_gpu->viewport_end_y; + { + s16 viewport_end_x = list[0] & 0x3FF; + s16 viewport_end_y = (list[0] >> 10) & 0x1FF; + u32 d; + + if(viewport_end_x == psx_gpu->viewport_end_x && + viewport_end_y == psx_gpu->viewport_end_y) + { + break; + } + + psx_gpu->viewport_end_x = viewport_end_x; + psx_gpu->viewport_end_y = viewport_end_y; + psx_gpu->saved_viewport_end_x = viewport_end_x; + psx_gpu->saved_viewport_end_y = viewport_end_y; + + d = (u32)viewport_end_x - (u32)psx_gpu->viewport_start_x + 1; + if((u32)psx_gpu->enhancement_x_threshold - d <= 16) + { + update_enhancement_buf_table_from_x(psx_gpu, + psx_gpu->viewport_start_x, d); + } + select_enhancement_buf(psx_gpu); #ifdef TEXTURE_CACHE_4BPP psx_gpu->viewport_mask = @@ -1452,6 +1516,7 @@ u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 
*list, u32 size, #endif SET_Ex(4, list[0]); break; + } case 0xE5: { -- cgit v1.2.3