diff options
| author | Exophase | 2011-12-23 02:47:19 +0200 | 
|---|---|---|
| committer | notaz | 2011-12-23 02:47:26 +0200 | 
| commit | 3867c6efed8d1cd6cd40f07cd46876f59da8912f (patch) | |
| tree | c6c7edcb38c17ab2affb67aeae79350bf9cd03ae | |
| parent | de35a4641acce078961c2e748e1b6da6fe8d8b70 (diff) | |
| download | pcsx_rearmed-3867c6efed8d1cd6cd40f07cd46876f59da8912f.tar.gz pcsx_rearmed-3867c6efed8d1cd6cd40f07cd46876f59da8912f.tar.bz2 pcsx_rearmed-3867c6efed8d1cd6cd40f07cd46876f59da8912f.zip  | |
psx_gpu: texture cache fix, updates
| -rw-r--r-- | plugins/gpu_neon/psx_gpu/psx_gpu.c | 122 | ||||
| -rw-r--r-- | plugins/gpu_neon/psx_gpu/psx_gpu.h | 10 | ||||
| -rw-r--r-- | plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 113 | ||||
| -rw-r--r-- | plugins/gpu_neon/psx_gpu/psx_gpu_main.c | 67 | ||||
| -rw-r--r-- | plugins/gpu_neon/psx_gpu/psx_gpu_parse.c | 46 | 
5 files changed, 218 insertions, 140 deletions
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 84848f8..2acfedc 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -20,7 +20,6 @@  u32 span_pixels = 0;  u32 span_pixel_blocks = 0; -u32 span_pixel_blocks_unaligned = 0;  u32 spans = 0;  u32 triangles = 0;  u32 sprites = 0; @@ -39,9 +38,6 @@ u32 texel_blocks_8bpp = 0;  u32 texel_blocks_16bpp = 0;  u32 texel_blocks_untextured = 0;  u32 blend_blocks = 0; -u32 untextured_pixels = 0; -u32 blend_pixels = 0; -u32 transparent_pixels = 0;  u32 render_buffer_flushes = 0;  u32 state_changes = 0;  u32 left_split_triangles = 0; @@ -49,8 +45,7 @@ u32 flat_triangles = 0;  u32 clipped_triangles = 0;  u32 zero_block_spans = 0;  u32 texture_cache_loads = 0; -u32 false_modulated_triangles = 0; -u32 false_modulated_sprites = 0; +u32 false_modulated_blocks = 0;  u32 reciprocal_table[512]; @@ -241,6 +236,7 @@ u32 invalidate_texture_cache_region_viewport(psx_gpu_struct *psx_gpu, u32 x1,  {    u32 mask = texture_region_mask(x1, y1, x2, y2) &     psx_gpu->viewport_mask; +    psx_gpu->dirty_textures_4bpp_mask |= mask;    psx_gpu->dirty_textures_8bpp_mask |= mask;    psx_gpu->dirty_textures_8bpp_alternate_mask |= mask; @@ -257,7 +253,7 @@ void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,  void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu)  {    u32 current_texture_page = psx_gpu->current_texture_page; -  u8 *texture_page_ptr = psx_gpu->texture_page_ptr; +  u8 *texture_page_ptr = psx_gpu->texture_page_base;    u16 *vram_ptr = psx_gpu->vram_ptr;    u32 texel_block; @@ -319,7 +315,7 @@ void update_texture_4bpp_cache(psx_gpu_struct *psx_gpu)  void update_texture_8bpp_cache_slice(psx_gpu_struct *psx_gpu,   u32 texture_page)  { -  u16 *texture_page_ptr = psx_gpu->texture_page_ptr; +  u16 *texture_page_ptr = psx_gpu->texture_page_base;    u16 *vram_ptr = psx_gpu->vram_ptr;    u32 tile_x, tile_y; @@ -413,8 +409,10 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)      render_block_handler->shade_blocks(psx_gpu);      render_block_handler->blend_blocks(psx_gpu); +#ifdef PROFILE      span_pixel_blocks += psx_gpu->num_blocks;      render_buffer_flushes++; +#endif      psx_gpu->num_blocks = 0;    } @@ -1748,6 +1746,8 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,    }                                                                            \  #define setup_blocks_add_blocks_direct()                                       \ +  texel_blocks_untextured += span_num_blocks;                                  \ +  span_pixel_blocks += span_num_blocks                                         \  #define setup_blocks_builder(shading, texturing, dithering, sw, target)        \ @@ -1787,7 +1787,6 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target(     \        s32 pixel_span = span_num_blocks * 8;                                    \        pixel_span -= __builtin_popcount(span_edge_data->right_mask & 0xFF);     \        span_pixels += pixel_span;                                               \ -      span_pixel_blocks_unaligned += (pixel_span + 7) / 8;                     \                                                                                 \        span_num_blocks--;                                                       \        while(span_num_blocks)                                                   \ @@ -2017,16 +2016,29 @@ void texture_blocks_16bpp(psx_gpu_struct *psx_gpu)  }                                                                              \ -#define shade_blocks_textured_modulated_shaded_primitive_load()                \ +#define shade_blocks_textured_false_modulated_check_dithered(target)           \ + +#define shade_blocks_textured_false_modulated_check_undithered(target)         \ +  if(psx_gpu->triangle_color == 0x808080)                                      \ +  {                                                                            \ +                                                                               \ +    shade_blocks_textured_unmodulated_##target(psx_gpu);                       \ +    false_modulated_blocks += num_blocks;                                      \ +    return;                                                                    \ +  }                                                                            \ + + +#define shade_blocks_textured_modulated_shaded_primitive_load(dithering,       \ + target)                                                                       \ -#define shade_blocks_textured_modulated_unshaded_primitive_load()              \ +#define shade_blocks_textured_modulated_unshaded_primitive_load(dithering,     \ + target)                                                                       \  {                                                                              \    u32 color = psx_gpu->triangle_color;                                         \    dup_8x8b(colors_r, color);                                                   \    dup_8x8b(colors_g, color >> 8);                                              \    dup_8x8b(colors_b, color >> 16);                                             \ -  if(psx_gpu->triangle_color == 0x808080)                                      \ -    false_modulated_triangles++;                                               \ +  shade_blocks_textured_false_modulated_check_##dithering(target);             \  }                                                                              \  #define shade_blocks_textured_modulated_shaded_block_load()                    \ @@ -2091,7 +2103,8 @@ void shade_blocks_##shading##_textured_modulated_##dithering##_##target(       \                                                                                 \    dup_8x16b(d128_0x8000, 0x8000);                                              \                                                                                 \ -  shade_blocks_textured_modulated_##shading##_primitive_load();                \ +  shade_blocks_textured_modulated_##shading##_primitive_load(dithering,        \ +   target);                                                                    \                                                                                 \    while(num_blocks)                                                            \    {                                                                            \ @@ -2157,6 +2170,9 @@ void shade_blocks_unshaded_textured_modulated_dithered_indirect(psx_gpu_struct  void shade_blocks_unshaded_textured_modulated_undithered_indirect(psx_gpu_struct   *psx_gpu); +void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu); +void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu); +  #ifndef NEON_BUILD  shade_blocks_textured_modulated_builder(shaded, dithered, direct); @@ -2204,8 +2220,38 @@ void shade_blocks_textured_unmodulated_##target(psx_gpu_struct *psx_gpu)       \    }                                                                            \  }                                                                              \ -void shade_blocks_textured_unmodulated_indirect(psx_gpu_struct *psx_gpu); -void shade_blocks_textured_unmodulated_direct(psx_gpu_struct *psx_gpu); +#define shade_blocks_textured_unmodulated_dithered_builder(target)             \ +void shade_blocks_textured_unmodulated_dithered_##target(psx_gpu_struct        \ + *psx_gpu)                                                                     \ +{                                                                              \ +  block_struct *block = psx_gpu->blocks;                                       \ +  u32 num_blocks = psx_gpu->num_blocks;                                        \ +  vec_8x16u draw_mask;                                                         \ +  vec_8x16u test_mask = psx_gpu->test_mask;                                    \ +  u32 draw_mask_bits;                                                          \ +                                                                               \ +  vec_8x16u pixels;                                                            \ +  shade_blocks_load_msb_mask_##target();                                       \ +                                                                               \ +  while(num_blocks)                                                            \ +  {                                                                            \ +    vec_8x16u zero_mask;                                                       \ +                                                                               \ +    draw_mask_bits = block->draw_mask_bits;                                    \ +    dup_8x16b(draw_mask, draw_mask_bits);                                      \ +    tst_8x16b(draw_mask, draw_mask, test_mask);                                \ +                                                                               \ +    pixels = block->texels;                                                    \ +                                                                               \ +    cmpeqz_8x16b(zero_mask, pixels);                                           \ +    or_8x16b(zero_mask, draw_mask, zero_mask);                                 \ +                                                                               \ +    shade_blocks_store_##target(zero_mask, pixels);                            \ +                                                                               \ +    num_blocks--;                                                              \ +    block++;                                                                   \ +  }                                                                            \ +}                                                                              \  #ifndef NEON_BUILD @@ -2773,11 +2819,15 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,    triangle_area = triangle_signed_area_x2(a->x, a->y, b->x, b->y, c->x, c->y); +#ifdef PROFILE    triangles++; +#endif    if(triangle_area == 0)    { +#ifdef PROFILE      trivial_rejects++; +#endif      return;    } @@ -2797,7 +2847,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,    if((y_bottom - y_top) >= 512)    { +#ifdef PROFILE      trivial_rejects++; +#endif      return;    } @@ -2821,14 +2873,18 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,    if((c->x - a->x) >= 1024)    { +#ifdef PROFILE      trivial_rejects++; +#endif      return;    }    if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x,     y_bottom) == 0)    { +#ifdef PROFILE      trivial_rejects++; +#endif      return;    } @@ -2922,7 +2978,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,        break;    } +#ifdef PROFILE    spans += psx_gpu->num_spans; +#endif    u32 render_state = flags &     (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |  @@ -2934,7 +2992,9 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,    {      psx_gpu->render_state = render_state;      flush_render_block_buffer(psx_gpu); +#ifdef PROFILE      state_changes++; +#endif    }    psx_gpu->primitive_type = PRIMITIVE_TYPE_TRIANGLE; @@ -3369,7 +3429,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,    u32 left_offset = u & 0x7;    u32 width_rounded = width + left_offset + 7; -  u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - left_offset); +  u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (s32)(x - left_offset);    u32 right_width = width_rounded & 0x7;    u32 block_width = width_rounded / 8;    u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8); @@ -3519,7 +3579,10 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,    {      blocks_remaining = block_width - 1;      num_blocks += block_width; + +#ifdef PROFILE      sprite_blocks += block_width; +#endif      if(num_blocks > MAX_BLOCKS)      { @@ -3654,6 +3717,10 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,    s32 x_right = x + width - 1;    s32 y_bottom = y + height - 1; +#ifdef PROFILE +  sprites++; +#endif +    if(invalidate_texture_cache_region_viewport(psx_gpu, x, y, x_right,     y_bottom) == 0)    { @@ -3685,10 +3752,10 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,    if((width <= 0) || (height <= 0))      return; -  sprites++; - +#ifdef PROFILE    span_pixels += width * height;    spans += height; +#endif    u32 render_state = flags &     (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | @@ -3701,7 +3768,9 @@ void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,    {      psx_gpu->render_state = render_state;      flush_render_block_buffer(psx_gpu); +#ifdef PROFILE      state_changes++; +#endif    }    psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE; @@ -4032,7 +4101,9 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags,    u32 control_mask; +#ifdef PROFILE    lines++; +#endif    if(vertex_a->x >= vertex_b->x)    { @@ -4276,8 +4347,6 @@ void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,    invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1); -  //printf("copy for %d, %d\n", width, height); -    for(draw_y = 0; draw_y < height; draw_y++)    {      for(draw_x = 0; draw_x < width; draw_x++) @@ -4329,15 +4398,6 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)    psx_gpu->test_mask = test_mask; -  psx_gpu->pixel_count_mode = 0; -  psx_gpu->pixel_compare_mode = 0; - -  psx_gpu->vram_pixel_counts_a = malloc(sizeof(u8) * 1024 * 512); -  psx_gpu->vram_pixel_counts_b = malloc(sizeof(u8) * 1024 * 512); -  memset(psx_gpu->vram_pixel_counts_a, 0, sizeof(u8) * 1024 * 512); -  memset(psx_gpu->vram_pixel_counts_b, 0, sizeof(u8) * 1024 * 512); -  psx_gpu->compare_vram = malloc(sizeof(u16) * 1024 * 512); -    psx_gpu->dirty_textures_4bpp_mask = 0xFFFFFFFF;    psx_gpu->dirty_textures_8bpp_mask = 0xFFFFFFFF;    psx_gpu->dirty_textures_8bpp_alternate_mask = 0xFFFFFFFF; @@ -4354,6 +4414,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)    psx_gpu->vram_ptr = vram; +  psx_gpu->texture_page_base = psx_gpu->vram_ptr;    psx_gpu->texture_page_ptr = psx_gpu->vram_ptr;    psx_gpu->clut_ptr = psx_gpu->vram_ptr; @@ -4374,7 +4435,6 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)    // d1: (2 3 6 7): y0    // d2: (4 5 6 7): x0 ^ y0 -    psx_gpu->dither_table[0] = dither_table_row(-4, 0, -3, 1);    psx_gpu->dither_table[1] = dither_table_row(2, -2, 3, -1);    psx_gpu->dither_table[2] = dither_table_row(-3, 1, -4, 0); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 49425ce..137dda9 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -130,12 +130,11 @@ typedef struct    u32 dirty_textures_8bpp_alternate_mask;    u32 triangle_color; -  u32 primitive_color; -    u32 dither_table[4];    struct render_block_handler_struct *render_block_handler;    void *texture_page_ptr; +  void *texture_page_base;    u16 *clut_ptr;    u16 *vram_ptr; @@ -189,13 +188,6 @@ typedef struct    u8 texture_4bpp_cache[32][256 * 256];    u8 texture_8bpp_even_cache[16][256 * 256];    u8 texture_8bpp_odd_cache[16][256 * 256]; - -  u32 pixel_count_mode; -  u32 pixel_compare_mode; - -  u8 *vram_pixel_counts_a; -  u8 *vram_pixel_counts_b; -  u16 *compare_vram;  } psx_gpu_struct;  typedef struct __attribute__((aligned(16))) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 54605b8..fd99798 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -38,10 +38,10 @@  #define psx_gpu_dirty_textures_8bpp_mask_offset           172  #define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 176  #define psx_gpu_triangle_color_offset                     180 -#define psx_gpu_primitive_color_offset                    184 -#define psx_gpu_dither_table_offset                       188 -#define psx_gpu_render_block_handler_offset               204 -#define psx_gpu_texture_page_ptr_offset                   208 +#define psx_gpu_dither_table_offset                       184 +#define psx_gpu_render_block_handler_offset               200 +#define psx_gpu_texture_page_ptr_offset                   204 +#define psx_gpu_texture_page_base_offset                  208  #define psx_gpu_clut_ptr_offset                           212  #define psx_gpu_vram_ptr_offset                           216 @@ -1955,6 +1955,8 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)    vdup.u16 colors, color    add span_edge_data, psx_gpu, #psx_gpu_span_edge_data_offset +  orr color, color, lsl #16 +   0:    ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ] @@ -1981,12 +1983,21 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)   3:    ldrb right_mask, [ span_edge_data, #edge_data_right_mask_offset ] -  eor right_mask, right_mask, #0xFF - 4: -  strh color, [ fb_ptr ], #2 -  movs right_mask, right_mask, lsr #1 -  bne 4b +  cmp right_mask, #0x0 +  beq 5f + +  tst right_mask, #0xF +  streq color, [ fb_ptr ], #4 +  moveq right_mask, right_mask, lsr #4 +  streq color, [ fb_ptr ], #4 + +  tst right_mask, #0x3 +  streq color, [ fb_ptr ], #4 +  moveq right_mask, right_mask, lsr #2 + +  tst right_mask, #0x1 +  streqh color, [ fb_ptr ]   1:    add span_edge_data, span_edge_data, #8 @@ -1997,6 +2008,9 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)    ldmia sp!, { r4 - r11, pc } + 5: +  vst1.u32 { colors }, [ fb_ptr ] +  bal 1b  #undef c_64 @@ -2337,6 +2351,7 @@ setup_blocks_shaded_untextured_indirect_builder(dithered)  #define draw_mask                                         q0  #define pixels_low                                        d16 +#define pixels_high                                       d17 @@ -2500,23 +2515,67 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct)       \   3:                                                                            \    setup_blocks_shaded_untextured_dither_a_##dithering();                       \                                                                                 \ -  ldrb right_mask, [ span_edge_data, #edge_data_right_mask_offset ];           \ +  ldrh right_mask, [ span_edge_data, #edge_data_right_mask_offset ];           \    setup_blocks_shaded_untextured_dither_b_##dithering();                       \                                                                                 \    vshr.u8 r_whole_8, r_whole_8, #3;                                            \ +  rbit right_mask, right_mask;                                                 \    vmov pixels, msb_mask;                                                       \    vbic.u8 gb_whole_8, gb_whole_8, d128_0x7;                                    \ -  eor right_mask, right_mask, #0xFF;                                           \ +  clz right_mask, right_mask;                                                  \                                                                                 \    vmlal.u8 pixels, r_whole_8, d64_1;                                           \    vmlal.u8 pixels, g_whole_8, d64_4;                                           \    vmlal.u8 pixels, b_whole_8, d64_128;                                         \                                                                                 \ +  ldr pc, [ pc, right_mask, lsl #2 ];                                          \ +  nop;                                                                         \ +  nop;                                                                         \ +  .word 4f;                                                                    \ +  .word 5f;                                                                    \ +  .word 6f;                                                                    \ +  .word 7f;                                                                    \ +  .word 8f;                                                                    \ +  .word 9f;                                                                    \ +  .word 10f;                                                                   \ +  .word 11f;                                                                   \ +                                                                               \   4:                                                                            \ -  vst1.u16 { pixels_low[0] }, [ fb_ptr ]!;                                     \ -  vext.16 pixels, pixels, #1;                                                  \ -  movs right_mask, right_mask, lsr #1;                                         \ -  bne 4b;                                                                      \ +  vst1.u16 { pixels_low[0] }, [ fb_ptr ];                                      \ +  bal 1f;                                                                      \ +                                                                               \ + 5:                                                                            \ +  vst1.u32 { pixels_low[0] }, [ fb_ptr ];                                      \ +  bal 1f;                                                                      \ +                                                                               \ + 6:                                                                            \ +  vst1.u32 { pixels_low[0] }, [ fb_ptr ]!;                                     \ +  vst1.u16 { pixels_low[2] }, [ fb_ptr ];                                      \ +  bal 1f;                                                                      \ +                                                                               \ + 7:                                                                            \ +  vst1.u32 { pixels_low }, [ fb_ptr ];                                         \ +  bal 1f;                                                                      \ +                                                                               \ + 8:                                                                            \ +  vst1.u32 { pixels_low }, [ fb_ptr ]!;                                        \ +  vst1.u16 { pixels_high[0] }, [ fb_ptr ];                                     \ +  bal 1f;                                                                      \ +                                                                               \ + 9:                                                                            \ +  vst1.u32 { pixels_low }, [ fb_ptr ]!;                                        \ +  vst1.u32 { pixels_high[0] }, [ fb_ptr ]!;                                    \ +  bal 1f;                                                                      \ +                                                                               \ + 10:                                                                           \ +  vst1.u32 { pixels_low }, [ fb_ptr ]!;                                        \ +  vst1.u32 { pixels_high[0] }, [ fb_ptr ]!;                                    \ +  vst1.u16 { pixels_high[2] }, [ fb_ptr ];                                     \ +  bal 1f;                                                                      \ +                                                                               \ + 11:                                                                           \ +  vst1.u32 { pixels }, [ fb_ptr ];                                             \ +  bal 1f;                                                                      \                                                                                 \   1:                                                                            \    add span_uvrg_offset, span_uvrg_offset, #16;                                 \ @@ -2957,6 +3016,8 @@ function(texture_blocks_16bpp)  #define psx_gpu                                  r0  #define num_blocks                               r1  #define color_ptr                                r2 +#define colors_scalar                            r2 +#define colors_scalar_compare                    r3  #define mask_msb_ptr                             r2  #define block_ptr_load_a                         r0 @@ -3013,9 +3074,21 @@ function(texture_blocks_16bpp)    add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset;                         \    vld1.u16 { msb_mask_low[], msb_mask_high[] }, [ mask_msb_ptr, :16 ]          \ -#define shade_blocks_textured_modulated_prologue_shaded()                      \ -#define shade_blocks_textured_modulated_prologue_unshaded()                    \ +#define shade_blocks_textured_modulated_prologue_shaded(dithering, target)     \ +   +#define shade_blocks_textured_false_modulation_check_undithered(target)        \ +  ldr colors_scalar, [ psx_gpu, #psx_gpu_triangle_color_offset ];              \ +  movw colors_scalar_compare, #0x8080;                                         \ +                                                                               \ +  movt colors_scalar_compare, #0x80;                                           \ +  cmp colors_scalar, colors_scalar_compare;                                    \ +  beq shade_blocks_textured_unmodulated_##target                               \ + +#define shade_blocks_textured_false_modulation_check_dithered(target)          \ + +#define shade_blocks_textured_modulated_prologue_unshaded(dithering, target)   \ +  shade_blocks_textured_false_modulation_check_##dithering(target);            \    add color_ptr, psx_gpu, #psx_gpu_triangle_color_offset;                      \    vld1.u32 { colors_r[] }, [ color_ptr, :32 ];                                 \    vdup.u8 colors_g, colors_r[1];                                               \ @@ -3086,13 +3159,13 @@ function(texture_blocks_16bpp)  .align 3;                                                                      \                                                                                 \  function(shade_blocks_##shading##_textured_modulated_##dithering##_##target)   \ +  shade_blocks_textured_modulated_prologue_##shading(dithering, target);       \    stmdb sp!, { r4 - r5, lr };                                                  \    ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ];                    \                                                                                 \    vld1.u32 { test_mask }, [ psx_gpu, :128 ];                                   \                                                                                 \    shade_blocks_textured_modulated_prologue_##target();                         \ -  shade_blocks_textured_modulated_prologue_##shading();                        \                                                                                 \    add block_ptr_load_a, psx_gpu, #psx_gpu_blocks_offset;                       \    mov c_32, #32;                                                               \ @@ -5271,7 +5344,7 @@ function(update_texture_4bpp_cache)    ldrb current_texture_page, [ psx_gpu, #psx_gpu_current_texture_page_offset ] -  ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ] +  ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_base_offset ]    ldr vram_ptr_a, [ psx_gpu, #psx_gpu_vram_ptr_offset ]    and current_texture_page_x, current_texture_page, #0xF @@ -5375,7 +5448,7 @@ function(update_texture_8bpp_cache_slice)    ldrb current_texture_page, [ psx_gpu, #psx_gpu_current_texture_page_offset ]    ldr vram_ptr_a, [ psx_gpu, #psx_gpu_vram_ptr_offset ] -  ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ] +  ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_base_offset ]    mov tile_y, #16    and texture_page_x, texture_page, #0xF diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c index 1eadc79..0f85604 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_main.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_main.c @@ -20,7 +20,6 @@  extern u32 span_pixels;  extern u32 span_pixel_blocks; -extern u32 span_pixel_blocks_unaligned;  extern u32 spans;  extern u32 triangles;  extern u32 sprites; @@ -38,9 +37,6 @@ extern u32 texel_blocks_8bpp;  extern u32 texel_blocks_16bpp;  extern u32 texel_blocks_untextured;  extern u32 blend_blocks; -extern u32 untextured_pixels; -extern u32 blend_pixels; -extern u32 transparent_pixels;  extern u32 render_buffer_flushes;  extern u32 state_changes;  extern u32 trivial_rejects; @@ -49,8 +45,7 @@ extern u32 flat_triangles;  extern u32 clipped_triangles;  extern u32 zero_block_spans;  extern u32 texture_cache_loads; -extern u32 false_modulated_triangles; -extern u32 false_modulated_sprites; +extern u32 false_modulated_blocks;  static u32 mismatches; @@ -64,7 +59,7 @@ typedef struct  static gpu_dump_struct state;  psx_gpu_struct __attribute__((aligned(256))) _psx_gpu; -u16 __attribute__((aligned(256))) _vram[1024 * 512]; +u16 __attribute__((aligned(256))) _vram[(1024 * 512) + 1024];  #define percent_of(numerator, denominator)                                     \    ((((double)(numerator)) / (denominator)) * 100.0)                            \ @@ -81,7 +76,6 @@ void clear_stats(void)    lines = 0;    span_pixels = 0;    span_pixel_blocks = 0; -  span_pixel_blocks_unaligned = 0;    spans = 0;    texels_4bpp = 0;    texels_8bpp = 0; @@ -91,9 +85,6 @@ void clear_stats(void)    texel_blocks_8bpp = 0;    texel_blocks_16bpp = 0;    blend_blocks = 0; -  untextured_pixels = 0; -  blend_pixels = 0; -  transparent_pixels = 0;    render_buffer_flushes = 0;    state_changes = 0;    trivial_rejects = 0; @@ -102,8 +93,7 @@ void clear_stats(void)    clipped_triangles = 0;    zero_block_spans = 0;    texture_cache_loads = 0; -  false_modulated_triangles = 0; -  false_modulated_sprites = 0; +  false_modulated_blocks = 0;  }  void update_screen(psx_gpu_struct *psx_gpu, SDL_Surface *screen) @@ -165,7 +155,7 @@ int main(int argc, char *argv[])    size = ftell(list_file);    fseek(list_file, 0, SEEK_SET);    //size = 0; -   +    list = malloc(size);    fread(list, 1, size, list_file);    fclose(list_file); @@ -175,44 +165,26 @@ int main(int argc, char *argv[])      SDL_Init(SDL_INIT_EVERYTHING);      screen = SDL_SetVideoMode(1024, 512, 32, 0);    } -   -  initialize_psx_gpu(psx_gpu, _vram);  #ifdef NEON_BUILD    system("ofbset -fb /dev/fb1 -mem 6291456 -en 0");    u32 fbdev_handle = open("/dev/fb1", O_RDWR); -  psx_gpu->vram_ptr = (mmap((void *)0x50000000, 1024 * 1024 * 2, PROT_READ | PROT_WRITE, +  u16 *vram_ptr = +  vram_ptr = (mmap((void *)0x50000000, 1024 * 1024 * 2, PROT_READ | PROT_WRITE,     MAP_SHARED | 0xA0000000, fbdev_handle, 0)); -  psx_gpu->vram_ptr += 64; -#endif - +  vram_ptr += 64; +  initialize_psx_gpu(psx_gpu, vram_ptr + 64); +#else +  initialize_psx_gpu(psx_gpu, _vram + 64); +#endif  #ifdef NEON_BUILD    //triangle_benchmark(psx_gpu);    //return 0;  #endif -#ifdef FULL_COMPARE_MODE -  psx_gpu->pixel_count_mode = 1;  -  psx_gpu->pixel_compare_mode = 0;    memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2); -  //render_block_fill(psx_gpu, 0, 0, 0, 1024, 512); -  gpu_parse(psx_gpu, list, size); - -  psx_gpu->pixel_count_mode = 0; -  psx_gpu->pixel_compare_mode = 1; -  memcpy(psx_gpu->compare_vram, state.vram, 1024 * 512 * 2);  -  memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2); -  //render_block_fill(psx_gpu, 0, 0, 0, 1024, 512); -  clear_stats(); -  gpu_parse(psx_gpu, list, size); -  flush_render_block_buffer(psx_gpu); -#else -  memcpy(psx_gpu->vram_ptr, state.vram, 1024 * 512 * 2); - -  psx_gpu->pixel_count_mode = 0; -  psx_gpu->pixel_compare_mode = 0;    clear_stats(); @@ -232,7 +204,7 @@ int main(int argc, char *argv[])    gpu_parse(psx_gpu, list, size);    flush_render_block_buffer(psx_gpu); -  printf("%s: ", argv[1]); +  printf("%-64s: ", argv[1]);  #ifdef NEON_BUILD    u32 cycles_elapsed = get_counter() - cycles; @@ -265,17 +237,14 @@ int main(int argc, char *argv[])      }    }  #endif -#endif  #if 0    printf("\n"); -  printf("  %d pixels, %d pixel blocks (%d unaligned), %d spans\n" -   "   (%lf pixels per block (%lf unaligned, r %lf), %lf pixels per span),\n" +  printf("  %d pixels, %d pixel blocks, %d spans\n" +   "   (%lf pixels per block, %lf pixels per span),\n"     "   %lf blocks per span (%lf per non-zero span), %lf overdraw)\n\n", -   span_pixels, span_pixel_blocks, span_pixel_blocks_unaligned, spans, +   span_pixels, span_pixel_blocks, spans,     (double)span_pixels / span_pixel_blocks, -   (double)span_pixels / span_pixel_blocks_unaligned, -   (double)span_pixel_blocks / span_pixel_blocks_unaligned,     (double)span_pixels / spans,     (double)span_pixel_blocks / spans,      (double)span_pixel_blocks / (spans - zero_block_spans), @@ -283,10 +252,10 @@ int main(int argc, char *argv[])     ((psx_gpu->viewport_end_x - psx_gpu->viewport_start_x) *      (psx_gpu->viewport_end_y - psx_gpu->viewport_start_y))); -  printf("  %d triangles (%d false modulated)\n" +  printf("  %d triangles\n"     "   (%d trivial rejects, %lf%% flat, %lf%% left split, %lf%% clipped)\n"     "   (%lf pixels per triangle, %lf rows per triangle)\n\n", -   triangles, false_modulated_triangles, trivial_rejects, +   triangles, trivial_rejects,     percent_of(flat_triangles, triangles),     percent_of(left_split_triangles, triangles),     percent_of(clipped_triangles, triangles), @@ -306,6 +275,8 @@ int main(int argc, char *argv[])     percent_of(sprite_blocks, span_pixel_blocks));    printf("   %7d blended blocks     (%lf%%)\n", blend_blocks,     percent_of(blend_blocks, span_pixel_blocks)); +  printf("   %7d false-mod blocks   (%lf%%)\n", false_modulated_blocks, +   percent_of(false_modulated_blocks, span_pixel_blocks));    printf("\n");    printf("  %lf blocks per render buffer flush\n", (double)span_pixel_blocks /     render_buffer_flushes); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index f6143ee..fc9f3fb 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -38,61 +38,53 @@ const u8 command_lengths[256] =  void update_texture_ptr(psx_gpu_struct *psx_gpu)  { +  u8 *texture_base;    u8 *texture_ptr;    switch((psx_gpu->render_state_base >> 8) & 0x3)    {      default:      case TEXTURE_MODE_4BPP: -#ifdef TEXTURE_CACHE_4BPP -      texture_ptr = psx_gpu->texture_4bpp_cache[psx_gpu->current_texture_page]; +      texture_base = psx_gpu->texture_4bpp_cache[psx_gpu->current_texture_page]; + +      texture_ptr = texture_base;        texture_ptr += psx_gpu->texture_window_x & 0xF;        texture_ptr += (psx_gpu->texture_window_y & 0xF) << 4;        texture_ptr += (psx_gpu->texture_window_x >> 4) << 8;        texture_ptr += (psx_gpu->texture_window_y >> 4) << 12; -#else -      texture_ptr = (u8 *)(psx_gpu->vram_ptr); -      texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128; -      texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048; -      texture_ptr += psx_gpu->texture_window_x / 2; -      texture_ptr += (psx_gpu->texture_window_y) * 2048; -#endif        break;      case TEXTURE_MODE_8BPP: -#ifdef TEXTURE_CACHE_8BPP        if(psx_gpu->current_texture_page & 0x1)        { -        texture_ptr = +        texture_base =           psx_gpu->texture_8bpp_odd_cache[psx_gpu->current_texture_page >> 1];        }        else        { -        texture_ptr = +        texture_base =           psx_gpu->texture_8bpp_even_cache[psx_gpu->current_texture_page >> 1];        } +      texture_ptr = texture_base; +      texture_ptr += psx_gpu->texture_window_x & 0xF;        texture_ptr += (psx_gpu->texture_window_y & 0xF) << 4;        texture_ptr += (psx_gpu->texture_window_x >> 4) << 8;        texture_ptr += (psx_gpu->texture_window_y >> 4) << 12; -#else -      texture_ptr = (u8 *)(psx_gpu->vram_ptr); -      texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128; -      texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048; -      texture_ptr += psx_gpu->texture_window_x; -      texture_ptr += (psx_gpu->texture_window_y) * 2048; -#endif        break;      case TEXTURE_MODE_16BPP: -      texture_ptr = (u8 *)(psx_gpu->vram_ptr); -      texture_ptr += (psx_gpu->current_texture_page & 0xF) * 128; -      texture_ptr += ((psx_gpu->current_texture_page >> 4) * 256) * 2048; +      texture_base = (u8 *)(psx_gpu->vram_ptr); +      texture_base += (psx_gpu->current_texture_page & 0xF) * 128; +      texture_base += ((psx_gpu->current_texture_page >> 4) * 256) * 2048; + +      texture_ptr = texture_base;        texture_ptr += psx_gpu->texture_window_x * 2;        texture_ptr += (psx_gpu->texture_window_y) * 2048;        break;    } +  psx_gpu->texture_page_base = texture_base;    psx_gpu->texture_page_ptr = texture_ptr;    } @@ -447,8 +439,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)          u32 width = list_s16[4] & 0x3FF;          u32 height = list_s16[5] & 0x1FF; -        psx_gpu->primitive_color = list[0] & 0xFFFFFF; -          render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]);    			break;        } @@ -461,7 +451,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)          u32 width = list_s16[6] & 0x3FF;          u32 height = list_s16[7] & 0x1FF; -        psx_gpu->primitive_color = list[0] & 0xFFFFFF;          set_clut(psx_gpu, list_s16[5]);          render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, width, height, @@ -477,8 +466,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)          s32 x = list_s16[2] + psx_gpu->offset_x;          s32 y = list_s16[3] + psx_gpu->offset_y; -        psx_gpu->primitive_color = list[0] & 0xFFFFFF; -          render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]);    			break;        } @@ -491,8 +478,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)          s32 x = list_s16[2] + psx_gpu->offset_x;          s32 y = list_s16[3] + psx_gpu->offset_y; -        psx_gpu->primitive_color = list[0] & 0xFFFFFF; -          render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]);    			break;        } @@ -506,7 +491,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)          s32 y = list_s16[3] + psx_gpu->offset_y;          u32 uv = list_s16[4]; -        psx_gpu->primitive_color = list[0] & 0xFFFFFF;          set_clut(psx_gpu, list_s16[5]);          render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 8, 8, @@ -522,7 +506,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)          s32 x = list_s16[2] + psx_gpu->offset_x;          s32 y = list_s16[3] + psx_gpu->offset_y; -        psx_gpu->primitive_color = list[0] & 0xFFFFFF;          render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]);    			break;        } @@ -536,7 +519,6 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)          s32 y = list_s16[3] + psx_gpu->offset_y;          u32 uv = list_s16[4]; -        psx_gpu->primitive_color = list[0] & 0xFFFFFF;          set_clut(psx_gpu, list_s16[5]);          render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF, 16, 16,  | 
