about | summary | refs | log | tree | commit | diff
path: root/plugins/gpu_neon
diff options
context:
space:
mode:
author notaz 2012-10-21 02:42:03 +0300
committer notaz 2012-10-23 01:28:24 +0300
commit 59d15d23d97d4347d8046057013f8979db0914f0 (patch)
tree 45a558c343710a33e87207daaa3c011246ce2c36 /plugins/gpu_neon
parent fc6cef7d739c850a10bca2a19855654aa78897a8 (diff)
download pcsx_rearmed-59d15d23d97d4347d8046057013f8979db0914f0.tar.gz
pcsx_rearmed-59d15d23d97d4347d8046057013f8979db0914f0.tar.bz2
pcsx_rearmed-59d15d23d97d4347d8046057013f8979db0914f0.zip
psx_gpu: consolidate C code, implement enhancement asm
Diffstat (limited to 'plugins/gpu_neon')
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu.c 410
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c 511
-rw-r--r-- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S 698
3 files changed, 947 insertions, 672 deletions
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c
index 2d552aa..3de2ece 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c
@@ -3185,14 +3185,17 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
#endif
-#define setup_sprite_tiled_initialize_4bpp() \
+#define setup_sprite_tiled_initialize_4bpp_clut() \
u16 *clut_ptr = psx_gpu->clut_ptr; \
vec_8x16u clut_a, clut_b; \
vec_16x8u clut_low, clut_high; \
\
load_8x16b(clut_a, clut_ptr); \
load_8x16b(clut_b, clut_ptr + 8); \
- unzip_16x8b(clut_low, clut_high, clut_a, clut_b); \
+ unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \
+
+#define setup_sprite_tiled_initialize_4bpp() \
+ setup_sprite_tiled_initialize_4bpp_clut(); \
\
if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask) \
update_texture_4bpp_cache(psx_gpu) \
@@ -3209,10 +3212,6 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
load_64b(texels, texture_block_ptr) \
-#define setup_sprite_tile_setup_block_yes(side, offset, texture_mode) \
-
-#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \
-
#define setup_sprite_tile_add_blocks(tile_num_blocks) \
num_blocks += tile_num_blocks; \
sprite_blocks += tile_num_blocks; \
@@ -3358,34 +3357,36 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
#define setup_sprite_tile_column_edge_post_adjust_full(edge) \
-#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \
+#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \
+ x4mode) \
do \
{ \
sub_tile_height = column_data; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \
+ setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
+ setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \
} while(0) \
-#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \
+#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \
+ x4mode) \
do \
{ \
u32 tiles_remaining = column_data >> 16; \
sub_tile_height = column_data & 0xFF; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
+ setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
tiles_remaining -= 1; \
\
while(tiles_remaining) \
{ \
sub_tile_height = 16; \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
tiles_remaining--; \
} \
\
sub_tile_height = (column_data >> 8) & 0xFF; \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
+ setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \
} while(0) \
@@ -3398,15 +3399,18 @@ do \
column_data |= (tile_height - 1) << 16 \
+#define RIGHT_MASK_BIT_SHIFT 8
+#define RIGHT_MASK_BIT_SHIFT_4x 16
+
#define setup_sprite_tile_column_width_single(texture_mode, multi_height, \
- edge_mode, edge) \
+ edge_mode, edge, x4mode) \
{ \
setup_sprite_column_data_##multi_height(); \
left_mask_bits = left_block_mask | right_block_mask; \
- right_mask_bits = left_mask_bits >> 8; \
+ right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
\
setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \
- texture_mode); \
+ texture_mode, x4mode); \
} \
#define setup_sprite_tiled_advance_column() \
@@ -3414,18 +3418,22 @@ do \
if((texture_offset_base & 0xF00) == 0) \
texture_offset_base -= (0x100 + 0xF00) \
+#define FB_PTR_MULTIPLIER 1
+#define FB_PTR_MULTIPLIER_4x 2
+
#define setup_sprite_tile_column_width_multi(texture_mode, multi_height, \
- left_mode, right_mode) \
+ left_mode, right_mode, x4mode) \
{ \
setup_sprite_column_data_##multi_height(); \
- s32 fb_ptr_advance_column = 16 - (1024 * height); \
+ s32 fb_ptr_advance_column = (16 - (1024 * height)) \
+ * FB_PTR_MULTIPLIER##x4mode; \
\
tile_width -= 2; \
left_mask_bits = left_block_mask; \
- right_mask_bits = left_mask_bits >> 8; \
+ right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
\
setup_sprite_tile_column_height_##multi_height(left_mode, right, \
- texture_mode); \
+ texture_mode, x4mode); \
fb_ptr += fb_ptr_advance_column; \
\
left_mask_bits = 0x00; \
@@ -3434,22 +3442,297 @@ do \
while(tile_width) \
{ \
setup_sprite_tiled_advance_column(); \
- setup_sprite_tile_column_height_##multi_height(full, none, texture_mode); \
+ setup_sprite_tile_column_height_##multi_height(full, none, \
+ texture_mode, x4mode); \
fb_ptr += fb_ptr_advance_column; \
tile_width--; \
} \
\
left_mask_bits = right_block_mask; \
- right_mask_bits = left_mask_bits >> 8; \
+ right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
\
setup_sprite_tiled_advance_column(); \
setup_sprite_tile_column_height_##multi_height(right_mode, left, \
- texture_mode); \
+ texture_mode, x4mode); \
+} \
+
+
+/* 4x stuff */
+#define setup_sprite_tiled_initialize_4bpp_4x() \
+ setup_sprite_tiled_initialize_4bpp_clut() \
+
+#define setup_sprite_tiled_initialize_8bpp_4x() \
+
+
+#define setup_sprite_tile_full_4bpp_4x(edge) \
+{ \
+ vec_8x8u texels_low, texels_high; \
+ vec_8x16u pixels, pixels_wide; \
+ setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \
+ u32 left_mask_bits_a = left_mask_bits & 0xFF; \
+ u32 left_mask_bits_b = left_mask_bits >> 8; \
+ u32 right_mask_bits_a = right_mask_bits & 0xFF; \
+ u32 right_mask_bits_b = right_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); \
+ tbl_16(texels_low, texels, clut_low); \
+ tbl_16(texels_high, texels, clut_high); \
+ zip_8x16b(pixels, texels_low, texels_high); \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 8; \
+ block++; \
+ \
+ setup_sprite_tile_fetch_texel_block_8bpp(8); \
+ tbl_16(texels_low, texels, clut_low); \
+ tbl_16(texels_high, texels, clut_high); \
+ zip_8x16b(pixels, texels_low, texels_high); \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 16; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024 + 16; \
+ block++; \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 24; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 24; \
+ block++; \
+ \
+ fb_ptr += 2048; \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; \
} \
+#define setup_sprite_tile_half_4bpp_4x(edge) \
+{ \
+ vec_8x8u texels_low, texels_high; \
+ vec_8x16u pixels, pixels_wide; \
+ setup_sprite_tile_add_blocks(sub_tile_height * 4); \
+ u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \
+ u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); \
+ tbl_16(texels_low, texels, clut_low); \
+ tbl_16(texels_high, texels, clut_high); \
+ zip_8x16b(pixels, texels_low, texels_high); \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 8; \
+ block++; \
+ \
+ fb_ptr += 2048; \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; \
+} \
-#define setup_sprite_tiled_builder(texture_mode) \
-void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \
+
+#define setup_sprite_tile_full_8bpp_4x(edge) \
+{ \
+ setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \
+ vec_16x8u texels_wide; \
+ u32 left_mask_bits_a = left_mask_bits & 0xFF; \
+ u32 left_mask_bits_b = left_mask_bits >> 8; \
+ u32 right_mask_bits_a = right_mask_bits & 0xFF; \
+ u32 right_mask_bits_b = right_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); \
+ zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 8; \
+ block++; \
+ \
+ setup_sprite_tile_fetch_texel_block_8bpp(8); \
+ zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 16; \
+ block++; \
+ \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024 + 16; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 24; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 24 + 1024; \
+ block++; \
+ \
+ fb_ptr += 2048; \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; \
+} \
+
+#define setup_sprite_tile_half_8bpp_4x(edge) \
+{ \
+ setup_sprite_tile_add_blocks(sub_tile_height * 4); \
+ vec_16x8u texels_wide; \
+ u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \
+ u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); \
+ zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8 + 1024; \
+ block++; \
+ \
+ fb_ptr += 2048; \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; \
+} \
+
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \
+ texture_offset = texture_offset_base + 8; \
+ fb_ptr += 16 \
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \
+ texture_offset = texture_offset_base \
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \
+ setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \
+
+#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \
+ texture_offset = texture_offset_base \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \
+ fb_ptr -= 16 \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \
+ setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \
+
+#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \
+
+
+#define setup_sprite_offset_u_adjust() \
+
+#define setup_sprite_comapre_left_block_mask() \
+ ((left_block_mask & 0xFF) == 0xFF) \
+
+#define setup_sprite_comapre_right_block_mask() \
+ (((right_block_mask >> 8) & 0xFF) == 0xFF) \
+
+
+#define setup_sprite_offset_u_adjust_4x() \
+ offset_u *= 2; \
+ offset_u_right = offset_u_right * 2 + 1 \
+
+#define setup_sprite_comapre_left_block_mask_4x() \
+ ((left_block_mask & 0xFFFF) == 0xFFFF) \
+
+#define setup_sprite_comapre_right_block_mask_4x() \
+ (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) \
+
+
+#define setup_sprite_tiled_builder(texture_mode, x4mode) \
+void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\
s32 u, s32 v, s32 width, s32 height, u32 color) \
{ \
s32 offset_u = u & 0xF; \
@@ -3461,8 +3744,10 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \
s32 tile_width = width_rounded / 16; \
u32 offset_u_right = width_rounded & 0xF; \
\
- u32 left_block_mask = ~(0xFFFF << offset_u); \
- u32 right_block_mask = 0xFFFE << offset_u_right; \
+ setup_sprite_offset_u_adjust##x4mode(); \
+ \
+ u32 left_block_mask = ~(0xFFFFFFFF << offset_u); \
+ u32 right_block_mask = 0xFFFFFFFE << offset_u_right; \
\
u32 left_mask_bits; \
u32 right_mask_bits; \
@@ -3479,19 +3764,19 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \
u32 texture_offset_base = texture_offset; \
u32 control_mask; \
\
- u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - offset_u); \
+ u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u); \
u32 num_blocks = psx_gpu->num_blocks; \
block_struct *block = psx_gpu->blocks + num_blocks; \
\
u16 *texture_block_ptr; \
vec_8x8u texels; \
\
- setup_sprite_tiled_initialize_##texture_mode(); \
+ setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
\
control_mask = tile_width == 1; \
control_mask |= (tile_height == 1) << 1; \
- control_mask |= ((left_block_mask & 0xFF) == 0xFF) << 2; \
- control_mask |= (((right_block_mask >> 8) & 0xFF) == 0xFF) << 3; \
+ control_mask |= setup_sprite_comapre_left_block_mask##x4mode() << 2; \
+ control_mask |= setup_sprite_comapre_right_block_mask##x4mode() << 3; \
\
sprites_##texture_mode++; \
\
@@ -3499,64 +3784,77 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \
{ \
default: \
case 0x0: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \
+ x4mode); \
break; \
\
case 0x1: \
- setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \
+ setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \
+ x4mode); \
break; \
\
case 0x2: \
- setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \
+ x4mode); \
break; \
\
case 0x3: \
- setup_sprite_tile_column_width_single(texture_mode, single, full, none); \
+ setup_sprite_tile_column_width_single(texture_mode, single, full, none, \
+ x4mode); \
break; \
\
case 0x4: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \
+ x4mode); \
break; \
\
case 0x5: \
- setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \
+ setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \
+ x4mode); \
break; \
\
case 0x6: \
- setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \
+ x4mode); \
break; \
\
case 0x7: \
- setup_sprite_tile_column_width_single(texture_mode, single, half, right);\
+ setup_sprite_tile_column_width_single(texture_mode, single, half, right, \
+ x4mode); \
break; \
\
case 0x8: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \
+ x4mode); \
break; \
\
case 0x9: \
- setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \
+ setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \
+ x4mode); \
break; \
\
case 0xA: \
- setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \
+ x4mode); \
break; \
\
case 0xB: \
- setup_sprite_tile_column_width_single(texture_mode, single, half, left); \
+ setup_sprite_tile_column_width_single(texture_mode, single, half, left, \
+ x4mode); \
break; \
\
case 0xC: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \
+ x4mode); \
break; \
\
case 0xE: \
- setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \
+ x4mode); \
break; \
} \
} \
-
void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
s32 width, s32 height, u32 color);
void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
@@ -3564,9 +3862,19 @@ void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
s32 width, s32 height, u32 color);
+void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
+ s32 width, s32 height, u32 color);
+void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
+ s32 width, s32 height, u32 color);
+void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
+ s32 width, s32 height, u32 color);
+
#ifndef NEON_BUILD
-setup_sprite_tiled_builder(4bpp);
-setup_sprite_tiled_builder(8bpp);
+setup_sprite_tiled_builder(4bpp,);
+setup_sprite_tiled_builder(8bpp,);
+
+setup_sprite_tiled_builder(4bpp,_4x);
+setup_sprite_tiled_builder(8bpp,_4x);
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c
index f8afcf1..19c4a9e 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c
@@ -1,513 +1,4 @@
-#define setup_sprite_tiled_initialize_4bpp_4x() \
- u16 *clut_ptr = psx_gpu->clut_ptr; \
- vec_8x16u clut_a, clut_b; \
- vec_16x8u clut_low, clut_high; \
- \
- load_8x16b(clut_a, clut_ptr); \
- load_8x16b(clut_b, clut_ptr + 8); \
- unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \
-
-
-#define setup_sprite_tiled_initialize_8bpp_4x() \
-
-
-#define setup_sprite_tile_fetch_texel_block_8bpp_4x(offset) \
- texture_block_ptr = psx_gpu->texture_page_ptr + \
- ((texture_offset + offset) & texture_mask); \
- \
- load_64b(texels, texture_block_ptr) \
-
-
-#define setup_sprite_tile_setup_block_yes_4x(side, offset, texture_mode) \
-
-#define setup_sprite_tile_setup_block_no_4x(side, offset, texture_mode) \
-
-#define setup_sprite_tile_add_blocks_4x(tile_num_blocks) \
- num_blocks += tile_num_blocks * 4; \
- sprite_blocks += tile_num_blocks * 4; \
- \
- if(num_blocks > MAX_BLOCKS) \
- { \
- flush_render_block_buffer(psx_gpu); \
- num_blocks = tile_num_blocks * 4; \
- block = psx_gpu->blocks; \
- } \
-
-#define setup_sprite_tile_full_4bpp_4x(edge) \
-{ \
- vec_8x8u texels_low, texels_high; \
- vec_8x16u pixels, pixels_wide; \
- setup_sprite_tile_add_blocks_4x(sub_tile_height * 2); \
- u32 left_mask_bits_a = left_mask_bits & 0xFF; \
- u32 left_mask_bits_b = left_mask_bits >> 8; \
- u32 right_mask_bits_a = right_mask_bits & 0xFF; \
- u32 right_mask_bits_b = right_mask_bits >> 8; \
- \
- while(sub_tile_height) \
- { \
- setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \
- tbl_16(texels_low, texels, clut_low); \
- tbl_16(texels_high, texels, clut_high); \
- zip_8x16b(pixels, texels_low, texels_high); \
- \
- zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
- block->texels = pixels_wide; \
- block->draw_mask_bits = left_mask_bits_a; \
- block->fb_ptr = fb_ptr; \
- block++; \
- \
- block->texels = pixels_wide; \
- block->draw_mask_bits = left_mask_bits_a; \
- block->fb_ptr = fb_ptr + 1024; \
- block++; \
- \
- zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
- block->texels = pixels_wide; \
- block->draw_mask_bits = left_mask_bits_b; \
- block->fb_ptr = fb_ptr + 8; \
- block++; \
- \
- block->texels = pixels_wide; \
- block->draw_mask_bits = left_mask_bits_b; \
- block->fb_ptr = fb_ptr + 1024 + 8; \
- block++; \
- \
- setup_sprite_tile_fetch_texel_block_8bpp_4x(8); \
- tbl_16(texels_low, texels, clut_low); \
- tbl_16(texels_high, texels, clut_high); \
- zip_8x16b(pixels, texels_low, texels_high); \
- \
- zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
- block->texels = pixels_wide; \
- block->draw_mask_bits = right_mask_bits_a; \
- block->fb_ptr = fb_ptr + 16; \
- block++; \
- \
- block->texels = pixels_wide; \
- block->draw_mask_bits = right_mask_bits_a; \
- block->fb_ptr = fb_ptr + 1024 + 16; \
- block++; \
- \
- zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
- block->texels = pixels_wide; \
- block->draw_mask_bits = right_mask_bits_b; \
- block->fb_ptr = fb_ptr + 24; \
- block++; \
- \
- block->texels = pixels_wide; \
- block->draw_mask_bits = right_mask_bits_b; \
- block->fb_ptr = fb_ptr + 1024 + 24; \
- block++; \
- \
- fb_ptr += 2048; \
- texture_offset += 0x10; \
- sub_tile_height--; \
- } \
- texture_offset += 0xF00; \
- psx_gpu->num_blocks = num_blocks; \
-} \
-
-#define setup_sprite_tile_half_4bpp_4x(edge) \
-{ \
- vec_8x8u texels_low, texels_high; \
- vec_8x16u pixels, pixels_wide; \
- setup_sprite_tile_add_blocks_4x(sub_tile_height); \
- u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \
- u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \
- \
- while(sub_tile_height) \
- { \
- setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \
- tbl_16(texels_low, texels, clut_low); \
- tbl_16(texels_high, texels, clut_high); \
- zip_8x16b(pixels, texels_low, texels_high); \
- \
- zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
- block->texels = pixels_wide; \
- block->draw_mask_bits = edge##_mask_bits_a; \
- block->fb_ptr = fb_ptr; \
- block++; \
- \
- block->texels = pixels_wide; \
- block->draw_mask_bits = edge##_mask_bits_a; \
- block->fb_ptr = fb_ptr + 1024; \
- block++; \
- \
- zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
- block->texels = pixels_wide; \
- block->draw_mask_bits = edge##_mask_bits_b; \
- block->fb_ptr = fb_ptr + 8; \
- block++; \
- \
- block->texels = pixels_wide; \
- block->draw_mask_bits = edge##_mask_bits_b; \
- block->fb_ptr = fb_ptr + 1024 + 8; \
- block++; \
- \
- fb_ptr += 2048; \
- texture_offset += 0x10; \
- sub_tile_height--; \
- } \
- texture_offset += 0xF00; \
- psx_gpu->num_blocks = num_blocks; \
-} \
-
-
-#define setup_sprite_tile_full_8bpp_4x(edge) \
-{ \
- setup_sprite_tile_add_blocks_4x(sub_tile_height * 2); \
- vec_16x8u texels_wide; \
- u32 left_mask_bits_a = left_mask_bits & 0xFF; \
- u32 left_mask_bits_b = left_mask_bits >> 8; \
- u32 right_mask_bits_a = right_mask_bits & 0xFF; \
- u32 right_mask_bits_b = right_mask_bits >> 8; \
- \
- while(sub_tile_height) \
- { \
- setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \
- zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
- block->r = texels_wide.low; \
- block->draw_mask_bits = left_mask_bits_a; \
- block->fb_ptr = fb_ptr; \
- block++; \
- \
- block->r = texels_wide.low; \
- block->draw_mask_bits = left_mask_bits_a; \
- block->fb_ptr = fb_ptr + 1024; \
- block++; \
- \
- block->r = texels_wide.high; \
- block->draw_mask_bits = left_mask_bits_b; \
- block->fb_ptr = fb_ptr + 8; \
- block++; \
- \
- block->r = texels_wide.high; \
- block->draw_mask_bits = left_mask_bits_b; \
- block->fb_ptr = fb_ptr + 1024 + 8; \
- block++; \
- \
- setup_sprite_tile_fetch_texel_block_8bpp_4x(8); \
- zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
- block->r = texels_wide.low; \
- block->draw_mask_bits = right_mask_bits_a; \
- block->fb_ptr = fb_ptr + 16; \
- block++; \
- \
- block->r = texels_wide.low; \
- block->draw_mask_bits = right_mask_bits_a; \
- block->fb_ptr = fb_ptr + 1024 + 16; \
- block++; \
- \
- block->r = texels_wide.high; \
- block->draw_mask_bits = right_mask_bits_b; \
- block->fb_ptr = fb_ptr + 24; \
- block++; \
- \
- block->r = texels_wide.high; \
- block->draw_mask_bits = right_mask_bits_b; \
- block->fb_ptr = fb_ptr + 24 + 1024; \
- block++; \
- \
- fb_ptr += 2048; \
- texture_offset += 0x10; \
- sub_tile_height--; \
- } \
- texture_offset += 0xF00; \
- psx_gpu->num_blocks = num_blocks; \
-} \
-
-#define setup_sprite_tile_half_8bpp_4x(edge) \
-{ \
- setup_sprite_tile_add_blocks_4x(sub_tile_height); \
- vec_16x8u texels_wide; \
- u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \
- u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \
- \
- while(sub_tile_height) \
- { \
- setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \
- zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
- block->r = texels_wide.low; \
- block->draw_mask_bits = edge##_mask_bits_a; \
- block->fb_ptr = fb_ptr; \
- block++; \
- \
- block->r = texels_wide.low; \
- block->draw_mask_bits = edge##_mask_bits_a; \
- block->fb_ptr = fb_ptr + 1024; \
- block++; \
- \
- block->r = texels_wide.high; \
- block->draw_mask_bits = edge##_mask_bits_b; \
- block->fb_ptr = fb_ptr + 8; \
- block++; \
- \
- block->r = texels_wide.high; \
- block->draw_mask_bits = edge##_mask_bits_b; \
- block->fb_ptr = fb_ptr + 8 + 1024; \
- block++; \
- \
- fb_ptr += 2048; \
- texture_offset += 0x10; \
- sub_tile_height--; \
- } \
- texture_offset += 0xF00; \
- psx_gpu->num_blocks = num_blocks; \
-} \
-
-
-#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \
- texture_offset = texture_offset_base + 8; \
- fb_ptr += 16 \
-
-#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \
- texture_offset = texture_offset_base \
-
-#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \
- setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \
-
-#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \
- texture_offset = texture_offset_base \
-
-#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \
- fb_ptr -= 16 \
-
-#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \
-
-#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \
- setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \
-
-#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \
-
-
-#define setup_sprite_tile_column_height_single_4x(edge_mode, edge, \
- texture_mode) \
-do \
-{ \
- sub_tile_height = column_data; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode##_4x(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode##_4x(edge); \
-} while(0) \
-
-#define setup_sprite_tile_column_height_multi_4x(edge_mode, edge, \
- texture_mode) \
-do \
-{ \
- u32 tiles_remaining = column_data >> 16; \
- sub_tile_height = column_data & 0xFF; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode##_4x(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \
- tiles_remaining -= 1; \
- \
- while(tiles_remaining) \
- { \
- sub_tile_height = 16; \
- setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \
- tiles_remaining--; \
- } \
- \
- sub_tile_height = (column_data >> 8) & 0xFF; \
- setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode##_4x(edge); \
-} while(0) \
-
-
-#define setup_sprite_column_data_single_4x() \
- column_data = height \
-
-#define setup_sprite_column_data_multi_4x() \
- column_data = 16 - offset_v; \
- column_data |= ((height_rounded & 0xF) + 1) << 8; \
- column_data |= (tile_height - 1) << 16 \
-
-
-#define setup_sprite_tile_column_width_single_4x(texture_mode, multi_height, \
- edge_mode, edge) \
-{ \
- setup_sprite_column_data_##multi_height##_4x(); \
- left_mask_bits = left_block_mask | right_block_mask; \
- right_mask_bits = left_mask_bits >> 16; \
- \
- setup_sprite_tile_column_height_##multi_height##_4x(edge_mode, edge, \
- texture_mode); \
-} \
-
-#define setup_sprite_tiled_advance_column_4x() \
- texture_offset_base += 0x100; \
- if((texture_offset_base & 0xF00) == 0) \
- texture_offset_base -= (0x100 + 0xF00) \
-
-#define setup_sprite_tile_column_width_multi_4x(texture_mode, multi_height, \
- left_mode, right_mode) \
-{ \
- setup_sprite_column_data_##multi_height##_4x(); \
- s32 fb_ptr_advance_column = 32 - (2048 * height); \
- \
- tile_width -= 2; \
- left_mask_bits = left_block_mask; \
- right_mask_bits = left_mask_bits >> 16; \
- \
- setup_sprite_tile_column_height_##multi_height##_4x(left_mode, right, \
- texture_mode); \
- fb_ptr += fb_ptr_advance_column; \
- \
- left_mask_bits = 0x00; \
- right_mask_bits = 0x00; \
- \
- while(tile_width) \
- { \
- setup_sprite_tiled_advance_column_4x(); \
- setup_sprite_tile_column_height_##multi_height##_4x(full, none, \
- texture_mode); \
- fb_ptr += fb_ptr_advance_column; \
- tile_width--; \
- } \
- \
- left_mask_bits = right_block_mask; \
- right_mask_bits = left_mask_bits >> 16; \
- \
- setup_sprite_tiled_advance_column(); \
- setup_sprite_tile_column_height_##multi_height##_4x(right_mode, left, \
- texture_mode); \
-} \
-
-
-#define setup_sprite_tiled_builder_4x(texture_mode) \
-void setup_sprite_##texture_mode##_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, \
- s32 u, s32 v, s32 width, s32 height, u32 color) \
-{ \
- s32 offset_u = u & 0xF; \
- s32 offset_v = v & 0xF; \
- \
- s32 width_rounded = offset_u + width + 15; \
- s32 height_rounded = offset_v + height + 15; \
- s32 tile_height = height_rounded / 16; \
- s32 tile_width = width_rounded / 16; \
- u32 offset_u_right = width_rounded & 0xF; \
- \
- u32 left_block_mask = ~(0xFFFFFFFF << (offset_u * 2)); \
- u32 right_block_mask = 0xFFFFFFFC << (offset_u_right * 2); \
- \
- u32 left_mask_bits; \
- u32 right_mask_bits; \
- \
- u32 sub_tile_height; \
- u32 column_data; \
- \
- u32 texture_mask = (psx_gpu->texture_mask_width & 0xF) | \
- ((psx_gpu->texture_mask_height & 0xF) << 4) | \
- ((psx_gpu->texture_mask_width >> 4) << 8) | \
- ((psx_gpu->texture_mask_height >> 4) << 12); \
- u32 texture_offset = ((v & 0xF) << 4) | ((u & 0xF0) << 4) | \
- ((v & 0xF0) << 8); \
- u32 texture_offset_base = texture_offset; \
- u32 control_mask; \
- \
- u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u * 2); \
- u32 num_blocks = psx_gpu->num_blocks; \
- block_struct *block = psx_gpu->blocks + num_blocks; \
- \
- u16 *texture_block_ptr; \
- vec_8x8u texels; \
- \
- setup_sprite_tiled_initialize_##texture_mode##_4x(); \
- \
- control_mask = tile_width == 1; \
- control_mask |= (tile_height == 1) << 1; \
- control_mask |= ((left_block_mask & 0xFFFF) == 0xFFFF) << 2; \
- control_mask |= (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) << 3; \
- \
- sprites_##texture_mode++; \
- \
- switch(control_mask) \
- { \
- default: \
- case 0x0: \
- setup_sprite_tile_column_width_multi_4x(texture_mode, multi, full, \
- full); \
- break; \
- \
- case 0x1: \
- setup_sprite_tile_column_width_single_4x(texture_mode, multi, full, \
- none); \
- break; \
- \
- case 0x2: \
- setup_sprite_tile_column_width_multi_4x(texture_mode, single, full, \
- full); \
- break; \
- \
- case 0x3: \
- setup_sprite_tile_column_width_single_4x(texture_mode, single, full, \
- none); \
- break; \
- \
- case 0x4: \
- setup_sprite_tile_column_width_multi_4x(texture_mode, multi, half, \
- full); \
- break; \
- \
- case 0x5: \
- setup_sprite_tile_column_width_single_4x(texture_mode, multi, half, \
- right); \
- break; \
- \
- case 0x6: \
- setup_sprite_tile_column_width_multi_4x(texture_mode, single, half, \
- full); \
- break; \
- \
- case 0x7: \
- setup_sprite_tile_column_width_single_4x(texture_mode, single, half, \
- right); \
- break; \
- \
- case 0x8: \
- setup_sprite_tile_column_width_multi_4x(texture_mode, multi, full, \
- half); \
- break; \
- \
- case 0x9: \
- setup_sprite_tile_column_width_single_4x(texture_mode, multi, half, \
- left); \
- break; \
- \
- case 0xA: \
- setup_sprite_tile_column_width_multi_4x(texture_mode, single, full, \
- half); \
- break; \
- \
- case 0xB: \
- setup_sprite_tile_column_width_single_4x(texture_mode, single, half, \
- left); \
- break; \
- \
- case 0xC: \
- setup_sprite_tile_column_width_multi_4x(texture_mode, multi, half, \
- half); \
- break; \
- \
- case 0xE: \
- setup_sprite_tile_column_width_multi_4x(texture_mode, single, half, \
- half); \
- break; \
- } \
-} \
-
-
-void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
- s32 width, s32 height, u32 color);
-
-//#ifndef NEON_BUILD
-#if 1
-setup_sprite_tiled_builder_4x(4bpp);
-setup_sprite_tiled_builder_4x(8bpp);
-
+#ifndef NEON_BUILD
void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
{
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
index 87a14f6..103483a 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com>
+ * Copyright (C) 2012 Gražvydas Ignotas "notaz" <notasas@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -3188,6 +3189,7 @@ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \
shade_blocks_textured_modulated_load_bdm_##shading(); \
vshrn.u16 texels_b, texels, #7; \
\
+ pld [ block_ptr_load_a ]; \
vmovn.u16 texels_r, texels; \
vmlal.u8 pixels, pixels_r_low, d64_1; \
\
@@ -4405,6 +4407,12 @@ function(render_block_fill_body)
#define draw_mask_fb_ptr_left d2
#define draw_mask_fb_ptr_right d3
+#define draw_mask_fb_ptr_left_a d2
+#define draw_mask_fb_ptr_left_b d3
+#define draw_mask_fb_ptr_right_a d10
+#define draw_mask_fb_ptr_right_b d11
+#define draw_masks_fb_ptrs2 q5
+
#define clut_low_a d4
#define clut_low_b d5
#define clut_high_a d6
@@ -4416,37 +4424,24 @@ function(render_block_fill_body)
#define clut_a q2
#define clut_b q3
-#define texels_low d10
-#define texels_high d11
-
-
-setup_sprite_flush_blocks_single:
- vpush { q1 - q4 }
-
- stmdb sp!, { r0 - r3, r12, r14 }
- bl flush_render_block_buffer
- ldmia sp!, { r0 - r3, r12, r14 }
-
- vpop { q1 - q4 }
-
- add block, psx_gpu, #psx_gpu_blocks_offset
+#define texels_low d12
+#define texels_high d13
- mov num_blocks, sub_tile_height
- bx lr
+#define texels_wide_low d14
+#define texels_wide_high d15
+#define texels_wide q7
-setup_sprite_flush_blocks_double:
- vpush { q1 - q4 }
+setup_sprite_flush_blocks:
+ vpush { q1 - q5 }
stmdb sp!, { r0 - r3, r12, r14 }
bl flush_render_block_buffer
ldmia sp!, { r0 - r3, r12, r14 }
- vpop { q1 - q4 }
+ vpop { q1 - q5 }
add block, psx_gpu, #psx_gpu_blocks_offset
-
- mov num_blocks, sub_tile_height, lsl #1
bx lr
@@ -4484,8 +4479,6 @@ setup_sprite_update_texture_8bpp_cache:
blne setup_sprite_update_texture_8bpp_cache \
-#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \
-
#define setup_sprite_block_count_single() \
sub_tile_height \
@@ -4496,7 +4489,8 @@ setup_sprite_update_texture_8bpp_cache:
add num_blocks, num_blocks, setup_sprite_block_count_##type(); \
cmp num_blocks, #MAX_BLOCKS; \
\
- blgt setup_sprite_flush_blocks_##type \
+ movgt num_blocks, setup_sprite_block_count_##type(); \
+ blgt setup_sprite_flush_blocks \
#define setup_sprite_tile_full_4bpp(edge) \
@@ -4678,31 +4672,33 @@ setup_sprite_update_texture_8bpp_cache:
#define setup_sprite_tile_column_edge_post_adjust_full(edge) \
-#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \
+#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \
+ x4mode) \
mov sub_tile_height, column_data; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge) \
+ setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
+ setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge) \
-#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \
+#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \
+ x4mode) \
and sub_tile_height, column_data, #0xFF; \
mov tiles_remaining, column_data, lsr #16; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
+ setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
\
subs tiles_remaining, tiles_remaining, #1; \
beq 2f; \
\
3: \
mov sub_tile_height, #16; \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
subs tiles_remaining, tiles_remaining, #1; \
bne 3b; \
\
2: \
uxtb sub_tile_height, column_data, ror #8; \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge) \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
+ setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge) \
#define setup_sprite_column_data_single() \
@@ -4721,17 +4717,30 @@ setup_sprite_update_texture_8bpp_cache:
\
orr column_data, column_data, height_rounded, lsl #8 \
-#define setup_sprite_tile_column_width_single(texture_mode, multi_height, \
- edge_mode, edge) \
- setup_sprite_##texture_mode##_single_##multi_height##_##edge_mode##_##edge: \
+#define setup_sprite_setup_left_draw_mask_fb_ptr() \
+ vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
+ vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \
+
+#define setup_sprite_setup_left_draw_mask_fb_ptr_advance_column() \
+ mov fb_ptr_advance_column, #32; \
+ vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
+ \
+ sub fb_ptr_advance_column, height, lsl #11; \
+ vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \
+
+#define setup_sprite_setup_right_draw_mask_fb_ptr() \
+ vdup.u8 draw_mask_fb_ptr_left, block_masks[4]; \
+ vdup.u8 draw_mask_fb_ptr_right, block_masks[5] \
+
+#define setup_sprite_tile_column_width_single(tm, multi_height, edge_mode, \
+ edge, x4mode) \
+ setup_sprite_##tm##_single_##multi_height##_##edge_mode##_##edge##x4mode: \
setup_sprite_column_data_##multi_height(); \
vext.32 block_masks_shifted, block_masks, block_masks, #1; \
vorr.u32 block_masks, block_masks, block_masks_shifted; \
- vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
- vdup.u8 draw_mask_fb_ptr_right, block_masks[1]; \
+ setup_sprite_setup_left_draw_mask_fb_ptr##x4mode(); \
\
- setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \
- texture_mode); \
+ setup_sprite_tile_column_height_##multi_height(edge_mode, edge, tm, x4mode); \
ldmia sp!, { r4 - r11, pc } \
#define setup_sprite_tiled_advance_column() \
@@ -4740,39 +4749,335 @@ setup_sprite_update_texture_8bpp_cache:
subeq texture_offset_base, texture_offset_base, #(0x100 + 0xF00) \
#define setup_sprite_tile_column_width_multi(tm, multi_height, left_mode, \
- right_mode) \
- setup_sprite_##tm##_multi_##multi_height##_##left_mode##_##right_mode: \
+ right_mode, x4mode) \
+ setup_sprite_##tm##_multi_##multi_height##_##left_mode##_##right_mode##x4mode:\
setup_sprite_column_data_##multi_height(); \
- mov fb_ptr_advance_column, #32; \
\
- sub fb_ptr_advance_column, height, lsl #11; \
- vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
+ setup_sprite_setup_left_draw_mask_fb_ptr_advance_column##x4mode(); \
\
- vdup.u8 draw_mask_fb_ptr_right, block_masks[1]; \
- setup_sprite_tile_column_height_##multi_height(left_mode, right, tm); \
+ setup_sprite_tile_column_height_##multi_height(left_mode, right, tm, x4mode);\
\
subs tile_width, tile_width, #2; \
add fb_ptr, fb_ptr, fb_ptr_advance_column; \
\
- vmov.u8 draw_masks_fb_ptrs, #0; \
beq 1f; \
\
+ vmov.u8 draw_masks_fb_ptrs, #0; \
+ vmov.u8 draw_masks_fb_ptrs2, #0; \
+ \
0: \
setup_sprite_tiled_advance_column(); \
- setup_sprite_tile_column_height_##multi_height(full, none, tm); \
+ setup_sprite_tile_column_height_##multi_height(full, none, tm, x4mode); \
add fb_ptr, fb_ptr, fb_ptr_advance_column; \
subs tile_width, tile_width, #1; \
bne 0b; \
\
1: \
- vdup.u8 draw_mask_fb_ptr_left, block_masks[4]; \
- vdup.u8 draw_mask_fb_ptr_right, block_masks[5]; \
+ setup_sprite_setup_right_draw_mask_fb_ptr##x4mode(); \
\
setup_sprite_tiled_advance_column(); \
- setup_sprite_tile_column_height_##multi_height(right_mode, left, tm); \
+ setup_sprite_tile_column_height_##multi_height(right_mode, left, tm, x4mode);\
ldmia sp!, { r4 - r11, pc } \
+#define setup_sprite_offset_u_adjust() \
+
+#define setup_sprite_get_left_block_mask() \
+ and left_block_mask, left_block_mask, #0xFF \
+
+#define setup_sprite_compare_left_block_mask() \
+ cmp left_block_mask, #0xFF \
+
+#define setup_sprite_get_right_block_mask() \
+ uxtb right_block_mask, right_block_mask, ror #8 \
+
+#define setup_sprite_compare_right_block_mask() \
+ cmp right_block_mask, #0xFF \
+
+
+
+/* 4x stuff */
+#define fb_ptr2 column_data
+
+#define setup_sprite_offset_u_adjust_4x() /* 4x variant: doubles both U offsets (the non-4x variant of this macro is empty) */ \
+ sub fb_ptr, fb_ptr, offset_u, lsl #1; /* fb_ptr -= offset_u * 2 bytes, using offset_u before it is doubled below */ \
+ lsl offset_u_right, #1; /* offset_u_right *= 2 */ \
+ lsl offset_u, #1; /* offset_u *= 2 */ \
+ add offset_u_right, #1 /* offset_u_right = old offset_u_right * 2 + 1 */ \
+
+#define setup_sprite_get_left_block_mask_4x() /* 4x masks are 16 bits wide (the non-4x variant keeps 8 bits with and #0xFF) */ \
+ sxth left_block_mask, left_block_mask /* keep the low halfword, sign-extended to 32 bits */ \
+
+#define setup_sprite_compare_left_block_mask_4x() /* sets Z when the sign-extended 16-bit left mask is all ones */ \
+ cmp left_block_mask, #0xFFFFFFFF \
+
+#define setup_sprite_get_right_block_mask_4x() /* right mask is taken from the upper halfword */ \
+ sxth right_block_mask, right_block_mask, ror #16 /* rotate bits 16..31 down, then sign-extend the halfword */ \
+
+#define setup_sprite_compare_right_block_mask_4x() /* sets Z when the sign-extended 16-bit right mask is all ones */ \
+ cmp right_block_mask, #0xFFFFFFFF \
+
+
+#define widen_texels_16bpp(texels_) /* texels_wide = the four 16-bit lanes of texels_, each repeated twice */ \
+ vmov texels_wide_low, texels_; /* first copy of the source register */ \
+ vmov texels_wide_high, texels_; /* second copy of the source register */ \
+ vzip.16 texels_wide_low, texels_wide_high /* interleave the copies: low = a,a,b,b  high = c,c,d,d */ \
+
+#define widen_texels_8bpp(texels_) /* texels_wide = the eight bytes of texels_, each repeated twice */ \
+ vmov texels_wide_low, texels_; /* first copy of the source register */ \
+ vmov texels_wide_high, texels_; /* second copy of the source register */ \
+ vzip.8 texels_wide_low, texels_wide_high /* interleave the copies: low = bytes 0..3 doubled, high = bytes 4..7 doubled */ \
+
+#define write_block_16bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) /* emit one block entry and advance block_ by 64 bytes in total */ \
+ vst1.u32 { texels_ }, [ block_, :128 ]; /* texel data goes to the start of the entry (128-bit alignment hint) */ \
+ add block_, block_, #40; /* step to byte offset 40 of the entry */ \
+ \
+ vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; /* upper 32-bit lane = destination pointer; lower lane is left as the caller set it */ \
+ vst1.u32 { draw_mask_fb_ptr_ }, [ block_, :64 ]; /* store those 8 bytes at offset 40 */ \
+ add block_, block_, #24 /* 40 + 24 = 64: block_ now points at the next entry */ \
+
+/* assumes 16-byte offset already added to block_ (the 8bpp tile macros add 16 before their loop and subtract it afterwards) */
+#define write_block_8bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) /* emit one block entry holding 8 bytes of texel data */ \
+ vst1.u32 { texels_ }, [ block_, :64 ]; /* 8 texel bytes at entry offset 16 */ \
+ add block_, block_, #24; /* 16 + 24 = entry offset 40 */ \
+ \
+ vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; /* upper 32-bit lane = destination pointer; lower lane is left as the caller set it */ \
+ vst1.u32 { draw_mask_fb_ptr_ }, [ block_, :64 ]; /* store those 8 bytes at offset 40 */ \
+ add block_, block_, #40 /* 40 + 40 = offset 16 of the next 64-byte entry */ \
+
+#define do_texture_block_16bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \
+ draw_mask_fb_ptr_b_) /* turn the 8 pixels in texels_low/texels_high into 4 block entries; fb_ptr_tmp is scratch, fb_ptr is not modified */ \
+ widen_texels_16bpp(texels_low); /* pixels 0..3, each doubled */ \
+ add fb_ptr_tmp, fb_ptr, #1024*2; /* fb_ptr + 2048 bytes; callers step fb_ptr by 2048 * 2 per source row */ \
+ \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr); /* entry 1: first half at fb_ptr */ \
+ \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); /* entry 2: same pixels at fb_ptr + 2048 */ \
+ widen_texels_16bpp(texels_high); /* pixels 4..7, each doubled */ \
+ \
+ add fb_ptr_tmp, fb_ptr, #8*2; /* 16 bytes (eight 16-bit pixels) to the right of fb_ptr */ \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); /* entry 3: second half at fb_ptr + 16 */ \
+ \
+ add fb_ptr_tmp, fb_ptr_tmp, #1024*2; /* fb_ptr + 16 + 2048 */ \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) /* entry 4: second half at fb_ptr + 16 + 2048 */ \
+
+#define do_texture_block_8bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \
+ draw_mask_fb_ptr_b_) /* turn the 8 texel bytes in texels into 4 block entries; fb_ptr_tmp is scratch, fb_ptr is not modified */ \
+ widen_texels_8bpp(texels); /* every texel byte doubled, split across texels_wide_low / texels_wide_high */ \
+ add fb_ptr_tmp, fb_ptr, #1024*2; /* fb_ptr + 2048 bytes; callers step fb_ptr by 2048 * 2 per source row */ \
+ \
+ write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr); /* entry 1: texels 0..3 doubled, at fb_ptr */ \
+ write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); /* entry 2: same texels at fb_ptr + 2048 */ \
+ \
+ add fb_ptr_tmp, fb_ptr, #8*2; /* 16 bytes to the right of fb_ptr */ \
+ write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); /* entry 3: texels 4..7 doubled, at fb_ptr + 16 */ \
+ \
+ add fb_ptr_tmp, fb_ptr_tmp, #1024*2; /* fb_ptr + 16 + 2048 */ \
+ write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) /* entry 4: same texels at fb_ptr + 16 + 2048 */ \
+
+
+#define setup_sprite_tiled_initialize_4bpp_4x() /* load the 16-entry CLUT and split it into low-byte / high-byte lookup tables */ \
+ ldr clut_ptr, [ psx_gpu, #psx_gpu_clut_ptr_offset ]; /* clut_ptr = CLUT pointer field of psx_gpu */ \
+ vld1.u32 { clut_a, clut_b }, [ clut_ptr, :128 ]; /* 32 bytes = sixteen 16-bit entries */ \
+ \
+ vuzp.u8 clut_a, clut_b /* de-interleave: clut_a = even bytes (clut_low_*), clut_b = odd bytes (clut_high_*) */ \
+
+#define setup_sprite_tiled_initialize_8bpp_4x() \
+
+
+#define setup_sprite_block_count_single_4x() /* operand fragment pasted into setup_sprite_tile_add_blocks: 4 entries per tile row */ \
+ sub_tile_height, lsl #2 \
+
+#define setup_sprite_block_count_double_4x() /* operand fragment: 8 entries per tile row (two 8-texel groups, 4 entries each) */ \
+ sub_tile_height, lsl #(1+2) \
+
+#define setup_sprite_tile_full_4bpp_4x(edge) /* emit 8 entries per tile row for a 16-texel-wide 4bpp tile; edge is unused here. NOTE(review): texels_low/high/wide are d12-d15, callee-saved under AAPCS; no save is visible in this chunk - confirm */ \
+ setup_sprite_tile_add_blocks(double_4x); /* reserve sub_tile_height * 8 entries, flushing the block buffer first when MAX_BLOCKS would be exceeded */ \
+ str column_data, [sp, #-8]!; /* fb_ptr2 shares its register with column_data, so column_data is saved on the stack */ \
+ \
+ 4: /* one iteration per tile row */ \
+ and texture_block_ptr, texture_offset, texture_mask; /* masked texture offset of the left 8 texels */ \
+ pld [ fb_ptr ]; \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; /* make it an address inside the texture page */ \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; /* 8 bytes, used as table indices below (presumably one palette index per byte - confirm) */ \
+ \
+ add texture_block_ptr, texture_offset, #8; /* begin computing the address of the right 8 texels */ \
+ vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; /* low byte of each looked-up CLUT entry */ \
+ \
+ and texture_block_ptr, texture_block_ptr, texture_mask; \
+ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; /* high byte of each looked-up CLUT entry */ \
+ \
+ vzip.8 texels_low, texels_high; /* pair low/high bytes: texels_low = pixels 0..3, texels_high = pixels 4..7 */ \
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \
+ draw_mask_fb_ptr_left_b); /* 4 entries for the left 8 texels */ \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; /* address of the right 8 texels */ \
+ add fb_ptr, fb_ptr, #16*2; /* move 32 bytes right, past the 16 output pixels just emitted */ \
+ \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \
+ vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \
+ \
+ pld [ fb_ptr ]; \
+ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \
+ \
+ vzip.8 texels_low, texels_high; \
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \
+ draw_mask_fb_ptr_right_b); /* 4 entries for the right 8 texels */ \
+ \
+ add texture_offset, texture_offset, #0x10; /* next row of the tile (0x10 per row) */ \
+ add fb_ptr, fb_ptr, #(2048 - 16) * 2; /* with the +32 above, fb_ptr advances 4096 bytes per iteration */ \
+ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ bne 4b; \
+ \
+ ldr column_data, [sp], #8; /* restore column_data now that fb_ptr2 is no longer needed */ \
+ add texture_offset, texture_offset, #0xF00; /* presumably steps texture_offset to the next tile - confirm against the texture cache layout */ \
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] /* publish the updated entry count */ \
+
+
+#define setup_sprite_tile_half_4bpp_4x(edge) /* emit 4 entries per tile row for an 8-texel-wide 4bpp half tile; edge selects the draw mask register pair */ \
+ setup_sprite_tile_add_blocks(single_4x); /* reserve sub_tile_height * 4 entries, flushing the block buffer first when MAX_BLOCKS would be exceeded */ \
+ str column_data, [sp, #-8]!; /* fb_ptr2 shares its register with column_data, so column_data is saved on the stack */ \
+ \
+ 4: /* one iteration per tile row */ \
+ and texture_block_ptr, texture_offset, texture_mask; /* masked texture offset of this row's 8 texels */ \
+ pld [ fb_ptr ]; \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; /* make it an address inside the texture page */ \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; /* 8 bytes, used as table indices below */ \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; /* NOTE(review): result has no visible consumer; texture_block_ptr is recomputed at label 4 - looks redundant, confirm */ \
+ vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; /* low byte of each looked-up CLUT entry */ \
+ \
+ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; /* high byte of each looked-up CLUT entry */ \
+ add texture_offset, texture_offset, #0x10; /* next row of the tile (0x10 per row) */ \
+ \
+ vzip.8 texels_low, texels_high; /* pair low/high bytes: texels_low = pixels 0..3, texels_high = pixels 4..7 */ \
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \
+ draw_mask_fb_ptr_##edge##_b); /* 4 entries for these 8 texels */ \
+ \
+ add fb_ptr, fb_ptr, #2048 * 2; /* fb_ptr advances 4096 bytes per iteration */ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ \
+ bne 4b; \
+ \
+ ldr column_data, [sp], #8; /* restore column_data now that fb_ptr2 is no longer needed */ \
+ add texture_offset, texture_offset, #0xF00; /* presumably steps texture_offset to the next tile - confirm against the texture cache layout */ \
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] /* publish the updated entry count */ \
+
+
+#define setup_sprite_tile_full_8bpp_4x(edge) /* emit 8 entries per tile row for a 16-texel-wide 8bpp tile; texel bytes are stored unconverted; edge is unused here */ \
+ setup_sprite_tile_add_blocks(double_4x); /* reserve sub_tile_height * 8 entries, flushing the block buffer first when MAX_BLOCKS would be exceeded */ \
+ add block, block, #16; /* bias block by 16 as write_block_8bpp expects; done after the possible flush, which resets block */ \
+ str column_data, [sp, #-8]!; /* fb_ptr2 shares its register with column_data, so column_data is saved on the stack */ \
+ \
+ 4: /* one iteration per tile row */ \
+ and texture_block_ptr, texture_offset, texture_mask; /* masked texture offset of the left 8 texels */ \
+ pld [ fb_ptr ]; \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; /* make it an address inside the texture page */ \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; /* 8 texel bytes */ \
+ \
+ add texture_block_ptr, texture_offset, #8; /* begin computing the address of the right 8 texels */ \
+ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \
+ draw_mask_fb_ptr_left_b); /* 4 entries for the left 8 texels */ \
+ \
+ and texture_block_ptr, texture_block_ptr, texture_mask; \
+ \
+ add fb_ptr, fb_ptr, #16*2; /* move 32 bytes right */ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; /* address of the right 8 texels */ \
+ \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \
+ pld [ fb_ptr ]; \
+ \
+ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \
+ draw_mask_fb_ptr_right_b); /* 4 entries for the right 8 texels */ \
+ \
+ add texture_offset, texture_offset, #0x10; /* next row of the tile (0x10 per row) */ \
+ add fb_ptr, fb_ptr, #(2048 - 16) * 2; /* with the +32 above, fb_ptr advances 4096 bytes per iteration */ \
+ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ bne 4b; \
+ \
+ sub block, block, #16; /* remove the 16-byte bias again */ \
+ ldr column_data, [sp], #8; /* restore column_data now that fb_ptr2 is no longer needed */ \
+ add texture_offset, texture_offset, #0xF00; /* presumably steps texture_offset to the next tile - confirm against the texture cache layout */ \
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] /* publish the updated entry count */ \
+
+
+#define setup_sprite_tile_half_8bpp_4x(edge) /* emit 4 entries per tile row for an 8-texel-wide 8bpp half tile; edge selects the draw mask register pair */ \
+ setup_sprite_tile_add_blocks(single_4x); /* reserve sub_tile_height * 4 entries, flushing the block buffer first when MAX_BLOCKS would be exceeded */ \
+ add block, block, #16; /* bias block by 16 as write_block_8bpp expects; done after the possible flush, which resets block */ \
+ str column_data, [sp, #-8]!; /* fb_ptr2 shares its register with column_data, so column_data is saved on the stack */ \
+ \
+ 4: /* one iteration per tile row */ \
+ and texture_block_ptr, texture_offset, texture_mask; /* masked texture offset of this row's 8 texels */ \
+ pld [ fb_ptr ]; \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; /* make it an address inside the texture page */ \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; /* 8 texel bytes */ \
+ \
+ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \
+ draw_mask_fb_ptr_##edge##_b); /* 4 entries for these 8 texels */ \
+ \
+ add texture_offset, texture_offset, #0x10; /* next row of the tile (0x10 per row) */ \
+ add fb_ptr, fb_ptr, #2048 * 2; /* fb_ptr advances 4096 bytes per iteration */ \
+ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ bne 4b; \
+ \
+ sub block, block, #16; /* remove the 16-byte bias again */ \
+ ldr column_data, [sp], #8; /* restore column_data now that fb_ptr2 is no longer needed */ \
+ add texture_offset, texture_offset, #0xF00; /* presumably steps texture_offset to the next tile - confirm against the texture cache layout */ \
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] /* publish the updated entry count */ \
+
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() /* half column using the right 8 texels of the tile */ \
+ add texture_offset, texture_offset_base, #8; /* start 8 bytes into the tile row */ \
+ add fb_ptr, fb_ptr, #16 * 2 /* and 32 bytes further right in the framebuffer */ \
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() /* half column using the left 8 texels: no offset needed */ \
+ mov texture_offset, texture_offset_base \
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) /* dispatch on edge (left / right) by token pasting */ \
+ setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \
+
+#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) /* full-width column: start at the tile base; edge is unused */ \
+ mov texture_offset, texture_offset_base \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() /* undo the 32-byte fb_ptr shift applied by the matching pre-adjust */ \
+ sub fb_ptr, fb_ptr, #16 * 2 \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() /* intentionally empty */ \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) /* dispatch on edge (left / right) by token pasting */ \
+ setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \
+
+#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) /* intentionally empty */ \
+
+
+#define setup_sprite_setup_left_draw_mask_fb_ptr_4x() /* 4x variant: four mask registers taken from bytes 0..3 of block_masks (the non-4x variant uses bytes 0..1) */ \
+ vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; /* byte 0 replicated to every lane */ \
+ vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; /* byte 1 replicated to every lane */ \
+ vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; /* byte 2 replicated to every lane */ \
+ vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] /* byte 3 replicated to every lane */ \
+
+#define setup_sprite_setup_left_draw_mask_fb_ptr_advance_column_4x() /* left-column masks plus the per-column fb_ptr step, interleaved */ \
+ mov fb_ptr_advance_column, #32 * 2; /* 64 bytes: twice the 32 used by the non-4x variant */ \
+ vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; /* byte 0 replicated to every lane */ \
+ vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; /* byte 1 replicated to every lane */ \
+ sub fb_ptr_advance_column, height, lsl #11 + 1; /* 64 - height * 4096: undoes the 4096-byte-per-row steps of one column, then moves 64 bytes right */ \
+ vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; /* byte 2 replicated to every lane */ \
+ vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] /* byte 3 replicated to every lane */ \
+
+#define setup_sprite_setup_right_draw_mask_fb_ptr_4x() /* masks for the last column: bytes 4..7 of block_masks (the non-4x variant uses bytes 4..5) */ \
+ vdup.u8 draw_mask_fb_ptr_left_a, block_masks[4]; /* byte 4 replicated to every lane */ \
+ vdup.u8 draw_mask_fb_ptr_left_b, block_masks[5]; /* byte 5 replicated to every lane */ \
+ vdup.u8 draw_mask_fb_ptr_right_a, block_masks[6]; /* byte 6 replicated to every lane */ \
+ vdup.u8 draw_mask_fb_ptr_right_b, block_masks[7] /* byte 7 replicated to every lane */ \
+
+
// r0: psx_gpu
// r1: x
// r2: y
@@ -4782,28 +5087,42 @@ setup_sprite_update_texture_8bpp_cache:
// [ sp + 8 ]: height
// [ sp + 12 ]: color (unused)
-#define setup_sprite_tiled_builder(texture_mode) \
- \
-setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \
-setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \
-setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \
-setup_sprite_tile_column_width_single(texture_mode, single, full, none); \
-setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \
-setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \
-setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \
-setup_sprite_tile_column_width_single(texture_mode, single, half, right); \
-setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \
-setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \
-setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \
-setup_sprite_tile_column_width_single(texture_mode, single, half, left); \
-setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \
-setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \
+#define setup_sprite_tiled_builder(texture_mode, x4mode) \
+ \
+setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, single, full, none, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, single, half, right, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, single, half, left, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \
+ x4mode); \
\
.align 4; \
\
-function(setup_sprite_##texture_mode) \
+function(setup_sprite_##texture_mode##x4mode) \
stmdb sp!, { r4 - r11, r14 }; \
- setup_sprite_tiled_initialize_##texture_mode(); \
+ setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
\
ldr v, [ sp, #36 ]; \
and offset_u, u, #0xF; \
@@ -4832,11 +5151,13 @@ function(setup_sprite_##texture_mode) \
\
/* texture_offset_base = VH-UH-UL-00 */\
bfi texture_offset_base, u, #4, #8; \
- movw right_block_mask, #0xFFFE; \
+ mov right_block_mask, #0xFFFFFFFE; \
+ \
+ setup_sprite_offset_u_adjust##x4mode(); \
\
/* texture_offset_base = VH-UH-VL-00 */\
bfi texture_offset_base, v, #4, #4; \
- movw left_block_mask, #0xFFFF; \
+ mov left_block_mask, #0xFFFFFFFF; \
\
mov tile_height, height_rounded, lsr #4; \
mvn left_block_mask, left_block_mask, lsl offset_u; \
@@ -4856,16 +5177,16 @@ function(setup_sprite_##texture_mode) \
\
/* texture_mask = HH-WH-HL-WL */\
bfi texture_mask, texture_mask_rev, #8, #4; \
- and left_block_mask, left_block_mask, #0xFF; \
+ setup_sprite_get_left_block_mask##x4mode(); \
\
mov control_mask, #0; \
- cmp left_block_mask, #0xFF; \
+ setup_sprite_compare_left_block_mask##x4mode(); \
\
- uxtb right_block_mask, right_block_mask, ror #8; \
+ setup_sprite_get_right_block_mask##x4mode(); \
orreq control_mask, control_mask, #0x4; \
\
ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]; \
- cmp right_block_mask, #0xFF; \
+ setup_sprite_compare_right_block_mask##x4mode(); \
\
orreq control_mask, control_mask, #0x8; \
cmp tile_width, #1; \
@@ -4880,25 +5201,31 @@ function(setup_sprite_##texture_mode) \
ldr pc, [ pc, control_mask, lsl #2 ]; \
nop; \
\
- .word setup_sprite_##texture_mode##_multi_multi_full_full; \
- .word setup_sprite_##texture_mode##_single_multi_full_none; \
- .word setup_sprite_##texture_mode##_multi_single_full_full; \
- .word setup_sprite_##texture_mode##_single_single_full_none; \
- .word setup_sprite_##texture_mode##_multi_multi_half_full; \
- .word setup_sprite_##texture_mode##_single_multi_half_right; \
- .word setup_sprite_##texture_mode##_multi_single_half_full; \
- .word setup_sprite_##texture_mode##_single_single_half_right; \
- .word setup_sprite_##texture_mode##_multi_multi_full_half; \
- .word setup_sprite_##texture_mode##_single_multi_half_left; \
- .word setup_sprite_##texture_mode##_multi_single_full_half; \
- .word setup_sprite_##texture_mode##_single_single_half_left; \
- .word setup_sprite_##texture_mode##_multi_multi_half_half; \
+ .word setup_sprite_##texture_mode##_multi_multi_full_full##x4mode; \
+ .word setup_sprite_##texture_mode##_single_multi_full_none##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_single_full_full##x4mode; \
+ .word setup_sprite_##texture_mode##_single_single_full_none##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_multi_half_full##x4mode; \
+ .word setup_sprite_##texture_mode##_single_multi_half_right##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_single_half_full##x4mode; \
+ .word setup_sprite_##texture_mode##_single_single_half_right##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_multi_full_half##x4mode; \
+ .word setup_sprite_##texture_mode##_single_multi_half_left##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_single_full_half##x4mode; \
+ .word setup_sprite_##texture_mode##_single_single_half_left##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_multi_half_half##x4mode; \
.word 0x00000000; \
- .word setup_sprite_##texture_mode##_multi_single_half_half \
+ .word setup_sprite_##texture_mode##_multi_single_half_half##x4mode; \
+
+
+setup_sprite_tiled_builder(4bpp,);
+setup_sprite_tiled_builder(8bpp,);
+#undef draw_mask_fb_ptr_left
+#undef draw_mask_fb_ptr_right
-setup_sprite_tiled_builder(4bpp);
-setup_sprite_tiled_builder(8bpp);
+setup_sprite_tiled_builder(4bpp, _4x);
+setup_sprite_tiled_builder(8bpp, _4x);
#undef block_ptr
@@ -4987,6 +5314,12 @@ function(texture_sprite_blocks_8bpp)
#undef texture_mask
#undef num_blocks
#undef texture_offset
+#undef texels_low
+#undef texels_high
+#undef texels_wide_low
+#undef texels_wide_high
+#undef texels_wide
+#undef fb_ptr2
#define psx_gpu r0
#define x r1
@@ -4998,6 +5331,7 @@ function(texture_sprite_blocks_8bpp)
#define left_offset r8
#define width_rounded r9
#define right_width r10
+
#define block_width r11
#define texture_offset_base r1
@@ -5008,6 +5342,7 @@ function(texture_sprite_blocks_8bpp)
#define fb_ptr r7
#define texture_offset r8
#define blocks_remaining r9
+#define fb_ptr2 r10
#define fb_ptr_pitch r12
#define texture_block_ptr r14
@@ -5026,29 +5361,23 @@ function(texture_sprite_blocks_8bpp)
#define draw_mask_fb_ptr d2
#define texels q2
+#define draw_mask_fb_ptr_a d2
+#define draw_mask_fb_ptr_b d3
+#define texels_low d4
+#define texels_high d5
+#define texels_wide_low d6
+#define texels_wide_high d7
+#define texels_wide q3
-setup_sprites_16bpp_flush_single:
- vpush { d0 - d2 }
-
- stmdb sp!, { r0 - r3, r12, r14 }
- bl flush_render_block_buffer
- ldmia sp!, { r0 - r3, r12, r14 }
-
- vpop { d0 - d2 }
-
- add block, psx_gpu, #psx_gpu_blocks_offset
- mov num_blocks, #1
-
- bx lr
-setup_sprites_16bpp_flush_row:
- vpush { d0 - d2 }
+setup_sprites_16bpp_flush:
+ vpush { d0 - d3 }
stmdb sp!, { r0 - r3, r12, r14 }
bl flush_render_block_buffer
ldmia sp!, { r0 - r3, r12, r14 }
- vpop { d0 - d2 }
+ vpop { d0 - d3 }
add block, psx_gpu, #psx_gpu_blocks_offset
mov num_blocks, block_width
@@ -5113,7 +5442,7 @@ function(setup_sprite_16bpp)
1:
add num_blocks, num_blocks, #1
cmp num_blocks, #MAX_BLOCKS
- blgt setup_sprites_16bpp_flush_single
+ blgt setup_sprites_16bpp_flush
and texture_block_ptr, texture_offset_base, texture_mask
subs height, height, #1
@@ -5142,7 +5471,7 @@ function(setup_sprite_16bpp)
mov texture_offset, texture_offset_base
cmp num_blocks, #MAX_BLOCKS
- blgt setup_sprites_16bpp_flush_row
+ blgt setup_sprites_16bpp_flush
add texture_offset_base, texture_offset_base, #2048
and texture_block_ptr, texture_offset, texture_mask
@@ -5213,6 +5542,151 @@ function(setup_sprite_16bpp)
ldmia sp!, { r4 - r11, pc }
+// 4x version
+// FIXME: duplicate code with normal version :(
+#undef draw_mask_fb_ptr
+
+function(setup_sprite_16bpp_4x) // 4x variant of setup_sprite_16bpp: every 8-texel group becomes 4 block entries via do_texture_block_16bpp_4x
+ stmdb sp!, { r4 - r11, r14 } // save r4-r11 and lr: 9 words = 36 bytes
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] // fb_ptr = output VRAM pointer field of psx_gpu
+
+ ldr v, [ sp, #36 ] // v: first stack argument, just above the 36 bytes pushed
+ add fb_ptr, fb_ptr, y, lsl #11 // fb_ptr += y * 2048 bytes
+
+ ldr width, [ sp, #40 ] // width: second stack argument
+ add fb_ptr, fb_ptr, x, lsl #1 // fb_ptr += x * 2 bytes
+
+ ldr height, [ sp, #44 ] // height: third stack argument
+ and left_offset, u, #0x7 // left_offset = u & 7: texels skipped in the first 8-texel group
+
+ add texture_offset_base, u, u // texture_offset_base = u * 2 (shares r1 with x, which is no longer needed)
+ add width_rounded, width, #7 // first half of: width_rounded = width + 7 + (u & 7)
+
+ add texture_offset_base, v, lsl #11 // texture_offset_base += v * 2048
+ movw left_mask_bits, #0xFFFF
+
+ ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
+ add width_rounded, width_rounded, left_offset // width_rounded = width + 7 + (u & 7)
+
+ lsl left_offset, #1 // left_offset = (u & 7) * 2
+
+ ldrb texture_mask_height, [ psx_gpu, #psx_gpu_texture_mask_height_offset ]
+ sub fb_ptr, fb_ptr, left_offset, lsl #1 // fb_ptr -= (u & 7) * 4 bytes
+
+ add texture_mask, texture_mask_width, texture_mask_width // texture_mask = mask_width * 2
+ movw right_mask_bits, #0xFFFC
+
+ and right_width, width_rounded, #0x7
+ mvn left_mask_bits, left_mask_bits, lsl left_offset // left_mask_bits = ~(0xFFFF << ((u & 7) * 2))
+
+ lsl right_width, #1 // right_width = (width_rounded & 7) * 2
+
+ add texture_mask, texture_mask_height, lsl #11 // texture_mask += mask_height * 2048
+ mov block_width, width_rounded, lsr #3 // block_width = number of 8-texel groups per row
+
+ mov right_mask_bits, right_mask_bits, lsl right_width // right_mask_bits = 0xFFFC << right_width
+ movw fb_ptr_pitch, #(2048 + 16) * 2
+
+ sub fb_ptr_pitch, fb_ptr_pitch, block_width, lsl #4+1 // fb_ptr_pitch = 4096 + 32 - groups * 32, so a whole row nets +4096 bytes
+ vmov block_masks, left_mask_bits, right_mask_bits // block_masks lane 0 = left_mask_bits, lane 1 = right_mask_bits
+
+ ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+ add block, psx_gpu, #psx_gpu_blocks_offset
+
+ bic texture_offset_base, texture_offset_base, #0xF // align the texture offset down to 16 bytes
+ cmp block_width, #1 // flags are consumed by the bne 0f below
+
+ ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ]
+ add block, block, num_blocks, lsl #6 // block = first free entry (64 bytes per entry)
+
+ lsl block_width, #2 // block_width now counts entries per row (4 per group); lsl without s keeps the flags
+ bne 0f // more than one group per row: take the multi-group path
+
+ vext.32 block_masks_shifted, block_masks, block_masks, #1 // single group per row: swap the two mask lanes
+ vorr.u32 block_masks, block_masks, block_masks_shifted // both lanes = left_mask_bits | right_mask_bits
+ vdup.u8 draw_mask_fb_ptr_a, block_masks[0] // byte 0 of the combined mask
+ vdup.u8 draw_mask_fb_ptr_b, block_masks[1] // byte 1 of the combined mask
+
+ 1:
+ add num_blocks, num_blocks, block_width // reserve this row's 4 entries
+ cmp num_blocks, #MAX_BLOCKS
+ blgt setup_sprites_16bpp_flush // on overflow: flush, reset block, num_blocks = block_width
+
+ and texture_block_ptr, texture_offset_base, texture_mask
+ subs height, height, #1 // flags are consumed by the bne 1b below
+
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr
+ vld1.u32 { texels }, [ texture_block_ptr, :128 ] // 8 pixels: texels_low / texels_high are the two halves of texels
+
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b)
+
+ add texture_offset_base, texture_offset_base, #2048 // next texture row
+ add fb_ptr, fb_ptr, #2048*2 // fb_ptr += 4096 bytes per source row
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+ bne 1b
+
+ ldmia sp!, { r4 - r11, pc }
+
+ 0:
+ add num_blocks, num_blocks, block_width // multi-group path: reserve all entries of this row
+ mov texture_offset, texture_offset_base
+
+ vdup.u8 draw_mask_fb_ptr_a, block_masks[0] // left_mask_bits, byte 0; reloaded each row because the interior groups zero it
+ vdup.u8 draw_mask_fb_ptr_b, block_masks[1] // left_mask_bits, byte 1
+
+ cmp num_blocks, #MAX_BLOCKS
+ blgt setup_sprites_16bpp_flush // on overflow: flush, reset block, num_blocks = block_width
+
+ add texture_offset_base, texture_offset_base, #2048 // base for the next texture row
+ and texture_block_ptr, texture_offset, texture_mask
+
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr
+ vld1.u32 { texels }, [ texture_block_ptr, :128 ]
+
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) // leftmost group
+
+ subs blocks_remaining, block_width, #2*4 // entries left for interior groups (total minus first and last group)
+ add texture_offset, texture_offset, #16 // next 8 texels (16 bytes)
+
+ vmov.u8 draw_mask_fb_ptr_a, #0 // interior groups use an all-zero mask
+ vmov.u8 draw_mask_fb_ptr_b, #0
+
+ add fb_ptr, fb_ptr, #16*2 // 32 bytes right
+ beq 2f // no interior groups
+
+ 1:
+ and texture_block_ptr, texture_offset, texture_mask
+ subs blocks_remaining, blocks_remaining, #4 // one group = 4 entries; flags are consumed by the bgt 1b below
+
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr
+ vld1.u32 { texels }, [ texture_block_ptr, :128 ]
+
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b)
+ add texture_offset, texture_offset, #16
+
+ add fb_ptr, fb_ptr, #16*2
+ bgt 1b
+
+ 2:
+ vdup.u8 draw_mask_fb_ptr_a, block_masks[4] // right_mask_bits, byte 0
+ vdup.u8 draw_mask_fb_ptr_b, block_masks[5] // right_mask_bits, byte 1
+
+ and texture_block_ptr, texture_offset, texture_mask
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr
+
+ vld1.u32 { texels }, [ texture_block_ptr, :128 ]
+
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) // rightmost group
+ subs height, height, #1 // flags are consumed by the bne 0b below
+
+ add fb_ptr, fb_ptr, fb_ptr_pitch // back to the row start, 4096 bytes further down
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+
+ bne 0b
+
+ ldmia sp!, { r4 - r11, pc }
+
+
#undef texture_page_ptr
#undef vram_ptr
#undef dirty_textures_mask
@@ -5445,3 +5919,5 @@ function(scale2x_tiles8)
nop
pop { r4, pc }
+
+// vim:filetype=armasm