aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornotaz2012-11-02 03:07:05 +0200
committernotaz2012-11-02 03:07:05 +0200
commit2857d72e4ca743bba3cf55e298949e24d97dff02 (patch)
tree3742f3cd7cb1c10fc42fc4b581fff9c4ead3d9ce
parent0e53ec55691229fee1cbb7c23b305be6a59431c4 (diff)
parentbcb62992749a7f66e9a16a8022e090ff334c4424 (diff)
downloadpcsx_rearmed-2857d72e4ca743bba3cf55e298949e24d97dff02.tar.gz
pcsx_rearmed-2857d72e4ca743bba3cf55e298949e24d97dff02.tar.bz2
pcsx_rearmed-2857d72e4ca743bba3cf55e298949e24d97dff02.zip
Merge branch 'enhancement'
Conflicts: frontend/libretro.c
-rw-r--r--.gitmodules5
-rw-r--r--Makefile19
-rw-r--r--frontend/common/plat.h4
m---------frontend/libpicofe0
-rw-r--r--frontend/libretro.c45
-rw-r--r--frontend/linux/plat.c49
-rw-r--r--frontend/main.c10
-rw-r--r--frontend/main.h1
-rw-r--r--frontend/menu.c30
-rw-r--r--frontend/menu.h8
-rwxr-xr-xfrontend/pandora/pcsx.sh5
-rw-r--r--frontend/plat_omap.c5
-rw-r--r--frontend/plat_pandora.c1
-rw-r--r--frontend/plat_pollux.c33
-rw-r--r--frontend/plugin_lib.c251
-rw-r--r--frontend/plugin_lib.h23
-rw-r--r--libpcsxcore/psxmem.c15
-rw-r--r--plugins/dfxvideo/draw_pl.c45
-rw-r--r--plugins/dfxvideo/gpu.c2
-rw-r--r--plugins/dfxvideo/gpulib_if.c20
-rw-r--r--plugins/gpu-gles/gpulib_if.c21
-rw-r--r--plugins/gpu_neon/Makefile2
-rw-r--r--plugins/gpu_neon/psx_gpu/common.h2
-rw-r--r--plugins/gpu_neon/psx_gpu/psx_gpu.c672
-rw-r--r--plugins/gpu_neon/psx_gpu/psx_gpu.h40
-rw-r--r--plugins/gpu_neon/psx_gpu/psx_gpu_4x.c384
-rw-r--r--plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S1000
-rw-r--r--plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h57
-rw-r--r--plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c86
-rw-r--r--plugins/gpu_neon/psx_gpu/psx_gpu_parse.c950
-rw-r--r--plugins/gpu_neon/psx_gpu/tests/Makefile2
-rw-r--r--plugins/gpu_neon/psx_gpu/vector_ops.h4
-rw-r--r--plugins/gpu_neon/psx_gpu_if.c127
-rw-r--r--plugins/gpu_unai/gpu.cpp69
-rw-r--r--plugins/gpu_unai/gpulib_if.cpp10
-rw-r--r--plugins/gpulib/gpu.c47
-rw-r--r--plugins/gpulib/gpu.h11
-rw-r--r--plugins/gpulib/vout_pl.c95
38 files changed, 3531 insertions, 619 deletions
diff --git a/.gitmodules b/.gitmodules
index 650250d..f93599e 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
-[submodule "frontend/warm"]
+[submodule "libpicofe"]
+ path = frontend/libpicofe
+ url = git://notaz.gp2x.de/~notaz/libpicofe.git
+[submodule "warm"]
path = frontend/warm
url = git://notaz.gp2x.de/~notaz/warm.git
diff --git a/Makefile b/Makefile
index c10f739..1f3e736 100644
--- a/Makefile
+++ b/Makefile
@@ -127,6 +127,14 @@ OBJS += plugins/cdrcimg/cdrcimg.o
# dfinput
OBJS += plugins/dfinput/main.o plugins/dfinput/pad.o plugins/dfinput/guncon.o
+# misc
+ifeq "$(HAVE_NEON)" "1"
+OBJS += frontend/libpicofe/arm/neon_scale2x.o
+OBJS += frontend/libpicofe/arm/neon_eagle2x.o
+frontend/libpicofe/arm/neon_scale2x.o: CFLAGS += -DDO_BGR_TO_RGB
+frontend/libpicofe/arm/neon_eagle2x.o: CFLAGS += -DDO_BGR_TO_RGB
+endif
+
# gui
OBJS += frontend/main.o frontend/plugin.o
OBJS += frontend/common/readpng.o frontend/common/fonts.o
@@ -178,6 +186,12 @@ endif
frontend/%.o: CFLAGS += -DIN_EVDEV
frontend/menu.o frontend/main.o frontend/plat_sdl.o: frontend/revision.h
+frontend/libpicofe/arm/neon_scale2x.S frontend/libpicofe/menu.c:
+ @echo "libpicofe module is missing, please run:"
+ @echo "git submodule init && git submodule update"
+ @exit 1
+
+
libpcsxcore/gte_nf.o: libpcsxcore/gte.c
$(CC) -c -o $@ $^ $(CFLAGS) -DFLAGLESS
@@ -185,7 +199,6 @@ frontend/revision.h: FORCE
@(git describe || echo) | sed -e 's/.*/#define REV "\0"/' > $@_
@diff -q $@_ $@ > /dev/null 2>&1 || cp $@_ $@
@rm $@_
-.PHONY: FORCE
%.o: %.S
$(CC) $(CFLAGS) -c $^ -o $@
@@ -213,9 +226,11 @@ plugins_:
clean_plugins:
endif
+.PHONY: all clean target_ plugins_ clean_plugins FORCE
+
# ----------- release -----------
-VER ?= $(shell git describe master)
+VER ?= $(shell git describe HEAD)
ifeq "$(PLATFORM)" "generic"
OUT = pcsx_rearmed_$(VER)
diff --git a/frontend/common/plat.h b/frontend/common/plat.h
index 0a9fc0b..1fb8767 100644
--- a/frontend/common/plat.h
+++ b/frontend/common/plat.h
@@ -45,6 +45,10 @@ int plat_is_dir(const char *path);
int plat_wait_event(int *fds_hnds, int count, int timeout_ms);
void plat_sleep_ms(int ms);
+void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed);
+void *plat_mremap(void *ptr, size_t oldsize, size_t newsize);
+void plat_munmap(void *ptr, size_t size);
+
/* timers, to be used for time diff and must refer to the same clock */
unsigned int plat_get_ticks_ms(void);
unsigned int plat_get_ticks_us(void);
diff --git a/frontend/libpicofe b/frontend/libpicofe
new file mode 160000
+Subproject 6ce097ba2f3cd1c269bacd032b775b6d296433f
diff --git a/frontend/libretro.c b/frontend/libretro.c
index 4305aa7..1eb2147 100644
--- a/frontend/libretro.c
+++ b/frontend/libretro.c
@@ -13,6 +13,7 @@
#include "../libpcsxcore/psxcounters.h"
#include "../libpcsxcore/new_dynarec/new_dynarec.h"
#include "../plugins/dfsound/out.h"
+#include "../plugins/gpulib/cspace.h"
#include "main.h"
#include "plugin.h"
#include "plugin_lib.h"
@@ -26,7 +27,6 @@ static retro_environment_t environ_cb;
static retro_audio_sample_batch_t audio_batch_cb;
static void *vout_buf;
-static int vout_width, vout_height;
static int samples_sent, samples_to_send;
static int plugins_opened;
static int native_rgb565;
@@ -42,14 +42,10 @@ static int vout_open(void)
return 0;
}
-static void *vout_set_mode(int w, int h, int bpp)
+static void vout_set_mode(int w, int h, int bpp)
{
- vout_width = w;
- vout_height = h;
- return vout_buf;
}
-/* FIXME: either teach PCSX to blit to RGB1555 or RetroArch to support RGB565 */
static void convert(void *buf, size_t bytes)
{
unsigned int i, v, *p = buf;
@@ -60,14 +56,39 @@ static void convert(void *buf, size_t bytes)
}
}
-static void *vout_flip(void)
+static void vout_flip(const void *vram, int stride, int bgr24, int w, int h)
{
- pl_rearmed_cbs.flip_cnt++;
- if (!native_rgb565)
- convert(vout_buf, vout_width * vout_height * 2);
- video_cb(vout_buf, vout_width, vout_height, vout_width * 2);
+ unsigned short *dest = vout_buf;
+ const unsigned short *src = vram;
+ int dstride = w, h1 = h;
+
+ if (vram == NULL) {
+ // blanking: clear vout_buf, the buffer that is handed to video_cb below
+ memset(vout_buf, 0, dstride * h * 2);
+ goto out;
+ }
- return vout_buf;
+ if (bgr24)
+ {
+ // XXX: could we switch to RETRO_PIXEL_FORMAT_XRGB8888 here?
+ for (; h1-- > 0; dest += dstride, src += stride)
+ {
+ bgr888_to_rgb565(dest, src, w * 3);
+ }
+ }
+ else
+ {
+ for (; h1-- > 0; dest += dstride, src += stride)
+ {
+ bgr555_to_rgb565(dest, src, w * 2);
+ }
+ }
+
+out:
+ if (!native_rgb565)
+ convert(vout_buf, w * h * 2);
+ video_cb(vout_buf, w, h, w * 2);
+ pl_rearmed_cbs.flip_cnt++;
}
static void vout_close(void)
diff --git a/frontend/linux/plat.c b/frontend/linux/plat.c
index b7152b5..4ed1e65 100644
--- a/frontend/linux/plat.c
+++ b/frontend/linux/plat.c
@@ -17,9 +17,17 @@
#include <time.h>
#include <unistd.h>
#include <sys/mman.h>
+#include <errno.h>
#include "../common/plat.h"
+/* XXX: maybe unhardcode pagesize? */
+#define HUGETLB_PAGESIZE (2 * 1024 * 1024)
+#define HUGETLB_THRESHOLD (HUGETLB_PAGESIZE / 2)
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x40000 /* arch specific */
+#endif
+
int plat_is_dir(const char *path)
{
@@ -126,16 +134,36 @@ int plat_wait_event(int *fds_hnds, int count, int timeout_ms)
return ret;
}
-void *plat_mmap(unsigned long addr, size_t size)
+void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed)
{
+ static int hugetlb_disabled;
+ int prot = PROT_READ | PROT_WRITE;
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS;
void *req, *ret;
req = (void *)addr;
- ret = mmap(req, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (need_exec)
+ prot |= PROT_EXEC;
+ if (is_fixed)
+ flags |= MAP_FIXED;
+ if (size >= HUGETLB_THRESHOLD && !hugetlb_disabled)
+ flags |= MAP_HUGETLB;
+
+ ret = mmap(req, size, prot, flags, -1, 0);
+ if (ret == MAP_FAILED && (flags & MAP_HUGETLB)) {
+ fprintf(stderr,
+ "warning: failed to do hugetlb mmap (%p, %zu): %d\n",
+ req, size, errno);
+ hugetlb_disabled = 1;
+ flags &= ~MAP_HUGETLB;
+ ret = mmap(req, size, prot, flags, -1, 0);
+ }
if (ret == MAP_FAILED)
return NULL;
- if (ret != req)
- printf("warning: mmaped to %p, requested %p\n", ret, req);
+
+ if (req != NULL && ret != req)
+ fprintf(stderr,
+ "warning: mmaped to %p, requested %p\n", ret, req);
return ret;
}
@@ -155,7 +183,18 @@ void *plat_mremap(void *ptr, size_t oldsize, size_t newsize)
void plat_munmap(void *ptr, size_t size)
{
- munmap(ptr, size);
+ int ret;
+
+ ret = munmap(ptr, size);
+ if (ret != 0 && (size & (HUGETLB_PAGESIZE - 1))) {
+ // perhaps an autorounded hugetlb mapping?
+ size = (size + HUGETLB_PAGESIZE - 1) & ~(HUGETLB_PAGESIZE - 1);
+ ret = munmap(ptr, size);
+ }
+ if (ret != 0) {
+ fprintf(stderr,
+ "munmap(%p, %zu) failed: %d\n", ptr, size, errno);
+ }
}
/* lprintf */
diff --git a/frontend/main.c b/frontend/main.c
index 19e8319..56b5cb7 100644
--- a/frontend/main.c
+++ b/frontend/main.c
@@ -143,6 +143,8 @@ void emu_set_default_config(void)
Config.PsxAuto = 1;
pl_rearmed_cbs.gpu_neon.allow_interlace = 2; // auto
+ pl_rearmed_cbs.gpu_neon.enhancement_enable =
+ pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0;
pl_rearmed_cbs.gpu_peops.iUseDither = 0;
pl_rearmed_cbs.gpu_peops.dwActFixes = 1<<7;
pl_rearmed_cbs.gpu_unai.abe_hack =
@@ -230,6 +232,14 @@ do_state_slot:
pl_rearmed_cbs.frameskip == 0 ? "OFF" : "1" );
plugin_call_rearmed_cbs();
break;
+ case SACTION_SWITCH_DISPMODE:
+ pl_switch_dispmode();
+ plugin_call_rearmed_cbs();
+ if (GPU_open != NULL && GPU_close != NULL) {
+ GPU_close();
+ GPU_open(&gpuDisp, "PCSX", NULL);
+ }
+ break;
case SACTION_SCREENSHOT:
{
char buf[MAXPATHLEN];
diff --git a/frontend/main.h b/frontend/main.h
index bdb4870..a03db8b 100644
--- a/frontend/main.h
+++ b/frontend/main.h
@@ -65,6 +65,7 @@ enum sched_action {
SACTION_NEXT_SSLOT,
SACTION_PREV_SSLOT,
SACTION_TOGGLE_FSKIP,
+ SACTION_SWITCH_DISPMODE,
SACTION_SCREENSHOT,
SACTION_VOLUME_UP,
SACTION_VOLUME_DOWN,
diff --git a/frontend/menu.c b/frontend/menu.c
index 42a53e1..d3ce06c 100644
--- a/frontend/menu.c
+++ b/frontend/menu.c
@@ -75,6 +75,7 @@ typedef enum
MA_OPT_SCALER,
MA_OPT_SCALER2,
MA_OPT_FILTERING,
+ MA_OPT_FILTERING2,
MA_OPT_SCALER_C,
} menu_id;
@@ -87,7 +88,7 @@ static int psx_clock;
static int memcard1_sel, memcard2_sel;
int g_opts, g_scaler;
int soft_scaling, analog_deadzone; // for Caanoo
-int filter;
+int filter, soft_filter;
#ifdef __ARM_ARCH_7A__
#define DEFAULT_PSX_CLOCK 57
@@ -213,6 +214,7 @@ static void menu_set_defconfig(void)
frameskip = 0;
analog_deadzone = 50;
soft_scaling = 1;
+ soft_filter = 0;
psx_clock = DEFAULT_PSX_CLOCK;
region = 0;
@@ -274,6 +276,7 @@ static const struct {
CE_INTVAL(g_layer_w),
CE_INTVAL(g_layer_h),
CE_INTVAL(filter),
+ CE_INTVAL(soft_filter),
CE_INTVAL(state_slot),
CE_INTVAL(cpu_clock),
CE_INTVAL(g_opts),
@@ -288,6 +291,8 @@ static const struct {
CE_INTVAL_P(gpu_unai.no_light),
CE_INTVAL_P(gpu_unai.no_blend),
CE_INTVAL_P(gpu_neon.allow_interlace),
+ CE_INTVAL_P(gpu_neon.enhancement_enable),
+ CE_INTVAL_P(gpu_neon.enhancement_no_main),
CE_INTVAL_P(gpu_peopsgl.bDrawDither),
CE_INTVAL_P(gpu_peopsgl.iFilterType),
CE_INTVAL_P(gpu_peopsgl.iFrameTexType),
@@ -661,6 +666,7 @@ me_bind_action emuctrl_actions[] =
{ "Next Save Slot ", 1 << SACTION_NEXT_SSLOT },
{ "Toggle Frameskip ", 1 << SACTION_TOGGLE_FSKIP },
{ "Take Screenshot ", 1 << SACTION_SCREENSHOT },
+ { "Switch Renderer ", 1 << SACTION_SWITCH_DISPMODE },
{ "Enter Menu ", 1 << SACTION_ENTER_MENU },
#ifdef __ARM_ARCH_7A__ /* XXX */
{ "Minimize ", 1 << SACTION_MINIMIZE },
@@ -1029,9 +1035,15 @@ static int menu_loop_keyconfig(int id, int keys)
// ------------ gfx options menu ------------
static const char *men_scaler[] = { "1x1", "scaled 4:3", "integer scaled 4:3", "fullscreen", "custom", NULL };
+static const char *men_soft_filter[] = { "None",
+#ifdef __ARM_NEON__
+ "scale2x", "eagle2x",
+#endif
+ NULL };
+static const char *men_dummy[] = { NULL };
static const char h_cscaler[] = "Displays the scaler layer, you can resize it\n"
"using d-pad or move it using R+d-pad";
-static const char *men_dummy[] = { NULL };
+static const char h_soft_filter[] = "Works only if game uses low resolution modes";
static int menu_loop_cscaler(int id, int keys)
{
@@ -1090,6 +1102,7 @@ static menu_entry e_menu_gfx_options[] =
mee_enum ("Scaler", MA_OPT_SCALER, g_scaler, men_scaler),
mee_onoff ("Software Scaling", MA_OPT_SCALER2, soft_scaling, 1),
mee_enum ("Filter", MA_OPT_FILTERING, filter, men_dummy),
+ mee_enum_h ("Software Filter", MA_OPT_FILTERING2, soft_filter, men_soft_filter, h_soft_filter),
// mee_onoff ("Vsync", 0, vsync, 1),
mee_cust_h ("Setup custom scaler", MA_OPT_SCALER_C, menu_loop_cscaler, NULL, h_cscaler),
mee_end,
@@ -1118,18 +1131,26 @@ void menu_set_filter_list(void *filters)
#ifdef __ARM_NEON__
-static const char h_gpu_neon[] = "Configure built-in NEON GPU plugin";
+static const char h_gpu_neon[] =
+ "Configure built-in NEON GPU plugin";
+static const char h_gpu_neon_enhanced[] =
+ "Renders in double resolution at the cost of lower performance\n"
+ "(not available for high resolution games)";
+static const char h_gpu_neon_enhanced_hack[] =
+ "Speed hack for above option (glitches some games)";
static const char *men_gpu_interlace[] = { "Off", "On", "Auto", NULL };
static menu_entry e_menu_plugin_gpu_neon[] =
{
mee_enum ("Enable interlace mode", 0, pl_rearmed_cbs.gpu_neon.allow_interlace, men_gpu_interlace),
+ mee_onoff_h ("Enhanced resolution (slow)", 0, pl_rearmed_cbs.gpu_neon.enhancement_enable, 1, h_gpu_neon_enhanced),
+ mee_onoff_h ("Enhanced res. speed hack", 0, pl_rearmed_cbs.gpu_neon.enhancement_no_main, 1, h_gpu_neon_enhanced_hack),
mee_end,
};
static int menu_loop_plugin_gpu_neon(int id, int keys)
{
- int sel = 0;
+ static int sel = 0;
me_loop(e_menu_plugin_gpu_neon, &sel);
return 0;
}
@@ -2247,6 +2268,7 @@ void menu_init(void)
#ifndef __ARM_ARCH_7A__ /* XXX */
me_enable(e_menu_gfx_options, MA_OPT_SCALER, 0);
me_enable(e_menu_gfx_options, MA_OPT_FILTERING, 0);
+ me_enable(e_menu_gfx_options, MA_OPT_FILTERING2, 0);
me_enable(e_menu_gfx_options, MA_OPT_SCALER_C, 0);
me_enable(e_menu_keyconfig, MA_CTRL_NUBS_BTNS, 0);
#else
diff --git a/frontend/menu.h b/frontend/menu.h
index 2062acd..221be15 100644
--- a/frontend/menu.h
+++ b/frontend/menu.h
@@ -22,9 +22,15 @@ enum g_scaler_opts {
SCALE_CUSTOM,
};
+enum g_soft_filter_opts {
+ SOFT_FILTER_NONE,
+ SOFT_FILTER_SCALE2X,
+ SOFT_FILTER_EAGLE2X,
+};
+
extern int g_opts, g_scaler;
extern int soft_scaling, analog_deadzone;
-extern int filter;
+extern int filter, soft_filter;
extern int g_menuscreen_w;
extern int g_menuscreen_h;
diff --git a/frontend/pandora/pcsx.sh b/frontend/pandora/pcsx.sh
index 0957b94..bc1d6c5 100755
--- a/frontend/pandora/pcsx.sh
+++ b/frontend/pandora/pcsx.sh
@@ -5,10 +5,15 @@ nub0mode=`cat /proc/pandora/nub0/mode`
nub1mode=`cat /proc/pandora/nub1/mode`
/usr/pandora/scripts/op_nubchange.sh absolute absolute
+# 4MB for RAM (2+align) + 2MB for vram (1+overdraw) + 10MB for gpu_neon (8+overdraw)
+# no big deal if this fails, only performance loss
+sudo -n /usr/pandora/scripts/op_hugetlb.sh 16
+
./pcsx "$@"
# restore stuff if pcsx crashes
./picorestore
sudo -n /usr/pandora/scripts/op_lcdrate.sh 60
+sudo -n /usr/pandora/scripts/op_hugetlb.sh 0
/usr/pandora/scripts/op_nubchange.sh $nub0mode $nub1mode
diff --git a/frontend/plat_omap.c b/frontend/plat_omap.c
index b01c634..e5b6c04 100644
--- a/frontend/plat_omap.c
+++ b/frontend/plat_omap.c
@@ -52,8 +52,9 @@ static int omap_setup_layer_(int fd, int enabled, int x, int y, int w, int h)
perror("SETUP_PLANE");
}
- if (mi.size < 640*512*3*3) {
- mi.size = 640*512*3*3;
+ // up to 1024x512 (2x resolution enhancement)
+ if (mi.size < 1024*512*2 * 3) {
+ mi.size = 1024*512*2 * 3;
ret = ioctl(fd, OMAPFB_SETUP_MEM, &mi);
if (ret != 0) {
perror("SETUP_MEM");
diff --git a/frontend/plat_pandora.c b/frontend/plat_pandora.c
index 9ec747d..b82450c 100644
--- a/frontend/plat_pandora.c
+++ b/frontend/plat_pandora.c
@@ -65,6 +65,7 @@ static const struct in_default_bind in_evdev_defbinds[] = {
{ KEY_4, IN_BINDTYPE_EMU, SACTION_NEXT_SSLOT },
{ KEY_5, IN_BINDTYPE_EMU, SACTION_TOGGLE_FSKIP },
{ KEY_6, IN_BINDTYPE_EMU, SACTION_SCREENSHOT },
+ { KEY_7, IN_BINDTYPE_EMU, SACTION_SWITCH_DISPMODE },
{ 0, 0, 0 }
};
diff --git a/frontend/plat_pollux.c b/frontend/plat_pollux.c
index 1dafb7c..52a09b1 100644
--- a/frontend/plat_pollux.c
+++ b/frontend/plat_pollux.c
@@ -305,12 +305,13 @@ static void spend_cycles(int loops)
#define DMA_REG(x) memregl[(DMA_BASE6 + x) >> 2]
/* this takes ~1.5ms, while ldm/stm ~1.95ms */
-static void raw_flip_dma(int x, int y)
+static void raw_flip_dma(const void *vram, int stride, int bgr24, int w, int h)
{
+ unsigned int pixel_offset = psx_vram - (unsigned short *)vram;
unsigned int dst = fb_paddrs[fb_work_buf] +
(fb_offset_y * 320 + fb_offset_x) * psx_bpp / 8;
- int spsx_line = y + psx_offset_y;
- int spsx_offset = (x + psx_offset_x) & 0x3f8;
+ int spsx_line = pixel_offset / 1024 + psx_offset_y;
+ int spsx_offset = (pixel_offset + psx_offset_x) & 0x3f8;
int dst_stride = 320 * psx_bpp / 8;
int len = psx_src_width * psx_bpp / 8;
int i;
@@ -344,7 +345,7 @@ static void raw_flip_dma(int x, int y)
if (psx_bpp == 16) {
pl_vout_buf = g_menuscreen_ptr;
- pl_print_hud(fb_offset_x);
+ pl_print_hud(w, h, fb_offset_x);
}
g_menuscreen_ptr = fb_flip();
@@ -354,26 +355,24 @@ static void raw_flip_dma(int x, int y)
}
#define make_flip_func(name, blitfunc) \
-static void name(int x, int y) \
+static void name(const void *vram_, int stride, int bgr24, int w, int h) \
{ \
- unsigned short *vram = psx_vram; \
+ const unsigned short *vram = vram_; \
unsigned char *dst = (unsigned char *)g_menuscreen_ptr + \
(fb_offset_y * 320 + fb_offset_x) * psx_bpp / 8; \
- unsigned int src = (y + psx_offset_y) * 1024 + x + psx_offset_x; \
int dst_stride = 320 * psx_bpp / 8; \
int len = psx_src_width * psx_bpp / 8; \
int i; \
\
pcnt_start(PCNT_BLIT); \
\
- for (i = psx_src_height; i > 0; i--, src += psx_step * 1024, dst += dst_stride) { \
- src &= 1024*512-1; \
- blitfunc(dst, vram + src, len); \
- } \
+ vram += psx_offset_y * 1024 + psx_offset_x; \
+ for (i = psx_src_height; i > 0; i--, vram += psx_step * 1024, dst += dst_stride)\
+ blitfunc(dst, vram, len); \
\
if (psx_bpp == 16) { \
pl_vout_buf = g_menuscreen_ptr; \
- pl_print_hud(fb_offset_x); \
+ pl_print_hud(w, h, fb_offset_x); \
} \
\
g_menuscreen_ptr = fb_flip(); \
@@ -402,20 +401,20 @@ void *plat_gvideo_set_mode(int *w_, int *h_, int *bpp_)
switch (w + (bpp != 16) + !soft_scaling) {
case 640:
- pl_rearmed_cbs.pl_vout_raw_flip = raw_flip_soft_640;
+ pl_rearmed_cbs.pl_vout_flip = raw_flip_soft_640;
w_max = 640;
break;
case 512:
- pl_rearmed_cbs.pl_vout_raw_flip = raw_flip_soft_512;
+ pl_rearmed_cbs.pl_vout_flip = raw_flip_soft_512;
w_max = 512;
break;
case 384:
case 368:
- pl_rearmed_cbs.pl_vout_raw_flip = raw_flip_soft_368;
+ pl_rearmed_cbs.pl_vout_flip = raw_flip_soft_368;
w_max = 368;
break;
default:
- pl_rearmed_cbs.pl_vout_raw_flip = have_warm ? raw_flip_dma : raw_flip_soft;
+ pl_rearmed_cbs.pl_vout_flip = have_warm ? raw_flip_dma : raw_flip_soft;
w_max = 320;
break;
}
@@ -621,7 +620,7 @@ void plat_init(void)
if (mixerdev == -1)
perror("open(/dev/mixer)");
- pl_rearmed_cbs.pl_vout_raw_flip = have_warm ? raw_flip_dma : raw_flip_soft;
+ pl_rearmed_cbs.pl_vout_flip = have_warm ? raw_flip_dma : raw_flip_soft;
pl_rearmed_cbs.pl_vout_set_raw_vram = pl_vout_set_raw_vram;
psx_src_width = 320;
diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c
index 4dbb9a7..3ee5947 100644
--- a/frontend/plugin_lib.c
+++ b/frontend/plugin_lib.c
@@ -21,13 +21,17 @@
#include "linux/fbdev.h"
#include "common/fonts.h"
#include "common/input.h"
+#include "common/plat.h"
#include "menu.h"
#include "main.h"
#include "plat.h"
#include "pcnt.h"
#include "pl_gun_ts.h"
+#include "libpicofe/arm/neon_scale2x.h"
+#include "libpicofe/arm/neon_eagle2x.h"
#include "../libpcsxcore/new_dynarec/new_dynarec.h"
#include "../libpcsxcore/psemu_plugin_defs.h"
+#include "../plugins/gpulib/cspace.h"
int in_type1, in_type2;
int in_a1[2] = { 127, 127 }, in_a2[2] = { 127, 127 };
@@ -38,6 +42,7 @@ void *tsdev;
void *pl_vout_buf;
int g_layer_x, g_layer_y, g_layer_w, g_layer_h;
static int pl_vout_w, pl_vout_h, pl_vout_bpp; /* output display/layer */
+static int pl_vout_scale;
static int psx_w, psx_h, psx_bpp;
static int vsync_cnt;
static int is_pal, frame_interval, frame_interval1024;
@@ -113,10 +118,8 @@ static __attribute__((noinline)) void draw_active_chans(int vout_w, int vout_h)
}
}
-void pl_print_hud(int xborder)
+void pl_print_hud(int w, int h, int xborder)
{
- int w = pl_vout_w, h = pl_vout_h;
-
if (h < 16)
return;
@@ -184,55 +187,142 @@ static void update_layer_size(int w, int h)
if (g_layer_h > g_menuscreen_h) g_layer_h = g_menuscreen_h;
}
-static void *pl_vout_set_mode(int w, int h, int bpp)
+// XXX: this is platform specific really
+static int resolution_ok(int w, int h)
{
+ return w <= 1024 && h <= 512;
+}
+
+static void pl_vout_set_mode(int w, int h, int bpp)
+{
+ int vout_w, vout_h, vout_bpp;
+
// special h handling, Wipeout likes to change it by 1-6
static int vsync_cnt_ms_prev;
if ((unsigned int)(vsync_cnt - vsync_cnt_ms_prev) < 5*60)
h = (h + 7) & ~7;
vsync_cnt_ms_prev = vsync_cnt;
- if (w == psx_w && h == psx_h && bpp == psx_bpp)
- return pl_vout_buf;
+ vout_w = psx_w = w;
+ vout_h = psx_h = h;
+ vout_bpp = psx_bpp = bpp;
+
+ pl_vout_scale = 1;
+#ifdef __ARM_NEON__
+ if (soft_filter) {
+ if (resolution_ok(w * 2, h * 2) && bpp == 16) {
+ vout_w *= 2;
+ vout_h *= 2;
+ pl_vout_scale = 2;
+ }
+ else {
+ // filter unavailable
+ hud_msg[0] = 0;
+ }
+ }
+#endif
- pl_vout_w = psx_w = w;
- pl_vout_h = psx_h = h;
- pl_vout_bpp = psx_bpp = bpp;
+ if (pl_vout_buf != NULL && vout_w == pl_vout_w && vout_h == pl_vout_h
+ && vout_bpp == pl_vout_bpp)
+ return;
- update_layer_size(pl_vout_w, pl_vout_h);
+ update_layer_size(vout_w, vout_h);
- pl_vout_buf = plat_gvideo_set_mode(&pl_vout_w, &pl_vout_h, &pl_vout_bpp);
- if (pl_vout_buf == NULL && pl_rearmed_cbs.pl_vout_raw_flip == NULL)
+ pl_vout_buf = plat_gvideo_set_mode(&vout_w, &vout_h, &vout_bpp);
+ if (pl_vout_buf == NULL)
fprintf(stderr, "failed to set mode %dx%d@%d\n",
psx_w, psx_h, psx_bpp);
+ else {
+ pl_vout_w = vout_w;
+ pl_vout_h = vout_h;
+ pl_vout_bpp = vout_bpp;
+ }
menu_notify_mode_change(pl_vout_w, pl_vout_h, pl_vout_bpp);
-
- return pl_vout_buf;
}
-// only used if raw flip is not defined
-static void *pl_vout_flip(void)
+static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h)
{
- pl_rearmed_cbs.flip_cnt++;
+ static int doffs_old, clear_counter;
+ unsigned char *dest = pl_vout_buf;
+ const unsigned short *src = vram;
+ int dstride = pl_vout_w, h1 = h;
+ int doffs;
+
+ if (dest == NULL)
+ goto out;
+
+ if (vram == NULL) {
+ // blanking
+ memset(pl_vout_buf, 0, dstride * pl_vout_h * pl_vout_bpp / 8);
+ goto out;
+ }
+
+ // borders
+ doffs = (dstride - w * pl_vout_scale) / 2 & ~1;
+ dest += doffs * 2;
+
+ if (doffs > doffs_old)
+ clear_counter = 2;
+ doffs_old = doffs;
+
+ if (clear_counter > 0) {
+ memset(pl_vout_buf, 0, dstride * pl_vout_h * pl_vout_bpp / 8);
+ clear_counter--;
+ }
- if (pl_vout_buf != NULL)
- pl_print_hud(0);
+ if (bgr24)
+ {
+ if (pl_rearmed_cbs.only_16bpp) {
+ for (; h1-- > 0; dest += dstride * 2, src += stride)
+ {
+ bgr888_to_rgb565(dest, src, w * 3);
+ }
+ }
+ else {
+ dest -= doffs * 2;
+ dest += (doffs / 8) * 24;
+ for (; h1-- > 0; dest += dstride * 3, src += stride)
+ {
+ bgr888_to_rgb888(dest, src, w * 3);
+ }
+ }
+ }
+#ifdef __ARM_NEON__
+ else if (soft_filter == SOFT_FILTER_SCALE2X && pl_vout_scale == 2)
+ {
+ neon_scale2x_16_16(src, (void *)dest, w,
+ stride * 2, dstride * 2, h1);
+ }
+ else if (soft_filter == SOFT_FILTER_EAGLE2X && pl_vout_scale == 2)
+ {
+ neon_eagle2x_16_16(src, (void *)dest, w,
+ stride * 2, dstride * 2, h1);
+ }
+#endif
+ else
+ {
+ for (; h1-- > 0; dest += dstride * 2, src += stride)
+ {
+ bgr555_to_rgb565(dest, src, w * 2);
+ }
+ }
+
+ pl_print_hud(w * pl_vout_scale, h * pl_vout_scale, 0);
+
+out:
// let's flip now
pl_vout_buf = plat_gvideo_flip();
- return pl_vout_buf;
+ pl_rearmed_cbs.flip_cnt++;
}
static int pl_vout_open(void)
{
struct timeval now;
- int h;
- // force mode update
- h = psx_h;
- psx_h--;
- pl_vout_buf = pl_vout_set_mode(psx_w, h, psx_bpp);
+ // force mode update on pl_vout_set_mode() call from gpulib/vout_pl
+ pl_vout_buf = NULL;
plat_gvideo_open(is_pal);
@@ -249,6 +339,11 @@ static void pl_vout_close(void)
plat_gvideo_close();
}
+static void pl_set_gpu_caps(int caps)
+{
+ pl_rearmed_cbs.gpu_caps = caps;
+}
+
void *pl_prepare_screenshot(int *w, int *h, int *bpp)
{
void *ret = plat_prepare_screenshot(w, h, bpp);
@@ -262,6 +357,75 @@ void *pl_prepare_screenshot(int *w, int *h, int *bpp)
return pl_vout_buf;
}
+/* display/rendering mode switcher */
+static int dispmode_default(void)
+{
+ pl_rearmed_cbs.gpu_neon.enhancement_enable = 0;
+ soft_filter = SOFT_FILTER_NONE;
+ snprintf(hud_msg, sizeof(hud_msg), "default mode");
+ return 1;
+}
+
+int dispmode_doubleres(void)
+{
+ if (!(pl_rearmed_cbs.gpu_caps & GPU_CAP_SUPPORTS_2X)
+ || !resolution_ok(psx_w * 2, psx_h * 2) || psx_bpp != 16)
+ return 0;
+
+ dispmode_default();
+ pl_rearmed_cbs.gpu_neon.enhancement_enable = 1;
+ snprintf(hud_msg, sizeof(hud_msg), "double resolution");
+ return 1;
+}
+
+int dispmode_scale2x(void)
+{
+ if (psx_bpp != 16)
+ return 0;
+
+ dispmode_default();
+ soft_filter = SOFT_FILTER_SCALE2X;
+ snprintf(hud_msg, sizeof(hud_msg), "scale2x");
+ return 1;
+}
+
+int dispmode_eagle2x(void)
+{
+ if (psx_bpp != 16)
+ return 0;
+
+ dispmode_default();
+ soft_filter = SOFT_FILTER_EAGLE2X;
+ snprintf(hud_msg, sizeof(hud_msg), "eagle2x");
+ return 1;
+}
+
+static int (*dispmode_switchers[])(void) = {
+ dispmode_default,
+#ifdef __ARM_NEON__
+ dispmode_doubleres,
+ dispmode_scale2x,
+ dispmode_eagle2x,
+#endif
+};
+
+static int dispmode_current;
+
+void pl_switch_dispmode(void)
+{
+ if (pl_rearmed_cbs.gpu_caps & GPU_CAP_OWNS_DISPLAY)
+ return;
+
+ while (1) {
+ dispmode_current++;
+ if (dispmode_current >=
+ sizeof(dispmode_switchers) / sizeof(dispmode_switchers[0]))
+ dispmode_current = 0;
+ if (dispmode_switchers[dispmode_current]())
+ break;
+ }
+}
+
#ifndef MAEMO
static void update_analogs(void)
{
@@ -442,16 +606,31 @@ void pl_timing_prepare(int is_pal_)
static void pl_text_out16_(int x, int y, const char *text)
{
- int i, l, len = strlen(text), w = pl_vout_w;
- unsigned short *screen = (unsigned short *)pl_vout_buf + x + y * w;
+ int i, l, w = pl_vout_w;
+ unsigned short *screen;
unsigned short val = 0xffff;
- for (i = 0; i < len; i++, screen += 8)
+ x &= ~1;
+ screen = (unsigned short *)pl_vout_buf + x + y * w;
+ for (i = 0; ; i++, screen += 8)
{
+ char c = text[i];
+ if (c == 0)
+ break;
+ if (c == ' ')
+ continue;
+
for (l = 0; l < 8; l++)
{
- unsigned char fd = fontdata8x8[text[i] * 8 + l];
+ unsigned char fd = fontdata8x8[c * 8 + l];
unsigned short *s = screen + l * w;
+ unsigned int *s32 = (void *)s;
+
+ s32[0] = (s32[0] >> 1) & 0x7bef7bef;
+ s32[1] = (s32[1] >> 1) & 0x7bef7bef;
+ s32[2] = (s32[2] >> 1) & 0x7bef7bef;
+ s32[3] = (s32[3] >> 1) & 0x7bef7bef;
+
if (fd&0x80) s[0] = val;
if (fd&0x40) s[1] = val;
if (fd&0x20) s[2] = val;
@@ -484,12 +663,26 @@ static void pl_get_layer_pos(int *x, int *y, int *w, int *h)
*h = g_layer_h;
}
+static void *pl_mmap(unsigned int size)
+{
+ return plat_mmap(0, size, 0, 0);
+}
+
+static void pl_munmap(void *ptr, unsigned int size)
+{
+ plat_munmap(ptr, size);
+}
+
struct rearmed_cbs pl_rearmed_cbs = {
pl_get_layer_pos,
pl_vout_open,
pl_vout_set_mode,
pl_vout_flip,
pl_vout_close,
+
+ .mmap = pl_mmap,
+ .munmap = pl_munmap,
+ .pl_set_gpu_caps = pl_set_gpu_caps,
};
/* watchdog */
diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h
index bcf74ac..332fbc2 100644
--- a/frontend/plugin_lib.h
+++ b/frontend/plugin_lib.h
@@ -31,7 +31,8 @@ void pl_text_out16(int x, int y, const char *texto, ...);
void pl_start_watchdog(void);
void *pl_prepare_screenshot(int *w, int *h, int *bpp);
void pl_init(void);
-void pl_print_hud(int xborder);
+void pl_print_hud(int width, int height, int xborder);
+void pl_switch_dispmode(void);
void pl_timing_prepare(int is_pal);
void pl_frame_limit(void);
@@ -41,12 +42,15 @@ void pl_update_gun(int *xn, int *xres, int *y, int *in);
struct rearmed_cbs {
void (*pl_get_layer_pos)(int *x, int *y, int *w, int *h);
int (*pl_vout_open)(void);
- void *(*pl_vout_set_mode)(int w, int h, int bpp);
- void *(*pl_vout_flip)(void);
+ void (*pl_vout_set_mode)(int w, int h, int bpp);
+ void (*pl_vout_flip)(const void *vram, int stride, int bgr24,
+ int w, int h);
void (*pl_vout_close)(void);
- // these are only used by some frontends
- void (*pl_vout_raw_flip)(int x, int y);
+ void *(*mmap)(unsigned int size);
+ void (*munmap)(void *ptr, unsigned int size);
+ // only used by some frontends
void (*pl_vout_set_raw_vram)(void *vram);
+ void (*pl_set_gpu_caps)(int caps);
// some stats, for display by some plugins
int flips_per_sec, cpu_usage;
float vsps_cur; // currect vsync/s
@@ -60,6 +64,8 @@ struct rearmed_cbs {
unsigned int only_16bpp; // platform is 16bpp-only
struct {
int allow_interlace; // 0 off, 1 on, 2 guess
+ int enhancement_enable;
+ int enhancement_no_main;
} gpu_neon;
struct {
int iUseDither;
@@ -78,10 +84,17 @@ struct rearmed_cbs {
int iUseMask, bOpaquePass, bAdvancedBlend, bUseFastMdec;
int iVRamSize, iTexGarbageCollection;
} gpu_peopsgl;
+ // misc
+ int gpu_caps;
};
extern struct rearmed_cbs pl_rearmed_cbs;
+enum gpu_plugin_caps {
+ GPU_CAP_OWNS_DISPLAY = (1 << 0),
+ GPU_CAP_SUPPORTS_2X = (1 << 1),
+};
+
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
#endif
diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c
index 1cabd53..ddcd05b 100644
--- a/libpcsxcore/psxmem.c
+++ b/libpcsxcore/psxmem.c
@@ -60,6 +60,16 @@ u8 **psxMemRLUT = NULL;
0xbfc0_0000-0xbfc7_ffff BIOS Mirror (512K) Uncached
*/
+#if 1
+void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed);
+void plat_munmap(void *ptr, size_t size);
+#else
+#define plat_mmap(addr, size, need_exec, is_fixed) \
+ mmap((void *)addr, size, PROT_WRITE | PROT_READ, \
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0)
+#define plat_munmap munmap
+#endif
+
int psxMemInit() {
int i;
@@ -68,8 +78,7 @@ int psxMemInit() {
memset(psxMemRLUT, 0, 0x10000 * sizeof(void *));
memset(psxMemWLUT, 0, 0x10000 * sizeof(void *));
- psxM = mmap((void *)0x80000000, 0x00210000,
- PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ psxM = plat_mmap(0x80000000, 0x00210000, 0, 1);
#ifndef RAM_FIXED
if (psxM == MAP_FAILED)
psxM = mmap((void *)0x70000000, 0x00210000,
@@ -144,7 +153,7 @@ void psxMemReset() {
}
void psxMemShutdown() {
- munmap(psxM, 0x00210000);
+ plat_munmap(psxM, 0x00210000);
munmap(psxH, 0x1f800000);
munmap(psxR, 0x80000);
diff --git a/plugins/dfxvideo/draw_pl.c b/plugins/dfxvideo/draw_pl.c
index dffd52b..06a635d 100644
--- a/plugins/dfxvideo/draw_pl.c
+++ b/plugins/dfxvideo/draw_pl.c
@@ -19,56 +19,26 @@ BOOL bCheckMask = FALSE;
unsigned short sSetMask;
unsigned long lSetMask;
-static void blit(void *vout_buf)
+static void blit(void)
{
int px = PSXDisplay.DisplayPosition.x & ~1; // XXX: align needed by bgr*_to_...
int py = PSXDisplay.DisplayPosition.y;
int w = PreviousPSXDisplay.Range.x1;
int h = PreviousPSXDisplay.DisplayMode.y;
- int pitch = PreviousPSXDisplay.DisplayMode.x;
unsigned short *srcs = psxVuw + py * 1024 + px;
- unsigned char *dest = vout_buf;
if (w <= 0)
return;
- pitch *= (PSXDisplay.RGB24 && !rcbs->only_16bpp) ? 3 : 2;
-
// account for centering
h -= PreviousPSXDisplay.Range.y0;
- dest += PreviousPSXDisplay.Range.y0 / 2 * pitch;
- dest += (PreviousPSXDisplay.Range.x0 & ~3) * 2; // must align here too..
-
- if (PSXDisplay.RGB24)
- {
- if (!rcbs->only_16bpp)
- {
- for (; h-- > 0; dest += pitch, srcs += 1024)
- {
- bgr888_to_rgb888(dest, srcs, w * 3);
- }
- }
- else
- {
- for (; h-- > 0; dest += pitch, srcs += 1024)
- {
- bgr888_to_rgb565(dest, srcs, w * 3);
- }
- }
- }
- else
- {
- for (; h-- > 0; dest += pitch, srcs += 1024)
- {
- bgr555_to_rgb565(dest, srcs, w * 2);
- }
- }
+
+ rcbs->pl_vout_flip(srcs, 1024, PSXDisplay.RGB24, w, h);
}
void DoBufferSwap(void)
{
static int fbw, fbh, fb24bpp;
- static void *vout_buf;
if (PreviousPSXDisplay.DisplayMode.x == 0 || PreviousPSXDisplay.DisplayMode.y == 0)
return;
@@ -80,17 +50,12 @@ void DoBufferSwap(void)
fbw = PreviousPSXDisplay.DisplayMode.x;
fbh = PreviousPSXDisplay.DisplayMode.y;
fb24bpp = PSXDisplay.RGB24;
- vout_buf = rcbs->pl_vout_set_mode(fbw, fbh, fb24bpp ? 24 : 16);
+ rcbs->pl_vout_set_mode(fbw, fbh, fb24bpp ? 24 : 16);
}
pcnt_start(PCNT_BLIT);
- if (rcbs->pl_vout_raw_flip != NULL)
- rcbs->pl_vout_raw_flip(PSXDisplay.DisplayPosition.x, PSXDisplay.DisplayPosition.y);
- else
- blit(vout_buf);
+ blit();
pcnt_end(PCNT_BLIT);
-
- vout_buf = rcbs->pl_vout_flip();
}
void DoClearScreenBuffer(void)
diff --git a/plugins/dfxvideo/gpu.c b/plugins/dfxvideo/gpu.c
index 9fa08fe..3d20dfa 100644
--- a/plugins/dfxvideo/gpu.c
+++ b/plugins/dfxvideo/gpu.c
@@ -1143,6 +1143,8 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
dwFrameRateTicks = cbs->gpu_peops.dwFrameRateTicks;
if (cbs->pl_vout_set_raw_vram)
cbs->pl_vout_set_raw_vram(psxVub);
+ if (cbs->pl_set_gpu_caps)
+ cbs->pl_set_gpu_caps(0);
skip_advice = &cbs->fskip_advice;
fps_skip = 100.0f;
diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c
index 12aa0a3..d98520c 100644
--- a/plugins/dfxvideo/gpulib_if.c
+++ b/plugins/dfxvideo/gpulib_if.c
@@ -265,9 +265,9 @@ long lLowerpart;
/////////////////////////////////////////////////////////////////////////////
-int renderer_init(void)
+static void set_vram(void *vram)
{
- psxVub=(void *)gpu.vram;
+ psxVub=vram;
psxVsb=(signed char *)psxVub; // different ways of accessing PSX VRAM
psxVsw=(signed short *)psxVub;
@@ -276,6 +276,11 @@ int renderer_init(void)
psxVul=(uint32_t *)psxVub;
psxVuw_eom=psxVuw+1024*512; // pre-calc of end of vram
+}
+
+int renderer_init(void)
+{
+ set_vram(gpu.vram);
PSXDisplay.RGB24 = FALSE; // init some stuff
PSXDisplay.Interlaced = FALSE;
@@ -294,6 +299,14 @@ int renderer_init(void)
return 0;
}
+void renderer_finish(void)
+{
+}
+
+void renderer_notify_res_change(void)
+{
+}
+
extern const unsigned char cmd_lengths[256];
int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
@@ -408,4 +421,7 @@ void renderer_set_config(const struct rearmed_cbs *cbs)
{
iUseDither = cbs->gpu_peops.iUseDither;
dwActFixes = cbs->gpu_peops.dwActFixes;
+ if (cbs->pl_set_gpu_caps)
+ cbs->pl_set_gpu_caps(0);
+ set_vram(gpu.vram);
}
diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c
index c25ad8b..068dc41 100644
--- a/plugins/gpu-gles/gpulib_if.c
+++ b/plugins/gpu-gles/gpulib_if.c
@@ -479,10 +479,15 @@ switch((gdata>>24)&0xff)
static int is_opened;
-int renderer_init(void)
+static void set_vram(void *vram)
{
- psxVub=(void *)gpu.vram;
+ psxVub=vram;
psxVuw=(unsigned short *)psxVub;
+}
+
+int renderer_init(void)
+{
+ set_vram(gpu.vram);
PSXDisplay.RGB24 = FALSE; // init some stuff
PSXDisplay.Interlaced = FALSE;
@@ -500,6 +505,14 @@ int renderer_init(void)
return 0;
}
+void renderer_finish(void)
+{
+}
+
+void renderer_notify_res_change(void)
+{
+}
+
extern const unsigned char cmd_lengths[256];
// XXX: mostly dupe code from soft peops
@@ -702,6 +715,10 @@ void renderer_set_config(const struct rearmed_cbs *cbs_)
bUseFastMdec = cbs->gpu_peopsgl.bUseFastMdec;
iTexGarbageCollection = cbs->gpu_peopsgl.iTexGarbageCollection;
iVRamSize = cbs->gpu_peopsgl.iVRamSize;
+ if (cbs->pl_set_gpu_caps)
+ cbs->pl_set_gpu_caps(GPU_CAP_OWNS_DISPLAY);
+
+ set_vram(gpu.vram);
}
void SetAspectRatio(void)
diff --git a/plugins/gpu_neon/Makefile b/plugins/gpu_neon/Makefile
index 8a7342b..08bf0ee 100644
--- a/plugins/gpu_neon/Makefile
+++ b/plugins/gpu_neon/Makefile
@@ -1,4 +1,4 @@
-CFLAGS += -ggdb -Wall -O2
+CFLAGS += -ggdb -Wall -O2 -DNDEBUG
include ../../config.mak
diff --git a/plugins/gpu_neon/psx_gpu/common.h b/plugins/gpu_neon/psx_gpu/common.h
index f299f79..d5cf3e9 100644
--- a/plugins/gpu_neon/psx_gpu/common.h
+++ b/plugins/gpu_neon/psx_gpu/common.h
@@ -18,5 +18,7 @@ typedef unsigned long long int u64;
#include "vector_ops.h"
#include "psx_gpu.h"
+#define unlikely(x) __builtin_expect((x), 0)
+
#endif
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c
index 68996c1..2cba878 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c
@@ -47,7 +47,8 @@ u32 zero_block_spans = 0;
u32 texture_cache_loads = 0;
u32 false_modulated_blocks = 0;
-u32 reciprocal_table[512];
+/* double size for enhancement */
+u32 reciprocal_table[512 * 2];
typedef s32 fixed_type;
@@ -453,7 +454,7 @@ void setup_blocks_shaded_untextured_undithered_unswizzled_indirect(
void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
{
- if((psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) &&
+ if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) &&
(psx_gpu->primitive_type == PRIMITIVE_TYPE_SPRITE))
{
u32 num_blocks_dest = 0;
@@ -463,7 +464,7 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu)
u16 *vram_ptr = psx_gpu->vram_ptr;
u32 i;
- if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD)
+ if(psx_gpu->render_mode & RENDER_INTERLACE_ODD)
{
for(i = 0; i < psx_gpu->num_blocks; i++)
{
@@ -566,7 +567,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
vec_4x32u uvrg_base;
vec_4x32u b_base;
- vec_4x32u const_0x8000;
+ vec_4x32u uvrgb_phase;
vec_4x16s d0_a_d3_c, d0_b, d0_c;
vec_4x16s d1_a, d1_b, d1_c_d2_a;
@@ -595,12 +596,12 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
setup_gradient_calculation_input(1, b);
setup_gradient_calculation_input(2, c);
- dup_4x32b(const_0x8000, 0x8000);
+ dup_4x32b(uvrgb_phase, psx_gpu->uvrgb_phase);
shl_long_4x16b(uvrg_base, x0_a_y0_c, 16);
shl_long_4x16b(b_base, x0_b, 16);
- add_4x32b(uvrg_base, uvrg_base, const_0x8000);
- add_4x32b(b_base, b_base, const_0x8000);
+ add_4x32b(uvrg_base, uvrg_base, uvrgb_phase);
+ add_4x32b(b_base, b_base, uvrgb_phase);
// Can probably pair these, but it'll require careful register allocation
sub_4x16b(d0_a_d3_c, x1_a_y1_c, x0_a_y0_c);
@@ -766,6 +767,26 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b) \
+#ifndef NDEBUG
+#define setup_spans_debug_check(span_edge_data_element) \
+{ \
+ u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \
+ if (_num_spans > MAX_SPANS) \
+ *(int *)0 = 1; \
+ if (_num_spans < psx_gpu->num_spans) \
+ { \
+ if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \
+ *(int *)0 = 1; \
+ if(span_edge_data_element.y > 2048) \
+ *(int *)0 = 1; \
+ } \
+} \
+
+#else
+#define setup_spans_debug_check(span_edge_data_element) \
+
+#endif
+
#define setup_spans_prologue_alternate_yes() \
vec_2x64s alternate_x; \
vec_2x64s alternate_dx_dy; \
@@ -854,7 +875,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
\
dup_2x32b(edge_shifts, edge_shift); \
sub_2x32b(heights_b, heights, c_0x01); \
- shr_2x32b(height_reciprocals, edge_shifts, 12); \
+ shr_2x32b(height_reciprocals, edge_shifts, 10); \
\
mla_2x32b(heights_b, x_starts, heights); \
bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \
@@ -883,8 +904,8 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
sub_2x32b(widths, x_ends, x_starts); \
width_alt = x_c - start_c; \
\
- shr_2x32b(height_reciprocals, edge_shifts, 12); \
- height_reciprocal_alt = edge_shift_alt >> 12; \
+ shr_2x32b(height_reciprocals, edge_shifts, 10); \
+ height_reciprocal_alt = edge_shift_alt >> 10; \
\
bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \
edge_shift_alt &= 0x1F; \
@@ -1069,6 +1090,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
span_edge_data[i].num_blocks = left_right_x_16.high.e[i]; \
span_edge_data[i].right_mask = span_shift.e[i]; \
span_edge_data[i].y = y_x4.e[i]; \
+ setup_spans_debug_check(span_edge_data[i]); \
} \
\
span_edge_data += 4; \
@@ -1406,12 +1428,16 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a,
y_x4.e[3] = y_a + 3;
setup_spans_adjust_edges_alternate_no(index_left, index_right);
+ // FIXME: overflow corner case
+ if(psx_gpu->num_spans + height_minor_b == MAX_SPANS)
+ height_minor_b &= ~3;
+
psx_gpu->num_spans += height_minor_b;
- do
+ while(height_minor_b > 0)
{
setup_spans_set_x4(none, down, no);
height_minor_b -= 4;
- } while(height_minor_b > 0);
+ }
}
left_split_triangles++;
@@ -1872,7 +1898,7 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \
if(span_num_blocks) \
{ \
y = span_edge_data->y; \
- fb_ptr = psx_gpu->vram_ptr + span_edge_data->left_x + (y * 1024); \
+ fb_ptr = psx_gpu->vram_out_ptr + span_edge_data->left_x + (y * 1024); \
\
setup_blocks_span_initialize_##shading##_##texturing(); \
setup_blocks_span_initialize_##dithering(texturing); \
@@ -2905,8 +2931,8 @@ char *render_block_flag_strings[] =
(triangle_y_direction_##direction_c << 4) | \
(triangle_winding_##winding << 6)) \
-void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
- u32 flags)
+static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
+ vertex_struct *vertexes_out[3])
{
s32 y_top, y_bottom;
s32 triangle_area;
@@ -2927,7 +2953,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
#ifdef PROFILE
trivial_rejects++;
#endif
- return;
+ return 0;
}
if(b->y < a->y)
@@ -2949,7 +2975,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
#ifdef PROFILE
trivial_rejects++;
#endif
- return;
+ return 0;
}
if(triangle_area < 0)
@@ -2975,7 +3001,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
#ifdef PROFILE
trivial_rejects++;
#endif
- return;
+ return 0;
}
if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x,
@@ -2984,13 +3010,28 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
#ifdef PROFILE
trivial_rejects++;
#endif
- return;
+ return 0;
}
- psx_gpu->num_spans = 0;
psx_gpu->triangle_area = triangle_area;
psx_gpu->triangle_winding = triangle_winding;
+ vertexes_out[0] = a;
+ vertexes_out[1] = b;
+ vertexes_out[2] = c;
+
+ return 1;
+}
+
+static void render_triangle_p(psx_gpu_struct *psx_gpu,
+ vertex_struct *vertex_ptrs[3], u32 flags)
+{
+ psx_gpu->num_spans = 0;
+
+ vertex_struct *a = vertex_ptrs[0];
+ vertex_struct *b = vertex_ptrs[1];
+ vertex_struct *c = vertex_ptrs[2];
+
s32 y_delta_a = b->y - a->y;
s32 y_delta_b = c->y - b->y;
s32 y_delta_c = c->y - a->y;
@@ -3002,7 +3043,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
compute_all_gradients(psx_gpu, a, b, c);
switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
- (triangle_winding << 6))
+ (psx_gpu->triangle_winding << 6))
{
triangle_case(up, up, up, negative):
triangle_case(up, up, flat, negative):
@@ -3081,11 +3122,11 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
spans += psx_gpu->num_spans;
#endif
- if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED)
+ if(unlikely(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED))
{
u32 i;
- if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD)
+ if(psx_gpu->render_mode & RENDER_INTERLACE_ODD)
{
for(i = 0; i < psx_gpu->num_spans; i++)
{
@@ -3126,6 +3167,14 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
(psx_gpu);
}
+void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
+ u32 flags)
+{
+ vertex_struct *vertex_ptrs[3];
+ if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs))
+ render_triangle_p(psx_gpu, vertex_ptrs, flags);
+}
+
void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu);
@@ -3161,14 +3210,17 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
#endif
-#define setup_sprite_tiled_initialize_4bpp() \
+#define setup_sprite_tiled_initialize_4bpp_clut() \
u16 *clut_ptr = psx_gpu->clut_ptr; \
vec_8x16u clut_a, clut_b; \
vec_16x8u clut_low, clut_high; \
\
load_8x16b(clut_a, clut_ptr); \
load_8x16b(clut_b, clut_ptr + 8); \
- unzip_16x8b(clut_low, clut_high, clut_a, clut_b); \
+ unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \
+
+#define setup_sprite_tiled_initialize_4bpp() \
+ setup_sprite_tiled_initialize_4bpp_clut(); \
\
if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask) \
update_texture_4bpp_cache(psx_gpu) \
@@ -3185,10 +3237,6 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
load_64b(texels, texture_block_ptr) \
-#define setup_sprite_tile_setup_block_yes(side, offset, texture_mode) \
-
-#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \
-
#define setup_sprite_tile_add_blocks(tile_num_blocks) \
num_blocks += tile_num_blocks; \
sprite_blocks += tile_num_blocks; \
@@ -3334,34 +3382,36 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu)
#define setup_sprite_tile_column_edge_post_adjust_full(edge) \
-#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \
+#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \
+ x4mode) \
do \
{ \
sub_tile_height = column_data; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \
+ setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
+ setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \
} while(0) \
-#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \
+#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \
+ x4mode) \
do \
{ \
u32 tiles_remaining = column_data >> 16; \
sub_tile_height = column_data & 0xFF; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
+ setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
tiles_remaining -= 1; \
\
while(tiles_remaining) \
{ \
sub_tile_height = 16; \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
tiles_remaining--; \
} \
\
sub_tile_height = (column_data >> 8) & 0xFF; \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
+ setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \
} while(0) \
@@ -3374,15 +3424,18 @@ do \
column_data |= (tile_height - 1) << 16 \
+#define RIGHT_MASK_BIT_SHIFT 8
+#define RIGHT_MASK_BIT_SHIFT_4x 16
+
#define setup_sprite_tile_column_width_single(texture_mode, multi_height, \
- edge_mode, edge) \
+ edge_mode, edge, x4mode) \
{ \
setup_sprite_column_data_##multi_height(); \
left_mask_bits = left_block_mask | right_block_mask; \
- right_mask_bits = left_mask_bits >> 8; \
+ right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
\
setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \
- texture_mode); \
+ texture_mode, x4mode); \
} \
#define setup_sprite_tiled_advance_column() \
@@ -3390,18 +3443,22 @@ do \
if((texture_offset_base & 0xF00) == 0) \
texture_offset_base -= (0x100 + 0xF00) \
+#define FB_PTR_MULTIPLIER 1
+#define FB_PTR_MULTIPLIER_4x 2
+
#define setup_sprite_tile_column_width_multi(texture_mode, multi_height, \
- left_mode, right_mode) \
+ left_mode, right_mode, x4mode) \
{ \
setup_sprite_column_data_##multi_height(); \
- s32 fb_ptr_advance_column = 16 - (1024 * height); \
+ s32 fb_ptr_advance_column = (16 - (1024 * height)) \
+ * FB_PTR_MULTIPLIER##x4mode; \
\
tile_width -= 2; \
left_mask_bits = left_block_mask; \
- right_mask_bits = left_mask_bits >> 8; \
+ right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
\
setup_sprite_tile_column_height_##multi_height(left_mode, right, \
- texture_mode); \
+ texture_mode, x4mode); \
fb_ptr += fb_ptr_advance_column; \
\
left_mask_bits = 0x00; \
@@ -3410,22 +3467,297 @@ do \
while(tile_width) \
{ \
setup_sprite_tiled_advance_column(); \
- setup_sprite_tile_column_height_##multi_height(full, none, texture_mode); \
+ setup_sprite_tile_column_height_##multi_height(full, none, \
+ texture_mode, x4mode); \
fb_ptr += fb_ptr_advance_column; \
tile_width--; \
} \
\
left_mask_bits = right_block_mask; \
- right_mask_bits = left_mask_bits >> 8; \
+ right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \
\
setup_sprite_tiled_advance_column(); \
setup_sprite_tile_column_height_##multi_height(right_mode, left, \
- texture_mode); \
+ texture_mode, x4mode); \
+} \
+
+
+/* 4x stuff */
+#define setup_sprite_tiled_initialize_4bpp_4x() \
+ setup_sprite_tiled_initialize_4bpp_clut() \
+
+#define setup_sprite_tiled_initialize_8bpp_4x() \
+
+
+#define setup_sprite_tile_full_4bpp_4x(edge) \
+{ \
+ vec_8x8u texels_low, texels_high; \
+ vec_8x16u pixels, pixels_wide; \
+ setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \
+ u32 left_mask_bits_a = left_mask_bits & 0xFF; \
+ u32 left_mask_bits_b = left_mask_bits >> 8; \
+ u32 right_mask_bits_a = right_mask_bits & 0xFF; \
+ u32 right_mask_bits_b = right_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); \
+ tbl_16(texels_low, texels, clut_low); \
+ tbl_16(texels_high, texels, clut_high); \
+ zip_8x16b(pixels, texels_low, texels_high); \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 8; \
+ block++; \
+ \
+ setup_sprite_tile_fetch_texel_block_8bpp(8); \
+ tbl_16(texels_low, texels, clut_low); \
+ tbl_16(texels_high, texels, clut_high); \
+ zip_8x16b(pixels, texels_low, texels_high); \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 16; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024 + 16; \
+ block++; \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 24; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 24; \
+ block++; \
+ \
+ fb_ptr += 2048; \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; \
} \
+#define setup_sprite_tile_half_4bpp_4x(edge) \
+{ \
+ vec_8x8u texels_low, texels_high; \
+ vec_8x16u pixels, pixels_wide; \
+ setup_sprite_tile_add_blocks(sub_tile_height * 4); \
+ u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \
+ u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); \
+ tbl_16(texels_low, texels, clut_low); \
+ tbl_16(texels_high, texels, clut_high); \
+ zip_8x16b(pixels, texels_low, texels_high); \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->texels = pixels_wide; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 8; \
+ block++; \
+ \
+ fb_ptr += 2048; \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; \
+} \
-#define setup_sprite_tiled_builder(texture_mode) \
-void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \
+
+#define setup_sprite_tile_full_8bpp_4x(edge) \
+{ \
+ setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \
+ vec_16x8u texels_wide; \
+ u32 left_mask_bits_a = left_mask_bits & 0xFF; \
+ u32 left_mask_bits_b = left_mask_bits >> 8; \
+ u32 right_mask_bits_a = right_mask_bits & 0xFF; \
+ u32 right_mask_bits_b = right_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); \
+ zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = left_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = left_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 1024 + 8; \
+ block++; \
+ \
+ setup_sprite_tile_fetch_texel_block_8bpp(8); \
+ zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 16; \
+ block++; \
+ \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = right_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024 + 16; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 24; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = right_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 24 + 1024; \
+ block++; \
+ \
+ fb_ptr += 2048; \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; \
+} \
+
+#define setup_sprite_tile_half_8bpp_4x(edge) \
+{ \
+ setup_sprite_tile_add_blocks(sub_tile_height * 4); \
+ vec_16x8u texels_wide; \
+ u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \
+ u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \
+ \
+ while(sub_tile_height) \
+ { \
+ setup_sprite_tile_fetch_texel_block_8bpp(0); \
+ zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr; \
+ block++; \
+ \
+ block->r = texels_wide.low; \
+ block->draw_mask_bits = edge##_mask_bits_a; \
+ block->fb_ptr = fb_ptr + 1024; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8; \
+ block++; \
+ \
+ block->r = texels_wide.high; \
+ block->draw_mask_bits = edge##_mask_bits_b; \
+ block->fb_ptr = fb_ptr + 8 + 1024; \
+ block++; \
+ \
+ fb_ptr += 2048; \
+ texture_offset += 0x10; \
+ sub_tile_height--; \
+ } \
+ texture_offset += 0xF00; \
+ psx_gpu->num_blocks = num_blocks; \
+} \
+
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \
+ texture_offset = texture_offset_base + 8; \
+ fb_ptr += 16 \
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \
+ texture_offset = texture_offset_base \
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \
+ setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \
+
+#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \
+ texture_offset = texture_offset_base \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \
+ fb_ptr -= 16 \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \
+ setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \
+
+#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \
+
+
+#define setup_sprite_offset_u_adjust() \
+
+#define setup_sprite_comapre_left_block_mask() \
+ ((left_block_mask & 0xFF) == 0xFF) \
+
+#define setup_sprite_comapre_right_block_mask() \
+ (((right_block_mask >> 8) & 0xFF) == 0xFF) \
+
+
+#define setup_sprite_offset_u_adjust_4x() \
+ offset_u *= 2; \
+ offset_u_right = offset_u_right * 2 + 1 \
+
+#define setup_sprite_comapre_left_block_mask_4x() \
+ ((left_block_mask & 0xFFFF) == 0xFFFF) \
+
+#define setup_sprite_comapre_right_block_mask_4x() \
+ (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) \
+
+
+#define setup_sprite_tiled_builder(texture_mode, x4mode) \
+void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\
s32 u, s32 v, s32 width, s32 height, u32 color) \
{ \
s32 offset_u = u & 0xF; \
@@ -3437,8 +3769,10 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \
s32 tile_width = width_rounded / 16; \
u32 offset_u_right = width_rounded & 0xF; \
\
- u32 left_block_mask = ~(0xFFFF << offset_u); \
- u32 right_block_mask = 0xFFFE << offset_u_right; \
+ setup_sprite_offset_u_adjust##x4mode(); \
+ \
+ u32 left_block_mask = ~(0xFFFFFFFF << offset_u); \
+ u32 right_block_mask = 0xFFFFFFFE << offset_u_right; \
\
u32 left_mask_bits; \
u32 right_mask_bits; \
@@ -3455,19 +3789,19 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \
u32 texture_offset_base = texture_offset; \
u32 control_mask; \
\
- u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - offset_u); \
+ u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u); \
u32 num_blocks = psx_gpu->num_blocks; \
block_struct *block = psx_gpu->blocks + num_blocks; \
\
u16 *texture_block_ptr; \
vec_8x8u texels; \
\
- setup_sprite_tiled_initialize_##texture_mode(); \
+ setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
\
control_mask = tile_width == 1; \
control_mask |= (tile_height == 1) << 1; \
- control_mask |= ((left_block_mask & 0xFF) == 0xFF) << 2; \
- control_mask |= (((right_block_mask >> 8) & 0xFF) == 0xFF) << 3; \
+ control_mask |= setup_sprite_comapre_left_block_mask##x4mode() << 2; \
+ control_mask |= setup_sprite_comapre_right_block_mask##x4mode() << 3; \
\
sprites_##texture_mode++; \
\
@@ -3475,64 +3809,77 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \
{ \
default: \
case 0x0: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \
+ x4mode); \
break; \
\
case 0x1: \
- setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \
+ setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \
+ x4mode); \
break; \
\
case 0x2: \
- setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \
+ x4mode); \
break; \
\
case 0x3: \
- setup_sprite_tile_column_width_single(texture_mode, single, full, none); \
+ setup_sprite_tile_column_width_single(texture_mode, single, full, none, \
+ x4mode); \
break; \
\
case 0x4: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \
+ x4mode); \
break; \
\
case 0x5: \
- setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \
+ setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \
+ x4mode); \
break; \
\
case 0x6: \
- setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \
+ x4mode); \
break; \
\
case 0x7: \
- setup_sprite_tile_column_width_single(texture_mode, single, half, right);\
+ setup_sprite_tile_column_width_single(texture_mode, single, half, right, \
+ x4mode); \
break; \
\
case 0x8: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \
+ x4mode); \
break; \
\
case 0x9: \
- setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \
+ setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \
+ x4mode); \
break; \
\
case 0xA: \
- setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \
+ x4mode); \
break; \
\
case 0xB: \
- setup_sprite_tile_column_width_single(texture_mode, single, half, left); \
+ setup_sprite_tile_column_width_single(texture_mode, single, half, left, \
+ x4mode); \
break; \
\
case 0xC: \
- setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \
+ x4mode); \
break; \
\
case 0xE: \
- setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \
+ setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \
+ x4mode); \
break; \
} \
} \
-
void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
s32 width, s32 height, u32 color);
void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
@@ -3540,9 +3887,24 @@ void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
s32 width, s32 height, u32 color);
+void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
+ s32 width, s32 height, u32 color);
+void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
+ s32 width, s32 height, u32 color);
+void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
+ s32 width, s32 height, u32 color);
+
+void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
+ s32 v, s32 width, s32 height, u32 color);
+void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y,
+ s32 u, s32 v, s32 width, s32 height, u32 color);
+
#ifndef NEON_BUILD
-setup_sprite_tiled_builder(4bpp);
-setup_sprite_tiled_builder(8bpp);
+setup_sprite_tiled_builder(4bpp,);
+setup_sprite_tiled_builder(8bpp,);
+
+setup_sprite_tiled_builder(4bpp,_4x);
+setup_sprite_tiled_builder(8bpp,_4x);
void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
@@ -3550,7 +3912,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
u32 left_offset = u & 0x7;
u32 width_rounded = width + left_offset + 7;
- u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (s32)(x - left_offset);
+ u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (s32)(x - left_offset);
u32 right_width = width_rounded & 0x7;
u32 block_width = width_rounded / 8;
u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8);
@@ -3665,14 +4027,19 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
}
}
-#endif
-
void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
s32 v, s32 width, s32 height, u32 color)
{
+ if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE |
+ RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0)
+ {
+ setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color);
+ return;
+ }
+
u32 right_width = ((width - 1) & 0x7) + 1;
u32 right_mask_bits = (0xFF << right_width);
- u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + x;
+ u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x;
u32 block_width = (width + 7) / 8;
u32 fb_ptr_pitch = 1024 - ((block_width - 1) * 8);
u32 blocks_remaining;
@@ -3735,6 +4102,66 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
}
}
+#endif
+
+void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y,
+ s32 u, s32 v, s32 width, s32 height, u32 color)
+{ /* Solid-color rectangle fill written straight to vram_out_ptr instead of
+ * being queued as render blocks. setup_sprite_untextured() dispatches here
+ * when none of RENDER_STATE_MASK_EVALUATE, RENDER_FLAGS_MODULATE_TEXELS or
+ * RENDER_FLAGS_BLEND is set in render_state.
+ *
+ * x, y          - top-left pixel inside the 1024-pixel-wide output buffer
+ * u, v          - unused; kept so the signature matches the other sprite
+ *                 setup functions
+ * width, height - rectangle size in pixels
+ * color         - 0xBBGGRR, 8 bits per channel
+ */
+ u32 r = color & 0xFF;
+ u32 g = (color >> 8) & 0xFF;
+ u32 b = (color >> 16) & 0xFF;
+ u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | // 5 bits per channel, red in the low bits
+ psx_gpu->mask_msb;
+ u32 color_32bpp = color_16bpp | (color_16bpp << 16); // same pixel twice, for 32-bit stores
+
+ u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024);
+ u32 *vram_ptr;
+
+ u32 num_width;
+
+ if(psx_gpu->num_blocks > MAX_BLOCKS) // NOTE(review): only flushes when the queue exceeds MAX_BLOCKS; presumably meant to drain queued blocks before this direct write - confirm
+ {
+ flush_render_block_buffer(psx_gpu);
+ }
+
+ while(height)
+ {
+ num_width = width;
+
+ vram_ptr = (void *)vram_ptr16;
+ if((long)vram_ptr16 & 2) // row starts on a 2-byte but not 4-byte boundary
+ {
+ *vram_ptr16 = color_32bpp; // store truncates to the low 16 bits (one pixel)
+ vram_ptr = (void *)(vram_ptr16 + 1); // now 4-byte aligned for the 32-bit stores below
+ num_width--;
+ }
+
+ while(num_width >= 4 * 2) // 8 pixels (four 32-bit stores) per iteration
+ {
+ vram_ptr[0] = color_32bpp;
+ vram_ptr[1] = color_32bpp;
+ vram_ptr[2] = color_32bpp;
+ vram_ptr[3] = color_32bpp;
+
+ vram_ptr += 4;
+ num_width -= 4 * 2;
+ }
+
+ while(num_width >= 2) // remaining pixel pairs
+ {
+ *vram_ptr++ = color_32bpp;
+ num_width -= 2;
+ }
+
+ if(num_width > 0) // one trailing pixel
+ {
+ *(u16 *)vram_ptr = color_32bpp;
+ }
+
+ vram_ptr16 += 1024; // next row: rows are 1024 pixels apart
+ height--;
+ }
+}
#define setup_sprite_blocks_switch_textured(texture_mode) \
@@ -4155,9 +4582,6 @@ do \
{ \
delta_y *= -1; \
\
- if(delta_y >= 512) \
- return; \
- \
if(delta_x > delta_y) \
{ \
draw_line_span_horizontal(decrement, shading, blending, dithering, \
@@ -4171,9 +4595,6 @@ do \
} \
else \
{ \
- if(delta_y >= 512) \
- return; \
- \
if(delta_x > delta_y) \
{ \
draw_line_span_horizontal(increment, shading, blending, dithering, \
@@ -4188,7 +4609,7 @@ do \
void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags,
- u32 color)
+ u32 color, int double_resolution)
{
s32 color_r, color_g, color_b;
u32 triangle_winding = 0;
@@ -4240,12 +4661,22 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags,
delta_x = x_b - x_a;
delta_y = y_b - y_a;
- if(delta_x >= 1024)
+ if(delta_x >= 1024 || delta_y >= 512 || delta_y <= -512)
return;
+ if(double_resolution)
+ {
+ x_a *= 2;
+ x_b *= 2;
+ y_a *= 2;
+ y_b *= 2;
+ delta_x *= 2;
+ delta_y *= 2;
+ }
+
flags &= ~RENDER_FLAGS_TEXTURE_MAP;
- vram_ptr = psx_gpu->vram_ptr + (y_a * 1024) + x_a;
+ vram_ptr = psx_gpu->vram_out_ptr + (y_a * 1024) + x_a;
control_mask = 0x0;
@@ -4435,7 +4866,6 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
if((width == 0) || (height == 0))
return;
- flush_render_block_buffer(psx_gpu);
invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1);
u32 r = color & 0xFF;
@@ -4445,17 +4875,17 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
psx_gpu->mask_msb;
u32 color_32bpp = color_16bpp | (color_16bpp << 16);
- u32 *vram_ptr = (u32 *)(psx_gpu->vram_ptr + x + (y * 1024));
+ u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024));
u32 pitch = 512 - (width / 2);
u32 num_width;
- if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED)
+ if(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED)
{
pitch += 512;
height /= 2;
- if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD)
+ if(psx_gpu->render_mode & RENDER_INTERLACE_ODD)
vram_ptr += 512;
}
@@ -4482,6 +4912,50 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
}
}
+void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
+ u32 width, u32 height)
+{ /* Fills a width x height rectangle at (x, y) of the buffer addressed by
+ * vram_out_ptr with a solid color, 16 pixels per inner-loop step.
+ * Compared with render_block_fill() in this file, this variant does not
+ * call invalidate_texture_cache_region() and has no interlace handling.
+ *
+ * color is 0xBBGGRR (8 bits per channel); it is reduced to 5 bits per
+ * channel and OR-ed with mask_msb. width is clamped to 1024.
+ * NOTE(review): the inner loop subtracts 16 from an unsigned counter until
+ * it reaches zero, so width is presumably always a multiple of 16 here -
+ * confirm against the caller.
+ */
+ if((width == 0) || (height == 0))
+ return;
+
+ if(width > 1024)
+ width = 1024;
+
+ u32 r = color & 0xFF;
+ u32 g = (color >> 8) & 0xFF;
+ u32 b = (color >> 16) & 0xFF;
+ u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
+ psx_gpu->mask_msb;
+ u32 color_32bpp = color_16bpp | (color_16bpp << 16); // two identical pixels per 32-bit store
+
+ u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024));
+
+ u32 pitch = 1024 / 2 - (width / 2); // in u32 words: from the end of one filled row to the start of the next (1024-pixel rows)
+ u32 num_width;
+
+ while(height)
+ {
+ num_width = width;
+ while(num_width)
+ {
+ vram_ptr[0] = color_32bpp;
+ vram_ptr[1] = color_32bpp;
+ vram_ptr[2] = color_32bpp;
+ vram_ptr[3] = color_32bpp;
+ vram_ptr[4] = color_32bpp;
+ vram_ptr[5] = color_32bpp;
+ vram_ptr[6] = color_32bpp;
+ vram_ptr[7] = color_32bpp;
+
+ vram_ptr += 8;
+ num_width -= 16; // eight 32-bit stores = 16 pixels
+ }
+
+ vram_ptr += pitch;
+ height--;
+ }
+}
+
void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
u32 width, u32 height, u32 pitch)
{
@@ -4522,16 +4996,17 @@ void initialize_reciprocal_table(void)
u32 height_reciprocal;
s32 shift;
- for(height = 1; height < 512; height++)
+ for(height = 1; height < sizeof(reciprocal_table)
+ / sizeof(reciprocal_table[0]); height++)
{
shift = __builtin_clz(height);
height_normalized = height << shift;
- height_reciprocal = ((1ULL << 50) + (height_normalized - 1)) /
+ height_reciprocal = ((1ULL << 51) + (height_normalized - 1)) /
height_normalized;
- shift = 32 - (50 - shift);
+ shift = 32 - (51 - shift);
- reciprocal_table[height] = (height_reciprocal << 12) | shift;
+ reciprocal_table[height] = (height_reciprocal << 10) | shift;
}
}
@@ -4559,8 +5034,10 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
psx_gpu->render_state = 0;
psx_gpu->render_state_base = 0;
psx_gpu->num_blocks = 0;
+ psx_gpu->uvrgb_phase = 0x8000;
psx_gpu->vram_ptr = vram;
+ psx_gpu->vram_out_ptr = vram;
psx_gpu->texture_page_base = psx_gpu->vram_ptr;
psx_gpu->texture_page_ptr = psx_gpu->vram_ptr;
@@ -4573,7 +5050,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
psx_gpu->texture_mask_width = 0xFF;
psx_gpu->texture_mask_height = 0xFF;
- psx_gpu->interlace_mode = 0;
+ psx_gpu->render_mode = 0;
memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512);
@@ -4596,6 +5073,8 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
psx_gpu->dither_table[3] = dither_table_row(3, -1, 2, -2);
psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN;
+
+ psx_gpu->enhancement_x_threshold = 256;
}
u64 get_us(void)
@@ -4660,3 +5139,4 @@ void triangle_benchmark(psx_gpu_struct *psx_gpu)
#endif
+#include "psx_gpu_4x.c"
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h
index 53a8717..846658c 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.h
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h
@@ -56,8 +56,8 @@ typedef enum
typedef enum
{
RENDER_INTERLACE_ENABLED = 0x1,
- RENDER_INTERLACE_ODD = 0x2
-} render_interlace_enum;
+ RENDER_INTERLACE_ODD = 0x2,
+} render_mode_enum;
typedef struct
{
@@ -122,7 +122,6 @@ typedef struct
vec_4x32u g_block_span;
vec_4x32u b_block_span;
- // 72 bytes
u32 b;
u32 b_dy;
@@ -138,25 +137,21 @@ typedef struct
u32 triangle_color;
u32 dither_table[4];
+ u32 uvrgb_phase;
+
struct render_block_handler_struct *render_block_handler;
void *texture_page_ptr;
void *texture_page_base;
u16 *clut_ptr;
u16 *vram_ptr;
+ u16 *vram_out_ptr;
- // 26 bytes
u16 render_state_base;
u16 render_state;
u16 num_spans;
u16 num_blocks;
- s16 offset_x;
- s16 offset_y;
-
- u16 clut_settings;
- u16 texture_settings;
-
s16 viewport_start_x;
s16 viewport_start_y;
s16 viewport_end_x;
@@ -164,7 +159,6 @@ typedef struct
u16 mask_msb;
- // 8 bytes
u8 triangle_winding;
u8 display_area_draw_enable;
@@ -178,11 +172,27 @@ typedef struct
u8 texture_window_y;
u8 primitive_type;
- u8 interlace_mode;
+ u8 render_mode;
+
+ s16 offset_x;
+ s16 offset_y;
+
+ u16 clut_settings;
+ u16 texture_settings;
+
+ // enhancement stuff
+ u16 *enhancement_buf_ptr;
+ u16 *enhancement_current_buf_ptr;
+ u32 enhancement_x_threshold;
+ s16 saved_viewport_start_x;
+ s16 saved_viewport_start_y;
+ s16 saved_viewport_end_x;
+ s16 saved_viewport_end_y;
+ u8 enhancement_buf_by_x16[64];
// Align up to 64 byte boundary to keep the upcoming buffers cache line
- // aligned
- //u8 reserved_a[0];
+ // aligned, also make reachable with single immediate addition
+ u8 reserved_a[164];
// 8KB
block_struct blocks[MAX_BLOCKS_PER_ROW];
@@ -224,7 +234,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
s32 width, s32 height, u32 flags, u32 color);
void render_line(psx_gpu_struct *gpu, vertex_struct *vertexes, u32 flags,
- u32 color);
+ u32 color, int double_resolution);
u32 texture_region_mask(s32 x1, s32 y1, s32 x2, s32 y2);
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c
new file mode 100644
index 0000000..83c6680
--- /dev/null
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c
@@ -0,0 +1,384 @@
+#define select_enhancement_buf_ptr(psx_gpu, x) \
+ ((psx_gpu)->enhancement_buf_ptr + \
+ ((psx_gpu)->enhancement_buf_by_x16[(x) / 16] << 20)) // buffer index is looked up per 16-pixel column of x; buffers are 1 << 20 u16 elements apart
+
+#ifndef NEON_BUILD
+void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
+ s32 v, s32 width, s32 height, u32 color)
+{ /* Queues render blocks for a 16bpp textured sprite drawn at doubled
+ * resolution. The texture is walked in 8-texel segments; every segment is
+ * loaded once and emitted as four blocks: its low and high halves, each
+ * widened with zip_4x32b, go to fb_ptr and fb_ptr + 8, and both are
+ * repeated one output row (1024 pixels) further down.
+ *
+ * x, y          - destination in output-buffer pixels; render_sprite_4x()
+ *                 doubles them before dispatching here
+ * u, v          - texture coordinates of the top-left texel
+ * width, height - sprite size in source texels (each source row consumes
+ *                 one texture row and two output rows)
+ * color         - unused in this function
+ */
+ u32 left_offset = u & 0x7;
+ u32 width_rounded = width + left_offset + 7;
+
+ u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (s32)(x - left_offset * 2);
+ u32 right_width = width_rounded & 0x7;
+ u32 block_width = width_rounded / 8;
+ u32 fb_ptr_pitch = (2048 + 16) - (block_width * 16); // two output rows down, minus the horizontal advance already made within the row
+
+ u32 left_mask_bits = ~(0xFFFF << (left_offset * 2)); // two mask bits per source texel
+ u32 right_mask_bits = 0xFFFC << (right_width * 2);
+
+ u32 texture_offset_base = u + (v * 1024);
+ u32 texture_mask =
+ psx_gpu->texture_mask_width | (psx_gpu->texture_mask_height * 1024);
+
+ u32 blocks_remaining;
+ u32 num_blocks = psx_gpu->num_blocks;
+ block_struct *block = psx_gpu->blocks + num_blocks;
+
+ u16 *texture_page_ptr = psx_gpu->texture_page_ptr;
+ u16 *texture_block_ptr;
+
+ texture_offset_base &= ~0x7; // start on an 8-texel boundary; left_offset is handled by the mask
+
+ sprites_16bpp++;
+
+ if(block_width == 1) // whole sprite fits one 8-texel segment: left and right masks combine
+ {
+ u32 mask_bits = left_mask_bits | right_mask_bits;
+ u32 mask_bits_a = mask_bits & 0xFF; // used for the blocks at fb_ptr
+ u32 mask_bits_b = mask_bits >> 8; // used for the blocks at fb_ptr + 8
+
+ vec_8x16u texels;
+ vec_8x16u texels_wide;
+
+ while(height)
+ {
+ num_blocks += 4;
+ sprite_blocks += 4;
+
+ if(num_blocks > MAX_BLOCKS) // queue would overflow: flush and restart at the first block
+ {
+ flush_render_block_buffer(psx_gpu);
+ num_blocks = 4;
+ block = psx_gpu->blocks;
+ }
+
+ texture_block_ptr =
+ texture_page_ptr + (texture_offset_base & texture_mask);
+
+ load_128b(texels, texture_block_ptr);
+
+ zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);
+ block->texels = texels_wide;
+ block->draw_mask_bits = mask_bits_a;
+ block->fb_ptr = fb_ptr;
+ block++;
+
+ block->texels = texels_wide;
+ block->draw_mask_bits = mask_bits_a;
+ block->fb_ptr = fb_ptr + 1024;
+ block++;
+
+ zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);
+ block->texels = texels_wide;
+ block->draw_mask_bits = mask_bits_b;
+ block->fb_ptr = fb_ptr + 8;
+ block++;
+
+ block->texels = texels_wide;
+ block->draw_mask_bits = mask_bits_b;
+ block->fb_ptr = fb_ptr + 8 + 1024;
+ block++;
+
+ texture_offset_base += 1024; // next texture row
+ fb_ptr += 2048; // two output rows
+
+ height--;
+ psx_gpu->num_blocks = num_blocks;
+ }
+ }
+ else
+ {
+ u32 texture_offset;
+
+ u32 left_mask_bits_a = left_mask_bits & 0xFF;
+ u32 left_mask_bits_b = left_mask_bits >> 8;
+ u32 right_mask_bits_a = right_mask_bits & 0xFF;
+ u32 right_mask_bits_b = right_mask_bits >> 8;
+
+ vec_8x16u texels;
+ vec_8x16u texels_wide;
+
+ while(height)
+ {
+ blocks_remaining = block_width - 2; // segments between the left and right edge segments
+ num_blocks += block_width * 4;
+ sprite_blocks += block_width * 4;
+
+ if(num_blocks > MAX_BLOCKS) // queue would overflow: flush and restart at the first block
+ {
+ flush_render_block_buffer(psx_gpu);
+ num_blocks = block_width * 4;
+ block = psx_gpu->blocks;
+ }
+
+ texture_offset = texture_offset_base;
+ texture_offset_base += 1024;
+
+ texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
+
+ load_128b(texels, texture_block_ptr); // left edge segment, drawn with the left masks
+
+ zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);
+ block->texels = texels_wide;
+ block->draw_mask_bits = left_mask_bits_a;
+ block->fb_ptr = fb_ptr;
+ block++;
+
+ block->texels = texels_wide;
+ block->draw_mask_bits = left_mask_bits_a;
+ block->fb_ptr = fb_ptr + 1024;
+ block++;
+
+ zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);
+ block->texels = texels_wide;
+ block->draw_mask_bits = left_mask_bits_b;
+ block->fb_ptr = fb_ptr + 8;
+ block++;
+
+ block->texels = texels_wide;
+ block->draw_mask_bits = left_mask_bits_b;
+ block->fb_ptr = fb_ptr + 8 + 1024;
+ block++;
+
+ texture_offset += 8;
+ fb_ptr += 16;
+
+ while(blocks_remaining) // interior segments use a zero draw mask
+ {
+ texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
+ load_128b(texels, texture_block_ptr);
+
+ zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);
+ block->texels = texels_wide;
+ block->draw_mask_bits = 0;
+ block->fb_ptr = fb_ptr;
+ block++;
+
+ block->texels = texels_wide;
+ block->draw_mask_bits = 0;
+ block->fb_ptr = fb_ptr + 1024;
+ block++;
+
+ zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);
+ block->texels = texels_wide;
+ block->draw_mask_bits = 0;
+ block->fb_ptr = fb_ptr + 8;
+ block++;
+
+ block->texels = texels_wide;
+ block->draw_mask_bits = 0;
+ block->fb_ptr = fb_ptr + 8 + 1024;
+ block++;
+
+ texture_offset += 8;
+ fb_ptr += 16;
+
+ blocks_remaining--;
+ }
+
+ texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);
+ load_128b(texels, texture_block_ptr); // right edge segment, drawn with the right masks
+
+ zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);
+ block->texels = texels_wide;
+ block->draw_mask_bits = right_mask_bits_a;
+ block->fb_ptr = fb_ptr;
+ block++;
+
+ block->texels = texels_wide;
+ block->draw_mask_bits = right_mask_bits_a;
+ block->fb_ptr = fb_ptr + 1024;
+ block++;
+
+ zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high);
+ block->texels = texels_wide;
+ block->draw_mask_bits = right_mask_bits_b;
+ block->fb_ptr = fb_ptr + 8;
+ block++;
+
+ block->texels = texels_wide;
+ block->draw_mask_bits = right_mask_bits_b;
+ block->fb_ptr = fb_ptr + 8 + 1024;
+ block++;
+
+ fb_ptr += fb_ptr_pitch; // back to the left edge, two output rows down
+
+ height--;
+ psx_gpu->num_blocks = num_blocks;
+ }
+ }
+}
+
+#endif
+
+static void setup_sprite_untextured_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y,
+ s32 u, s32 v, s32 width, s32 height, u32 color)
+{ // double-resolution untextured sprite: reuse the regular routine with doubled width and height
+ setup_sprite_untextured(psx_gpu, x, y, u, v, width * 2, height * 2, color); // x and y pass through unchanged; render_sprite_4x() doubles them before dispatching
+}
+
+#define setup_sprite_blocks_switch_textured_4x(texture_mode) \
+ setup_sprite_##texture_mode##_4x /* textured: the 4x setup function for this texture mode */ \
+
+#define setup_sprite_blocks_switch_untextured_4x(texture_mode) \
+ setup_sprite_untextured_4x /* untextured: texture_mode is ignored */ \
+
+#define setup_sprite_blocks_switch_4x(texturing, texture_mode) \
+ setup_sprite_blocks_switch_##texturing##_4x(texture_mode) /* dispatch on textured / untextured */ \
+
+
+#define render_sprite_blocks_switch_block_modulation_4x(texture_mode, \
+ blend_mode, mask_evaluate, shading, dithering, texturing, blending, \
+ modulation) \
+{ /* one handler table entry: setup, texture, shade and blend functions; only the setup function is 4x-specific */ \
+ setup_sprite_blocks_switch_4x(texturing, texture_mode), \
+ texture_sprite_blocks_switch_##texturing(texture_mode), \
+ shade_blocks_switch(unshaded, texturing, modulation, undithered, blending, \
+ mask_evaluate), \
+ blend_blocks_switch(texturing, blending, blend_mode, mask_evaluate) \
+} \
+
+#define render_sprite_blocks_switch_block_blending_4x(texture_mode, \
+ blend_mode, mask_evaluate, shading, dithering, texturing, blending) \
+ /* each level below expands the next one once per option, so the first-listed option gets the lower table indices */ \
+ render_sprite_blocks_switch_block_modulation_4x(texture_mode, blend_mode, \
+ mask_evaluate, shading, dithering, texturing, blending, modulated), \
+ render_sprite_blocks_switch_block_modulation_4x(texture_mode, blend_mode, \
+ mask_evaluate, shading, dithering, texturing, blending, unmodulated) \
+
+#define render_sprite_blocks_switch_block_texturing_4x(texture_mode, \
+ blend_mode, mask_evaluate, shading, dithering, texturing) \
+ render_sprite_blocks_switch_block_blending_4x(texture_mode, blend_mode, \
+ mask_evaluate, shading, dithering, texturing, unblended), \
+ render_sprite_blocks_switch_block_blending_4x(texture_mode, blend_mode, \
+ mask_evaluate, shading, dithering, texturing, blended) \
+
+#define render_sprite_blocks_switch_block_dithering_4x(texture_mode, \
+ blend_mode, mask_evaluate, shading, dithering) \
+ render_sprite_blocks_switch_block_texturing_4x(texture_mode, blend_mode, \
+ mask_evaluate, shading, dithering, untextured), \
+ render_sprite_blocks_switch_block_texturing_4x(texture_mode, blend_mode, \
+ mask_evaluate, shading, dithering, textured) \
+
+#define render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \
+ mask_evaluate, shading) \
+ render_sprite_blocks_switch_block_dithering_4x(texture_mode, blend_mode, \
+ mask_evaluate, shading, undithered), \
+ render_sprite_blocks_switch_block_dithering_4x(texture_mode, blend_mode, \
+ mask_evaluate, shading, dithered) \
+
+#define render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, \
+ blend_mode, mask_evaluate) \
+ render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \
+ mask_evaluate, unshaded), \
+ render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \
+ mask_evaluate, shaded) \
+
+#define render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, \
+ blend_mode) \
+ render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, blend_mode, \
+ off), \
+ render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, blend_mode, \
+ on) \
+
+#define render_sprite_blocks_switch_block_texture_mode_4x(texture_mode) \
+ render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, average), \
+ render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, add), \
+ render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, subtract), \
+ render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, add_fourth) \
+
+#define render_sprite_blocks_switch_block_4x() \
+ render_sprite_blocks_switch_block_texture_mode_4x(4bpp), \
+ render_sprite_blocks_switch_block_texture_mode_4x(8bpp), \
+ render_sprite_blocks_switch_block_texture_mode_4x(16bpp), \
+ render_sprite_blocks_switch_block_texture_mode_4x(4bpp) /* fourth texture-mode slot reuses the 4bpp handlers */ \
+
+
+
+render_block_handler_struct render_sprite_block_handlers_4x[] =
+{ // indexed by the render_state value computed in render_sprite_4x()
+ render_sprite_blocks_switch_block_4x()
+};
+
+
+void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v,
+ s32 width, s32 height, u32 flags, u32 color)
+{ /* Draws a sprite at doubled resolution.
+ *
+ * Steps: clip the rectangle against the viewport (in source coordinates),
+ * point vram_out_ptr at the enhancement buffer selected for the clipped
+ * left edge, double x and y, flush queued blocks if the render state,
+ * primitive type or color changed, then call the setup function taken
+ * from render_sprite_block_handlers_4x[render_state].
+ *
+ * x, y          - top-left corner, source (1x) coordinates
+ * u, v          - texture coordinates of the top-left texel
+ * width, height - size in source pixels; nothing is drawn if either is
+ *                 <= 0 after clipping
+ * flags         - only RENDER_FLAGS_MODULATE_TEXELS, RENDER_FLAGS_BLEND and
+ *                 RENDER_FLAGS_TEXTURE_MAP are used
+ * color         - only the low 24 bits are used
+ */
+ s32 x_right = x + width - 1; // right/bottom edges are taken before the left/top clipping below changes x, y, width, height
+ s32 y_bottom = y + height - 1;
+
+#ifdef PROFILE
+ sprites++;
+#endif
+
+ if(x < psx_gpu->viewport_start_x)
+ {
+ u32 clip = psx_gpu->viewport_start_x - x;
+ x += clip;
+ u += clip; // keep the texture aligned with the clipped left edge
+ width -= clip;
+ }
+
+ if(y < psx_gpu->viewport_start_y)
+ {
+ s32 clip = psx_gpu->viewport_start_y - y;
+ y += clip;
+ v += clip;
+ height -= clip;
+ }
+
+ if(x_right > psx_gpu->viewport_end_x)
+ width -= x_right - psx_gpu->viewport_end_x;
+
+ if(y_bottom > psx_gpu->viewport_end_y)
+ height -= y_bottom - psx_gpu->viewport_end_y;
+
+ if((width <= 0) || (height <= 0))
+ return;
+
+ psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x); // chosen from the clipped left edge only, while x is still in source coordinates
+
+ x *= 2; // destination coordinates are doubled; width and height are passed on undoubled
+ y *= 2;
+
+#ifdef PROFILE
+ span_pixels += width * height;
+ spans += height;
+#endif
+
+ u32 render_state = flags &
+ (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |
+ RENDER_FLAGS_TEXTURE_MAP);
+ render_state |=
+ (psx_gpu->render_state_base & ~RENDER_STATE_DITHER); // the dither bit is always cleared for sprites
+
+ if((psx_gpu->render_state != render_state) ||
+ (psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE))
+ {
+ psx_gpu->render_state = render_state;
+ flush_render_block_buffer(psx_gpu);
+#ifdef PROFILE
+ state_changes++;
+#endif
+ }
+
+ psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE;
+
+ color &= 0xFFFFFF;
+
+ if(psx_gpu->triangle_color != color)
+ {
+ flush_render_block_buffer(psx_gpu);
+ psx_gpu->triangle_color = color;
+ }
+
+ if(color == 0x808080) // presumably the neutral modulation color - confirm; done after render_state was stored, so it only affects the handler lookup below
+ render_state |= RENDER_FLAGS_MODULATE_TEXELS;
+
+ render_block_handler_struct *render_block_handler =
+ &(render_sprite_block_handlers_4x[render_state]);
+ psx_gpu->render_block_handler = render_block_handler;
+
+ ((setup_sprite_function_type *)render_block_handler->setup_blocks)
+ (psx_gpu, x, y, u, v, width, height, color);
+}
+
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
index 294685a..4e1e403 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2011 Gilead Kutnick "Exophase" <exophase@gmail.com>
+ * Copyright (C) 2012 Gražvydas Ignotas "notaz" <notasas@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -16,65 +17,13 @@
#define MAX_BLOCKS 64
#define MAX_BLOCKS_PER_ROW 128
-#define psx_gpu_test_mask_offset 0
-#define psx_gpu_uvrg_offset 16
-#define psx_gpu_uvrg_dx_offset 32
-#define psx_gpu_uvrg_dy_offset 48
-#define psx_gpu_u_block_span_offset 64
-#define psx_gpu_v_block_span_offset 80
-#define psx_gpu_r_block_span_offset 96
-#define psx_gpu_g_block_span_offset 112
-#define psx_gpu_b_block_span_offset 128
-
-#define psx_gpu_b_dx_offset 132
-
-#define psx_gpu_b_offset 144
-#define psx_gpu_b_dy_offset 148
-#define psx_gpu_triangle_area_offset 152
-#define psx_gpu_texture_window_settings_offset 156
-#define psx_gpu_current_texture_mask_offset 160
-#define psx_gpu_viewport_mask_offset 164
-#define psx_gpu_dirty_textures_4bpp_mask_offset 168
-#define psx_gpu_dirty_textures_8bpp_mask_offset 172
-#define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 176
-#define psx_gpu_triangle_color_offset 180
-#define psx_gpu_dither_table_offset 184
-#define psx_gpu_render_block_handler_offset 200
-#define psx_gpu_texture_page_ptr_offset 204
-#define psx_gpu_texture_page_base_offset 208
-#define psx_gpu_clut_ptr_offset 212
-#define psx_gpu_vram_ptr_offset 216
-
-#define psx_gpu_render_state_base_offset 220
-#define psx_gpu_render_state_offset 222
-#define psx_gpu_num_spans_offset 224
-#define psx_gpu_num_blocks_offset 226
-#define psx_gpu_offset_x_offset 228
-#define psx_gpu_offset_y_offset 230
-#define psx_gpu_clut_settings_offset 232
-#define psx_gpu_texture_settings_offset 234
-#define psx_gpu_viewport_start_x_offset 236
-#define psx_gpu_viewport_start_y_offset 238
-#define psx_gpu_viewport_end_x_offset 240
-#define psx_gpu_viewport_end_y_offset 242
-#define psx_gpu_mask_msb_offset 244
-
-#define psx_gpu_triangle_winding_offset 246
-#define psx_gpu_display_area_draw_enable_offset 247
-#define psx_gpu_current_texture_page_offset 248
-#define psx_gpu_last_8bpp_texture_page_offset 249
-#define psx_gpu_texture_mask_width_offset 250
-#define psx_gpu_texture_mask_height_offset 251
-#define psx_gpu_texture_window_x_offset 252
-#define psx_gpu_texture_window_y_offset 253
-#define psx_gpu_primitive_type_offset 254
-
-#define psx_gpu_reserved_a_offset 255
-
-#define psx_gpu_blocks_offset 0x0100
-#define psx_gpu_span_uvrg_offset_offset 0x2100
-#define psx_gpu_span_edge_data_offset 0x4100
-#define psx_gpu_span_b_offset_offset 0x5100
+#define RENDER_STATE_MASK_EVALUATE 0x20
+#define RENDER_FLAGS_MODULATE_TEXELS 0x1
+#define RENDER_FLAGS_BLEND 0x2
+
+#include "psx_gpu_offsets.h"
+
+#define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4)
#define edge_data_left_x_offset 0
#define edge_data_num_blocks_offset 2
@@ -238,6 +187,7 @@
#define uvrg_dx3l d6
#define uvrg_dx3h d7
+#define uvrgb_phase q13
.align 4
@@ -369,11 +319,16 @@ function(compute_all_gradients)
vmull.s16 ga_uvrg_y, d0_b, d1_b
rsbmi ga_bx, ga_bx, #0
+ @ r12 = psx_gpu->uvrgb_phase
+ ldr r12, [ psx_gpu, #psx_gpu_uvrgb_phase_offset ]
+
vmlsl.s16 ga_uvrg_y, d2_b, d3_b
movs gs_by, ga_by, asr #31
vshr.u64 d0, d30, #22
- mov b_base, b0, lsl #16
+ add b_base, r12, b0, lsl #16
+
+ vdup.u32 uvrgb_phase, r12
rsbmi ga_by, ga_by, #0
vclt.s32 gs_uvrg_x, ga_uvrg_x, #0 @ gs_uvrg_x = ga_uvrg_x < 0
@@ -382,7 +337,6 @@ function(compute_all_gradients)
ldrb r12, [ psx_gpu, #psx_gpu_triangle_winding_offset ]
vclt.s32 gs_uvrg_y, ga_uvrg_y, #0 @ gs_uvrg_y = ga_uvrg_y < 0
- add b_base, b_base, #0x8000
rsb r12, r12, #0 @ r12 = -(triangle->winding)
vdup.u32 w_mask, r12 @ w_mask = { -w, -w, -w, -w }
@@ -391,7 +345,7 @@ function(compute_all_gradients)
vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16
vdup.u32 r_shift, r14 @ r_shift = { shift, shift, shift, shift }
- vorr.u32 uvrg_base, #0x8000
+ vadd.u32 uvrg_base, uvrgb_phase
vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x)
vmov area_r_s, s0 @ area_r_s = triangle_reciprocal
@@ -657,7 +611,7 @@ function(compute_all_gradients)
\
vdup.u32 edge_shifts, temp; \
vsub.u32 heights_b, heights, c_0x01; \
- vshr.u32 height_reciprocals, edge_shifts, #12; \
+ vshr.u32 height_reciprocals, edge_shifts, #10; \
\
vmla.s32 heights_b, x_starts, heights; \
vbic.u16 edge_shifts, #0xE0; \
@@ -682,8 +636,8 @@ function(compute_all_gradients)
vsub.u32 heights_b, heights, c_0x01; \
sub height_b_alt, height_minor_b, #1; \
\
- vshr.u32 height_reciprocals, edge_shifts, #12; \
- lsr height_reciprocal_alt, edge_shift_alt, #12; \
+ vshr.u32 height_reciprocals, edge_shifts, #10; \
+ lsr height_reciprocal_alt, edge_shift_alt, #10; \
\
vmla.s32 heights_b, x_starts, heights; \
mla height_b_alt, height_minor_b, start_c, height_b_alt; \
@@ -1221,6 +1175,10 @@ function(setup_spans_up_down)
ldrh temp, [ psx_gpu, #psx_gpu_num_spans_offset ]
add temp, temp, height_minor_b
+
+ cmp temp, #MAX_SPANS
+ beq 5f
+
strh temp, [ psx_gpu, #psx_gpu_num_spans_offset ]
2:
@@ -1236,6 +1194,15 @@ function(setup_spans_up_down)
setup_spans_prologue_b()
bal 4b
+ 5:
+ // FIXME: overflow corner case
+ sub temp, temp, height_minor_b
+ bics height_minor_b, #3
+ add temp, temp, height_minor_b
+ strh temp, [ psx_gpu, #psx_gpu_num_spans_offset ]
+ bne 2b
+ bal 1b
+
.pool
#undef span_uvrg_offset
@@ -1393,7 +1360,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \
add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \
\
ldrh y, [ span_edge_data, #edge_data_y_offset ]; \
- ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \
\
cmp span_num_blocks, #0; \
beq 1f; \
@@ -1660,7 +1627,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \
add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \
\
ldrh y, [ span_edge_data, #edge_data_y_offset ]; \
- ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \
\
cmp span_num_blocks, #0; \
beq 1f; \
@@ -1855,7 +1822,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect)
ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ]
ldrh y, [ span_edge_data, #edge_data_y_offset ]
- ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
cmp span_num_blocks, #0
beq 1f
@@ -1975,7 +1942,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ]
ldrh y, [ span_edge_data, #edge_data_y_offset ]
- ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
cmp span_num_blocks, #0
beq 1f
@@ -2162,7 +2129,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \
add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \
\
ldrh y, [ span_edge_data, #edge_data_y_offset ]; \
- ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \
\
cmp span_num_blocks, #0; \
beq 1f; \
@@ -2402,7 +2369,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \
add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \
\
ldrh y, [ span_edge_data, #edge_data_y_offset ]; \
- ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \
\
cmp span_num_blocks, #0; \
beq 1f; \
@@ -3239,6 +3206,7 @@ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \
shade_blocks_textured_modulated_load_bdm_##shading(); \
vshrn.u16 texels_b, texels, #7; \
\
+ pld [ block_ptr_load_a ]; \
vmovn.u16 texels_r, texels; \
vmlal.u8 pixels, pixels_r_low, d64_1; \
\
@@ -3437,10 +3405,12 @@ function(shade_blocks_textured_unmodulated_direct)
[ draw_mask_bits_ptr, :16 ], c_64
vbif.u16 fb_pixels, pixels, draw_mask_combined
- vld1.u32 { pixels }, [ block_ptr_load, :128 ], c_64
-
sub fb_ptr_cmp, fb_ptr_next, fb_ptr
+ pld [ fb_ptr_next, #64 ]
+
add fb_ptr_cmp, fb_ptr_cmp, #14
+ vld1.u32 { pixels }, [ block_ptr_load, :128 ], c_64
+
cmp fb_ptr_cmp, #28
bls 4f
@@ -3799,11 +3769,15 @@ function(blend_blocks_textured_add_##mask_evaluate) \
vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \
vand.u16 pixels_mg, pixels, d128_0x83E0; \
\
- vbit.u16 blend_pixels, fb_pixels, draw_mask; \
- vld1.u32 { draw_mask }, [ draw_mask_ptr, :128 ], c_64; \
+ sub fb_ptr_cmp, fb_ptr_next, fb_ptr; \
+ pld [ fb_ptr_next, #64 ]; \
\
sub fb_ptr_cmp, fb_ptr_next, fb_ptr; \
+ vbit.u16 blend_pixels, fb_pixels, draw_mask; \
+ \
add fb_ptr_cmp, fb_ptr_cmp, #14; \
+ vld1.u32 { draw_mask }, [ draw_mask_ptr, :128 ], c_64; \
+ \
cmp fb_ptr_cmp, #28; \
bls 2f; \
\
@@ -4456,6 +4430,12 @@ function(render_block_fill_body)
#define draw_mask_fb_ptr_left d2
#define draw_mask_fb_ptr_right d3
+#define draw_mask_fb_ptr_left_a d2
+#define draw_mask_fb_ptr_left_b d3
+#define draw_mask_fb_ptr_right_a d10
+#define draw_mask_fb_ptr_right_b d11
+#define draw_masks_fb_ptrs2 q5
+
#define clut_low_a d4
#define clut_low_b d5
#define clut_high_a d6
@@ -4467,37 +4447,24 @@ function(render_block_fill_body)
#define clut_a q2
#define clut_b q3
-#define texels_low d10
-#define texels_high d11
-
+#define texels_low d12
+#define texels_high d13
-setup_sprite_flush_blocks_single:
- vpush { q1 - q4 }
-
- stmdb sp!, { r0 - r3, r12, r14 }
- bl flush_render_block_buffer
- ldmia sp!, { r0 - r3, r12, r14 }
-
- vpop { q1 - q4 }
-
- add block, psx_gpu, #psx_gpu_blocks_offset
-
- mov num_blocks, sub_tile_height
- bx lr
+#define texels_wide_low d14
+#define texels_wide_high d15
+#define texels_wide q7
-setup_sprite_flush_blocks_double:
- vpush { q1 - q4 }
+setup_sprite_flush_blocks:
+ vpush { q1 - q5 }
stmdb sp!, { r0 - r3, r12, r14 }
bl flush_render_block_buffer
ldmia sp!, { r0 - r3, r12, r14 }
- vpop { q1 - q4 }
+ vpop { q1 - q5 }
add block, psx_gpu, #psx_gpu_blocks_offset
-
- mov num_blocks, sub_tile_height, lsl #1
bx lr
@@ -4535,8 +4502,6 @@ setup_sprite_update_texture_8bpp_cache:
blne setup_sprite_update_texture_8bpp_cache \
-#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \
-
#define setup_sprite_block_count_single() \
sub_tile_height \
@@ -4547,7 +4512,8 @@ setup_sprite_update_texture_8bpp_cache:
add num_blocks, num_blocks, setup_sprite_block_count_##type(); \
cmp num_blocks, #MAX_BLOCKS; \
\
- blgt setup_sprite_flush_blocks_##type \
+ movgt num_blocks, setup_sprite_block_count_##type(); \
+ blgt setup_sprite_flush_blocks \
#define setup_sprite_tile_full_4bpp(edge) \
@@ -4729,31 +4695,33 @@ setup_sprite_update_texture_8bpp_cache:
#define setup_sprite_tile_column_edge_post_adjust_full(edge) \
-#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \
+#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \
+ x4mode) \
mov sub_tile_height, column_data; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge) \
+ setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
+ setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge) \
-#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \
+#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \
+ x4mode) \
and sub_tile_height, column_data, #0xFF; \
mov tiles_remaining, column_data, lsr #16; \
- setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
+ setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
\
subs tiles_remaining, tiles_remaining, #1; \
beq 2f; \
\
3: \
mov sub_tile_height, #16; \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
subs tiles_remaining, tiles_remaining, #1; \
bne 3b; \
\
2: \
uxtb sub_tile_height, column_data, ror #8; \
- setup_sprite_tile_##edge_mode##_##texture_mode(edge); \
- setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge) \
+ setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \
+ setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge) \
#define setup_sprite_column_data_single() \
@@ -4772,17 +4740,30 @@ setup_sprite_update_texture_8bpp_cache:
\
orr column_data, column_data, height_rounded, lsl #8 \
-#define setup_sprite_tile_column_width_single(texture_mode, multi_height, \
- edge_mode, edge) \
- setup_sprite_##texture_mode##_single_##multi_height##_##edge_mode##_##edge: \
+/* Non-4x draw mask setup for the first tile column: broadcast bytes 0 and 1 */
+/* of block_masks into the left-half and right-half draw mask registers. */
+#define setup_sprite_setup_left_draw_mask_fb_ptr() \
+ vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
+ vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \
+
+/* Same mask setup, and also computes fb_ptr_advance_column as */
+/* 32 - (height << 11): the value added to fb_ptr after a column to step 32 */
+/* bytes to the right and back up by height rows of 2048 bytes. */
+#define setup_sprite_setup_left_draw_mask_fb_ptr_advance_column() \
+ mov fb_ptr_advance_column, #32; \
+ vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
+ \
+ sub fb_ptr_advance_column, height, lsl #11; \
+ vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \
+
+/* Draw mask setup for the last tile column: uses bytes 4 and 5 of block_masks. */
+#define setup_sprite_setup_right_draw_mask_fb_ptr() \
+ vdup.u8 draw_mask_fb_ptr_left, block_masks[4]; \
+ vdup.u8 draw_mask_fb_ptr_right, block_masks[5] \
+
+#define setup_sprite_tile_column_width_single(tm, multi_height, edge_mode, \
+ edge, x4mode) \
+ setup_sprite_##tm##_single_##multi_height##_##edge_mode##_##edge##x4mode: \
setup_sprite_column_data_##multi_height(); \
vext.32 block_masks_shifted, block_masks, block_masks, #1; \
vorr.u32 block_masks, block_masks, block_masks_shifted; \
- vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
- vdup.u8 draw_mask_fb_ptr_right, block_masks[1]; \
+ setup_sprite_setup_left_draw_mask_fb_ptr##x4mode(); \
\
- setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \
- texture_mode); \
+ setup_sprite_tile_column_height_##multi_height(edge_mode, edge, tm, x4mode); \
ldmia sp!, { r4 - r11, pc } \
#define setup_sprite_tiled_advance_column() \
@@ -4791,39 +4772,337 @@ setup_sprite_update_texture_8bpp_cache:
subeq texture_offset_base, texture_offset_base, #(0x100 + 0xF00) \
#define setup_sprite_tile_column_width_multi(tm, multi_height, left_mode, \
- right_mode) \
- setup_sprite_##tm##_multi_##multi_height##_##left_mode##_##right_mode: \
+ right_mode, x4mode) \
+ setup_sprite_##tm##_multi_##multi_height##_##left_mode##_##right_mode##x4mode:\
setup_sprite_column_data_##multi_height(); \
- mov fb_ptr_advance_column, #32; \
\
- sub fb_ptr_advance_column, height, lsl #11; \
- vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
+ setup_sprite_setup_left_draw_mask_fb_ptr_advance_column##x4mode(); \
\
- vdup.u8 draw_mask_fb_ptr_right, block_masks[1]; \
- setup_sprite_tile_column_height_##multi_height(left_mode, right, tm); \
+ setup_sprite_tile_column_height_##multi_height(left_mode, right, tm, x4mode);\
\
subs tile_width, tile_width, #2; \
add fb_ptr, fb_ptr, fb_ptr_advance_column; \
\
- vmov.u8 draw_masks_fb_ptrs, #0; \
beq 1f; \
\
+ vmov.u8 draw_masks_fb_ptrs, #0; \
+ vmov.u8 draw_masks_fb_ptrs2, #0; \
+ \
0: \
setup_sprite_tiled_advance_column(); \
- setup_sprite_tile_column_height_##multi_height(full, none, tm); \
+ setup_sprite_tile_column_height_##multi_height(full, none, tm, x4mode); \
add fb_ptr, fb_ptr, fb_ptr_advance_column; \
subs tile_width, tile_width, #1; \
bne 0b; \
\
1: \
- vdup.u8 draw_mask_fb_ptr_left, block_masks[4]; \
- vdup.u8 draw_mask_fb_ptr_right, block_masks[5]; \
+ setup_sprite_setup_right_draw_mask_fb_ptr##x4mode(); \
\
setup_sprite_tiled_advance_column(); \
- setup_sprite_tile_column_height_##multi_height(right_mode, left, tm); \
+ setup_sprite_tile_column_height_##multi_height(right_mode, left, tm, x4mode);\
ldmia sp!, { r4 - r11, pc } \
+/* Non-4x variants of the helpers selected through the x4mode suffix in */
+/* setup_sprite_tiled_builder. No offset_u adjustment is needed here, so */
+/* this macro expands to nothing. */
+#define setup_sprite_offset_u_adjust() \
+
+/* Keep only the low byte of left_block_mask. */
+#define setup_sprite_get_left_block_mask() \
+ and left_block_mask, left_block_mask, #0xFF \
+
+/* Sets the Z flag when the whole left byte mask is set (0xFF). */
+#define setup_sprite_compare_left_block_mask() \
+ cmp left_block_mask, #0xFF \
+
+/* Extract bits 8-15 of right_block_mask as a zero-extended byte. */
+#define setup_sprite_get_right_block_mask() \
+ uxtb right_block_mask, right_block_mask, ror #8 \
+
+/* Sets the Z flag when the whole right byte mask is set (0xFF). */
+#define setup_sprite_compare_right_block_mask() \
+ cmp right_block_mask, #0xFF \
+
+
+
+/* 4x stuff */
+/* fb_ptr2 is a scratch framebuffer pointer that shares its register with */
+/* column_data; the 4x tile macros save and restore column_data around its use. */
+#define fb_ptr2 column_data
+
+/* Moves fb_ptr back by offset_u * 2 bytes, then doubles offset_u and sets */
+/* offset_u_right to offset_u_right * 2 + 1. */
+#define setup_sprite_offset_u_adjust_4x() \
+ sub fb_ptr, fb_ptr, offset_u, lsl #1; \
+ lsl offset_u_right, #1; \
+ lsl offset_u, #1; \
+ add offset_u_right, #1 \
+
+/* Sign-extend the low halfword of left_block_mask (16-bit mask in 4x mode). */
+#define setup_sprite_get_left_block_mask_4x() \
+ sxth left_block_mask, left_block_mask \
+
+/* Sets the Z flag when the sign-extended left mask is all ones. */
+#define setup_sprite_compare_left_block_mask_4x() \
+ cmp left_block_mask, #0xFFFFFFFF \
+
+/* Sign-extend the upper halfword (bits 16-31) of right_block_mask. */
+#define setup_sprite_get_right_block_mask_4x() \
+ sxth right_block_mask, right_block_mask, ror #16 \
+
+/* Sets the Z flag when the sign-extended right mask is all ones. */
+#define setup_sprite_compare_right_block_mask_4x() \
+ cmp right_block_mask, #0xFFFFFFFF \
+
+
+/* Doubles four 16-bit texels horizontally: texels_ is copied into both */
+/* halves of texels_wide and the halves are interleaved, so every 16-bit */
+/* element appears twice in a row. */
+#define widen_texels_16bpp(texels_) \
+ vmov texels_wide_low, texels_; \
+ vmov texels_wide_high, texels_; \
+ vzip.16 texels_wide_low, texels_wide_high \
+
+/* Same as widen_texels_16bpp but for 8-bit elements: every byte of texels_ */
+/* appears twice in a row across texels_wide_low / texels_wide_high. */
+#define widen_texels_8bpp(texels_) \
+ vmov texels_wide_low, texels_; \
+ vmov texels_wide_high, texels_; \
+ vzip.8 texels_wide_low, texels_wide_high \
+
+/* Writes one render block: texels_ at block_ + 0, then fb_ptr_ is placed in */
+/* lane 1 of draw_mask_fb_ptr_ and that register is stored at block_ + 40. */
+/* block_ ends up advanced by 64 bytes in total. */
+#define write_block_16bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) \
+ vst1.u32 { texels_ }, [ block_, :128 ]; \
+ add block_, block_, #40; \
+ \
+ vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; \
+ vst1.u32 { draw_mask_fb_ptr_ }, [ block_, :64 ]; \
+ add block_, block_, #24 \
+
+/* assumes 16-byte offset already added to block_ */
+/* Stores texels_ at the (pre-offset) block_, then the draw mask / fb_ptr_ */
+/* pair 24 bytes later; block_ again advances by 64 bytes in total. */
+#define write_block_8bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) \
+ vst1.u32 { texels_ }, [ block_, :64 ]; \
+ add block_, block_, #24; \
+ \
+ vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; \
+ vst1.u32 { draw_mask_fb_ptr_ }, [ block_, :64 ]; \
+ add block_, block_, #40 \
+
+/* Expands 8 source texels (texels_low / texels_high) into four render */
+/* blocks: the widened low half targets fb_ptr and fb_ptr + 1024*2 bytes */
+/* with mask a, the widened high half targets fb_ptr + 8*2 and */
+/* fb_ptr + 8*2 + 1024*2 with mask b. fb_ptr_tmp is a scratch register. */
+#define do_texture_block_16bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \
+ draw_mask_fb_ptr_b_) \
+ widen_texels_16bpp(texels_low); \
+ add fb_ptr_tmp, fb_ptr, #1024*2; \
+ \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr); \
+ \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); \
+ widen_texels_16bpp(texels_high); \
+ \
+ add fb_ptr_tmp, fb_ptr, #8*2; \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); \
+ \
+ add fb_ptr_tmp, fb_ptr_tmp, #1024*2; \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) \
+
+/* 8bpp counterpart: widens the 8 texel bytes in texels once and writes the */
+/* low/high widened halves to the same four framebuffer positions as the */
+/* 16bpp version (block must already carry the 16-byte offset). */
+#define do_texture_block_8bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \
+ draw_mask_fb_ptr_b_) \
+ widen_texels_8bpp(texels); \
+ add fb_ptr_tmp, fb_ptr, #1024*2; \
+ \
+ write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr); \
+ write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); \
+ \
+ add fb_ptr_tmp, fb_ptr, #8*2; \
+ write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); \
+ \
+ add fb_ptr_tmp, fb_ptr_tmp, #1024*2; \
+ write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) \
+
+
+/* Loads 32 bytes from the CLUT pointer stored in psx_gpu and de-interleaves */
+/* them so clut_a holds the even bytes and clut_b the odd bytes */
+/* (presumably the low and high bytes of the 16-bit CLUT entries - confirm */
+/* against the clut_low / clut_high aliases). */
+#define setup_sprite_tiled_initialize_4bpp_4x() \
+ ldr clut_ptr, [ psx_gpu, #psx_gpu_clut_ptr_offset ]; \
+ vld1.u32 { clut_a, clut_b }, [ clut_ptr, :128 ]; \
+ \
+ vuzp.u8 clut_a, clut_b \
+
+/* 8bpp needs no per-sprite initialization here; expands to nothing. */
+#define setup_sprite_tiled_initialize_8bpp_4x() \
+
+
+/* Operand fragments (not full instructions): they are pasted after */
+/* "movgt num_blocks," by the block accounting macro. A half-width tile */
+/* yields sub_tile_height * 4 blocks, a full-width tile sub_tile_height * 8. */
+#define setup_sprite_block_count_single_4x() \
+ sub_tile_height, lsl #2 \
+
+#define setup_sprite_block_count_double_4x() \
+ sub_tile_height, lsl #(1+2) \
+
+/* Emits render blocks for a full-width (two 8-texel halves) 4bpp tile in 4x */
+/* mode. Per source row: fetch 8 texel indices at texture_offset and at */
+/* texture_offset + 8, look up the low and high byte of each CLUT entry with */
+/* vtbl, interleave them into 16-bit texels and expand each half into four */
+/* render blocks. fb_ptr advances by 32 + (2048 - 16) * 2 = 4096 bytes per */
+/* row, texture_offset by 0x10 per row and by a further 0xF00 after the tile. */
+/* column_data is spilled to the stack because fb_ptr2 aliases it. */
+#define setup_sprite_tile_full_4bpp_4x(edge) \
+ setup_sprite_tile_add_blocks(double_4x); \
+ str column_data, [sp, #-8]!; /* fb_ptr2 */ \
+ \
+ 4: \
+ and texture_block_ptr, texture_offset, texture_mask; \
+ pld [ fb_ptr ]; \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \
+ \
+ add texture_block_ptr, texture_offset, #8; \
+ vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \
+ \
+ and texture_block_ptr, texture_block_ptr, texture_mask; \
+ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \
+ \
+ vzip.8 texels_low, texels_high; \
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \
+ draw_mask_fb_ptr_left_b); \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ pld [ fb_ptr, #2048 ]; \
+ \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \
+ add fb_ptr, fb_ptr, #16*2; \
+ \
+ vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \
+ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \
+ \
+ vzip.8 texels_low, texels_high; \
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \
+ draw_mask_fb_ptr_right_b); \
+ \
+ add texture_offset, texture_offset, #0x10; \
+ add fb_ptr, fb_ptr, #(2048 - 16) * 2; \
+ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ bne 4b; \
+ \
+ ldr column_data, [sp], #8; /* fb_ptr2 */ \
+ add texture_offset, texture_offset, #0xF00; \
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \
+
+
+/* Emits render blocks for a half-width (single 8-texel column) 4bpp tile in */
+/* 4x mode. Per source row: fetch 8 texel indices, translate them through the */
+/* CLUT with vtbl, interleave into 16-bit texels and expand them into four */
+/* render blocks using the draw masks selected by edge (left or right). */
+/* fb_ptr advances by 2048 * 2 bytes per row, texture_offset by 0x10 per row */
+/* and by a further 0xF00 after the tile. column_data is spilled to the */
+/* stack because fb_ptr2 aliases it. */
+#define setup_sprite_tile_half_4bpp_4x(edge) \
+ setup_sprite_tile_add_blocks(single_4x); \
+ str column_data, [sp, #-8]!; /* fb_ptr2 */ \
+ \
+ 4: \
+ and texture_block_ptr, texture_offset, texture_mask; \
+ pld [ fb_ptr ]; \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ /* NOTE(review): texture_block_ptr is recomputed at label 4 and is not */ \
+ /* read again in this macro, so the add above looks redundant - confirm. */ \
+ vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \
+ \
+ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \
+ add texture_offset, texture_offset, #0x10; \
+ \
+ vzip.8 texels_low, texels_high; \
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \
+ draw_mask_fb_ptr_##edge##_b); \
+ \
+ pld [ fb_ptr, #2048 ]; \
+ add fb_ptr, fb_ptr, #2048 * 2; \
+ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ bne 4b; \
+ \
+ ldr column_data, [sp], #8; /* fb_ptr2 */ \
+ add texture_offset, texture_offset, #0xF00; \
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \
+
+
+/* Emits render blocks for a full-width (two 8-texel halves) 8bpp tile in 4x */
+/* mode. block is biased by 16 bytes for the duration of the loop, as */
+/* write_block_8bpp expects, and restored afterwards. Per source row the */
+/* 8 texel bytes at texture_offset and at texture_offset + 8 are each */
+/* expanded into four render blocks. fb_ptr advances by */
+/* 32 + (2048 - 16) * 2 = 4096 bytes per row. column_data is spilled to the */
+/* stack because fb_ptr2 aliases it. */
+#define setup_sprite_tile_full_8bpp_4x(edge) \
+ setup_sprite_tile_add_blocks(double_4x); \
+ add block, block, #16; \
+ str column_data, [sp, #-8]!; /* fb_ptr2 */ \
+ \
+ 4: \
+ and texture_block_ptr, texture_offset, texture_mask; \
+ pld [ fb_ptr ]; \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \
+ \
+ add texture_block_ptr, texture_offset, #8; \
+ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \
+ draw_mask_fb_ptr_left_b); \
+ \
+ pld [ fb_ptr, #2048 ]; \
+ and texture_block_ptr, texture_block_ptr, texture_mask; \
+ \
+ add fb_ptr, fb_ptr, #16*2; \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \
+ \
+ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \
+ draw_mask_fb_ptr_right_b); \
+ \
+ add texture_offset, texture_offset, #0x10; \
+ add fb_ptr, fb_ptr, #(2048 - 16) * 2; \
+ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ bne 4b; \
+ \
+ sub block, block, #16; \
+ ldr column_data, [sp], #8; /* fb_ptr2 */ \
+ add texture_offset, texture_offset, #0xF00; \
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \
+
+
+/* Emits render blocks for a half-width (single 8-texel column) 8bpp tile in */
+/* 4x mode. block is biased by 16 bytes for the duration of the loop, as */
+/* write_block_8bpp expects, and restored afterwards. Each source row of */
+/* 8 texel bytes is expanded into four render blocks using the draw masks */
+/* selected by edge; fb_ptr advances by 2048 * 2 bytes per row. column_data */
+/* is spilled to the stack because fb_ptr2 aliases it. */
+#define setup_sprite_tile_half_8bpp_4x(edge) \
+ setup_sprite_tile_add_blocks(single_4x); \
+ add block, block, #16; \
+ str column_data, [sp, #-8]!; /* fb_ptr2 */ \
+ \
+ 4: \
+ and texture_block_ptr, texture_offset, texture_mask; \
+ pld [ fb_ptr ]; \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \
+ \
+ pld [ fb_ptr, #2048 ]; \
+ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \
+ draw_mask_fb_ptr_##edge##_b); \
+ \
+ add texture_offset, texture_offset, #0x10; \
+ add fb_ptr, fb_ptr, #2048 * 2; \
+ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ bne 4b; \
+ \
+ sub block, block, #16; \
+ ldr column_data, [sp], #8; /* fb_ptr2 */ \
+ add texture_offset, texture_offset, #0xF00; \
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \
+
+
+/* Column setup for a half tile covering the right 8 texels: start at */
+/* texture_offset_base + 8 and shift fb_ptr right by 16 * 2 bytes. */
+#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \
+ add texture_offset, texture_offset_base, #8; \
+ add fb_ptr, fb_ptr, #16 * 2 \
+
+/* Column setup for a half tile covering the left 8 texels: no shift needed. */
+#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \
+ mov texture_offset, texture_offset_base \
+
+/* Dispatches to the left/right half-tile setup above based on edge. */
+#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \
+ setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \
+
+/* Column setup for a full tile: start at texture_offset_base. */
+#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \
+ mov texture_offset, texture_offset_base \
+
+/* Undoes the fb_ptr shift applied by the right half-tile pre-adjust. */
+#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \
+ sub fb_ptr, fb_ptr, #16 * 2 \
+
+/* Left half tiles shifted nothing, so there is nothing to undo. */
+#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \
+
+/* Dispatches to the left/right half-tile cleanup above based on edge. */
+#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \
+ setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \
+
+/* Full tiles need no cleanup; expands to nothing. */
+#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \
+
+
+/* 4x draw mask setup for the first tile column: bytes 0-3 of block_masks */
+/* are broadcast into the four mask registers (left a/b, right a/b), one per */
+/* 8-pixel output block of the widened 16-texel tile row. */
+#define setup_sprite_setup_left_draw_mask_fb_ptr_4x() \
+ vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \
+ vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \
+ vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \
+ vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \
+
+/* Same mask setup, and also computes fb_ptr_advance_column as */
+/* 32 * 2 - (height << 12): the value added to fb_ptr after a column to step */
+/* 64 bytes to the right and back up by height * 2 rows of 2048 bytes. */
+#define setup_sprite_setup_left_draw_mask_fb_ptr_advance_column_4x() \
+ mov fb_ptr_advance_column, #32 * 2; \
+ vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \
+ vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \
+ sub fb_ptr_advance_column, height, lsl #11 + 1; \
+ vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \
+ vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \
+
+/* 4x draw mask setup for the last tile column: uses bytes 4-7 of block_masks. */
+#define setup_sprite_setup_right_draw_mask_fb_ptr_4x() \
+ vdup.u8 draw_mask_fb_ptr_left_a, block_masks[4]; \
+ vdup.u8 draw_mask_fb_ptr_left_b, block_masks[5]; \
+ vdup.u8 draw_mask_fb_ptr_right_a, block_masks[6]; \
+ vdup.u8 draw_mask_fb_ptr_right_b, block_masks[7] \
+
+
// r0: psx_gpu
// r1: x
// r2: y
@@ -4833,34 +5112,48 @@ setup_sprite_update_texture_8bpp_cache:
// [ sp + 8 ]: height
// [ sp + 12 ]: color (unused)
-#define setup_sprite_tiled_builder(texture_mode) \
- \
-setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \
-setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \
-setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \
-setup_sprite_tile_column_width_single(texture_mode, single, full, none); \
-setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \
-setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \
-setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \
-setup_sprite_tile_column_width_single(texture_mode, single, half, right); \
-setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \
-setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \
-setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \
-setup_sprite_tile_column_width_single(texture_mode, single, half, left); \
-setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \
-setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \
+#define setup_sprite_tiled_builder(texture_mode, x4mode) \
+ \
+setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, single, full, none, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, single, half, right, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \
+ x4mode); \
+setup_sprite_tile_column_width_single(texture_mode, single, half, left, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \
+ x4mode); \
+setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \
+ x4mode); \
\
.align 4; \
\
-function(setup_sprite_##texture_mode) \
+function(setup_sprite_##texture_mode##x4mode) \
stmdb sp!, { r4 - r11, r14 }; \
- setup_sprite_tiled_initialize_##texture_mode(); \
+ setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
\
ldr v, [ sp, #36 ]; \
and offset_u, u, #0xF; \
\
ldr width, [ sp, #40 ]; \
- ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \
\
ldr height, [ sp, #44 ]; \
add fb_ptr, fb_ptr, y, lsl #11; \
@@ -4883,11 +5176,13 @@ function(setup_sprite_##texture_mode) \
\
/* texture_offset_base = VH-UH-UL-00 */\
bfi texture_offset_base, u, #4, #8; \
- movw right_block_mask, #0xFFFE; \
+ mov right_block_mask, #0xFFFFFFFE; \
+ \
+ setup_sprite_offset_u_adjust##x4mode(); \
\
/* texture_offset_base = VH-UH-VL-00 */\
bfi texture_offset_base, v, #4, #4; \
- movw left_block_mask, #0xFFFF; \
+ mov left_block_mask, #0xFFFFFFFF; \
\
mov tile_height, height_rounded, lsr #4; \
mvn left_block_mask, left_block_mask, lsl offset_u; \
@@ -4907,16 +5202,16 @@ function(setup_sprite_##texture_mode) \
\
/* texture_mask = HH-WH-HL-WL */\
bfi texture_mask, texture_mask_rev, #8, #4; \
- and left_block_mask, left_block_mask, #0xFF; \
+ setup_sprite_get_left_block_mask##x4mode(); \
\
mov control_mask, #0; \
- cmp left_block_mask, #0xFF; \
+ setup_sprite_compare_left_block_mask##x4mode(); \
\
- uxtb right_block_mask, right_block_mask, ror #8; \
+ setup_sprite_get_right_block_mask##x4mode(); \
orreq control_mask, control_mask, #0x4; \
\
ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]; \
- cmp right_block_mask, #0xFF; \
+ setup_sprite_compare_right_block_mask##x4mode(); \
\
orreq control_mask, control_mask, #0x8; \
cmp tile_width, #1; \
@@ -4931,25 +5226,31 @@ function(setup_sprite_##texture_mode) \
ldr pc, [ pc, control_mask, lsl #2 ]; \
nop; \
\
- .word setup_sprite_##texture_mode##_multi_multi_full_full; \
- .word setup_sprite_##texture_mode##_single_multi_full_none; \
- .word setup_sprite_##texture_mode##_multi_single_full_full; \
- .word setup_sprite_##texture_mode##_single_single_full_none; \
- .word setup_sprite_##texture_mode##_multi_multi_half_full; \
- .word setup_sprite_##texture_mode##_single_multi_half_right; \
- .word setup_sprite_##texture_mode##_multi_single_half_full; \
- .word setup_sprite_##texture_mode##_single_single_half_right; \
- .word setup_sprite_##texture_mode##_multi_multi_full_half; \
- .word setup_sprite_##texture_mode##_single_multi_half_left; \
- .word setup_sprite_##texture_mode##_multi_single_full_half; \
- .word setup_sprite_##texture_mode##_single_single_half_left; \
- .word setup_sprite_##texture_mode##_multi_multi_half_half; \
+ .word setup_sprite_##texture_mode##_multi_multi_full_full##x4mode; \
+ .word setup_sprite_##texture_mode##_single_multi_full_none##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_single_full_full##x4mode; \
+ .word setup_sprite_##texture_mode##_single_single_full_none##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_multi_half_full##x4mode; \
+ .word setup_sprite_##texture_mode##_single_multi_half_right##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_single_half_full##x4mode; \
+ .word setup_sprite_##texture_mode##_single_single_half_right##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_multi_full_half##x4mode; \
+ .word setup_sprite_##texture_mode##_single_multi_half_left##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_single_full_half##x4mode; \
+ .word setup_sprite_##texture_mode##_single_single_half_left##x4mode; \
+ .word setup_sprite_##texture_mode##_multi_multi_half_half##x4mode; \
.word 0x00000000; \
- .word setup_sprite_##texture_mode##_multi_single_half_half \
+ .word setup_sprite_##texture_mode##_multi_single_half_half##x4mode; \
-setup_sprite_tiled_builder(4bpp);
-setup_sprite_tiled_builder(8bpp);
+setup_sprite_tiled_builder(4bpp,);
+setup_sprite_tiled_builder(8bpp,);
+
+#undef draw_mask_fb_ptr_left
+#undef draw_mask_fb_ptr_right
+
+setup_sprite_tiled_builder(4bpp, _4x);
+setup_sprite_tiled_builder(8bpp, _4x);
#undef block_ptr
@@ -5038,6 +5339,12 @@ function(texture_sprite_blocks_8bpp)
#undef texture_mask
#undef num_blocks
#undef texture_offset
+#undef texels_low
+#undef texels_high
+#undef texels_wide_low
+#undef texels_wide_high
+#undef texels_wide
+#undef fb_ptr2
#define psx_gpu r0
#define x r1
@@ -5049,6 +5356,7 @@ function(texture_sprite_blocks_8bpp)
#define left_offset r8
#define width_rounded r9
#define right_width r10
+
#define block_width r11
#define texture_offset_base r1
@@ -5059,6 +5367,7 @@ function(texture_sprite_blocks_8bpp)
#define fb_ptr r7
#define texture_offset r8
#define blocks_remaining r9
+#define fb_ptr2 r10
#define fb_ptr_pitch r12
#define texture_block_ptr r14
@@ -5077,29 +5386,23 @@ function(texture_sprite_blocks_8bpp)
#define draw_mask_fb_ptr d2
#define texels q2
+#define draw_mask_fb_ptr_a d2
+#define draw_mask_fb_ptr_b d3
+#define texels_low d4
+#define texels_high d5
+#define texels_wide_low d6
+#define texels_wide_high d7
+#define texels_wide q3
-setup_sprites_16bpp_flush_single:
- vpush { d0 - d2 }
-
- stmdb sp!, { r0 - r3, r12, r14 }
- bl flush_render_block_buffer
- ldmia sp!, { r0 - r3, r12, r14 }
-
- vpop { d0 - d2 }
-
- add block, psx_gpu, #psx_gpu_blocks_offset
- mov num_blocks, #1
-
- bx lr
-setup_sprites_16bpp_flush_row:
- vpush { d0 - d2 }
+setup_sprites_16bpp_flush:
+ vpush { d0 - d3 }
stmdb sp!, { r0 - r3, r12, r14 }
bl flush_render_block_buffer
ldmia sp!, { r0 - r3, r12, r14 }
- vpop { d0 - d2 }
+ vpop { d0 - d3 }
add block, psx_gpu, #psx_gpu_blocks_offset
mov num_blocks, block_width
@@ -5108,7 +5411,7 @@ setup_sprites_16bpp_flush_row:
function(setup_sprite_16bpp)
stmdb sp!, { r4 - r11, r14 }
- ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
ldr v, [ sp, #36 ]
add fb_ptr, fb_ptr, y, lsl #11
@@ -5164,7 +5467,7 @@ function(setup_sprite_16bpp)
1:
add num_blocks, num_blocks, #1
cmp num_blocks, #MAX_BLOCKS
- blgt setup_sprites_16bpp_flush_single
+ blgt setup_sprites_16bpp_flush
and texture_block_ptr, texture_offset_base, texture_mask
subs height, height, #1
@@ -5193,7 +5496,7 @@ function(setup_sprite_16bpp)
mov texture_offset, texture_offset_base
cmp num_blocks, #MAX_BLOCKS
- blgt setup_sprites_16bpp_flush_row
+ blgt setup_sprites_16bpp_flush
add texture_offset_base, texture_offset_base, #2048
and texture_block_ptr, texture_offset, texture_mask
@@ -5264,6 +5567,290 @@ function(setup_sprite_16bpp)
ldmia sp!, { r4 - r11, pc }
+// 4x version
+// FIXME: duplicate code with normal version :(
+//
+// Queues render blocks for a 16bpp textured sprite in 4x mode. Every
+// 8-texel source block is expanded by do_texture_block_16bpp_4x into four
+// render blocks (texels doubled horizontally, each row written to two
+// framebuffer rows of 2048 bytes).
+// After the 36-byte register save, v, width and height are read from the
+// stack at sp + 36, sp + 40 and sp + 44.
+#undef draw_mask_fb_ptr
+
+function(setup_sprite_16bpp_4x)
+ stmdb sp!, { r4 - r11, r14 }
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
+
+ ldr v, [ sp, #36 ]
+ add fb_ptr, fb_ptr, y, lsl #11
+
+ ldr width, [ sp, #40 ]
+ add fb_ptr, fb_ptr, x, lsl #1
+
+ ldr height, [ sp, #44 ]
+ and left_offset, u, #0x7
+
+ // texture_offset_base = u * 2 + v * 2048 (byte offset into the page)
+ add texture_offset_base, u, u
+ add width_rounded, width, #7
+
+ add texture_offset_base, v, lsl #11
+ movw left_mask_bits, #0xFFFF
+
+ ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
+ add width_rounded, width_rounded, left_offset
+
+ // left_offset is doubled: each source texel covers two output pixels
+ lsl left_offset, #1
+
+ ldrb texture_mask_height, [ psx_gpu, #psx_gpu_texture_mask_height_offset ]
+ sub fb_ptr, fb_ptr, left_offset, lsl #1
+
+ add texture_mask, texture_mask_width, texture_mask_width
+ movw right_mask_bits, #0xFFFC
+
+ and right_width, width_rounded, #0x7
+ mvn left_mask_bits, left_mask_bits, lsl left_offset
+
+ lsl right_width, #1
+
+ add texture_mask, texture_mask_height, lsl #11
+ mov block_width, width_rounded, lsr #3
+
+ mov right_mask_bits, right_mask_bits, lsl right_width
+ movw fb_ptr_pitch, #(2048 + 16) * 2
+
+ // fb_ptr_pitch = (2048 + 16) * 2 - block_width * 32; block_width still
+ // counts 8-texel source blocks at this point
+ sub fb_ptr_pitch, fb_ptr_pitch, block_width, lsl #4+1
+ vmov block_masks, left_mask_bits, right_mask_bits
+
+ ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+ add block, psx_gpu, #psx_gpu_blocks_offset
+
+ bic texture_offset_base, texture_offset_base, #0xF
+ cmp block_width, #1
+
+ ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ]
+ add block, block, num_blocks, lsl #6
+
+ // block_width now counts render blocks (4 per source block); lsl leaves
+ // the flags from the cmp above intact for the bne
+ lsl block_width, #2
+ bne 0f
+
+ // Single source block per row: merge the left and right masks.
+ vext.32 block_masks_shifted, block_masks, block_masks, #1
+ vorr.u32 block_masks, block_masks, block_masks_shifted
+ vdup.u8 draw_mask_fb_ptr_a, block_masks[0]
+ vdup.u8 draw_mask_fb_ptr_b, block_masks[1]
+
+ 1:
+ add num_blocks, num_blocks, block_width
+ cmp num_blocks, #MAX_BLOCKS
+ blgt setup_sprites_16bpp_flush
+
+ and texture_block_ptr, texture_offset_base, texture_mask
+ subs height, height, #1
+
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr
+ vld1.u32 { texels }, [ texture_block_ptr, :128 ]
+
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b)
+
+ add texture_offset_base, texture_offset_base, #2048
+ add fb_ptr, fb_ptr, #2048*2
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+ // flags still come from the subs on height above
+ bne 1b
+
+ ldmia sp!, { r4 - r11, pc }
+
+ // Multiple source blocks per row: left edge, middle blocks, right edge.
+ 0:
+ add num_blocks, num_blocks, block_width
+ mov texture_offset, texture_offset_base
+
+ vdup.u8 draw_mask_fb_ptr_a, block_masks[0] // left_mask_bits
+ vdup.u8 draw_mask_fb_ptr_b, block_masks[1]
+
+ cmp num_blocks, #MAX_BLOCKS
+ blgt setup_sprites_16bpp_flush
+
+ add texture_offset_base, texture_offset_base, #2048
+ and texture_block_ptr, texture_offset, texture_mask
+
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr
+ vld1.u32 { texels }, [ texture_block_ptr, :128 ]
+
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b)
+
+ // remaining middle render blocks = block_width minus both edge blocks
+ subs blocks_remaining, block_width, #2*4
+ add texture_offset, texture_offset, #16
+
+ // middle blocks are drawn with an all-zero mask
+ vmov.u8 draw_mask_fb_ptr_a, #0
+ vmov.u8 draw_mask_fb_ptr_b, #0
+
+ add fb_ptr, fb_ptr, #16*2
+ beq 2f
+
+ 1:
+ and texture_block_ptr, texture_offset, texture_mask
+ subs blocks_remaining, blocks_remaining, #4
+
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr
+ vld1.u32 { texels }, [ texture_block_ptr, :128 ]
+
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b)
+ add texture_offset, texture_offset, #16
+
+ add fb_ptr, fb_ptr, #16*2
+ bgt 1b
+
+ 2:
+ vdup.u8 draw_mask_fb_ptr_a, block_masks[4] // right_mask_bits
+ vdup.u8 draw_mask_fb_ptr_b, block_masks[5]
+
+ and texture_block_ptr, texture_offset, texture_mask
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr
+
+ vld1.u32 { texels }, [ texture_block_ptr, :128 ]
+
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b)
+ subs height, height, #1
+
+ // fb_ptr_pitch brings fb_ptr to the row start two framebuffer rows down
+ add fb_ptr, fb_ptr, fb_ptr_pitch
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+
+ bne 0b
+
+ ldmia sp!, { r4 - r11, pc }
+
+
+#undef width
+#undef right_width
+#undef right_mask_bits
+#undef color
+#undef height
+#undef blocks_remaining
+#undef colors
+#undef right_mask
+#undef test_mask
+#undef draw_mask
+
+#define psx_gpu r0
+#define x r1
+#define y r2
+#define width r3
+#define right_width r5
+#define right_mask_bits r6
+#define fb_ptr r7
+#define color r8
+#define height r9
+#define fb_ptr_pitch r12
+
+// referenced by setup_sprites_16bpp_flush
+#define num_blocks r4
+#define block r5
+#define block_width r11
+
+#define color_r r1
+#define color_g r2
+#define color_b r8
+#define blocks_remaining r6
+
+#define colors q0
+#define right_mask q1
+#define test_mask q2
+#define draw_mask q2
+#define draw_mask_bits_fb_ptr d6
+
+
+.align 3
+
+// Queues render blocks for a flat-colored (untextured) sprite.
+// If none of RENDER_STATE_MASK_EVALUATE, RENDER_FLAGS_MODULATE_TEXELS or
+// RENDER_FLAGS_BLEND is set in the render state, control is handed to
+// setup_sprite_untextured_simple instead.
+// After the 36-byte register save, width, height and color are read from
+// the stack at sp + 40, sp + 44 and sp + 48.
+function(setup_sprite_untextured)
+ ldrh r12, [ psx_gpu, #psx_gpu_render_state_offset ]
+ tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \
+ | RENDER_FLAGS_BLEND)
+ beq setup_sprite_untextured_simple
+
+ stmdb sp!, { r4 - r11, r14 }
+
+ ldr width, [ sp, #40 ]
+ ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
+
+ ldr height, [ sp, #44 ]
+ add fb_ptr, fb_ptr, y, lsl #11
+
+ add fb_ptr, fb_ptr, x, lsl #1
+ sub right_width, width, #1
+
+ // right_width = ((width - 1) & 7) + 1: pixels used in the last block
+ ldr color, [ sp, #48 ]
+ and right_width, #7
+
+ // block_width = (width + 7) / 8: 8-pixel blocks per row
+ add block_width, width, #7
+ add right_width, #1
+
+ lsr block_width, #3
+ mov right_mask_bits, #0xff
+
+ sub fb_ptr_pitch, block_width, #1
+ lsl right_mask_bits, right_width
+
+ // fb_ptr_pitch = 1024 * 2 - (block_width - 1) * 16: row stride minus the
+ // bytes already stepped over within the row
+ lsl fb_ptr_pitch, #3+1
+ ubfx color_r, color, #3, #5
+
+ rsb fb_ptr_pitch, #1024*2
+ ubfx color_g, color, #11, #5
+
+ // test_mask lives at offset 0 of psx_gpu
+ vld1.u32 { test_mask }, [ psx_gpu, :128 ]
+ ubfx color_b, color, #19, #5
+
+ // pack the top 5 bits of each 8-bit channel: r | g << 5 | b << 10
+ vdup.u16 right_mask, right_mask_bits
+ orr color, color_r, color_b, lsl #10
+
+ ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+ orr color, color, color_g, lsl #5
+
+ vtst.u16 right_mask, right_mask, test_mask
+ add block, psx_gpu, #psx_gpu_blocks_offset
+
+ vdup.u16 colors, color
+ add block, block, num_blocks, lsl #6
+
+
+setup_sprite_untextured_height_loop:
+ add num_blocks, block_width
+ sub blocks_remaining, block_width, #1
+
+ cmp num_blocks, #MAX_BLOCKS
+ blgt setup_sprites_16bpp_flush
+
+ cmp blocks_remaining, #0
+ ble 1f
+
+ vmov.u8 draw_mask, #0 /* zero_mask */
+ vmov.u8 draw_mask_bits_fb_ptr, #0
+
+ // All blocks of the row except the last: zero draw mask.
+ 0:
+ vst1.u32 { draw_mask }, [ block, :128 ]!
+ subs blocks_remaining, #1
+
+ vst1.u32 { colors }, [ block, :128 ]
+ add block, block, #24
+
+ vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr
+ vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ]
+
+ add block, block, #24
+ add fb_ptr, #8*2
+ bgt 0b
+
+ // Last block of the row: uses right_mask.
+ 1:
+ vst1.u32 { right_mask }, [ block, :128 ]!
+ subs height, #1
+
+ vst1.u32 { colors }, [ block, :128 ]
+ add block, block, #24
+
+ vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr
+ vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ]
+
+ add block, block, #24
+ add fb_ptr, fb_ptr_pitch
+
+ strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+ // flags still come from the subs on height above
+ bgt setup_sprite_untextured_height_loop
+
+ ldmia sp!, { r4 - r11, pc }
+
+
+
#undef texture_page_ptr
#undef vram_ptr
#undef dirty_textures_mask
@@ -5461,3 +6048,40 @@ function(update_texture_8bpp_cache_slice)
vpop { q0 - q3 }
ldmia sp!, { r4 - r11, pc }
+
+/* void scale2x_tiles8(void *dst, const void *src, int w8, int h) */
+/*
+ * Doubles a 16bpp image in both directions. Each tile of 8 source pixels is
+ * widened to 16 pixels (every pixel duplicated) and written to two
+ * consecutive destination rows. w8 is the row width in 8-pixel tiles, h the
+ * number of source rows. Source rows are 1024*2 bytes apart, destination
+ * rows are advanced by 1024*2*2 bytes per source row. The :128 hints
+ * require 16-byte aligned src and dst.
+ * NOTE(review): two tiles are loaded per iteration even when only one
+ * remains (odd w8), so 16 bytes past the last tile are read - confirm the
+ * source buffer allows this.
+ */
+function(scale2x_tiles8)
+ push { r4, r14 }
+
+ mov r4, r1
+ add r12, r0, #1024*2
+ mov r14, r2
+
+0:
+ vld1.u16 { q0 }, [ r1, :128 ]!
+ vld1.u16 { q2 }, [ r1, :128 ]!
+ vmov q1, q0
+ vmov q3, q2
+ vzip.16 q0, q1
+ vzip.16 q2, q3
+ subs r14, #2
+ vst1.u16 { q0, q1 }, [ r0, :128 ]!
+ vst1.u16 { q0, q1 }, [ r12, :128 ]!
+ /* negative count: only one tile was left, skip the second store */
+ blt 1f
+ vst1.u16 { q2, q3 }, [ r0, :128 ]!
+ vst1.u16 { q2, q3 }, [ r12, :128 ]!
+ bgt 0b
+1:
+ subs r3, #1
+ mov r14, r2
+ add r0, #1024*2*2
+ add r4, #1024*2
+ /* rewind dst by the w8 * 32 bytes written in this row */
+ sub r0, r2, lsl #4+1
+ mov r1, r4
+ add r12, r0, #1024*2
+ bgt 0b
+ nop
+
+ pop { r4, pc }
+
+// vim:filetype=armasm
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
new file mode 100644
index 0000000..1307891
--- /dev/null
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
@@ -0,0 +1,57 @@
+#define psx_gpu_test_mask_offset 0x0
+#define psx_gpu_uvrg_offset 0x10
+#define psx_gpu_uvrg_dx_offset 0x20
+#define psx_gpu_uvrg_dy_offset 0x30
+#define psx_gpu_u_block_span_offset 0x40
+#define psx_gpu_v_block_span_offset 0x50
+#define psx_gpu_r_block_span_offset 0x60
+#define psx_gpu_g_block_span_offset 0x70
+#define psx_gpu_b_block_span_offset 0x80
+#define psx_gpu_b_offset 0x90
+#define psx_gpu_b_dy_offset 0x94
+#define psx_gpu_triangle_area_offset 0x98
+#define psx_gpu_texture_window_settings_offset 0x9c
+#define psx_gpu_current_texture_mask_offset 0xa0
+#define psx_gpu_viewport_mask_offset 0xa4
+#define psx_gpu_dirty_textures_4bpp_mask_offset 0xa8
+#define psx_gpu_dirty_textures_8bpp_mask_offset 0xac
+#define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 0xb0
+#define psx_gpu_triangle_color_offset 0xb4
+#define psx_gpu_dither_table_offset 0xb8
+#define psx_gpu_uvrgb_phase_offset 0xc8
+#define psx_gpu_render_block_handler_offset 0xcc
+#define psx_gpu_texture_page_ptr_offset 0xd0
+#define psx_gpu_texture_page_base_offset 0xd4
+#define psx_gpu_clut_ptr_offset 0xd8
+#define psx_gpu_vram_ptr_offset 0xdc
+#define psx_gpu_vram_out_ptr_offset 0xe0
+#define psx_gpu_render_state_base_offset 0xe4
+#define psx_gpu_render_state_offset 0xe6
+#define psx_gpu_num_spans_offset 0xe8
+#define psx_gpu_num_blocks_offset 0xea
+#define psx_gpu_viewport_start_x_offset 0xec
+#define psx_gpu_viewport_start_y_offset 0xee
+#define psx_gpu_viewport_end_x_offset 0xf0
+#define psx_gpu_viewport_end_y_offset 0xf2
+#define psx_gpu_mask_msb_offset 0xf4
+#define psx_gpu_triangle_winding_offset 0xf6
+#define psx_gpu_display_area_draw_enable_offset 0xf7
+#define psx_gpu_current_texture_page_offset 0xf8
+#define psx_gpu_last_8bpp_texture_page_offset 0xf9
+#define psx_gpu_texture_mask_width_offset 0xfa
+#define psx_gpu_texture_mask_height_offset 0xfb
+#define psx_gpu_texture_window_x_offset 0xfc
+#define psx_gpu_texture_window_y_offset 0xfd
+#define psx_gpu_primitive_type_offset 0xfe
+#define psx_gpu_render_mode_offset 0xff
+#define psx_gpu_offset_x_offset 0x100
+#define psx_gpu_offset_y_offset 0x102
+#define psx_gpu_clut_settings_offset 0x104
+#define psx_gpu_texture_settings_offset 0x106
+#define psx_gpu_blocks_offset 0x200
+#define psx_gpu_span_uvrg_offset_offset 0x2200
+#define psx_gpu_span_edge_data_offset 0x4200
+#define psx_gpu_span_b_offset_offset 0x5200
+#define psx_gpu_texture_4bpp_cache_offset 0x5a00
+#define psx_gpu_texture_8bpp_even_cache_offset 0x205a00
+#define psx_gpu_texture_8bpp_odd_cache_offset 0x305a00
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
new file mode 100644
index 0000000..5adfb75
--- /dev/null
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
@@ -0,0 +1,86 @@
+#include <stdio.h>
+#include <stddef.h>
+
+#include "common.h"
+
+/*
+ * Emit one "#define psx_gpu_<member>_offset 0x<offset>" line to stream f.
+ * offsetof() yields a size_t, which does not match the %x conversion, so the
+ * value is cast to unsigned int explicitly.
+ * The trailing semicolon is kept so existing "WRITE_OFFSET(f, m);" call
+ * sites expand exactly as before.
+ */
+#define WRITE_OFFSET(f, member) \
+  fprintf(f, "#define %-50s0x%x\n", \
+    "psx_gpu_" #member "_offset", \
+    (unsigned int)offsetof(psx_gpu_struct, member));
+
+/*
+ * Regenerates psx_gpu_offsets.h in the current directory: one #define per
+ * psx_gpu_struct member listed below, giving its byte offset.
+ *
+ * Refuses to run unless pointers are 4 bytes wide, so the emitted offsets
+ * always describe a 32-bit layout of the struct.
+ *
+ * Returns 0 on success, 1 if the pointer size is wrong, the file cannot be
+ * opened, or writing/closing the file fails.
+ */
+int main(void)
+{
+  FILE *f;
+
+  if (sizeof(f) != 4) {
+    fprintf(stderr, "bad pointer size\n");
+    return 1;
+  }
+
+  f = fopen("psx_gpu_offsets.h", "w");
+  if (f == NULL) {
+    perror("fopen");
+    return 1;
+  }
+
+  WRITE_OFFSET(f, test_mask);
+  WRITE_OFFSET(f, uvrg);
+  WRITE_OFFSET(f, uvrg_dx);
+  WRITE_OFFSET(f, uvrg_dy);
+  WRITE_OFFSET(f, u_block_span);
+  WRITE_OFFSET(f, v_block_span);
+  WRITE_OFFSET(f, r_block_span);
+  WRITE_OFFSET(f, g_block_span);
+  WRITE_OFFSET(f, b_block_span);
+  WRITE_OFFSET(f, b);
+  WRITE_OFFSET(f, b_dy);
+  WRITE_OFFSET(f, triangle_area);
+  WRITE_OFFSET(f, texture_window_settings);
+  WRITE_OFFSET(f, current_texture_mask);
+  WRITE_OFFSET(f, viewport_mask);
+  WRITE_OFFSET(f, dirty_textures_4bpp_mask);
+  WRITE_OFFSET(f, dirty_textures_8bpp_mask);
+  WRITE_OFFSET(f, dirty_textures_8bpp_alternate_mask);
+  WRITE_OFFSET(f, triangle_color);
+  WRITE_OFFSET(f, dither_table);
+  WRITE_OFFSET(f, uvrgb_phase);
+  WRITE_OFFSET(f, render_block_handler);
+  WRITE_OFFSET(f, texture_page_ptr);
+  WRITE_OFFSET(f, texture_page_base);
+  WRITE_OFFSET(f, clut_ptr);
+  WRITE_OFFSET(f, vram_ptr);
+  WRITE_OFFSET(f, vram_out_ptr);
+  WRITE_OFFSET(f, render_state_base);
+  WRITE_OFFSET(f, render_state);
+  WRITE_OFFSET(f, num_spans);
+  WRITE_OFFSET(f, num_blocks);
+  WRITE_OFFSET(f, viewport_start_x);
+  WRITE_OFFSET(f, viewport_start_y);
+  WRITE_OFFSET(f, viewport_end_x);
+  WRITE_OFFSET(f, viewport_end_y);
+  WRITE_OFFSET(f, mask_msb);
+  WRITE_OFFSET(f, triangle_winding);
+  WRITE_OFFSET(f, display_area_draw_enable);
+  WRITE_OFFSET(f, current_texture_page);
+  WRITE_OFFSET(f, last_8bpp_texture_page);
+  WRITE_OFFSET(f, texture_mask_width);
+  WRITE_OFFSET(f, texture_mask_height);
+  WRITE_OFFSET(f, texture_window_x);
+  WRITE_OFFSET(f, texture_window_y);
+  WRITE_OFFSET(f, primitive_type);
+  WRITE_OFFSET(f, render_mode);
+  WRITE_OFFSET(f, offset_x);
+  WRITE_OFFSET(f, offset_y);
+  WRITE_OFFSET(f, clut_settings);
+  WRITE_OFFSET(f, texture_settings);
+  WRITE_OFFSET(f, blocks);
+  WRITE_OFFSET(f, span_uvrg_offset);
+  WRITE_OFFSET(f, span_edge_data);
+  WRITE_OFFSET(f, span_b_offset);
+  WRITE_OFFSET(f, texture_4bpp_cache);
+  WRITE_OFFSET(f, texture_8bpp_even_cache);
+  WRITE_OFFSET(f, texture_8bpp_odd_cache);
+
+  // A short write would leave a truncated header behind; report it instead
+  // of exiting successfully.
+  if (ferror(f)) {
+    fprintf(stderr, "error writing psx_gpu_offsets.h\n");
+    fclose(f);
+    return 1;
+  }
+  // fclose() flushes buffered output, so its result matters for a file
+  // opened for writing.
+  if (fclose(f) != 0) {
+    perror("fclose");
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
index 920c638..67da86e 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c
@@ -92,6 +92,7 @@ void update_texture_ptr(psx_gpu_struct *psx_gpu)
void set_texture(psx_gpu_struct *psx_gpu, u32 texture_settings)
{
+ texture_settings &= 0x1FF;
if(psx_gpu->texture_settings != texture_settings)
{
u32 new_texture_page = texture_settings & 0x1F;
@@ -152,6 +153,52 @@ void set_triangle_color(psx_gpu_struct *psx_gpu, u32 triangle_color)
}
}
+/*
+ * Fill a rectangle with a solid color through render_block_fill().
+ *
+ * x is rounded down and width rounded up to a multiple of 16. Pending
+ * render blocks are flushed first. A rectangle that crosses x == 1024 or
+ * y == 512 is split so the overflowing part is drawn starting from 0 on
+ * that axis (up to four fills in total).
+ */
+static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y,
+ u32 width, u32 height, u32 color)
+{
+  u32 first_w, wrap_w = 0;
+  u32 first_h, wrap_h = 0;
+
+  x &= ~0xF;
+  width = ((width + 0xF) & ~0xF);
+
+  flush_render_block_buffer(psx_gpu);
+
+  // Split each axis into the part that fits and the part that wraps.
+  first_w = width;
+  if(unlikely((x + width) > 1024))
+  {
+    first_w = 1024 - x;
+    wrap_w = width - first_w;
+  }
+
+  first_h = height;
+  if(unlikely((y + height) > 512))
+  {
+    first_h = 512 - y;
+    wrap_h = height - first_h;
+  }
+
+  // The wrapped extents are non-zero exactly when the corresponding axis
+  // overflowed, so they double as "did it wrap" flags.
+  render_block_fill(psx_gpu, color, x, y, first_w, first_h);
+  if(wrap_w != 0)
+    render_block_fill(psx_gpu, color, 0, y, wrap_w, first_h);
+
+  if(wrap_h != 0)
+  {
+    render_block_fill(psx_gpu, color, x, 0, first_w, wrap_h);
+    if(wrap_w != 0)
+      render_block_fill(psx_gpu, color, 0, 0, wrap_w, wrap_h);
+  }
+}
+
#define sign_extend_12bit(value) \
(((s32)((value) << 20)) >> 20) \
@@ -235,45 +282,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
u32 height = list_s16[5] & 0x1FF;
u32 color = list[0] & 0xFFFFFF;
- x &= ~0xF;
- width = ((width + 0xF) & ~0xF);
-
- if((x + width) > 1024)
- {
- u32 width_a = 1024 - x;
- u32 width_b = width - width_a;
-
- if((y + height) > 512)
- {
- u32 height_a = 512 - y;
- u32 height_b = height - height_a;
-
- render_block_fill(psx_gpu, color, x, y, width_a, height_a);
- render_block_fill(psx_gpu, color, 0, y, width_b, height_a);
- render_block_fill(psx_gpu, color, x, 0, width_a, height_b);
- render_block_fill(psx_gpu, color, 0, 0, width_b, height_b);
- }
- else
- {
- render_block_fill(psx_gpu, color, x, y, width_a, height);
- render_block_fill(psx_gpu, color, 0, y, width_b, height);
- }
- }
- else
- {
- if((y + height) > 512)
- {
- u32 height_a = 512 - y;
- u32 height_b = height - height_a;
-
- render_block_fill(psx_gpu, color, x, y, width, height_a);
- render_block_fill(psx_gpu, color, x, 0, width, height_b);
- }
- else
- {
- render_block_fill(psx_gpu, color, x, y, width, height);
- }
- }
+ do_fill(psx_gpu, x, y, width, height, color);
break;
}
@@ -399,7 +408,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
vertexes[1].x = list_s16[4] + psx_gpu->offset_x;
vertexes[1].y = list_s16[5] + psx_gpu->offset_y;
- render_line(psx_gpu, vertexes, current_command, list[0]);
+ render_line(psx_gpu, vertexes, current_command, list[0], 0);
break;
}
@@ -420,7 +429,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x;
vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
- render_line(psx_gpu, vertexes, current_command, list[0]);
+ render_line(psx_gpu, vertexes, current_command, list[0], 0);
list_position++;
num_vertexes++;
@@ -451,7 +460,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
vertexes[1].x = list_s16[6] + psx_gpu->offset_x;
vertexes[1].y = list_s16[7] + psx_gpu->offset_y;
- render_line(psx_gpu, vertexes, current_command, 0);
+ render_line(psx_gpu, vertexes, current_command, 0, 0);
break;
}
@@ -481,7 +490,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x;
vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
- render_line(psx_gpu, vertexes, current_command, 0);
+ render_line(psx_gpu, vertexes, current_command, 0, 0);
list_position += 2;
num_vertexes++;
@@ -592,12 +601,22 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
break;
}
- case 0x80: // vid -> vid
- render_block_move(psx_gpu, list_s16[2] & 0x3FF, list_s16[3] & 0x1FF,
- list_s16[4] & 0x3FF, list_s16[5] & 0x1FF,
- ((list_s16[6] - 1) & 0x3FF) + 1, ((list_s16[7] - 1) & 0x1FF) + 1);
- break;
-
+ case 0x80: // vid -> vid
+ {
+ u32 sx = list_s16[2] & 0x3FF;
+ u32 sy = list_s16[3] & 0x1FF;
+ u32 dx = list_s16[4] & 0x3FF;
+ u32 dy = list_s16[5] & 0x1FF;
+ u32 w = ((list_s16[6] - 1) & 0x3FF) + 1;
+ u32 h = ((list_s16[7] - 1) & 0x1FF) + 1;
+
+ if (sx == dx && sy == dy && psx_gpu->mask_msb == 0)
+ break;
+
+ render_block_move(psx_gpu, sx, sy, dx, dy, w, h);
+ break;
+ }
+
#ifdef PCSX
case 0xA0: // sys -> vid
case 0xC0: // vid -> sys
@@ -626,7 +645,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
#endif
case 0xE1:
- set_texture(psx_gpu, list[0] & 0x1FF);
+ set_texture(psx_gpu, list[0]);
if(list[0] & (1 << 9))
psx_gpu->render_state_base |= RENDER_STATE_DITHER;
@@ -669,11 +688,21 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
}
SET_Ex(2, list[0]);
break;
- }
+ }
+
+ case 0xE3:
+ {
+ s16 viewport_start_x = list[0] & 0x3FF;
+ s16 viewport_start_y = (list[0] >> 10) & 0x1FF;
+
+ if(viewport_start_x == psx_gpu->viewport_start_x &&
+ viewport_start_y == psx_gpu->viewport_start_y)
+ {
+ break;
+ }
- case 0xE3:
- psx_gpu->viewport_start_x = list[0] & 0x3FF;
- psx_gpu->viewport_start_y = (list[0] >> 10) & 0x1FF;
+ psx_gpu->viewport_start_x = viewport_start_x;
+ psx_gpu->viewport_start_y = viewport_start_y;
#ifdef TEXTURE_CACHE_4BPP
psx_gpu->viewport_mask =
@@ -681,12 +710,23 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
psx_gpu->viewport_end_y);
#endif
- SET_Ex(3, list[0]);
- break;
-
- case 0xE4:
- psx_gpu->viewport_end_x = list[0] & 0x3FF;
- psx_gpu->viewport_end_y = (list[0] >> 10) & 0x1FF;
+ SET_Ex(3, list[0]);
+ break;
+ }
+
+ case 0xE4:
+ {
+ s16 viewport_end_x = list[0] & 0x3FF;
+ s16 viewport_end_y = (list[0] >> 10) & 0x1FF;
+
+ if(viewport_end_x == psx_gpu->viewport_end_x &&
+ viewport_end_y == psx_gpu->viewport_end_y)
+ {
+ break;
+ }
+
+ psx_gpu->viewport_end_x = viewport_end_x;
+ psx_gpu->viewport_end_y = viewport_end_y;
#ifdef TEXTURE_CACHE_4BPP
psx_gpu->viewport_mask =
@@ -694,10 +734,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command)
psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
psx_gpu->viewport_end_y);
#endif
- SET_Ex(4, list[0]);
- break;
+ SET_Ex(4, list[0]);
+ break;
+ }
- case 0xE5:
+ case 0xE5:
{
s32 offset_x = list[0] << 21;
s32 offset_y = list[0] << 10;
@@ -741,3 +782,786 @@ breakloop:
return list - list_start;
}
+#ifdef PCSX
+
+#define ENH_BUF_TABLE_STEP (1024 / sizeof(psx_gpu->enhancement_buf_by_x16))
+
+/*
+ * Rebuild the x -> enhancement buffer index table from
+ * enhancement_x_threshold.
+ *
+ * Each table entry covers ENH_BUF_TABLE_STEP pixels of x. Walking the
+ * entries left to right, the buffer index is bumped (up to a maximum of 3)
+ * each time the entry's x reaches the next multiple of the threshold,
+ * less ENH_BUF_TABLE_STEP + 1.
+ */
+static void update_enhancement_buf_table_from_hres(psx_gpu_struct *psx_gpu)
+{
+  u32 buf_index = 0;
+  u32 next_edge = psx_gpu->enhancement_x_threshold;
+  u32 slot = 0;
+
+  while (slot < sizeof(psx_gpu->enhancement_buf_by_x16))
+  {
+    if (buf_index < 3 &&
+      slot * ENH_BUF_TABLE_STEP >= next_edge - ENH_BUF_TABLE_STEP - 1)
+    {
+      next_edge += psx_gpu->enhancement_x_threshold;
+      buf_index++;
+    }
+
+    psx_gpu->enhancement_buf_by_x16[slot] = buf_index;
+    slot++;
+  }
+}
+
+/*
+ * Assign one enhancement buffer index to the table entries covering the
+ * x range [x0, x0 + len).
+ *
+ * The index is x0 / len (how many whole "len"-wide screens fit left of
+ * x0), clamped to 3.
+ *
+ * A zero len is ignored: there is nothing to map, and the index could not
+ * be computed. The number of entries written is clamped so the write never
+ * runs past the end of enhancement_buf_by_x16.
+ */
+static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu,
+ u32 x0, u32 len)
+{
+  const u32 table_size = sizeof(psx_gpu->enhancement_buf_by_x16);
+  u32 first = x0 / ENH_BUF_TABLE_STEP;
+  u32 count = (len + ENH_BUF_TABLE_STEP - 1) / ENH_BUF_TABLE_STEP;
+  u32 b;
+
+  if (len == 0 || first >= table_size)
+    return;
+
+  b = x0 / len;
+  if (b > 3)
+    b = 3;
+
+  if (count > table_size - first)
+    count = table_size - first;
+
+  memset(psx_gpu->enhancement_buf_by_x16 + first, b, count);
+}
+
+// Point enhancement_current_buf_ptr at the buffer chosen by
+// select_enhancement_buf_ptr() for the saved (unscaled) viewport start x.
+#define select_enhancement_buf(psx_gpu) \
+ psx_gpu->enhancement_current_buf_ptr = \
+ select_enhancement_buf_ptr(psx_gpu, psx_gpu->saved_viewport_start_x)
+
+// Switch rendering back to the normal target: output goes to vram_ptr, the
+// viewport is restored from the saved_* copies and uvrgb_phase is set to
+// 0x8000. Expects a local "psx_gpu" to be in scope.
+#define enhancement_disable() { \
+ psx_gpu->vram_out_ptr = psx_gpu->vram_ptr; \
+ psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x; \
+ psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y; \
+ psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x; \
+ psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y; \
+ psx_gpu->uvrgb_phase = 0x8000; \
+}
+
+// Switch rendering to the current enhancement buffer: the viewport becomes
+// the saved one scaled by 2 (end coordinates get +1, so both doubled
+// pixels of the last row/column are inside) and uvrgb_phase is set to
+// 0x1000. Expects a local "psx_gpu" to be in scope.
+// NOTE(review): the meaning of the two uvrgb_phase constants is not visible
+// here - presumably the interpolation sample offset for 1x vs 2x; confirm.
+#define enhancement_enable() { \
+ psx_gpu->vram_out_ptr = psx_gpu->enhancement_current_buf_ptr; \
+ psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x * 2; \
+ psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y * 2; \
+ psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x * 2 + 1; \
+ psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2 + 1; \
+ psx_gpu->uvrgb_phase = 0x1000; \
+}
+
+// Double the x/y of the three vertices pointed to by v[0..2], in place.
+#define shift_vertices3(v) { \
+ v[0]->x *= 2; \
+ v[0]->y *= 2; \
+ v[1]->x *= 2; \
+ v[1]->y *= 2; \
+ v[2]->x *= 2; \
+ v[2]->y *= 2; \
+}
+
+// Halve the x/y of the three vertices pointed to by v[0..2], in place;
+// used to undo shift_vertices3() when the vertices are needed again.
+#define unshift_vertices3(v) { \
+ v[0]->x /= 2; \
+ v[0]->y /= 2; \
+ v[1]->x /= 2; \
+ v[1]->y /= 2; \
+ v[2]->x /= 2; \
+ v[2]->y /= 2; \
+}
+
+// Scale the cached triangle area to match vertices doubled on both axes
+// (2 * 2 = 4). Expects a local "psx_gpu" to be in scope.
+#define shift_triangle_area() \
+ psx_gpu->triangle_area *= 4
+
+// Used in this file to refresh the 2x enhancement buffer from regular VRAM.
+// NOTE(review): judging by the call sites, w8 is the width in units of
+// 8 pixels and h the number of source rows - confirm against the
+// implementation.
+extern void scale2x_tiles8(void *dst, const void *src, int w8, int h);
+
+#ifndef NEON_BUILD
+// TODO?
+// Builds without NEON_BUILD get this empty stub, so those calls do nothing.
+void scale2x_tiles8(void *dst, const void *src, int w8, int h) {}
+#endif
+
+static int disable_main_render;
+
+/*
+ * Draw one triangle twice: once to the normal target (skipped when
+ * disable_main_render is set) and once, with coordinates and area scaled
+ * for 2x, to the enhancement buffer.
+ *
+ * Nothing is drawn when prepare_triangle() rejects the triangle. The
+ * vertices are left doubled and enhancement mode is left enabled on return.
+ */
+static void do_triangle_enhanced(psx_gpu_struct *psx_gpu,
+ vertex_struct *vertexes, u32 current_command)
+{
+  vertex_struct *tri[3];
+
+  if (prepare_triangle(psx_gpu, vertexes, tri))
+  {
+    if (disable_main_render == 0)
+      render_triangle_p(psx_gpu, tri, current_command);
+
+    enhancement_enable();
+    shift_vertices3(tri);
+    shift_triangle_area();
+    render_triangle_p(psx_gpu, tri, current_command);
+  }
+}
+
+/*
+ * Draw one triangle of a quad: first to the normal target (unless
+ * disable_main_render is set), then scaled by 2 to the enhancement buffer.
+ * When restore_vertices is non-zero the doubled vertex coordinates are
+ * halved again afterwards so the vertices can be reused.
+ */
+static void quad_half_enhanced(psx_gpu_struct *psx_gpu,
+ vertex_struct *vertexes, u32 current_command, int restore_vertices)
+{
+  vertex_struct *tri[3];
+
+  if (!prepare_triangle(psx_gpu, vertexes, tri))
+    return;
+
+  if (!disable_main_render)
+    render_triangle_p(psx_gpu, tri, current_command);
+
+  enhancement_enable();
+  shift_vertices3(tri);
+  shift_triangle_area();
+  render_triangle_p(psx_gpu, tri, current_command);
+
+  if (restore_vertices)
+    unshift_vertices3(tri);
+}
+
+/*
+ * Draw a quad as two triangles (vertexes[0..2] and vertexes[1..3]), each
+ * rendered both normally and to the enhancement buffer.
+ *
+ * The first triangle's vertices are restored after drawing because two of
+ * them are shared with the second triangle; enhancement mode is switched
+ * off in between so the second triangle is prepared against the normal
+ * viewport.
+ */
+static void do_quad_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
+ u32 current_command)
+{
+  quad_half_enhanced(psx_gpu, vertexes, current_command, 1);
+  enhancement_disable();
+  quad_half_enhanced(psx_gpu, &vertexes[1], current_command, 0);
+}
+
+#if 0
+
+#define fill_vertex(i, x_, y_, u_, v_, rgb_) \
+ vertexes[i].x = x_; \
+ vertexes[i].y = y_; \
+ vertexes[i].u = u_; \
+ vertexes[i].v = v_; \
+ vertexes[i].r = rgb_; \
+ vertexes[i].g = (rgb_) >> 8; \
+ vertexes[i].b = (rgb_) >> 16
+
+static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y,
+ u32 u, u32 v, u32 w, u32 h, u32 cmd_rgb)
+{
+ vertex_struct *vertex_ptrs[3];
+ u32 flags = (cmd_rgb >> 24);
+ u32 color = cmd_rgb & 0xffffff;
+ u32 render_state_base_saved = psx_gpu->render_state_base;
+ int x1, y1;
+ u8 u1, v1;
+
+ flags &=
+ (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND |
+ RENDER_FLAGS_TEXTURE_MAP);
+
+ set_triangle_color(psx_gpu, color);
+ if(color == 0x808080)
+ flags |= RENDER_FLAGS_MODULATE_TEXELS;
+
+ psx_gpu->render_state_base &= ~RENDER_STATE_DITHER;
+ enhancement_enable();
+
+ x1 = x + w;
+ y1 = y + h;
+ u1 = u + w;
+ v1 = v + h;
+ // FIXME..
+ if (u1 < u) u1 = 0xff;
+ if (v1 < v) v1 = 0xff;
+
+ // 0-2
+ // |/
+ // 1
+ fill_vertex(0, x, y, u, v, color);
+ fill_vertex(1, x, y1, u, v1, color);
+ fill_vertex(2, x1, y, u1, v, color);
+ if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) {
+ shift_vertices3(vertex_ptrs);
+ shift_triangle_area();
+ render_triangle_p(psx_gpu, vertex_ptrs, flags);
+ }
+
+ // 0
+ // /|
+ // 1-2
+ fill_vertex(0, x1, y, u1, v, color);
+ fill_vertex(1, x, y1, u, v1, color);
+ fill_vertex(2, x1, y1, u1, v1, color);
+ if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) {
+ shift_vertices3(vertex_ptrs);
+ shift_triangle_area();
+ render_triangle_p(psx_gpu, vertex_ptrs, flags);
+ }
+
+ psx_gpu->render_state_base = render_state_base_saved;
+}
+#else
+/*
+ * Draw a sprite into the enhancement buffer via render_sprite_4x().
+ * cmd_rgb is the first command word: the top 8 bits are passed on as the
+ * flags, the low 24 bits as the color.
+ */
+static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y,
+ u32 u, u32 v, u32 w, u32 h, u32 cmd_rgb)
+{
+  const u32 cmd_flags = (cmd_rgb >> 24);
+  const u32 rgb = cmd_rgb & 0xffffff;
+
+  render_sprite_4x(psx_gpu, x, y, u, v, w, h, cmd_flags, rgb);
+}
+#endif
+
+// Variant of the command list parser that draws every primitive twice:
+// once to the regular output and once, scaled by 2, to an enhancement
+// buffer.
+//
+// list: command words, size: length of the list in bytes,
+// last_command: if not NULL, receives the last command byte handled
+// (or (u32)-1 when the final command did not fit in the list).
+// Returns the number of 32-bit words consumed.
+u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
+ u32 *last_command)
+{
+ u32 current_command = 0, command_length;
+
+ u32 *list_start = list;
+ u32 *list_end = list + (size / 4);
+
+ // enhancement_enable() overwrites the viewport with doubled values, so
+ // keep the real one in saved_* for enhancement_disable() to restore.
+ psx_gpu->saved_viewport_start_x = psx_gpu->viewport_start_x;
+ psx_gpu->saved_viewport_start_y = psx_gpu->viewport_start_y;
+ psx_gpu->saved_viewport_end_x = psx_gpu->viewport_end_x;
+ psx_gpu->saved_viewport_end_y = psx_gpu->viewport_end_y;
+ select_enhancement_buf(psx_gpu);
+
+ for(; list < list_end; list += 1 + command_length)
+ {
+ s16 *list_s16 = (void *)list;
+ current_command = *list >> 24;
+ command_length = command_lengths[current_command];
+ // Incomplete command at the end of the list: stop and report it.
+ if (list + 1 + command_length > list_end) {
+ current_command = (u32)-1;
+ break;
+ }
+
+ // Every command starts out in normal mode; handlers switch to the
+ // enhancement buffer themselves for their second pass.
+ enhancement_disable();
+
+ switch(current_command)
+ {
+ case 0x00:
+ break;
+
+ case 0x02:
+ {
+ u32 x = list_s16[2] & 0x3FF;
+ u32 y = list_s16[3] & 0x1FF;
+ u32 width = list_s16[4] & 0x3FF;
+ u32 height = list_s16[5] & 0x1FF;
+ u32 color = list[0] & 0xFFFFFF;
+
+ x &= ~0xF;
+ width = ((width + 0xF) & ~0xF);
+
+ do_fill(psx_gpu, x, y, width, height, color);
+
+ // Second pass: same fill at doubled coordinates in the enhancement
+ // buffer selected by the fill's own x.
+ psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x);
+ x *= 2;
+ y *= 2;
+ width *= 2;
+ height *= 2;
+ render_block_fill_enh(psx_gpu, color, x, y, width, height);
+ break;
+ }
+
+ case 0x20 ... 0x23:
+ {
+ set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
+
+ get_vertex_data_xy(0, 2);
+ get_vertex_data_xy(1, 4);
+ get_vertex_data_xy(2, 6);
+
+ do_triangle_enhanced(psx_gpu, vertexes, current_command);
+ break;
+ }
+
+ case 0x24 ... 0x27:
+ {
+ set_clut(psx_gpu, list_s16[5]);
+ set_texture(psx_gpu, list_s16[9]);
+ set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
+
+ get_vertex_data_xy_uv(0, 2);
+ get_vertex_data_xy_uv(1, 6);
+ get_vertex_data_xy_uv(2, 10);
+
+ do_triangle_enhanced(psx_gpu, vertexes, current_command);
+ break;
+ }
+
+ case 0x28 ... 0x2B:
+ {
+ set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
+
+ get_vertex_data_xy(0, 2);
+ get_vertex_data_xy(1, 4);
+ get_vertex_data_xy(2, 6);
+ get_vertex_data_xy(3, 8);
+
+ do_quad_enhanced(psx_gpu, vertexes, current_command);
+ break;
+ }
+
+ case 0x2C ... 0x2F:
+ {
+ set_clut(psx_gpu, list_s16[5]);
+ set_texture(psx_gpu, list_s16[9]);
+ set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
+
+ get_vertex_data_xy_uv(0, 2);
+ get_vertex_data_xy_uv(1, 6);
+ get_vertex_data_xy_uv(2, 10);
+ get_vertex_data_xy_uv(3, 14);
+
+ do_quad_enhanced(psx_gpu, vertexes, current_command);
+ break;
+ }
+
+ case 0x30 ... 0x33:
+ {
+ get_vertex_data_xy_rgb(0, 0);
+ get_vertex_data_xy_rgb(1, 4);
+ get_vertex_data_xy_rgb(2, 8);
+
+ do_triangle_enhanced(psx_gpu, vertexes, current_command);
+ break;
+ }
+
+ case 0x34:
+ case 0x35:
+ case 0x36:
+ case 0x37:
+ {
+ set_clut(psx_gpu, list_s16[5]);
+ set_texture(psx_gpu, list_s16[11]);
+
+ get_vertex_data_xy_uv_rgb(0, 0);
+ get_vertex_data_xy_uv_rgb(1, 6);
+ get_vertex_data_xy_uv_rgb(2, 12);
+
+ do_triangle_enhanced(psx_gpu, vertexes, current_command);
+ break;
+ }
+
+ case 0x38:
+ case 0x39:
+ case 0x3A:
+ case 0x3B:
+ {
+ get_vertex_data_xy_rgb(0, 0);
+ get_vertex_data_xy_rgb(1, 4);
+ get_vertex_data_xy_rgb(2, 8);
+ get_vertex_data_xy_rgb(3, 12);
+
+ do_quad_enhanced(psx_gpu, vertexes, current_command);
+ break;
+ }
+
+ case 0x3C:
+ case 0x3D:
+ case 0x3E:
+ case 0x3F:
+ {
+ set_clut(psx_gpu, list_s16[5]);
+ set_texture(psx_gpu, list_s16[11]);
+
+ get_vertex_data_xy_uv_rgb(0, 0);
+ get_vertex_data_xy_uv_rgb(1, 6);
+ get_vertex_data_xy_uv_rgb(2, 12);
+ get_vertex_data_xy_uv_rgb(3, 18);
+
+ do_quad_enhanced(psx_gpu, vertexes, current_command);
+ break;
+ }
+
+ case 0x40 ... 0x47:
+ {
+ vertexes[0].x = list_s16[2] + psx_gpu->offset_x;
+ vertexes[0].y = list_s16[3] + psx_gpu->offset_y;
+ vertexes[1].x = list_s16[4] + psx_gpu->offset_x;
+ vertexes[1].y = list_s16[5] + psx_gpu->offset_y;
+
+ // Lines are drawn twice from the same vertices; the last argument of
+ // render_line() is 0 for the normal pass and 1 for the enhanced one.
+ render_line(psx_gpu, vertexes, current_command, list[0], 0);
+ enhancement_enable();
+ render_line(psx_gpu, vertexes, current_command, list[0], 1);
+ break;
+ }
+
+ case 0x48 ... 0x4F:
+ {
+ u32 num_vertexes = 1;
+ u32 *list_position = &(list[2]);
+ u32 xy = list[1];
+
+ vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x;
+ vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
+
+ xy = *list_position;
+ while(1)
+ {
+ vertexes[0] = vertexes[1];
+
+ vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x;
+ vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
+
+ // Each segment needs its own normal/enhanced pair, hence the
+ // explicit disable before the first render of every iteration.
+ enhancement_disable();
+ render_line(psx_gpu, vertexes, current_command, list[0], 0);
+ enhancement_enable();
+ render_line(psx_gpu, vertexes, current_command, list[0], 1);
+
+ list_position++;
+ num_vertexes++;
+
+ if(list_position >= list_end)
+ break;
+
+ // A word matching 0x5xxx5xxx terminates the polyline.
+ xy = *list_position;
+ if((xy & 0xF000F000) == 0x50005000)
+ break;
+ }
+
+ // Account for the variable number of vertex words consumed above.
+ command_length += (num_vertexes - 2);
+ break;
+ }
+
+ case 0x50 ... 0x57:
+ {
+ vertexes[0].r = list[0] & 0xFF;
+ vertexes[0].g = (list[0] >> 8) & 0xFF;
+ vertexes[0].b = (list[0] >> 16) & 0xFF;
+ vertexes[0].x = list_s16[2] + psx_gpu->offset_x;
+ vertexes[0].y = list_s16[3] + psx_gpu->offset_y;
+
+ vertexes[1].r = list[2] & 0xFF;
+ vertexes[1].g = (list[2] >> 8) & 0xFF;
+ vertexes[1].b = (list[2] >> 16) & 0xFF;
+ vertexes[1].x = list_s16[6] + psx_gpu->offset_x;
+ vertexes[1].y = list_s16[7] + psx_gpu->offset_y;
+
+ render_line(psx_gpu, vertexes, current_command, 0, 0);
+ enhancement_enable();
+ render_line(psx_gpu, vertexes, current_command, 0, 1);
+ break;
+ }
+
+ case 0x58 ... 0x5F:
+ {
+ u32 num_vertexes = 1;
+ u32 *list_position = &(list[2]);
+ u32 color = list[0];
+ u32 xy = list[1];
+
+ vertexes[1].r = color & 0xFF;
+ vertexes[1].g = (color >> 8) & 0xFF;
+ vertexes[1].b = (color >> 16) & 0xFF;
+ vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x;
+ vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
+
+ color = list_position[0];
+ while(1)
+ {
+ xy = list_position[1];
+
+ vertexes[0] = vertexes[1];
+
+ vertexes[1].r = color & 0xFF;
+ vertexes[1].g = (color >> 8) & 0xFF;
+ vertexes[1].b = (color >> 16) & 0xFF;
+ vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x;
+ vertexes[1].y = (xy >> 16) + psx_gpu->offset_y;
+
+ enhancement_disable();
+ render_line(psx_gpu, vertexes, current_command, 0, 0);
+ enhancement_enable();
+ render_line(psx_gpu, vertexes, current_command, 0, 1);
+
+ list_position += 2;
+ num_vertexes++;
+
+ if(list_position >= list_end)
+ break;
+
+ // A color word matching 0x5xxx5xxx terminates the polyline.
+ color = list_position[0];
+ if((color & 0xF000F000) == 0x50005000)
+ break;
+ }
+
+ // Each extra vertex takes two words (color + xy).
+ command_length += ((num_vertexes - 2) * 2);
+ break;
+ }
+
+ case 0x60 ... 0x63:
+ {
+ u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+ u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+ u32 width = list_s16[4] & 0x3FF;
+ u32 height = list_s16[5] & 0x1FF;
+
+ // Sprites: normal render first, then the enhanced version.
+ render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]);
+ do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]);
+ break;
+ }
+
+ case 0x64 ... 0x67:
+ {
+ u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+ u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+ u8 u = list_s16[4];
+ u8 v = list_s16[4] >> 8;
+ u32 width = list_s16[6] & 0x3FF;
+ u32 height = list_s16[7] & 0x1FF;
+
+ set_clut(psx_gpu, list_s16[5]);
+
+ render_sprite(psx_gpu, x, y, u, v, width, height,
+ current_command, list[0]);
+ do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]);
+ break;
+ }
+
+ case 0x68:
+ case 0x69:
+ case 0x6A:
+ case 0x6B:
+ {
+ s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+ s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+
+ render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]);
+ do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]);
+ break;
+ }
+
+ case 0x70:
+ case 0x71:
+ case 0x72:
+ case 0x73:
+ {
+ s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+ s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+
+ render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]);
+ do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]);
+ break;
+ }
+
+ case 0x74:
+ case 0x75:
+ case 0x76:
+ case 0x77:
+ {
+ s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+ s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+ u8 u = list_s16[4];
+ u8 v = list_s16[4] >> 8;
+
+ set_clut(psx_gpu, list_s16[5]);
+
+ render_sprite(psx_gpu, x, y, u, v, 8, 8,
+ current_command, list[0]);
+ do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]);
+ break;
+ }
+
+ case 0x78:
+ case 0x79:
+ case 0x7A:
+ case 0x7B:
+ {
+ s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+ s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+
+ render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]);
+ do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]);
+ break;
+ }
+
+ case 0x7C:
+ case 0x7D:
+ case 0x7E:
+ case 0x7F:
+ {
+ s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
+ s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
+ u8 u = list_s16[4];
+ u8 v = list_s16[4] >> 8;
+
+ set_clut(psx_gpu, list_s16[5]);
+
+ render_sprite(psx_gpu, x, y, u, v, 16, 16, current_command, list[0]);
+ do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]);
+ break;
+ }
+
+ case 0x80: // vid -> vid
+ {
+ u32 sx = list_s16[2] & 0x3FF;
+ u32 sy = list_s16[3] & 0x1FF;
+ u32 dx = list_s16[4] & 0x3FF;
+ u32 dy = list_s16[5] & 0x1FF;
+ u32 w = ((list_s16[6] - 1) & 0x3FF) + 1;
+ u32 h = ((list_s16[7] - 1) & 0x1FF) + 1;
+ u16 *buf;
+
+ // Copying a region onto itself changes nothing unless the mask bit
+ // is being forced on.
+ if (sx == dx && sy == dy && psx_gpu->mask_msb == 0)
+ break;
+
+ render_block_move(psx_gpu, sx, sy, dx, dy, w, h);
+ // Mirror the move into the enhancement buffer by upscaling the
+ // source rectangle to the doubled destination position. Height is
+ // clipped at the bottom of VRAM, x positions are aligned down and
+ // the width is converted to whole 8-pixel tiles.
+ if (dy + h > 512)
+ h = 512 - dy;
+ sx = sx & ~7; // FIXME?
+ dx = dx * 2 & ~7;
+ dy *= 2;
+ w = (w + 7) / 8;
+ buf = select_enhancement_buf_ptr(psx_gpu, dx / 2);
+ scale2x_tiles8(buf + dy * 1024 + dx,
+ psx_gpu->vram_ptr + sy * 1024 + sx, w, h);
+ break;
+ }
+
+ case 0xA0: // sys -> vid
+ case 0xC0: // vid -> sys
+ // Not handled here: stop parsing and return to the caller. This skips
+ // the enhancement_disable() after the loop, which is fine because
+ // the one at the top of this iteration has already run.
+ goto breakloop;
+
+ case 0xE1:
+ set_texture(psx_gpu, list[0]);
+
+ if(list[0] & (1 << 9))
+ psx_gpu->render_state_base |= RENDER_STATE_DITHER;
+ else
+ psx_gpu->render_state_base &= ~RENDER_STATE_DITHER;
+
+ psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1;
+ SET_Ex(1, list[0]);
+ break;
+
+ case 0xE2:
+ {
+ // TODO: Clean
+ u32 texture_window_settings = list[0];
+ u32 tmp, x, y, w, h;
+
+ if(texture_window_settings != psx_gpu->texture_window_settings)
+ {
+ // Window width/height: 8 shifted left once per trailing zero bit
+ // of the 5-bit mask field (bit 5 is forced on to bound the loop).
+ tmp = (texture_window_settings & 0x1F) | 0x20;
+ for(w = 8; (tmp & 1) == 0; tmp >>= 1, w <<= 1);
+
+ tmp = ((texture_window_settings >> 5) & 0x1f) | 0x20;
+ for (h = 8; (tmp & 1) == 0; tmp >>= 1, h <<= 1);
+
+ tmp = 32 - (w >> 3);
+ x = ((texture_window_settings >> 10) & tmp) << 3;
+
+ tmp = 32 - (h >> 3);
+ y = ((texture_window_settings >> 15) & tmp) << 3;
+
+ flush_render_block_buffer(psx_gpu);
+
+ psx_gpu->texture_window_settings = texture_window_settings;
+ psx_gpu->texture_window_x = x;
+ psx_gpu->texture_window_y = y;
+ psx_gpu->texture_mask_width = w - 1;
+ psx_gpu->texture_mask_height = h - 1;
+
+ update_texture_ptr(psx_gpu);
+ }
+ SET_Ex(2, list[0]);
+ break;
+ }
+
+ case 0xE3:
+ {
+ s16 viewport_start_x = list[0] & 0x3FF;
+ s16 viewport_start_y = (list[0] >> 10) & 0x1FF;
+ u32 d;
+
+ if(viewport_start_x == psx_gpu->viewport_start_x &&
+ viewport_start_y == psx_gpu->viewport_start_y)
+ {
+ break;
+ }
+ // Keep the saved copy in sync, since enhancement_disable() restores
+ // the viewport from it.
+ psx_gpu->viewport_start_x = viewport_start_x;
+ psx_gpu->viewport_start_y = viewport_start_y;
+ psx_gpu->saved_viewport_start_x = viewport_start_x;
+ psx_gpu->saved_viewport_start_y = viewport_start_y;
+
+ // If the drawing area is at most 16 pixels narrower than the x
+ // threshold, remap its x range in the buffer table.
+ d = (u32)psx_gpu->viewport_end_x - (u32)viewport_start_x + 1;
+ if((u32)psx_gpu->enhancement_x_threshold - d <= 16)
+ {
+ update_enhancement_buf_table_from_x(psx_gpu,
+ viewport_start_x, d);
+ }
+ select_enhancement_buf(psx_gpu);
+
+#ifdef TEXTURE_CACHE_4BPP
+ psx_gpu->viewport_mask =
+ texture_region_mask(psx_gpu->viewport_start_x,
+ psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
+ psx_gpu->viewport_end_y);
+#endif
+ SET_Ex(3, list[0]);
+ break;
+ }
+
+ case 0xE4:
+ {
+ s16 viewport_end_x = list[0] & 0x3FF;
+ s16 viewport_end_y = (list[0] >> 10) & 0x1FF;
+ u32 d;
+
+ if(viewport_end_x == psx_gpu->viewport_end_x &&
+ viewport_end_y == psx_gpu->viewport_end_y)
+ {
+ break;
+ }
+
+ psx_gpu->viewport_end_x = viewport_end_x;
+ psx_gpu->viewport_end_y = viewport_end_y;
+ psx_gpu->saved_viewport_end_x = viewport_end_x;
+ psx_gpu->saved_viewport_end_y = viewport_end_y;
+
+ // Same width test as for 0xE3, using the new end x.
+ d = (u32)viewport_end_x - (u32)psx_gpu->viewport_start_x + 1;
+ if((u32)psx_gpu->enhancement_x_threshold - d <= 16)
+ {
+ update_enhancement_buf_table_from_x(psx_gpu,
+ psx_gpu->viewport_start_x, d);
+ }
+ select_enhancement_buf(psx_gpu);
+
+#ifdef TEXTURE_CACHE_4BPP
+ psx_gpu->viewport_mask =
+ texture_region_mask(psx_gpu->viewport_start_x,
+ psx_gpu->viewport_start_y, psx_gpu->viewport_end_x,
+ psx_gpu->viewport_end_y);
+#endif
+ SET_Ex(4, list[0]);
+ break;
+ }
+
+ case 0xE5:
+ {
+ // Two signed 11-bit fields (bits 0-10 and 11-21): shift each to the
+ // top of a 32-bit word, then arithmetic-shift back to sign extend.
+ s32 offset_x = list[0] << 21;
+ s32 offset_y = list[0] << 10;
+ psx_gpu->offset_x = offset_x >> 21;
+ psx_gpu->offset_y = offset_y >> 21;
+
+ SET_Ex(5, list[0]);
+ break;
+ }
+
+ case 0xE6:
+ {
+ u32 mask_settings = list[0];
+ // Bit 0 of the command moves to bit 15 of mask_msb.
+ u16 mask_msb = mask_settings << 15;
+
+ if(list[0] & 0x2)
+ psx_gpu->render_state_base |= RENDER_STATE_MASK_EVALUATE;
+ else
+ psx_gpu->render_state_base &= ~RENDER_STATE_MASK_EVALUATE;
+
+ if(mask_msb != psx_gpu->mask_msb)
+ {
+ flush_render_block_buffer(psx_gpu);
+ psx_gpu->mask_msb = mask_msb;
+ }
+
+ SET_Ex(6, list[0]);
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ // Leave the GPU state in normal mode, with the unscaled viewport, for
+ // the caller.
+ enhancement_disable();
+
+breakloop:
+ if (last_command != NULL)
+ *last_command = current_command;
+ return list - list_start;
+}
+
+#endif /* PCSX */
+
+// vim:shiftwidth=2:expandtab
diff --git a/plugins/gpu_neon/psx_gpu/tests/Makefile b/plugins/gpu_neon/psx_gpu/tests/Makefile
index 210f44d..58cca29 100644
--- a/plugins/gpu_neon/psx_gpu/tests/Makefile
+++ b/plugins/gpu_neon/psx_gpu/tests/Makefile
@@ -13,7 +13,7 @@ ASFLAGS = $(CFLAGS)
OBJ += psx_gpu_arm_neon.o
endif
ifndef DEBUG
-CFLAGS += -O2 -fno-strict-aliasing
+CFLAGS += -O2 -DNDEBUG -fno-strict-aliasing
endif
OBJ += psx_gpu.o psx_gpu_parse.o psx_gpu_main.o
diff --git a/plugins/gpu_neon/psx_gpu/vector_ops.h b/plugins/gpu_neon/psx_gpu/vector_ops.h
index c11955d..c91e7d9 100644
--- a/plugins/gpu_neon/psx_gpu/vector_ops.h
+++ b/plugins/gpu_neon/psx_gpu/vector_ops.h
@@ -394,6 +394,10 @@ build_vector_types(s);
foreach_element(8, (dest).e[_i] = \
(u8)(source_a).e[_i] | ((u8)(source_b).e[_i] << 8)) \
+/* Interleave 16-bit lanes: low half from source_a, high half from source_b. The high half is widened to u32 before shifting; a u16 would promote to int and values >= 0x8000 would shift into the sign bit. */
+#define zip_4x32b(dest, source_a, source_b) \
+  foreach_element(4, (dest).e[_i] = \
+    (u16)(source_a).e[_i] | ((u32)(u16)(source_b).e[_i] << 16)) \
+
#define zip_2x64b(dest, source_a, source_b) \
foreach_element(2, (dest).e[_i] = \
(u64)(source_a).e[_i] | ((u64)(source_b).e[_i] << 32)) \
diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c
index ff31c27..ad01761 100644
--- a/plugins/gpu_neon/psx_gpu_if.c
+++ b/plugins/gpu_neon/psx_gpu_if.c
@@ -9,11 +9,13 @@
*/
#include <stdio.h>
+#include <sys/mman.h>
extern const unsigned char cmd_lengths[256];
#define command_lengths cmd_lengths
static unsigned int *ex_regs;
+static int initialized;
#define PCSX
#define SET_Ex(r, v) \
@@ -27,20 +29,102 @@ static psx_gpu_struct egpu __attribute__((aligned(256)));
int do_cmd_list(uint32_t *list, int count, int *last_cmd)
{
- int ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
+ int ret;
+
+ if (gpu.state.enhancement_active)
+ ret = gpu_parse_enhanced(&egpu, list, count * 4, (u32 *)last_cmd);
+ else
+ ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd);
ex_regs[1] &= ~0x1ff;
ex_regs[1] |= egpu.texture_settings & 0x1ff;
return ret;
}
+#define ENHANCEMENT_BUF_SIZE (1024 * 1024 * 2 * 4 + 4096 * 2)
+
+/*
+ * Callback installed as gpu.get_enhancement_bufer.
+ *
+ * Looks up the enhancement buffer for the incoming *x, then rewrites the
+ * rectangle in place for the 2x buffer: *x, *y, *w and *h are doubled and
+ * *vram_h is set to 1024. Returns the selected buffer.
+ */
+static uint16_t *get_enhancement_bufer(int *x, int *y, int *w, int *h,
+ int *vram_h)
+{
+  // The lookup must use the unscaled x, so do it before doubling.
+  uint16_t *buf = select_enhancement_buf_ptr(&egpu, *x);
+
+  *vram_h = 1024;
+  *x *= 2;
+  *y *= 2;
+  *w *= 2;
+  *h *= 2;
+
+  return buf;
+}
+
+/*
+ * Allocate the enhancement buffers through gpu.mmap and publish the
+ * get_enhancement_bufer callback; on failure the callback is cleared and
+ * a message is printed to stderr.
+ */
+static void map_enhancement_buffer(void)
+{
+  // currently we use 4x 1024*1024 buffers instead of single 2048*1024
+  // to be able to reuse 1024-width code better (triangle setup,
+  // dithering phase, lines).
+  egpu.enhancement_buf_ptr = gpu.mmap(ENHANCEMENT_BUF_SIZE);
+
+  if (egpu.enhancement_buf_ptr == NULL) {
+    fprintf(stderr, "failed to map enhancement buffer\n");
+    gpu.get_enhancement_bufer = NULL;
+    return;
+  }
+
+  // Skip the leading padding; renderer_finish() subtracts the same amount
+  // before unmapping.
+  egpu.enhancement_buf_ptr += 4096 / 2;
+  gpu.get_enhancement_bufer = get_enhancement_bufer;
+}
+
int renderer_init(void)
{
- initialize_psx_gpu(&egpu, gpu.vram);
+ if (gpu.vram != NULL) {
+ initialize_psx_gpu(&egpu, gpu.vram);
+ initialized = 1;
+ }
+
+ if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL)
+ map_enhancement_buffer();
+
ex_regs = gpu.ex_regs;
return 0;
}
+/*
+ * Release the enhancement buffer (if one was mapped) and mark the renderer
+ * as uninitialized.
+ */
+void renderer_finish(void)
+{
+  if (egpu.enhancement_buf_ptr != NULL) {
+    // Undo the offset applied when the buffer was mapped so the original
+    // mapping address is handed back.
+    egpu.enhancement_buf_ptr -= 4096 / 2;
+    gpu.munmap(egpu.enhancement_buf_ptr, ENHANCEMENT_BUF_SIZE);
+    egpu.enhancement_buf_ptr = NULL;
+  }
+
+  initialized = 0;
+  egpu.enhancement_current_buf_ptr = NULL;
+}
+
+// Refresh the enhancement buffers from regular VRAM for the rectangle
+// (x, y, w, h), upscaling with scale2x_tiles8().
+// The x range is widened to whole table steps and the height is clipped at
+// row 512. The range is then processed in runs of consecutive table
+// entries that map to the same enhancement buffer.
+static __attribute__((noinline)) void
+sync_enhancement_buffers(int x, int y, int w, int h)
+{
+ const int step_x = 1024 / sizeof(egpu.enhancement_buf_by_x16);
+ u16 *src, *dst;
+ int w1, fb_index;
+
+ // Align x down to a step boundary (growing w by the same amount), then
+ // round w up to a whole number of steps.
+ w += x & (step_x - 1);
+ x &= ~(step_x - 1);
+ w = (w + step_x - 1) & ~(step_x - 1);
+ if (y + h > 512)
+ h = 512 - y;
+
+ while (w > 0) {
+ // w1 counts how many consecutive steps share the first step's buffer
+ // index; w is reduced by the width of that run.
+ fb_index = egpu.enhancement_buf_by_x16[x / step_x];
+ for (w1 = 0; w > 0; w1++, w -= step_x)
+ if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1])
+ break;
+
+ src = gpu.vram + y * 1024 + x;
+ dst = select_enhancement_buf_ptr(&egpu, x);
+ // Destination coordinates are doubled on both axes.
+ dst += (y * 1024 + x) * 2;
+ scale2x_tiles8(dst, src, w1 * step_x / 8, h);
+
+ x += w1 * step_x;
+ }
+}
+
void renderer_sync_ecmds(uint32_t *ecmds)
{
gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL);
@@ -49,6 +133,8 @@ void renderer_sync_ecmds(uint32_t *ecmds)
void renderer_update_caches(int x, int y, int w, int h)
{
update_texture_cache_region(&egpu, x, y, x + w - 1, y + h - 1);
+ if (gpu.state.enhancement_active && !gpu.status.rgb24)
+ sync_enhancement_buffers(x, y, w, h);
}
void renderer_flush_queues(void)
@@ -58,13 +144,44 @@ void renderer_flush_queues(void)
void renderer_set_interlace(int enable, int is_odd)
{
- egpu.interlace_mode &= ~(RENDER_INTERLACE_ENABLED|RENDER_INTERLACE_ODD);
+ egpu.render_mode &= ~(RENDER_INTERLACE_ENABLED|RENDER_INTERLACE_ODD);
if (enable)
- egpu.interlace_mode |= RENDER_INTERLACE_ENABLED;
+ egpu.render_mode |= RENDER_INTERLACE_ENABLED;
if (is_odd)
- egpu.interlace_mode |= RENDER_INTERLACE_ODD;
+ egpu.render_mode |= RENDER_INTERLACE_ODD;
}
+void renderer_notify_res_change(void)
+{
+ // note: must keep it multiple of 8
+ if (egpu.enhancement_x_threshold != gpu.screen.hres)
+ {
+ egpu.enhancement_x_threshold = gpu.screen.hres;
+ update_enhancement_buf_table_from_hres(&egpu);
+ }
+}
+
+#include "../../frontend/plugin_lib.h"
+
void renderer_set_config(const struct rearmed_cbs *cbs)
{
+ static int enhancement_was_on;
+
+ disable_main_render = cbs->gpu_neon.enhancement_no_main;
+ if (egpu.enhancement_buf_ptr != NULL && cbs->gpu_neon.enhancement_enable
+ && !enhancement_was_on)
+ {
+ sync_enhancement_buffers(0, 0, 1024, 512);
+ }
+ enhancement_was_on = cbs->gpu_neon.enhancement_enable;
+
+ if (!initialized) {
+ initialize_psx_gpu(&egpu, gpu.vram);
+ initialized = 1;
+ }
+
+ if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL)
+ map_enhancement_buffer();
+ if (cbs->pl_set_gpu_caps)
+ cbs->pl_set_gpu_caps(GPU_CAP_SUPPORTS_2X);
}
diff --git a/plugins/gpu_unai/gpu.cpp b/plugins/gpu_unai/gpu.cpp
index 46552ac..c111d78 100644
--- a/plugins/gpu_unai/gpu.cpp
+++ b/plugins/gpu_unai/gpu.cpp
@@ -824,7 +824,6 @@ void GPU_updateLace(void)
extern "C" {
static const struct rearmed_cbs *cbs;
-static void *screen_buf;
static s16 old_res_horz, old_res_vert, old_rgb24;
static void blit(void)
@@ -832,12 +831,10 @@ static void blit(void)
u16 *base = (u16 *)GPU_FrameBuffer;
s16 isRGB24 = (GPU_GP1 & 0x00200000) ? 1 : 0;
s16 h0, x0, y0, w0, h1;
- u32 fb_offs;
- u8 *dest;
x0 = DisplayArea[0] & ~1; // alignment needed by blitter
y0 = DisplayArea[1];
- fb_offs = FRAME_OFFSET(x0, y0);
+ base += FRAME_OFFSET(x0, y0);
w0 = DisplayArea[2];
h0 = DisplayArea[3]; // video mode
@@ -853,62 +850,10 @@ static void blit(void)
old_res_horz = w0;
old_res_vert = h1;
old_rgb24 = (s16)isRGB24;
- screen_buf = cbs->pl_vout_set_mode(w0, h1, isRGB24 ? 24 : 16);
+ cbs->pl_vout_set_mode(w0, h1, isRGB24 ? 24 : 16);
}
- dest = (u8 *)screen_buf;
- if (isRGB24)
- {
- if (!cbs->only_16bpp)
- {
- for (; h1-- > 0; dest += w0 * 3, fb_offs += 1024)
- {
- fb_offs &= 1024*512-1;
- bgr888_to_rgb888(dest, base + fb_offs, w0 * 3);
- }
- }
- else
- {
- for (; h1-- > 0; dest += w0 * 2, fb_offs += 1024)
- {
- fb_offs &= 1024*512-1;
- bgr888_to_rgb565(dest, base + fb_offs, w0 * 3);
- }
- }
- }
- else
- {
- for (; h1-- > 0; dest += w0 * 2, fb_offs += 1024)
- {
- fb_offs &= 1024*512-1;
- bgr555_to_rgb565(dest, base + fb_offs, w0 * 2);
- }
- }
-
- screen_buf = cbs->pl_vout_flip();
-}
-
-static void blit_raw(void)
-{
- s16 isRGB24 = (GPU_GP1 & 0x00200000) ? 1 : 0;
- s16 h0, w0, h1;
-
- w0 = DisplayArea[2];
- h0 = DisplayArea[3]; // video mode
- h1 = DisplayArea[5] - DisplayArea[4]; // display needed
- if (h0 == 480) h1 = Min2(h1*2,480);
-
- if (h1 <= 0)
- return;
-
- if (w0 != old_res_horz || h1 != old_res_vert || isRGB24 != old_rgb24)
- {
- old_res_horz = w0;
- old_res_vert = h1;
- old_rgb24 = (s16)isRGB24;
- screen_buf = cbs->pl_vout_set_mode(w0, h1, isRGB24 ? 24 : 16);
- }
- cbs->pl_vout_raw_flip(DisplayArea[0], DisplayArea[1]);
+ cbs->pl_vout_flip(base, 1024, isRGB24, w0, h1);
}
void GPU_updateLace(void)
@@ -920,10 +865,7 @@ void GPU_updateLace(void)
return;
if (!wasSkip) {
- if (cbs->pl_vout_raw_flip != NULL)
- blit_raw();
- else
- blit();
+ blit();
fb_dirty = false;
skCount = 0;
}
@@ -939,7 +881,6 @@ void GPU_updateLace(void)
long GPUopen(unsigned long *, char *, char *)
{
cbs->pl_vout_open();
- screen_buf = cbs->pl_vout_flip();
return 0;
}
@@ -966,6 +907,8 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_)
cbs_->pl_vout_set_raw_vram((void *)GPU_FrameBuffer);
cbs = cbs_;
+ if (cbs->pl_set_gpu_caps)
+ cbs->pl_set_gpu_caps(0);
}
} /* extern "C" */
diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp
index 38e7ce1..de16721 100644
--- a/plugins/gpu_unai/gpulib_if.cpp
+++ b/plugins/gpu_unai/gpulib_if.cpp
@@ -154,6 +154,14 @@ int renderer_init(void)
return 0;
}
+void renderer_finish(void)
+{
+}
+
+void renderer_notify_res_change(void)
+{
+}
+
extern const unsigned char cmd_lengths[256];
int do_cmd_list(unsigned int *list, int list_len, int *last_cmd)
@@ -525,6 +533,8 @@ void renderer_set_config(const struct rearmed_cbs *cbs)
enableAbbeyHack = cbs->gpu_unai.abe_hack;
light = !cbs->gpu_unai.no_light;
blend = !cbs->gpu_unai.no_blend;
+
+ GPU_FrameBuffer = (u16 *)gpu.vram;
}
#endif
diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c
index 46e92d1..b61bff6 100644
--- a/plugins/gpulib/gpu.c
+++ b/plugins/gpulib/gpu.c
@@ -24,7 +24,7 @@
//#define log_anomaly gpu_log
#define log_anomaly(...)
-struct psx_gpu gpu __attribute__((aligned(2048)));
+struct psx_gpu gpu;
static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);
@@ -133,6 +133,22 @@ static noinline void get_gpu_info(uint32_t data)
}
}
+// 1024x512 16bpp VRAM, allocated at double size so the extra half acts as an overdraw guard
+#define VRAM_SIZE (1024 * 512 * 2 * 2)
+
+static int map_vram(void)
+{
+ gpu.vram = gpu.mmap(VRAM_SIZE);
+ if (gpu.vram != NULL) {
+ gpu.vram += 4096 / 2;
+ return 0;
+ }
+ else {
+ fprintf(stderr, "could not map vram, expect crashes\n");
+ return -1;
+ }
+}
+
long GPUinit(void)
{
int ret;
@@ -145,12 +161,26 @@ long GPUinit(void)
gpu.cmd_len = 0;
do_reset();
+ if (gpu.mmap != NULL) {
+ if (map_vram() != 0)
+ ret = -1;
+ }
return ret;
}
long GPUshutdown(void)
{
- return vout_finish();
+ long ret;
+
+ renderer_finish();
+ ret = vout_finish();
+ if (gpu.vram != NULL) {
+ gpu.vram -= 4096 / 2;
+ gpu.munmap(gpu.vram, VRAM_SIZE);
+ }
+ gpu.vram = NULL;
+
+ return ret;
}
void GPUwriteStatus(uint32_t data)
@@ -207,6 +237,7 @@ void GPUwriteStatus(uint32_t data)
gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
update_width();
update_height();
+ renderer_notify_res_change();
break;
default:
if ((cmd & 0xf0) == 0x10)
@@ -582,13 +613,13 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
case 1: // save
if (gpu.cmd_len > 0)
flush_cmd_buffer();
- memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
+ memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
freeze->ulStatus = gpu.status.reg;
break;
case 0: // load
- memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
+ memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
gpu.status.reg = freeze->ulStatus;
@@ -669,6 +700,14 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
gpu.state.hcnt = cbs->gpu_hcnt;
gpu.state.frame_count = cbs->gpu_frame_count;
gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
+ gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
+
+ gpu.mmap = cbs->mmap;
+ gpu.munmap = cbs->munmap;
+
+ // delayed vram mmap
+ if (gpu.vram == NULL)
+ map_vram();
if (cbs->pl_vout_set_raw_vram)
cbs->pl_vout_set_raw_vram(gpu.vram);
diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h
index 1cbe38c..d11f991 100644
--- a/plugins/gpulib/gpu.h
+++ b/plugins/gpulib/gpu.h
@@ -17,10 +17,9 @@ extern "C" {
#define CMD_BUFFER_LEN 1024
struct psx_gpu {
- uint16_t vram[1024 * 512];
- uint16_t guard[1024 * 512]; // overdraw guard
uint32_t cmd_buffer[CMD_BUFFER_LEN];
uint32_t regs[16];
+ uint16_t *vram;
union {
uint32_t reg;
struct {
@@ -67,6 +66,8 @@ struct psx_gpu {
uint32_t old_interlace:1;
uint32_t allow_interlace:2;
uint32_t blanked:1;
+ uint32_t enhancement_enable:1;
+ uint32_t enhancement_active:1;
uint32_t *frame_count;
uint32_t *hcnt; /* hsync count */
struct {
@@ -87,6 +88,10 @@ struct psx_gpu {
uint32_t last_flip_frame;
uint32_t pending_fill[3];
} frameskip;
+ uint16_t *(*get_enhancement_bufer)
+ (int *x, int *y, int *w, int *h, int *vram_h);
+ void *(*mmap)(unsigned int size);
+ void (*munmap)(void *ptr, unsigned int size);
};
extern struct psx_gpu gpu;
@@ -98,11 +103,13 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd);
struct rearmed_cbs;
int renderer_init(void);
+void renderer_finish(void);
void renderer_sync_ecmds(uint32_t * ecmds);
void renderer_update_caches(int x, int y, int w, int h);
void renderer_flush_queues(void);
void renderer_set_interlace(int enable, int is_odd);
void renderer_set_config(const struct rearmed_cbs *config);
+void renderer_notify_res_change(void);
int vout_init(void);
int vout_finish(void);
diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c
index 0bd1ecf..11307e2 100644
--- a/plugins/gpulib/vout_pl.c
+++ b/plugins/gpulib/vout_pl.c
@@ -15,7 +15,6 @@
#include "../../frontend/plugin_lib.h"
static const struct rearmed_cbs *cbs;
-static void *screen_buf;
int vout_init(void)
{
@@ -27,90 +26,74 @@ int vout_finish(void)
return 0;
}
-static void check_mode_change(void)
+static void check_mode_change(int force)
{
static uint32_t old_status;
static int old_h;
+ int w = gpu.screen.hres;
+ int h = gpu.screen.h;
+
+ gpu.state.enhancement_active =
+ gpu.get_enhancement_bufer != NULL && gpu.state.enhancement_enable
+ && w <= 512 && h <= 256 && !gpu.status.rgb24;
+
+ if (gpu.state.enhancement_active) {
+ w *= 2;
+ h *= 2;
+ }
// width|rgb24 change?
- if ((gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || gpu.screen.h != old_h)
+ if (force || (gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || h != old_h)
{
old_status = gpu.status.reg;
- old_h = gpu.screen.h;
- screen_buf = cbs->pl_vout_set_mode(gpu.screen.hres, gpu.screen.h,
+ old_h = h;
+
+ cbs->pl_vout_set_mode(w, h,
(gpu.status.rgb24 && !cbs->only_16bpp) ? 24 : 16);
}
}
-static void blit(void)
+void vout_update(void)
{
int x = gpu.screen.x & ~1; // alignment needed by blitter
int y = gpu.screen.y;
int w = gpu.screen.w;
int h = gpu.screen.h;
uint16_t *vram = gpu.vram;
- int stride = gpu.screen.hres;
- int fb_offs, doffs;
- uint8_t *dest;
+ int vram_h = 512;
- dest = (uint8_t *)screen_buf;
- if (dest == NULL)
+ if (w == 0 || h == 0)
return;
- fb_offs = y * 1024 + x;
+ check_mode_change(0);
+ if (gpu.state.enhancement_active)
+ vram = gpu.get_enhancement_bufer(&x, &y, &w, &h, &vram_h);
- // only do centering, at least for now
- doffs = (stride - w) / 2 & ~1;
-
- if (gpu.status.rgb24)
- {
- if (cbs->only_16bpp) {
- dest += doffs * 2;
- for (; h-- > 0; dest += stride * 2, fb_offs += 1024)
- {
- fb_offs &= 1024*512-1;
- bgr888_to_rgb565(dest, vram + fb_offs, w * 3);
- }
- }
- else {
- dest += (doffs / 8) * 24;
- for (; h-- > 0; dest += stride * 3, fb_offs += 1024)
- {
- fb_offs &= 1024*512-1;
- bgr888_to_rgb888(dest, vram + fb_offs, w * 3);
- }
- }
- }
- else
- {
- dest += doffs * 2;
- for (; h-- > 0; dest += stride * 2, fb_offs += 1024)
- {
- fb_offs &= 1024*512-1;
- bgr555_to_rgb565(dest, vram + fb_offs, w * 2);
+ if (y + h > vram_h) {
+ if (y + h - vram_h > h / 2) {
+ // wrap: keep only the rows past the vram end (needs the old y, so adjust h first)
+ h -= vram_h - y;
+ y = 0;
}
+ else
+ // clip: drop the rows that would fall past the vram end
+ h = vram_h - y;
}
- screen_buf = cbs->pl_vout_flip();
-}
+ vram += y * 1024 + x;
-void vout_update(void)
-{
- check_mode_change();
- if (cbs->pl_vout_raw_flip)
- cbs->pl_vout_raw_flip(gpu.screen.x, gpu.screen.y);
- else
- blit();
+ cbs->pl_vout_flip(vram, 1024, gpu.status.rgb24, w, h);
}
void vout_blank(void)
{
- check_mode_change();
- if (cbs->pl_vout_raw_flip == NULL) {
- int bytespp = gpu.status.rgb24 ? 3 : 2;
- memset(screen_buf, 0, gpu.screen.hres * gpu.screen.h * bytespp);
- screen_buf = cbs->pl_vout_flip();
+ int w = gpu.screen.hres;
+ int h = gpu.screen.h;
+ if (gpu.state.enhancement_active) {
+ w *= 2;
+ h *= 2;
}
+ cbs->pl_vout_flip(NULL, 1024, gpu.status.rgb24, w, h);
}
long GPUopen(void **unused)
@@ -119,7 +102,7 @@ long GPUopen(void **unused)
gpu.frameskip.frame_ready = 1;
cbs->pl_vout_open();
- screen_buf = cbs->pl_vout_flip();
+ check_mode_change(1);
return 0;
}