about summary refs log tree commit diff
path: root/plugins/gpulib
diff options
context:
space:
mode:
author twinaphex 2012-11-28 06:35:30 +0100
committer twinaphex 2012-11-28 06:35:30 +0100
commit b194a2ecd43debbb3724e332b660b993725acd52 (patch)
tree a41deacfa7a6841e97d67fce60bb7f39b54b12be /plugins/gpulib
parent b6514bd168106e978a0af0f5043d5da6fd35800a (diff)
parent 06d2e1a58b6eb7fb449f79f7e807343bd8d39b59 (diff)
download pcsx_rearmed-b194a2ecd43debbb3724e332b660b993725acd52.tar.gz
pcsx_rearmed-b194a2ecd43debbb3724e332b660b993725acd52.tar.bz2
pcsx_rearmed-b194a2ecd43debbb3724e332b660b993725acd52.zip
Merge git://github.com/notaz/pcsx_rearmed
Diffstat (limited to 'plugins/gpulib')
-rw-r--r-- plugins/gpulib/cspace.c 101
-rw-r--r-- plugins/gpulib/cspace.h 4
-rw-r--r-- plugins/gpulib/gpu.c 49
-rw-r--r-- plugins/gpulib/gpu.h 11
-rw-r--r-- plugins/gpulib/gpulib.mak 10
-rw-r--r-- plugins/gpulib/vout_pl.c 98
6 files changed, 205 insertions, 68 deletions
diff --git a/plugins/gpulib/cspace.c b/plugins/gpulib/cspace.c
index 1d5718c..8e3bee9 100644
--- a/plugins/gpulib/cspace.c
+++ b/plugins/gpulib/cspace.c
@@ -1,3 +1,13 @@
+/*
+ * (C) Gražvydas "notaz" Ignotas, 2011,2012
+ *
+ * This work is licensed under the terms of any of these licenses
+ * (at your option):
+ * - GNU GPL, version 2 or later.
+ * - GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
#include "cspace.h"
/*
@@ -5,6 +15,8 @@
* in favor of NEON version or platform-specific conversion
*/
+#ifndef __ARM_NEON__
+
void bgr555_to_rgb565(void *dst_, const void *src_, int bytes)
{
const unsigned int *src = src_;
@@ -42,3 +54,92 @@ void bgr888_to_rgb565(void *dst_, const void *src_, int bytes)
void rgb888_to_rgb565(void *dst, const void *src, int bytes) {}
void bgr888_to_rgb888(void *dst, const void *src, int bytes) {}
+#endif // __ARM_NEON__
+
+/* YUV stuff */
+static int yuv_ry[32], yuv_gy[32], yuv_by[32];
+static unsigned char yuv_u[32 * 2], yuv_v[32 * 2];
+
+void bgr_to_uyvy_init(void)
+{
+ int i, v;
+
+ /* init yuv converter:
+ y0 = (int)((0.299f * r0) + (0.587f * g0) + (0.114f * b0));
+ y1 = (int)((0.299f * r1) + (0.587f * g1) + (0.114f * b1));
+ u = (int)(8 * 0.565f * (b0 - y0)) + 128;
+ v = (int)(8 * 0.713f * (r0 - y0)) + 128;
+ */
+ for (i = 0; i < 32; i++) {
+ yuv_ry[i] = (int)(0.299f * i * 65536.0f + 0.5f);
+ yuv_gy[i] = (int)(0.587f * i * 65536.0f + 0.5f);
+ yuv_by[i] = (int)(0.114f * i * 65536.0f + 0.5f);
+ }
+ for (i = -32; i < 32; i++) {
+ v = (int)(8 * 0.565f * i) + 128;
+ if (v < 0)
+ v = 0;
+ if (v > 255)
+ v = 255;
+ yuv_u[i + 32] = v;
+ v = (int)(8 * 0.713f * i) + 128;
+ if (v < 0)
+ v = 0;
+ if (v > 255)
+ v = 255;
+ yuv_v[i + 32] = v;
+ }
+}
+
+void bgr555_to_uyvy(void *d, const void *s, int pixels)
+{
+ unsigned int *dst = d;
+ const unsigned short *src = s;
+ const unsigned char *yu = yuv_u + 32;
+ const unsigned char *yv = yuv_v + 32;
+ int r0, g0, b0, r1, g1, b1;
+ int y0, y1, u, v;
+
+ for (; pixels > 0; src += 2, dst++, pixels -= 2)
+ {
+ b0 = (src[0] >> 10) & 0x1f;
+ g0 = (src[0] >> 5) & 0x1f;
+ r0 = src[0] & 0x1f;
+ b1 = (src[1] >> 10) & 0x1f;
+ g1 = (src[1] >> 5) & 0x1f;
+ r1 = src[1] & 0x1f;
+ y0 = (yuv_ry[r0] + yuv_gy[g0] + yuv_by[b0]) >> 16;
+ y1 = (yuv_ry[r1] + yuv_gy[g1] + yuv_by[b1]) >> 16;
+ u = yu[b0 - y0];
+ v = yv[r0 - y0];
+ // valid Y range seems to be 16..235
+ y0 = 16 + 219 * y0 / 31;
+ y1 = 16 + 219 * y1 / 31;
+
+ *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u;
+ }
+}
+
+void bgr888_to_uyvy(void *d, const void *s, int pixels)
+{
+ unsigned int *dst = d;
+ const unsigned char *src8 = s;
+ const unsigned char *yu = yuv_u + 32;
+ const unsigned char *yv = yuv_v + 32;
+ int r0, g0, b0, r1, g1, b1;
+ int y0, y1, u, v;
+
+ for (; pixels > 0; src8 += 3*2, dst++, pixels -= 2)
+ {
+ r0 = src8[0], g0 = src8[1], b0 = src8[2];
+ r1 = src8[3], g1 = src8[4], b1 = src8[5];
+ y0 = (r0 * 19595 + g0 * 38470 + b0 * 7471) >> 16;
+ y1 = (r1 * 19595 + g1 * 38470 + b1 * 7471) >> 16;
+ u = yu[(b0 - y0) / 8];
+ v = yv[(r0 - y0) / 8];
+ y0 = 16 + 219 * y0 / 255;
+ y1 = 16 + 219 * y1 / 255;
+
+ *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u;
+ }
+}
diff --git a/plugins/gpulib/cspace.h b/plugins/gpulib/cspace.h
index 8c9bcfa..95eae85 100644
--- a/plugins/gpulib/cspace.h
+++ b/plugins/gpulib/cspace.h
@@ -8,6 +8,10 @@ void bgr888_to_rgb888(void *dst, const void *src, int bytes);
void bgr888_to_rgb565(void *dst, const void *src, int bytes);
void rgb888_to_rgb565(void *dst, const void *src, int bytes);
+void bgr_to_uyvy_init(void);
+void bgr555_to_uyvy(void *d, const void *s, int pixels);
+void bgr888_to_uyvy(void *d, const void *s, int pixels);
+
#ifdef __cplusplus
}
#endif
diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c
index 46e92d1..b300c88 100644
--- a/plugins/gpulib/gpu.c
+++ b/plugins/gpulib/gpu.c
@@ -24,7 +24,7 @@
//#define log_anomaly gpu_log
#define log_anomaly(...)
-struct psx_gpu gpu __attribute__((aligned(2048)));
+struct psx_gpu gpu;
static noinline int do_cmd_buffer(uint32_t *data, int count);
static void finish_vram_transfer(int is_read);
@@ -133,6 +133,22 @@ static noinline void get_gpu_info(uint32_t data)
}
}
+// double, for overdraw guard
+#define VRAM_SIZE (1024 * 512 * 2 * 2)
+
+static int map_vram(void)
+{
+ gpu.vram = gpu.mmap(VRAM_SIZE);
+ if (gpu.vram != NULL) {
+ gpu.vram += 4096 / 2;
+ return 0;
+ }
+ else {
+ fprintf(stderr, "could not map vram, expect crashes\n");
+ return -1;
+ }
+}
+
long GPUinit(void)
{
int ret;
@@ -145,12 +161,26 @@ long GPUinit(void)
gpu.cmd_len = 0;
do_reset();
+ if (gpu.mmap != NULL) {
+ if (map_vram() != 0)
+ ret = -1;
+ }
return ret;
}
long GPUshutdown(void)
{
- return vout_finish();
+ long ret;
+
+ renderer_finish();
+ ret = vout_finish();
+ if (gpu.vram != NULL) {
+ gpu.vram -= 4096 / 2;
+ gpu.munmap(gpu.vram, VRAM_SIZE);
+ }
+ gpu.vram = NULL;
+
+ return ret;
}
void GPUwriteStatus(uint32_t data)
@@ -182,7 +212,7 @@ void GPUwriteStatus(uint32_t data)
break;
case 0x05:
gpu.screen.x = data & 0x3ff;
- gpu.screen.y = (data >> 10) & 0x3ff;
+ gpu.screen.y = (data >> 10) & 0x1ff;
if (gpu.frameskip.set) {
decide_frameskip_allow(gpu.ex_regs[3]);
if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
@@ -207,6 +237,7 @@ void GPUwriteStatus(uint32_t data)
gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3];
update_width();
update_height();
+ renderer_notify_res_change();
break;
default:
if ((cmd & 0xf0) == 0x10)
@@ -582,13 +613,13 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
case 1: // save
if (gpu.cmd_len > 0)
flush_cmd_buffer();
- memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram));
+ memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
freeze->ulStatus = gpu.status.reg;
break;
case 0: // load
- memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram));
+ memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
gpu.status.reg = freeze->ulStatus;
@@ -669,6 +700,14 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
gpu.state.hcnt = cbs->gpu_hcnt;
gpu.state.frame_count = cbs->gpu_frame_count;
gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
+ gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
+
+ gpu.mmap = cbs->mmap;
+ gpu.munmap = cbs->munmap;
+
+ // delayed vram mmap
+ if (gpu.vram == NULL)
+ map_vram();
if (cbs->pl_vout_set_raw_vram)
cbs->pl_vout_set_raw_vram(gpu.vram);
diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h
index 1cbe38c..d11f991 100644
--- a/plugins/gpulib/gpu.h
+++ b/plugins/gpulib/gpu.h
@@ -17,10 +17,9 @@ extern "C" {
#define CMD_BUFFER_LEN 1024
struct psx_gpu {
- uint16_t vram[1024 * 512];
- uint16_t guard[1024 * 512]; // overdraw guard
uint32_t cmd_buffer[CMD_BUFFER_LEN];
uint32_t regs[16];
+ uint16_t *vram;
union {
uint32_t reg;
struct {
@@ -67,6 +66,8 @@ struct psx_gpu {
uint32_t old_interlace:1;
uint32_t allow_interlace:2;
uint32_t blanked:1;
+ uint32_t enhancement_enable:1;
+ uint32_t enhancement_active:1;
uint32_t *frame_count;
uint32_t *hcnt; /* hsync count */
struct {
@@ -87,6 +88,10 @@ struct psx_gpu {
uint32_t last_flip_frame;
uint32_t pending_fill[3];
} frameskip;
+ uint16_t *(*get_enhancement_bufer)
+ (int *x, int *y, int *w, int *h, int *vram_h);
+ void *(*mmap)(unsigned int size);
+ void (*munmap)(void *ptr, unsigned int size);
};
extern struct psx_gpu gpu;
@@ -98,11 +103,13 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd);
struct rearmed_cbs;
int renderer_init(void);
+void renderer_finish(void);
void renderer_sync_ecmds(uint32_t * ecmds);
void renderer_update_caches(int x, int y, int w, int h);
void renderer_flush_queues(void);
void renderer_set_interlace(int enable, int is_odd);
void renderer_set_config(const struct rearmed_cbs *config);
+void renderer_notify_res_change(void);
int vout_init(void);
int vout_finish(void);
diff --git a/plugins/gpulib/gpulib.mak b/plugins/gpulib/gpulib.mak
index 349a0c8..ad6a8ad 100644
--- a/plugins/gpulib/gpulib.mak
+++ b/plugins/gpulib/gpulib.mak
@@ -2,7 +2,7 @@
# always adding gpulib to deps in case cspace is needed
# users must include ../../config.mak
-LDFLAGS += -shared
+LDFLAGS += -shared -Wl,--no-undefined
CFLAGS += $(PLUGIN_CFLAGS)
ifeq "$(ARCH)" "arm"
EXT =
@@ -17,10 +17,10 @@ endif
GPULIB_A = ../gpulib/gpulib$(EXT).a
ifdef BIN_STANDLALONE
-TARGETS += $(BIN_STANDLALONE)$(EXT)
+TARGETS += $(BIN_STANDLALONE)
endif
ifdef BIN_GPULIB
-TARGETS += $(BIN_GPULIB)$(EXT)
+TARGETS += $(BIN_GPULIB)
endif
CC_STANDLALONE = $(CC)
CC_GPULIB = $(CC)
@@ -34,7 +34,7 @@ ifdef BIN_STANDLALONE
ifneq ($(findstring .cpp,$(SRC_STANDALONE)),)
CC_STANDLALONE = $(CXX)
endif
-$(BIN_STANDLALONE)$(EXT): $(SRC) $(SRC_STANDALONE) $(GPULIB_A)
+$(BIN_STANDLALONE): $(SRC) $(SRC_STANDALONE) $(GPULIB_A)
$(CC_STANDLALONE) -o $@ $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) $(LDLIBS_STANDALONE)
ln -fs $(PLUGINDIR)/$@ ../
endif
@@ -43,7 +43,7 @@ ifdef BIN_GPULIB
ifneq ($(findstring .cpp,$(SRC_GPULIB)),)
CC_GPULIB = $(CXX)
endif
-$(BIN_GPULIB)$(EXT): $(SRC) $(SRC_GPULIB) $(GPULIB_A)
+$(BIN_GPULIB): $(SRC) $(SRC_GPULIB) $(GPULIB_A)
$(CC_GPULIB) -o $@ $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) $(LDLIBS_GPULIB)
ln -fs $(PLUGINDIR)/$@ ../
endif
diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c
index 0bd1ecf..49f53d6 100644
--- a/plugins/gpulib/vout_pl.c
+++ b/plugins/gpulib/vout_pl.c
@@ -15,7 +15,6 @@
#include "../../frontend/plugin_lib.h"
static const struct rearmed_cbs *cbs;
-static void *screen_buf;
int vout_init(void)
{
@@ -27,90 +26,77 @@ int vout_finish(void)
return 0;
}
-static void check_mode_change(void)
+static void check_mode_change(int force)
{
static uint32_t old_status;
static int old_h;
+ int w = gpu.screen.hres;
+ int h = gpu.screen.h;
+ int w_out = w;
+ int h_out = h;
+
+ gpu.state.enhancement_active =
+ gpu.get_enhancement_bufer != NULL && gpu.state.enhancement_enable
+ && w <= 512 && h <= 256 && !gpu.status.rgb24;
+
+ if (gpu.state.enhancement_active) {
+ w_out *= 2;
+ h_out *= 2;
+ }
// width|rgb24 change?
- if ((gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || gpu.screen.h != old_h)
+ if (force || (gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || h != old_h)
{
old_status = gpu.status.reg;
- old_h = gpu.screen.h;
- screen_buf = cbs->pl_vout_set_mode(gpu.screen.hres, gpu.screen.h,
+ old_h = h;
+
+ cbs->pl_vout_set_mode(w_out, h_out, w, h,
(gpu.status.rgb24 && !cbs->only_16bpp) ? 24 : 16);
}
}
-static void blit(void)
+void vout_update(void)
{
int x = gpu.screen.x & ~1; // alignment needed by blitter
int y = gpu.screen.y;
int w = gpu.screen.w;
int h = gpu.screen.h;
uint16_t *vram = gpu.vram;
- int stride = gpu.screen.hres;
- int fb_offs, doffs;
- uint8_t *dest;
+ int vram_h = 512;
- dest = (uint8_t *)screen_buf;
- if (dest == NULL)
+ if (w == 0 || h == 0)
return;
- fb_offs = y * 1024 + x;
+ check_mode_change(0);
+ if (gpu.state.enhancement_active)
+ vram = gpu.get_enhancement_bufer(&x, &y, &w, &h, &vram_h);
- // only do centering, at least for now
- doffs = (stride - w) / 2 & ~1;
-
- if (gpu.status.rgb24)
- {
- if (cbs->only_16bpp) {
- dest += doffs * 2;
- for (; h-- > 0; dest += stride * 2, fb_offs += 1024)
- {
- fb_offs &= 1024*512-1;
- bgr888_to_rgb565(dest, vram + fb_offs, w * 3);
- }
- }
- else {
- dest += (doffs / 8) * 24;
- for (; h-- > 0; dest += stride * 3, fb_offs += 1024)
- {
- fb_offs &= 1024*512-1;
- bgr888_to_rgb888(dest, vram + fb_offs, w * 3);
- }
- }
- }
- else
- {
- dest += doffs * 2;
- for (; h-- > 0; dest += stride * 2, fb_offs += 1024)
- {
- fb_offs &= 1024*512-1;
- bgr555_to_rgb565(dest, vram + fb_offs, w * 2);
+ if (y + h > vram_h) {
+ if (y + h - vram_h > h / 2) {
+ // wrap
+ h -= vram_h - y;
+ y = 0;
}
+ else
+ // clip
+ h = vram_h - y;
}
- screen_buf = cbs->pl_vout_flip();
-}
+ vram += y * 1024 + x;
-void vout_update(void)
-{
- check_mode_change();
- if (cbs->pl_vout_raw_flip)
- cbs->pl_vout_raw_flip(gpu.screen.x, gpu.screen.y);
- else
- blit();
+ cbs->pl_vout_flip(vram, 1024, gpu.status.rgb24, w, h);
}
void vout_blank(void)
{
- check_mode_change();
- if (cbs->pl_vout_raw_flip == NULL) {
- int bytespp = gpu.status.rgb24 ? 3 : 2;
- memset(screen_buf, 0, gpu.screen.hres * gpu.screen.h * bytespp);
- screen_buf = cbs->pl_vout_flip();
+ int w = gpu.screen.hres;
+ int h = gpu.screen.h;
+ if (gpu.state.enhancement_active) {
+ w *= 2;
+ h *= 2;
}
+ check_mode_change(0);
+ cbs->pl_vout_flip(NULL, 1024, gpu.status.rgb24, w, h);
}
long GPUopen(void **unused)
@@ -119,7 +105,7 @@ long GPUopen(void **unused)
gpu.frameskip.frame_ready = 1;
cbs->pl_vout_open();
- screen_buf = cbs->pl_vout_flip();
+ check_mode_change(1);
return 0;
}