From 030d1121f27550429364745419fc5e6161a2a431 Mon Sep 17 00:00:00 2001 From: negativeExponent Date: Sat, 17 Aug 2019 09:31:06 +0800 Subject: Backport GPU Unai plugin from PCSX4ALL - backports gpu unai plugin from PCSX4ALL - sync necessary files with notaz/master to allow building standalone app --- Makefile | 3 + blackberry_qnx/.cproject | 142 ++ blackberry_qnx/.project | 84 ++ debian_maemo/buildpkg | 13 + debian_maemo/changelog | 112 ++ debian_maemo/compat | 1 + debian_maemo/control | 115 ++ debian_maemo/copyright | 2 + debian_maemo/dirs | 1 + debian_maemo/docs | 1 + debian_maemo/files | 1 + debian_maemo/install | 6 + debian_maemo/rules | 68 + frontend/320240/caanoo.gpe | 23 + frontend/320240/haptic_s.cfg | 3 + frontend/320240/haptic_w.cfg | 3 + frontend/320240/pcsx26.png | Bin 0 -> 4763 bytes frontend/320240/pcsx_rearmed.ini | 6 + frontend/320240/pcsxb.png | Bin 0 -> 24784 bytes frontend/320240/pollux_set.c | 389 ++++++ frontend/320240/skin/background.png | Bin 0 -> 36069 bytes frontend/320240/skin/font.png | Bin 0 -> 3185 bytes frontend/320240/skin/readme.txt | 8 + frontend/320240/skin/selector.png | Bin 0 -> 261 bytes frontend/320240/skin/skin.txt | 4 + frontend/320240/ui_gp2x.h | 15 + frontend/libretro.c | 93 ++ frontend/libretro_core_options.h | 70 + frontend/main.c | 7 + frontend/menu.c | 32 +- frontend/plugin_lib.h | 7 + jni/Android.mk | 4 + maemo/hildon.c | 843 ++++++++++++ maemo/maemo_common.h | 18 + maemo/maemo_xkb.c | 88 ++ plugins/gpu_unai/Makefile | 5 +- plugins/gpu_unai/README_senquack.txt | 956 ++++++++++++++ plugins/gpu_unai/gpu.cpp | 1061 +++++++-------- plugins/gpu_unai/gpu.h | 99 +- plugins/gpu_unai/gpu_blit.h | 24 +- plugins/gpu_unai/gpu_command.h | 667 ++++++---- plugins/gpu_unai/gpu_fixedpoint.h | 107 +- plugins/gpu_unai/gpu_inner.h | 914 ++++++++----- plugins/gpu_unai/gpu_inner_blend.h | 268 ++-- plugins/gpu_unai/gpu_inner_blend_arm5.h | 100 ++ plugins/gpu_unai/gpu_inner_blend_arm7.h | 107 ++ plugins/gpu_unai/gpu_inner_light.h | 293 ++++- plugins/gpu_unai/gpu_inner_quantization.h | 108 ++ plugins/gpu_unai/gpu_raster_image.h | 98 +- plugins/gpu_unai/gpu_raster_line.h | 874 ++++++++++--- plugins/gpu_unai/gpu_raster_polygon.h | 1997 +++++++++++++++++++---------- plugins/gpu_unai/gpu_raster_sprite.h | 219 ++-- plugins/gpu_unai/gpu_unai.h | 318 +++++ plugins/gpu_unai/gpulib_if.cpp | 708 +++++----- 54 files changed, 8310 insertions(+), 2775 deletions(-) create mode 100644 blackberry_qnx/.cproject create mode 100644 blackberry_qnx/.project create mode 100644 debian_maemo/buildpkg create mode 100644 debian_maemo/changelog create mode 100644 debian_maemo/compat create mode 100644 debian_maemo/control create mode 100644 debian_maemo/copyright create mode 100644 debian_maemo/dirs create mode 100644 debian_maemo/docs create mode 100644 debian_maemo/files create mode 100644 debian_maemo/install create mode 100644 debian_maemo/rules create mode 100755 frontend/320240/caanoo.gpe create mode 100644 frontend/320240/haptic_s.cfg create mode 100644 frontend/320240/haptic_w.cfg create mode 100644 frontend/320240/pcsx26.png create mode 100644 frontend/320240/pcsx_rearmed.ini create mode 100644 frontend/320240/pcsxb.png create mode 100644 frontend/320240/pollux_set.c create mode 100644 frontend/320240/skin/background.png create mode 100644 frontend/320240/skin/font.png create mode 100644 frontend/320240/skin/readme.txt create mode 100644 frontend/320240/skin/selector.png create mode 100644 frontend/320240/skin/skin.txt create mode 100644 frontend/320240/ui_gp2x.h create mode 100644 maemo/hildon.c create mode 100644 maemo/maemo_common.h create mode 100644 maemo/maemo_xkb.c create mode 100644 plugins/gpu_unai/README_senquack.txt create mode 100644 plugins/gpu_unai/gpu_inner_blend_arm5.h create mode 100644 plugins/gpu_unai/gpu_inner_blend_arm7.h create mode 100644 plugins/gpu_unai/gpu_inner_quantization.h create mode 100644 plugins/gpu_unai/gpu_unai.h diff --git a/Makefile b/Makefile index 06e4fcc..b44c1f2 100644 --- a/Makefile +++ b/Makefile @@ -150,6 +150,9 @@ OBJS += plugins/dfxvideo/gpulib_if.o endif ifeq "$(BUILTIN_GPU)" "unai" CFLAGS += -DGPU_UNAI +CFLAGS += -DUSE_GPULIB=1 +#CFLAGS += -DINLINE="static __inline__" +#CFLAGS += -Dasm="__asm__ __volatile__" OBJS += plugins/gpu_unai/gpulib_if.o ifeq "$(ARCH)" "arm" OBJS += plugins/gpu_unai/gpu_arm.o diff --git a/blackberry_qnx/.cproject b/blackberry_qnx/.cproject new file mode 100644 index 0000000..565f4a9 --- /dev/null +++ b/blackberry_qnx/.cproject @@ -0,0 +1,142 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/blackberry_qnx/.project b/blackberry_qnx/.project new file mode 100644 index 0000000..c8e1e20 --- /dev/null +++ b/blackberry_qnx/.project @@ -0,0 +1,84 @@ + + + pcsx_rearmed + + + + + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + ?name? + + + + org.eclipse.cdt.make.core.append_environment + true + + + org.eclipse.cdt.make.core.autoBuildTarget + all + + + org.eclipse.cdt.make.core.buildArguments + -C .. -f Makefile.libretro platform=qnx + + + org.eclipse.cdt.make.core.buildCommand + make + + + org.eclipse.cdt.make.core.cleanBuildTarget + clean + + + org.eclipse.cdt.make.core.contents + org.eclipse.cdt.make.core.activeConfigSettings + + + org.eclipse.cdt.make.core.enableAutoBuild + false + + + org.eclipse.cdt.make.core.enableCleanBuild + true + + + org.eclipse.cdt.make.core.enableFullBuild + true + + + org.eclipse.cdt.make.core.fullBuildTarget + all + + + org.eclipse.cdt.make.core.stopOnError + true + + + org.eclipse.cdt.make.core.useDefaultBuildCmd + false + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + com.qnx.tools.bbt.xml.core.bbtXMLValidationBuilder + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + com.qnx.tools.ide.bbt.core.bbtnature + + diff --git a/debian_maemo/buildpkg b/debian_maemo/buildpkg new file mode 100644 index 0000000..4c34f94 --- /dev/null +++ b/debian_maemo/buildpkg @@ -0,0 +1,13 @@ +#!/bin/bash -e + +NAME=`head debian/changelog -n1 | sed -n 's/^\(.*\) (\(.*\)) .*/\1-\2/p'` +[[ -z $NAME ]] && { echo "Could not extract package name and version from debian/changelog" 2>&1; exit 1; } + +rm -rf ../$NAME +cp -r ../`basename $PWD` ../$NAME +cd ../$NAME +rm -rf .git* +find . -depth -name .svn -type d -exec rm -r {} \; +find . -name '*~' -exec rm {} \; + +LD_LIBRARY_PATH=/usr/lib dpkg-buildpackage -rfakeroot $* diff --git a/debian_maemo/changelog b/debian_maemo/changelog new file mode 100644 index 0000000..e3395de --- /dev/null +++ b/debian_maemo/changelog @@ -0,0 +1,112 @@ +pcsxrearmed (0.4.0.14.13) unstable; urgency=low + + * Updated source to notaz git version + + -- sakya Fri, 15 Feb 2013 12:50:28 +0200 + +pcsxrearmed (0.4.0.14.12) unstable; urgency=low + + * Fixed a problem with controller and vibration (Gran Turismo 2, Wipeout 3) + * Added dependency to libts + + -- sakya Wed, 16 May 2012 17:09:33 +0200 + +pcsxrearmed (0.4.0.14.11) unstable; urgency=low + + * Added option -guncon and -gunnotrigger to activate guncon controller type + + -- sakya Wed, 16 May 2012 09:37:12 +0200 + +pcsxrearmed (0.4.0.14.10) unstable; urgency=low + + * Added option -corners to set action to execute when clicking on display corners + * Fixed problem with notification using gles plugin + * Fixed controller problem with game "Heart Of Darkness" (maybe others?) + + -- sakya Fri, 11 May 2012 16:38:29 +0200 + +pcsxrearmed (0.4.0.14.9) unstable; urgency=low + + * Added support to .mdf extension + * Added option -vibration to activate vibration + + -- sakya Tue, 1 May 2012 12:19:49 +0200 + +pcsxrearmed (0.4.0.14.8) unstable; urgency=low + + * Added option -disc to set the initial disc in multi discs images (used when loading a savestate with -load) + * Added option -autosave + * Fixed disc change for multi discs images (PBP) + * Merged commits from Notaz git + * drc: inv: fix ram ofset and mirror handling + * support emulated RAM mapped at offset + + -- sakya Fri, 20 Apr 2012 20:27:19 +0200 + +pcsxrearmed (0.4.0.14.7) unstable; urgency=low + + * Fixed -displayon + + -- sakya Sun, 15 Apr 2012 17:22:08 +0200 + +pcsxrearmed (0.4.0.14.6) unstable; urgency=low + + * Added option -keys to set the keys config file + * Fixed L1/L2/R1/R2 + * Added autopause on incoming call + + -- sakya Wed, 13 Apr 2012 12:51:35 +0200 + +pcsxrearmed (0.4.0.14.5) unstable; urgency=low + + * Fixed accelerometer using gles + * Added -analog option to use the accelerometer as the analog pad + * Added options to set accelerometer sens, max value, y_def + * Added -displayon option to keep the display on (useful when playing using the accelerometer) + + -- sakya Tue, 10 Apr 2012 15:34:11 +0200 + +pcsxrearmed (0.4.0.14.4) unstable; urgency=low + + * Fixed -load option + * Added disc change (configured a new key) + + -- sakya Fri, 06 Apr 2012 13:54:56 +0200 + +pcsxrearmed (0.4.0.14.3) unstable; urgency=low + + * Added options to set various gles settings + * Fixed save state slot selection + * Added notification on save state slot change + + -- sakya Wed, 04 Apr 2012 10:20:18 +0200 + +pcsxrearmed (0.4.0.14.2) unstable; urgency=low + + * Fixed fullscreen using gpu-gles + * Fixed crash when saving savestate using gpu-gles + * Added options to set spu reverb and interpolation (disabled by default) + + -- sakya Sun, 01 Apr 2012 11:42:20 +0200 + +pcsxrearmed (0.4.0.14.1) unstable; urgency=low + + * Added option to set psx region (NTSC/PAL/Auto) + * Use PulseAudio (better audio) + + -- sakya Wed, 30 Mar 2012 09:44:51 +0200 + +pcsxrearmed (0.4.0.14) unstable; urgency=low + + * Updated to r14 + * Added --help + * PCSX4All + + -- sakya Sun, 27 Dec 2011 00:02:27 +0200 + +pcsxrearmed (0.4.0.12.2) unstable; urgency=low + + * gpu-gles + + + -- Bonapart Sun, 27 Dec 2011 00:02:27 +0200 diff --git a/debian_maemo/compat b/debian_maemo/compat new file mode 100644 index 0000000..7ed6ff8 --- /dev/null +++ b/debian_maemo/compat @@ -0,0 +1 @@ +5 diff --git a/debian_maemo/control b/debian_maemo/control new file mode 100644 index 0000000..4469ed8 --- /dev/null +++ b/debian_maemo/control @@ -0,0 +1,115 @@ +Source: pcsxrearmed +Section: user/games +Priority: extra +Maintainer: Bonapart +Build-Depends: debhelper (>= 5), zlib1g-dev, libhildon1-dev, libpulse-dev, libasound2-dev, libbz2-dev, libgles1-sgx-img-dev, opengles-sgx-img-common-dev, libosso-dev, libdbus-1-dev, libhildonfm2-dev, libts-dev +Standards-Version: 3.7.3 + +Package: pcsxrearmed +Architecture: armel +Depends: ${shlibs:Depends}, libts-0.0-0 +Description: Sony PlayStation emulator +XSBC-Homepage: http://notaz.gp2x.de/pcsx_rearmed.php +XSBC-Bugtracker: http://notaz.gp2x.de/pcsx_rearmed.php +XB-Maemo-Display-Name: PCSX-ReArmed +XB-Maemo-Icon-26: + iVBORw0KGgoAAAANSUhEUgAAADAAAAAwCAYAAABXAvmHAAAAAXNSR0IArs4c + 6QAAEStJREFUaN7Fmn+wXVV1xz9r733uufe+e9+DJIRESEICEQghAUQBI8Uo + ik7jrxm1o+3ooB2RkWrtONqZlhn7Y8bRUVudqVZsoTjFH2CrSKsVEFIQIr8C + JARC0CDkBwkkIcl7L7n3nLP36h97n3Pvo07/7c2cOfeenB9rr/Vd3/Vd6zxh + 7DM52b9isj+5YcGC+a9bunTpOVNTU91WK5Msy1ECIgYRMGKxzmGtQQBrHSZ9 + R8C5DOcc1hiMtTjrsM4hAtZYWq1W/H9nyZzDZVk6x9LKWmSteH2rlZNlLhw/ + fnzXszt3fmHjxo13bH74oReBwbbtO6r0ODht5UpnfXXdytOXX+mswVcl1hqc + swjgQ0BV8T7gfSCEQFBQFFQJqqhquptpHCJiMGIQKxhjsdZircMai8kczibj + XUaWZdi0d1lGlrVwmdN2nsvypUtZvmwpw7LY8dSTT37yX264/sGgOrNl67ZS + AFavPudTF5y76u9fOniQZ3/7PLPHjuG9Z8wSJO7QeAABVIW0DFAFBEVANF0j + AGh9XBWV9L3eRFAx6dr6XFDiMWMNy049VS9Yu4o/et+7xBj78u5duz78lS9/ + 8U6UoZx59lnZ2StXbMP7lT+7cyNFUWCsbQwmyLhTo1EiydvpSFogIpiIMQST + zhOMMSAmnmcMIhYxgogFYxBjEYnnYOIWn2OjA4IiRjhj+TL98uc/K5X3W7Zs + 2fKBf/72Pz5tZmdmz5jodJZtfeppyqqK+LQx3EYtpm0wRChYazBWMCYaasRg + TFwAUu/TsgyIiecytkAxBjG1sTZuzfe4GIwDk4GxqMmQLAdx7PjNs/KNG75H + 3srWLFmy5E3P79rj7Omnr7is08o+8Otnn6MYFtGIErQCyZXsqxW6B3S3gQqw + /I6PNgEZwSPmQAMVYxERVCSSQTI8et4iYtF6MckhcXECJuWSGPbvf1EvumCN + nDA1taIoypuNr6pFg6JgMEjGe2Ch0vpmSXdnQeuqQPeeivZtBbI6oIWiKJqS + WLU2vM6POh2SIZjEXpIMtglGdSTib9IxEsSoFy9mBFtjmTleyJ33bMIYOevQ + oUMnuLIs5w+LkuCr+HAP+dcqWu8PFD8SZAEwA9kGxawuOXZeBjMGXJ0BNMk4 + gpIZYbr2ZvJ6hJBrftcLEGPTfVJ+EBcfUo6pgohQhMCBgwdB4fDhw5MG6JdV + RQgKQZAO2IuVMIDB+xzhcWF4lWP4FUPYZJATf1dCp0ROxps69LXR1mGsTZtD + TNrbWE/i9/jbmFEiRx6oCSCRRHJZURTMzMx0nUK7LD2KIgZ0CPoM2KWQf8tj + TlPkZGX4GRdNdiTvzzW+Nrg2voaIqb1uHUYsYm2KRNxURnBSTfdB4k5r6q0x + qogIIShFWVKWZWa8962y8g0eRGH4GYd/Usg/GsjerHQ2VnRuK7GvC6iXhjpH + xo+MyPOcdruNsTZyughiXFxI42mHuAyswzgLNrGRNWANYg1gUCOoMQ2DYUZ5 + JgKqahxI5n0Y4cJBeMxw7LwM+6ZA62880o454C4rmb1U4AkLrXHjY6UNqpx6 + ymKm+n2qEBgUFUemZzk8PYsPinMxIsa6xDAR71rXjFj/CJo8WdNDXeQTY4iA + tZHVnKo61VBfCW3FXqzoUCl/bnB/ECj+0uGurGj/bcBdESifcBGLKQIN5oFu + p8OCBfPpdDp0u13yvI1X5fnd+9jx210cOjJDnmUY4yLOE9xACEmamHoRQZtF + aar0oBgxOJfFBYgxTjXxeGVgnqdzd4nuh9mLshizFkieHDKcyzam5nNjICjt + djS81+sxMTHB5OQkvV6P89euoZXn3Pfgo9y16RGKMiDWRu8bQVTQEEAiPQsh + KoAwqjApDBgjZM5F54mIa5IwA31OKL5kMYthYkeJe2ege39Bfm3Ab4fqJy7i + PVjEW/AGCel3sHTb0fh+v8/k5CQ+KEemZ+h0u8yfN48PvvddXPvpq5ma6qMK + 1goGk2qdIKYmZlMrLiBE+ISAAkaELMswxohx1mZSaxgBnFD8hePYBx3V92PW + hEeFwecyjr+lA7stsgDMUkWWhLidGpBXBWRxQPvKZH+yWcA9v9rMN2+8mWd2 + Pk+/36fX6/G615zHtZ++GucsqpIkR6obgJFXVPfa+QliYgwuy7DW4sQYGdFg + UsMC1fct1Q8c+ZdKqq9lhBds1D3WYt8zwP3JMNFc/RTBiPLcXdtZ9/LFdHtd + +v0+/ck+Yi0vHXyZXq9Ht9ul3W5z2bqLeNsb13HrHffQytvUVdSIRPyPVfna + +HpJxiSpL4LTEDAxHFFC12IssUz51y2oBNOSVPqF8MMu1X90QWykR7EY46iq + ite89VImzp6Ixvf7tLIW1rbodDtMTEzQ6XTI85w8z7nowvP54U/vJs9Hyanq + myKpKKIpAppEsWqCXhRlTlUxSbvXJTtSlYyS1tTcnKrt0CKFbSqqcZHnfeVY + 0DupMb7X65HlOVhD3mo33s/znCzLWLl8Ka08IxBtUA1NUBVFJeoukdg0ee+Z + mZllMBwgCM45cSGEEPHX0OzcRqaWv4y+R/GVqmqSCJJ6iM5Ep2GeXq9H1soR + sWSt1hzjsyxj3oknMJG3KGqmqTsmiaCvo1BWnmJYUJRDKu9HjZIqzhijseSb + RlXW+zoqkSJMoyZreWDHKms8Bt1urzG+1+vhsgwjllaWkef5qB+2lsoHfEj8 + PoZ3QQjeU5QFRVFSlRUh+Oa8qIK1gVAwxozEWLpN3Tr6oIhRjIJY0yhHsbEB + qQWaMRapPN1uh36/z8TEBN1ul8xliAHrsjnGG2PY88I+Zo4dZ6I3EVnHB4qy + ZDgYUpYVGkJqbUO0SAEJqAYq72MEAG+SFtc6DwREU6+bIhCA4AMWgzhpEnek + c2KE6iJWV+I4jbA4Z2vubqK96aHHokQuo5eHRYEGT0hDBG0wLU0VltRi+qqK + CxARX+uKV+J/1HhHcRXVo+C9EvB4FWwAYwLz5p+IwZC1crrdLp1Oh3a7jRhD + 5QODwSAZBoPhkLvv/RXfuP4mPIbS+5iA49t4m9QcjwUt+EBRFrV0I4y8ImOV + Q5qOKiZzrUINKlBVnrzlWLFsCWefdQbr113CLx94BCNCp9Oh0+nQarWoqujZ + m370U3bs3MWik+Zx5OhRfnHPJnrdLsOyZPr4MarKY63BGtMYKnXhIsKmXogP + nqIogajuGwjVTKQyd+yROnQCUJYVJ05NsWb1Kt74hks4c+XpnPKqxfT7fZ58 + 5lmyVqsxPsuilsqtsPrMM7jktWs5/bRl9HtdrvnYh7HWMhgMefHAQR7c/Dg/ + u3Mjj27djnEWI6aJQNJ0zcd7T1EMI4RUtaqTamxWMoJOUoo+KKcsWsglF13I + 773hEk6av4B+vzcnYV8+fKQpUs45nHMcPTrNT757Heede04zwfhfcAXeuv5S + Pvepj3PHXffw2c9/kWd378Vg0DAXRqrgK89gMCSEMJbEafxBrVoZNS55nnP5 + G9dx+frLmgJVU+TR6Vnu+O/7+Pdb/4vHt2zjfe/ZEKdsaTRTVSWLTl6Ic5ay + qjg2e4ytTz3NDf96C4889jjHBkP+/E+v5sMfeC/tPOcdb38LU/0+7//jT3J0 + enpsXDDKCx88xTAtQKFsIKRjA6t6WqZw9qvP4PL1lzE5OcnExATtdocHHn6M + +x/azP0PPkorb7P23HO4MG8nre6oqdmK4cCBg+zavYc77v4lP7/rHrZse4oz + Vqxg7bnn8o4r1vPmy9Y1zATw4oGDBO/RkPJRx1pKjRAaDCMpOIGqSWIZTdqU + URIfPjpNnrdHnncZe/e/xAknnMDVH/0Q6y+9hFVnvZp/+s73CaqM15V+v8s1 + n72W5/fsY3Z2lt9/y3o+9fErOX/NOSxfthTnRoOmF/bt58bv3sz1N93CzOzs + qJGRxEAoSCB4z3A4yoFiVAdovB6naQYV4bnd+/jCV7/BuzdcwaVvuJjTly/i + z675GO12m06n00gDl2WNWhyfXBw9Ms2nr7qSd2+4gkUnL6Tdzkd5oPCrBx/m + m9d/h433PcjR6RmGRTXWTupcFKnifcWwzgERKWMEatYZCdjxAe2uF/bxtW/d + yNe/fSNrV69i1dmvZvHCRbSyDLGW/fv38/O77uX1//CVOcl5+MhRbv/xTSw6 + eWGjX7z3TE9P88Nbf8oNN93MY1ufjHoqiUljYncW6sSlrgFRRnhfMSzqHFAt + jJFGsMXRRuR+bSZjMULORSmxdfuveXzbMzFlxiYS5XDI3n37eenAQebPOzFC + CeLMCXjpwEGe3P4Mt/zoNv7z9l/w/J59KYKt6NwxxlFlrIAx+h0ghEAxLOIC + gmpRj/5C7XV5xfiiRkRiVWcdtOq5T2z9xBjaeYu/+uLf8YN/u5ULzlvNhisu + ZzAcsOnBh9m9dy+3/+JeNm95giNHZ8haGZNT/cjxGtvGKB9CQkpACQRCwkBI + 8jouoCxLvPfqNIRBnQOicY6TXBvlrMaGew60dCT8JIUcBWMNBw4d5t5ND3Pf + A4/wvVt+wuzsMbZt/zXT09MMyxJjLZ1Op1GUUmuexsPjsoH4m7lsFEKgrLXQ + KInndIdzJs9xRhPHHLFKayrtBiQZks4REWxm0QAHXz4MCoNiCBiyzKVnNJiI + 9wmRYXTMeG2eERKca4qJLBRqNaqqRZ7nWOua1q2REPWKAs1UTALxfVkawMYb + +9F8RxQNZvz9B6iMTbLjTUMIc7yu9Z4wFoUR89T72ODXL0oEZ4yUcQFWVcbZ + TZtmRhtRFXm4rtIRXqnZVghGRzYzPmtPhKraDK8aVtG52J/LOjp6hUVI3lVE + RK2NswsHlK34llCa7BeNuKd2uU0tTkA1hiJG1qNqmh6C1F01EJyzklFyJspJ + JBNSDigStDlWGz96AZGco4qNr31qOS1F1spod9ujKXP9Uq5OgWa0B0ZHN2s8 + Qz3XTBBibpvYxLSBgI4aljpx0Yb3m1eHiaHGtZ8I5O0cUMqy9KaqyuPe+4OL + Fy4kc07rmzH2kDjqCOnBPr4F0YCGufuAJwSPBo+G6hVbOlc9IV2j9bWEdN8k + F9SPYEZgjH5oOdHFCxcwHBb7q6oqzNGj00cOHjz05No1q1lw4qSoD801UlfC + Bn5a3yfOMTUWn7gF8CkR/4+NEFknfq/vXz9z9LwRtBI0gxK8Z+H8E2XtmtXs + 3bv38enp6VlzbHb25W3bnnhANQzeseFtKBWVL0bYk9jU61jySTP1Do2XGsrT + MOa5sU1HXZXq+LFRtyK11yXMmciB4qsCoeRd73w7RVEc37x586YjR44ctsba + 7NChQ3m73T7ltRe+Zumac1ax9Yltenx6RsSmF26M6aRxlkpo1zQASEgevQTU + MRgSKTJomHMMJb7pb/7RRDbWBk91/DiTUxN6zVUfkWVLlnDvvffed//99/8Y + +I0VkWCtMc/s2DHw3i9at+71C996+Xrp9Sd01/O7ZHD8GASP+gpClfbxt097 + rY/7ilAlvPt4bHwLVUmo4n1COten65rzqmL0PXgmum3eueEK/ciH/lBOmJrU + 2267bevtt9/+XeBRYJ8Att3OT6oqf6b3/pJ58+a96ROf+MRFZ5115sTk5JTp + 93vSzttYF//OwaU/2siyLPatiSqNiXI8TjhoeoIRe0gjr4MGqjI6oKoqyqKk + KAqGxRBf+TQIGFKWJbMzszozOxt27tw5e9111z1w4MCBO4H7gKeBlwWg3c5b + 3of5VVWtUNVzgfMvvvji80477bQFS5Ys6VprTd301IaNG2iMaWBR9wKNwa/Y + 69j8sqoqQhpeeR8Nr+V22sKePXuO7969+6WHHnroseT1x4GdwCGgbFzU7XSy + wXDYDyGcDKwAlgOLgUmgzf/PZwgcAV4Ank2G7wemgRLgfwDIFWZCNtkwCgAA + AABJRU5ErkJggg== diff --git a/debian_maemo/copyright b/debian_maemo/copyright new file mode 100644 index 0000000..75a6b06 --- /dev/null +++ b/debian_maemo/copyright @@ -0,0 +1,2 @@ +this package was maemonized by Roman Deninberg +Mon, 10 Jan 2011 02:00:13 +0100 diff --git a/debian_maemo/dirs b/debian_maemo/dirs new file mode 100644 index 0000000..33359b8 --- /dev/null +++ b/debian_maemo/dirs @@ -0,0 +1 @@ +usr/games diff --git a/debian_maemo/docs b/debian_maemo/docs new file mode 100644 index 0000000..e845566 --- /dev/null +++ b/debian_maemo/docs @@ -0,0 +1 @@ +README diff --git a/debian_maemo/files b/debian_maemo/files new file mode 100644 index 0000000..0cc57dd --- /dev/null +++ b/debian_maemo/files @@ -0,0 +1 @@ +pcsxrearmed_0.4.0.14.13_armel.deb user/games extra diff --git a/debian_maemo/install b/debian_maemo/install new file mode 100644 index 0000000..a260186 --- /dev/null +++ b/debian_maemo/install @@ -0,0 +1,6 @@ +pcsx opt/maemo/usr/games/ +plugins/spunull/spunull.so opt/maemo/usr/games/plugins +plugins/gpu_unai/gpu_unai.so opt/maemo/usr/games/plugins +#plugins/gpu_unai/gpuPCSX4ALL.so opt/maemo/usr/games/plugins +plugins/dfxvideo/gpu_peops.so opt/maemo/usr/games/plugins +plugins/gpu-gles/gpu_gles.so opt/maemo/usr/games/plugins diff --git a/debian_maemo/rules b/debian_maemo/rules new file mode 100644 index 0000000..5230bf7 --- /dev/null +++ b/debian_maemo/rules @@ -0,0 +1,68 @@ +#!/usr/bin/make -f +# -*- makefile -*- + +#export DH_VERBOSE=1 + +DEB_HOST_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_HOST_GNU_TYPE) +DEB_BUILD_GNU_TYPE ?= $(shell dpkg-architecture -qDEB_BUILD_GNU_TYPE) +DEB_HOST_ARCH ?= $(shell dpkg-architecture -qDEB_HOST_ARCH) + +#GAME_VERSION := $(shell head debian/changelog -n1 | sed -n 's/.* (\(.*\)) .*/\1/p') +CFLAGS = -Wall -g + +ifneq (,$(findstring noopt,$(DEB_BUILD_OPTIONS))) + CFLAGS += -O0 +else + CFLAGS += -O2 +endif + +build: build-stamp + +build-stamp: + dh_testdir + ./configure --platform=maemo --gpu=neon --sound-drivers=pulseaudio --enable-neon + $(MAKE) + strip pcsx + strip plugins/gpu_unai/gpu_unai.so + strip plugins/gpu-gles/gpu_gles.so + strip plugins/spunull/spunull.so + touch build-stamp + +clean: + dh_testdir + dh_testroot + rm -f build-stamp + dh_clean + $(MAKE) clean clean_plugins + +install: build + dh_testdir + dh_testroot + dh_installdirs + mkdir -p "$(CURDIR)"/debian/pcsxrearmed/opt/maemo/usr/games/screenshots + chmod 777 "$(CURDIR)"/debian/pcsxrearmed/opt/maemo/usr/games/screenshots + chown user "$(CURDIR)"/debian/pcsxrearmed/opt/maemo/usr/games/screenshots + dh_install + +binary-indep: build install + +binary-arch: build install + dh_testdir + dh_testroot + dh_installchangelogs + dh_installdocs + #dh_installmenu + dh_link + dh_strip + dh_compress + dh_fixperms + dh_installdeb + dh_makeshlibs + dh_shlibdeps + dh_gencontrol + #maemo-optify + dh_md5sums + dh_builddeb + +binary: binary-indep binary-arch +.PHONY: build clean binary-indep binary-arch binary install diff --git a/frontend/320240/caanoo.gpe b/frontend/320240/caanoo.gpe new file mode 100755 index 0000000..9d6154a --- /dev/null +++ b/frontend/320240/caanoo.gpe @@ -0,0 +1,23 @@ +#!/bin/sh + +# Wiz's timings are already good, apply this for Caanoo +if [ -e /dev/accel ]; then + ./pollux_set "ram_timings=3,9,4,1,1,1,1" +fi + +# the sync mount causes problems when writing saves, +# probably due to many write calls, so have to get rid of it +if grep mmcblk /proc/mounts | grep -q '\'; then + oldmount=`grep mmcblk /proc/mounts | grep '\' | awk '{print $4}'` + mount /dev/mmcblk0p1 /mnt/sd/ -o remount,dirsync,noatime +fi + +./pcsx "$@" +sync + +if [ -n "$oldmount" ]; then + mount /dev/mmcblk0p1 /mnt/sd/ -o remount,$oldmount +fi + +cd /usr/gp2x +exec ./gp2xmenu diff --git a/frontend/320240/haptic_s.cfg b/frontend/320240/haptic_s.cfg new file mode 100644 index 0000000..624056d --- /dev/null +++ b/frontend/320240/haptic_s.cfg @@ -0,0 +1,3 @@ +0 126 +100 -126 +115 0 diff --git a/frontend/320240/haptic_w.cfg b/frontend/320240/haptic_w.cfg new file mode 100644 index 0000000..3585a71 --- /dev/null +++ b/frontend/320240/haptic_w.cfg @@ -0,0 +1,3 @@ +0 54 +100 -126 +105 0 diff --git a/frontend/320240/pcsx26.png b/frontend/320240/pcsx26.png new file mode 100644 index 0000000..ed220a0 Binary files /dev/null and b/frontend/320240/pcsx26.png differ diff --git a/frontend/320240/pcsx_rearmed.ini b/frontend/320240/pcsx_rearmed.ini new file mode 100644 index 0000000..b15497f --- /dev/null +++ b/frontend/320240/pcsx_rearmed.ini @@ -0,0 +1,6 @@ +[info] +name="PCSX ReARMed" +icon="/pcsx_rearmed/pcsx26.png" +path="/pcsx_rearmed/pcsx.gpe" +title="/pcsx_rearmed/pcsxb.png" +group="GAMES" diff --git a/frontend/320240/pcsxb.png b/frontend/320240/pcsxb.png new file mode 100644 index 0000000..ff5a48a Binary files /dev/null and b/frontend/320240/pcsxb.png differ diff --git a/frontend/320240/pollux_set.c b/frontend/320240/pollux_set.c new file mode 100644 index 0000000..f49e777 --- /dev/null +++ b/frontend/320240/pollux_set.c @@ -0,0 +1,389 @@ +/* + * quick tool to set various timings for Wiz + * + * Copyright (c) GraÅžvydas "notaz" Ignotas, 2009 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the organization nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * HTOTAL: X VTOTAL: 341 + * HSWIDTH: 1 VSWIDTH: 0 + * HASTART: 37 VASTART: 17 + * HAEND: 277 VAEND: 337 + * + * 120Hz + * pcd 8, 447: + 594us + * pcd 9, 397: + 36us + * pcd 10, 357: - 523us + * pcd 11, 325: +1153us + * + * 'lcd_timings=397,1,37,277,341,0,17,337;dpc_clkdiv0=9' + * 'ram_timings=2,9,4,1,1,1,1' + */ + +#include +#include +#include +//#include "pollux_set.h" +#define BINARY + +/* parse stuff */ +static int parse_lcd_timings(const char *str, void *data) +{ + int *lcd_timings = data; + const char *p = str; + int ret, c; + ret = sscanf(str, "%d,%d,%d,%d,%d,%d,%d,%d", + &lcd_timings[0], &lcd_timings[1], &lcd_timings[2], &lcd_timings[3], + &lcd_timings[4], &lcd_timings[5], &lcd_timings[6], &lcd_timings[7]); + if (ret != 8) + return -1; + /* skip seven commas */ + for (c = 0; c < 7 && *p != 0; p++) + if (*p == ',') + c++; + if (c != 7) + return -1; + /* skip last number */ + while ('0' <= *p && *p <= '9') + p++; + + return p - str; +} + +static int parse_ram_timings(const char *str, void *data) +{ + int *ram_timings = data; + const char *p = str; + int ret, c; + float cas; + + ret = sscanf(p, "%f,%d,%d,%d,%d,%d,%d", + &cas, &ram_timings[1], &ram_timings[2], &ram_timings[3], + &ram_timings[4], &ram_timings[5], &ram_timings[6]); + if (ret != 7) + return -1; + if (cas == 2) + ram_timings[0] = 1; + else if (cas == 2.5) + ram_timings[0] = 2; + else if (cas == 3) + ram_timings[0] = 3; + else + return -1; + for (c = 0; c < 6 && *p != 0; p++) + if (*p == ',') + c++; + if (c != 6) + return -1; + while ('0' <= *p && *p <= '9') + p++; + + return p - str; +} + +static int parse_decimal(const char *str, void *data) +{ + char *ep; + + *(int *)data = strtoul(str, &ep, 10); + if (ep == str) + return -1; + + return ep - str; +} + +/* validate and apply stuff */ +static int apply_lcd_timings(volatile unsigned short *memregs, void *data) +{ + int *lcd_timings = data; + int i; + + for (i = 0; i < 8; i++) { + if (lcd_timings[i] & ~0xffff) { + fprintf(stderr, "pollux_set: invalid lcd timing %d: %d\n", i, lcd_timings[i]); + return -1; + } + } + + for (i = 0; i < 8; i++) + memregs[(0x307c>>1) + i] = lcd_timings[i]; + + return 0; +} + +static const struct { + signed char adj; /* how to adjust value passed by user */ + signed short min; /* range of */ + signed short max; /* allowed values (inclusive) */ +} +ram_ranges[] = { + { 0, 1, 3 }, /* cas (cl) */ + { -2, 0, 15 }, /* trc */ + { -2, 0, 15 }, /* tras */ + { 0, 0, 15 }, /* twr */ + { 0, 0, 15 }, /* tmrd */ + { 0, 0, 15 }, /* trp */ + { 0, 0, 15 }, /* trcd */ +}; + +static int apply_ram_timings(volatile unsigned short *memregs, void *data) +{ + int *ram_timings = data; + int i, val; + + for (i = 0; i < 7; i++) + { + ram_timings[i] += ram_ranges[i].adj; + if (ram_timings[i] < ram_ranges[i].min || ram_timings[i] > ram_ranges[i].max) { + fprintf(stderr, "pollux_set: invalid RAM timing %d\n", i); + return -1; + } + } + + val = memregs[0x14802>>1] & 0x0f00; + val |= (ram_timings[4] << 12) | (ram_timings[5] << 4) | ram_timings[6]; + memregs[0x14802>>1] = val; + + val = memregs[0x14804>>1] & 0x4000; + val |= (ram_timings[0] << 12) | (ram_timings[1] << 8) | + (ram_timings[2] << 4) | ram_timings[3]; + val |= 0x8000; + memregs[0x14804>>1] = val; + + for (i = 0; i < 0x100000 && (memregs[0x14804>>1] & 0x8000); i++) + ; + + return 0; +} + +static int apply_dpc_clkdiv0(volatile unsigned short *memregs, void *data) +{ + int pcd = *(int *)data; + int tmp; + + if ((pcd - 1) & ~0x3f) { + fprintf(stderr, "pollux_set: invalid lcd clkdiv0: %d\n", pcd); + return -1; + } + + pcd = (pcd - 1) & 0x3f; + tmp = memregs[0x31c4>>1]; + memregs[0x31c4>>1] = (tmp & ~0x3f0) | (pcd << 4); + + return 0; +} + +static int apply_cpuclk(volatile unsigned short *memregs, void *data) +{ + volatile unsigned int *memregl = (volatile void *)memregs; + int mhz = *(int *)data; + int adiv, mdiv, pdiv, sdiv = 0; + int i, vf000, vf004; + + // m = MDIV, p = PDIV, s = SDIV + #define SYS_CLK_FREQ 27 + pdiv = 9; + mdiv = (mhz * pdiv) / SYS_CLK_FREQ; + if (mdiv & ~0x3ff) + return -1; + vf004 = (pdiv<<18) | (mdiv<<8) | sdiv; + + // attempt to keep AHB the divider close to 250, but not higher + for (adiv = 1; mhz / adiv > 250; adiv++) + ; + + vf000 = memregl[0xf000>>2]; + vf000 = (vf000 & ~0x3c0) | ((adiv - 1) << 6); + memregl[0xf000>>2] = vf000; + memregl[0xf004>>2] = vf004; + memregl[0xf07c>>2] |= 0x8000; + for (i = 0; (memregl[0xf07c>>2] & 0x8000) && i < 0x100000; i++) + ; + + printf("clock set to %dMHz, AHB set to %dMHz\n", mhz, mhz / adiv); + return 0; +} + +static int lcd_timings[8]; +static int ram_timings[7]; +static int dpc_clkdiv0; +static int cpuclk; + +static const char lcd_t_help[] = "htotal,hswidth,hastart,haend,vtotal,vswidth,vastart,vaend"; +static const char ram_t_help[] = "CAS,tRC,tRAS,tWR,tMRD,tRP,tRCD"; + +static const struct { + const char *name; + const char *help; + int (*parse)(const char *str, void *data); + int (*apply)(volatile unsigned short *memregs, void *data); + void *data; +} +all_params[] = { + { "lcd_timings", lcd_t_help, parse_lcd_timings, apply_lcd_timings, lcd_timings }, + { "ram_timings", ram_t_help, parse_ram_timings, apply_ram_timings, ram_timings }, + { "dpc_clkdiv0", "divider", parse_decimal, apply_dpc_clkdiv0, &dpc_clkdiv0 }, + { "clkdiv0", "divider", parse_decimal, apply_dpc_clkdiv0, &dpc_clkdiv0 }, /* alias */ + { "cpuclk", "MHZ", parse_decimal, apply_cpuclk, &cpuclk }, +}; +#define ALL_PARAM_COUNT (sizeof(all_params) / sizeof(all_params[0])) + +/* + * set timings based on preformated string + * returns 0 on success. + */ +int pollux_set(volatile unsigned short *memregs, const char *str) +{ + int parsed_params[ALL_PARAM_COUNT]; + int applied_params[ALL_PARAM_COUNT]; + int applied_something = 0; + const char *p, *po; + int i, ret; + + if (str == NULL) + return -1; + + memset(parsed_params, 0, sizeof(parsed_params)); + memset(applied_params, 0, sizeof(applied_params)); + + p = str; + while (1) + { +again: + while (*p == ';' || *p == ' ') + p++; + if (*p == 0) + break; + + for (i = 0; i < ALL_PARAM_COUNT; i++) + { + int param_len = strlen(all_params[i].name); + if (strncmp(p, all_params[i].name, param_len) == 0 && p[param_len] == '=') + { + p += param_len + 1; + ret = all_params[i].parse(p, all_params[i].data); + if (ret < 0) { + fprintf(stderr, "pollux_set parser: error at %-10s\n", p); + fprintf(stderr, " valid format is: <%s>\n", all_params[i].help); + return -1; + } + parsed_params[i] = 1; + p += ret; + goto again; + } + } + + /* Unknown param. Attempt to be forward compatible and ignore it. */ + for (po = p; *p != 0 && *p != ';'; p++) + ; + + fprintf(stderr, "unhandled param: "); + fwrite(po, 1, p - po, stderr); + fprintf(stderr, "\n"); + } + + /* validate and apply */ + for (i = 0; i < ALL_PARAM_COUNT; i++) + { + if (!parsed_params[i]) + continue; + + ret = all_params[i].apply(memregs, all_params[i].data); + if (ret < 0) { + fprintf(stderr, "pollux_set: failed to apply %s (bad value?)\n", + all_params[i].name); + continue; + } + + applied_something = 1; + applied_params[i] = 1; + } + + if (applied_something) + { + int c; + printf("applied: "); + for (i = c = 0; i < ALL_PARAM_COUNT; i++) + { + if (!applied_params[i]) + continue; + if (c != 0) + printf(", "); + printf("%s", all_params[i].name); + c++; + } + printf("\n"); + } + + return 0; +} + +#ifdef BINARY +#include +#include +#include +#include +#include + +static void usage(const char *binary) +{ + int i; + printf("usage:\n%s \n" + "set_str:\n", binary); + for (i = 0; i < ALL_PARAM_COUNT; i++) + printf(" %s=<%s>\n", all_params[i].name, all_params[i].help); +} + +int main(int argc, char *argv[]) +{ + volatile unsigned short *memregs; + int ret, memdev; + + if (argc != 2) { + usage(argv[0]); + return 1; + } + + memdev = open("/dev/mem", O_RDWR); + if (memdev == -1) + { + perror("open(/dev/mem) failed"); + return 1; + } + + memregs = mmap(0, 0x20000, PROT_READ|PROT_WRITE, MAP_SHARED, memdev, 0xc0000000); + if (memregs == MAP_FAILED) + { + perror("mmap(memregs) failed"); + close(memdev); + return 1; + } + + ret = pollux_set(memregs, argv[1]); + + munmap((void *)memregs, 0x20000); + close(memdev); + + return ret; +} +#endif diff --git a/frontend/320240/skin/background.png b/frontend/320240/skin/background.png new file mode 100644 index 0000000..0efdd18 Binary files /dev/null and b/frontend/320240/skin/background.png differ diff --git a/frontend/320240/skin/font.png b/frontend/320240/skin/font.png new file mode 100644 index 0000000..c526a08 Binary files /dev/null and b/frontend/320240/skin/font.png differ diff --git a/frontend/320240/skin/readme.txt b/frontend/320240/skin/readme.txt new file mode 100644 index 0000000..dd83963 --- /dev/null +++ b/frontend/320240/skin/readme.txt @@ -0,0 +1,8 @@ +The skin images can be customized, but there are several limitations: + +background.png - must be 320x240 image with 24bit RGB colors. +font.png - must be 128x160 8bit grayscale image. +selector.png - must be 8x10 8bit grayscale image. + +Font and selector colors can be changed by editing skin.txt. + diff --git a/frontend/320240/skin/selector.png b/frontend/320240/skin/selector.png new file mode 100644 index 0000000..5062cc2 Binary files /dev/null and b/frontend/320240/skin/selector.png differ diff --git a/frontend/320240/skin/skin.txt b/frontend/320240/skin/skin.txt new file mode 100644 index 0000000..1d6979f --- /dev/null +++ b/frontend/320240/skin/skin.txt @@ -0,0 +1,4 @@ +// html-style hex color codes, ex. ff0000 is red, 0000ff is blue, etc. +text_color=ffffc0 +selection_color=808010 + diff --git a/frontend/320240/ui_gp2x.h b/frontend/320240/ui_gp2x.h new file mode 100644 index 0000000..a9c4413 --- /dev/null +++ b/frontend/320240/ui_gp2x.h @@ -0,0 +1,15 @@ +#ifndef UI_FEATURES_H +#define UI_FEATURES_H + +#define MENU_BIOS_PATH "pcsx_rearmed/bios/" +#define MENU_SHOW_VARSCALER 0 +#define MENU_SHOW_VOUTMODE 0 +#define MENU_SHOW_SCALER2 1 +#define MENU_SHOW_NUBS_BTNS 0 +#define MENU_SHOW_VIBRATION 1 +#define MENU_SHOW_DEADZONE 1 +#define MENU_SHOW_MINIMIZE 0 +#define MENU_SHOW_FULLSCREEN 0 +#define MENU_SHOW_VOLUME 1 + +#endif // UI_FEATURES_H diff --git a/frontend/libretro.c b/frontend/libretro.c index b8d17ab..919fabb 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -78,6 +78,7 @@ static bool display_internal_fps = false; static unsigned frame_count = 0; static bool libretro_supports_bitmasks = false; static int show_advanced_gpu_peops_settings = -1; +static int show_advanced_gpu_unai_settings = -1; static unsigned previous_width = 0; static unsigned previous_height = 0; @@ -1423,6 +1424,7 @@ static void update_variables(bool in_flight) if (strcmp(var.value, "disabled") == 0) { pl_rearmed_cbs.gpu_peops.iUseDither = 0; pl_rearmed_cbs.gpu_peopsgl.bDrawDither = 0; + pl_rearmed_cbs.gpu_unai.dithering = 0; #ifdef __ARM_NEON__ pl_rearmed_cbs.gpu_neon.allow_dithering = 0; #endif @@ -1430,6 +1432,7 @@ static void update_variables(bool in_flight) else if (strcmp(var.value, "enabled") == 0) { pl_rearmed_cbs.gpu_peops.iUseDither = 1; pl_rearmed_cbs.gpu_peopsgl.bDrawDither = 1; + pl_rearmed_cbs.gpu_unai.dithering = 1; #ifdef __ARM_NEON__ pl_rearmed_cbs.gpu_neon.allow_dithering = 1; #endif @@ -1757,6 +1760,96 @@ static void update_variables(bool in_flight) } #endif +#ifdef GPU_UNAI + var.key = "pcsx_rearmed_gpu_unai_ilace_force"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + { + if (strcmp(var.value, "disabled") == 0) + pl_rearmed_cbs.gpu_unai.ilace_force = 0; + else if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.gpu_unai.ilace_force = 1; + } + + var.key = "pcsx_rearmed_gpu_unai_pixel_skip"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + { + if (strcmp(var.value, "disabled") == 0) + pl_rearmed_cbs.gpu_unai.pixel_skip = 0; + else if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.gpu_unai.pixel_skip = 1; + } + + var.key = "pcsx_rearmed_gpu_unai_lighting"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + { + if (strcmp(var.value, "disabled") == 0) + pl_rearmed_cbs.gpu_unai.lighting = 0; + else if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.gpu_unai.lighting = 1; + } + + var.key = "pcsx_rearmed_gpu_unai_fast_lighting"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + { + if (strcmp(var.value, "disabled") == 0) + pl_rearmed_cbs.gpu_unai.fast_lighting = 0; + else if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.gpu_unai.fast_lighting = 1; + } + + var.key = "pcsx_rearmed_gpu_unai_blending"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) || var.value) + { + if (strcmp(var.value, "disabled") == 0) + pl_rearmed_cbs.gpu_unai.blending = 0; + else if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.gpu_unai.blending = 1; + } + + var.key = "pcsx_rearmed_show_gpu_unai_settings"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { + int show_advanced_gpu_unai_settings_prev = show_advanced_gpu_unai_settings; + + show_advanced_gpu_unai_settings = 1; + if (strcmp(var.value, "disabled") == 0) + show_advanced_gpu_unai_settings = 0; + + if (show_advanced_gpu_unai_settings != show_advanced_gpu_unai_settings_prev) + { + unsigned i; + struct retro_core_option_display option_display; + char gpu_unai_option[5][40] = { + "pcsx_rearmed_gpu_unai_blending", + "pcsx_rearmed_gpu_unai_lighting", + "pcsx_rearmed_gpu_unai_fast_lighting", + "pcsx_rearmed_gpu_unai_ilace_force", + "pcsx_rearmed_gpu_unai_pixel_skip", + }; + + option_display.visible = show_advanced_gpu_unai_settings; + + for (i = 0; i < 5; i++) + { + option_display.key = gpu_unai_option[i]; + environ_cb(RETRO_ENVIRONMENT_SET_CORE_OPTIONS_DISPLAY, &option_display); + } + } + } +#endif // GPU_UNAI + if (in_flight) { // inform core things about possible config changes plugin_call_rearmed_cbs(); diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 6513e1c..bfb21a5 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -534,6 +534,76 @@ struct retro_core_option_definition option_defs_us[] = { }, #endif + /* GPU UNAI Advanced Options */ +#ifdef GPU_UNAI + { + "pcsx_rearmed_show_gpu_unai_settings", + "Advance GPU UNAI/PCSX4All Settings", + "Shows or hides advanced gpu settings. A core restart might be needed for settings to take effect. NOTE: Quick Menu must be toggled for this setting to take effect.", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_unai_blending", + "(GPU) Enable Blending", + NULL, + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, + "enabled", + }, + { + "pcsx_rearmed_gpu_unai_lighting", + "(GPU) Enable Lighting", + NULL, + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, + "enabled", + }, + { + "pcsx_rearmed_gpu_unai_fast_lighting", + "(GPU) Enable Fast Lighting", + NULL, + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, + "enabled", + }, + { + "pcsx_rearmed_gpu_unai_ilace_force", + "(GPU) Enable Forced Interlace", + NULL, + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, + "disabled", + }, + { + "pcsx_rearmed_gpu_unai_pixel_skip", + "(GPU) Enable Pixel Skip", + NULL, + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, + "disabled", + }, +#endif /* GPU UNAI Advanced Settings */ + { "pcsx_rearmed_show_bios_bootlogo", "Show Bios Bootlogo", diff --git a/frontend/main.c b/frontend/main.c index c0bfd0f..b6b5411 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -130,6 +130,13 @@ void emu_set_default_config(void) pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0; pl_rearmed_cbs.gpu_peops.iUseDither = 0; pl_rearmed_cbs.gpu_peops.dwActFixes = 1<<7; + pl_rearmed_cbs.gpu_unai.ilace_force = 0; + pl_rearmed_cbs.gpu_unai.pixel_skip = 1; + pl_rearmed_cbs.gpu_unai.lighting = 1; + pl_rearmed_cbs.gpu_unai.fast_lighting = 1; + pl_rearmed_cbs.gpu_unai.blending = 1; + pl_rearmed_cbs.gpu_unai.dithering = 0; + // old gpu_unai config pl_rearmed_cbs.gpu_unai.abe_hack = pl_rearmed_cbs.gpu_unai.no_light = pl_rearmed_cbs.gpu_unai.no_blend = 0; diff --git a/frontend/menu.c b/frontend/menu.c index babe109..47523b2 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -305,14 +305,14 @@ static void menu_sync_config(void) cycle_multiplier = 10000 / psx_clock; switch (in_type_sel1) { - case 1: in_type1 = PSE_PAD_TYPE_ANALOGPAD; break; - case 2: in_type1 = PSE_PAD_TYPE_NEGCON; break; - default: in_type1 = PSE_PAD_TYPE_STANDARD; + case 1: in_type[0] = PSE_PAD_TYPE_ANALOGPAD; break; + case 2: in_type[0] = PSE_PAD_TYPE_NEGCON; break; + default: in_type[0] = PSE_PAD_TYPE_STANDARD; } switch (in_type_sel2) { - case 1: in_type2 = PSE_PAD_TYPE_ANALOGPAD; break; - case 2: in_type2 = PSE_PAD_TYPE_NEGCON; break; - default: in_type2 = PSE_PAD_TYPE_STANDARD; + case 1: in_type[1] = PSE_PAD_TYPE_ANALOGPAD; break; + case 2: in_type[1] = PSE_PAD_TYPE_NEGCON; break; + default: in_type[1] = PSE_PAD_TYPE_STANDARD; } if (in_evdev_allow_abs_only != allow_abs_only_old) { in_probe(); @@ -422,6 +422,12 @@ static const struct { CE_INTVAL_V(frameskip, 3), CE_INTVAL_P(gpu_peops.iUseDither), CE_INTVAL_P(gpu_peops.dwActFixes), + CE_INTVAL_P(gpu_unai.ilace_force), + CE_INTVAL_P(gpu_unai.pixel_skip), + CE_INTVAL_P(gpu_unai.lighting), + CE_INTVAL_P(gpu_unai.fast_lighting), + CE_INTVAL_P(gpu_unai.blending), + CE_INTVAL_P(gpu_unai.dithering), CE_INTVAL_P(gpu_unai.lineskip), CE_INTVAL_P(gpu_unai.abe_hack), CE_INTVAL_P(gpu_unai.no_light), @@ -1358,10 +1364,16 @@ static int menu_loop_plugin_gpu_neon(int id, int keys) static menu_entry e_menu_plugin_gpu_unai[] = { - mee_onoff ("Skip every 2nd line", 0, pl_rearmed_cbs.gpu_unai.lineskip, 1), - mee_onoff ("Abe's Odyssey hack", 0, pl_rearmed_cbs.gpu_unai.abe_hack, 1), - mee_onoff ("Disable lighting", 0, pl_rearmed_cbs.gpu_unai.no_light, 1), - mee_onoff ("Disable blending", 0, pl_rearmed_cbs.gpu_unai.no_blend, 1), + //mee_onoff ("Skip every 2nd line", 0, pl_rearmed_cbs.gpu_unai.lineskip, 1), + //mee_onoff ("Abe's Odyssey hack", 0, pl_rearmed_cbs.gpu_unai.abe_hack, 1), + //mee_onoff ("Disable lighting", 0, pl_rearmed_cbs.gpu_unai.no_light, 1), + //mee_onoff ("Disable blending", 0, pl_rearmed_cbs.gpu_unai.no_blend, 1), + mee_onoff ("Interlace", 0, pl_rearmed_cbs.gpu_unai.ilace_force, 1), + mee_onoff ("Dithering", 0, pl_rearmed_cbs.gpu_unai.dithering, 1), + mee_onoff ("Lighting", 0, pl_rearmed_cbs.gpu_unai.lighting, 1), + mee_onoff ("Fast lighting", 0, pl_rearmed_cbs.gpu_unai.fast_lighting, 1), + mee_onoff ("Blending", 0, pl_rearmed_cbs.gpu_unai.blending, 1), + mee_onoff ("Pixel skip", 0, pl_rearmed_cbs.gpu_unai.pixel_skip, 1), mee_end, }; diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index 92e62e9..d51c5e7 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -80,6 +80,13 @@ struct rearmed_cbs { int dwFrameRateTicks; } gpu_peops; struct { + int ilace_force; + int pixel_skip; + int lighting; + int fast_lighting; + int blending; + int dithering; + // old gpu_unai config for compatibility int abe_hack; int no_light, no_blend; int lineskip; diff --git a/jni/Android.mk b/jni/Android.mk index 2867791..3ebdf97 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -140,10 +140,14 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) $(FRONTEND_DIR)/cspace_neon.S SOURCES_C += $(NEON_DIR)/psx_gpu_if.c else ifeq ($(TARGET_ARCH_ABI),armeabi) + COREFLAGS += -DUSE_GPULIB=1 -DGPU_UNAI + COREFLAGS += -DINLINE="static __inline__" -Dasm="__asm__ __volatile__" SOURCES_ASM += $(UNAI_DIR)/gpu_arm.S \ $(FRONTEND_DIR)/cspace_arm.S SOURCES_C += $(UNAI_DIR)/gpulib_if.cpp else + COREFLAGS += -DUSE_GPULIB=1 -DGPU_UNAI + COREFLAGS += -DINLINE="static __inline__" -Dasm="__asm__ __volatile__" SOURCES_C += $(UNAI_DIR)/gpulib_if.cpp endif diff --git a/maemo/hildon.c b/maemo/hildon.c new file mode 100644 index 0000000..7e9cd9f --- /dev/null +++ b/maemo/hildon.c @@ -0,0 +1,843 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../frontend/plugin_lib.h" +#include "../frontend/main.h" +#include "../libpcsxcore/misc.h" +#include "../include/psemu_plugin_defs.h" +#include "../libpcsxcore/cdrom.h" +#include "../libpcsxcore/cdriso.h" +#include "../plugins/dfinput/main.h" +#include "../frontend/libpicofe/readpng.h" +#include "maemo_common.h" +#include +#include + +#define X_RES 800 +#define Y_RES 480 +#define D_WIDTH 640 +#define D_HEIGHT 480 + +#define CALL_SIGNAL_IF "com.nokia.csd.Call" +#define CALL_SIGNAL_PATH "/com/nokia/csd/call" +#define CALL_INCOMING_SIG "Coming" + +#define DBUS_RULE_CALL_INCOMING "type='signal',interface='" CALL_SIGNAL_IF \ + "',path='" CALL_SIGNAL_PATH \ + "',member='" CALL_INCOMING_SIG "'" + +osso_context_t* osso = NULL; +int bRunning = TRUE; +extern int bKeepDisplayOn; +extern int bAutosaveOnExit; +extern int cornerActions[4]; +extern char keys_config_file[MAXPATHLEN]; +static pthread_t display_thread = (pthread_t)0; +int g_layer_x = (X_RES - D_WIDTH) / 2; +int g_layer_y = (Y_RES - D_HEIGHT) / 2; +int g_layer_w = D_WIDTH, g_layer_h = D_HEIGHT; + +static GdkImage *image; +static HildonAnimationActor *actor; +static GtkWidget *window, *drawing = NULL; + +static int pl_buf_w, pl_buf_h; +int keymap[65536]; +int direction_keys[4]; + +// map psx4m compatible keymap to PSX keys +static const unsigned char keymap2[14] = { + DKEY_LEFT, // 0 + DKEY_RIGHT, + DKEY_UP, + DKEY_DOWN, + DKEY_CIRCLE, + DKEY_CROSS, // 5 + DKEY_TRIANGLE, + DKEY_SQUARE, + DKEY_SELECT, + DKEY_START, + DKEY_L1, // 10 + DKEY_R1, + DKEY_L2, + DKEY_R2, +}; + +void hildon_quit() +{ + maemo_finish(); + gtk_main_quit(); + exit(0); +} + +gdouble press_x = -1; +gdouble press_y = -1; + +int maemo_x11_update_keys(); +void show_notification(char* text); + +void change_slot(int delta) +{ + state_slot += delta; + if (state_slot > 9) + state_slot = 0; + else if (state_slot < 0) + state_slot = 9; + char message[50]; + sprintf(message,"Savestate slot: %i",state_slot + 1); + show_notification(message); +} + +void save(int state_slot) +{ + emu_save_state(state_slot); + char buf[MAXPATHLEN]; + if (image && image->mem){ + sprintf (buf,"/opt/maemo/usr/games/screenshots%s.%3.3d",file_name,state_slot); + writepng(buf, image->mem, pl_buf_w,pl_buf_h); + } + char message[50]; + sprintf(message,"Saved savestate slot: %i",state_slot + 1); + show_notification(message); +} + +void quit() +{ + if (bAutosaveOnExit){ + show_notification("Autosaving"); + emu_save_state(99); + char buf[MAXPATHLEN]; + if (image && image->mem){ + sprintf (buf,"/opt/maemo/usr/games/screenshots%s.%3.3d",file_name,99); + writepng(buf, image->mem, pl_buf_w,pl_buf_h); + } + } + hildon_quit(); +} + +int show_confirmbox(char* text) +{ + if (!window) + return TRUE; + + GtkWidget *dialog; + dialog = gtk_message_dialog_new (GTK_WINDOW(window), + GTK_DIALOG_DESTROY_WITH_PARENT, + GTK_MESSAGE_QUESTION, + GTK_BUTTONS_YES_NO, + text); + gint result = gtk_dialog_run (GTK_DIALOG (dialog)); + gtk_widget_destroy (dialog); + if (result == GTK_RESPONSE_YES) + return TRUE; + return FALSE; +} + +static void +window_button_proxy(GtkWidget *widget, + GdkEventButton *event, + gpointer user_data) +{ + int corner = -1; + int sens = 100; + + switch (event->type){ + case GDK_BUTTON_PRESS: + //printf("GDK_BUTTON_PRESS: x=%f y=%f\n", event->x, event->y); + press_x = event->x; + press_y = event->y; + break; + case GDK_BUTTON_RELEASE: + //printf("GDK_BUTTON_RELEASE: x=%f y=%f\n", event->x, event->y); + if (press_x < sens && press_y < sens && event->x < sens && event->y < sens) + corner = 0; + else if (press_x > 800 - sens && press_y < sens && event->x > 800 - sens && event->y < sens) + corner = 1; + else if (press_x > 800 - sens && press_y > 480 - sens && event->x > 800 - sens && event->y > 480 - sens) + corner = 2; + else if (press_x < sens && press_y > 480 - sens && event->x < sens && event->y > 480 - sens) + corner = 3; + + press_x = -1; + press_y = -1; + break; + default: + break; + } + + if (corner >= 0){ + switch (cornerActions[corner]){ + case 1: + if (show_confirmbox("Save savestate?")) + save(state_slot); + break; + case 2: + if (show_confirmbox("Load savestate?")) + emu_load_state(state_slot); + break; + case 3: + change_slot(1); + break; + case 4: + change_slot(-1); + break; + case 5: + if (show_confirmbox("Quit?")) + quit(); + break; + } + } +} + +static void *displayThread(void *arg) +{ + DBusConnection* system_bus = (DBusConnection*)osso_get_sys_dbus_connection(osso); + DBusMessage* msg = dbus_message_new_method_call("com.nokia.mce", + "/com/nokia/mce/request", + "com.nokia.mce.request", + "req_display_blanking_pause"); + if (msg && system_bus) { + bRunning = TRUE; + while (bRunning) { + dbus_connection_send(system_bus, msg, NULL); + dbus_connection_flush(system_bus); + int i = 0; + for (i=0; i<8; i++){ + usleep(500000); + if (!bRunning) + break; + } + } + dbus_message_unref(msg); + } + + pthread_exit(0); + return NULL; +} + +void show_notification(char* text) +{ + if (window){ + GtkWidget* banner = hildon_banner_show_information(GTK_WIDGET(window), NULL, text); + hildon_banner_set_timeout(HILDON_BANNER(banner), 3000); + }else{ + DBusConnection* session_bus = (DBusConnection*)osso_get_dbus_connection(osso); + DBusMessageIter args; + DBusMessage*msg = dbus_message_new_method_call("org.freedesktop.Notifications", + "/org/freedesktop/Notifications", + "org.freedesktop.Notifications", + "SystemNoteInfoprint"); + if (msg) { + dbus_message_iter_init_append(msg, &args); + char* param = text; + if (dbus_message_iter_append_basic(&args, DBUS_TYPE_STRING, ¶m)) { + dbus_connection_send(session_bus, msg, NULL); + dbus_connection_flush(session_bus); + } + dbus_message_unref(msg); + } + } +} + +void show_messagebox(char* text) +{ + if (!window) + return; + + GtkWidget *dialog; + dialog = gtk_message_dialog_new (GTK_WINDOW(window), + GTK_DIALOG_DESTROY_WITH_PARENT, + GTK_MESSAGE_INFO, + GTK_BUTTONS_OK, + text); + gtk_dialog_run (GTK_DIALOG (dialog)); + gtk_widget_destroy (dialog); +} + +#include +void change_disc() +{ + GtkWidget *dialog; + dialog = hildon_file_chooser_dialog_new (GTK_WINDOW(window), GTK_FILE_CHOOSER_ACTION_OPEN); + gtk_window_set_title (GTK_WINDOW (dialog), "Change disc"); + + char currentFile[MAXPATHLEN]; + strcpy(currentFile, GetIsoFile()); + if (strlen(currentFile)) + gtk_file_chooser_set_filename (GTK_FILE_CHOOSER(dialog), currentFile); + else + gtk_file_chooser_set_current_folder (GTK_FILE_CHOOSER(dialog), "/home/user/MyDocs/"); + + GtkFileFilter *filter=gtk_file_filter_new(); + gtk_file_filter_add_pattern (filter,"*.bin"); + gtk_file_filter_add_pattern (filter,"*.BIN"); + gtk_file_filter_add_pattern (filter,"*.iso"); + gtk_file_filter_add_pattern (filter,"*.ISO"); + gtk_file_filter_add_pattern (filter,"*.img"); + gtk_file_filter_add_pattern (filter,"*.IMG"); + gtk_file_filter_add_pattern (filter,"*.z"); + gtk_file_filter_add_pattern (filter,"*.Z"); + gtk_file_filter_add_pattern (filter,"*.znx"); + gtk_file_filter_add_pattern (filter,"*.ZNX"); + gtk_file_filter_add_pattern (filter,"*.pbp"); + gtk_file_filter_add_pattern (filter,"*.PBP"); + gtk_file_filter_add_pattern (filter,"*.mdf"); + gtk_file_filter_add_pattern (filter,"*.MDF"); + gtk_file_chooser_set_filter (GTK_FILE_CHOOSER (dialog),filter); + + if (gtk_dialog_run (GTK_DIALOG (dialog)) == GTK_RESPONSE_OK) { + char *filename = gtk_file_chooser_get_filename (GTK_FILE_CHOOSER (dialog)); + + //if (strcmp(filename, currentFile)) { + CdromId[0] = '\0'; + CdromLabel[0] = '\0'; + + set_cd_image(filename); + if (ReloadCdromPlugin() < 0) + printf("Failed to load cdr plugin\n"); + + if (CDR_open() < 0) + printf("Failed to open cdr plugin\n"); + + strcpy(file_name, strrchr(filename,'/')); + + SetCdOpenCaseTime(time(NULL) + 3); + LidInterrupt(); + //} + g_free (filename); + } + + gtk_widget_destroy (dialog); +} + +void change_multi_disc() +{ + HildonDialog* window = HILDON_DIALOG(hildon_dialog_new()); + gtk_window_set_title (GTK_WINDOW (window), "Change disc"); + gtk_window_set_default_size(GTK_WINDOW (window), 480, 300); + + GtkWidget* sw = hildon_pannable_area_new (); + gtk_box_pack_start (GTK_BOX(GTK_DIALOG(window)->vbox), sw, TRUE, TRUE, 0); + + GtkWidget* tree_view = hildon_gtk_tree_view_new (HILDON_UI_MODE_EDIT); + gtk_widget_set_name (tree_view, "fremantle-widget"); + + gtk_tree_view_set_rules_hint (GTK_TREE_VIEW (tree_view), TRUE); + + int i; + GtkListStore *store = gtk_list_store_new (1, G_TYPE_STRING); + for (i = 0; i < cdrIsoMultidiskCount; i++) { + gchar *str; + + str = g_strdup_printf ("Disc %d", i+1); + gtk_list_store_insert_with_values (store, NULL, i, 0, str, -1); + g_free (str); + } + GtkTreeModel* model = GTK_TREE_MODEL (store); + + gtk_tree_view_set_model (GTK_TREE_VIEW (tree_view), model); + g_object_unref (model); + + GtkTreeSelection* selection = gtk_tree_view_get_selection (GTK_TREE_VIEW (tree_view)); + gtk_tree_selection_set_mode (selection, GTK_SELECTION_SINGLE); + + GtkCellRenderer* renderer = gtk_cell_renderer_text_new (); + g_object_set (renderer, + "xalign", 0.5, + "weight", PANGO_WEIGHT_NORMAL, + NULL); + + gtk_tree_view_insert_column_with_attributes (GTK_TREE_VIEW (tree_view), + 0, "Column 0", + renderer, + "text", 0, + NULL); + + char current[5]; + sprintf(current, "%i", cdrIsoMultidiskSelect); + GtkTreePath* path = gtk_tree_path_new_from_string(current); + gtk_tree_selection_select_path (selection, path); + gtk_tree_path_free(path); + + gtk_widget_set_size_request (tree_view, 480, 800); + gtk_container_add (GTK_CONTAINER (sw), tree_view); + + hildon_dialog_add_button (HILDON_DIALOG(window), GTK_STOCK_OK, GTK_RESPONSE_ACCEPT); + + gtk_widget_show_all (GTK_WIDGET(window)); + gint result = gtk_dialog_run (GTK_DIALOG (window)); + if (result == GTK_RESPONSE_ACCEPT) { + GtkTreeModel* model; + GtkTreeIter iter; + GtkTreeSelection* selection = gtk_tree_view_get_selection(GTK_TREE_VIEW(tree_view)); + if (gtk_tree_selection_get_selected(selection, &model, &iter)){ + GtkTreePath* path = gtk_tree_model_get_path(model , &iter); + int* i = gtk_tree_path_get_indices(path) ; + + cdrIsoMultidiskSelect = *i; + CdromId[0] = '\0'; + CdromLabel[0] = '\0'; + + CDR_close(); + if (CDR_open() < 0) { + printf("Failed to load cdr plugin\n"); + return; + } + + SetCdOpenCaseTime(time(NULL) + 3); + LidInterrupt(); + } + } + gtk_widget_destroy(GTK_WIDGET(window)); +} + +static DBusHandlerResult on_msg_recieved(DBusConnection* connection G_GNUC_UNUSED, DBusMessage* message, void* data) +{ + const char* path = dbus_message_get_path(message); + if (path && g_str_equal(path, CALL_SIGNAL_PATH)){ + const char* mbr = dbus_message_get_member(message); + if (mbr && g_str_equal(mbr, CALL_INCOMING_SIG)) + show_messagebox("Paused"); + } + + return DBUS_HANDLER_RESULT_NOT_YET_HANDLED; +} + +static void +window_key_proxy(GtkWidget *widget, + GdkEventKey *event, + gpointer user_data) +{ + key_press_event(event->hardware_keycode, event->type == GDK_KEY_PRESS ? 1 : (event->type == GDK_KEY_RELEASE ? 2 : 0) ); +} + +int last_key_pressed = 0; +inline void key_press_event(int key2,int type) +{ + int psxkey1 = -1, psxkey2 = -1; + int key=keymap[key2]; + + if (key < 0) + return; + + if (type == 1 && key2 == last_key_pressed) + return; + last_key_pressed = type == 1 ? key2 : 0; + + //printf("Key: %i %s\n", key2, type == 1 ? "Pressed" : (type == 2 ? "Released" : "Unknown")); + if (key < ARRAY_SIZE(keymap2)){ + psxkey1 = keymap2[key]; + }else switch (key) { + case 14: + quit(); + break; + case 15: + psxkey1 = DKEY_UP; + psxkey2 = DKEY_LEFT; + break; + case 16: + psxkey1 = DKEY_UP; + psxkey2 = DKEY_RIGHT; + break; + case 17: + psxkey1 = DKEY_DOWN; + psxkey2 = DKEY_LEFT; + break; + case 18: + psxkey1 = DKEY_DOWN; + psxkey2 = DKEY_RIGHT; + break; + case 19: + if (type == 1) + save(state_slot); + return; + case 20: + if (type == 1) + emu_load_state(state_slot); + return; + case 21: + if (type == 1) + change_slot(1); + return; + case 22: + if (type == 1) + change_slot(-1); + return; + case 23: + if (type == 1){ + if (cdrIsoMultidiskCount > 1) + change_multi_disc(); + else + change_disc(); + } + return; + } + + if (in_type1 == PSE_PAD_TYPE_GUNCON){ + if (type == 1) { + switch (psxkey1){ + case DKEY_CROSS: + in_state_gun |= SACTION_GUN_A; + break; + case DKEY_CIRCLE: + in_state_gun |= SACTION_GUN_B; + break; + case DKEY_TRIANGLE: + in_state_gun |= SACTION_GUN_TRIGGER2; + break; + case DKEY_SQUARE: + in_state_gun |= SACTION_GUN_TRIGGER; + break; + } + }else if (type == 2) { + switch (psxkey1){ + case DKEY_CROSS: + in_state_gun &= ~SACTION_GUN_A; + break; + case DKEY_CIRCLE: + in_state_gun &= ~SACTION_GUN_B; + break; + case DKEY_TRIANGLE: + in_state_gun &= ~SACTION_GUN_TRIGGER2; + break; + case DKEY_SQUARE: + in_state_gun &= ~SACTION_GUN_TRIGGER; + break; + } + } + }else{ + if (type == 1) { + if (psxkey1 >= 0) + in_keystate |= 1 << psxkey1; + if (psxkey2 >= 0) + in_keystate |= 1 << psxkey2; + + if (in_type1 == PSE_PAD_TYPE_ANALOGPAD){ + switch(psxkey1){ + case DKEY_LEFT: + in_a1[0] = 0; + break; + case DKEY_RIGHT: + in_a1[0] = 255; + break; + case DKEY_UP: + in_a1[1] = 0; + break; + case DKEY_DOWN: + in_a1[1] = 255; + break; + } + } + } + else if (type == 2) { + if (psxkey1 >= 0) + in_keystate &= ~(1 << psxkey1); + if (psxkey2 >= 0) + in_keystate &= ~(1 << psxkey2); + + if (in_type1 == PSE_PAD_TYPE_ANALOGPAD){ + switch(psxkey1){ + case DKEY_LEFT: + case DKEY_RIGHT: + in_a1[0] = 127; + break; + case DKEY_UP: + case DKEY_DOWN: + in_a1[1] = 127; + break; + } + } + emu_set_action(SACTION_NONE); + } + } +} + +void plat_finish() +{ + hildon_quit(); +} + +void set_accel_multipliers() +{ + accelOptions.xMultiplier = 255.0 / ( (accelOptions.maxValue - accelOptions.sens) * 2.0); + accelOptions.yMultiplier = 255.0 / ( (accelOptions.maxValue - accelOptions.sens) * 2.0); +} + +#include +int maemo_init(int *argc, char ***argv) +{ + osso = osso_initialize("pcsxrearmed", PACKAGE_VERSION, FALSE, NULL); + + DBusConnection* system_bus = (DBusConnection*)osso_get_sys_dbus_connection(osso); + dbus_bus_add_match(system_bus, DBUS_RULE_CALL_INCOMING, NULL); + dbus_connection_add_filter(system_bus, on_msg_recieved, NULL, NULL); + + FILE* pFile; + pFile = fopen(keys_config_file, "r"); + if (pFile == NULL){ + fprintf(stderr, "Error opening keys config file %s\n", keys_config_file); + return 1; + } + printf("Keys config read from %s\n", keys_config_file); + + int ch; + int i=0; + for (i=0;i<65536;i++) + keymap[i]=-1; + if (NULL != pFile) { + for(i=0;i<24;i++){ + fscanf(pFile, "%i",&ch); + keymap[ch]=i; + if (i < 4) + direction_keys[i] = ch; + } + fclose(pFile); + } + + switch (in_type1){ + case PSE_PAD_TYPE_GUNCON: + memset(cornerActions, 0, sizeof(cornerActions)); + printf("Controller set to GUNCON (SLPH-00034)\n"); + break; + case PSE_PAD_TYPE_STANDARD: + printf("Controller set to standard (SCPH-1080)\n"); + break; + case PSE_PAD_TYPE_ANALOGPAD: + printf("Controller set to analog (SCPH-1150)\n"); + break; + } + + if (in_enable_vibration) + printf("Vibration enabled\n"); + + if (!(g_maemo_opts&8)){ + gtk_init (argc, argv); + + window = hildon_stackable_window_new (); + gtk_widget_realize (window); + gtk_window_fullscreen (GTK_WINDOW(window)); + + if (cornerActions[0] + cornerActions[1] + cornerActions[2] + cornerActions[3] > 0){ + g_signal_connect (G_OBJECT (window), "button_release_event", + G_CALLBACK (window_button_proxy), NULL); + g_signal_connect (G_OBJECT (window), "button_press_event", + G_CALLBACK (window_button_proxy), NULL); + } + + g_signal_connect (G_OBJECT (window), "key-press-event", + G_CALLBACK (window_key_proxy), NULL); + g_signal_connect (G_OBJECT (window), "key-release-event", + G_CALLBACK (window_key_proxy), NULL); + g_signal_connect (G_OBJECT (window), "delete_event", + G_CALLBACK (hildon_quit), NULL); + gtk_widget_add_events (window, + GDK_BUTTON_PRESS_MASK | GDK_BUTTON_RELEASE_MASK); + + actor = HILDON_ANIMATION_ACTOR (hildon_animation_actor_new()); + if (g_maemo_opts & 2) + hildon_animation_actor_set_position (actor, 0, 0 ); + else + hildon_animation_actor_set_position (actor, (X_RES - D_WIDTH)/2, (Y_RES - D_HEIGHT)/2 ); + hildon_animation_actor_set_parent (actor, GTK_WINDOW (window)); + + drawing = gtk_image_new (); + + gtk_container_add (GTK_CONTAINER (actor), drawing); + + gtk_widget_show_all (GTK_WIDGET (actor)); + gtk_widget_show_all (GTK_WIDGET (window)); + }else{ + gtk_init (argc, argv); + /*GdkScreen* scr = gdk_screen_get_default(); + window = GTK_WIDGET(gdk_screen_get_root_window(scr)); + if (!window) + window = GTK_WIDGET(gdk_get_default_root_window());*/ + } + + set_accel_multipliers(); + + if (bKeepDisplayOn){ + if (pthread_create(&display_thread, NULL, displayThread, NULL)) + printf("Failed to create display thread.\n"); + } + + pl_rearmed_cbs.only_16bpp = 1; + return 0; +} + +void maemo_finish() +{ + if (display_thread > 0){ + bRunning = FALSE; + pthread_join(display_thread, NULL); + } + + if (osso){ + osso_deinitialize(osso); + osso = NULL; + } + printf("Exiting\n"); +} + +void menu_loop(void) +{ +} + +void *plat_gvideo_set_mode(int *w_, int *h_, int *bpp_) +{ + int w = *w_, h = *h_; + + if (g_maemo_opts&8) return pl_vout_buf; + //printf("Setting video mode %ix%i\n", w, h); + + if (w <= 0 || h <= 0) + return pl_vout_buf; + + if (image) gdk_image_destroy(image); + image = gdk_image_new( GDK_IMAGE_FASTEST, gdk_visual_get_system(), w, h ); + + pl_vout_buf = (void *) image->mem; + + gtk_image_set_from_image (GTK_IMAGE(drawing), image, NULL); + + gtk_window_resize (GTK_WINDOW (actor), w, h); + if (g_maemo_opts & 2) + hildon_animation_actor_set_scale (actor, + (gdouble)800 / (gdouble)w, + (gdouble)480 / (gdouble)h + ); + else + hildon_animation_actor_set_scale (actor, + (gdouble)D_WIDTH / (gdouble)w, + (gdouble)D_HEIGHT / (gdouble)h + ); + pl_buf_w=w;pl_buf_h=h; + return pl_vout_buf; +} + +void *plat_gvideo_flip(void) +{ + if (!(g_maemo_opts&8)) + gtk_widget_queue_draw(drawing); + + // process accelometer + if (g_maemo_opts & 4) { + float x, y, z; + FILE* f = fopen( "/sys/class/i2c-adapter/i2c-3/3-001d/coord", "r" ); + if( !f ) {printf ("err in accel"); exit(1);} + fscanf( f, "%f %f %f", &x, &y, &z ); + fclose( f ); + + if (in_type1 == PSE_PAD_TYPE_ANALOGPAD){ + if (x > accelOptions.maxValue) x = accelOptions.maxValue; + else if (x < -accelOptions.maxValue) x = -accelOptions.maxValue; + + const int maxValue = accelOptions.maxValue - accelOptions.sens; + if(x > accelOptions.sens){ + x -= accelOptions.sens; + in_a1[0] = (-x + maxValue ) * accelOptions.xMultiplier; + }else if (x < -accelOptions.sens){ + x += accelOptions.sens; + in_a1[0] = (-x + maxValue ) * accelOptions.xMultiplier; + }else in_a1[0] = 127; + + y += accelOptions.y_def; + if (y > accelOptions.maxValue) y = accelOptions.maxValue; + else if (y < -accelOptions.maxValue) y = -accelOptions.maxValue; + + if(y > accelOptions.sens){ + y -= accelOptions.sens; + in_a1[1] = (-y + maxValue ) * accelOptions.yMultiplier; + }else if (y < -accelOptions.sens){ + y += accelOptions.sens; + in_a1[1] = (-y + maxValue ) * accelOptions.yMultiplier; + }else in_a1[1] = 127; + + //printf("x: %i y: %i\n", in_a1[0], in_a1[1]); + }else{ + if( x > accelOptions.sens ) in_keystate |= 1 << DKEY_LEFT; + else if( x < -accelOptions.sens ) in_keystate |= 1 << DKEY_RIGHT; + else {in_keystate &= ~(1 << DKEY_LEFT);in_keystate &= ~(1 << DKEY_RIGHT);} + + y += accelOptions.y_def; + if( y > accelOptions.sens )in_keystate |= 1 << DKEY_UP; + else if( y < -accelOptions.sens ) in_keystate |= 1 << DKEY_DOWN; + else {in_keystate &= ~(1 << DKEY_DOWN);in_keystate &= ~(1 << DKEY_UP);} + } + } + + return pl_vout_buf; +} + +// for frontend/plugin_lib.c +void update_input(void) +{ + if (g_maemo_opts & 8) + maemo_x11_update_keys(); + else { + /* process GTK+ events */ + while (gtk_events_pending()) + gtk_main_iteration(); + } +} + +int omap_enable_layer(int enabled) +{ + return 0; +} + +void menu_notify_mode_change(int w, int h, int bpp) +{ +} + +void *plat_prepare_screenshot(int *w, int *h, int *bpp) +{ + return NULL; +} + +void plat_step_volume(int is_up) +{ +} + +void plat_trigger_vibrate(int pad, int low, int high) +{ + const int vDuration = 10; + + DBusConnection* system_bus = (DBusConnection*)osso_get_sys_dbus_connection(osso); + DBusMessageIter args; + DBusMessage*msg = dbus_message_new_method_call("com.nokia.mce", + "/com/nokia/mce/request", + "com.nokia.mce.request", + "req_start_manual_vibration"); + if (msg) { + dbus_message_iter_init_append(msg, &args); + // FIXME: somebody with hardware should tune this + int speed = high; // is_strong ? 200 : 150; + int duration = vDuration; + if (dbus_message_iter_append_basic(&args, DBUS_TYPE_INT32, &speed)) { + if (dbus_message_iter_append_basic(&args, DBUS_TYPE_INT32, &duration)) { + dbus_connection_send(system_bus, msg, NULL); + //dbus_connection_flush(system_bus); + } + } + dbus_message_unref(msg); + } +} + +void plat_minimize(void) +{ +} + +void plat_gvideo_close(void) +{ +} + +void plat_gvideo_open(int is_pal) +{ +} diff --git a/maemo/maemo_common.h b/maemo/maemo_common.h new file mode 100644 index 0000000..ace0bfd --- /dev/null +++ b/maemo/maemo_common.h @@ -0,0 +1,18 @@ +int maemo_init(int *argc, char ***argv); +void maemo_finish(); + +extern char file_name[MAXPATHLEN]; +extern int g_maemo_opts; + +extern inline void key_press_event(int key,int type); + +typedef struct +{ + int sens; + int y_def; + float maxValue; + float xMultiplier; + float yMultiplier; +} accel_option; + +extern accel_option accelOptions; diff --git a/maemo/maemo_xkb.c b/maemo/maemo_xkb.c new file mode 100644 index 0000000..52af2ca --- /dev/null +++ b/maemo/maemo_xkb.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2009, Wei Mingzhi . + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "../frontend/main.h" +#include "../frontend/plugin_lib.h" + +static Atom wmprotocols, wmdelwindow; +static int initialized; + + + +static void InitKeyboard(void) { + Display *disp = (Display *)gpuDisp; + if (disp){ + wmprotocols = XInternAtom(disp, "WM_PROTOCOLS", 0); + wmdelwindow = XInternAtom(disp, "WM_DELETE_WINDOW", 0); + XkbSetDetectableAutoRepeat(disp, 1, NULL); + } +} + +static void DestroyKeyboard(void) { + Display *disp = (Display *)gpuDisp; + if (disp) + XkbSetDetectableAutoRepeat(disp, 0, NULL); +} +#include "maemo_common.h" + +int maemo_x11_update_keys() { + + XEvent evt; + XClientMessageEvent *xce; + int leave = 0; + Display *disp = (Display *)gpuDisp; + + if (!disp) + return 0; + + if (!initialized) { + initialized++; + InitKeyboard(); + } + + while (XPending(disp)>0) { + XNextEvent(disp, &evt); + switch (evt.type) { + case KeyPress: + case KeyRelease: + key_press_event(evt.xkey.keycode, evt.type==KeyPress ? 1 : (evt.type==KeyRelease ? 2 : 0) ); + break; + + case ClientMessage: + xce = (XClientMessageEvent *)&evt; + if (xce->message_type == wmprotocols && (Atom)xce->data.l[0] == wmdelwindow) + leave = 1; + break; + } + } + + if (leave) { + DestroyKeyboard(); + exit(1); + } + + return 0; +} diff --git a/plugins/gpu_unai/Makefile b/plugins/gpu_unai/Makefile index 1075ee5..756d19a 100644 --- a/plugins/gpu_unai/Makefile +++ b/plugins/gpu_unai/Makefile @@ -1,6 +1,9 @@ CFLAGS += -ggdb -Wall -O3 -ffast-math CFLAGS += -DREARMED CFLAGS += -I../../include +#CFLAGS += -DINLINE="static __inline__" +#CFLAGS += -Dasm="__asm__ __volatile__" +CFLAGS += -DUSE_GPULIB=1 include ../../config.mak @@ -8,7 +11,7 @@ SRC_STANDALONE += gpu.cpp SRC_GPULIB += gpulib_if.cpp ifeq "$(ARCH)" "arm" -SRC += gpu_arm.s +SRC += gpu_arm.S endif #BIN_STANDALONE = gpuPCSX4ALL.so diff --git a/plugins/gpu_unai/README_senquack.txt b/plugins/gpu_unai/README_senquack.txt new file mode 100644 index 0000000..cda17fc --- /dev/null +++ b/plugins/gpu_unai/README_senquack.txt @@ -0,0 +1,956 @@ +//NOTE: You can find the set of original Unai poly routines (disabled now) +// at the bottom end of this file. + +//senquack - Original Unai GPU poly routines have been replaced with new +// ones based on DrHell routines. The original routines suffered from +// shifted rows, causing many quads to have their first triangle drawn +// correctly, but the second triangle would randomly have pixels shifted +// either left or right or entire rows not drawn at all. Furthermore, +// some times entire triangles seemed to be either missing or only +// partially drawn (most clearly seen in sky/road textures in NFS3, +// clock tower in beginning of Castlevania SOTN). Pixel gaps were +// prevalent. +// +// Since DrHell GPU didn't seem to exhibit these artifacts at all, I adapted +// its routines to GPU Unai (Unai was probably already originally based on it). +// DrHell uses 22.10 fixed point instead of Unai's 16.16, so gpu_fixedpoint.h +// required modification as well as gpu_inner.h (where gpuPolySpanFn driver +// functions are). +// +// Originally, I tried to patch up original Unai routines and got as far +// as fixing the shifted rows, but still had other problem of triangles rendered +// wrong (black triangular gaps in NFS3 sky, clock tower in Castlevania SOTN). +// I eventually gave up. Even after rewriting/adapting the routines, +// however, I still had some random pixel droupouts, specifically in +// NFS3 sky texture. I discovered that gpu_inner.h gpuPolySpanFn function +// was taking optimizations to an extreme and packing u/v texture coords +// into one 32-bit word, reducing their accuracy. Only once they were +// handled in full-accuracy individual words was that problem fixed. +// +// NOTE: I also added support for doing divisions using the FPU, either +// with normal division or multiplication-by-reciprocal. +// To use float division, GPU_UNAI_USE_FLOATMATH should be defined. +// To use float mult-by-reciprocal, GPU_UNAI_USE_FLOAT_DIV_MULTINV +// can be specified (GPU_UNAI_USE_FLOATMATH must also be specified) +// To use inaccurate fixed-point mult-by-reciprocal, define +// GPU_UNAI_USE_INT_DIV_MULTINV. This is the default on older +// ARM devices like Wiz/Caanoo that have neither integer division +// in hardware or an FPU. It results in some pixel dropouts, +// texture glitches, but less than the original GPU UNAI code. +// +// If nothing is specified, integer division will be used. +// +// NOTE 2: Even with MIPS32R2 having FPU recip.s instruction, and it is +// used when this platform is detected, I found it not to give any +// noticeable speedup over normal float division (in fact seemed a tiny +// tiny bit slower). I also found float division to not provide any +// noticeable speedups versus integer division on MISP32R2 platform. +// Granted, the differences were all around .5 FPS or less. +// +// TODO: +// * See if anything can be done about remaining pixel gaps in Gran +// Turismo car models, track. +// * Find better way of passing parameters to gpuPolySpanFn functions than +// through original Unai method of using global variables u4,v4,du4 etc. +// * Come up with some newer way of drawing rows of pixels than by calling +// gpuPolySpanFn through function pointer. For every row, at least on +// MIPS platforms, many registers are having to be pushed/popped from stack +// on each call, which is strange since MIPS has so many registers. +// * MIPS MXU/ASM optimized gpuPolySpanFn ? + +////////////////////////////////////////////////////////////////////////// +//senquack - Disabled original Unai poly routines left here for reference: +// ( from gpu_raster_polygon.h ) +////////////////////////////////////////////////////////////////////////// +#define GPU_TESTRANGE3() \ +{ \ + if(x0<0) { if((x1-x0)>CHKMAX_X) return; if((x2-x0)>CHKMAX_X) return; } \ + if(x1<0) { if((x0-x1)>CHKMAX_X) return; if((x2-x1)>CHKMAX_X) return; } \ + if(x2<0) { if((x0-x2)>CHKMAX_X) return; if((x1-x2)>CHKMAX_X) return; } \ + if(y0<0) { if((y1-y0)>CHKMAX_Y) return; if((y2-y0)>CHKMAX_Y) return; } \ + if(y1<0) { if((y0-y1)>CHKMAX_Y) return; if((y2-y1)>CHKMAX_Y) return; } \ + if(y2<0) { if((y0-y2)>CHKMAX_Y) return; if((y1-y2)>CHKMAX_Y) return; } \ +} + +/*---------------------------------------------------------------------- +F3 +----------------------------------------------------------------------*/ + +void gpuDrawF3(const PP gpuPolySpanDriver) +{ + const int li=linesInterlace; + const int pi=(progressInterlace?(linesInterlace+1):0); + const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); + s32 temp; + s32 xa, xb, xmin, xmax; + s32 ya, yb, ymin, ymax; + s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; + s32 y0, y1, y2; + + x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]); + y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]); + x1 = GPU_EXPANDSIGN(PacketBuffer.S2[4]); + y1 = GPU_EXPANDSIGN(PacketBuffer.S2[5]); + x2 = GPU_EXPANDSIGN(PacketBuffer.S2[6]); + y2 = GPU_EXPANDSIGN(PacketBuffer.S2[7]); + + GPU_TESTRANGE3(); + + x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; + y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; + + xmin = DrawingArea[0]; xmax = DrawingArea[2]; + ymin = DrawingArea[1]; ymax = DrawingArea[3]; + + { + int rx0 = Max2(xmin,Min3(x0,x1,x2)); + int ry0 = Max2(ymin,Min3(y0,y1,y2)); + int rx1 = Min2(xmax,Max3(x0,x1,x2)); + int ry1 = Min2(ymax,Max3(y0,y1,y2)); + if( rx0>=rx1 || ry0>=ry1) return; + } + + PixelData = GPU_RGB16(PacketBuffer.U4[0]); + + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); + GPU_SWAP(y0, y1, temp); + } + } + if (y1 >= y2) + { + if( y1!=y2 || x1>x2 ) + { + GPU_SWAP(x1, x2, temp); + GPU_SWAP(y1, y2, temp); + } + } + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); + GPU_SWAP(y0, y1, temp); + } + } + + ya = y2 - y0; + yb = y2 - y1; + dx =(x2 - x1) * ya - (x2 - x0) * yb; + + for (s32 loop0 = 2; loop0; --loop0) + { + if (loop0 == 2) + { + ya = y0; + yb = y1; + x3 = i2x(x0); + x4 = y0!=y1 ? x3 : i2x(x1); + if (dx < 0) + { + dx3 = xLoDivx((x2 - x0), (y2 - y0)); + dx4 = xLoDivx((x1 - x0), (y1 - y0)); + } + else + { + dx3 = xLoDivx((x1 - x0), (y1 - y0)); + dx4 = xLoDivx((x2 - x0), (y2 - y0)); + } + } + else + { + ya = y1; + yb = y2; + if (dx < 0) + { + x4 = i2x(x1); + x3 = i2x(x0) + (dx3 * (y1 - y0)); + dx4 = xLoDivx((x2 - x1), (y2 - y1)); + } + else + { + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + dx3 = xLoDivx((x2 - x1), (y2 - y1)); + } + } + + temp = ymin - ya; + if (temp > 0) + { + ya = ymin; + x3 += dx3*temp; + x4 += dx4*temp; + } + if (yb > ymax) yb = ymax; + if (ya>=yb) continue; + + x3+= fixed_HALF; + x4+= fixed_HALF; + + u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; + + for(;yaxmax) || (xb xmax) xb = xmax; + xb-=xa; + if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); + } + } +} + +/*---------------------------------------------------------------------- +FT3 +----------------------------------------------------------------------*/ + +void gpuDrawFT3(const PP gpuPolySpanDriver) +{ + const int li=linesInterlace; + const int pi=(progressInterlace?(linesInterlace+1):0); + const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); + s32 temp; + s32 xa, xb, xmin, xmax; + s32 ya, yb, ymin, ymax; + s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; + s32 y0, y1, y2; + s32 u0, u1, u2, u3, du3=0; + s32 v0, v1, v2, v3, dv3=0; + + x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); + y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] ); + x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] ); + y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] ); + x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]); + y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]); + + GPU_TESTRANGE3(); + + x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; + y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; + + xmin = DrawingArea[0]; xmax = DrawingArea[2]; + ymin = DrawingArea[1]; ymax = DrawingArea[3]; + + { + int rx0 = Max2(xmin,Min3(x0,x1,x2)); + int ry0 = Max2(ymin,Min3(y0,y1,y2)); + int rx1 = Min2(xmax,Max3(x0,x1,x2)); + int ry1 = Min2(ymax,Max3(y0,y1,y2)); + if( rx0>=rx1 || ry0>=ry1) return; + } + + u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9]; + u1 = PacketBuffer.U1[16]; v1 = PacketBuffer.U1[17]; + u2 = PacketBuffer.U1[24]; v2 = PacketBuffer.U1[25]; + + r4 = s32(PacketBuffer.U1[0]); + g4 = s32(PacketBuffer.U1[1]); + b4 = s32(PacketBuffer.U1[2]); + dr4 = dg4 = db4 = 0; + + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); + GPU_SWAP(y0, y1, temp); + GPU_SWAP(u0, u1, temp); + GPU_SWAP(v0, v1, temp); + } + } + if (y1 >= y2) + { + if( y1!=y2 || x1>x2 ) + { + GPU_SWAP(x1, x2, temp); + GPU_SWAP(y1, y2, temp); + GPU_SWAP(u1, u2, temp); + GPU_SWAP(v1, v2, temp); + } + } + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); + GPU_SWAP(y0, y1, temp); + GPU_SWAP(u0, u1, temp); + GPU_SWAP(v0, v1, temp); + } + } + + ya = y2 - y0; + yb = y2 - y1; + dx = (x2 - x1) * ya - (x2 - x0) * yb; + du4 = (u2 - u1) * ya - (u2 - u0) * yb; + dv4 = (v2 - v1) * ya - (v2 - v0) * yb; + + s32 iF,iS; + xInv( dx, iF, iS); + du4 = xInvMulx( du4, iF, iS); + dv4 = xInvMulx( dv4, iF, iS); + tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff); + tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff; + + for (s32 loop0 = 2; loop0; --loop0) + { + if (loop0 == 2) + { + ya = y0; + yb = y1; + u3 = i2x(u0); + v3 = i2x(v0); + x3 = i2x(x0); + x4 = y0!=y1 ? x3 : i2x(x1); + if (dx < 0) + { + xInv( (y2 - y0), iF, iS); + dx3 = xInvMulx( (x2 - x0), iF, iS); + du3 = xInvMulx( (u2 - u0), iF, iS); + dv3 = xInvMulx( (v2 - v0), iF, iS); + dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); + } + else + { + xInv( (y1 - y0), iF, iS); + dx3 = xInvMulx( (x1 - x0), iF, iS); + du3 = xInvMulx( (u1 - u0), iF, iS); + dv3 = xInvMulx( (v1 - v0), iF, iS); + dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); + } + } + else + { + ya = y1; + yb = y2; + if (dx < 0) + { + temp = y1 - y0; + u3 = i2x(u0) + (du3 * temp); + v3 = i2x(v0) + (dv3 * temp); + x3 = i2x(x0) + (dx3 * temp); + x4 = i2x(x1); + dx4 = xLoDivx((x2 - x1), (y2 - y1)); + } + else + { + u3 = i2x(u1); + v3 = i2x(v1); + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + xInv( (y2 - y1), iF, iS); + dx3 = xInvMulx( (x2 - x1), iF, iS); + du3 = xInvMulx( (u2 - u1), iF, iS); + dv3 = xInvMulx( (v2 - v1), iF, iS); + } + } + + temp = ymin - ya; + if (temp > 0) + { + ya = ymin; + x3 += dx3*temp; + x4 += dx4*temp; + u3 += du3*temp; + v3 += dv3*temp; + } + if (yb > ymax) yb = ymax; + if (ya>=yb) continue; + + x3+= fixed_HALF; + x4+= fixed_HALF; + u3+= fixed_HALF; + v4+= fixed_HALF; + + u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; + + for(;yaxmax) || (xb 0) + { + xa = xmin; + u4 = u3 + du4*temp; + v4 = v3 + dv4*temp; + } + else + { + u4 = u3; + v4 = v3; + } + if(xb > xmax) xb = xmax; + xb-=xa; + if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); + } + } +} + +/*---------------------------------------------------------------------- +G3 +----------------------------------------------------------------------*/ + +void gpuDrawG3(const PP gpuPolySpanDriver) +{ + const int li=linesInterlace; + const int pi=(progressInterlace?(linesInterlace+1):0); + const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); + s32 temp; + s32 xa, xb, xmin, xmax; + s32 ya, yb, ymin, ymax; + s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; + s32 y0, y1, y2; + s32 r0, r1, r2, r3, dr3=0; + s32 g0, g1, g2, g3, dg3=0; + s32 b0, b1, b2, b3, db3=0; + + x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); + y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] ); + x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] ); + y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] ); + x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]); + y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]); + + GPU_TESTRANGE3(); + + x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; + y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; + + xmin = DrawingArea[0]; xmax = DrawingArea[2]; + ymin = DrawingArea[1]; ymax = DrawingArea[3]; + + { + int rx0 = Max2(xmin,Min3(x0,x1,x2)); + int ry0 = Max2(ymin,Min3(y0,y1,y2)); + int rx1 = Min2(xmax,Max3(x0,x1,x2)); + int ry1 = Min2(ymax,Max3(y0,y1,y2)); + if( rx0>=rx1 || ry0>=ry1) return; + } + + r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2]; + r1 = PacketBuffer.U1[8]; g1 = PacketBuffer.U1[9]; b1 = PacketBuffer.U1[10]; + r2 = PacketBuffer.U1[16]; g2 = PacketBuffer.U1[17]; b2 = PacketBuffer.U1[18]; + + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); + GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); + } + } + if (y1 >= y2) + { + if( y1!=y2 || x1>x2 ) + { + GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp); + GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp); + } + } + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); + GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); + } + } + + ya = y2 - y0; + yb = y2 - y1; + dx = (x2 - x1) * ya - (x2 - x0) * yb; + dr4 = (r2 - r1) * ya - (r2 - r0) * yb; + dg4 = (g2 - g1) * ya - (g2 - g0) * yb; + db4 = (b2 - b1) * ya - (b2 - b0) * yb; + + s32 iF,iS; + xInv( dx, iF, iS); + dr4 = xInvMulx( dr4, iF, iS); + dg4 = xInvMulx( dg4, iF, iS); + db4 = xInvMulx( db4, iF, iS); + u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21; + u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10; + u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0; + lInc = db + dg + dr; + + for (s32 loop0 = 2; loop0; --loop0) + { + if (loop0 == 2) + { + ya = y0; + yb = y1; + r3 = i2x(r0); + g3 = i2x(g0); + b3 = i2x(b0); + x3 = i2x(x0); + x4 = y0!=y1 ? x3 : i2x(x1); + if (dx < 0) + { + xInv( (y2 - y0), iF, iS); + dx3 = xInvMulx( (x2 - x0), iF, iS); + dr3 = xInvMulx( (r2 - r0), iF, iS); + dg3 = xInvMulx( (g2 - g0), iF, iS); + db3 = xInvMulx( (b2 - b0), iF, iS); + dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); + } + else + { + xInv( (y1 - y0), iF, iS); + dx3 = xInvMulx( (x1 - x0), iF, iS); + dr3 = xInvMulx( (r1 - r0), iF, iS); + dg3 = xInvMulx( (g1 - g0), iF, iS); + db3 = xInvMulx( (b1 - b0), iF, iS); + dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); + } + } + else + { + ya = y1; + yb = y2; + if (dx < 0) + { + temp = y1 - y0; + r3 = i2x(r0) + (dr3 * temp); + g3 = i2x(g0) + (dg3 * temp); + b3 = i2x(b0) + (db3 * temp); + x3 = i2x(x0) + (dx3 * temp); + x4 = i2x(x1); + dx4 = xLoDivx((x2 - x1), (y2 - y1)); + } + else + { + r3 = i2x(r1); + g3 = i2x(g1); + b3 = i2x(b1); + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + + xInv( (y2 - y1), iF, iS); + dx3 = xInvMulx( (x2 - x1), iF, iS); + dr3 = xInvMulx( (r2 - r1), iF, iS); + dg3 = xInvMulx( (g2 - g1), iF, iS); + db3 = xInvMulx( (b2 - b1), iF, iS); + } + } + + temp = ymin - ya; + if (temp > 0) + { + ya = ymin; + x3 += dx3*temp; x4 += dx4*temp; + r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp; + } + if (yb > ymax) yb = ymax; + if (ya>=yb) continue; + + x3+= fixed_HALF; x4+= fixed_HALF; + r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF; + + u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; + + for(;yaxmax) || (xb 0) + { + xa = xmin; + r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp; + } + else + { + r4 = r3; g4 = g3; b4 = b3; + } + if(xb > xmax) xb = xmax; + xb-=xa; + if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); + } + } +} + +/*---------------------------------------------------------------------- +GT3 +----------------------------------------------------------------------*/ + +void gpuDrawGT3(const PP gpuPolySpanDriver) +{ + const int li=linesInterlace; + const int pi=(progressInterlace?(linesInterlace+1):0); + const int pif=(progressInterlace?(progressInterlace_flag?(linesInterlace+1):0):1); + s32 temp; + s32 xa, xb, xmin, xmax; + s32 ya, yb, ymin, ymax; + s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; + s32 y0, y1, y2; + s32 u0, u1, u2, u3, du3=0; + s32 v0, v1, v2, v3, dv3=0; + s32 r0, r1, r2, r3, dr3=0; + s32 g0, g1, g2, g3, dg3=0; + s32 b0, b1, b2, b3, db3=0; + + x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); + y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] ); + x1 = GPU_EXPANDSIGN(PacketBuffer.S2[8] ); + y1 = GPU_EXPANDSIGN(PacketBuffer.S2[9] ); + x2 = GPU_EXPANDSIGN(PacketBuffer.S2[14]); + y2 = GPU_EXPANDSIGN(PacketBuffer.S2[15]); + + GPU_TESTRANGE3(); + + x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; + y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; + + xmin = DrawingArea[0]; xmax = DrawingArea[2]; + ymin = DrawingArea[1]; ymax = DrawingArea[3]; + + { + int rx0 = Max2(xmin,Min3(x0,x1,x2)); + int ry0 = Max2(ymin,Min3(y0,y1,y2)); + int rx1 = Min2(xmax,Max3(x0,x1,x2)); + int ry1 = Min2(ymax,Max3(y0,y1,y2)); + if( rx0>=rx1 || ry0>=ry1) return; + } + + r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2]; + u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9]; + r1 = PacketBuffer.U1[12]; g1 = PacketBuffer.U1[13]; b1 = PacketBuffer.U1[14]; + u1 = PacketBuffer.U1[20]; v1 = PacketBuffer.U1[21]; + r2 = PacketBuffer.U1[24]; g2 = PacketBuffer.U1[25]; b2 = PacketBuffer.U1[26]; + u2 = PacketBuffer.U1[32]; v2 = PacketBuffer.U1[33]; + + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); + GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp); + GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); + } + } + if (y1 >= y2) + { + if( y1!=y2 || x1>x2 ) + { + GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp); + GPU_SWAP(u1, u2, temp); GPU_SWAP(v1, v2, temp); + GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp); + } + } + if (y0 >= y1) + { + if( y0!=y1 || x0>x1 ) + { + GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); + GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp); + GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); + } + } + + ya = y2 - y0; + yb = y2 - y1; + dx = (x2 - x1) * ya - (x2 - x0) * yb; + du4 = (u2 - u1) * ya - (u2 - u0) * yb; + dv4 = (v2 - v1) * ya - (v2 - v0) * yb; + dr4 = (r2 - r1) * ya - (r2 - r0) * yb; + dg4 = (g2 - g1) * ya - (g2 - g0) * yb; + db4 = (b2 - b1) * ya - (b2 - b0) * yb; + + s32 iF,iS; + + xInv( dx, iF, iS); + du4 = xInvMulx( du4, iF, iS); + dv4 = xInvMulx( dv4, iF, iS); + dr4 = xInvMulx( dr4, iF, iS); + dg4 = xInvMulx( dg4, iF, iS); + db4 = xInvMulx( db4, iF, iS); + u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21; + u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10; + u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0; + lInc = db + dg + dr; + tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff); + tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff; + + for (s32 loop0 = 2; loop0; --loop0) + { + if (loop0 == 2) + { + ya = y0; + yb = y1; + u3 = i2x(u0); + v3 = i2x(v0); + r3 = i2x(r0); + g3 = i2x(g0); + b3 = i2x(b0); + x3 = i2x(x0); + x4 = y0!=y1 ? x3 : i2x(x1); + if (dx < 0) + { + xInv( (y2 - y0), iF, iS); + dx3 = xInvMulx( (x2 - x0), iF, iS); + du3 = xInvMulx( (u2 - u0), iF, iS); + dv3 = xInvMulx( (v2 - v0), iF, iS); + dr3 = xInvMulx( (r2 - r0), iF, iS); + dg3 = xInvMulx( (g2 - g0), iF, iS); + db3 = xInvMulx( (b2 - b0), iF, iS); + dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); + } + else + { + xInv( (y1 - y0), iF, iS); + dx3 = xInvMulx( (x1 - x0), iF, iS); + du3 = xInvMulx( (u1 - u0), iF, iS); + dv3 = xInvMulx( (v1 - v0), iF, iS); + dr3 = xInvMulx( (r1 - r0), iF, iS); + dg3 = xInvMulx( (g1 - g0), iF, iS); + db3 = xInvMulx( (b1 - b0), iF, iS); + dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); + } + } + else + { + ya = y1; + yb = y2; + if (dx < 0) + { + temp = y1 - y0; + u3 = i2x(u0) + (du3 * temp); + v3 = i2x(v0) + (dv3 * temp); + r3 = i2x(r0) + (dr3 * temp); + g3 = i2x(g0) + (dg3 * temp); + b3 = i2x(b0) + (db3 * temp); + x3 = i2x(x0) + (dx3 * temp); + x4 = i2x(x1); + dx4 = xLoDivx((x2 - x1), (y2 - y1)); + } + else + { + u3 = i2x(u1); + v3 = i2x(v1); + r3 = i2x(r1); + g3 = i2x(g1); + b3 = i2x(b1); + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + + xInv( (y2 - y1), iF, iS); + dx3 = xInvMulx( (x2 - x1), iF, iS); + du3 = xInvMulx( (u2 - u1), iF, iS); + dv3 = xInvMulx( (v2 - v1), iF, iS); + dr3 = xInvMulx( (r2 - r1), iF, iS); + dg3 = xInvMulx( (g2 - g1), iF, iS); + db3 = xInvMulx( (b2 - b1), iF, iS); + } + } + + temp = ymin - ya; + if (temp > 0) + { + ya = ymin; + x3 += dx3*temp; x4 += dx4*temp; + u3 += du3*temp; v3 += dv3*temp; + r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp; + } + if (yb > ymax) yb = ymax; + if (ya>=yb) continue; + + x3+= fixed_HALF; x4+= fixed_HALF; + u3+= fixed_HALF; v4+= fixed_HALF; + r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF; + u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; + + for(;yaxmax) || (xb 0) + { + xa = xmin; + u4 = u3 + du4*temp; v4 = v3 + dv4*temp; + r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp; + } + else + { + u4 = u3; v4 = v3; + r4 = r3; g4 = g3; b4 = b3; + } + if(xb > xmax) xb = xmax; + xb-=xa; + if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); + } + } +} + + +////////////////////////////////////////////////////////////////////////// +//senquack - Original Unai poly routines left here for reference: +// ( from gpu_inner.h ) NOTE: this uses 16.16, not 22.10 fixed point +////////////////////////////////////////////////////////////////////////// +template +INLINE void gpuPolySpanFn(u16 *pDst, u32 count) +{ + if (!TM) + { + // NO TEXTURE + if (!G) + { + // NO GOURAUD + u16 data; + if (L) { u32 lCol=((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); gpuLightingRGB(data,lCol); } + else data=PixelData; + if ((!M)&&(!B)) + { + if (MB) { data = data | 0x8000; } + do { *pDst++ = data; } while (--count); + } + else if ((M)&&(!B)) + { + if (MB) { data = data | 0x8000; } + do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count); + } + else + { + u16 uSrc; + u16 uDst; + u32 uMsk; if (BM==0) uMsk=0x7BDE; + u32 bMsk; if (BI) bMsk=blit_mask; + do + { + // blit-mask + if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endtile; } + // masking + uDst = *pDst; + if(M) { if (uDst&0x8000) goto endtile; } + uSrc = data; + // blend + if (BM==0) gpuBlending00(uSrc, uDst); + if (BM==1) gpuBlending01(uSrc, uDst); + if (BM==2) gpuBlending02(uSrc, uDst); + if (BM==3) gpuBlending03(uSrc, uDst); + if (MB) { *pDst = uSrc | 0x8000; } + else { *pDst = uSrc; } + endtile: pDst++; + } + while (--count); + } + } + else + { + // GOURAUD + u16 uDst; + u16 uSrc; + u32 linc=lInc; + u32 lCol=((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); + u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE; + u32 bMsk; if (BI) bMsk=blit_mask; + do + { + // blit-mask + if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endgou; } + // masking + if(M) { uDst = *pDst; if (uDst&0x8000) goto endgou; } + // blend + if(B) + { + // light + gpuLightingRGB(uSrc,lCol); + if(!M) { uDst = *pDst; } + if (BM==0) gpuBlending00(uSrc, uDst); + if (BM==1) gpuBlending01(uSrc, uDst); + if (BM==2) gpuBlending02(uSrc, uDst); + if (BM==3) gpuBlending03(uSrc, uDst); + } + else + { + // light + gpuLightingRGB(uSrc,lCol); + } + if (MB) { *pDst = uSrc | 0x8000; } + else { *pDst = uSrc; } + endgou: pDst++; lCol=(lCol+linc); + } + while (--count); + } + } + else + { + // TEXTURE + u16 uDst; + u16 uSrc; + u32 linc; if (L&&G) linc=lInc; + u32 tinc=tInc; + u32 tmsk=tMsk; + u32 tCor = ((u32)( u4<<7)&0x7fff0000) | ((u32)( v4>>9)&0x00007fff); tCor&= tmsk; + const u16* _TBA=TBA; + const u16* _CBA; if (TM!=3) _CBA=CBA; + u32 lCol; + if(L && !G) { lCol = ((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); } + else if(L && G) { lCol = ((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); } + u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE; + u32 bMsk; if (BI) bMsk=blit_mask; + do + { + // blit-mask + if (BI) { if((bMsk>>((((u32)pDst)>>1)&7))&1) goto endpoly; } + // masking + if(M) { uDst = *pDst; if (uDst&0x8000) goto endpoly; } + // texture + if (TM==1) { u32 tu=(tCor>>23); u32 tv=(tCor<<4)&(0xff<<11); u8 rgb=((u8*)_TBA)[tv+(tu>>1)]; uSrc=_CBA[(rgb>>((tu&1)<<2))&0xf]; if(!uSrc) goto endpoly; } + if (TM==2) { uSrc = _CBA[(((u8*)_TBA)[(tCor>>23)+((tCor<<4)&(0xff<<11))])]; if(!uSrc) goto endpoly; } + if (TM==3) { uSrc = _TBA[(tCor>>23)+((tCor<<3)&(0xff<<10))]; if(!uSrc) goto endpoly; } + // blend + if(B) + { + if (uSrc&0x8000) + { + // light + if(L) gpuLightingTXT(uSrc, lCol); + if(!M) { uDst = *pDst; } + if (BM==0) gpuBlending00(uSrc, uDst); + if (BM==1) gpuBlending01(uSrc, uDst); + if (BM==2) gpuBlending02(uSrc, uDst); + if (BM==3) gpuBlending03(uSrc, uDst); + } + else + { + // light + if(L) gpuLightingTXT(uSrc, lCol); + } + } + else + { + // light + if(L) { gpuLightingTXT(uSrc, lCol); } else if(!MB) { uSrc&= 0x7fff; } + } + if (MB) { *pDst = uSrc | 0x8000; } + else { *pDst = uSrc; } + endpoly: pDst++; + tCor=(tCor+tinc)&tmsk; + if (L&&G) lCol=(lCol+linc); + } + while (--count); + } +} diff --git a/plugins/gpu_unai/gpu.cpp b/plugins/gpu_unai/gpu.cpp index 1552bed..c3f7095 100644 --- a/plugins/gpu_unai/gpu.cpp +++ b/plugins/gpu_unai/gpu.cpp @@ -1,6 +1,7 @@ /*************************************************************************** * Copyright (C) 2010 PCSX4ALL Team * * Copyright (C) 2010 Unai * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -18,102 +19,42 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * ***************************************************************************/ -#include "port.h" -#include "gpu.h" -#include "profiler.h" -#include "debug.h" +#include +#include "plugins.h" +#include "psxcommon.h" +//#include "port.h" +#include "gpu_unai.h" -int skipCount = 2; /* frame skip (0,1,2,3...) */ -int skCount = 0; /* internal frame skip */ -int linesInterlace = 0; /* internal lines interlace */ -int linesInterlace_user = 0; /* Lines interlace */ +#define VIDEO_WIDTH 320 -bool isSkip = false; /* skip frame (info coming from GPU) */ -bool wasSkip = false; -bool skipFrame = false; /* skip frame (according to frame skip) */ -bool alt_fps = false; /* Alternative FPS algorithm */ -bool show_fps = false; /* Show FPS statistics */ - -bool isPAL = false; /* PAL video timing */ -bool progressInterlace_flag = false; /* Progressive interlace flag */ -bool progressInterlace = false; /* Progressive interlace option*/ -bool frameLimit = false; /* frames to wait */ - -bool light = true; /* lighting */ -bool blend = true; /* blending */ -bool FrameToRead = false; /* load image in progress */ -bool FrameToWrite = false; /* store image in progress */ -bool fb_dirty = false; - -bool enableAbbeyHack = false; /* Abe's Odyssey hack */ - -u8 BLEND_MODE; -u8 TEXT_MODE; -u8 Masking; - -u16 PixelMSB; -u16 PixelData; - -/////////////////////////////////////////////////////////////////////////////// -// GPU Global data -/////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////// -// Dma Transfers info -s32 px,py; -s32 x_end,y_end; -u16* pvram; - -u32 GP0; -s32 PacketCount; -s32 PacketIndex; - -/////////////////////////////////////////////////////////////////////////////// -// Display status -u32 DisplayArea [6]; - -/////////////////////////////////////////////////////////////////////////////// -// Rasterizer status -u32 TextureWindow [4]; -u32 DrawingArea [4]; -u32 DrawingOffset [2]; +#ifdef TIME_IN_MSEC +#define TPS 1000 +#else +#define TPS 1000000 +#endif -/////////////////////////////////////////////////////////////////////////////// -// Rasterizer status +#define IS_PAL (gpu_unai.GPU_GP1&(0x08<<17)) -u16* TBA; -u16* CBA; +//senquack - Original 512KB of guard space seems not to be enough, as Xenogears +// accesses outside this range and crashes in town intro fight sequence. +// Increased to 2MB total (double PSX VRAM) and Xenogears no longer +// crashes, but some textures are still messed up. Also note that alignment min +// is 16 bytes, needed for pixel-skipping rendering/blitting in high horiz res. +// Extra 4KB is for guard room at beginning. +// TODO: Determine cause of out-of-bounds write/reads. <-- Note: this is largely +// solved by adoption of PCSX Rearmed's 'gpulib' in gpulib_if.cpp, which +// replaces this file (gpu.cpp) +//u16 GPU_FrameBuffer[(FRAME_BUFFER_SIZE+512*1024)/2] __attribute__((aligned(32))); +static u16 GPU_FrameBuffer[(FRAME_BUFFER_SIZE*2 + 4096)/2] __attribute__((aligned(32))); /////////////////////////////////////////////////////////////////////////////// -// Inner Loops -s32 u4, du4; -s32 v4, dv4; -s32 r4, dr4; -s32 g4, dg4; -s32 b4, db4; -u32 lInc; -u32 tInc, tMsk; - -GPUPacket PacketBuffer; -// FRAME_BUFFER_SIZE is defined in bytes; 512K is guard memory for out of range reads -u16 GPU_FrameBuffer[(FRAME_BUFFER_SIZE+512*1024)/2] __attribute__((aligned(2048))); -u32 GPU_GP1; +// GPU fixed point math +#include "gpu_fixedpoint.h" /////////////////////////////////////////////////////////////////////////////// -// Inner loop driver instanciation file +// Inner loop driver instantiation file #include "gpu_inner.h" -/////////////////////////////////////////////////////////////////////////////// -// GPU Raster Macros -#define GPU_RGB16(rgb) ((((rgb)&0xF80000)>>9)|(((rgb)&0xF800)>>6)|(((rgb)&0xF8)>>3)) - -#define GPU_EXPANDSIGN(x) (((s32)(x)<<21)>>21) - -#define CHKMAX_X 1024 -#define CHKMAX_Y 512 - -#define GPU_SWAP(a,b,t) {(t)=(a);(a)=(b);(b)=(t);} - /////////////////////////////////////////////////////////////////////////////// // GPU internal image drawing functions #include "gpu_raster_image.h" @@ -135,72 +76,88 @@ u32 GPU_GP1; #include "gpu_command.h" /////////////////////////////////////////////////////////////////////////////// -INLINE void gpuReset(void) +static void gpuReset(void) { - GPU_GP1 = 0x14802000; - TextureWindow[0] = 0; - TextureWindow[1] = 0; - TextureWindow[2] = 255; - TextureWindow[3] = 255; - DrawingArea[2] = 256; - DrawingArea[3] = 240; - DisplayArea[2] = 256; - DisplayArea[3] = 240; - DisplayArea[5] = 240; + memset((void*)&gpu_unai, 0, sizeof(gpu_unai)); + gpu_unai.vram = (u16*)GPU_FrameBuffer + (4096/2); //4kb guard room in front + gpu_unai.GPU_GP1 = 0x14802000; + gpu_unai.DrawingArea[2] = 256; + gpu_unai.DrawingArea[3] = 240; + gpu_unai.DisplayArea[2] = 256; + gpu_unai.DisplayArea[3] = 240; + gpu_unai.DisplayArea[5] = 240; + gpu_unai.TextureWindow[0] = 0; + gpu_unai.TextureWindow[1] = 0; + gpu_unai.TextureWindow[2] = 255; + gpu_unai.TextureWindow[3] = 255; + //senquack - new vars must be updated whenever texture window is changed: + // (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h) + const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 + gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); + + // Configuration options + gpu_unai.config = gpu_unai_config_ext; + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; + gpu_unai.frameskip.skipCount = gpu_unai.config.frameskip_count; + + SetupLightLUT(); + SetupDitheringConstants(); } /////////////////////////////////////////////////////////////////////////////// -bool GPU_init(void) +long GPU_init(void) { gpuReset(); - + +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV // s_invTable - for(int i=1;i<=(1<>1); - #else - v *= double(0x80000000); - #endif - s_invTable[i-1]=s32(v); + s_invTable[i-1]=0x7fffffff/i; } +#endif + + gpu_unai.fb_dirty = true; + gpu_unai.dma.last_dma = NULL; return (0); } /////////////////////////////////////////////////////////////////////////////// -void GPU_shutdown(void) +long GPU_shutdown(void) { + return 0; } /////////////////////////////////////////////////////////////////////////////// -long GPU_freeze(unsigned int bWrite, GPUFreeze_t* p2) +long GPU_freeze(u32 bWrite, GPUFreeze_t* p2) { if (!p2) return (0); - if (p2->Version != 1) return (0); + if (p2->ulFreezeVersion != 1) return (0); if (bWrite) { - p2->GPU_gp1 = GPU_GP1; - memset(p2->Control, 0, sizeof(p2->Control)); + p2->ulStatus = gpu_unai.GPU_GP1; + memset(p2->ulControl, 0, sizeof(p2->ulControl)); // save resolution and registers for P.E.Op.S. compatibility - p2->Control[3] = (3 << 24) | ((GPU_GP1 >> 23) & 1); - p2->Control[4] = (4 << 24) | ((GPU_GP1 >> 29) & 3); - p2->Control[5] = (5 << 24) | (DisplayArea[0] | (DisplayArea[1] << 10)); - p2->Control[6] = (6 << 24) | (2560 << 12); - p2->Control[7] = (7 << 24) | (DisplayArea[4] | (DisplayArea[5] << 10)); - p2->Control[8] = (8 << 24) | ((GPU_GP1 >> 17) & 0x3f) | ((GPU_GP1 >> 10) & 0x40); - memcpy(p2->FrameBuffer, (u16*)GPU_FrameBuffer, FRAME_BUFFER_SIZE); + p2->ulControl[3] = (3 << 24) | ((gpu_unai.GPU_GP1 >> 23) & 1); + p2->ulControl[4] = (4 << 24) | ((gpu_unai.GPU_GP1 >> 29) & 3); + p2->ulControl[5] = (5 << 24) | (gpu_unai.DisplayArea[0] | (gpu_unai.DisplayArea[1] << 10)); + p2->ulControl[6] = (6 << 24) | (2560 << 12); + p2->ulControl[7] = (7 << 24) | (gpu_unai.DisplayArea[4] | (gpu_unai.DisplayArea[5] << 10)); + p2->ulControl[8] = (8 << 24) | ((gpu_unai.GPU_GP1 >> 17) & 0x3f) | ((gpu_unai.GPU_GP1 >> 10) & 0x40); + memcpy((void*)p2->psxVRam, (void*)gpu_unai.vram, FRAME_BUFFER_SIZE); return (1); } else { - GPU_GP1 = p2->GPU_gp1; - memcpy((u16*)GPU_FrameBuffer, p2->FrameBuffer, FRAME_BUFFER_SIZE); - GPU_writeStatus((5 << 24) | p2->Control[5]); - GPU_writeStatus((7 << 24) | p2->Control[7]); - GPU_writeStatus((8 << 24) | p2->Control[8]); - gpuSetTexture(GPU_GP1); + extern void GPU_writeStatus(u32 data); + gpu_unai.GPU_GP1 = p2->ulStatus; + memcpy((void*)gpu_unai.vram, (void*)p2->psxVRam, FRAME_BUFFER_SIZE); + GPU_writeStatus((5 << 24) | p2->ulControl[5]); + GPU_writeStatus((7 << 24) | p2->ulControl[7]); + GPU_writeStatus((8 << 24) | p2->ulControl[8]); + gpuSetTexture(gpu_unai.GPU_GP1); return (1); } return (0); @@ -233,72 +190,69 @@ u8 PacketSize[256] = /////////////////////////////////////////////////////////////////////////////// INLINE void gpuSendPacket() { -#ifdef DEBUG_ANALYSIS - dbg_anacnt_GPU_sendPacket++; -#endif - gpuSendPacketFunction(PacketBuffer.U4[0]>>24); + gpuSendPacketFunction(gpu_unai.PacketBuffer.U4[0]>>24); } /////////////////////////////////////////////////////////////////////////////// INLINE void gpuCheckPacket(u32 uData) { - if (PacketCount) + if (gpu_unai.PacketCount) { - PacketBuffer.U4[PacketIndex++] = uData; - --PacketCount; + gpu_unai.PacketBuffer.U4[gpu_unai.PacketIndex++] = uData; + --gpu_unai.PacketCount; } else { - PacketBuffer.U4[0] = uData; - PacketCount = PacketSize[uData >> 24]; - PacketIndex = 1; + gpu_unai.PacketBuffer.U4[0] = uData; + gpu_unai.PacketCount = PacketSize[uData >> 24]; + gpu_unai.PacketIndex = 1; } - if (!PacketCount) gpuSendPacket(); + if (!gpu_unai.PacketCount) gpuSendPacket(); } /////////////////////////////////////////////////////////////////////////////// -void GPU_writeDataMem(u32* dmaAddress, s32 dmaCount) +void GPU_writeDataMem(u32* dmaAddress, int dmaCount) { -#ifdef DEBUG_ANALYSIS - dbg_anacnt_GPU_writeDataMem++; -#endif - pcsx4all_prof_pause(PCSX4ALL_PROF_CPU); - pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE); + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_writeDataMem(%d)\n",dmaCount); + #endif u32 data; - const u16 *VIDEO_END=(GPU_FrameBuffer+(FRAME_BUFFER_SIZE/2)-1); - GPU_GP1 &= ~0x14000000; + const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1; + gpu_unai.GPU_GP1 &= ~0x14000000; while (dmaCount) { - if (FrameToWrite) + if (gpu_unai.dma.FrameToWrite) { while (dmaCount) { dmaCount--; data = *dmaAddress++; - if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024; - pvram[px] = data; - if (++px>=x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.dma.pvram[gpu_unai.dma.px] = data; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - px = 0; - pvram += 1024; - if (++py>=y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - FrameToWrite = false; - GPU_GP1 &= ~0x08000000; + gpu_unai.dma.FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.fb_dirty = true; break; } } - if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024; - pvram[px] = data>>16; - if (++px>=x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.dma.pvram[gpu_unai.dma.px] = data>>16; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - px = 0; - pvram += 1024; - if (++py>=y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - FrameToWrite = false; - GPU_GP1 &= ~0x08000000; + gpu_unai.dma.FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.fb_dirty = true; break; } } @@ -312,95 +266,100 @@ void GPU_writeDataMem(u32* dmaAddress, s32 dmaCount) } } - GPU_GP1 = (GPU_GP1 | 0x14000000) & ~0x60000000; - fb_dirty = true; - pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE); - pcsx4all_prof_resume(PCSX4ALL_PROF_CPU); + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 | 0x14000000) & ~0x60000000; } -u32 *lUsedAddr[3]; -INLINE int CheckForEndlessLoop(u32 *laddr) +long GPU_dmaChain(u32 *rambase, u32 start_addr) { - if(laddr==lUsedAddr[1]) return 1; - if(laddr==lUsedAddr[2]) return 1; + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_dmaChain(0x%x)\n",start_addr); + #endif - if(laddr> 2)); - if(DMACommandCounter++ > 2000000) break; - if(CheckForEndlessLoop(address)) break; - data = *address++; - count = (data >> 24); - offset = data & 0x001FFFFF; - if (dmaVAddr != offset) dmaVAddr = offset; - else dmaVAddr = 0x1FFFFF; - - if(count>0) GPU_writeDataMem(address,count); - dma_words += 1 + count; + list = rambase + (addr & 0x1fffff) / 4; + len = list[0] >> 24; + addr = list[0] & 0xffffff; + + dma_words += 1 + len; + + // add loop detection marker + list[0] |= 0x800000; + + if (len) GPU_writeDataMem(list + 1, len); + + if (addr & 0x800000) + { + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_dmaChain(LOOP)\n"); + #endif + break; + } } - GPU_GP1 = (GPU_GP1 | 0x14000000) & ~0x60000000; - pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE); + + // remove loop detection markers + addr = start_addr & 0x1fffff; + while (count-- > 0) + { + list = rambase + addr / 4; + addr = list[0] & 0x1fffff; + list[0] &= ~0x800000; + } + + if (gpu_unai.dma.last_dma) *gpu_unai.dma.last_dma &= ~0x800000; + gpu_unai.dma.last_dma = rambase + (start_addr & 0x1fffff) / 4; + + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 | 0x14000000) & ~0x60000000; return dma_words; } /////////////////////////////////////////////////////////////////////////////// -void GPU_writeData(u32 data) +void GPU_writeData(u32 data) { - const u16 *VIDEO_END=(GPU_FrameBuffer+(FRAME_BUFFER_SIZE/2)-1); -#ifdef DEBUG_ANALYSIS - dbg_anacnt_GPU_writeData++; -#endif - pcsx4all_prof_pause(PCSX4ALL_PROF_CPU); - pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE); - GPU_GP1 &= ~0x14000000; + const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1; + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_writeData()\n"); + #endif + gpu_unai.GPU_GP1 &= ~0x14000000; - if (FrameToWrite) + if (gpu_unai.dma.FrameToWrite) { - if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024; - pvram[px]=(u16)data; - if (++px>=x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.dma.pvram[gpu_unai.dma.px]=(u16)data; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - px = 0; - pvram += 1024; - if (++py>=y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - FrameToWrite = false; - GPU_GP1 &= ~0x08000000; + gpu_unai.dma.FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.fb_dirty = true; } } - if (FrameToWrite) + if (gpu_unai.dma.FrameToWrite) { - if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024; - pvram[px]=data>>16; - if (++px>=x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.dma.pvram[gpu_unai.dma.px]=data>>16; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - px = 0; - pvram += 1024; - if (++py>=y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - FrameToWrite = false; - GPU_GP1 &= ~0x08000000; + gpu_unai.dma.FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.fb_dirty = true; } } } @@ -409,507 +368,463 @@ void GPU_writeData(u32 data) { gpuCheckPacket(data); } - GPU_GP1 |= 0x14000000; - fb_dirty = true; - pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE); - pcsx4all_prof_resume(PCSX4ALL_PROF_CPU); - + gpu_unai.GPU_GP1 |= 0x14000000; } /////////////////////////////////////////////////////////////////////////////// -void GPU_readDataMem(u32* dmaAddress, s32 dmaCount) +void GPU_readDataMem(u32* dmaAddress, int dmaCount) { - const u16 *VIDEO_END=(GPU_FrameBuffer+(FRAME_BUFFER_SIZE/2)-1); -#ifdef DEBUG_ANALYSIS - dbg_anacnt_GPU_readDataMem++; -#endif - if(!FrameToRead) return; + const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1; + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_readDataMem(%d)\n",dmaCount); + #endif + if(!gpu_unai.dma.FrameToRead) return; - pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE); - GPU_GP1 &= ~0x14000000; + gpu_unai.GPU_GP1 &= ~0x14000000; do { - if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024; + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; // lower 16 bit - u32 data = pvram[px]; + //senquack - 64-bit fix (from notaz) + //u32 data = (unsigned long)gpu_unai.dma.pvram[gpu_unai.dma.px]; + u32 data = (u32)gpu_unai.dma.pvram[gpu_unai.dma.px]; - if (++px>=x_end) + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - px = 0; - pvram += 1024; + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; } - if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024; + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; // higher 16 bit (always, even if it's an odd width) - data |= (u32)(pvram[px])<<16; + //senquack - 64-bit fix (from notaz) + //data |= (unsigned long)(gpu_unai.dma.pvram[gpu_unai.dma.px])<<16; + data |= (u32)(gpu_unai.dma.pvram[gpu_unai.dma.px])<<16; *dmaAddress++ = data; - if (++px>=x_end) + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - px = 0; - pvram += 1024; - if (++py>=y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - FrameToRead = false; - GPU_GP1 &= ~0x08000000; + gpu_unai.dma.FrameToRead = false; + gpu_unai.GPU_GP1 &= ~0x08000000; break; } } } while (--dmaCount); - GPU_GP1 = (GPU_GP1 | 0x14000000) & ~0x60000000; - pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE); + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 | 0x14000000) & ~0x60000000; } /////////////////////////////////////////////////////////////////////////////// -u32 GPU_readData(void) +u32 GPU_readData(void) { - const u16 *VIDEO_END=(GPU_FrameBuffer+(FRAME_BUFFER_SIZE/2)-1); -#ifdef DEBUG_ANALYSIS - dbg_anacnt_GPU_readData++; -#endif - pcsx4all_prof_pause(PCSX4ALL_PROF_CPU); - pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_READ); - GPU_GP1 &= ~0x14000000; - if (FrameToRead) + const u16 *VIDEO_END = (u16*)gpu_unai.vram+(FRAME_BUFFER_SIZE/2)-1; + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_readData()\n"); + #endif + gpu_unai.GPU_GP1 &= ~0x14000000; + if (gpu_unai.dma.FrameToRead) { - if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024; - GP0 = pvram[px]; - if (++px>=x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.GPU_GP0 = gpu_unai.dma.pvram[gpu_unai.dma.px]; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - px = 0; - pvram += 1024; - if (++py>=y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - FrameToRead = false; - GPU_GP1 &= ~0x08000000; + gpu_unai.dma.FrameToRead = false; + gpu_unai.GPU_GP1 &= ~0x08000000; } } - if ((&pvram[px])>(VIDEO_END)) pvram-=512*1024; - GP0 |= pvram[px]<<16; - if (++px>=x_end) + if ((&gpu_unai.dma.pvram[gpu_unai.dma.px])>(VIDEO_END)) gpu_unai.dma.pvram-=512*1024; + gpu_unai.GPU_GP0 |= gpu_unai.dma.pvram[gpu_unai.dma.px]<<16; + if (++gpu_unai.dma.px >= gpu_unai.dma.x_end) { - px = 0; - pvram +=1024; - if (++py>=y_end) + gpu_unai.dma.px = 0; + gpu_unai.dma.pvram += 1024; + if (++gpu_unai.dma.py >= gpu_unai.dma.y_end) { - FrameToRead = false; - GPU_GP1 &= ~0x08000000; + gpu_unai.dma.FrameToRead = false; + gpu_unai.GPU_GP1 &= ~0x08000000; } } } - GPU_GP1 |= 0x14000000; + gpu_unai.GPU_GP1 |= 0x14000000; - pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_READ); - pcsx4all_prof_resume(PCSX4ALL_PROF_CPU); - return (GP0); + return (gpu_unai.GPU_GP0); } /////////////////////////////////////////////////////////////////////////////// -u32 GPU_readStatus(void) +u32 GPU_readStatus(void) { -#ifdef DEBUG_ANALYSIS - dbg_anacnt_GPU_readStatus++; -#endif - return GPU_GP1; + return gpu_unai.GPU_GP1; +} + +INLINE void GPU_NoSkip(void) +{ + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_NoSkip()\n"); + #endif + gpu_unai.frameskip.wasSkip = gpu_unai.frameskip.isSkip; + if (gpu_unai.frameskip.isSkip) + { + gpu_unai.frameskip.isSkip = false; + gpu_unai.frameskip.skipGPU = false; + } + else + { + gpu_unai.frameskip.isSkip = gpu_unai.frameskip.skipFrame; + gpu_unai.frameskip.skipGPU = gpu_unai.frameskip.skipFrame; + } } /////////////////////////////////////////////////////////////////////////////// void GPU_writeStatus(u32 data) { -#ifdef DEBUG_ANALYSIS - dbg_anacnt_GPU_writeStatus++; -#endif - pcsx4all_prof_pause(PCSX4ALL_PROF_CPU); - pcsx4all_prof_start_with_pause(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE); + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_writeStatus(%d,%d)\n",data>>24,data & 0xff); + #endif switch (data >> 24) { case 0x00: gpuReset(); break; case 0x01: - GPU_GP1 &= ~0x08000000; - PacketCount = 0; FrameToRead = FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.PacketCount = 0; + gpu_unai.dma.FrameToRead = gpu_unai.dma.FrameToWrite = false; break; case 0x02: - GPU_GP1 &= ~0x08000000; - PacketCount = 0; FrameToRead = FrameToWrite = false; + gpu_unai.GPU_GP1 &= ~0x08000000; + gpu_unai.PacketCount = 0; + gpu_unai.dma.FrameToRead = gpu_unai.dma.FrameToWrite = false; break; case 0x03: - GPU_GP1 = (GPU_GP1 & ~0x00800000) | ((data & 1) << 23); + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x00800000) | ((data & 1) << 23); break; case 0x04: - if (data == 0x04000000) - PacketCount = 0; - GPU_GP1 = (GPU_GP1 & ~0x60000000) | ((data & 3) << 29); + if (data == 0x04000000) gpu_unai.PacketCount = 0; + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x60000000) | ((data & 3) << 29); break; case 0x05: - DisplayArea[0] = (data & 0x000003FF); //(short)(data & 0x3ff); - DisplayArea[1] = ((data & 0x0007FC00)>>10); //(data & 0x000FFC00) >> 10; //(short)((data>>10)&0x1ff); - fb_dirty = true; - wasSkip = isSkip; - if (isSkip) - isSkip = false; - else - isSkip = skipFrame; + // Start of Display Area in VRAM + gpu_unai.DisplayArea[0] = data & 0x3ff; // X (0..1023) + gpu_unai.DisplayArea[1] = (data >> 10) & 0x1ff; // Y (0..511) + GPU_NoSkip(); + break; + case 0x06: + // GP1(06h) - Horizontal Display range (on Screen) + // 0-11 X1 (260h+0) ;12bit ;\counted in 53.222400MHz units, + // 12-23 X2 (260h+320*8) ;12bit ;/relative to HSYNC + + // senquack - gpu_unai completely ignores GP1(0x06) command and + // lacks even a place in DisplayArea[] array to store the values. + // It seems to have been concerned only with vertical display range + // and centering top/bottom. I will not add support here, and + // focus instead on the gpulib version (gpulib_if.cpp) which uses + // gpulib for its PS1->host framebuffer blitting. break; case 0x07: - DisplayArea[4] = data & 0x000003FF; //(short)(data & 0x3ff); - DisplayArea[5] = (data & 0x000FFC00) >> 10; //(short)((data>>10) & 0x3ff); - fb_dirty = true; + // GP1(07h) - Vertical Display range (on Screen) + // 0-9 Y1 (NTSC=88h-(224/2), (PAL=A3h-(264/2)) ;\scanline numbers on screen, + // 10-19 Y2 (NTSC=88h+(224/2), (PAL=A3h+(264/2)) ;/relative to VSYNC + // 20-23 Not used (zero) + { + u32 v1=data & 0x000003FF; //(short)(data & 0x3ff); + u32 v2=(data & 0x000FFC00) >> 10; //(short)((data>>10) & 0x3ff); + if ((gpu_unai.DisplayArea[4]!=v1)||(gpu_unai.DisplayArea[5]!=v2)) + { + gpu_unai.DisplayArea[4] = v1; + gpu_unai.DisplayArea[5] = v2; + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"video_clear(CHANGE_Y)\n"); + #endif + video_clear(); + } + } break; case 0x08: { - GPU_GP1 = (GPU_GP1 & ~0x007F0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10); - static u32 HorizontalResolution[8] = { 256, 368, 320, 384, 512, 512, 640, 640 }; - DisplayArea[2] = HorizontalResolution[(GPU_GP1 >> 16) & 7]; - static u32 VerticalResolution[4] = { 240, 480, 256, 480 }; - DisplayArea[3] = VerticalResolution[(GPU_GP1 >> 19) & 3]; - isPAL = (data & 0x08) ? true : false; // if 1 - PAL mode, else NTSC + static const u32 HorizontalResolution[8] = { 256, 368, 320, 384, 512, 512, 640, 640 }; + static const u32 VerticalResolution[4] = { 240, 480, 256, 480 }; + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x007F0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10); + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_writeStatus(RES=%dx%d,BITS=%d,PAL=%d)\n",HorizontalResolution[(gpu_unai.GPU_GP1 >> 16) & 7], + VerticalResolution[(gpu_unai.GPU_GP1 >> 19) & 3],(gpu_unai.GPU_GP1&0x00200000?24:15),(IS_PAL?1:0)); + #endif + // Video mode change + u32 new_width = HorizontalResolution[(gpu_unai.GPU_GP1 >> 16) & 7]; + u32 new_height = VerticalResolution[(gpu_unai.GPU_GP1 >> 19) & 3]; + + if (gpu_unai.DisplayArea[2] != new_width || gpu_unai.DisplayArea[3] != new_height) + { + // Update width + gpu_unai.DisplayArea[2] = new_width; + + if (PixelSkipEnabled()) { + // Set blit_mask for high horizontal resolutions. This allows skipping + // rendering pixels that would never get displayed on low-resolution + // platforms that use simple pixel-dropping scaler. + switch (gpu_unai.DisplayArea[2]) + { + case 512: gpu_unai.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS + case 640: gpu_unai.blit_mask = 0xaa; break; // GPU_BlitWS + default: gpu_unai.blit_mask = 0; break; + } + } else { + gpu_unai.blit_mask = 0; + } + + // Update height + gpu_unai.DisplayArea[3] = new_height; + + if (LineSkipEnabled()) { + // Set rendering line-skip (only render every other line in high-res + // 480 vertical mode, or, optionally, force it for all video modes) + + if (gpu_unai.DisplayArea[3] == 480) { + if (gpu_unai.config.ilace_force) { + gpu_unai.ilace_mask = 3; // Only need 1/4 of lines + } else { + gpu_unai.ilace_mask = 1; // Only need 1/2 of lines + } + } else { + // Vert resolution changed from 480 to lower one + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; + } + } else { + gpu_unai.ilace_mask = 0; + } + + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"video_clear(CHANGE_RES)\n"); + #endif + video_clear(); + } + } - fb_dirty = true; break; case 0x10: - switch (data & 0xffff) { - case 0: - case 1: - case 3: - GP0 = (DrawingArea[1] << 10) | DrawingArea[0]; - break; - case 4: - GP0 = ((DrawingArea[3]-1) << 10) | (DrawingArea[2]-1); - break; - case 6: - case 5: - GP0 = (DrawingOffset[1] << 11) | DrawingOffset[0]; - break; - case 7: - GP0 = 2; - break; - default: - GP0 = 0; + switch (data & 0xff) { + case 2: gpu_unai.GPU_GP0 = gpu_unai.tex_window; break; + case 3: gpu_unai.GPU_GP0 = (gpu_unai.DrawingArea[1] << 10) | gpu_unai.DrawingArea[0]; break; + case 4: gpu_unai.GPU_GP0 = ((gpu_unai.DrawingArea[3]-1) << 10) | (gpu_unai.DrawingArea[2]-1); break; + case 5: case 6: gpu_unai.GPU_GP0 = (((u32)gpu_unai.DrawingOffset[1] & 0x7ff) << 11) | ((u32)gpu_unai.DrawingOffset[0] & 0x7ff); break; + case 7: gpu_unai.GPU_GP0 = 2; break; + case 8: case 15: gpu_unai.GPU_GP0 = 0xBFC03720; break; } break; } - pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_HW_WRITE); - pcsx4all_prof_resume(PCSX4ALL_PROF_CPU); } -#ifndef REARMED - // Blitting functions #include "gpu_blit.h" -INLINE void gpuVideoOutput(void) +static void gpuVideoOutput(void) { - static s16 old_res_horz, old_res_vert, old_rgb24; - s16 h0, x0, y0, w0, h1; + int h0, x0, y0, w0, h1; - x0 = DisplayArea[0]; - y0 = DisplayArea[1]; + x0 = gpu_unai.DisplayArea[0]; + y0 = gpu_unai.DisplayArea[1]; - w0 = DisplayArea[2]; - h0 = DisplayArea[3]; // video mode + w0 = gpu_unai.DisplayArea[2]; + h0 = gpu_unai.DisplayArea[3]; // video mode - h1 = DisplayArea[5] - DisplayArea[4]; // display needed + h1 = gpu_unai.DisplayArea[5] - gpu_unai.DisplayArea[4]; // display needed if (h0 == 480) h1 = Min2(h1*2,480); - u16* dest_screen16 = SCREEN; - u16* src_screen16 = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0,y0)]; - u32 isRGB24 = (GPU_GP1 & 0x00200000 ? 32 : 0); + bool isRGB24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false); + u16* dst16 = SCREEN; + u16* src16 = (u16*)gpu_unai.vram; - /* Clear the screen if resolution changed to prevent interlacing and clipping to clash */ - if( (w0 != old_res_horz || h1 != old_res_vert || (s16)isRGB24 != old_rgb24) ) - { - // Update old resolution - old_res_horz = w0; - old_res_vert = h1; - old_rgb24 = (s16)isRGB24; - // Finally, clear the screen for this special case - video_clear(); - } + // PS1 fb read wraps around (fixes black screen in 'Tobal no. 1') + unsigned int src16_offs_msk = 1024*512-1; + unsigned int src16_offs = (x0 + y0*1024) & src16_offs_msk; // Height centering int sizeShift = 1; - if(h0==256) h0 = 240; else if(h0==480) sizeShift = 2; - if(h1>h0) { src_screen16 += ((h1-h0)>>sizeShift)*1024; h1 = h0; } - else if(h1>sizeShift)*VIDEO_WIDTH; + if (h0 == 256) { + h0 = 240; + } else if (h0 == 480) { + sizeShift = 2; + } + if (h1 > h0) { + src16_offs = (src16_offs + (((h1-h0) / 2) * 1024)) & src16_offs_msk; + h1 = h0; + } else if (h1> sizeShift) * VIDEO_WIDTH; + } + /* Main blitter */ int incY = (h0==480) ? 2 : 1; h0=(h0==480 ? 2048 : 1024); { - const int li=linesInterlace; - bool pi=progressInterlace; - bool pif=progressInterlace_flag; + const int li=gpu_unai.ilace_mask; + bool pi = ProgressiveInterlaceEnabled(); + bool pif = gpu_unai.prog_ilace_flag; switch ( w0 ) { case 256: for(int y1=y0+h1; y0>3 (8 times per second) +#define GPU_FRAMESKIP_UPDATE 3 - // Limit FPS - if (frameLimit) - { - static unsigned next=get_ticks(); - if (!skipFrame) - { - unsigned now=get_ticks(); - if (now=1000000) + static u32 spd=100; // speed % + static u32 frames=0; // frames counter + static u32 prev=now; // previous fps calculation + frames++; + if ((now-prev)>=(TPS>>GPU_FRAMESKIP_UPDATE)) { - u32 expected_fps=(isPAL?50:60); - sprintf(msg,"FPS=%3d/%2d SPD=%3d%%",((real_fps*(12-skipCount))/12),((expected_fps*(12-skipCount))/12),((real_fps*100)/expected_fps)); + if (IS_PAL) spd=(frames<<1); + else spd=((frames*1001)/600); + spd<<=GPU_FRAMESKIP_UPDATE; + frames=0; prev=now; - real_fps=0; } - port_printf(5,5,msg); - } - - // Update frame-skip - if (!alt_fps) - { - // Video frame-skip - skipFrame=skipTable[skipCount][skCount]; - skCount--; if (skCount<0) skCount=11; - isSkip=skipFrame; - } - else - { - // Game frame-skip - if (!isSkip) + switch(gpu_unai.frameskip.skipCount) { - skipFrame=skipTable[skipCount][skCount]; - skCount--; if (skCount<0) skCount=11; - isSkip=true; + case 1: if (spd<50) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<50%) + case 2: if (spd<60) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<60%) + case 3: if (spd<70) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<70%) + case 4: if (spd<80) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<80%) + case 5: if (spd<90) gpu_unai.frameskip.skipFrame=true; else gpu_unai.frameskip.skipFrame=false; break; // frameskip on (spd<90%) } } - fb_dirty=false; - - pcsx4all_prof_end_with_resume(PCSX4ALL_PROF_GPU,PCSX4ALL_PROF_COUNTERS); -} - -#else - -#include "../../frontend/plugin_lib.h" - -extern "C" { - -static const struct rearmed_cbs *cbs; -static s16 old_res_horz, old_res_vert, old_rgb24; - -static void blit(void) -{ - u16 *base = (u16 *)GPU_FrameBuffer; - s16 isRGB24 = (GPU_GP1 & 0x00200000) ? 1 : 0; - s16 h0, x0, y0, w0, h1; - - x0 = DisplayArea[0] & ~1; // alignment needed by blitter - y0 = DisplayArea[1]; - base += FRAME_OFFSET(x0, y0); - - w0 = DisplayArea[2]; - h0 = DisplayArea[3]; // video mode - - h1 = DisplayArea[5] - DisplayArea[4]; // display needed - if (h0 == 480) h1 = Min2(h1*2,480); - - if (h1 <= 0) - return; - - if (w0 != old_res_horz || h1 != old_res_vert || isRGB24 != old_rgb24) - { - old_res_horz = w0; - old_res_vert = h1; - old_rgb24 = (s16)isRGB24; - cbs->pl_vout_set_mode(w0, h1, w0, h1, isRGB24 ? 24 : 16); - } - - cbs->pl_vout_flip(base, 1024, isRGB24, w0, h1); } +/////////////////////////////////////////////////////////////////////////////// void GPU_updateLace(void) { // Interlace bit toggle - GPU_GP1 ^= 0x80000000; + gpu_unai.GPU_GP1 ^= 0x80000000; - if (!fb_dirty || (GPU_GP1&0x08800000)) - return; - - if (!wasSkip) { - blit(); - fb_dirty = false; - skCount = 0; - } - else { - skCount++; - if (skCount >= 8) - wasSkip = isSkip = 0; + // Update display? + if ((gpu_unai.fb_dirty) && (!gpu_unai.frameskip.wasSkip) && (!(gpu_unai.GPU_GP1&0x00800000))) + { + // Display updated + gpuVideoOutput(); + GPU_frameskip(true); + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_updateLace(UPDATE)\n"); + #endif + } else { + GPU_frameskip(false); + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"GPU_updateLace(SKIP)\n"); + #endif } - skipFrame = cbs->fskip_advice || cbs->frameskip == 1; -} + if ((!gpu_unai.frameskip.skipCount) && (gpu_unai.DisplayArea[3] == 480)) gpu_unai.frameskip.skipGPU=true; // Tekken 3 hack -long GPUopen(unsigned long *, char *, char *) -{ - cbs->pl_vout_open(); - return 0; + gpu_unai.fb_dirty=false; + gpu_unai.dma.last_dma = NULL; } -long GPUclose(void) +// Allows frontend to signal plugin to redraw screen after returning to emu +void GPU_requestScreenRedraw() { - cbs->pl_vout_close(); - return 0; + gpu_unai.fb_dirty = true; } -long GPUfreeze(unsigned int ulGetFreezeData, GPUFreeze_t* p2) +void GPU_getScreenInfo(GPUScreenInfo_t *sinfo) { - if (ulGetFreezeData > 1) - return 0; - - return GPU_freeze(ulGetFreezeData, p2); + bool depth24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false); + int16_t hres = (uint16_t)gpu_unai.DisplayArea[2]; + int16_t vres = (uint16_t)gpu_unai.DisplayArea[3]; + int16_t w = hres; // Original gpu_unai doesn't support width < 100% + int16_t h = gpu_unai.DisplayArea[5] - gpu_unai.DisplayArea[4]; + if (vres == 480) + h *= 2; + if (h <= 0 || h > vres) + h = vres; + + sinfo->vram = (uint8_t*)gpu_unai.vram; + sinfo->x = (uint16_t)gpu_unai.DisplayArea[0]; + sinfo->y = (uint16_t)gpu_unai.DisplayArea[1]; + sinfo->w = w; + sinfo->h = h; + sinfo->hres = hres; + sinfo->vres = vres; + sinfo->depth24 = depth24; + sinfo->pal = IS_PAL; } - -void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_) -{ - enableAbbeyHack = cbs_->gpu_unai.abe_hack; - light = !cbs_->gpu_unai.no_light; - blend = !cbs_->gpu_unai.no_blend; - if (cbs_->pl_vout_set_raw_vram) - cbs_->pl_vout_set_raw_vram((void *)GPU_FrameBuffer); - - cbs = cbs_; - if (cbs->pl_set_gpu_caps) - cbs->pl_set_gpu_caps(0); -} - -} /* extern "C" */ - -#endif diff --git a/plugins/gpu_unai/gpu.h b/plugins/gpu_unai/gpu.h index 1811630..eade2a8 100644 --- a/plugins/gpu_unai/gpu.h +++ b/plugins/gpu_unai/gpu.h @@ -1,6 +1,7 @@ /*************************************************************************** * Copyright (C) 2010 PCSX4ALL Team * * Copyright (C) 2010 Unai * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -18,70 +19,52 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * ***************************************************************************/ -#ifndef NEW_GPU_H -#define NEW_GPU_H +#ifndef GPU_UNAI_GPU_H +#define GPU_UNAI_GPU_H -/////////////////////////////////////////////////////////////////////////////// -// GPU global definitions -#define FRAME_BUFFER_SIZE (1024*512*2) -#define FRAME_WIDTH 1024 -#define FRAME_HEIGHT 512 -#define FRAME_OFFSET(x,y) (((y)<<10)+(x)) +struct gpu_unai_config_t { + uint8_t pixel_skip:1; // If 1, allows skipping rendering pixels that + // would not be visible when a high horizontal + // resolution PS1 video mode is set. + // Only applies to devices with low resolutions + // like 320x240. Should not be used if a + // down-scaling framebuffer blitter is in use. + // Can cause gfx artifacts if game reads VRAM + // to do framebuffer effects. -#define VIDEO_WIDTH 320 + uint8_t ilace_force:3; // Option to force skipping rendering of lines, + // for very slow platforms. Value will be + // assigned to 'ilace_mask' in gpu_unai struct. + // Normally 0. Value '1' will skip rendering + // odd lines. -typedef char s8; -typedef signed short s16; -typedef signed int s32; -typedef signed long long s64; + uint8_t lighting:1; + uint8_t fast_lighting:1; + uint8_t blending:1; + uint8_t dithering:1; -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -typedef unsigned long long u64; + //senquack Only PCSX Rearmed's version of gpu_unai had this, and I + // don't think it's necessary. It would require adding 'AH' flag to + // gpuSpriteSpanFn() increasing size of sprite span function array. + //uint8_t enableAbbeyHack:1; // Abe's Odyssey hack -#include "gpu_fixedpoint.h" - -/////////////////////////////////////////////////////////////////////////////// -// Tweaks and Hacks -extern int skipCount; -extern bool enableAbbeyHack; -extern bool show_fps; -extern bool alt_fps; - -/////////////////////////////////////////////////////////////////////////////// -// interlaced rendering -extern int linesInterlace_user; -extern bool progressInterlace; - -extern bool light; -extern bool blend; - -typedef struct { - u32 Version; - u32 GPU_gp1; - u32 Control[256]; - unsigned char FrameBuffer[1024*512*2]; -} GPUFreeze_t; - -struct GPUPacket -{ - union - { - u32 U4[16]; - s32 S4[16]; - u16 U2[32]; - s16 S2[32]; - u8 U1[64]; - s8 S1[64]; - }; + //////////////////////////////////////////////////////////////////////////// + // Variables used only by older standalone version of gpu_unai (gpu.cpp) +#ifndef USE_GPULIB + uint8_t prog_ilace:1; // Progressive interlace option (old option) + // This option was somewhat oddly named: + // When in interlaced video mode, on a low-res + // 320x240 device, only the even lines are + // rendered. This option will take that one + // step further and only render half the even + // even lines one frame, and then the other half. + uint8_t frameskip_count:3; // Frame skip (0..7) +#endif }; -/////////////////////////////////////////////////////////////////////////////// -// Compile Options +extern gpu_unai_config_t gpu_unai_config_ext; -//#define ENABLE_GPU_NULL_SUPPORT // Enables NullGPU support -//#define ENABLE_GPU_LOG_SUPPORT // Enables gpu logger, very slow only for windows debugging +// TODO: clean up show_fps frontend option +extern bool show_fps; -/////////////////////////////////////////////////////////////////////////////// -#endif // NEW_GPU_H +#endif // GPU_UNAI_GPU_H diff --git a/plugins/gpu_unai/gpu_blit.h b/plugins/gpu_unai/gpu_blit.h index 35cd056..e93f12f 100644 --- a/plugins/gpu_unai/gpu_blit.h +++ b/plugins/gpu_unai/gpu_blit.h @@ -32,10 +32,10 @@ /////////////////////////////////////////////////////////////////////////////// // GPU Blitting code with rescale and interlace support. -INLINE void GPU_BlitWW(const void* src, u16* dst16, u32 isRGB24) +INLINE void GPU_BlitWW(const void* src, u16* dst16, bool isRGB24) { u32 uCount; - if(isRGB24 == 0) + if(!isRGB24) { #ifndef USE_BGR15 uCount = 20; @@ -85,10 +85,10 @@ INLINE void GPU_BlitWW(const void* src, u16* dst16, u32 isRGB24) } } -INLINE void GPU_BlitWWSWWSWS(const void* src, u16* dst16, u32 isRGB24) +INLINE void GPU_BlitWWSWWSWS(const void* src, u16* dst16, bool isRGB24) { u32 uCount; - if(isRGB24 == 0) + if(!isRGB24) { #ifndef USE_BGR15 uCount = 32; @@ -145,10 +145,10 @@ INLINE void GPU_BlitWWSWWSWS(const void* src, u16* dst16, u32 isRGB24) } } -INLINE void GPU_BlitWWWWWS(const void* src, u16* dst16, u32 isRGB24) +INLINE void GPU_BlitWWWWWS(const void* src, u16* dst16, bool isRGB24) { u32 uCount; - if(isRGB24 == 0) + if(!isRGB24) { #ifndef USE_BGR15 uCount = 32; @@ -201,10 +201,10 @@ INLINE void GPU_BlitWWWWWS(const void* src, u16* dst16, u32 isRGB24) } } -INLINE void GPU_BlitWWWWWWWWS(const void* src, u16* dst16, u32 isRGB24, u32 uClip_src) +INLINE void GPU_BlitWWWWWWWWS(const void* src, u16* dst16, bool isRGB24, u32 uClip_src) { u32 uCount; - if(isRGB24 == 0) + if(!isRGB24) { #ifndef USE_BGR15 uCount = 20; @@ -274,10 +274,10 @@ INLINE void GPU_BlitWWWWWWWWS(const void* src, u16* dst16, u32 isRGB24, u32 uCli } } -INLINE void GPU_BlitWWDWW(const void* src, u16* dst16, u32 isRGB24) +INLINE void GPU_BlitWWDWW(const void* src, u16* dst16, bool isRGB24) { u32 uCount; - if(isRGB24 == 0) + if(!isRGB24) { #ifndef USE_BGR15 uCount = 32; @@ -331,10 +331,10 @@ INLINE void GPU_BlitWWDWW(const void* src, u16* dst16, u32 isRGB24) } -INLINE void GPU_BlitWS(const void* src, u16* dst16, u32 isRGB24) +INLINE void GPU_BlitWS(const void* src, u16* dst16, bool isRGB24) { u32 uCount; - if(isRGB24 == 0) + if(!isRGB24) { #ifndef USE_BGR15 uCount = 20; diff --git a/plugins/gpu_unai/gpu_command.h b/plugins/gpu_unai/gpu_command.h index d6e7a74..7096b75 100644 --- a/plugins/gpu_unai/gpu_command.h +++ b/plugins/gpu_unai/gpu_command.h @@ -1,6 +1,7 @@ /*************************************************************************** * Copyright (C) 2010 PCSX4ALL Team * * Copyright (C) 2010 Unai * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -19,34 +20,35 @@ ***************************************************************************/ /////////////////////////////////////////////////////////////////////////////// -INLINE void gpuSetTexture(u16 tpage) +void gpuSetTexture(u16 tpage) { - u32 tp; - u32 tx, ty; - GPU_GP1 = (GPU_GP1 & ~0x1FF) | (tpage & 0x1FF); + u32 tmode, tx, ty; + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x1FF) | (tpage & 0x1FF); + gpu_unai.TextureWindow[0]&= ~gpu_unai.TextureWindow[2]; + gpu_unai.TextureWindow[1]&= ~gpu_unai.TextureWindow[3]; - TextureWindow[0]&= ~TextureWindow[2]; - TextureWindow[1]&= ~TextureWindow[3]; + tmode = (tpage >> 7) & 3; // 16bpp, 8bpp, or 4bpp texture colors? + // 0: 4bpp 1: 8bpp 2/3: 16bpp + + // Nocash PSX docs state setting of 3 is same as setting of 2 (16bpp): + // Note: DrHell assumes 3 is same as 0.. TODO: verify which is correct? + if (tmode == 3) tmode = 2; - tp = (tpage >> 7) & 3; tx = (tpage & 0x0F) << 6; ty = (tpage & 0x10) << 4; - if (tp == 3) tp = 2; - tx += (TextureWindow[0] >> (2 - tp)); - ty += TextureWindow[1]; + tx += (gpu_unai.TextureWindow[0] >> (2 - tmode)); + ty += gpu_unai.TextureWindow[1]; - BLEND_MODE = (((tpage>>5)&0x3) ) << 3; - TEXT_MODE = (((tpage>>7)&0x3) + 1 ) << 5; // +1 el cero no lo usamos - - TBA = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(tx, ty)]; - + gpu_unai.BLEND_MODE = ((tpage>>5) & 3) << 3; + gpu_unai.TEXT_MODE = (tmode + 1) << 5; // gpu_unai.TEXT_MODE should be values 1..3, so add one + gpu_unai.TBA = &((u16*)gpu_unai.vram)[FRAME_OFFSET(tx, ty)]; } /////////////////////////////////////////////////////////////////////////////// INLINE void gpuSetCLUT(u16 clut) { - CBA = &((u16*)GPU_FrameBuffer)[(clut & 0x7FFF) << 4]; + gpu_unai.CBA = &((u16*)gpu_unai.vram)[(clut & 0x7FFF) << 4]; } #ifdef ENABLE_GPU_NULL_SUPPORT @@ -61,159 +63,305 @@ INLINE void gpuSetCLUT(u16 clut) #define DO_LOG(expr) {} #endif -#define Blending (((PRIM&0x2)&&(blend))?(PRIM&0x2):0) -#define Blending_Mode (((PRIM&0x2)&&(blend))?BLEND_MODE:0) -#define Lighting (((~PRIM)&0x1)&&(light)) +#define Blending (((PRIM&0x2) && BlendingEnabled()) ? (PRIM&0x2) : 0) +#define Blending_Mode (((PRIM&0x2) && BlendingEnabled()) ? gpu_unai.BLEND_MODE : 0) +#define Lighting (((~PRIM)&0x1) && LightingEnabled()) +// Dithering applies only to Gouraud-shaded polys or texture-blended polys: +#define Dithering (((((~PRIM)&0x1) || (PRIM&0x10)) && DitheringEnabled()) ? \ + (ForcedDitheringEnabled() ? (1<<9) : (gpu_unai.GPU_GP1 & (1 << 9))) \ + : 0) + +/////////////////////////////////////////////////////////////////////////////// +//Now handled by Rearmed's gpulib and gpu_unai/gpulib_if.cpp: +/////////////////////////////////////////////////////////////////////////////// +#ifndef USE_GPULIB + +// Handles GP0 draw settings commands 0xE1...0xE6 +static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) +{ + // Assume incoming GP0 command is 0xE1..0xE6, convert to 1..6 + u8 num = (cmd_word >> 24) & 7; + switch (num) { + case 1: { + // GP0(E1h) - Draw Mode setting (aka "Texpage") + DO_LOG(("GP0(0xE1) DrawMode TexPage(0x%x)\n", cmd_word)); + u32 cur_texpage = gpu_unai.GPU_GP1 & 0x7FF; + u32 new_texpage = cmd_word & 0x7FF; + if (cur_texpage != new_texpage) { + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x7FF) | new_texpage; + gpuSetTexture(gpu_unai.GPU_GP1); + } + } break; + + case 2: { + // GP0(E2h) - Texture Window setting + DO_LOG(("GP0(0xE2) TextureWindow(0x%x)\n", cmd_word)); + if (cmd_word != gpu_unai.TextureWindowCur) { + static const u8 TextureMask[32] = { + 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7, + 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7 + }; + gpu_unai.TextureWindowCur = cmd_word; + gpu_unai.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3; + gpu_unai.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3; + gpu_unai.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F]; + gpu_unai.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F]; + gpu_unai.TextureWindow[0] &= ~gpu_unai.TextureWindow[2]; + gpu_unai.TextureWindow[1] &= ~gpu_unai.TextureWindow[3]; + + // Inner loop vars must be updated whenever texture window is changed: + const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 + gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); + + gpuSetTexture(gpu_unai.GPU_GP1); + } + } break; + + case 3: { + // GP0(E3h) - Set Drawing Area top left (X1,Y1) + DO_LOG(("GP0(0xE3) DrawingArea Pos(0x%x)\n", cmd_word)); + gpu_unai.DrawingArea[0] = cmd_word & 0x3FF; + gpu_unai.DrawingArea[1] = (cmd_word >> 10) & 0x3FF; + } break; + + case 4: { + // GP0(E4h) - Set Drawing Area bottom right (X2,Y2) + DO_LOG(("GP0(0xE4) DrawingArea Size(0x%x)\n", cmd_word)); + gpu_unai.DrawingArea[2] = (cmd_word & 0x3FF) + 1; + gpu_unai.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1; + } break; + + case 5: { + // GP0(E5h) - Set Drawing Offset (X,Y) + DO_LOG(("GP0(0xE5) DrawingOffset(0x%x)\n", cmd_word)); + gpu_unai.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11); + gpu_unai.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11); + } break; + + case 6: { + // GP0(E6h) - Mask Bit Setting + DO_LOG(("GP0(0xE6) SetMask(0x%x)\n", cmd_word)); + gpu_unai.Masking = (cmd_word & 0x2) << 1; + gpu_unai.PixelMSB = (cmd_word & 0x1) << 8; + } break; + } +} void gpuSendPacketFunction(const int PRIM) { //printf("0x%x\n",PRIM); + //senquack - TODO: optimize this (packet pointer union as prim draw parameter + // introduced as optimization for gpulib command-list processing) + PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer }; + switch (PRIM) { - case 0x02: + case 0x02: { NULL_GPU(); - gpuClearImage(); // prim handles updateLace && skip + gpuClearImage(packet); // prim handles updateLace && skip + gpu_unai.fb_dirty = true; DO_LOG(("gpuClearImage(0x%x)\n",PRIM)); - break; + } break; + case 0x20: case 0x21: case 0x22: - case 0x23: - if (!isSkip) + case 0x23: { // Monochrome 3-pt poly + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuDrawF3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB]); - DO_LOG(("gpuDrawF3(0x%x)\n",PRIM)); + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Blending_Mode | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB + ]; + gpuDrawPolyF(packet, driver, false); + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawPolyF(0x%x)\n",PRIM)); } - break; + } break; + case 0x24: case 0x25: case 0x26: - case 0x27: - if (!isSkip) + case 0x27: { // Textured 3-pt poly + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (PacketBuffer.U4[4] >> 16); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB]); - else - gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB]); - DO_LOG(("gpuDrawFT3(0x%x)\n",PRIM)); + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_unai.PacketBuffer.U4[4] >> 16); + + u32 driver_idx = + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB; + + if (!FastLightingEnabled()) { + driver_idx |= Lighting; + } else { + if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))) + driver_idx |= Lighting; + } + + PP driver = gpuPolySpanDrivers[driver_idx]; + gpuDrawPolyFT(packet, driver, false); + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawPolyFT(0x%x)\n",PRIM)); } - break; + } break; + case 0x28: case 0x29: case 0x2A: - case 0x2B: - if (!isSkip) + case 0x2B: { // Monochrome 4-pt poly + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB]; - //--PacketBuffer.S2[6]; - gpuDrawF3(gpuPolySpanDriver); - PacketBuffer.U4[1] = PacketBuffer.U4[4]; - //--PacketBuffer.S2[2]; - gpuDrawF3(gpuPolySpanDriver); - DO_LOG(("gpuDrawF4(0x%x)\n",PRIM)); + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Blending_Mode | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB + ]; + gpuDrawPolyF(packet, driver, true); // is_quad = true + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawPolyF(0x%x) (4-pt QUAD)\n",PRIM)); } - break; + } break; + case 0x2C: case 0x2D: case 0x2E: - case 0x2F: - if (!isSkip) + case 0x2F: { // Textured 4-pt poly + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (PacketBuffer.U4[4] >> 16); - PP gpuPolySpanDriver; - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB]; - else - gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB]; - //--PacketBuffer.S2[6]; - gpuDrawFT3(gpuPolySpanDriver); - PacketBuffer.U4[1] = PacketBuffer.U4[7]; - PacketBuffer.U4[2] = PacketBuffer.U4[8]; - //--PacketBuffer.S2[2]; - gpuDrawFT3(gpuPolySpanDriver); - DO_LOG(("gpuDrawFT4(0x%x)\n",PRIM)); + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_unai.PacketBuffer.U4[4] >> 16); + + u32 driver_idx = + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB; + + if (!FastLightingEnabled()) { + driver_idx |= Lighting; + } else { + if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))) + driver_idx |= Lighting; + } + + PP driver = gpuPolySpanDrivers[driver_idx]; + gpuDrawPolyFT(packet, driver, true); // is_quad = true + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawPolyFT(0x%x) (4-pt QUAD)\n",PRIM)); } - break; + } break; + case 0x30: case 0x31: case 0x32: - case 0x33: - if (!isSkip) + case 0x33: { // Gouraud-shaded 3-pt poly + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuDrawG3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB]); - DO_LOG(("gpuDrawG3(0x%x)\n",PRIM)); + //NOTE: The '129' here is CF_GOURAUD | CF_LIGHT, however + // this is an untextured poly, so CF_LIGHT (texture blend) + // shouldn't apply. Until the original array of template + // instantiation ptrs is fixed, we're stuck with this. (TODO) + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | + gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB + ]; + gpuDrawPolyG(packet, driver, false); + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawPolyG(0x%x)\n",PRIM)); } - break; + } break; + case 0x34: case 0x35: case 0x36: - case 0x37: - if (!isSkip) + case 0x37: { // Gouraud-shaded, textured 3-pt poly + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (PacketBuffer.U4[5] >> 16); - gpuDrawGT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB]); - DO_LOG(("gpuDrawGT3(0x%x)\n",PRIM)); + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16); + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB + ]; + gpuDrawPolyGT(packet, driver, false); + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawPolyGT(0x%x)\n",PRIM)); } - break; + } break; + case 0x38: case 0x39: case 0x3A: - case 0x3B: - if (!isSkip) + case 0x3B: { // Gouraud-shaded 4-pt poly + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB]; - //--PacketBuffer.S2[6]; - gpuDrawG3(gpuPolySpanDriver); - PacketBuffer.U4[0] = PacketBuffer.U4[6]; - PacketBuffer.U4[1] = PacketBuffer.U4[7]; - //--PacketBuffer.S2[2]; - gpuDrawG3(gpuPolySpanDriver); - DO_LOG(("gpuDrawG4(0x%x)\n",PRIM)); + // See notes regarding '129' for 0x30..0x33 further above -senquack + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | + gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB + ]; + gpuDrawPolyG(packet, driver, true); // is_quad = true + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawPolyG(0x%x) (4-pt QUAD)\n",PRIM)); } - break; + } break; + case 0x3C: case 0x3D: case 0x3E: - case 0x3F: - if (!isSkip) + case 0x3F: { // Gouraud-shaded, textured 4-pt poly + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (PacketBuffer.U4[5] >> 16); - const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB]; - //--PacketBuffer.S2[6]; - gpuDrawGT3(gpuPolySpanDriver); - PacketBuffer.U4[0] = PacketBuffer.U4[9]; - PacketBuffer.U4[1] = PacketBuffer.U4[10]; - PacketBuffer.U4[2] = PacketBuffer.U4[11]; - //--PacketBuffer.S2[2]; - gpuDrawGT3(gpuPolySpanDriver); - DO_LOG(("gpuDrawGT4(0x%x)\n",PRIM)); + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16); + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB + ]; + gpuDrawPolyGT(packet, driver, true); // is_quad = true + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawPolyGT(0x%x) (4-pt QUAD)\n",PRIM)); } - break; + } break; + case 0x40: case 0x41: case 0x42: - case 0x43: - if (!isSkip) + case 0x43: { // Monochrome line + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); - DO_LOG(("gpuDrawLF(0x%x)\n",PRIM)); + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineF(packet, driver); + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawLineF(0x%x)\n",PRIM)); } - break; + } break; + case 0x48: case 0x49: case 0x4A: @@ -221,32 +369,44 @@ void gpuSendPacketFunction(const int PRIM) case 0x4C: case 0x4D: case 0x4E: - case 0x4F: - if (!isSkip) + case 0x4F: { // Monochrome line strip + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); - DO_LOG(("gpuDrawLF(0x%x)\n",PRIM)); + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineF(packet, driver); + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawLineF(0x%x)\n",PRIM)); } - if ((PacketBuffer.U4[3] & 0xF000F000) != 0x50005000) + if ((gpu_unai.PacketBuffer.U4[3] & 0xF000F000) != 0x50005000) { - PacketBuffer.U4[1] = PacketBuffer.U4[2]; - PacketBuffer.U4[2] = PacketBuffer.U4[3]; - PacketCount = 1; - PacketIndex = 3; + gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2]; + gpu_unai.PacketBuffer.U4[2] = gpu_unai.PacketBuffer.U4[3]; + gpu_unai.PacketCount = 1; + gpu_unai.PacketIndex = 3; } - break; + } break; + case 0x50: case 0x51: case 0x52: - case 0x53: - if (!isSkip) + case 0x53: { // Gouraud-shaded line + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); - DO_LOG(("gpuDrawLG(0x%x)\n",PRIM)); + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + // Index MSB selects Gouraud-shaded PixelSpanDriver: + driver_idx |= (1 << 5); + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineG(packet, driver); + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawLineG(0x%x)\n",PRIM)); } - break; + } break; + case 0x58: case 0x59: case 0x5A: @@ -254,204 +414,203 @@ void gpuSendPacketFunction(const int PRIM) case 0x5C: case 0x5D: case 0x5E: - case 0x5F: - if (!isSkip) + case 0x5F: { // Gouraud-shaded line strip + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); - DO_LOG(("gpuDrawLG(0x%x)\n",PRIM)); + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + // Index MSB selects Gouraud-shaded PixelSpanDriver: + driver_idx |= (1 << 5); + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineG(packet, driver); + gpu_unai.fb_dirty = true; + DO_LOG(("gpuDrawLineG(0x%x)\n",PRIM)); } - if ((PacketBuffer.U4[4] & 0xF000F000) != 0x50005000) + if ((gpu_unai.PacketBuffer.U4[4] & 0xF000F000) != 0x50005000) { - PacketBuffer.U1[3 + (2 * 4)] = PacketBuffer.U1[3 + (0 * 4)]; - PacketBuffer.U4[0] = PacketBuffer.U4[2]; - PacketBuffer.U4[1] = PacketBuffer.U4[3]; - PacketBuffer.U4[2] = PacketBuffer.U4[4]; - PacketCount = 2; - PacketIndex = 3; + gpu_unai.PacketBuffer.U1[3 + (2 * 4)] = gpu_unai.PacketBuffer.U1[3 + (0 * 4)]; + gpu_unai.PacketBuffer.U4[0] = gpu_unai.PacketBuffer.U4[2]; + gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[3]; + gpu_unai.PacketBuffer.U4[2] = gpu_unai.PacketBuffer.U4[4]; + gpu_unai.PacketCount = 2; + gpu_unai.PacketIndex = 3; } - break; + } break; + case 0x60: case 0x61: case 0x62: - case 0x63: - if (!isSkip) + case 0x63: { // Monochrome rectangle (variable size) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); } - break; + } break; + case 0x64: case 0x65: case 0x66: - case 0x67: - if (!isSkip) + case 0x67: { // Textured rectangle (variable size) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]); - else - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]); + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + + // This fixes Silent Hill running animation on loading screens: + // (On PSX, color values 0x00-0x7F darken the source texture's color, + // 0x81-FF lighten textures (ultimately clamped to 0x1F), + // 0x80 leaves source texture color unchanged, HOWEVER, + // gpu_unai uses a simple lighting LUT whereby only the upper + // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as + // 0x80. + // + // NOTE: I've changed all textured sprite draw commands here and + // elsewhere to use proper behavior, but left poly commands + // alone, I don't want to slow rendering down too much. (TODO) + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawS(0x%x)\n",PRIM)); } - break; + } break; + case 0x68: case 0x69: case 0x6A: - case 0x6B: - if (!isSkip) + case 0x6B: { // Monochrome rectangle (1x1 dot) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - PacketBuffer.U4[2] = 0x00010001; - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); + gpu_unai.PacketBuffer.U4[2] = 0x00010001; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); } - break; + } break; + case 0x70: case 0x71: case 0x72: - case 0x73: - if (!isSkip) + case 0x73: { // Monochrome rectangle (8x8) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - PacketBuffer.U4[2] = 0x00080008; - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); + gpu_unai.PacketBuffer.U4[2] = 0x00080008; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); } - break; + } break; + case 0x74: case 0x75: case 0x76: - case 0x77: - if (!isSkip) + case 0x77: { // Textured rectangle (8x8) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - PacketBuffer.U4[3] = 0x00080008; - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]); - else - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]); + gpu_unai.PacketBuffer.U4[3] = 0x00080008; + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawS(0x%x)\n",PRIM)); } - break; + } break; + case 0x78: case 0x79: case 0x7A: - case 0x7B: - if (!isSkip) + case 0x7B: { // Monochrome rectangle (16x16) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - PacketBuffer.U4[2] = 0x00100010; - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); + gpu_unai.PacketBuffer.U4[2] = 0x00100010; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawT(0x%x)\n",PRIM)); } - break; + } break; + case 0x7C: case 0x7D: -#ifdef __arm__ - if ((GPU_GP1 & 0x180) == 0 && (Masking | PixelMSB) == 0) + #ifdef __arm__ + /* Notaz 4bit sprites optimization */ + if ((!gpu_unai.frameskip.skipGPU) && (!(gpu_unai.GPU_GP1&0x180)) && (!(gpu_unai.Masking|gpu_unai.PixelMSB))) { - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - gpuDrawS16(); + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuDrawS16(packet); + gpu_unai.fb_dirty = true; break; } - // fallthrough -#endif + #endif case 0x7E: - case 0x7F: - if (!isSkip) + case 0x7F: { // Textured rectangle (16x16) + if (!gpu_unai.frameskip.skipGPU) { NULL_GPU(); - PacketBuffer.U4[3] = 0x00100010; - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]); - else - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]); + gpu_unai.PacketBuffer.U4[3] = 0x00100010; + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + gpu_unai.fb_dirty = true; DO_LOG(("gpuDrawS(0x%x)\n",PRIM)); } - break; + } break; + case 0x80: // vid -> vid - gpuMoveImage(); // prim handles updateLace && skip + gpuMoveImage(packet); // prim handles updateLace && skip + if ((!gpu_unai.frameskip.skipCount) && (gpu_unai.DisplayArea[3] == 480)) // Tekken 3 hack + { + if (!gpu_unai.frameskip.skipGPU) gpu_unai.fb_dirty = true; + } + else + { + gpu_unai.fb_dirty = true; + } DO_LOG(("gpuMoveImage(0x%x)\n",PRIM)); break; case 0xA0: // sys ->vid - gpuLoadImage(); // prim handles updateLace && skip -#ifndef isSkip // not a define - if (alt_fps) isSkip=false; -#endif + gpuLoadImage(packet); // prim handles updateLace && skip DO_LOG(("gpuLoadImage(0x%x)\n",PRIM)); break; case 0xC0: // vid -> sys - gpuStoreImage(); // prim handles updateLace && skip + gpuStoreImage(packet); // prim handles updateLace && skip DO_LOG(("gpuStoreImage(0x%x)\n",PRIM)); break; - case 0xE1: - { - const u32 temp = PacketBuffer.U4[0]; - GPU_GP1 = (GPU_GP1 & ~0x000007FF) | (temp & 0x000007FF); - gpuSetTexture(temp); - DO_LOG(("gpuSetTexture(0x%x)\n",PRIM)); - } - break; - case 0xE2: - { - static const u8 TextureMask[32] = { - 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7, // - 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7 // - }; - const u32 temp = PacketBuffer.U4[0]; - TextureWindow[0] = ((temp >> 10) & 0x1F) << 3; - TextureWindow[1] = ((temp >> 15) & 0x1F) << 3; - TextureWindow[2] = TextureMask[(temp >> 0) & 0x1F]; - TextureWindow[3] = TextureMask[(temp >> 5) & 0x1F]; - gpuSetTexture(GPU_GP1); - //isSkip = false; - DO_LOG(("TextureWindow(0x%x)\n",PRIM)); - } - break; - case 0xE3: - { - const u32 temp = PacketBuffer.U4[0]; - DrawingArea[0] = temp & 0x3FF; - DrawingArea[1] = (temp >> 10) & 0x3FF; - //isSkip = false; - DO_LOG(("DrawingArea_Pos(0x%x)\n",PRIM)); - } - break; - case 0xE4: - { - const u32 temp = PacketBuffer.U4[0]; - DrawingArea[2] = (temp & 0x3FF) + 1; - DrawingArea[3] = ((temp >> 10) & 0x3FF) + 1; - //isSkip = false; - DO_LOG(("DrawingArea_Size(0x%x)\n",PRIM)); - } - break; - case 0xE5: - { - const u32 temp = PacketBuffer.U4[0]; - DrawingOffset[0] = ((s32)temp<<(32-11))>>(32-11); - DrawingOffset[1] = ((s32)temp<<(32-22))>>(32-11); - //isSkip = false; - DO_LOG(("DrawingOffset(0x%x)\n",PRIM)); - } - break; - case 0xE6: - { - const u32 temp = PacketBuffer.U4[0]; - //GPU_GP1 = (GPU_GP1 & ~0x00001800) | ((temp&3) << 11); - Masking = (temp & 0x2) << 1; - PixelMSB =(temp & 0x1) << 8; - DO_LOG(("SetMask(0x%x)\n",PRIM)); - } - break; + case 0xE1 ... 0xE6: { // Draw settings + gpuGP0Cmd_0xEx(gpu_unai, gpu_unai.PacketBuffer.U4[0]); + } break; } } +#endif //!USE_GPULIB +/////////////////////////////////////////////////////////////////////////////// +// End of code specific to non-gpulib standalone version of gpu_unai +/////////////////////////////////////////////////////////////////////////////// diff --git a/plugins/gpu_unai/gpu_fixedpoint.h b/plugins/gpu_unai/gpu_fixedpoint.h index e72fda1..5df42cf 100644 --- a/plugins/gpu_unai/gpu_fixedpoint.h +++ b/plugins/gpu_unai/gpu_fixedpoint.h @@ -21,60 +21,73 @@ #ifndef FIXED_H #define FIXED_H -#include "arm_features.h" - typedef s32 fixed; -#ifdef GPU_TABLE_10_BITS -#define TABLE_BITS 10 -#else -#define TABLE_BITS 16 -#endif - -#define FIXED_BITS 16 +//senquack - The gpu_drhell poly routines I adapted use 22.10 fixed point, +// while original Unai used 16.16: (see README_senquack.txt) +//#define FIXED_BITS 16 +#define FIXED_BITS 10 #define fixed_ZERO ((fixed)0) #define fixed_ONE ((fixed)1<>1)) -// big precision inverse table. -s32 s_invTable[(1<fixed conversions: +#define i2x(x) ((x)<>FIXED_BITS) + +INLINE fixed FixedCeil(const fixed x) +{ + return (x + (fixed_ONE - 1)) & fixed_HIMASK; +} -INLINE fixed i2x(const int _x) { return ((_x)<>FIXED_BITS); } +INLINE s32 FixedCeilToInt(const fixed x) +{ + return (x + (fixed_ONE - 1)) >> FIXED_BITS; +} -/* -INLINE u32 Log2(u32 _a) +//senquack - float<->fixed conversions: +#define f2x(x) ((s32)((x) * (float)(1<>= 16; c |= 16; } - if (_a & 0xFF00) { _a >>= 8; c |= 8; } - if (_a & 0xF0) { _a >>= 4; c |= 4; } - if (_a & 0xC) { _a >>= 2; c |= 2; } - if (_a & 0x2) { _a >>= 1; c |= 1; } - return c; + return (1.0f / x); } -*/ +#endif +#endif -#ifdef HAVE_ARMV5 +/////////////////////////////////////////////////////////////////////////// +// --- BEGIN INVERSE APPROXIMATION SECTION --- +/////////////////////////////////////////////////////////////////////////// +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + +// big precision inverse table. +#define TABLE_BITS 16 +s32 s_invTable[(1< 0; ++i, x >>= 1); return i - 1; } #endif -#ifdef GPU_TABLE_10_BITS -INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_) -{ - u32 uD = (_b<0) ? -_b : _b ; - u32 uLog = Log2(uD); - uLog = uLog>(TABLE_BITS-1) ? uLog-(TABLE_BITS-1) : 0; - u32 uDen = uD>>uLog; - iFactor_ = s_invTable[uDen]; - iFactor_ = (_b<0) ? -iFactor_ :iFactor_; - iShift_ = 15+uLog; -} -#else INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_) { u32 uD = (_b<0) ? -_b : _b; @@ -82,10 +95,12 @@ INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_) { u32 uLog = Log2(uD); uLog = uLog>(TABLE_BITS-1) ? uLog-(TABLE_BITS-1) : 0; - u32 uDen = (uD>>uLog)-1; + u32 uDen = (uD>>uLog); iFactor_ = s_invTable[uDen]; iFactor_ = (_b<0) ? -iFactor_ :iFactor_; - iShift_ = 15+uLog; + //senquack - Adapted to 22.10 fixed point (originally 16.16): + //iShift_ = 15+uLog; + iShift_ = 21+uLog; } else { @@ -93,7 +108,6 @@ INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_) iShift_ = 0; } } -#endif INLINE fixed xInvMulx (const fixed _a, const s32 _iFact, const s32 _iShift) { @@ -112,20 +126,9 @@ INLINE fixed xLoDivx (const fixed _a, const fixed _b) xInv(_b, iFact, iShift); return xInvMulx(_a, iFact, iShift); } - +#endif // GPU_UNAI_USE_INT_DIV_MULTINV /////////////////////////////////////////////////////////////////////////// -template -INLINE T Min2 (const T _a, const T _b) { return (_a<_b)?_a:_b; } - -template -INLINE T Min3 (const T _a, const T _b, const T _c) { return Min2(Min2(_a,_b),_c); } - +// --- END INVERSE APPROXIMATION SECTION --- /////////////////////////////////////////////////////////////////////////// -template -INLINE T Max2 (const T _a, const T _b) { return (_a>_b)?_a:_b; } -template -INLINE T Max3 (const T _a, const T _b, const T _c) { return Max2(Max2(_a,_b),_c); } - -/////////////////////////////////////////////////////////////////////////// #endif //FIXED_H diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h index 4cd7bff..723e09f 100644 --- a/plugins/gpu_unai/gpu_inner.h +++ b/plugins/gpu_unai/gpu_inner.h @@ -1,6 +1,7 @@ /*************************************************************************** * Copyright (C) 2010 PCSX4ALL Team * * Copyright (C) 2010 Unai * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -19,415 +20,688 @@ ***************************************************************************/ /////////////////////////////////////////////////////////////////////////////// -// Inner loop driver instanciation file +// Inner loop driver instantiation file /////////////////////////////////////////////////////////////////////////////// -// Option Masks -#define L ((CF>>0)&1) -#define B ((CF>>1)&1) -#define M ((CF>>2)&1) -#define BM ((CF>>3)&3) -#define TM ((CF>>5)&3) -#define G ((CF>>7)&1) +// Option Masks (CF template paramter) +#define CF_LIGHT ((CF>> 0)&1) // Lighting +#define CF_BLEND ((CF>> 1)&1) // Blending +#define CF_MASKCHECK ((CF>> 2)&1) // Mask bit check +#define CF_BLENDMODE ((CF>> 3)&3) // Blend mode 0..3 +#define CF_TEXTMODE ((CF>> 5)&3) // Texture mode 1..3 (0: texturing disabled) +#define CF_GOURAUD ((CF>> 7)&1) // Gouraud shading +#define CF_MASKSET ((CF>> 8)&1) // Mask bit set +#define CF_DITHER ((CF>> 9)&1) // Dithering +#define CF_BLITMASK ((CF>>10)&1) // blit_mask check (skip rendering pixels + // that wouldn't end up displayed on + // low-res screen using simple downscaler) -#define AH ((CF>>7)&1) - -#define MB ((CF>>8)&1) +//#ifdef __arm__ +//#ifndef ENABLE_GPU_ARMV7 +/* ARMv5 */ +//#include "gpu_inner_blend_arm5.h" +//#else +/* ARMv7 optimized */ +//#include "gpu_inner_blend_arm7.h" +//#endif +//#else +//#include "gpu_inner_blend.h" +//#endif +// TODO: use the arm-optimized gpu_inner_blends for arm builds #include "gpu_inner_blend.h" + +#include "gpu_inner_quantization.h" #include "gpu_inner_light.h" +// If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16 +// This is only for debugging/verification of low-precision colors in C. +// Low-precision Gouraud is intended for use by SIMD-optimized inner drivers +// which get/use Gouraud colors in SIMD registers. +//#define GPU_GOURAUD_LOW_PRECISION + +// How many bits of fixed-point precision GouraudColor uses +#ifdef GPU_GOURAUD_LOW_PRECISION +#define GPU_GOURAUD_FIXED_BITS 11 +#else +#define GPU_GOURAUD_FIXED_BITS 16 +#endif + +// Used to pass Gouraud colors to gpuPixelSpanFn() (lines) +struct GouraudColor { +#ifdef GPU_GOURAUD_LOW_PRECISION + u16 r, g, b; + s16 r_incr, g_incr, b_incr; +#else + u32 r, g, b; + s32 r_incr, g_incr, b_incr; +#endif +}; + +static inline u16 gpuGouraudColor15bpp(u32 r, u32 g, u32 b) +{ + r >>= GPU_GOURAUD_FIXED_BITS; + g >>= GPU_GOURAUD_FIXED_BITS; + b >>= GPU_GOURAUD_FIXED_BITS; + +#ifndef GPU_GOURAUD_LOW_PRECISION + // High-precision Gouraud colors are 8-bit + fractional + r >>= 3; g >>= 3; b >>= 3; +#endif + + return r | (g << 5) | (b << 10); +} + /////////////////////////////////////////////////////////////////////////////// -// GPU Pixel opperations generator -template -INLINE void gpuPixelFn(u16 *pixel,const u16 data) +// GPU Pixel span operations generator gpuPixelSpanFn<> +// Oct 2016: Created/adapted from old gpuPixelFn by senquack: +// Original gpuPixelFn was used to draw lines one pixel at a time. I wrote +// new line algorithms that draw lines using horizontal/vertical/diagonal +// spans of pixels, necessitating new pixel-drawing function that could +// not only render spans of pixels, but gouraud-shade them as well. +// This speeds up line rendering and would allow tile-rendering (untextured +// rectangles) to use the same set of functions. Since tiles are always +// monochrome, they simply wouldn't use the extra set of 32 gouraud-shaded +// gpuPixelSpanFn functions (TODO?). +// +// NOTE: While the PS1 framebuffer is 16 bit, we use 8-bit pointers here, +// so that pDst can be incremented directly by 'incr' parameter +// without having to shift it before use. +template +static u8* gpuPixelSpanFn(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) { - if ((!M)&&(!B)) - { - if(MB) { *pixel = data | 0x8000; } - else { *pixel = data; } + // Blend func can save an operation if it knows uSrc MSB is + // unset. For untextured prims, this is always true. + const bool skip_uSrc_mask = true; + + u16 col; + struct GouraudColor * gcPtr; + u32 r, g, b; + s32 r_incr, g_incr, b_incr; + + if (CF_GOURAUD) { + gcPtr = (GouraudColor*)data; + r = gcPtr->r; r_incr = gcPtr->r_incr; + g = gcPtr->g; g_incr = gcPtr->g_incr; + b = gcPtr->b; b_incr = gcPtr->b_incr; + } else { + col = (u16)data; } - else if ((M)&&(!B)) - { - if (!(*pixel&0x8000)) - { - if(MB) { *pixel = data | 0x8000; } - else { *pixel = data; } + + do { + if (!CF_GOURAUD) + { // NO GOURAUD + if (!CF_MASKCHECK && !CF_BLEND) { + if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } + else { *(u16*)pDst = col; } + } else if (CF_MASKCHECK && !CF_BLEND) { + if (!(*(u16*)pDst & 0x8000)) { + if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } + else { *(u16*)pDst = col; } + } + } else { + u16 uDst = *(u16*)pDst; + if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; } + + u16 uSrc = col; + + if (CF_BLEND) + uSrc = gpuBlending(uSrc, uDst); + + if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; } + else { *(u16*)pDst = uSrc; } + } + + } else + { // GOURAUD + + if (!CF_MASKCHECK && !CF_BLEND) { + col = gpuGouraudColor15bpp(r, g, b); + if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } + else { *(u16*)pDst = col; } + } else if (CF_MASKCHECK && !CF_BLEND) { + col = gpuGouraudColor15bpp(r, g, b); + if (!(*(u16*)pDst & 0x8000)) { + if (CF_MASKSET) { *(u16*)pDst = col | 0x8000; } + else { *(u16*)pDst = col; } + } + } else { + u16 uDst = *(u16*)pDst; + if (CF_MASKCHECK) { if (uDst & 0x8000) goto endpixel; } + col = gpuGouraudColor15bpp(r, g, b); + + u16 uSrc = col; + + // Blend func can save an operation if it knows uSrc MSB is + // unset. For untextured prims, this is always true. + const bool skip_uSrc_mask = true; + + if (CF_BLEND) + uSrc = gpuBlending(uSrc, uDst); + + if (CF_MASKSET) { *(u16*)pDst = uSrc | 0x8000; } + else { *(u16*)pDst = uSrc; } + } } + +endpixel: + if (CF_GOURAUD) { + r += r_incr; + g += g_incr; + b += b_incr; + } + pDst += incr; + } while (len-- > 1); + + // Note from senquack: Normally, I'd prefer to write a 'do {} while (--len)' + // loop, or even a for() loop, however, on MIPS platforms anything but the + // 'do {} while (len-- > 1)' tends to generate very unoptimal asm, with + // many unneeded MULs/ADDs/branches at the ends of these functions. + // If you change the loop structure above, be sure to compare the quality + // of the generated code!! + + if (CF_GOURAUD) { + gcPtr->r = r; + gcPtr->g = g; + gcPtr->b = b; } - else - { - u16 uDst = *pixel; - if(M) { if (uDst&0x8000) return; } - u16 uSrc = data; - u32 uMsk; if (BM==0) uMsk=0x7BDE; - if (BM==0) gpuBlending00(uSrc, uDst); - if (BM==1) gpuBlending01(uSrc, uDst); - if (BM==2) gpuBlending02(uSrc, uDst); - if (BM==3) gpuBlending03(uSrc, uDst); - if(MB) { *pixel = uSrc | 0x8000; } - else { *pixel = uSrc; } - } + return pDst; +} + +static u8* PixelSpanNULL(u8* pDst, uintptr_t data, ptrdiff_t incr, size_t len) +{ + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"PixelSpanNULL()\n"); + #endif + return pDst; } -/////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// -// Pixel drawing drivers, for lines (only blending) -typedef void (*PD)(u16 *pixel,const u16 data); -const PD gpuPixelDrivers[32] = // We only generate pixel op for MASKING/BLEND_ENABLE/BLEND_MODE +// PixelSpan (lines) innerloops driver +typedef u8* (*PSD)(u8* dst, uintptr_t data, ptrdiff_t incr, size_t len); + +const PSD gpuPixelSpanDrivers[64] = { - gpuPixelFn<0x00<<1>,gpuPixelFn<0x01<<1>,gpuPixelFn<0x02<<1>,gpuPixelFn<0x03<<1>, - NULL,gpuPixelFn<0x05<<1>,NULL,gpuPixelFn<0x07<<1>, - NULL,gpuPixelFn<0x09<<1>,NULL,gpuPixelFn<0x0B<<1>, - NULL,gpuPixelFn<0x0D<<1>,NULL,gpuPixelFn<0x0F<<1>, - - gpuPixelFn<(0x00<<1)|256>,gpuPixelFn<(0x01<<1)|256>,gpuPixelFn<(0x02<<1)|256>,gpuPixelFn<(0x03<<1)|256>, - NULL,gpuPixelFn<(0x05<<1)|256>,NULL,gpuPixelFn<(0x07<<1)|256>, - NULL,gpuPixelFn<(0x09<<1)|256>,NULL,gpuPixelFn<(0x0B<<1)|256>, - NULL,gpuPixelFn<(0x0D<<1)|256>,NULL,gpuPixelFn<(0x0F<<1)|256> + // Array index | 'CF' template field | Field value + // ------------+---------------------+---------------- + // Bit 0 | CF_BLEND | off (0), on (1) + // Bit 1 | CF_MASKCHECK | off (0), on (1) + // Bit 3:2 | CF_BLENDMODE | 0..3 + // Bit 4 | CF_MASKSET | off (0), on (1) + // Bit 5 | CF_GOURAUD | off (0), on (1) + // + // NULL entries are ones for which blending is disabled and blend-mode + // field is non-zero, which is obviously invalid. + + // Flat-shaded + gpuPixelSpanFn<0x00<<1>, gpuPixelSpanFn<0x01<<1>, gpuPixelSpanFn<0x02<<1>, gpuPixelSpanFn<0x03<<1>, + PixelSpanNULL, gpuPixelSpanFn<0x05<<1>, PixelSpanNULL, gpuPixelSpanFn<0x07<<1>, + PixelSpanNULL, gpuPixelSpanFn<0x09<<1>, PixelSpanNULL, gpuPixelSpanFn<0x0B<<1>, + PixelSpanNULL, gpuPixelSpanFn<0x0D<<1>, PixelSpanNULL, gpuPixelSpanFn<0x0F<<1>, + + // Flat-shaded + PixelMSB (CF_MASKSET) + gpuPixelSpanFn<(0x00<<1)|0x100>, gpuPixelSpanFn<(0x01<<1)|0x100>, gpuPixelSpanFn<(0x02<<1)|0x100>, gpuPixelSpanFn<(0x03<<1)|0x100>, + PixelSpanNULL, gpuPixelSpanFn<(0x05<<1)|0x100>, PixelSpanNULL, gpuPixelSpanFn<(0x07<<1)|0x100>, + PixelSpanNULL, gpuPixelSpanFn<(0x09<<1)|0x100>, PixelSpanNULL, gpuPixelSpanFn<(0x0B<<1)|0x100>, + PixelSpanNULL, gpuPixelSpanFn<(0x0D<<1)|0x100>, PixelSpanNULL, gpuPixelSpanFn<(0x0F<<1)|0x100>, + + // Gouraud-shaded (CF_GOURAUD) + gpuPixelSpanFn<(0x00<<1)|0x80>, gpuPixelSpanFn<(0x01<<1)|0x80>, gpuPixelSpanFn<(0x02<<1)|0x80>, gpuPixelSpanFn<(0x03<<1)|0x80>, + PixelSpanNULL, gpuPixelSpanFn<(0x05<<1)|0x80>, PixelSpanNULL, gpuPixelSpanFn<(0x07<<1)|0x80>, + PixelSpanNULL, gpuPixelSpanFn<(0x09<<1)|0x80>, PixelSpanNULL, gpuPixelSpanFn<(0x0B<<1)|0x80>, + PixelSpanNULL, gpuPixelSpanFn<(0x0D<<1)|0x80>, PixelSpanNULL, gpuPixelSpanFn<(0x0F<<1)|0x80>, + + // Gouraud-shaded (CF_GOURAUD) + PixelMSB (CF_MASKSET) + gpuPixelSpanFn<(0x00<<1)|0x180>, gpuPixelSpanFn<(0x01<<1)|0x180>, gpuPixelSpanFn<(0x02<<1)|0x180>, gpuPixelSpanFn<(0x03<<1)|0x180>, + PixelSpanNULL, gpuPixelSpanFn<(0x05<<1)|0x180>, PixelSpanNULL, gpuPixelSpanFn<(0x07<<1)|0x180>, + PixelSpanNULL, gpuPixelSpanFn<(0x09<<1)|0x180>, PixelSpanNULL, gpuPixelSpanFn<(0x0B<<1)|0x180>, + PixelSpanNULL, gpuPixelSpanFn<(0x0D<<1)|0x180>, PixelSpanNULL, gpuPixelSpanFn<(0x0F<<1)|0x180> }; /////////////////////////////////////////////////////////////////////////////// // GPU Tiles innerloops generator -template -INLINE void gpuTileSpanFn(u16 *pDst, u32 count, u16 data) +template +static void gpuTileSpanFn(u16 *pDst, u32 count, u16 data) { - if ((!M)&&(!B)) - { - if (MB) { data = data | 0x8000; } + if (!CF_MASKCHECK && !CF_BLEND) { + if (CF_MASKSET) { data = data | 0x8000; } do { *pDst++ = data; } while (--count); - } - else if ((M)&&(!B)) - { - if (MB) { data = data | 0x8000; } + } else if (CF_MASKCHECK && !CF_BLEND) { + if (CF_MASKSET) { data = data | 0x8000; } do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count); - } - else + } else { - u16 uSrc; - u16 uDst; - u32 uMsk; if (BM==0) uMsk=0x7BDE; + // Blend func can save an operation if it knows uSrc MSB is + // unset. For untextured prims, this is always true. + const bool skip_uSrc_mask = true; + + u16 uSrc, uDst; do { - // MASKING - uDst = *pDst; - if(M) { if (uDst&0x8000) goto endtile; } + if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; } + if (CF_MASKCHECK) { if (uDst&0x8000) goto endtile; } + uSrc = data; - // BLEND - if (BM==0) gpuBlending00(uSrc, uDst); - if (BM==1) gpuBlending01(uSrc, uDst); - if (BM==2) gpuBlending02(uSrc, uDst); - if (BM==3) gpuBlending03(uSrc, uDst); + if (CF_BLEND) + uSrc = gpuBlending(uSrc, uDst); - if (MB) { *pDst = uSrc | 0x8000; } - else { *pDst = uSrc; } - endtile: pDst++; + if (CF_MASKSET) { *pDst = uSrc | 0x8000; } + else { *pDst = uSrc; } + + //senquack - Did not apply "Silent Hill" mask-bit fix to here. + // It is hard to tell from scarce documentation available and + // lack of comments in code, but I believe the tile-span + // functions here should not bother to preserve any source MSB, + // as they are not drawing from a texture. +endtile: + pDst++; } while (--count); } } +static void TileNULL(u16 *pDst, u32 count, u16 data) +{ + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"TileNULL()\n"); + #endif +} + /////////////////////////////////////////////////////////////////////////////// // Tiles innerloops driver typedef void (*PT)(u16 *pDst, u32 count, u16 data); -const PT gpuTileSpanDrivers[64] = -{ - gpuTileSpanFn<0x00>,NULL,gpuTileSpanFn<0x02>,NULL, gpuTileSpanFn<0x04>,NULL,gpuTileSpanFn<0x06>,NULL, NULL,NULL,gpuTileSpanFn<0x0A>,NULL, NULL,NULL,gpuTileSpanFn<0x0E>,NULL, - NULL,NULL,gpuTileSpanFn<0x12>,NULL, NULL,NULL,gpuTileSpanFn<0x16>,NULL, NULL,NULL,gpuTileSpanFn<0x1A>,NULL, NULL,NULL,gpuTileSpanFn<0x1E>,NULL, - gpuTileSpanFn<0x100>,NULL,gpuTileSpanFn<0x102>,NULL, gpuTileSpanFn<0x104>,NULL,gpuTileSpanFn<0x106>,NULL, NULL,NULL,gpuTileSpanFn<0x10A>,NULL, NULL,NULL,gpuTileSpanFn<0x10E>,NULL, - NULL,NULL,gpuTileSpanFn<0x112>,NULL, NULL,NULL,gpuTileSpanFn<0x116>,NULL, NULL,NULL,gpuTileSpanFn<0x11A>,NULL, NULL,NULL,gpuTileSpanFn<0x11E>,NULL, +// Template instantiation helper macros +#define TI(cf) gpuTileSpanFn<(cf)> +#define TN TileNULL +#define TIBLOCK(ub) \ + TI((ub)|0x00), TI((ub)|0x02), TI((ub)|0x04), TI((ub)|0x06), \ + TN, TI((ub)|0x0a), TN, TI((ub)|0x0e), \ + TN, TI((ub)|0x12), TN, TI((ub)|0x16), \ + TN, TI((ub)|0x1a), TN, TI((ub)|0x1e) + +const PT gpuTileSpanDrivers[32] = { + TIBLOCK(0<<8), TIBLOCK(1<<8) }; +#undef TI +#undef TN +#undef TIBLOCK + + /////////////////////////////////////////////////////////////////////////////// // GPU Sprites innerloops generator -template -INLINE void gpuSpriteSpanFn(u16 *pDst, u32 count, u32 u0, const u32 mask) +template +static void gpuSpriteSpanFn(u16 *pDst, u32 count, u8* pTxt, u32 u0) { - u16 uSrc; - u16 uDst; - const u16* pTxt = TBA+(u0&~0x1ff); u0=u0&0x1ff; - const u16 *_CBA; if(TM!=3) _CBA=CBA; - u32 lCol; if(L) { lCol = ((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); } - u8 rgb; if (TM==1) rgb = ((u8*)pTxt)[u0>>1]; - u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE; + // Blend func can save an operation if it knows uSrc MSB is unset. + // Untextured prims can always skip (source color always comes with MSB=0). + // For textured prims, lighting funcs always return it unset. (bonus!) + const bool skip_uSrc_mask = (!CF_TEXTMODE) || CF_LIGHT; + + u16 uSrc, uDst, srcMSB; + u32 u0_mask = gpu_unai.TextureWindow[2]; + + u8 r5, g5, b5; + if (CF_LIGHT) { + r5 = gpu_unai.r5; + g5 = gpu_unai.g5; + b5 = gpu_unai.b5; + } + + if (CF_TEXTMODE==3) { + // Texture is accessed byte-wise, so adjust mask if 16bpp + u0_mask <<= 1; + } + + const u16 *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA; do { - // MASKING - if(M) { uDst = *pDst; if (uDst&0x8000) { u0=(u0+1)&mask; goto endsprite; } } + if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; } + if (CF_MASKCHECK) if (uDst&0x8000) { goto endsprite; } - // TEXTURE MAPPING - if (TM==1) { if (!(u0&1)) rgb = ((u8*)pTxt)[u0>>1]; uSrc = _CBA[(rgb>>((u0&1)<<2))&0xf]; u0=(u0+1)&mask; } - if (TM==2) { uSrc = _CBA[((u8*)pTxt)[u0]]; u0=(u0+1)&mask; } - if (TM==3) { uSrc = pTxt[u0]; u0=(u0+1)&mask; } - if(!AH) { if (!uSrc) goto endsprite; } - - // BLEND - if(B) - { - if(uSrc&0x8000) - { - // LIGHTING CALCULATIONS - if(L) { gpuLightingTXT(uSrc, lCol); } - - if(!M) { uDst = *pDst; } - if (BM==0) gpuBlending00(uSrc, uDst); - if (BM==1) gpuBlending01(uSrc, uDst); - if (BM==2) gpuBlending02(uSrc, uDst); - if (BM==3) gpuBlending03(uSrc, uDst); - } - else - { - // LIGHTING CALCULATIONS - if(L) { gpuLightingTXT(uSrc, lCol); } - } + if (CF_TEXTMODE==1) { // 4bpp (CLUT) + u8 rgb = pTxt[(u0 & u0_mask)>>1]; + uSrc = CBA_[(rgb>>((u0&1)<<2))&0xf]; } - else - { - // LIGHTING CALCULATIONS - if(L) { gpuLightingTXT(uSrc, lCol); } else - { if(!MB) uSrc&= 0x7fff; } + if (CF_TEXTMODE==2) { // 8bpp (CLUT) + uSrc = CBA_[pTxt[u0 & u0_mask]]; + } + if (CF_TEXTMODE==3) { // 16bpp + uSrc = *(u16*)(&pTxt[u0 & u0_mask]); } - if (MB) { *pDst = uSrc | 0x8000; } - else { *pDst = uSrc; } + if (!uSrc) goto endsprite; + + //senquack - save source MSB, as blending or lighting macros will not + // (Silent Hill gray rectangles mask bit bug) + if (CF_BLEND || CF_LIGHT) srcMSB = uSrc & 0x8000; - endsprite: pDst++; + if (CF_LIGHT) + uSrc = gpuLightingTXT(uSrc, r5, g5, b5); + + if (CF_BLEND && srcMSB) + uSrc = gpuBlending(uSrc, uDst); + + if (CF_MASKSET) { *pDst = uSrc | 0x8000; } + else if (CF_BLEND || CF_LIGHT) { *pDst = uSrc | srcMSB; } + else { *pDst = uSrc; } + +endsprite: + u0 += (CF_TEXTMODE==3) ? 2 : 1; + pDst++; } while (--count); } + +static void SpriteNULL(u16 *pDst, u32 count, u8* pTxt, u32 u0) +{ + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"SpriteNULL()\n"); + #endif +} + /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// // Sprite innerloops driver -typedef void (*PS)(u16 *pDst, u32 count, u32 u0, const u32 mask); -const PS gpuSpriteSpanDrivers[512] = -{ - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - gpuSpriteSpanFn<0x20>,gpuSpriteSpanFn<0x21>,gpuSpriteSpanFn<0x22>,gpuSpriteSpanFn<0x23>, gpuSpriteSpanFn<0x24>,gpuSpriteSpanFn<0x25>,gpuSpriteSpanFn<0x26>,gpuSpriteSpanFn<0x27>, NULL,NULL,gpuSpriteSpanFn<0x2A>,gpuSpriteSpanFn<0x2B>, NULL,NULL,gpuSpriteSpanFn<0x2E>,gpuSpriteSpanFn<0x2F>, - NULL,NULL,gpuSpriteSpanFn<0x32>,gpuSpriteSpanFn<0x33>, NULL,NULL,gpuSpriteSpanFn<0x36>,gpuSpriteSpanFn<0x37>, NULL,NULL,gpuSpriteSpanFn<0x3A>,gpuSpriteSpanFn<0x3B>, NULL,NULL,gpuSpriteSpanFn<0x3E>,gpuSpriteSpanFn<0x3F>, - gpuSpriteSpanFn<0x40>,gpuSpriteSpanFn<0x41>,gpuSpriteSpanFn<0x42>,gpuSpriteSpanFn<0x43>, gpuSpriteSpanFn<0x44>,gpuSpriteSpanFn<0x45>,gpuSpriteSpanFn<0x46>,gpuSpriteSpanFn<0x47>, NULL,NULL,gpuSpriteSpanFn<0x4A>,gpuSpriteSpanFn<0x4B>, NULL,NULL,gpuSpriteSpanFn<0x4E>,gpuSpriteSpanFn<0x4F>, - NULL,NULL,gpuSpriteSpanFn<0x52>,gpuSpriteSpanFn<0x53>, NULL,NULL,gpuSpriteSpanFn<0x56>,gpuSpriteSpanFn<0x57>, NULL,NULL,gpuSpriteSpanFn<0x5A>,gpuSpriteSpanFn<0x5B>, NULL,NULL,gpuSpriteSpanFn<0x5E>,gpuSpriteSpanFn<0x5F>, - gpuSpriteSpanFn<0x60>,gpuSpriteSpanFn<0x61>,gpuSpriteSpanFn<0x62>,gpuSpriteSpanFn<0x63>, gpuSpriteSpanFn<0x64>,gpuSpriteSpanFn<0x65>,gpuSpriteSpanFn<0x66>,gpuSpriteSpanFn<0x67>, NULL,NULL,gpuSpriteSpanFn<0x6A>,gpuSpriteSpanFn<0x6B>, NULL,NULL,gpuSpriteSpanFn<0x6E>,gpuSpriteSpanFn<0x6F>, - NULL,NULL,gpuSpriteSpanFn<0x72>,gpuSpriteSpanFn<0x73>, NULL,NULL,gpuSpriteSpanFn<0x76>,gpuSpriteSpanFn<0x77>, NULL,NULL,gpuSpriteSpanFn<0x7A>,gpuSpriteSpanFn<0x7B>, NULL,NULL,gpuSpriteSpanFn<0x7E>,gpuSpriteSpanFn<0x7F>, - - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - gpuSpriteSpanFn<0xa0>,gpuSpriteSpanFn<0xa1>,gpuSpriteSpanFn<0xa2>,gpuSpriteSpanFn<0xa3>, gpuSpriteSpanFn<0xa4>,gpuSpriteSpanFn<0xa5>,gpuSpriteSpanFn<0xa6>,gpuSpriteSpanFn<0xa7>, NULL,NULL,gpuSpriteSpanFn<0xaA>,gpuSpriteSpanFn<0xaB>, NULL,NULL,gpuSpriteSpanFn<0xaE>,gpuSpriteSpanFn<0xaF>, - NULL,NULL,gpuSpriteSpanFn<0xb2>,gpuSpriteSpanFn<0xb3>, NULL,NULL,gpuSpriteSpanFn<0xb6>,gpuSpriteSpanFn<0xb7>, NULL,NULL,gpuSpriteSpanFn<0xbA>,gpuSpriteSpanFn<0xbB>, NULL,NULL,gpuSpriteSpanFn<0xbE>,gpuSpriteSpanFn<0xbF>, - gpuSpriteSpanFn<0xc0>,gpuSpriteSpanFn<0xc1>,gpuSpriteSpanFn<0xc2>,gpuSpriteSpanFn<0xc3>, gpuSpriteSpanFn<0xc4>,gpuSpriteSpanFn<0xc5>,gpuSpriteSpanFn<0xc6>,gpuSpriteSpanFn<0xc7>, NULL,NULL,gpuSpriteSpanFn<0xcA>,gpuSpriteSpanFn<0xcB>, NULL,NULL,gpuSpriteSpanFn<0xcE>,gpuSpriteSpanFn<0xcF>, - NULL,NULL,gpuSpriteSpanFn<0xd2>,gpuSpriteSpanFn<0xd3>, NULL,NULL,gpuSpriteSpanFn<0xd6>,gpuSpriteSpanFn<0xd7>, NULL,NULL,gpuSpriteSpanFn<0xdA>,gpuSpriteSpanFn<0xdB>, NULL,NULL,gpuSpriteSpanFn<0xdE>,gpuSpriteSpanFn<0xdF>, - gpuSpriteSpanFn<0xe0>,gpuSpriteSpanFn<0xe1>,gpuSpriteSpanFn<0xe2>,gpuSpriteSpanFn<0xe3>, gpuSpriteSpanFn<0xe4>,gpuSpriteSpanFn<0xe5>,gpuSpriteSpanFn<0xe6>,gpuSpriteSpanFn<0xe7>, NULL,NULL,gpuSpriteSpanFn<0xeA>,gpuSpriteSpanFn<0xeB>, NULL,NULL,gpuSpriteSpanFn<0xeE>,gpuSpriteSpanFn<0xeF>, - NULL,NULL,gpuSpriteSpanFn<0xf2>,gpuSpriteSpanFn<0xf3>, NULL,NULL,gpuSpriteSpanFn<0xf6>,gpuSpriteSpanFn<0xf7>, NULL,NULL,gpuSpriteSpanFn<0xfA>,gpuSpriteSpanFn<0xfB>, NULL,NULL,gpuSpriteSpanFn<0xfE>,gpuSpriteSpanFn<0xfF>, - - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - gpuSpriteSpanFn<0x120>,gpuSpriteSpanFn<0x121>,gpuSpriteSpanFn<0x122>,gpuSpriteSpanFn<0x123>, gpuSpriteSpanFn<0x124>,gpuSpriteSpanFn<0x125>,gpuSpriteSpanFn<0x126>,gpuSpriteSpanFn<0x127>, NULL,NULL,gpuSpriteSpanFn<0x12A>,gpuSpriteSpanFn<0x12B>, NULL,NULL,gpuSpriteSpanFn<0x12E>,gpuSpriteSpanFn<0x12F>, - NULL,NULL,gpuSpriteSpanFn<0x132>,gpuSpriteSpanFn<0x133>, NULL,NULL,gpuSpriteSpanFn<0x136>,gpuSpriteSpanFn<0x137>, NULL,NULL,gpuSpriteSpanFn<0x13A>,gpuSpriteSpanFn<0x13B>, NULL,NULL,gpuSpriteSpanFn<0x13E>,gpuSpriteSpanFn<0x13F>, - gpuSpriteSpanFn<0x140>,gpuSpriteSpanFn<0x141>,gpuSpriteSpanFn<0x142>,gpuSpriteSpanFn<0x143>, gpuSpriteSpanFn<0x144>,gpuSpriteSpanFn<0x145>,gpuSpriteSpanFn<0x146>,gpuSpriteSpanFn<0x147>, NULL,NULL,gpuSpriteSpanFn<0x14A>,gpuSpriteSpanFn<0x14B>, NULL,NULL,gpuSpriteSpanFn<0x14E>,gpuSpriteSpanFn<0x14F>, - NULL,NULL,gpuSpriteSpanFn<0x152>,gpuSpriteSpanFn<0x153>, NULL,NULL,gpuSpriteSpanFn<0x156>,gpuSpriteSpanFn<0x157>, NULL,NULL,gpuSpriteSpanFn<0x15A>,gpuSpriteSpanFn<0x15B>, NULL,NULL,gpuSpriteSpanFn<0x15E>,gpuSpriteSpanFn<0x15F>, - gpuSpriteSpanFn<0x160>,gpuSpriteSpanFn<0x161>,gpuSpriteSpanFn<0x162>,gpuSpriteSpanFn<0x163>, gpuSpriteSpanFn<0x164>,gpuSpriteSpanFn<0x165>,gpuSpriteSpanFn<0x166>,gpuSpriteSpanFn<0x167>, NULL,NULL,gpuSpriteSpanFn<0x16A>,gpuSpriteSpanFn<0x16B>, NULL,NULL,gpuSpriteSpanFn<0x16E>,gpuSpriteSpanFn<0x16F>, - NULL,NULL,gpuSpriteSpanFn<0x172>,gpuSpriteSpanFn<0x173>, NULL,NULL,gpuSpriteSpanFn<0x176>,gpuSpriteSpanFn<0x177>, NULL,NULL,gpuSpriteSpanFn<0x17A>,gpuSpriteSpanFn<0x17B>, NULL,NULL,gpuSpriteSpanFn<0x17E>,gpuSpriteSpanFn<0x17F>, - - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, - gpuSpriteSpanFn<0x1a0>,gpuSpriteSpanFn<0x1a1>,gpuSpriteSpanFn<0x1a2>,gpuSpriteSpanFn<0x1a3>, gpuSpriteSpanFn<0x1a4>,gpuSpriteSpanFn<0x1a5>,gpuSpriteSpanFn<0x1a6>,gpuSpriteSpanFn<0x1a7>, NULL,NULL,gpuSpriteSpanFn<0x1aA>,gpuSpriteSpanFn<0x1aB>, NULL,NULL,gpuSpriteSpanFn<0x1aE>,gpuSpriteSpanFn<0x1aF>, - NULL,NULL,gpuSpriteSpanFn<0x1b2>,gpuSpriteSpanFn<0x1b3>, NULL,NULL,gpuSpriteSpanFn<0x1b6>,gpuSpriteSpanFn<0x1b7>, NULL,NULL,gpuSpriteSpanFn<0x1bA>,gpuSpriteSpanFn<0x1bB>, NULL,NULL,gpuSpriteSpanFn<0x1bE>,gpuSpriteSpanFn<0x1bF>, - gpuSpriteSpanFn<0x1c0>,gpuSpriteSpanFn<0x1c1>,gpuSpriteSpanFn<0x1c2>,gpuSpriteSpanFn<0x1c3>, gpuSpriteSpanFn<0x1c4>,gpuSpriteSpanFn<0x1c5>,gpuSpriteSpanFn<0x1c6>,gpuSpriteSpanFn<0x1c7>, NULL,NULL,gpuSpriteSpanFn<0x1cA>,gpuSpriteSpanFn<0x1cB>, NULL,NULL,gpuSpriteSpanFn<0x1cE>,gpuSpriteSpanFn<0x1cF>, - NULL,NULL,gpuSpriteSpanFn<0x1d2>,gpuSpriteSpanFn<0x1d3>, NULL,NULL,gpuSpriteSpanFn<0x1d6>,gpuSpriteSpanFn<0x1d7>, NULL,NULL,gpuSpriteSpanFn<0x1dA>,gpuSpriteSpanFn<0x1dB>, NULL,NULL,gpuSpriteSpanFn<0x1dE>,gpuSpriteSpanFn<0x1dF>, - gpuSpriteSpanFn<0x1e0>,gpuSpriteSpanFn<0x1e1>,gpuSpriteSpanFn<0x1e2>,gpuSpriteSpanFn<0x1e3>, gpuSpriteSpanFn<0x1e4>,gpuSpriteSpanFn<0x1e5>,gpuSpriteSpanFn<0x1e6>,gpuSpriteSpanFn<0x1e7>, NULL,NULL,gpuSpriteSpanFn<0x1eA>,gpuSpriteSpanFn<0x1eB>, NULL,NULL,gpuSpriteSpanFn<0x1eE>,gpuSpriteSpanFn<0x1eF>, - NULL,NULL,gpuSpriteSpanFn<0x1f2>,gpuSpriteSpanFn<0x1f3>, NULL,NULL,gpuSpriteSpanFn<0x1f6>,gpuSpriteSpanFn<0x1f7>, NULL,NULL,gpuSpriteSpanFn<0x1fA>,gpuSpriteSpanFn<0x1fB>, NULL,NULL,gpuSpriteSpanFn<0x1fE>,gpuSpriteSpanFn<0x1fF> +typedef void (*PS)(u16 *pDst, u32 count, u8* pTxt, u32 u0); + +// Template instantiation helper macros +#define TI(cf) gpuSpriteSpanFn<(cf)> +#define TN SpriteNULL +#define TIBLOCK(ub) \ + TN, TN, TN, TN, TN, TN, TN, TN, \ + TN, TN, TN, TN, TN, TN, TN, TN, \ + TN, TN, TN, TN, TN, TN, TN, TN, \ + TN, TN, TN, TN, TN, TN, TN, TN, \ + TI((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \ + TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \ + TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \ + TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \ + TI((ub)|0x40), TI((ub)|0x41), TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \ + TN, TN, TI((ub)|0x4a), TI((ub)|0x4b), TN, TN, TI((ub)|0x4e), TI((ub)|0x4f), \ + TN, TN, TI((ub)|0x52), TI((ub)|0x53), TN, TN, TI((ub)|0x56), TI((ub)|0x57), \ + TN, TN, TI((ub)|0x5a), TI((ub)|0x5b), TN, TN, TI((ub)|0x5e), TI((ub)|0x5f), \ + TI((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \ + TN, TN, TI((ub)|0x6a), TI((ub)|0x6b), TN, TN, TI((ub)|0x6e), TI((ub)|0x6f), \ + TN, TN, TI((ub)|0x72), TI((ub)|0x73), TN, TN, TI((ub)|0x76), TI((ub)|0x77), \ + TN, TN, TI((ub)|0x7a), TI((ub)|0x7b), TN, TN, TI((ub)|0x7e), TI((ub)|0x7f) + +const PS gpuSpriteSpanDrivers[256] = { + TIBLOCK(0<<8), TIBLOCK(1<<8) }; +#undef TI +#undef TN +#undef TIBLOCK + /////////////////////////////////////////////////////////////////////////////// // GPU Polygon innerloops generator -template -INLINE void gpuPolySpanFn(u16 *pDst, u32 count) + +//senquack - Newer version with following changes: +// * Adapted to work with new poly routings in gpu_raster_polygon.h +// adapted from DrHell GPU. They are less glitchy and use 22.10 +// fixed-point instead of original UNAI's 16.16. +// * Texture coordinates are no longer packed together into one +// unsigned int. This seems to lose too much accuracy (they each +// end up being only 8.7 fixed-point that way) and pixel-droupouts +// were noticeable both with original code and current DrHell +// adaptations. An example would be the sky in NFS3. Now, they are +// stored in separate ints, using separate masks. +// * Function is no longer INLINE, as it was always called +// through a function pointer. +// * Function now ensures the mask bit of source texture is preserved +// across calls to blending functions (Silent Hill rectangles fix) +// * November 2016: Large refactoring of blending/lighting when +// JohnnyF added dithering. See gpu_inner_quantization.h and +// relevant blend/light headers. +// (see README_senquack.txt) +template +static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count) { - if (!TM) - { - // NO TEXTURE - if (!G) + // Blend func can save an operation if it knows uSrc MSB is unset. + // Untextured prims can always skip this (src color MSB is always 0). + // For textured prims, lighting funcs always return it unset. (bonus!) + const bool skip_uSrc_mask = (!CF_TEXTMODE) || CF_LIGHT; + + u32 bMsk; if (CF_BLITMASK) bMsk = gpu_unai.blit_mask; + + if (!CF_TEXTMODE) + { + if (!CF_GOURAUD) { - // NO GOURAUD - u16 data; - if (L) { u32 lCol=((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); gpuLightingRGB(data,lCol); } - else data=PixelData; - if ((!M)&&(!B)) - { - if (MB) { data = data | 0x8000; } - do { *pDst++ = data; } while (--count); - } - else if ((M)&&(!B)) - { - if (MB) { data = data | 0x8000; } - do { if (!(*pDst&0x8000)) { *pDst = data; } pDst++; } while (--count); - } - else - { - u16 uSrc; - u16 uDst; - u32 uMsk; if (BM==0) uMsk=0x7BDE; - do - { - // masking - uDst = *pDst; - if(M) { if (uDst&0x8000) goto endtile; } - uSrc = data; - // blend - if (BM==0) gpuBlending00(uSrc, uDst); - if (BM==1) gpuBlending01(uSrc, uDst); - if (BM==2) gpuBlending02(uSrc, uDst); - if (BM==3) gpuBlending03(uSrc, uDst); - if (MB) { *pDst = uSrc | 0x8000; } - else { *pDst = uSrc; } - endtile: pDst++; - } - while (--count); - } + // UNTEXTURED, NO GOURAUD + const u16 pix15 = gpu_unai.PixelData; + do { + u16 uSrc, uDst; + + // NOTE: Don't enable CF_BLITMASK pixel skipping (speed hack) + // on untextured polys. It seems to do more harm than good: see + // gravestone text at end of Medieval intro sequence. -senquack + //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) { goto endpolynotextnogou; } } + + if (CF_BLEND || CF_MASKCHECK) uDst = *pDst; + if (CF_MASKCHECK) { if (uDst&0x8000) { goto endpolynotextnogou; } } + + uSrc = pix15; + + if (CF_BLEND) + uSrc = gpuBlending(uSrc, uDst); + + if (CF_MASKSET) { *pDst = uSrc | 0x8000; } + else { *pDst = uSrc; } + +endpolynotextnogou: + pDst++; + } while(--count); } else { - // GOURAUD - u16 uDst; - u16 uSrc; - u32 linc=lInc; - u32 lCol=((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); - u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE; - do - { - // masking - if(M) { uDst = *pDst; if (uDst&0x8000) goto endgou; } - // blend - if(B) - { - // light - gpuLightingRGB(uSrc,lCol); - if(!M) { uDst = *pDst; } - if (BM==0) gpuBlending00(uSrc, uDst); - if (BM==1) gpuBlending01(uSrc, uDst); - if (BM==2) gpuBlending02(uSrc, uDst); - if (BM==3) gpuBlending03(uSrc, uDst); - } - else - { - // light - gpuLightingRGB(uSrc,lCol); + // UNTEXTURED, GOURAUD + u32 l_gCol = gpu_unai.gCol; + u32 l_gInc = gpu_unai.gInc; + + do { + u16 uDst, uSrc; + + // See note in above loop regarding CF_BLITMASK + //if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolynotextgou; } + + if (CF_BLEND || CF_MASKCHECK) uDst = *pDst; + if (CF_MASKCHECK) { if (uDst&0x8000) goto endpolynotextgou; } + + if (CF_DITHER) { + // GOURAUD, DITHER + + u32 uSrc24 = gpuLightingRGB24(l_gCol); + if (CF_BLEND) + uSrc24 = gpuBlending24(uSrc24, uDst); + uSrc = gpuColorQuantization24(uSrc24, pDst); + } else { + // GOURAUD, NO DITHER + + uSrc = gpuLightingRGB(l_gCol); + + if (CF_BLEND) + uSrc = gpuBlending(uSrc, uDst); } - if (MB) { *pDst = uSrc | 0x8000; } - else { *pDst = uSrc; } - endgou: pDst++; lCol=(lCol+linc); + + if (CF_MASKSET) { *pDst = uSrc | 0x8000; } + else { *pDst = uSrc; } + +endpolynotextgou: + pDst++; + l_gCol += l_gInc; } while (--count); } } else { - // TEXTURE - u16 uDst; - u16 uSrc; - u32 linc; if (L&&G) linc=lInc; - u32 tinc=tInc; - u32 tmsk=tMsk; - u32 tCor = ((u32)( u4<<7)&0x7fff0000) | ((u32)( v4>>9)&0x00007fff); tCor&= tmsk; - const u16* _TBA=TBA; - const u16* _CBA; if (TM!=3) _CBA=CBA; - u32 lCol; - if(L && !G) { lCol = ((u32)(b4<< 2)&(0x03ff)) | ((u32)(g4<<13)&(0x07ff<<10)) | ((u32)(r4<<24)&(0x07ff<<21)); } - else if(L && G) { lCol = ((u32)(b4>>14)&(0x03ff)) | ((u32)(g4>>3)&(0x07ff<<10)) | ((u32)(r4<<8)&(0x07ff<<21)); } - u32 uMsk; if ((B)&&(BM==0)) uMsk=0x7BDE; + // TEXTURED + + u16 uDst, uSrc, srcMSB; + + //senquack - note: original UNAI code had gpu_unai.{u4/v4} packed into + // one 32-bit unsigned int, but this proved to lose too much accuracy + // (pixel drouputs noticeable in NFS3 sky), so now are separate vars. + u32 l_u_msk = gpu_unai.u_msk; u32 l_v_msk = gpu_unai.v_msk; + u32 l_u = gpu_unai.u & l_u_msk; u32 l_v = gpu_unai.v & l_v_msk; + s32 l_u_inc = gpu_unai.u_inc; s32 l_v_inc = gpu_unai.v_inc; + + const u16* TBA_ = gpu_unai.TBA; + const u16* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA; + + u8 r5, g5, b5; + u8 r8, g8, b8; + + u32 l_gInc, l_gCol; + + if (CF_LIGHT) { + if (CF_GOURAUD) { + l_gInc = gpu_unai.gInc; + l_gCol = gpu_unai.gCol; + } else { + if (CF_DITHER) { + r8 = gpu_unai.r8; + g8 = gpu_unai.g8; + b8 = gpu_unai.b8; + } else { + r5 = gpu_unai.r5; + g5 = gpu_unai.g5; + b5 = gpu_unai.b5; + } + } + } + do { - // masking - if(M) { uDst = *pDst; if (uDst&0x8000) goto endpoly; } - // texture - if (TM==1) { u32 tu=(tCor>>23); u32 tv=(tCor<<4)&(0xff<<11); u8 rgb=((u8*)_TBA)[tv+(tu>>1)]; uSrc=_CBA[(rgb>>((tu&1)<<2))&0xf]; if(!uSrc) goto endpoly; } - if (TM==2) { uSrc = _CBA[(((u8*)_TBA)[(tCor>>23)+((tCor<<4)&(0xff<<11))])]; if(!uSrc) goto endpoly; } - if (TM==3) { uSrc = _TBA[(tCor>>23)+((tCor<<3)&(0xff<<10))]; if(!uSrc) goto endpoly; } - // blend - if(B) - { - if (uSrc&0x8000) - { - // light - if(L) gpuLightingTXT(uSrc, lCol); - if(!M) { uDst = *pDst; } - if (BM==0) gpuBlending00(uSrc, uDst); - if (BM==1) gpuBlending01(uSrc, uDst); - if (BM==2) gpuBlending02(uSrc, uDst); - if (BM==3) gpuBlending03(uSrc, uDst); - } - else - { - // light - if(L) gpuLightingTXT(uSrc, lCol); - } + if (CF_BLITMASK) { if ((bMsk>>((((uintptr_t)pDst)>>1)&7))&1) goto endpolytext; } + if (CF_MASKCHECK || CF_BLEND) { uDst = *pDst; } + if (CF_MASKCHECK) if (uDst&0x8000) { goto endpolytext; } + + //senquack - adapted to work with new 22.10 fixed point routines: + // (UNAI originally used 16.16) + if (CF_TEXTMODE==1) { // 4bpp (CLUT) + u32 tu=(l_u>>10); + u32 tv=(l_v<<1)&(0xff<<11); + u8 rgb=((u8*)TBA_)[tv+(tu>>1)]; + uSrc=CBA_[(rgb>>((tu&1)<<2))&0xf]; + if (!uSrc) goto endpolytext; + } + if (CF_TEXTMODE==2) { // 8bpp (CLUT) + uSrc = CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])]; + if (!uSrc) goto endpolytext; } - else + if (CF_TEXTMODE==3) { // 16bpp + uSrc = TBA_[(l_u>>10)+((l_v)&(0xff<<10))]; + if (!uSrc) goto endpolytext; + } + + // Save source MSB, as blending or lighting will not (Silent Hill) + if (CF_BLEND || CF_LIGHT) srcMSB = uSrc & 0x8000; + + // When textured, only dither when LIGHT (texture blend) is enabled + // LIGHT && BLEND => dither + // LIGHT && !BLEND => dither + //!LIGHT && BLEND => no dither + //!LIGHT && !BLEND => no dither + + if (CF_DITHER && CF_LIGHT) { + u32 uSrc24; + if ( CF_GOURAUD) + uSrc24 = gpuLightingTXT24Gouraud(uSrc, l_gCol); + if (!CF_GOURAUD) + uSrc24 = gpuLightingTXT24(uSrc, r8, g8, b8); + + if (CF_BLEND && srcMSB) + uSrc24 = gpuBlending24(uSrc24, uDst); + + uSrc = gpuColorQuantization24(uSrc24, pDst); + } else { - // light - if(L) { gpuLightingTXT(uSrc, lCol); } else if(!MB) { uSrc&= 0x7fff; } + if (CF_LIGHT) { + if ( CF_GOURAUD) + uSrc = gpuLightingTXTGouraud(uSrc, l_gCol); + if (!CF_GOURAUD) + uSrc = gpuLightingTXT(uSrc, r5, g5, b5); + } + + if (CF_BLEND && srcMSB) + uSrc = gpuBlending(uSrc, uDst); } - if (MB) { *pDst = uSrc | 0x8000; } - else { *pDst = uSrc; } - endpoly: pDst++; - tCor=(tCor+tinc)&tmsk; - if (L&&G) lCol=(lCol+linc); + + if (CF_MASKSET) { *pDst = uSrc | 0x8000; } + else if (CF_BLEND || CF_LIGHT) { *pDst = uSrc | srcMSB; } + else { *pDst = uSrc; } +endpolytext: + pDst++; + l_u = (l_u + l_u_inc) & l_u_msk; + l_v = (l_v + l_v_inc) & l_v_msk; + if (CF_LIGHT && CF_GOURAUD) l_gCol += l_gInc; } while (--count); } } -// supposedly shouldn't be called? -static void gpuPolySpanFn_NULL_(u16 *pDst, u32 count) +static void PolyNULL(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count) { + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"PolyNULL()\n"); + #endif } -/////////////////////////////////////////////////////////////////////////////// - /////////////////////////////////////////////////////////////////////////////// // Polygon innerloops driver -typedef void (*PP)(u16 *pDst, u32 count); -const PP gpuPolySpanDrivers[512] = -{ - gpuPolySpanFn<0x00>,gpuPolySpanFn<0x01>,gpuPolySpanFn<0x02>,gpuPolySpanFn<0x03>, gpuPolySpanFn<0x04>,gpuPolySpanFn<0x05>,gpuPolySpanFn<0x06>,gpuPolySpanFn<0x07>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x0A>,gpuPolySpanFn<0x0B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x0E>,gpuPolySpanFn<0x0F>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x12>,gpuPolySpanFn<0x13>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x16>,gpuPolySpanFn<0x17>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1A>,gpuPolySpanFn<0x1B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1E>,gpuPolySpanFn<0x1F>, - gpuPolySpanFn<0x20>,gpuPolySpanFn<0x21>,gpuPolySpanFn<0x22>,gpuPolySpanFn<0x23>, gpuPolySpanFn<0x24>,gpuPolySpanFn<0x25>,gpuPolySpanFn<0x26>,gpuPolySpanFn<0x27>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x2A>,gpuPolySpanFn<0x2B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x2E>,gpuPolySpanFn<0x2F>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x32>,gpuPolySpanFn<0x33>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x36>,gpuPolySpanFn<0x37>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x3A>,gpuPolySpanFn<0x3B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x3E>,gpuPolySpanFn<0x3F>, - gpuPolySpanFn<0x40>,gpuPolySpanFn<0x41>,gpuPolySpanFn<0x42>,gpuPolySpanFn<0x43>, gpuPolySpanFn<0x44>,gpuPolySpanFn<0x45>,gpuPolySpanFn<0x46>,gpuPolySpanFn<0x47>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x4A>,gpuPolySpanFn<0x4B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x4E>,gpuPolySpanFn<0x4F>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x52>,gpuPolySpanFn<0x53>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x56>,gpuPolySpanFn<0x57>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x5A>,gpuPolySpanFn<0x5B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x5E>,gpuPolySpanFn<0x5F>, - gpuPolySpanFn<0x60>,gpuPolySpanFn<0x61>,gpuPolySpanFn<0x62>,gpuPolySpanFn<0x63>, gpuPolySpanFn<0x64>,gpuPolySpanFn<0x65>,gpuPolySpanFn<0x66>,gpuPolySpanFn<0x67>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x6A>,gpuPolySpanFn<0x6B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x6E>,gpuPolySpanFn<0x6F>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x72>,gpuPolySpanFn<0x73>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x76>,gpuPolySpanFn<0x77>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x7A>,gpuPolySpanFn<0x7B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x7E>,gpuPolySpanFn<0x7F>, - - gpuPolySpanFn_NULL_,gpuPolySpanFn<0x81>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x83>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x85>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x87>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x8B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x8F>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x93>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x97>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x9B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x9F>, - gpuPolySpanFn_NULL_,gpuPolySpanFn<0xa1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xa3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0xa5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xa7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xaB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xaF>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xb3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xb7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xbB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xbF>, - gpuPolySpanFn_NULL_,gpuPolySpanFn<0xc1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xc3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0xc5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xc7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xcB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xcF>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xd3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xd7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xdB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xdF>, - gpuPolySpanFn_NULL_,gpuPolySpanFn<0xe1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xe3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0xe5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xe7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xeB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xeF>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xf3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xf7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xfB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0xfF>, - - gpuPolySpanFn<0x100>,gpuPolySpanFn<0x101>,gpuPolySpanFn<0x102>,gpuPolySpanFn<0x103>, gpuPolySpanFn<0x104>,gpuPolySpanFn<0x105>,gpuPolySpanFn<0x106>,gpuPolySpanFn<0x107>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x10A>,gpuPolySpanFn<0x10B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x10E>,gpuPolySpanFn<0x10F>, - gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x112>,gpuPolySpanFn<0x113>, gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x116>,gpuPolySpanFn<0x117>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x11A>,gpuPolySpanFn<0x11B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x11E>,gpuPolySpanFn<0x11F>, - gpuPolySpanFn<0x120>,gpuPolySpanFn<0x121>,gpuPolySpanFn<0x122>,gpuPolySpanFn<0x123>, gpuPolySpanFn<0x124>,gpuPolySpanFn<0x125>,gpuPolySpanFn<0x126>,gpuPolySpanFn<0x127>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x12A>,gpuPolySpanFn<0x12B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x12E>,gpuPolySpanFn<0x12F>, - gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x132>,gpuPolySpanFn<0x133>, gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x136>,gpuPolySpanFn<0x137>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x13A>,gpuPolySpanFn<0x13B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x13E>,gpuPolySpanFn<0x13F>, - gpuPolySpanFn<0x140>,gpuPolySpanFn<0x141>,gpuPolySpanFn<0x142>,gpuPolySpanFn<0x143>, gpuPolySpanFn<0x144>,gpuPolySpanFn<0x145>,gpuPolySpanFn<0x146>,gpuPolySpanFn<0x147>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x14A>,gpuPolySpanFn<0x14B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x14E>,gpuPolySpanFn<0x14F>, - gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x152>,gpuPolySpanFn<0x153>, gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x156>,gpuPolySpanFn<0x157>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x15A>,gpuPolySpanFn<0x15B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x15E>,gpuPolySpanFn<0x15F>, - gpuPolySpanFn<0x160>,gpuPolySpanFn<0x161>,gpuPolySpanFn<0x162>,gpuPolySpanFn<0x163>, gpuPolySpanFn<0x164>,gpuPolySpanFn<0x165>,gpuPolySpanFn<0x166>,gpuPolySpanFn<0x167>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x16A>,gpuPolySpanFn<0x16B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x16E>,gpuPolySpanFn<0x16F>, - gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x172>,gpuPolySpanFn<0x173>, gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_, gpuPolySpanFn<0x176>,gpuPolySpanFn<0x177>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x17A>,gpuPolySpanFn<0x17B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x17E>,gpuPolySpanFn<0x17F>, - - gpuPolySpanFn_NULL_,gpuPolySpanFn<0x181>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x183>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x185>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x187>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x18B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x18F>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x193>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x197>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x19B>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x19F>, - gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1a1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1a3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1a5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1a7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1aB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1aF>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1b3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1b7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1bB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1bF>, - gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1c1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1c3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1c5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1c7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1cB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1cF>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1d3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1d7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1dB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1dF>, - gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1e1>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1e3>, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1e5>,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1e7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1eB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1eF>, - gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1f3>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_, gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1f7>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1fB>, gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn_NULL_,gpuPolySpanFn<0x1fF> +typedef void (*PP)(const gpu_unai_t &gpu_unai, u16 *pDst, u32 count); + +// Template instantiation helper macros +#define TI(cf) gpuPolySpanFn<(cf)> +#define TN PolyNULL +#define TIBLOCK(ub) \ + TI((ub)|0x00), TI((ub)|0x01), TI((ub)|0x02), TI((ub)|0x03), TI((ub)|0x04), TI((ub)|0x05), TI((ub)|0x06), TI((ub)|0x07), \ + TN, TN, TI((ub)|0x0a), TI((ub)|0x0b), TN, TN, TI((ub)|0x0e), TI((ub)|0x0f), \ + TN, TN, TI((ub)|0x12), TI((ub)|0x13), TN, TN, TI((ub)|0x16), TI((ub)|0x17), \ + TN, TN, TI((ub)|0x1a), TI((ub)|0x1b), TN, TN, TI((ub)|0x1e), TI((ub)|0x1f), \ + TI((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \ + TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \ + TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \ + TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \ + TI((ub)|0x40), TI((ub)|0x41), TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \ + TN, TN, TI((ub)|0x4a), TI((ub)|0x4b), TN, TN, TI((ub)|0x4e), TI((ub)|0x4f), \ + TN, TN, TI((ub)|0x52), TI((ub)|0x53), TN, TN, TI((ub)|0x56), TI((ub)|0x57), \ + TN, TN, TI((ub)|0x5a), TI((ub)|0x5b), TN, TN, TI((ub)|0x5e), TI((ub)|0x5f), \ + TI((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \ + TN, TN, TI((ub)|0x6a), TI((ub)|0x6b), TN, TN, TI((ub)|0x6e), TI((ub)|0x6f), \ + TN, TN, TI((ub)|0x72), TI((ub)|0x73), TN, TN, TI((ub)|0x76), TI((ub)|0x77), \ + TN, TN, TI((ub)|0x7a), TI((ub)|0x7b), TN, TN, TI((ub)|0x7e), TI((ub)|0x7f), \ + TN, TI((ub)|0x81), TN, TI((ub)|0x83), TN, TI((ub)|0x85), TN, TI((ub)|0x87), \ + TN, TN, TN, TI((ub)|0x8b), TN, TN, TN, TI((ub)|0x8f), \ + TN, TN, TN, TI((ub)|0x93), TN, TN, TN, TI((ub)|0x97), \ + TN, TN, TN, TI((ub)|0x9b), TN, TN, TN, TI((ub)|0x9f), \ + TN, TI((ub)|0xa1), TN, TI((ub)|0xa3), TN, TI((ub)|0xa5), TN, TI((ub)|0xa7), \ + TN, TN, TN, TI((ub)|0xab), TN, TN, TN, TI((ub)|0xaf), \ + TN, TN, TN, TI((ub)|0xb3), TN, TN, TN, TI((ub)|0xb7), \ + TN, TN, TN, TI((ub)|0xbb), TN, TN, TN, TI((ub)|0xbf), \ + TN, TI((ub)|0xc1), TN, TI((ub)|0xc3), TN, TI((ub)|0xc5), TN, TI((ub)|0xc7), \ + TN, TN, TN, TI((ub)|0xcb), TN, TN, TN, TI((ub)|0xcf), \ + TN, TN, TN, TI((ub)|0xd3), TN, TN, TN, TI((ub)|0xd7), \ + TN, TN, TN, TI((ub)|0xdb), TN, TN, TN, TI((ub)|0xdf), \ + TN, TI((ub)|0xe1), TN, TI((ub)|0xe3), TN, TI((ub)|0xe5), TN, TI((ub)|0xe7), \ + TN, TN, TN, TI((ub)|0xeb), TN, TN, TN, TI((ub)|0xef), \ + TN, TN, TN, TI((ub)|0xf3), TN, TN, TN, TI((ub)|0xf7), \ + TN, TN, TN, TI((ub)|0xfb), TN, TN, TN, TI((ub)|0xff) + +const PP gpuPolySpanDrivers[2048] = { + TIBLOCK(0<<8), TIBLOCK(1<<8), TIBLOCK(2<<8), TIBLOCK(3<<8), + TIBLOCK(4<<8), TIBLOCK(5<<8), TIBLOCK(6<<8), TIBLOCK(7<<8) }; + +#undef TI +#undef TN +#undef TIBLOCK diff --git a/plugins/gpu_unai/gpu_inner_blend.h b/plugins/gpu_unai/gpu_inner_blend.h index ce439d3..93c268b 100644 --- a/plugins/gpu_unai/gpu_inner_blend.h +++ b/plugins/gpu_unai/gpu_inner_blend.h @@ -23,132 +23,166 @@ // GPU Blending operations functions -#ifdef __arm__ -#define gpuBlending00(uSrc,uDst) \ -{ \ - asm ("and %[src], %[src], %[msk]\n" \ - "and %[dst], %[dst], %[msk]\n" \ - "add %[src], %[dst], %[src]\n" \ - "mov %[src], %[src], lsr #1\n" \ - : [src] "=&r" (uSrc), [dst] "=&r" (uDst) : "0" (uSrc), "1" (uDst), [msk] "r" (uMsk)); \ -} -#else -#define gpuBlending00(uSrc,uDst) \ -{ \ - uSrc = (((uDst & uMsk) + (uSrc & uMsk)) >> 1); \ -} -#endif +//////////////////////////////////////////////////////////////////////////////// +// Blend bgr555 color in 'uSrc' (foreground) with bgr555 color +// in 'uDst' (background), returning resulting color. +// +// INPUT: +// 'uSrc','uDst' input: -bbbbbgggggrrrrr +// ^ bit 16 +// OUTPUT: +// u16 output: 0bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// Where '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +template +GPU_INLINE u16 gpuBlending(u16 uSrc, u16 uDst) +{ + // These use Blargg's bitwise modulo-clamping: + // http://blargg.8bitalley.com/info/rgb_mixing.html + // http://blargg.8bitalley.com/info/rgb_clamped_add.html + // http://blargg.8bitalley.com/info/rgb_clamped_sub.html -// 1.0 x Back + 1.0 x Forward -#ifdef __arm__ -#define gpuBlending01(uSrc,uDst) \ -{ \ - u32 st,dt,out; \ - asm ("and %[dt], %[dst], #0x7C00\n" \ - "and %[st], %[src], #0x7C00\n" \ - "add %[out], %[dt], %[st] \n" \ - "cmp %[out], #0x7C00 \n" \ - "movhi %[out], #0x7C00 \n" \ - "and %[dt], %[dst], #0x03E0\n" \ - "and %[st], %[src], #0x03E0\n" \ - "add %[dt], %[dt], %[st] \n" \ - "cmp %[dt], #0x03E0 \n" \ - "movhi %[dt], #0x03E0 \n" \ - "orr %[out], %[out], %[dt] \n" \ - "and %[dt], %[dst], #0x001F\n" \ - "and %[st], %[src], #0x001F\n" \ - "add %[dt], %[dt], %[st] \n" \ - "cmp %[dt], #0x001F \n" \ - "movhi %[dt], #0x001F \n" \ - "orr %[src], %[out], %[dt] \n" \ - : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ - : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ -} + u16 mix; + + // 0.5 x Back + 0.5 x Forward + if (BLENDMODE==0) { +#ifdef GPU_UNAI_USE_ACCURATE_BLENDING + // Slower, but more accurate (doesn't lose LSB data) + uDst &= 0x7fff; + if (!SKIP_USRC_MSB_MASK) + uSrc &= 0x7fff; + mix = ((uSrc + uDst) - ((uSrc ^ uDst) & 0x0421)) >> 1; #else -#define gpuBlending01(uSrc,uDst) \ -{ \ - u16 rr, gg, bb; \ - bb = (uDst & 0x7C00) + (uSrc & 0x7C00); if (bb > 0x7C00) bb = 0x7C00; \ - gg = (uDst & 0x03E0) + (uSrc & 0x03E0); if (gg > 0x03E0) gg = 0x03E0; bb |= gg; \ - rr = (uDst & 0x001F) + (uSrc & 0x001F); if (rr > 0x001F) rr = 0x001F; bb |= rr; \ - uSrc = bb; \ -} + mix = ((uDst & 0x7bde) + (uSrc & 0x7bde)) >> 1; #endif + } + + // 1.0 x Back + 1.0 x Forward + if (BLENDMODE==1) { + uDst &= 0x7fff; + if (!SKIP_USRC_MSB_MASK) + uSrc &= 0x7fff; + u32 sum = uSrc + uDst; + u32 low_bits = (uSrc ^ uDst) & 0x0421; + u32 carries = (sum - low_bits) & 0x8420; + u32 modulo = sum - carries; + u32 clamp = carries - (carries >> 5); + mix = modulo | clamp; + } + + // 1.0 x Back - 1.0 x Forward + if (BLENDMODE==2) { + uDst &= 0x7fff; + if (!SKIP_USRC_MSB_MASK) + uSrc &= 0x7fff; + u32 diff = uDst - uSrc + 0x8420; + u32 low_bits = (uDst ^ uSrc) & 0x8420; + u32 borrows = (diff - low_bits) & 0x8420; + u32 modulo = diff - borrows; + u32 clamp = borrows - (borrows >> 5); + mix = modulo & clamp; + } -// 1.0 x Back - 1.0 x Forward */ -#ifdef __arm__ -#define gpuBlending02(uSrc,uDst) \ -{ \ - u32 st,dt,out; \ - asm ("and %[dt], %[dst], #0x7C00\n" \ - "and %[st], %[src], #0x7C00\n" \ - "subs %[out], %[dt], %[st] \n" \ - "movmi %[out], #0x0000 \n" \ - "and %[dt], %[dst], #0x03E0\n" \ - "and %[st], %[src], #0x03E0\n" \ - "subs %[dt], %[dt], %[st] \n" \ - "orrpl %[out], %[out], %[dt] \n" \ - "and %[dt], %[dst], #0x001F\n" \ - "and %[st], %[src], #0x001F\n" \ - "subs %[dt], %[dt], %[st] \n" \ - "orrpl %[out], %[out], %[dt] \n" \ - "mov %[src], %[out] \n" \ - : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ - : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ + // 1.0 x Back + 0.25 x Forward + if (BLENDMODE==3) { + uDst &= 0x7fff; + uSrc = ((uSrc >> 2) & 0x1ce7); + u32 sum = uSrc + uDst; + u32 low_bits = (uSrc ^ uDst) & 0x0421; + u32 carries = (sum - low_bits) & 0x8420; + u32 modulo = sum - carries; + u32 clamp = carries - (carries >> 5); + mix = modulo | clamp; + } + + return mix; } -int btest(int s, int d) + +//////////////////////////////////////////////////////////////////////////////// +// Convert bgr555 color in uSrc to padded u32 5.4:5.4:5.4 bgr fixed-pt +// color triplet suitable for use with HQ 24-bit quantization. +// +// INPUT: +// 'uDst' input: -bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuGetRGB24(u16 uSrc) { - gpuBlending02(s, d); - return s; -} -#else -#define gpuBlending02(uSrc,uDst) \ -{ \ - s32 rr, gg, bb; \ - bb = (uDst & 0x7C00) - (uSrc & 0x7C00); if (bb < 0) bb = 0; \ - gg = (uDst & 0x03E0) - (uSrc & 0x03E0); if (gg > 0) bb |= gg; \ - rr = (uDst & 0x001F) - (uSrc & 0x001F); if (rr > 0) bb |= rr; \ - uSrc = bb; \ + return ((uSrc & 0x7C00)<<14) + | ((uSrc & 0x03E0)<< 9) + | ((uSrc & 0x001F)<< 4); } -#endif -// 1.0 x Back + 0.25 x Forward */ -#ifdef __arm__ -#define gpuBlending03(uSrc,uDst) \ -{ \ - u32 st,dt,out; \ - asm ("mov %[src], %[src], lsr #2 \n" \ - "and %[dt], %[dst], #0x7C00\n" \ - "and %[st], %[src], #0x1C00\n" \ - "add %[out], %[dt], %[st] \n" \ - "cmp %[out], #0x7C00 \n" \ - "movhi %[out], #0x7C00 \n" \ - "and %[dt], %[dst], #0x03E0\n" \ - "and %[st], %[src], #0x00E0\n" \ - "add %[dt], %[dt], %[st] \n" \ - "cmp %[dt], #0x03E0 \n" \ - "movhi %[dt], #0x03E0 \n" \ - "orr %[out], %[out], %[dt] \n" \ - "and %[dt], %[dst], #0x001F\n" \ - "and %[st], %[src], #0x0007\n" \ - "add %[dt], %[dt], %[st] \n" \ - "cmp %[dt], #0x001F \n" \ - "movhi %[dt], #0x001F \n" \ - "orr %[src], %[out], %[dt] \n" \ - : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ - : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ -} -#else -#define gpuBlending03(uSrc,uDst) \ -{ \ - u16 rr, gg, bb; \ - uSrc >>= 2; \ - bb = (uDst & 0x7C00) + (uSrc & 0x1C00); if (bb > 0x7C00) bb = 0x7C00; \ - gg = (uDst & 0x03E0) + (uSrc & 0x00E0); if (gg > 0x03E0) gg = 0x03E0; bb |= gg; \ - rr = (uDst & 0x001F) + (uSrc & 0x0007); if (rr > 0x001F) rr = 0x001F; bb |= rr; \ - uSrc = bb; \ + +//////////////////////////////////////////////////////////////////////////////// +// Blend padded u32 5.4:5.4:5.4 bgr fixed-pt color triplet in 'uSrc24' +// (foreground color) with bgr555 color in 'uDst' (background color), +// returning the resulting u32 5.4:5.4:5.4 color. +// +// INPUT: +// 'uSrc24' input: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// 'uDst' input: -bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +template +GPU_INLINE u32 gpuBlending24(u32 uSrc24, u16 uDst) +{ + // These use techniques adapted from Blargg's techniques mentioned in + // in gpuBlending() comments above. Not as much bitwise trickery is + // necessary because of presence of 0 padding in uSrc24 format. + + u32 uDst24 = gpuGetRGB24(uDst); + u32 mix; + + // 0.5 x Back + 0.5 x Forward + if (BLENDMODE==0) { + const u32 uMsk = 0x1FE7F9FE; + // Only need to mask LSBs of uSrc24, uDst24's LSBs are 0 already + mix = (uDst24 + (uSrc24 & uMsk)) >> 1; + } + + // 1.0 x Back + 1.0 x Forward + if (BLENDMODE==1) { + u32 sum = uSrc24 + uDst24; + u32 carries = sum & 0x20080200; + u32 modulo = sum - carries; + u32 clamp = carries - (carries >> 9); + mix = modulo | clamp; + } + + // 1.0 x Back - 1.0 x Forward + if (BLENDMODE==2) { + // Insert ones in 0-padded borrow slot of color to be subtracted from + uDst24 |= 0x20080200; + u32 diff = uDst24 - uSrc24; + u32 borrows = diff & 0x20080200; + u32 clamp = borrows - (borrows >> 9); + mix = diff & clamp; + } + + // 1.0 x Back + 0.25 x Forward + if (BLENDMODE==3) { + uSrc24 = (uSrc24 & 0x1FC7F1FC) >> 2; + u32 sum = uSrc24 + uDst24; + u32 carries = sum & 0x20080200; + u32 modulo = sum - carries; + u32 clamp = carries - (carries >> 9); + mix = modulo | clamp; + } + + return mix; } -#endif #endif //_OP_BLEND_H_ diff --git a/plugins/gpu_unai/gpu_inner_blend_arm5.h b/plugins/gpu_unai/gpu_inner_blend_arm5.h new file mode 100644 index 0000000..0e9b74f --- /dev/null +++ b/plugins/gpu_unai/gpu_inner_blend_arm5.h @@ -0,0 +1,100 @@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * +***************************************************************************/ + +#ifndef _OP_BLEND_H_ +#define _OP_BLEND_H_ + +// GPU Blending operations functions + +#define gpuBlending00(uSrc,uDst) \ +{ \ + asm ("and %[src], %[src], %[msk] " : [src] "=r" (uSrc) : "0" (uSrc), [msk] "r" (uMsk) ); \ + asm ("and %[dst], %[dst], %[msk] " : [dst] "=r" (uDst) : "0" (uDst), [msk] "r" (uMsk) ); \ + asm ("add %[src], %[dst], %[src] " : [src] "=r" (uSrc) : [dst] "r" (uDst), "0" (uSrc) ); \ + asm ("mov %[src], %[src], lsr #1 " : [src] "=r" (uSrc) : "0" (uSrc) ); \ +} + +// 1.0 x Back + 1.0 x Forward +#define gpuBlending01(uSrc,uDst) \ +{ \ + u16 st,dt,out; \ + asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \ + asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \ + asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \ + asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ + asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \ + asm ("movhi %[dt], #0x03E0 " : [dt] "=r" (dt) : "0" (dt) ); \ + asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ + asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ + asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \ + asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \ + asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \ +} + +// 1.0 x Back - 1.0 x Forward */ +#define gpuBlending02(uSrc,uDst) \ +{ \ + u16 st,dt,out; \ + asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x7C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("subs %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) : "cc" ); \ + asm ("movmi %[out], #0x0000 " : [out] "=r" (out) : "0" (out) ); \ + asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x03E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \ + asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ + asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x001F " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("subs %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) : "cc" ); \ + asm ("orrpl %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ + asm ("mov %[uSrc], %[out]" : [uSrc] "=r" (uSrc) : [out] "r" (out) ); \ +} + +// 1.0 x Back + 0.25 x Forward */ +#define gpuBlending03(uSrc,uDst) \ +{ \ + u16 st,dt,out; \ + asm ("mov %[src], %[src], lsr #2 " : [src] "=r" (uSrc) : "0" (uSrc) ); \ + asm ("and %[dt], %[dst], #0x7C00 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x1C00 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[out], %[dt], %[st] " : [out] "=r" (out) : [dt] "r" (dt), [st] "r" (st) ); \ + asm ("cmp %[out], #0x7C00 " : : [out] "r" (out) : "cc" ); \ + asm ("movhi %[out], #0x7C00 " : [out] "=r" (out) : "0" (out) ); \ + asm ("and %[dt], %[dst], #0x03E0 " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x00E0 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ + asm ("cmp %[dt], #0x03E0 " : : [dt] "r" (dt) : "cc" ); \ + asm ("movhi %[dt], #0x03E0 " : [dt] "=r" (dt) : "0" (dt) ); \ + asm ("orr %[out], %[out], %[dt] " : [out] "=r" (out) : "0" (out), [dt] "r" (dt) ); \ + asm ("and %[dt], %[dst], #0x001F " : [dt] "=r" (dt) : [dst] "r" (uDst) ); \ + asm ("and %[st], %[src], #0x0007 " : [st] "=r" (st) : [src] "r" (uSrc) ); \ + asm ("add %[dt], %[dt], %[st] " : [dt] "=r" (dt) : "0" (dt), [st] "r" (st) ); \ + asm ("cmp %[dt], #0x001F " : : [dt] "r" (dt) : "cc" ); \ + asm ("movhi %[dt], #0x001F " : [dt] "=r" (dt) : "0" (dt) ); \ + asm ("orr %[uSrc], %[out], %[dt] " : [uSrc] "=r" (uSrc) : [out] "r" (out), [dt] "r" (dt) ); \ +} + +#endif //_OP_BLEND_H_ diff --git a/plugins/gpu_unai/gpu_inner_blend_arm7.h b/plugins/gpu_unai/gpu_inner_blend_arm7.h new file mode 100644 index 0000000..083e62d --- /dev/null +++ b/plugins/gpu_unai/gpu_inner_blend_arm7.h @@ -0,0 +1,107 @@ +/*************************************************************************** +* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2010 Unai * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * +***************************************************************************/ + +#ifndef _OP_BLEND_H_ +#define _OP_BLEND_H_ + +// GPU Blending operations functions + +#define gpuBlending00(uSrc,uDst) \ +{ \ + asm ("and %[src], %[src], %[msk]\n" \ + "and %[dst], %[dst], %[msk]\n" \ + "add %[src], %[dst], %[src]\n" \ + "mov %[src], %[src], lsr #1\n" \ + : [src] "=&r" (uSrc), [dst] "=&r" (uDst) : "0" (uSrc), "1" (uDst), [msk] "r" (uMsk)); \ +} + +// 1.0 x Back + 1.0 x Forward +#define gpuBlending01(uSrc,uDst) \ +{ \ + u32 st,dt,out; \ + asm ("and %[dt], %[dst], #0x7C00\n" \ + "and %[st], %[src], #0x7C00\n" \ + "add %[out], %[dt], %[st] \n" \ + "cmp %[out], #0x7C00 \n" \ + "movhi %[out], #0x7C00 \n" \ + "and %[dt], %[dst], #0x03E0\n" \ + "and %[st], %[src], #0x03E0\n" \ + "add %[dt], %[dt], %[st] \n" \ + "cmp %[dt], #0x03E0 \n" \ + "movhi %[dt], #0x03E0 \n" \ + "orr %[out], %[out], %[dt] \n" \ + "and %[dt], %[dst], #0x001F\n" \ + "and %[st], %[src], #0x001F\n" \ + "add %[dt], %[dt], %[st] \n" \ + "cmp %[dt], #0x001F \n" \ + "movhi %[dt], #0x001F \n" \ + "orr %[src], %[out], %[dt] \n" \ + : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ + : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ +} + +// 1.0 x Back - 1.0 x Forward */ +#define gpuBlending02(uSrc,uDst) \ +{ \ + u32 st,dt,out; \ + asm ("and %[dt], %[dst], #0x7C00\n" \ + "and %[st], %[src], #0x7C00\n" \ + "subs %[out], %[dt], %[st] \n" \ + "movmi %[out], #0x0000 \n" \ + "and %[dt], %[dst], #0x03E0\n" \ + "and %[st], %[src], #0x03E0\n" \ + "subs %[dt], %[dt], %[st] \n" \ + "orrpl %[out], %[out], %[dt] \n" \ + "and %[dt], %[dst], #0x001F\n" \ + "and %[st], %[src], #0x001F\n" \ + "subs %[dt], %[dt], %[st] \n" \ + "orrpl %[out], %[out], %[dt] \n" \ + "mov %[src], %[out] \n" \ + : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ + : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ +} + +// 1.0 x Back + 0.25 x Forward */ +#define gpuBlending03(uSrc,uDst) \ +{ \ + u32 st,dt,out; \ + asm ("mov %[src], %[src], lsr #2 \n" \ + "and %[dt], %[dst], #0x7C00\n" \ + "and %[st], %[src], #0x1C00\n" \ + "add %[out], %[dt], %[st] \n" \ + "cmp %[out], #0x7C00 \n" \ + "movhi %[out], #0x7C00 \n" \ + "and %[dt], %[dst], #0x03E0\n" \ + "and %[st], %[src], #0x00E0\n" \ + "add %[dt], %[dt], %[st] \n" \ + "cmp %[dt], #0x03E0 \n" \ + "movhi %[dt], #0x03E0 \n" \ + "orr %[out], %[out], %[dt] \n" \ + "and %[dt], %[dst], #0x001F\n" \ + "and %[st], %[src], #0x0007\n" \ + "add %[dt], %[dt], %[st] \n" \ + "cmp %[dt], #0x001F \n" \ + "movhi %[dt], #0x001F \n" \ + "orr %[src], %[out], %[dt] \n" \ + : [src] "=r" (uSrc), [st] "=&r" (st), [dt] "=&r" (dt), [out] "=&r" (out) \ + : [dst] "r" (uDst), "0" (uSrc) : "cc"); \ +} + +#endif //_OP_BLEND_H_ diff --git a/plugins/gpu_unai/gpu_inner_light.h b/plugins/gpu_unai/gpu_inner_light.h index d291418..b041dc3 100644 --- a/plugins/gpu_unai/gpu_inner_light.h +++ b/plugins/gpu_unai/gpu_inner_light.h @@ -1,5 +1,5 @@ /*************************************************************************** -* Copyright (C) 2010 PCSX4ALL Team * +* Copyright (C) 2016 PCSX4ALL Team * * Copyright (C) 2010 Unai * * * * This program is free software; you can redistribute it and/or modify * @@ -23,60 +23,249 @@ // GPU color operations for lighting calculations -#ifdef __arm__ -#define gpuLightingRGB(uSrc,lCol) \ -{ \ - u32 cb,cg; \ - asm ("and %[cb], %[lCol], #0x7C00/32 \n" \ - "and %[cg], %[lCol], #0x03E0*2048 \n" \ - "mov %[res], %[lCol], lsr #27\n" \ - "orr %[res], %[res], %[cb], lsl #5 \n" \ - "orr %[res], %[res], %[cg], lsr #11\n" \ - : [res] "=&r" (uSrc), [cb] "=&r" (cb), [cg] "=&r" (cg) \ - : [lCol] "r" (lCol)); \ +static void SetupLightLUT() +{ + // 1024-entry lookup table that modulates 5-bit texture + 5-bit light value. + // A light value of 15 does not modify the incoming texture color. + // LightLUT[32*32] array is initialized to following values: + // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + // 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, + // 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + // 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, + // 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,10,10,10,11,11, + // 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9,10,10,10,11,11,12,12,13,13, + // 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15, + // 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9,10,10,11,11,12,12,13,14,14,15,15,16,16,17, + // 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9,10,10,11,11,12,13,13,14,15,15,16,16,17,18,18,19, + // 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,19,20,21, + // 0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9,10,11,12,12,13,14,15,15,16,17,18,18,19,20,21,21,22,23, + // 0, 0, 1, 2, 3, 4, 4, 5, 6, 7, 8, 8, 9,10,11,12,13,13,14,15,16,17,17,18,19,20,21,21,22,23,24,25, + // 0, 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,15,16,17,18,19,20,21,21,22,23,24,25,26,27, + // 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29, + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,31, + // 0, 1, 2, 3, 4, 5, 6, 7, 9,10,11,12,13,14,15,16,18,19,20,21,22,23,24,25,27,28,29,30,31,31,31,31, + // 0, 1, 2, 3, 4, 5, 7, 8, 9,10,11,13,14,15,16,17,19,20,21,22,23,24,26,27,28,29,30,31,31,31,31,31, + // 0, 1, 2, 3, 5, 6, 7, 8,10,11,12,13,15,16,17,18,20,21,22,23,25,26,27,28,30,31,31,31,31,31,31,31, + // 0, 1, 2, 3, 5, 6, 7, 9,10,11,13,14,15,17,18,19,21,22,23,24,26,27,28,30,31,31,31,31,31,31,31,31, + // 0, 1, 2, 4, 5, 6, 8, 9,11,12,13,15,16,17,19,20,22,23,24,26,27,28,30,31,31,31,31,31,31,31,31,31, + // 0, 1, 2, 4, 5, 7, 8,10,11,12,14,15,17,18,20,21,23,24,25,27,28,30,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 4, 6, 7, 9,10,12,13,15,16,18,19,21,22,24,25,27,28,30,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 4, 6, 7, 9,10,12,14,15,17,18,20,21,23,25,26,28,29,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 4, 6, 8, 9,11,13,14,16,17,19,21,22,24,26,27,29,30,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 5, 6, 8,10,11,13,15,16,18,20,21,23,25,27,28,30,31,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 5, 7, 8,10,12,14,15,17,19,21,22,24,26,28,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 5, 7, 9,10,12,14,16,18,19,21,23,25,27,29,30,31,31,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 5, 7, 9,11,13,15,16,18,20,22,24,26,28,30,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31, + // 0, 1, 3, 5, 7, 9,11,13,15,17,19,21,23,25,27,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 + + for (int j=0; j < 32; ++j) { + for (int i=0; i < 32; ++i) { + int val = i * j / 16; + if (val > 31) val = 31; + gpu_unai.LightLUT[(j*32) + i] = val; + } + } +} + + +//////////////////////////////////////////////////////////////////////////////// +// Create packed Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet +// +// INPUT: +// 'r','g','b' are 8.10 fixed-pt color components (r shown here) +// 'r' input: --------------rrrrrrrrXXXXXXXXXX +// ^ bit 31 +// RETURNS: +// u32 output: rrrrrrrrXXXggggggggXXXbbbbbbbbXX +// ^ bit 31 +// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '-' don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuPackGouraudCol(u32 r, u32 g, u32 b) +{ + return ((u32)(b>> 8)&(0x03ff )) + | ((u32)(g<< 3)&(0x07ff<<10)) + | ((u32)(r<<14)&(0x07ff<<21)); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Create packed increment for Gouraud fixed-pt 8.3:8.3:8.2 rgb triplet +// +// INPUT: +// Sign-extended 8.10 fixed-pt r,g,b color increment values (only dr is shown) +// 'dr' input: ssssssssssssssrrrrrrrrXXXXXXXXXX +// ^ bit 31 +// RETURNS: +// u32 output: rrrrrrrrXXXggggggggXXXbbbbbbbbXX +// ^ bit 31 +// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and 's' sign bits +// +// NOTE: The correctness of this code/method has not been fully verified, +// having been merely factored out from original code in +// poly-drawing functions. Feel free to check/improve it -senquack +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuPackGouraudColInc(s32 dr, s32 dg, s32 db) +{ + u32 dr_tmp = (u32)(dr << 14)&(0xffffffff<<21); if (dr < 0) dr_tmp += 1<<21; + u32 dg_tmp = (u32)(dg << 3)&(0xffffffff<<10); if (dg < 0) dg_tmp += 1<<10; + u32 db_tmp = (u32)(db >> 8)&(0xffffffff ); if (db < 0) db_tmp += 1<< 0; + return db_tmp + dg_tmp + dr_tmp; } -#else -#define gpuLightingRGB(uSrc,lCol) uSrc=((lCol<<5)&0x7C00) | ((lCol>>11)&0x3E0) | (lCol>>27) -#endif -INLINE void gpuLightingTXT(u16 &uSrc, u32 &lCol) + +//////////////////////////////////////////////////////////////////////////////// +// Extract bgr555 color from Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet +// +// INPUT: +// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX +// ^ bit 31 +// RETURNS: +// u16 output: 0bbbbbgggggrrrrr +// ^ bit 16 +// Where 'r,g,b' are integer bits of colors, 'X' fixed-pt, and '0' zero +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u16 gpuLightingRGB(u32 gCol) +{ + return ((gCol<< 5)&0x7C00) | + ((gCol>>11)&0x03E0) | + (gCol>>27); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Convert packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet in 'gCol' +// to padded u32 5.4:5.4:5.4 bgr fixed-pt triplet, suitable for use +// with HQ 24-bit lighting/quantization. +// +// INPUT: +// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX +// ^ bit 31 +// RETURNS: +// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// Where 'X' are fixed-pt bits, '0' zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuLightingRGB24(u32 gCol) +{ + return ((gCol<<19) & (0x1FF<<20)) | + ((gCol>> 2) & (0x1FF<<10)) | + (gCol>>23); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Apply fast (low-precision) 5-bit lighting to bgr555 texture color: +// +// INPUT: +// 'r5','g5','b5' are unsigned 5-bit color values, value of 15 +// is midpoint that doesn't modify that component of texture +// 'uSrc' input: -bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// u16 output: 0bbbbbgggggrrrrr +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u16 gpuLightingTXT(u16 uSrc, u8 r5, u8 g5, u8 b5) { - // Pixelops Table - static const u8 _gpuLitT[32*32] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, - 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, - 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, - 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9,10,10,10,11,11, - 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9,10,10,10,11,11,12,12,13,13, - 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15, - 0, 0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9,10,10,11,11,12,12,13,14,14,15,15,16,16,17, - 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 6, 7, 8, 8, 9,10,10,11,11,12,13,13,14,15,15,16,16,17,18,18,19, - 0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,19,20,21, - 0, 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9,10,11,12,12,13,14,15,15,16,17,18,18,19,20,21,21,22,23, - 0, 0, 1, 2, 3, 4, 4, 5, 6, 7, 8, 8, 9,10,11,12,13,13,14,15,16,17,17,18,19,20,21,21,22,23,24,25, - 0, 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,15,16,17,18,19,20,21,21,22,23,24,25,26,27, - 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31, - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,31, - 0, 1, 2, 3, 4, 5, 6, 7, 9,10,11,12,13,14,15,16,18,19,20,21,22,23,24,25,27,28,29,30,31,31,31,31, - 0, 1, 2, 3, 4, 5, 7, 8, 9,10,11,13,14,15,16,17,19,20,21,22,23,24,26,27,28,29,30,31,31,31,31,31, - 0, 1, 2, 3, 5, 6, 7, 8,10,11,12,13,15,16,17,18,20,21,22,23,25,26,27,28,30,31,31,31,31,31,31,31, - 0, 1, 2, 3, 5, 6, 7, 9,10,11,13,14,15,17,18,19,21,22,23,24,26,27,28,30,31,31,31,31,31,31,31,31, - 0, 1, 2, 4, 5, 6, 8, 9,11,12,13,15,16,17,19,20,22,23,24,26,27,28,30,31,31,31,31,31,31,31,31,31, - 0, 1, 2, 4, 5, 7, 8,10,11,12,14,15,17,18,20,21,23,24,25,27,28,30,31,31,31,31,31,31,31,31,31,31, - 0, 1, 3, 4, 6, 7, 9,10,12,13,15,16,18,19,21,22,24,25,27,28,30,31,31,31,31,31,31,31,31,31,31,31, - 0, 1, 3, 4, 6, 7, 9,10,12,14,15,17,18,20,21,23,25,26,28,29,31,31,31,31,31,31,31,31,31,31,31,31, - 0, 1, 3, 4, 6, 8, 9,11,13,14,16,17,19,21,22,24,26,27,29,30,31,31,31,31,31,31,31,31,31,31,31,31, - 0, 1, 3, 5, 6, 8,10,11,13,15,16,18,20,21,23,25,27,28,30,31,31,31,31,31,31,31,31,31,31,31,31,31, - 0, 1, 3, 5, 7, 8,10,12,14,15,17,19,21,22,24,26,28,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31, - 0, 1, 3, 5, 7, 9,10,12,14,16,18,19,21,23,25,27,29,30,31,31,31,31,31,31,31,31,31,31,31,31,31,31, - 0, 1, 3, 5, 7, 9,11,13,15,16,18,20,22,24,26,28,30,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31, - 0, 1, 3, 5, 7, 9,11,13,15,17,19,21,23,25,27,29,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 - }; - uSrc = (_gpuLitT[((uSrc&0x7C00)>>5)|((lCol>>5)&0x1f)]<<10)|(_gpuLitT[(uSrc&0x03E0)|((lCol>>16)&0x1f)]<<5)|(_gpuLitT[((uSrc&0x001F)<<5)|(lCol>>27)]); + return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | b5] << 10) | + (gpu_unai.LightLUT[ (uSrc&0x03E0) | g5] << 5) | + (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | r5] ); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Apply fast (low-precision) 5-bit Gouraud lighting to bgr555 texture color: +// +// INPUT: +// 'gCol' is a packed Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet, value of +// 15.0 is midpoint that does not modify color of texture +// gCol input : rrrrrXXXXXXgggggXXXXXXbbbbbXXXXX +// ^ bit 31 +// 'uSrc' input: -bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// u16 output: 0bbbbbgggggrrrrr +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u16 gpuLightingTXTGouraud(u16 uSrc, u32 gCol) +{ + return (gpu_unai.LightLUT[((uSrc&0x7C00)>>5) | ((gCol>> 5)&0x1F)]<<10) | + (gpu_unai.LightLUT[ (uSrc&0x03E0) | ((gCol>>16)&0x1F)]<< 5) | + (gpu_unai.LightLUT[((uSrc&0x001F)<<5) | (gCol>>27) ] ); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Apply high-precision 8-bit lighting to bgr555 texture color, +// returning a padded u32 5.4:5.4:5.4 bgr fixed-pt triplet +// suitable for use with HQ 24-bit lighting/quantization. +// +// INPUT: +// 'r8','g8','b8' are unsigned 8-bit color component values, value of +// 127 is midpoint that doesn't modify that component of texture +// +// uSrc input: -bbbbbgggggrrrrr +// ^ bit 16 +// RETURNS: +// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuLightingTXT24(u16 uSrc, u8 r8, u8 g8, u8 b8) +{ + u16 r1 = uSrc&0x001F; + u16 g1 = uSrc&0x03E0; + u16 b1 = uSrc&0x7C00; + + u16 r2 = r8; + u16 g2 = g8; + u16 b2 = b8; + + u32 r3 = r1 * r2; if (r3 & 0xFFFFF000) r3 = ~0xFFFFF000; + u32 g3 = g1 * g2; if (g3 & 0xFFFE0000) g3 = ~0xFFFE0000; + u32 b3 = b1 * b2; if (b3 & 0xFFC00000) b3 = ~0xFFC00000; + + return ((r3>> 3) ) | + ((g3>> 8)<<10) | + ((b3>>13)<<20); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Apply high-precision 8-bit lighting to bgr555 texture color in 'uSrc', +// returning a padded u32 5.4:5.4:5.4 bgr fixed-pt triplet +// suitable for use with HQ 24-bit lighting/quantization. +// +// INPUT: +// 'uSrc' input: -bbbbbgggggrrrrr +// ^ bit 16 +// 'gCol' input: rrrrrrrrXXXggggggggXXXbbbbbbbbXX +// ^ bit 31 +// RETURNS: +// u32 output: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +GPU_INLINE u32 gpuLightingTXT24Gouraud(u16 uSrc, u32 gCol) +{ + u16 r1 = uSrc&0x001F; + u16 g1 = uSrc&0x03E0; + u16 b1 = uSrc&0x7C00; + + u16 r2 = (gCol>>24) & 0xFF; + u16 g2 = (gCol>>13) & 0xFF; + u16 b2 = (gCol>> 2) & 0xFF; + + u32 r3 = r1 * r2; if (r3 & 0xFFFFF000) r3 = ~0xFFFFF000; + u32 g3 = g1 * g2; if (g3 & 0xFFFE0000) g3 = ~0xFFFE0000; + u32 b3 = b1 * b2; if (b3 & 0xFFC00000) b3 = ~0xFFC00000; + + return ((r3>> 3) ) | + ((g3>> 8)<<10) | + ((b3>>13)<<20); } #endif //_OP_LIGHT_H_ diff --git a/plugins/gpu_unai/gpu_inner_quantization.h b/plugins/gpu_unai/gpu_inner_quantization.h new file mode 100644 index 0000000..0e7e3e8 --- /dev/null +++ b/plugins/gpu_unai/gpu_inner_quantization.h @@ -0,0 +1,108 @@ +/*************************************************************************** +* Copyright (C) 2016 PCSX4ALL Team * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * +***************************************************************************/ + +#ifndef _OP_DITHER_H_ +#define _OP_DITHER_H_ + +static void SetupDitheringConstants() +{ + // Initialize Dithering Constants + // The screen is divided into 8x8 chunks and sub-unitary noise is applied + // using the following matrix. This ensures that data lost in color + // quantization will be added back to the image 'by chance' in predictable + // patterns that are naturally 'smoothed' by your sight when viewed from a + // certain distance. + // + // http://caca.zoy.org/study/index.html + // + // Shading colors are encoded in 4.5, and then are quantitized to 5.0, + // DitherMatrix constants reflect that. + + static const u8 DitherMatrix[] = { + 0, 32, 8, 40, 2, 34, 10, 42, + 48, 16, 56, 24, 50, 18, 58, 26, + 12, 44, 4, 36, 14, 46, 6, 38, + 60, 28, 52, 20, 62, 30, 54, 22, + 3, 35, 11, 43, 1, 33, 9, 41, + 51, 19, 59, 27, 49, 17, 57, 25, + 15, 47, 7, 39, 13, 45, 5, 37, + 63, 31, 55, 23, 61, 29, 53, 21 + }; + + int i, j; + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + u16 offset = (i << 3) | j; + + u32 component = ((DitherMatrix[offset] + 1) << 4) / 65; //[5.5] -> [5] + + // XXX - senquack - hack Dec 2016 + // Until JohnnyF gets the time to work further on dithering, + // force lower bit of component to 0. This fixes grid pattern + // affecting quality of dithered image, as well as loss of + // detail in dark areas. With lower bit unset like this, existing + // 27-bit accuracy of dithering math is unneeded, could be 24-bit. + // Is 8x8 matrix overkill as a result, can we use 4x4? + component &= ~1; + + gpu_unai.DitherMatrix[offset] = (component) + | (component << 10) + | (component << 20); + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Convert padded u32 5.4:5.4:5.4 bgr fixed-pt triplet to final bgr555 color, +// applying dithering if specified by template parameter. +// +// INPUT: +// 'uSrc24' input: 000bbbbbXXXX0gggggXXXX0rrrrrXXXX +// ^ bit 31 +// 'pDst' is a pointer to destination framebuffer pixel, used +// to determine which DitherMatrix[] entry to apply. +// RETURNS: +// u16 output: 0bbbbbgggggrrrrr +// ^ bit 16 +// Where 'X' are fixed-pt bits, '0' is zero-padding, and '-' is don't care +//////////////////////////////////////////////////////////////////////////////// +template +GPU_INLINE u16 gpuColorQuantization24(u32 uSrc24, const u16 *pDst) +{ + if (DITHER) + { + u16 fbpos = (u32)(pDst - gpu_unai.vram); + u16 offset = ((fbpos & (0x7 << 10)) >> 7) | (fbpos & 0x7); + + //clean overflow flags and add + uSrc24 = (uSrc24 & 0x1FF7FDFF) + gpu_unai.DitherMatrix[offset]; + + if (uSrc24 & (1<< 9)) uSrc24 |= (0x1FF ); + if (uSrc24 & (1<<19)) uSrc24 |= (0x1FF<<10); + if (uSrc24 & (1<<29)) uSrc24 |= (0x1FF<<20); + } + + return ((uSrc24>> 4) & (0x1F )) + | ((uSrc24>> 9) & (0x1F<<5 )) + | ((uSrc24>>14) & (0x1F<<10)); +} + +#endif //_OP_DITHER_H_ diff --git a/plugins/gpu_unai/gpu_raster_image.h b/plugins/gpu_unai/gpu_raster_image.h index 0c82aa9..87d2151 100644 --- a/plugins/gpu_unai/gpu_raster_image.h +++ b/plugins/gpu_unai/gpu_raster_image.h @@ -19,71 +19,79 @@ ***************************************************************************/ /////////////////////////////////////////////////////////////////////////////// -INLINE void gpuLoadImage(void) +#ifndef USE_GPULIB +void gpuLoadImage(PtrUnion packet) { u16 x0, y0, w0, h0; - x0 = PacketBuffer.U2[2] & 1023; - y0 = PacketBuffer.U2[3] & 511; - w0 = PacketBuffer.U2[4]; - h0 = PacketBuffer.U2[5]; + x0 = packet.U2[2] & 1023; + y0 = packet.U2[3] & 511; + w0 = packet.U2[4]; + h0 = packet.U2[5]; if ((y0 + h0) > FRAME_HEIGHT) { h0 = FRAME_HEIGHT - y0; } - FrameToWrite = ((w0)&&(h0)); + gpu_unai.dma.FrameToWrite = ((w0)&&(h0)); - px = 0; - py = 0; - x_end = w0; - y_end = h0; - pvram = &((u16*)GPU_FrameBuffer)[x0+(y0*1024)]; + gpu_unai.dma.px = 0; + gpu_unai.dma.py = 0; + gpu_unai.dma.x_end = w0; + gpu_unai.dma.y_end = h0; + gpu_unai.dma.pvram = &((u16*)gpu_unai.vram)[x0+(y0*1024)]; - GPU_GP1 |= 0x08000000; + gpu_unai.GPU_GP1 |= 0x08000000; } +#endif // !USE_GPULIB /////////////////////////////////////////////////////////////////////////////// -INLINE void gpuStoreImage(void) +#ifndef USE_GPULIB +void gpuStoreImage(PtrUnion packet) { u16 x0, y0, w0, h0; - x0 = PacketBuffer.U2[2] & 1023; - y0 = PacketBuffer.U2[3] & 511; - w0 = PacketBuffer.U2[4]; - h0 = PacketBuffer.U2[5]; + x0 = packet.U2[2] & 1023; + y0 = packet.U2[3] & 511; + w0 = packet.U2[4]; + h0 = packet.U2[5]; if ((y0 + h0) > FRAME_HEIGHT) { h0 = FRAME_HEIGHT - y0; } - FrameToRead = ((w0)&&(h0)); + gpu_unai.dma.FrameToRead = ((w0)&&(h0)); - px = 0; - py = 0; - x_end = w0; - y_end = h0; - pvram = &((u16*)GPU_FrameBuffer)[x0+(y0*1024)]; + gpu_unai.dma.px = 0; + gpu_unai.dma.py = 0; + gpu_unai.dma.x_end = w0; + gpu_unai.dma.y_end = h0; + gpu_unai.dma.pvram = &((u16*)gpu_unai.vram)[x0+(y0*1024)]; - GPU_GP1 |= 0x08000000; + gpu_unai.GPU_GP1 |= 0x08000000; } +#endif // !USE_GPULIB -INLINE void gpuMoveImage(void) +void gpuMoveImage(PtrUnion packet) { u32 x0, y0, x1, y1; s32 w0, h0; - x0 = PacketBuffer.U2[2] & 1023; - y0 = PacketBuffer.U2[3] & 511; - x1 = PacketBuffer.U2[4] & 1023; - y1 = PacketBuffer.U2[5] & 511; - w0 = PacketBuffer.U2[6]; - h0 = PacketBuffer.U2[7]; + x0 = packet.U2[2] & 1023; + y0 = packet.U2[3] & 511; + x1 = packet.U2[4] & 1023; + y1 = packet.U2[5] & 511; + w0 = packet.U2[6]; + h0 = packet.U2[7]; if( (x0==x1) && (y0==y1) ) return; if ((w0<=0) || (h0<=0)) return; + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"gpuMoveImage(x0=%u,y0=%u,x1=%u,y1=%u,w0=%d,h0=%d)\n",x0,y0,x1,y1,w0,h0); + #endif + if (((y0+h0)>512)||((x0+w0)>1024)||((y1+h0)>512)||((x1+w0)>1024)) { - u16 *psxVuw=GPU_FrameBuffer; + u16 *psxVuw=gpu_unai.vram; s32 i,j; for(j=0;j>1); lpDst += ((FRAME_OFFSET(x1, y1))>>1); if (w0&1) @@ -143,13 +151,13 @@ INLINE void gpuMoveImage(void) } } -INLINE void gpuClearImage(void) +void gpuClearImage(PtrUnion packet) { s32 x0, y0, w0, h0; - x0 = PacketBuffer.S2[2]; - y0 = PacketBuffer.S2[3]; - w0 = PacketBuffer.S2[4] & 0x3ff; - h0 = PacketBuffer.S2[5] & 0x3ff; + x0 = packet.S2[2]; + y0 = packet.S2[3]; + w0 = packet.S2[4] & 0x3ff; + h0 = packet.S2[5] & 0x3ff; w0 += x0; if (x0 < 0) x0 = 0; @@ -162,10 +170,14 @@ INLINE void gpuClearImage(void) h0 -= y0; if (h0 <= 0) return; + #ifdef ENABLE_GPU_LOG_SUPPORT + fprintf(stdout,"gpuClearImage(x0=%d,y0=%d,w0=%d,h0=%d)\n",x0,y0,w0,h0); + #endif + if (x0&1) { - u16* pixel = (u16*)GPU_FrameBuffer + FRAME_OFFSET(x0, y0); - u16 rgb = GPU_RGB16(PacketBuffer.S4[0]); + u16* pixel = (u16*)gpu_unai.vram + FRAME_OFFSET(x0, y0); + u16 rgb = GPU_RGB16(packet.U4[0]); y0 = FRAME_WIDTH - w0; do { x0=w0; @@ -175,8 +187,8 @@ INLINE void gpuClearImage(void) } else { - u32* pixel = (u32*)(void*)GPU_FrameBuffer + ((FRAME_OFFSET(x0, y0))>>1); - u32 rgb = GPU_RGB16(PacketBuffer.S4[0]); + u32* pixel = (u32*)gpu_unai.vram + ((FRAME_OFFSET(x0, y0))>>1); + u32 rgb = GPU_RGB16(packet.U4[0]); rgb |= (rgb<<16); if (w0&1) { diff --git a/plugins/gpu_unai/gpu_raster_line.h b/plugins/gpu_unai/gpu_raster_line.h index fc59b79..28ea074 100644 --- a/plugins/gpu_unai/gpu_raster_line.h +++ b/plugins/gpu_unai/gpu_raster_line.h @@ -1,6 +1,7 @@ /*************************************************************************** * Copyright (C) 2010 PCSX4ALL Team * * Copyright (C) 2010 Unai * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -18,240 +19,697 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * ***************************************************************************/ -#define GPU_TESTRANGE(x) { if((u32)(x+1024) > 2047) return; } - /////////////////////////////////////////////////////////////////////////////// // GPU internal line drawing functions +// +// Rewritten October 2016 by senquack: +// Instead of one pixel at a time, lines are now drawn in runs of pixels, +// whether vertical, horizontal, or diagonal. A new inner driver +// 'gpuPixelSpanFn' is used, as well as an enhanced Bresenham run-slice +// algorithm. For more information, see the following: +// +// Michael Abrash - Graphics Programming Black Book +// Chapters 35 - 36 (does not implement diagonal runs) +// http://www.drdobbs.com/parallel/graphics-programming-black-book/184404919 +// http://www.jagregory.com/abrash-black-book/ +// +// Article by Andrew Delong (does not implement diagonal runs) +// http://timetraces.ca/nw/drawline.htm +// +// 'Run-Based Multi-Point Line Drawing' by Eun Jae Lee & Larry F. Hodges +// https://smartech.gatech.edu/bitstream/handle/1853/3632/93-22.pdf +// Provided the idea of doing a half-octant transform allowing lines with +// slopes between 0.5 and 2.0 (diagonal runs of pixels) to be handled +// identically to the traditional horizontal/vertical run-slice method. -#define GPU_DIGITS 16 -#define GPU_DIGITSC (GPU_DIGITS+3) +// Use 16.16 fixed point precision for line math. +// NOTE: Gouraud colors used by gpuPixelSpanFn can use a different precision. +#define GPU_LINE_FIXED_BITS 16 -INLINE s32 GPU_DIV(s32 rs, s32 rt) -{ - return rt ? (rs / rt) : (0); -} +// If defined, Gouraud lines will use fixed-point multiply-by-inverse to +// do most divisions. With enough accuracy, this should be OK. +#define USE_LINES_ALL_FIXED_PT_MATH -/////////////////////////////////////////////////////////////////////////////// -void gpuDrawLF(const PD gpuPixelDriver) +////////////////////// +// Flat-shaded line // +////////////////////// +void gpuDrawLineF(PtrUnion packet, const PSD gpuPixelSpanDriver) { - s32 temp; - s32 xmin, xmax; - s32 ymin, ymax; - s32 x0, x1, dx; - s32 y0, y1, dy; - - x0 = PacketBuffer.S2[2] + DrawingOffset[0]; GPU_TESTRANGE(x0); - y0 = PacketBuffer.S2[3] + DrawingOffset[1]; GPU_TESTRANGE(y0); - x1 = PacketBuffer.S2[4] + DrawingOffset[0]; GPU_TESTRANGE(x1); - y1 = PacketBuffer.S2[5] + DrawingOffset[1]; GPU_TESTRANGE(y1); - - xmin = DrawingArea[0]; xmax = DrawingArea[2]; - ymin = DrawingArea[1]; ymax = DrawingArea[3]; - const u16 pixeldata = GPU_RGB16(PacketBuffer.U4[0]); - - dy = (y1 - y0); - if (dy < 0) dy = -dy; - dx = (x1 - x0); - if (dx < 0) dx = -dx; - if (dx > dy) { - if (x0 > x1) { - GPU_SWAP(x0, x1, temp); - GPU_SWAP(y0, y1, temp); + int x0, y0, x1, y1; + int dx, dy; + + // All three of these variables should be signed (so multiplication works) + ptrdiff_t sx; // Sign of x delta, positive when x0 < x1 + const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel + const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line + + // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ + // bottommost pixels of the draw area. Since we render every pixel between + // and including both line endpoints, subtract one from xmax/ymax. + const int xmin = gpu_unai.DrawingArea[0]; + const int ymin = gpu_unai.DrawingArea[1]; + const int xmax = gpu_unai.DrawingArea[2] - 1; + const int ymax = gpu_unai.DrawingArea[3] - 1; + + x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0]; + y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1]; + x1 = GPU_EXPANDSIGN(packet.S2[4]) + gpu_unai.DrawingOffset[0]; + y1 = GPU_EXPANDSIGN(packet.S2[5]) + gpu_unai.DrawingOffset[1]; + + // Always draw top to bottom, so ensure y0 <= y1 + if (y0 > y1) { + SwapValues(y0, y1); + SwapValues(x0, x1); + } + + // Is line totally outside Y clipping range? + if (y0 > ymax || y1 < ymin) return; + + dx = x1 - x0; + dy = y1 - y0; + + // X-axis range check : max distance between any two X coords is 1023 + // (PSX hardware will not render anything violating this rule) + // NOTE: We'll check y coord range further below + if (dx >= CHKMAX_X || dx <= -CHKMAX_X) + return; + + // Y-axis range check and clipping + if (dy) { + // Y-axis range check : max distance between any two Y coords is 511 + // (PSX hardware will not render anything violating this rule) + if (dy >= CHKMAX_Y) + return; + + // We already know y0 < y1 + if (y0 < ymin) { + x0 += GPU_FAST_DIV(((ymin - y0) * dx), dy); + y0 = ymin; } - y1 = GPU_DIV((y1 - y0) << GPU_DIGITS, dx); - y0 <<= GPU_DIGITS; - temp = xmin - x0; - if (temp > 0) { - x0 = xmin; - y0 += (y1 * temp); + if (y1 > ymax) { + x1 += GPU_FAST_DIV(((ymax - y1) * dx), dy); + y1 = ymax; } - if (x1 > xmax) x1 = xmax; - x1 -= x0; - if (x1 < 0) x1 = 0; - - const int li=linesInterlace; - for (; x1; x1--) { - temp = y0 >> GPU_DIGITS; - if( 0 == (temp&li) ) { - if ((u32) (temp - ymin) < (u32) (ymax - ymin)) { - gpuPixelDriver(&((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, temp)],pixeldata); - } + + // Recompute in case clipping occurred: + dx = x1 - x0; + dy = y1 - y0; + } + + // Check X clipping range, set 'sx' x-direction variable + if (dx == 0) { + // Is vertical line totally outside X clipping range? + if (x0 < xmin || x0 > xmax) + return; + sx = 0; + } else { + if (dx > 0) { + // x0 is leftmost coordinate + if (x0 > xmax) return; // Both points outside X clip range + + if (x0 < xmin) { + if (x1 < xmin) return; // Both points outside X clip range + y0 += GPU_FAST_DIV(((xmin - x0) * dy), dx); + x0 = xmin; + } + + if (x1 > xmax) { + y1 += GPU_FAST_DIV(((xmax - x1) * dy), dx); + x1 = xmax; + } + + sx = +1; + dx = x1 - x0; // Get final value, which should also be absolute value + } else { + // x1 is leftmost coordinate + if (x1 > xmax) return; // Both points outside X clip range + + if (x1 < xmin) { + if (x0 < xmin) return; // Both points outside X clip range + + y1 += GPU_FAST_DIV(((xmin - x1) * dy), dx); + x1 = xmin; } - x0++; - y0 += y1; + + if (x0 > xmax) { + y0 += GPU_FAST_DIV(((xmax - x0) * dy), dx); + x0 = xmax; + } + + sx = -1; + dx = x0 - x1; // Get final value, which should also be absolute value + } + + // Recompute in case clipping occurred: + dy = y1 - y0; + } + + // IMPORTANT: dx,dy should now contain their absolute values + + int min_length, // Minimum length of a pixel run + start_length, // Length of first run + end_length, // Length of last run + err_term, // Cumulative error to determine when to draw longer run + err_adjup, // Increment to err_term for each run drawn + err_adjdown; // Subract this from err_term after drawing longer run + + // Color to draw with (16 bits, highest of which is unset mask bit) + uintptr_t col16 = GPU_RGB16(packet.U4[0]); + + // We use u8 pointers even though PS1 has u16 framebuffer. + // This allows pixel-drawing functions to increment dst pointer + // directly by the passed 'incr' value, not having to shift it first. + u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth; + + // SPECIAL CASE: Vertical line + if (dx == 0) { + gpuPixelSpanDriver(dst, col16, dst_stride, dy+1); + return; + } + + // SPECIAL CASE: Horizontal line + if (dy == 0) { + gpuPixelSpanDriver(dst, col16, sx * dst_depth, dx+1); + return; + } + + // SPECIAL CASE: Diagonal line + if (dx == dy) { + gpuPixelSpanDriver(dst, col16, dst_stride + (sx * dst_depth), dy+1); + return; + } + + int major, minor; // Major axis, minor axis + ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis + + if (dx > dy) { + major = dx; + minor = dy; + } else { + major = dy; + minor = dx; + } + + // Determine if diagonal or horizontal runs + if (major < (2 * minor)) { + // Diagonal runs, so perform half-octant transformation + minor = major - minor; + + // Advance diagonally when drawing runs + incr_major = dst_stride + (sx * dst_depth); + + // After drawing each run, correct for over-advance along minor axis + if (dx > dy) + incr_minor = -dst_stride; + else + incr_minor = -sx * dst_depth; + } else { + // Horizontal or vertical runs + if (dx > dy) { + incr_major = sx * dst_depth; + incr_minor = dst_stride; + } else { + incr_major = dst_stride; + incr_minor = sx * dst_depth; } - } else if (dy) { - if (y0 > y1) { - GPU_SWAP(x0, x1, temp); - GPU_SWAP(y0, y1, temp); + } + + if (minor > 1) { + // Minimum number of pixels each run + min_length = major / minor; + + // Initial error term; reflects an initial step of 0.5 along minor axis + err_term = (major % minor) - (minor * 2); + + // Increment err_term this much each step along minor axis; when + // err_term crosses zero, draw longer pixel run. + err_adjup = (major % minor) * 2; + } else { + min_length = major; + err_term = 0; + err_adjup = 0; + } + + // Error term adjustment when err_term turns over; used to factor + // out the major-axis step made at that time + err_adjdown = minor * 2; + + // The initial and last runs are partial, because minor axis advances + // only 0.5 for these runs, rather than 1. Each is half a full run, + // plus the initial pixel. + start_length = end_length = (min_length / 2) + 1; + + if (min_length & 1) { + // If there're an odd number of pixels per run, we have 1 pixel that + // can't be allocated to either the initial or last partial run, so + // we'll add 0.5 to err_term so that this pixel will be handled + // by the normal full-run loop + err_term += minor; + } else { + // If the minimum run length is even and there's no fractional advance, + // we have one pixel that could go to either the initial or last + // partial run, which we arbitrarily allocate to the last run + if (err_adjup == 0) + start_length--; // Leave out the extra pixel at the start + } + + // First run of pixels + dst = gpuPixelSpanDriver(dst, col16, incr_major, start_length); + dst += incr_minor; + + // Middle runs of pixels + while (--minor > 0) { + int run_length = min_length; + err_term += err_adjup; + + // If err_term passed 0, reset it and draw longer run + if (err_term > 0) { + err_term -= err_adjdown; + run_length++; } - x1 = GPU_DIV((x1 - x0) << GPU_DIGITS, dy); - x0 <<= GPU_DIGITS; - temp = ymin - y0; - if (temp > 0) { + + dst = gpuPixelSpanDriver(dst, col16, incr_major, run_length); + dst += incr_minor; + } + + // Final run of pixels + gpuPixelSpanDriver(dst, col16, incr_major, end_length); +} + +///////////////////////// +// Gouraud-shaded line // +///////////////////////// +void gpuDrawLineG(PtrUnion packet, const PSD gpuPixelSpanDriver) +{ + int x0, y0, x1, y1; + int dx, dy, dr, dg, db; + u32 r0, g0, b0, r1, g1, b1; + + // All three of these variables should be signed (so multiplication works) + ptrdiff_t sx; // Sign of x delta, positive when x0 < x1 + const ptrdiff_t dst_depth = FRAME_BYTES_PER_PIXEL; // PSX: 2 bytes per pixel + const ptrdiff_t dst_stride = FRAME_BYTE_STRIDE; // PSX: 2048 bytes per framebuffer line + + // Clip region: xmax/ymax seem to normally be one *past* the rightmost/ + // bottommost pixels of the draw area. We'll render every pixel between + // and including both line endpoints, so subtract one from xmax/ymax. + const int xmin = gpu_unai.DrawingArea[0]; + const int ymin = gpu_unai.DrawingArea[1]; + const int xmax = gpu_unai.DrawingArea[2] - 1; + const int ymax = gpu_unai.DrawingArea[3] - 1; + + x0 = GPU_EXPANDSIGN(packet.S2[2]) + gpu_unai.DrawingOffset[0]; + y0 = GPU_EXPANDSIGN(packet.S2[3]) + gpu_unai.DrawingOffset[1]; + x1 = GPU_EXPANDSIGN(packet.S2[6]) + gpu_unai.DrawingOffset[0]; + y1 = GPU_EXPANDSIGN(packet.S2[7]) + gpu_unai.DrawingOffset[1]; + + u32 col0 = packet.U4[0]; + u32 col1 = packet.U4[2]; + + // Always draw top to bottom, so ensure y0 <= y1 + if (y0 > y1) { + SwapValues(y0, y1); + SwapValues(x0, x1); + SwapValues(col0, col1); + } + + // Is line totally outside Y clipping range? + if (y0 > ymax || y1 < ymin) return; + + // If defined, Gouraud colors are fixed-point 5.11, otherwise they are 8.16 + // (This is only beneficial if using SIMD-optimized pixel driver) +#ifdef GPU_GOURAUD_LOW_PRECISION + r0 = (col0 >> 3) & 0x1f; g0 = (col0 >> 11) & 0x1f; b0 = (col0 >> 19) & 0x1f; + r1 = (col1 >> 3) & 0x1f; g1 = (col1 >> 11) & 0x1f; b1 = (col1 >> 19) & 0x1f; +#else + r0 = col0 & 0xff; g0 = (col0 >> 8) & 0xff; b0 = (col0 >> 16) & 0xff; + r1 = col1 & 0xff; g1 = (col1 >> 8) & 0xff; b1 = (col1 >> 16) & 0xff; +#endif + + dx = x1 - x0; + dy = y1 - y0; + dr = r1 - r0; + dg = g1 - g0; + db = b1 - b0; + + // X-axis range check : max distance between any two X coords is 1023 + // (PSX hardware will not render anything violating this rule) + // NOTE: We'll check y coord range further below + if (dx >= CHKMAX_X || dx <= -CHKMAX_X) + return; + + // Y-axis range check and clipping + if (dy) { + // Y-axis range check : max distance between any two Y coords is 511 + // (PSX hardware will not render anything violating this rule) + if (dy >= CHKMAX_Y) + return; + + // We already know y0 < y1 + if (y0 < ymin) { +#ifdef USE_LINES_ALL_FIXED_PT_MATH + s32 factor = GPU_FAST_DIV(((ymin - y0) << GPU_LINE_FIXED_BITS), dy); + x0 += (dx * factor) >> GPU_LINE_FIXED_BITS; + r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; + g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; + b0 += (db * factor) >> GPU_LINE_FIXED_BITS; +#else + x0 += (ymin - y0) * dx / dy; + r0 += (ymin - y0) * dr / dy; + g0 += (ymin - y0) * dg / dy; + b0 += (ymin - y0) * db / dy; +#endif y0 = ymin; - x0 += (x1 * temp); } - if (y1 > ymax) y1 = ymax; - y1 -= y0; - if (y1 < 0) y1 = 0; - - const int li=linesInterlace; - for (; y1; y1--) { - if( 0 == (y0&li) ) { - temp = x0 >> GPU_DIGITS; - if ((u32) (temp - xmin) < (u32) (xmax - xmin)) { - gpuPixelDriver(&((u16*)GPU_FrameBuffer)[FRAME_OFFSET(temp, y0)],pixeldata); - } - } - y0++; - x0 += x1; + + if (y1 > ymax) { +#ifdef USE_LINES_ALL_FIXED_PT_MATH + s32 factor = GPU_FAST_DIV(((ymax - y1) << GPU_LINE_FIXED_BITS), dy); + x1 += (dx * factor) >> GPU_LINE_FIXED_BITS; + r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; + g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; + b1 += (db * factor) >> GPU_LINE_FIXED_BITS; +#else + x1 += (ymax - y1) * dx / dy; + r1 += (ymax - y1) * dr / dy; + g1 += (ymax - y1) * dg / dy; + b1 += (ymax - y1) * db / dy; +#endif + y1 = ymax; } - + + // Recompute in case clipping occurred: + dx = x1 - x0; + dy = y1 - y0; + dr = r1 - r0; + dg = g1 - g0; + db = b1 - b0; + } + + // Check X clipping range, set 'sx' x-direction variable + if (dx == 0) { + // Is vertical line totally outside X clipping range? + if (x0 < xmin || x0 > xmax) + return; + sx = 0; } else { - if( 0 == (y0&linesInterlace) ) { - if ((u32) (x0 - xmin) < (u32) (xmax - xmin)) { - if ((u32) (y0 - ymin) < (u32) (ymax - ymin)) { - gpuPixelDriver(&((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, y0)],pixeldata); - } + if (dx > 0) { + // x0 is leftmost coordinate + if (x0 > xmax) return; // Both points outside X clip range + + if (x0 < xmin) { + if (x1 < xmin) return; // Both points outside X clip range + +#ifdef USE_LINES_ALL_FIXED_PT_MATH + s32 factor = GPU_FAST_DIV(((xmin - x0) << GPU_LINE_FIXED_BITS), dx); + y0 += (dy * factor) >> GPU_LINE_FIXED_BITS; + r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; + g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; + b0 += (db * factor) >> GPU_LINE_FIXED_BITS; +#else + y0 += (xmin - x0) * dy / dx; + r0 += (xmin - x0) * dr / dx; + g0 += (xmin - x0) * dg / dx; + b0 += (xmin - x0) * db / dx; +#endif + x0 = xmin; } + + if (x1 > xmax) { +#ifdef USE_LINES_ALL_FIXED_PT_MATH + s32 factor = GPU_FAST_DIV(((xmax - x1) << GPU_LINE_FIXED_BITS), dx); + y1 += (dy * factor) >> GPU_LINE_FIXED_BITS; + r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; + g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; + b1 += (db * factor) >> GPU_LINE_FIXED_BITS; +#else + y1 += (xmax - x1) * dy / dx; + r1 += (xmax - x1) * dr / dx; + g1 += (xmax - x1) * dg / dx; + b1 += (xmax - x1) * db / dx; +#endif + x1 = xmax; + } + + sx = +1; + dx = x1 - x0; // Get final value, which should also be absolute value + } else { + // x1 is leftmost coordinate + if (x1 > xmax) return; // Both points outside X clip range + + if (x1 < xmin) { + if (x0 < xmin) return; // Both points outside X clip range + +#ifdef USE_LINES_ALL_FIXED_PT_MATH + s32 factor = GPU_FAST_DIV(((xmin - x1) << GPU_LINE_FIXED_BITS), dx); + y1 += (dy * factor) >> GPU_LINE_FIXED_BITS; + r1 += (dr * factor) >> GPU_LINE_FIXED_BITS; + g1 += (dg * factor) >> GPU_LINE_FIXED_BITS; + b1 += (db * factor) >> GPU_LINE_FIXED_BITS; +#else + y1 += (xmin - x1) * dy / dx; + r1 += (xmin - x1) * dr / dx; + g1 += (xmin - x1) * dg / dx; + b1 += (xmin - x1) * db / dx; +#endif + x1 = xmin; + } + + if (x0 > xmax) { +#ifdef USE_LINES_ALL_FIXED_PT_MATH + s32 factor = GPU_FAST_DIV(((xmax - x0) << GPU_LINE_FIXED_BITS), dx); + y0 += (dy * factor) >> GPU_LINE_FIXED_BITS; + r0 += (dr * factor) >> GPU_LINE_FIXED_BITS; + g0 += (dg * factor) >> GPU_LINE_FIXED_BITS; + b0 += (db * factor) >> GPU_LINE_FIXED_BITS; +#else + y0 += (xmax - x0) * dy / dx; + r0 += (xmax - x0) * dr / dx; + g0 += (xmax - x0) * dg / dx; + b0 += (xmax - x0) * db / dx; +#endif + x0 = xmax; + } + + sx = -1; + dx = x0 - x1; // Get final value, which should also be absolute value } + + // Recompute in case clipping occurred: + dy = y1 - y0; + dr = r1 - r0; + dg = g1 - g0; + db = b1 - b0; } -} -/*---------------------------------------------------------------------- -GF -----------------------------------------------------------------------*/ + // IMPORTANT: dx,dy should now contain their absolute values -/////////////////////////////////////////////////////////////////////////////// -void gpuDrawLG(const PD gpuPixelDriver) -{ - s32 temp; - s32 xmin, xmax; - s32 ymin, ymax; - s32 x0, x1, dx; - s32 y0, y1, dy; - s32 r0, r1; - s32 g0, g1; - s32 b0, b1; - - x0 = PacketBuffer.S2[2] + DrawingOffset[0]; GPU_TESTRANGE(x0); - y0 = PacketBuffer.S2[3] + DrawingOffset[1]; GPU_TESTRANGE(y0); - x1 = PacketBuffer.S2[6] + DrawingOffset[0]; GPU_TESTRANGE(x1); - y1 = PacketBuffer.S2[7] + DrawingOffset[1]; GPU_TESTRANGE(y1); - - r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2]; - r1 = PacketBuffer.U1[8]; g1 = PacketBuffer.U1[9]; b1 = PacketBuffer.U1[10]; - - xmin = DrawingArea[0]; xmax = DrawingArea[2]; - ymin = DrawingArea[1]; ymax = DrawingArea[3]; - - dy = (y1 - y0); - if (dy < 0) - dy = -dy; - dx = (x1 - x0); - if (dx < 0) - dx = -dx; - if (dx > dy) { - if (x0 > x1) { - GPU_SWAP(x0, x1, temp); - GPU_SWAP(y0, y1, temp); - GPU_SWAP(r0, r1, temp); - GPU_SWAP(g0, g1, temp); - GPU_SWAP(b0, b1, temp); - } - y1 = GPU_DIV((y1 - y0) << GPU_DIGITS, dx); - r1 = GPU_DIV((r1 - r0) << GPU_DIGITS, dx); - g1 = GPU_DIV((g1 - g0) << GPU_DIGITS, dx); - b1 = GPU_DIV((b1 - b0) << GPU_DIGITS, dx); - y0 <<= GPU_DIGITS; - r0 <<= GPU_DIGITS; - g0 <<= GPU_DIGITS; - b0 <<= GPU_DIGITS; - temp = xmin - x0; - if (temp > 0) { - x0 = xmin; - y0 += (y1 * temp); - r0 += (r1 * temp); - g0 += (g1 * temp); - b0 += (b1 * temp); + int min_length, // Minimum length of a pixel run + start_length, // Length of first run + end_length, // Length of last run + err_term, // Cumulative error to determine when to draw longer run + err_adjup, // Increment to err_term for each run drawn + err_adjdown; // Subract this from err_term after drawing longer run + + GouraudColor gcol; + gcol.r = r0 << GPU_GOURAUD_FIXED_BITS; + gcol.g = g0 << GPU_GOURAUD_FIXED_BITS; + gcol.b = b0 << GPU_GOURAUD_FIXED_BITS; + + // We use u8 pointers even though PS1 has u16 framebuffer. + // This allows pixel-drawing functions to increment dst pointer + // directly by the passed 'incr' value, not having to shift it first. + u8 *dst = (u8*)gpu_unai.vram + y0 * dst_stride + x0 * dst_depth; + + // SPECIAL CASE: Vertical line + if (dx == 0) { +#ifdef USE_LINES_ALL_FIXED_PT_MATH + // Get dy fixed-point inverse + s32 inv_factor = 1 << GPU_GOURAUD_FIXED_BITS; + if (dy > 1) inv_factor = GPU_FAST_DIV(inv_factor, dy); + + // Simultaneously divide and convert integer to Gouraud fixed point: + gcol.r_incr = dr * inv_factor; + gcol.g_incr = dg * inv_factor; + gcol.b_incr = db * inv_factor; +#else + // First, convert to Gouraud fixed point + gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; + gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; + gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; + + if (dy > 1) { + if (dr) gcol.r_incr /= dy; + if (dg) gcol.g_incr /= dy; + if (db) gcol.b_incr /= dy; } - if (x1 > xmax) x1 = xmax; - x1 -= x0; - if (x1 < 0) x1 = 0; +#endif - const int li=linesInterlace; - for (; x1; x1--) { - temp = y0 >> GPU_DIGITS; - if( 0 == (temp&li) ) { - if ((u32) (temp - ymin) < (u32) (ymax - ymin)) { - gpuPixelDriver ( - &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, temp)], - (((b0>>GPU_DIGITSC)&0x1F)<<10) | (((g0>>GPU_DIGITSC)&0x1F)<< 5) | ((r0>>GPU_DIGITSC)&0x1F) - ); - } - } - x0++; - y0 += y1; - r0 += r1; - g0 += g1; - b0 += b1; - } - } else if (dy) { - if (y0 > y1) { - GPU_SWAP(x0, x1, temp); - GPU_SWAP(y0, y1, temp); - GPU_SWAP(r0, r1, temp); - GPU_SWAP(g0, g1, temp); - GPU_SWAP(b0, b1, temp); + gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride, dy+1); + return; + } + + // SPECIAL CASE: Horizontal line + if (dy == 0) { +#ifdef USE_LINES_ALL_FIXED_PT_MATH + // Get dx fixed-point inverse + s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS); + if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx); + + // Simultaneously divide and convert integer to Gouraud fixed point: + gcol.r_incr = dr * inv_factor; + gcol.g_incr = dg * inv_factor; + gcol.b_incr = db * inv_factor; +#else + gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; + gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; + gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; + + if (dx > 1) { + if (dr) gcol.r_incr /= dx; + if (dg) gcol.g_incr /= dx; + if (db) gcol.b_incr /= dx; } - x1 = GPU_DIV((x1 - x0) << GPU_DIGITS, dy); - r1 = GPU_DIV((r1 - r0) << GPU_DIGITS, dy); - g1 = GPU_DIV((g1 - g0) << GPU_DIGITS, dy); - b1 = GPU_DIV((b1 - b0) << GPU_DIGITS, dy); - x0 <<= GPU_DIGITS; - r0 <<= GPU_DIGITS; - g0 <<= GPU_DIGITS; - b0 <<= GPU_DIGITS; - temp = ymin - y0; - if (temp > 0) { - y0 = ymin; - x0 += (x1 * temp); - r0 += (r1 * temp); - g0 += (g1 * temp); - b0 += (b1 * temp); +#endif + + gpuPixelSpanDriver(dst, (uintptr_t)&gcol, sx * dst_depth, dx+1); + return; + } + + // SPECIAL CASE: Diagonal line + if (dx == dy) { +#ifdef USE_LINES_ALL_FIXED_PT_MATH + // Get dx fixed-point inverse + s32 inv_factor = (1 << GPU_GOURAUD_FIXED_BITS); + if (dx > 1) inv_factor = GPU_FAST_DIV(inv_factor, dx); + + // Simultaneously divide and convert integer to Gouraud fixed point: + gcol.r_incr = dr * inv_factor; + gcol.g_incr = dg * inv_factor; + gcol.b_incr = db * inv_factor; +#else + // First, convert to Gouraud fixed point + gcol.r_incr = dr << GPU_GOURAUD_FIXED_BITS; + gcol.g_incr = dg << GPU_GOURAUD_FIXED_BITS; + gcol.b_incr = db << GPU_GOURAUD_FIXED_BITS; + + if (dx > 1) { + if (dr) gcol.r_incr /= dx; + if (dg) gcol.g_incr /= dx; + if (db) gcol.b_incr /= dx; } - if (y1 > ymax) y1 = ymax; - y1 -= y0; - if (y1 < 0) y1 = 0; - - const int li=linesInterlace; - for (; y1; y1--) { - if( 0 == (y0&li) ) { - temp = x0 >> GPU_DIGITS; - if ((u32) (temp - xmin) < (u32) (xmax - xmin)) { - gpuPixelDriver ( - &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(temp, y0)], - (((b0>>GPU_DIGITSC)&0x1F)<<10) | (((g0>>GPU_DIGITSC)&0x1F)<< 5) | ((r0>>GPU_DIGITSC)&0x1F) - ); - } - } - y0++; - x0 += x1; - r0 += r1; - g0 += g1; - b0 += b1; +#endif + + gpuPixelSpanDriver(dst, (uintptr_t)&gcol, dst_stride + (sx * dst_depth), dy+1); + return; + } + + int major, minor; // Absolute val of major,minor axis delta + ptrdiff_t incr_major, incr_minor; // Ptr increment for each step along axis + + if (dx > dy) { + major = dx; + minor = dy; + } else { + major = dy; + minor = dx; + } + + // Determine if diagonal or horizontal runs + if (major < (2 * minor)) { + // Diagonal runs, so perform half-octant transformation + minor = major - minor; + + // Advance diagonally when drawing runs + incr_major = dst_stride + (sx * dst_depth); + + // After drawing each run, correct for over-advance along minor axis + if (dx > dy) + incr_minor = -dst_stride; + else + incr_minor = -sx * dst_depth; + } else { + // Horizontal or vertical runs + if (dx > dy) { + incr_major = sx * dst_depth; + incr_minor = dst_stride; + } else { + incr_major = dst_stride; + incr_minor = sx * dst_depth; } + } + +#ifdef USE_LINES_ALL_FIXED_PT_MATH + s32 major_inv = GPU_FAST_DIV((1 << GPU_GOURAUD_FIXED_BITS), major); + + // Simultaneously divide and convert from integer to Gouraud fixed point: + gcol.r_incr = dr * major_inv; + gcol.g_incr = dg * major_inv; + gcol.b_incr = db * major_inv; +#else + gcol.r_incr = dr ? ((dr << GPU_GOURAUD_FIXED_BITS) / major) : 0; + gcol.g_incr = dg ? ((dg << GPU_GOURAUD_FIXED_BITS) / major) : 0; + gcol.b_incr = db ? ((db << GPU_GOURAUD_FIXED_BITS) / major) : 0; +#endif + + if (minor > 1) { + // Minimum number of pixels each run + min_length = major / minor; + + // Initial error term; reflects an initial step of 0.5 along minor axis + err_term = (major % minor) - (minor * 2); + + // Increment err_term this much each step along minor axis; when + // err_term crosses zero, draw longer pixel run. + err_adjup = (major % minor) * 2; } else { - if( 0 == (y0&linesInterlace) ) { - if ((u32) (x0 - xmin) < (u32) (xmax - xmin)) { - if ((u32) (y0 - ymin) < (u32) (ymax - ymin)) { - gpuPixelDriver ( - &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, y0)], - (((b0>>GPU_DIGITSC)&0x1F)<<10) | (((g0>>GPU_DIGITSC)&0x1F)<< 5) | ((r0>>GPU_DIGITSC)&0x1F) - ); - } - } + min_length = major; + err_term = 0; + err_adjup = 0; + } + + // Error term adjustment when err_term turns over; used to factor + // out the major-axis step made at that time + err_adjdown = minor * 2; + + // The initial and last runs are partial, because minor axis advances + // only 0.5 for these runs, rather than 1. Each is half a full run, + // plus the initial pixel. + start_length = end_length = (min_length / 2) + 1; + + if (min_length & 1) { + // If there're an odd number of pixels per run, we have 1 pixel that + // can't be allocated to either the initial or last partial run, so + // we'll add 0.5 to err_term so that this pixel will be handled + // by the normal full-run loop + err_term += minor; + } else { + // If the minimum run length is even and there's no fractional advance, + // we have one pixel that could go to either the initial or last + // partial run, which we'll arbitrarily allocate to the last run + if (err_adjup == 0) + start_length--; // Leave out the extra pixel at the start + } + + // First run of pixels + dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, start_length); + dst += incr_minor; + + // Middle runs of pixels + while (--minor > 0) { + int run_length = min_length; + err_term += err_adjup; + + // If err_term passed 0, reset it and draw longer run + if (err_term > 0) { + err_term -= err_adjdown; + run_length++; } + + dst = gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, run_length); + dst += incr_minor; } + + // Final run of pixels + gpuPixelSpanDriver(dst, (uintptr_t)&gcol, incr_major, end_length); } diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h index c4b0350..f66a9e2 100644 --- a/plugins/gpu_unai/gpu_raster_polygon.h +++ b/plugins/gpu_unai/gpu_raster_polygon.h @@ -18,732 +18,1431 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * ***************************************************************************/ -#define GPU_TESTRANGE3() \ -{ \ - if(x0<0) { if((x1-x0)>CHKMAX_X) return; if((x2-x0)>CHKMAX_X) return; } \ - if(x1<0) { if((x0-x1)>CHKMAX_X) return; if((x2-x1)>CHKMAX_X) return; } \ - if(x2<0) { if((x0-x2)>CHKMAX_X) return; if((x1-x2)>CHKMAX_X) return; } \ - if(y0<0) { if((y1-y0)>CHKMAX_Y) return; if((y2-y0)>CHKMAX_Y) return; } \ - if(y1<0) { if((y0-y1)>CHKMAX_Y) return; if((y2-y1)>CHKMAX_Y) return; } \ - if(y2<0) { if((y0-y2)>CHKMAX_Y) return; if((y1-y2)>CHKMAX_Y) return; } \ -} +//senquack - NOTE: GPU Unai poly routines have been rewritten/adapted +// from DrHell routines to fix multiple issues. See README_senquack.txt /////////////////////////////////////////////////////////////////////////////// -// GPU internal polygon drawing functions +// Shared poly vertex buffer, able to handle 3 or 4-pt polys of any type. +/////////////////////////////////////////////////////////////////////////////// +struct PolyVertex { + s32 x, y; // Sign-extended 11-bit X,Y coords + union { + struct { u8 u, v, pad[2]; } tex; // Texture coords (if used) + u32 tex_word; + }; + union { + struct { u8 r, g, b, pad; } col; // 24-bit RGB color (if used) + u32 col_word; + }; +}; + +enum PolyAttribute { + POLYATTR_TEXTURE = (1 << 0), + POLYATTR_GOURAUD = (1 << 1) +}; + +enum PolyType { + POLYTYPE_F = 0, + POLYTYPE_FT = (POLYATTR_TEXTURE), + POLYTYPE_G = (POLYATTR_GOURAUD), + POLYTYPE_GT = (POLYATTR_TEXTURE | POLYATTR_GOURAUD) +}; + +/////////////////////////////////////////////////////////////////////////////// +// polyInitVertexBuffer() +// Fills vbuf[] array with data from any type of poly draw-command packet. /////////////////////////////////////////////////////////////////////////////// -void gpuDrawF3(const PP gpuPolySpanDriver) +static void polyInitVertexBuffer(PolyVertex *vbuf, const PtrUnion packet, PolyType ptype, u32 is_quad) { - const int li=linesInterlace; - s32 temp; - s32 xa, xb, xmin, xmax; - s32 ya, yb, ymin, ymax; - s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; - s32 y0, y1, y2; + bool texturing = ptype & POLYATTR_TEXTURE; + bool gouraud = ptype & POLYATTR_GOURAUD; + + int vert_stride = 1; // Stride of vertices in cmd packet, in 32-bit words + if (texturing) + vert_stride++; + if (gouraud) + vert_stride++; + + int num_verts = (is_quad) ? 4 : 3; + u32 *ptr; + + // X,Y coords, adjusted by draw offsets + s32 x_off = gpu_unai.DrawingOffset[0]; + s32 y_off = gpu_unai.DrawingOffset[1]; + ptr = &packet.U4[1]; + for (int i=0; i < num_verts; ++i, ptr += vert_stride) { + s16* coord_ptr = (s16*)ptr; + vbuf[i].x = GPU_EXPANDSIGN(coord_ptr[0]) + x_off; + vbuf[i].y = GPU_EXPANDSIGN(coord_ptr[1]) + y_off; + } - x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]); - y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]); - x1 = GPU_EXPANDSIGN(PacketBuffer.S2[4]); - y1 = GPU_EXPANDSIGN(PacketBuffer.S2[5]); - x2 = GPU_EXPANDSIGN(PacketBuffer.S2[6]); - y2 = GPU_EXPANDSIGN(PacketBuffer.S2[7]); + // U,V texture coords (if applicable) + if (texturing) { + ptr = &packet.U4[2]; + for (int i=0; i < num_verts; ++i, ptr += vert_stride) + vbuf[i].tex_word = *ptr; + } - GPU_TESTRANGE3(); + // Colors (if applicable) + if (gouraud) { + ptr = &packet.U4[0]; + for (int i=0; i < num_verts; ++i, ptr += vert_stride) + vbuf[i].col_word = *ptr; + } +} - x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; - y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; +/////////////////////////////////////////////////////////////////////////////// +// Helper functions to determine which vertex in a 2 or 3 vertex array +// has the highest/lowest X/Y coordinate. +// Note: the comparison logic is such that, given a set of vertices with +// identical values for a given coordinate, a different index will be +// returned from vertIdxOfLeast..() than a call to vertIdxOfHighest..(). +// This ensures that, during the vertex-ordering phase of rasterization, +// all three vertices remain unique. +/////////////////////////////////////////////////////////////////////////////// - xmin = DrawingArea[0]; xmax = DrawingArea[2]; - ymin = DrawingArea[1]; ymax = DrawingArea[3]; +template +static inline int vertIdxOfLeastXCoord2(const T *Tptr) +{ + return (Tptr[0].x <= Tptr[1].x) ? 0 : 1; +} - { - int rx0 = Max2(xmin,Min3(x0,x1,x2)); - int ry0 = Max2(ymin,Min3(y0,y1,y2)); - int rx1 = Min2(xmax,Max3(x0,x1,x2)); - int ry1 = Min2(ymax,Max3(y0,y1,y2)); - if( rx0>=rx1 || ry0>=ry1) return; - } - - PixelData = GPU_RGB16(PacketBuffer.U4[0]); +template +static inline int vertIdxOfLeastXCoord3(const T *Tptr) +{ + int least_of_v0_v1 = vertIdxOfLeastXCoord2(Tptr); + return (Tptr[least_of_v0_v1].x <= Tptr[2].x) ? least_of_v0_v1 : 2; +} - if (y0 >= y1) - { - if( y0!=y1 || x0>x1 ) - { - GPU_SWAP(x0, x1, temp); - GPU_SWAP(y0, y1, temp); - } - } - if (y1 >= y2) - { - if( y1!=y2 || x1>x2 ) - { - GPU_SWAP(x1, x2, temp); - GPU_SWAP(y1, y2, temp); - } - } - if (y0 >= y1) - { - if( y0!=y1 || x0>x1 ) - { - GPU_SWAP(x0, x1, temp); - GPU_SWAP(y0, y1, temp); - } - } +template +static inline int vertIdxOfLeastYCoord2(const T *Tptr) +{ + return (Tptr[0].y <= Tptr[1].y) ? 0 : 1; +} - ya = y2 - y0; - yb = y2 - y1; - dx =(x2 - x1) * ya - (x2 - x0) * yb; +template +static inline int vertIdxOfLeastYCoord3(const T *Tptr) +{ + int least_of_v0_v1 = vertIdxOfLeastYCoord2(Tptr); + return (Tptr[least_of_v0_v1].y <= Tptr[2].y) ? least_of_v0_v1 : 2; +} + +template +static inline int vertIdxOfHighestXCoord2(const T *Tptr) +{ + return (Tptr[1].x >= Tptr[0].x) ? 1 : 0; +} + +template +static inline int vertIdxOfHighestXCoord3(const T *Tptr) +{ + int highest_of_v0_v1 = vertIdxOfHighestXCoord2(Tptr); + return (Tptr[2].x >= Tptr[highest_of_v0_v1].x) ? 2 : highest_of_v0_v1; +} + +template +static inline int vertIdxOfHighestYCoord2(const T *Tptr) +{ + return (Tptr[1].y >= Tptr[0].y) ? 1 : 0; +} + +template +static inline int vertIdxOfHighestYCoord3(const T *Tptr) +{ + int highest_of_v0_v1 = vertIdxOfHighestYCoord2(Tptr); + return (Tptr[2].y >= Tptr[highest_of_v0_v1].y) ? 2 : highest_of_v0_v1; +} - for (s32 loop0 = 2; loop0; --loop0) +/////////////////////////////////////////////////////////////////////////////// +// polyUseTriangle() +// Determines if the specified triangle should be rendered. If so, it +// fills the given array of vertex pointers, vert_ptrs, in order of +// increasing Y coordinate values, as required by rasterization algorithm. +// Parameter 'tri_num' is 0 for first triangle (idx 0,1,2 of vbuf[]), +// or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]). +// Returns true if triangle should be rendered, false if not. +/////////////////////////////////////////////////////////////////////////////// +static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs) +{ + // Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)? + const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1]; + + // Get indices of highest/lowest X,Y coords within triangle + int idx_lowest_x = vertIdxOfLeastXCoord3(tri_ptr); + int idx_highest_x = vertIdxOfHighestXCoord3(tri_ptr); + int idx_lowest_y = vertIdxOfLeastYCoord3(tri_ptr); + int idx_highest_y = vertIdxOfHighestYCoord3(tri_ptr); + + // Maximum absolute distance between any two X coordinates is 1023, + // and for Y coordinates is 511 (PS1 hardware limitation) + int lowest_x = tri_ptr[idx_lowest_x].x; + int highest_x = tri_ptr[idx_highest_x].x; + int lowest_y = tri_ptr[idx_lowest_y].y; + int highest_y = tri_ptr[idx_highest_y].y; + if ((highest_x - lowest_x) >= CHKMAX_X || + (highest_y - lowest_y) >= CHKMAX_Y) + return false; + + // Determine if triangle is completely outside clipping range + int xmin, xmax, ymin, ymax; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; + int clipped_lowest_x = Max2(xmin,lowest_x); + int clipped_lowest_y = Max2(ymin,lowest_y); + int clipped_highest_x = Min2(xmax,highest_x); + int clipped_highest_y = Min2(ymax,highest_y); + if (clipped_lowest_x >= clipped_highest_x || + clipped_lowest_y >= clipped_highest_y) + return false; + + // Order vertex ptrs by increasing y value (draw routines need this). + // The middle index is deduced by a binary math trick that depends + // on index range always being between 0..2 + vert_ptrs[0] = tri_ptr + idx_lowest_y; + vert_ptrs[1] = tri_ptr + ((idx_lowest_y + idx_highest_y) ^ 3); + vert_ptrs[2] = tri_ptr + idx_highest_y; + return true; +} + +/////////////////////////////////////////////////////////////////////////////// +// GPU internal polygon drawing functions +/////////////////////////////////////////////////////////////////////////////// + +/*---------------------------------------------------------------------- +gpuDrawPolyF - Flat-shaded, untextured poly +----------------------------------------------------------------------*/ +void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) +{ + // Set up bgr555 color to be used across calls in inner driver + gpu_unai.PixelData = GPU_RGB16(packet.U4[0]); + + PolyVertex vbuf[4]; + polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad); + + int total_passes = is_quad ? 2 : 1; + int cur_pass = 0; + do { - if (loop0 == 2) - { - ya = y0; - yb = y1; - x3 = i2x(x0); - x4 = y0!=y1 ? x3 : i2x(x1); - if (dx < 0) - { - dx3 = xLoDivx((x2 - x0), (y2 - y0)); - dx4 = xLoDivx((x1 - x0), (y1 - y0)); - } - else - { - dx3 = xLoDivx((x1 - x0), (y1 - y0)); - dx4 = xLoDivx((x2 - x0), (y2 - y0)); + const PolyVertex* vptrs[3]; + if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + continue; + + s32 xa, xb, ya, yb; + s32 x3, dx3, x4, dx4, dx; + s32 x0, x1, x2, y0, y1, y2; + + x0 = vptrs[0]->x; y0 = vptrs[0]->y; + x1 = vptrs[1]->x; y1 = vptrs[1]->y; + x2 = vptrs[2]->x; y2 = vptrs[2]->y; + + ya = y2 - y0; + yb = y2 - y1; + dx = (x2 - x1) * ya - (x2 - x0) * yb; + + for (int loop0 = 2; loop0; loop0--) { + if (loop0 == 2) { + ya = y0; yb = y1; + x3 = x4 = i2x(x0); + if (dx < 0) { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; +#else + dx3 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx3 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; + dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; +#else + dx3 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; + dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; +#endif +#endif + } else { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; +#else + dx3 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx3 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; + dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; +#else + dx3 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; + dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; +#endif +#endif + } + } else { + //senquack - break out of final loop if nothing to be drawn (1st loop + // must always be taken to setup dx3/dx4) + if (y1 == y2) break; + + ya = y1; yb = y2; + + if (dx < 0) { + x3 = i2x(x0) + (dx3 * (y1 - y0)); + x4 = i2x(x1); +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; +#endif +#endif + } else { + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; +#else + dx3 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx3 = ((y2 - y1) != 0) ? xLoDivx ((x2 - x1), (y2 - y1)) : 0; +#else + dx3 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; +#endif +#endif + } } - } - else - { - ya = y1; - yb = y2; - if (dx < 0) - { - x4 = i2x(x1); - x3 = i2x(x0) + (dx3 * (y1 - y0)); - dx4 = xLoDivx((x2 - x1), (y2 - y1)); + + s32 xmin, xmax, ymin, ymax; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; + + if ((ymin - ya) > 0) { + x3 += (dx3 * (ymin - ya)); + x4 += (dx4 * (ymin - ya)); + ya = ymin; } - else + + if (yb > ymax) yb = ymax; + + int loop1 = yb - ya; + if (loop1 <= 0) + continue; + + u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; + int li=gpu_unai.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + + for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH, + x3 += dx3, x4 += dx4 ) { - x3 = i2x(x1); - x4 = i2x(x0) + (dx4 * (y1 - y0)); - dx3 = xLoDivx((x2 - x1), (y2 - y1)); + if (ya&li) continue; + if ((ya&pi)==pif) continue; + + xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4); + if ((xmin - xa) > 0) xa = xmin; + if (xb > xmax) xb = xmax; + if ((xb - xa) > 0) + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); } } - - temp = ymin - ya; - if (temp > 0) - { - ya = ymin; - x3 += dx3*temp; - x4 += dx4*temp; - } - if (yb > ymax) yb = ymax; - if (ya>=yb) continue; - - x3+= fixed_HALF; - x4+= fixed_HALF; - - u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; - - for(;yaxmax) || (xb xmax) xb = xmax; - xb-=xa; - if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); - } - } + } while (++cur_pass < total_passes); } /*---------------------------------------------------------------------- -FT3 +gpuDrawPolyFT - Flat-shaded, textured poly ----------------------------------------------------------------------*/ - -void gpuDrawFT3(const PP gpuPolySpanDriver) +void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) { - const int li=linesInterlace; - s32 temp; - s32 xa, xb, xmin, xmax; - s32 ya, yb, ymin, ymax; - s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; - s32 y0, y1, y2; - s32 u0, u1, u2, u3, du3=0; - s32 v0, v1, v2, v3, dv3=0; - - x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); - y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] ); - x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] ); - y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] ); - x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]); - y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]); - - GPU_TESTRANGE3(); - - x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; - y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; - - xmin = DrawingArea[0]; xmax = DrawingArea[2]; - ymin = DrawingArea[1]; ymax = DrawingArea[3]; - + // r8/g8/b8 used if texture-blending & dithering is applied (24-bit light) + gpu_unai.r8 = packet.U1[0]; + gpu_unai.g8 = packet.U1[1]; + gpu_unai.b8 = packet.U1[2]; + // r5/g5/b5 used if just texture-blending is applied (15-bit light) + gpu_unai.r5 = packet.U1[0] >> 3; + gpu_unai.g5 = packet.U1[1] >> 3; + gpu_unai.b5 = packet.U1[2] >> 3; + + PolyVertex vbuf[4]; + polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad); + + int total_passes = is_quad ? 2 : 1; + int cur_pass = 0; + do { - int rx0 = Max2(xmin,Min3(x0,x1,x2)); - int ry0 = Max2(ymin,Min3(y0,y1,y2)); - int rx1 = Min2(xmax,Max3(x0,x1,x2)); - int ry1 = Min2(ymax,Max3(y0,y1,y2)); - if( rx0>=rx1 || ry0>=ry1) return; - } - - u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9]; - u1 = PacketBuffer.U1[16]; v1 = PacketBuffer.U1[17]; - u2 = PacketBuffer.U1[24]; v2 = PacketBuffer.U1[25]; - - r4 = s32(PacketBuffer.U1[0]); - g4 = s32(PacketBuffer.U1[1]); - b4 = s32(PacketBuffer.U1[2]); - dr4 = dg4 = db4 = 0; + const PolyVertex* vptrs[3]; + if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + continue; + + s32 xa, xb, ya, yb; + s32 x3, dx3, x4, dx4, dx; + s32 u3, du3, v3, dv3; + s32 x0, x1, x2, y0, y1, y2; + s32 u0, u1, u2, v0, v1, v2; + s32 du4, dv4; + + x0 = vptrs[0]->x; y0 = vptrs[0]->y; + u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v; + x1 = vptrs[1]->x; y1 = vptrs[1]->y; + u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v; + x2 = vptrs[2]->x; y2 = vptrs[2]->y; + u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v; + + ya = y2 - y0; + yb = y2 - y1; + dx4 = (x2 - x1) * ya - (x2 - x0) * yb; + du4 = (u2 - u1) * ya - (u2 - u0) * yb; + dv4 = (v2 - v1) * ya - (v2 - v0) * yb; + dx = dx4; + if (dx4 < 0) { + dx4 = -dx4; + du4 = -du4; + dv4 = -dv4; + } - if (y0 >= y1) - { - if( y0!=y1 || x0>x1 ) - { - GPU_SWAP(x0, x1, temp); - GPU_SWAP(y0, y1, temp); - GPU_SWAP(u0, u1, temp); - GPU_SWAP(v0, v1, temp); +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if (dx4 != 0) { + float finv = FloatInv(dx4); + du4 = (fixed)((du4 << FIXED_BITS) * finv); + dv4 = (fixed)((dv4 << FIXED_BITS) * finv); + } else { + du4 = dv4 = 0; } - } - if (y1 >= y2) - { - if( y1!=y2 || x1>x2 ) - { - GPU_SWAP(x1, x2, temp); - GPU_SWAP(y1, y2, temp); - GPU_SWAP(u1, u2, temp); - GPU_SWAP(v1, v2, temp); +#else + if (dx4 != 0) { + float fdiv = dx4; + du4 = (fixed)((du4 << FIXED_BITS) / fdiv); + dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv); + } else { + du4 = dv4 = 0; } - } - if (y0 >= y1) - { - if( y0!=y1 || x0>x1 ) - { - GPU_SWAP(x0, x1, temp); - GPU_SWAP(y0, y1, temp); - GPU_SWAP(u0, u1, temp); - GPU_SWAP(v0, v1, temp); +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if (dx4 != 0) { + int iF, iS; + xInv(dx4, iF, iS); + du4 = xInvMulx(du4, iF, iS); + dv4 = xInvMulx(dv4, iF, iS); + } else { + du4 = dv4 = 0; } - } - - ya = y2 - y0; - yb = y2 - y1; - dx = (x2 - x1) * ya - (x2 - x0) * yb; - du4 = (u2 - u1) * ya - (u2 - u0) * yb; - dv4 = (v2 - v1) * ya - (v2 - v0) * yb; +#else + if (dx4 != 0) { + du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4); + dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4); + } else { + du4 = dv4 = 0; + } +#endif +#endif + // Set u,v increments for inner driver + gpu_unai.u_inc = du4; + gpu_unai.v_inc = dv4; + + //senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here? + // (SAME ISSUE ELSEWHERE) + for (s32 loop0 = 2; loop0; loop0--) { + if (loop0 == 2) { + ya = y0; yb = y1; + x3 = x4 = i2x(x0); + u3 = i2x(u0); v3 = i2x(v0); + if (dx < 0) { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y0) != 0) { + float finv = FloatInv(y2 - y0); + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv); + du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv); + dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + float fdiv = y2 - y0; + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y0) != 0) { + int iF, iS; + xInv((y2 - y0), iF, iS); + dx3 = xInvMulx((x2 - x0), iF, iS); + du3 = xInvMulx((u2 - u0), iF, iS); + dv3 = xInvMulx((v2 - v0), iF, iS); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)); + du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0)); + dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0)); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; +#endif +#endif + } else { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y1 - y0) != 0) { + float finv = FloatInv(y1 - y0); + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv); + du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv); + dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + float fdiv = y1 - y0; + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y1 - y0) != 0) { + int iF, iS; + xInv((y1 - y0), iF, iS); + dx3 = xInvMulx((x1 - x0), iF, iS); + du3 = xInvMulx((u1 - u0), iF, iS); + dv3 = xInvMulx((v1 - v0), iF, iS); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)); + du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0)); + dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0)); + } else { + dx3 = du3 = dv3 = 0; + } + dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; +#endif +#endif + } + } else { + //senquack - break out of final loop if nothing to be drawn (1st loop + // must always be taken to setup dx3/dx4) + if (y1 == y2) break; + + ya = y1; yb = y2; + + if (dx < 0) { + x3 = i2x(x0); + x4 = i2x(x1); + u3 = i2x(u0); + v3 = i2x(v0); + if ((y1 - y0) != 0) { + x3 += (dx3 * (y1 - y0)); + u3 += (du3 * (y1 - y0)); + v3 += (dv3 * (y1 - y0)); + } +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; +#endif +#endif + } else { + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + u3 = i2x(u1); + v3 = i2x(v1); +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y1) != 0) { + float finv = FloatInv(y2 - y1); + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv); + du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv); + dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = 0; + } +#else + if ((y2 - y1) != 0) { + float fdiv = y2 - y1; + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = 0; + } +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y1) != 0) { + int iF, iS; + xInv((y2 - y1), iF, iS); + dx3 = xInvMulx((x2 - x1), iF, iS); + du3 = xInvMulx((u2 - u1), iF, iS); + dv3 = xInvMulx((v2 - v1), iF, iS); + } else { + dx3 = du3 = dv3 = 0; + } +#else + if ((y2 - y1) != 0) { + dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)); + du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1)); + dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1)); + } else { + dx3 = du3 = dv3 = 0; + } +#endif +#endif + } + } - s32 iF,iS; - xInv( dx, iF, iS); - du4 = xInvMulx( du4, iF, iS); - dv4 = xInvMulx( dv4, iF, iS); - tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff); - tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff; + s32 xmin, xmax, ymin, ymax; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; - for (s32 loop0 = 2; loop0; --loop0) - { - if (loop0 == 2) - { - ya = y0; - yb = y1; - u3 = i2x(u0); - v3 = i2x(v0); - x3 = i2x(x0); - x4 = y0!=y1 ? x3 : i2x(x1); - if (dx < 0) - { - xInv( (y2 - y0), iF, iS); - dx3 = xInvMulx( (x2 - x0), iF, iS); - du3 = xInvMulx( (u2 - u0), iF, iS); - dv3 = xInvMulx( (v2 - v0), iF, iS); - dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); - } - else - { - xInv( (y1 - y0), iF, iS); - dx3 = xInvMulx( (x1 - x0), iF, iS); - du3 = xInvMulx( (u1 - u0), iF, iS); - dv3 = xInvMulx( (v1 - v0), iF, iS); - dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); + if ((ymin - ya) > 0) { + x3 += dx3 * (ymin - ya); + x4 += dx4 * (ymin - ya); + u3 += du3 * (ymin - ya); + v3 += dv3 * (ymin - ya); + ya = ymin; } - } - else - { - ya = y1; - yb = y2; - if (dx < 0) - { - temp = y1 - y0; - u3 = i2x(u0) + (du3 * temp); - v3 = i2x(v0) + (dv3 * temp); - x3 = i2x(x0) + (dx3 * temp); - x4 = i2x(x1); - dx4 = xLoDivx((x2 - x1), (y2 - y1)); - } - else + + if (yb > ymax) yb = ymax; + + int loop1 = yb - ya; + if (loop1 <= 0) + continue; + + u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; + int li=gpu_unai.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + + for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, + x3 += dx3, x4 += dx4, + u3 += du3, v3 += dv3 ) { - u3 = i2x(u1); - v3 = i2x(v1); - x3 = i2x(x1); - x4 = i2x(x0) + (dx4 * (y1 - y0)); - xInv( (y2 - y1), iF, iS); - dx3 = xInvMulx( (x2 - x1), iF, iS); - du3 = xInvMulx( (u2 - u1), iF, iS); - dv3 = xInvMulx( (v2 - v1), iF, iS); - } - } + if (ya&li) continue; + if ((ya&pi)==pif) continue; - temp = ymin - ya; - if (temp > 0) - { - ya = ymin; - x3 += dx3*temp; - x4 += dx4*temp; - u3 += du3*temp; - v3 += dv3*temp; - } - if (yb > ymax) yb = ymax; - if (ya>=yb) continue; + u32 u4, v4; - x3+= fixed_HALF; - x4+= fixed_HALF; - u3+= fixed_HALF; - v4+= fixed_HALF; + xa = FixedCeilToInt(x3); xb = FixedCeilToInt(x4); + u4 = u3; v4 = v3; - u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; + fixed itmp = i2x(xa) - x3; + if (itmp != 0) { + u4 += (du4 * itmp) >> FIXED_BITS; + v4 += (dv4 * itmp) >> FIXED_BITS; + } - for(;yaxmax) || (xb 0) - { - xa = xmin; - u4 = u3 + du4*temp; - v4 = v3 + dv4*temp; - } - else - { - u4 = u3; - v4 = v3; + if ((xmin - xa) > 0) { + u4 += du4 * (xmin - xa); + v4 += dv4 * (xmin - xa); + xa = xmin; + } + + // Set u,v coords for inner driver + gpu_unai.u = u4; + gpu_unai.v = v4; + + if (xb > xmax) xb = xmax; + if ((xb - xa) > 0) + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); } - if(xb > xmax) xb = xmax; - xb-=xa; - if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); } - } + } while (++cur_pass < total_passes); } /*---------------------------------------------------------------------- -G3 +gpuDrawPolyG - Gouraud-shaded, untextured poly ----------------------------------------------------------------------*/ - -void gpuDrawG3(const PP gpuPolySpanDriver) +void gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) { - const int li=linesInterlace; - s32 temp; - s32 xa, xb, xmin, xmax; - s32 ya, yb, ymin, ymax; - s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; - s32 y0, y1, y2; - s32 r0, r1, r2, r3, dr3=0; - s32 g0, g1, g2, g3, dg3=0; - s32 b0, b1, b2, b3, db3=0; - - x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); - y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] ); - x1 = GPU_EXPANDSIGN(PacketBuffer.S2[6] ); - y1 = GPU_EXPANDSIGN(PacketBuffer.S2[7] ); - x2 = GPU_EXPANDSIGN(PacketBuffer.S2[10]); - y2 = GPU_EXPANDSIGN(PacketBuffer.S2[11]); - - GPU_TESTRANGE3(); - - x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; - y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; - - xmin = DrawingArea[0]; xmax = DrawingArea[2]; - ymin = DrawingArea[1]; ymax = DrawingArea[3]; + PolyVertex vbuf[4]; + polyInitVertexBuffer(vbuf, packet, POLYTYPE_G, is_quad); + int total_passes = is_quad ? 2 : 1; + int cur_pass = 0; + do { - int rx0 = Max2(xmin,Min3(x0,x1,x2)); - int ry0 = Max2(ymin,Min3(y0,y1,y2)); - int rx1 = Min2(xmax,Max3(x0,x1,x2)); - int ry1 = Min2(ymax,Max3(y0,y1,y2)); - if( rx0>=rx1 || ry0>=ry1) return; - } - - r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2]; - r1 = PacketBuffer.U1[8]; g1 = PacketBuffer.U1[9]; b1 = PacketBuffer.U1[10]; - r2 = PacketBuffer.U1[16]; g2 = PacketBuffer.U1[17]; b2 = PacketBuffer.U1[18]; + const PolyVertex* vptrs[3]; + if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + continue; + + s32 xa, xb, ya, yb; + s32 x3, dx3, x4, dx4, dx; + s32 r3, dr3, g3, dg3, b3, db3; + s32 x0, x1, x2, y0, y1, y2; + s32 r0, r1, r2, g0, g1, g2, b0, b1, b2; + s32 dr4, dg4, db4; + + x0 = vptrs[0]->x; y0 = vptrs[0]->y; + r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b; + x1 = vptrs[1]->x; y1 = vptrs[1]->y; + r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b; + x2 = vptrs[2]->x; y2 = vptrs[2]->y; + r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b; + + ya = y2 - y0; + yb = y2 - y1; + dx4 = (x2 - x1) * ya - (x2 - x0) * yb; + dr4 = (r2 - r1) * ya - (r2 - r0) * yb; + dg4 = (g2 - g1) * ya - (g2 - g0) * yb; + db4 = (b2 - b1) * ya - (b2 - b0) * yb; + dx = dx4; + if (dx4 < 0) { + dx4 = -dx4; + dr4 = -dr4; + dg4 = -dg4; + db4 = -db4; + } - if (y0 >= y1) - { - if( y0!=y1 || x0>x1 ) - { - GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); - GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if (dx4 != 0) { + float finv = FloatInv(dx4); + dr4 = (fixed)((dr4 << FIXED_BITS) * finv); + dg4 = (fixed)((dg4 << FIXED_BITS) * finv); + db4 = (fixed)((db4 << FIXED_BITS) * finv); + } else { + dr4 = dg4 = db4 = 0; } - } - if (y1 >= y2) - { - if( y1!=y2 || x1>x2 ) - { - GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp); - GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp); +#else + if (dx4 != 0) { + float fdiv = dx4; + dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv); + dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv); + db4 = (fixed)((db4 << FIXED_BITS) / fdiv); + } else { + dr4 = dg4 = db4 = 0; } - } - if (y0 >= y1) - { - if( y0!=y1 || x0>x1 ) - { - GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); - GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if (dx4 != 0) { + int iF, iS; + xInv(dx4, iF, iS); + dr4 = xInvMulx(dr4, iF, iS); + dg4 = xInvMulx(dg4, iF, iS); + db4 = xInvMulx(db4, iF, iS); + } else { + dr4 = dg4 = db4 = 0; } - } - - ya = y2 - y0; - yb = y2 - y1; - dx = (x2 - x1) * ya - (x2 - x0) * yb; - dr4 = (r2 - r1) * ya - (r2 - r0) * yb; - dg4 = (g2 - g1) * ya - (g2 - g0) * yb; - db4 = (b2 - b1) * ya - (b2 - b0) * yb; - - s32 iF,iS; - xInv( dx, iF, iS); - dr4 = xInvMulx( dr4, iF, iS); - dg4 = xInvMulx( dg4, iF, iS); - db4 = xInvMulx( db4, iF, iS); - u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21; - u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10; - u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0; - lInc = db + dg + dr; - - for (s32 loop0 = 2; loop0; --loop0) - { - if (loop0 == 2) - { - ya = y0; - yb = y1; - r3 = i2x(r0); - g3 = i2x(g0); - b3 = i2x(b0); - x3 = i2x(x0); - x4 = y0!=y1 ? x3 : i2x(x1); - if (dx < 0) - { - xInv( (y2 - y0), iF, iS); - dx3 = xInvMulx( (x2 - x0), iF, iS); - dr3 = xInvMulx( (r2 - r0), iF, iS); - dg3 = xInvMulx( (g2 - g0), iF, iS); - db3 = xInvMulx( (b2 - b0), iF, iS); - dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); - } - else - { - xInv( (y1 - y0), iF, iS); - dx3 = xInvMulx( (x1 - x0), iF, iS); - dr3 = xInvMulx( (r1 - r0), iF, iS); - dg3 = xInvMulx( (g1 - g0), iF, iS); - db3 = xInvMulx( (b1 - b0), iF, iS); - dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); - } +#else + if (dx4 != 0) { + dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4); + dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4); + db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4); + } else { + dr4 = dg4 = db4 = 0; } - else - { - ya = y1; - yb = y2; - if (dx < 0) - { - temp = y1 - y0; - r3 = i2x(r0) + (dr3 * temp); - g3 = i2x(g0) + (dg3 * temp); - b3 = i2x(b0) + (db3 * temp); - x3 = i2x(x0) + (dx3 * temp); - x4 = i2x(x1); - dx4 = xLoDivx((x2 - x1), (y2 - y1)); - } - else - { - r3 = i2x(r1); - g3 = i2x(g1); - b3 = i2x(b1); - x3 = i2x(x1); - x4 = i2x(x0) + (dx4 * (y1 - y0)); - - xInv( (y2 - y1), iF, iS); - dx3 = xInvMulx( (x2 - x1), iF, iS); - dr3 = xInvMulx( (r2 - r1), iF, iS); - dg3 = xInvMulx( (g2 - g1), iF, iS); - db3 = xInvMulx( (b2 - b1), iF, iS); +#endif +#endif + // Setup packed Gouraud increment for inner driver + gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4); + + for (s32 loop0 = 2; loop0; loop0--) { + if (loop0 == 2) { + ya = y0; + yb = y1; + x3 = x4 = i2x(x0); + r3 = i2x(r0); + g3 = i2x(g0); + b3 = i2x(b0); + if (dx < 0) { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y0) != 0) { + float finv = FloatInv(y2 - y0); + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv); + dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv); + dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv); + db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + float fdiv = y2 - y0; + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y0) != 0) { + int iF, iS; + xInv((y2 - y0), iF, iS); + dx3 = xInvMulx((x2 - x0), iF, iS); + dr3 = xInvMulx((r2 - r0), iF, iS); + dg3 = xInvMulx((g2 - g0), iF, iS); + db3 = xInvMulx((b2 - b0), iF, iS); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)); + dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0)); + dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0)); + db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0)); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; +#endif +#endif + } else { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y1 - y0) != 0) { + float finv = FloatInv(y1 - y0); + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv); + dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv); + dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv); + db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + float fdiv = y1 - y0; + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / (float)(y2 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y1 - y0) != 0) { + int iF, iS; + xInv((y1 - y0), iF, iS); + dx3 = xInvMulx((x1 - x0), iF, iS); + dr3 = xInvMulx((r1 - r0), iF, iS); + dg3 = xInvMulx((g1 - g0), iF, iS); + db3 = xInvMulx((b1 - b0), iF, iS); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)); + dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0)); + dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0)); + db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0)); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; +#endif +#endif + } + } else { + //senquack - break out of final loop if nothing to be drawn (1st loop + // must always be taken to setup dx3/dx4) + if (y1 == y2) break; + + ya = y1; yb = y2; + + if (dx < 0) { + x3 = i2x(x0); x4 = i2x(x1); + r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0); + + if ((y1 - y0) != 0) { + x3 += (dx3 * (y1 - y0)); + r3 += (dr3 * (y1 - y0)); + g3 += (dg3 * (y1 - y0)); + b3 += (db3 * (y1 - y0)); + } + +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; +#endif +#endif + } else { + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + + r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1); + +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y1) != 0) { + float finv = FloatInv(y2 - y1); + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv); + dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv); + dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv); + db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } +#else + if ((y2 - y1) != 0) { + float fdiv = y2 - y1; + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y1) != 0) { + int iF, iS; + xInv((y2 - y1), iF, iS); + dx3 = xInvMulx((x2 - x1), iF, iS); + dr3 = xInvMulx((r2 - r1), iF, iS); + dg3 = xInvMulx((g2 - g1), iF, iS); + db3 = xInvMulx((b2 - b1), iF, iS); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } +#else + if ((y2 - y1) != 0) { + dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)); + dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1)); + dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1)); + db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1)); + } else { + dx3 = dr3 = dg3 = db3 = 0; + } +#endif +#endif + } } - } - temp = ymin - ya; - if (temp > 0) - { - ya = ymin; - x3 += dx3*temp; x4 += dx4*temp; - r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp; - } - if (yb > ymax) yb = ymax; - if (ya>=yb) continue; - - x3+= fixed_HALF; x4+= fixed_HALF; - r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF; - - u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; - - for(;yaxmax) || (xb 0) - { - xa = xmin; - r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp; + s32 xmin, xmax, ymin, ymax; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; + + if ((ymin - ya) > 0) { + x3 += (dx3 * (ymin - ya)); + x4 += (dx4 * (ymin - ya)); + r3 += (dr3 * (ymin - ya)); + g3 += (dg3 * (ymin - ya)); + b3 += (db3 * (ymin - ya)); + ya = ymin; } - else + + if (yb > ymax) yb = ymax; + + int loop1 = yb - ya; + if (loop1 <= 0) + continue; + + u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; + int li=gpu_unai.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + + for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, + x3 += dx3, x4 += dx4, + r3 += dr3, g3 += dg3, b3 += db3 ) { + if (ya&li) continue; + if ((ya&pi)==pif) continue; + + u32 r4, g4, b4; + + xa = FixedCeilToInt(x3); + xb = FixedCeilToInt(x4); r4 = r3; g4 = g3; b4 = b3; + + fixed itmp = i2x(xa) - x3; + if (itmp != 0) { + r4 += (dr4 * itmp) >> FIXED_BITS; + g4 += (dg4 * itmp) >> FIXED_BITS; + b4 += (db4 * itmp) >> FIXED_BITS; + } + + r4 += fixed_HALF; + g4 += fixed_HALF; + b4 += fixed_HALF; + + if ((xmin - xa) > 0) { + r4 += (dr4 * (xmin - xa)); + g4 += (dg4 * (xmin - xa)); + b4 += (db4 * (xmin - xa)); + xa = xmin; + } + + // Setup packed Gouraud color for inner driver + gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4); + + if (xb > xmax) xb = xmax; + if ((xb - xa) > 0) + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); } - if(xb > xmax) xb = xmax; - xb-=xa; - if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); } - } + } while (++cur_pass < total_passes); } /*---------------------------------------------------------------------- -GT3 +gpuDrawPolyGT - Gouraud-shaded, textured poly ----------------------------------------------------------------------*/ - -void gpuDrawGT3(const PP gpuPolySpanDriver) +void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad) { - const int li=linesInterlace; - s32 temp; - s32 xa, xb, xmin, xmax; - s32 ya, yb, ymin, ymax; - s32 x0, x1, x2, x3, dx3=0, x4, dx4=0, dx; - s32 y0, y1, y2; - s32 u0, u1, u2, u3, du3=0; - s32 v0, v1, v2, v3, dv3=0; - s32 r0, r1, r2, r3, dr3=0; - s32 g0, g1, g2, g3, dg3=0; - s32 b0, b1, b2, b3, db3=0; - - x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2] ); - y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3] ); - x1 = GPU_EXPANDSIGN(PacketBuffer.S2[8] ); - y1 = GPU_EXPANDSIGN(PacketBuffer.S2[9] ); - x2 = GPU_EXPANDSIGN(PacketBuffer.S2[14]); - y2 = GPU_EXPANDSIGN(PacketBuffer.S2[15]); - - GPU_TESTRANGE3(); - - x0 += DrawingOffset[0]; x1 += DrawingOffset[0]; x2 += DrawingOffset[0]; - y0 += DrawingOffset[1]; y1 += DrawingOffset[1]; y2 += DrawingOffset[1]; - - xmin = DrawingArea[0]; xmax = DrawingArea[2]; - ymin = DrawingArea[1]; ymax = DrawingArea[3]; + PolyVertex vbuf[4]; + polyInitVertexBuffer(vbuf, packet, POLYTYPE_GT, is_quad); + int total_passes = is_quad ? 2 : 1; + int cur_pass = 0; + do { - int rx0 = Max2(xmin,Min3(x0,x1,x2)); - int ry0 = Max2(ymin,Min3(y0,y1,y2)); - int rx1 = Min2(xmax,Max3(x0,x1,x2)); - int ry1 = Min2(ymax,Max3(y0,y1,y2)); - if( rx0>=rx1 || ry0>=ry1) return; - } - - r0 = PacketBuffer.U1[0]; g0 = PacketBuffer.U1[1]; b0 = PacketBuffer.U1[2]; - u0 = PacketBuffer.U1[8]; v0 = PacketBuffer.U1[9]; - r1 = PacketBuffer.U1[12]; g1 = PacketBuffer.U1[13]; b1 = PacketBuffer.U1[14]; - u1 = PacketBuffer.U1[20]; v1 = PacketBuffer.U1[21]; - r2 = PacketBuffer.U1[24]; g2 = PacketBuffer.U1[25]; b2 = PacketBuffer.U1[26]; - u2 = PacketBuffer.U1[32]; v2 = PacketBuffer.U1[33]; + const PolyVertex* vptrs[3]; + if (polyUseTriangle(vbuf, cur_pass, vptrs) == false) + continue; + + s32 xa, xb, ya, yb; + s32 x3, dx3, x4, dx4, dx; + s32 u3, du3, v3, dv3; + s32 r3, dr3, g3, dg3, b3, db3; + s32 x0, x1, x2, y0, y1, y2; + s32 u0, u1, u2, v0, v1, v2; + s32 r0, r1, r2, g0, g1, g2, b0, b1, b2; + s32 du4, dv4; + s32 dr4, dg4, db4; + + x0 = vptrs[0]->x; y0 = vptrs[0]->y; + u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v; + r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b; + x1 = vptrs[1]->x; y1 = vptrs[1]->y; + u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v; + r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b; + x2 = vptrs[2]->x; y2 = vptrs[2]->y; + u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v; + r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b; + + ya = y2 - y0; + yb = y2 - y1; + dx4 = (x2 - x1) * ya - (x2 - x0) * yb; + du4 = (u2 - u1) * ya - (u2 - u0) * yb; + dv4 = (v2 - v1) * ya - (v2 - v0) * yb; + dr4 = (r2 - r1) * ya - (r2 - r0) * yb; + dg4 = (g2 - g1) * ya - (g2 - g0) * yb; + db4 = (b2 - b1) * ya - (b2 - b0) * yb; + dx = dx4; + if (dx4 < 0) { + dx4 = -dx4; + du4 = -du4; + dv4 = -dv4; + dr4 = -dr4; + dg4 = -dg4; + db4 = -db4; + } - if (y0 >= y1) - { - if( y0!=y1 || x0>x1 ) - { - GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); - GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp); - GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if (dx4 != 0) { + float finv = FloatInv(dx4); + du4 = (fixed)((du4 << FIXED_BITS) * finv); + dv4 = (fixed)((dv4 << FIXED_BITS) * finv); + dr4 = (fixed)((dr4 << FIXED_BITS) * finv); + dg4 = (fixed)((dg4 << FIXED_BITS) * finv); + db4 = (fixed)((db4 << FIXED_BITS) * finv); + } else { + du4 = dv4 = dr4 = dg4 = db4 = 0; } - } - if (y1 >= y2) - { - if( y1!=y2 || x1>x2 ) - { - GPU_SWAP(x1, x2, temp); GPU_SWAP(y1, y2, temp); - GPU_SWAP(u1, u2, temp); GPU_SWAP(v1, v2, temp); - GPU_SWAP(r1, r2, temp); GPU_SWAP(g1, g2, temp); GPU_SWAP(b1, b2, temp); +#else + if (dx4 != 0) { + float fdiv = dx4; + du4 = (fixed)((du4 << FIXED_BITS) / fdiv); + dv4 = (fixed)((dv4 << FIXED_BITS) / fdiv); + dr4 = (fixed)((dr4 << FIXED_BITS) / fdiv); + dg4 = (fixed)((dg4 << FIXED_BITS) / fdiv); + db4 = (fixed)((db4 << FIXED_BITS) / fdiv); + } else { + du4 = dv4 = dr4 = dg4 = db4 = 0; } - } - if (y0 >= y1) - { - if( y0!=y1 || x0>x1 ) - { - GPU_SWAP(x0, x1, temp); GPU_SWAP(y0, y1, temp); - GPU_SWAP(u0, u1, temp); GPU_SWAP(v0, v1, temp); - GPU_SWAP(r0, r1, temp); GPU_SWAP(g0, g1, temp); GPU_SWAP(b0, b1, temp); +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if (dx4 != 0) { + int iF, iS; + xInv(dx4, iF, iS); + du4 = xInvMulx(du4, iF, iS); + dv4 = xInvMulx(dv4, iF, iS); + dr4 = xInvMulx(dr4, iF, iS); + dg4 = xInvMulx(dg4, iF, iS); + db4 = xInvMulx(db4, iF, iS); + } else { + du4 = dv4 = dr4 = dg4 = db4 = 0; } - } - - ya = y2 - y0; - yb = y2 - y1; - dx = (x2 - x1) * ya - (x2 - x0) * yb; - du4 = (u2 - u1) * ya - (u2 - u0) * yb; - dv4 = (v2 - v1) * ya - (v2 - v0) * yb; - dr4 = (r2 - r1) * ya - (r2 - r0) * yb; - dg4 = (g2 - g1) * ya - (g2 - g0) * yb; - db4 = (b2 - b1) * ya - (b2 - b0) * yb; - - s32 iF,iS; - - xInv( dx, iF, iS); - du4 = xInvMulx( du4, iF, iS); - dv4 = xInvMulx( dv4, iF, iS); - dr4 = xInvMulx( dr4, iF, iS); - dg4 = xInvMulx( dg4, iF, iS); - db4 = xInvMulx( db4, iF, iS); - u32 dr = (u32)(dr4<< 8)&(0xffffffff<<21); if(dr4<0) dr+= 1<<21; - u32 dg = (u32)(dg4>> 3)&(0xffffffff<<10); if(dg4<0) dg+= 1<<10; - u32 db = (u32)(db4>>14)&(0xffffffff ); if(db4<0) db+= 1<< 0; - lInc = db + dg + dr; - tInc = ((u32)(du4<<7)&0x7fff0000) | ((u32)(dv4>>9)&0x00007fff); - tMsk = (TextureWindow[2]<<23) | (TextureWindow[3]<<7) | 0x00ff00ff; - - for (s32 loop0 = 2; loop0; --loop0) - { - if (loop0 == 2) - { - ya = y0; - yb = y1; - u3 = i2x(u0); - v3 = i2x(v0); - r3 = i2x(r0); - g3 = i2x(g0); - b3 = i2x(b0); - x3 = i2x(x0); - x4 = y0!=y1 ? x3 : i2x(x1); - if (dx < 0) - { - xInv( (y2 - y0), iF, iS); - dx3 = xInvMulx( (x2 - x0), iF, iS); - du3 = xInvMulx( (u2 - u0), iF, iS); - dv3 = xInvMulx( (v2 - v0), iF, iS); - dr3 = xInvMulx( (r2 - r0), iF, iS); - dg3 = xInvMulx( (g2 - g0), iF, iS); - db3 = xInvMulx( (b2 - b0), iF, iS); - dx4 = xLoDivx ( (x1 - x0), (y1 - y0)); - } - else - { - xInv( (y1 - y0), iF, iS); - dx3 = xInvMulx( (x1 - x0), iF, iS); - du3 = xInvMulx( (u1 - u0), iF, iS); - dv3 = xInvMulx( (v1 - v0), iF, iS); - dr3 = xInvMulx( (r1 - r0), iF, iS); - dg3 = xInvMulx( (g1 - g0), iF, iS); - db3 = xInvMulx( (b1 - b0), iF, iS); - dx4 = xLoDivx ( (x2 - x0), (y2 - y0)); - } +#else + if (dx4 != 0) { + du4 = GPU_FAST_DIV(du4 << FIXED_BITS, dx4); + dv4 = GPU_FAST_DIV(dv4 << FIXED_BITS, dx4); + dr4 = GPU_FAST_DIV(dr4 << FIXED_BITS, dx4); + dg4 = GPU_FAST_DIV(dg4 << FIXED_BITS, dx4); + db4 = GPU_FAST_DIV(db4 << FIXED_BITS, dx4); + } else { + du4 = dv4 = dr4 = dg4 = db4 = 0; } - else - { - ya = y1; - yb = y2; - if (dx < 0) - { - temp = y1 - y0; - u3 = i2x(u0) + (du3 * temp); - v3 = i2x(v0) + (dv3 * temp); - r3 = i2x(r0) + (dr3 * temp); - g3 = i2x(g0) + (dg3 * temp); - b3 = i2x(b0) + (db3 * temp); - x3 = i2x(x0) + (dx3 * temp); - x4 = i2x(x1); - dx4 = xLoDivx((x2 - x1), (y2 - y1)); +#endif +#endif + // Set u,v increments and packed Gouraud increment for inner driver + gpu_unai.u_inc = du4; + gpu_unai.v_inc = dv4; + gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4); + + for (s32 loop0 = 2; loop0; loop0--) { + if (loop0 == 2) { + ya = y0; yb = y1; + x3 = x4 = i2x(x0); + u3 = i2x(u0); v3 = i2x(v0); + r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0); + if (dx < 0) { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y0) != 0) { + float finv = FloatInv(y2 - y0); + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) * finv); + du3 = (fixed)(((u2 - u0) << FIXED_BITS) * finv); + dv3 = (fixed)(((v2 - v0) << FIXED_BITS) * finv); + dr3 = (fixed)(((r2 - r0) << FIXED_BITS) * finv); + dg3 = (fixed)(((g2 - g0) << FIXED_BITS) * finv); + db3 = (fixed)(((b2 - b0) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) * FloatInv(y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + float fdiv = y2 - y0; + dx3 = (fixed)(((x2 - x0) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u2 - u0) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v2 - v0) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r2 - r0) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g2 - g0) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b2 - b0) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? (fixed)(((x1 - x0) << FIXED_BITS) / (float)(y1 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y0) != 0) { + int iF, iS; + xInv((y2 - y0), iF, iS); + dx3 = xInvMulx((x2 - x0), iF, iS); + du3 = xInvMulx((u2 - u0), iF, iS); + dv3 = xInvMulx((v2 - v0), iF, iS); + dr3 = xInvMulx((r2 - r0), iF, iS); + dg3 = xInvMulx((g2 - g0), iF, iS); + db3 = xInvMulx((b2 - b0), iF, iS); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? xLoDivx((x1 - x0), (y1 - y0)) : 0; +#else + if ((y2 - y0) != 0) { + dx3 = GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)); + du3 = GPU_FAST_DIV((u2 - u0) << FIXED_BITS, (y2 - y0)); + dv3 = GPU_FAST_DIV((v2 - v0) << FIXED_BITS, (y2 - y0)); + dr3 = GPU_FAST_DIV((r2 - r0) << FIXED_BITS, (y2 - y0)); + dg3 = GPU_FAST_DIV((g2 - g0) << FIXED_BITS, (y2 - y0)); + db3 = GPU_FAST_DIV((b2 - b0) << FIXED_BITS, (y2 - y0)); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y1 - y0) != 0) ? GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)) : 0; +#endif +#endif + } else { +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y1 - y0) != 0) { + float finv = FloatInv(y1 - y0); + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) * finv); + du3 = (fixed)(((u1 - u0) << FIXED_BITS) * finv); + dv3 = (fixed)(((v1 - v0) << FIXED_BITS) * finv); + dr3 = (fixed)(((r1 - r0) << FIXED_BITS) * finv); + dg3 = (fixed)(((g1 - g0) << FIXED_BITS) * finv); + db3 = (fixed)(((b1 - b0) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) * FloatInv(y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + float fdiv = y1 - y0; + dx3 = (fixed)(((x1 - x0) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u1 - u0) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v1 - v0) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r1 - r0) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g1 - g0) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b1 - b0) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? (fixed)(((x2 - x0) << FIXED_BITS) / float(y2 - y0)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y1 - y0) != 0) { + int iF, iS; + xInv((y1 - y0), iF, iS); + dx3 = xInvMulx((x1 - x0), iF, iS); + du3 = xInvMulx((u1 - u0), iF, iS); + dv3 = xInvMulx((v1 - v0), iF, iS); + dr3 = xInvMulx((r1 - r0), iF, iS); + dg3 = xInvMulx((g1 - g0), iF, iS); + db3 = xInvMulx((b1 - b0), iF, iS); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? xLoDivx((x2 - x0), (y2 - y0)) : 0; +#else + if ((y1 - y0) != 0) { + dx3 = GPU_FAST_DIV((x1 - x0) << FIXED_BITS, (y1 - y0)); + du3 = GPU_FAST_DIV((u1 - u0) << FIXED_BITS, (y1 - y0)); + dv3 = GPU_FAST_DIV((v1 - v0) << FIXED_BITS, (y1 - y0)); + dr3 = GPU_FAST_DIV((r1 - r0) << FIXED_BITS, (y1 - y0)); + dg3 = GPU_FAST_DIV((g1 - g0) << FIXED_BITS, (y1 - y0)); + db3 = GPU_FAST_DIV((b1 - b0) << FIXED_BITS, (y1 - y0)); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } + dx4 = ((y2 - y0) != 0) ? GPU_FAST_DIV((x2 - x0) << FIXED_BITS, (y2 - y0)) : 0; +#endif +#endif + } + } else { + //senquack - break out of final loop if nothing to be drawn (1st loop + // must always be taken to setup dx3/dx4) + if (y1 == y2) break; + + ya = y1; yb = y2; + + if (dx < 0) { + x3 = i2x(x0); x4 = i2x(x1); + u3 = i2x(u0); v3 = i2x(v0); + r3 = i2x(r0); g3 = i2x(g0); b3 = i2x(b0); + + if ((y1 - y0) != 0) { + x3 += (dx3 * (y1 - y0)); + u3 += (du3 * (y1 - y0)); + v3 += (dv3 * (y1 - y0)); + r3 += (dr3 * (y1 - y0)); + g3 += (dg3 * (y1 - y0)); + b3 += (db3 * (y1 - y0)); + } + +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) * FloatInv(y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? (fixed)(((x2 - x1) << FIXED_BITS) / (float)(y2 - y1)) : 0; +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + dx4 = ((y2 - y1) != 0) ? xLoDivx((x2 - x1), (y2 - y1)) : 0; +#else + dx4 = ((y2 - y1) != 0) ? GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)) : 0; +#endif +#endif + } else { + x3 = i2x(x1); + x4 = i2x(x0) + (dx4 * (y1 - y0)); + + u3 = i2x(u1); v3 = i2x(v1); + r3 = i2x(r1); g3 = i2x(g1); b3 = i2x(b1); +#ifdef GPU_UNAI_USE_FLOATMATH +#ifdef GPU_UNAI_USE_FLOAT_DIV_MULTINV + if ((y2 - y1) != 0) { + float finv = FloatInv(y2 - y1); + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) * finv); + du3 = (fixed)(((u2 - u1) << FIXED_BITS) * finv); + dv3 = (fixed)(((v2 - v1) << FIXED_BITS) * finv); + dr3 = (fixed)(((r2 - r1) << FIXED_BITS) * finv); + dg3 = (fixed)(((g2 - g1) << FIXED_BITS) * finv); + db3 = (fixed)(((b2 - b1) << FIXED_BITS) * finv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } +#else + if ((y2 - y1) != 0) { + float fdiv = y2 - y1; + dx3 = (fixed)(((x2 - x1) << FIXED_BITS) / fdiv); + du3 = (fixed)(((u2 - u1) << FIXED_BITS) / fdiv); + dv3 = (fixed)(((v2 - v1) << FIXED_BITS) / fdiv); + dr3 = (fixed)(((r2 - r1) << FIXED_BITS) / fdiv); + dg3 = (fixed)(((g2 - g1) << FIXED_BITS) / fdiv); + db3 = (fixed)(((b2 - b1) << FIXED_BITS) / fdiv); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } +#endif +#else // Integer Division: +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + if ((y2 - y1) != 0) { + int iF, iS; + xInv((y2 - y1), iF, iS); + dx3 = xInvMulx((x2 - x1), iF, iS); + du3 = xInvMulx((u2 - u1), iF, iS); + dv3 = xInvMulx((v2 - v1), iF, iS); + dr3 = xInvMulx((r2 - r1), iF, iS); + dg3 = xInvMulx((g2 - g1), iF, iS); + db3 = xInvMulx((b2 - b1), iF, iS); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } +#else + if ((y2 - y1) != 0) { + dx3 = GPU_FAST_DIV((x2 - x1) << FIXED_BITS, (y2 - y1)); + du3 = GPU_FAST_DIV((u2 - u1) << FIXED_BITS, (y2 - y1)); + dv3 = GPU_FAST_DIV((v2 - v1) << FIXED_BITS, (y2 - y1)); + dr3 = GPU_FAST_DIV((r2 - r1) << FIXED_BITS, (y2 - y1)); + dg3 = GPU_FAST_DIV((g2 - g1) << FIXED_BITS, (y2 - y1)); + db3 = GPU_FAST_DIV((b2 - b1) << FIXED_BITS, (y2 - y1)); + } else { + dx3 = du3 = dv3 = dr3 = dg3 = db3 = 0; + } +#endif +#endif + } } - else - { - u3 = i2x(u1); - v3 = i2x(v1); - r3 = i2x(r1); - g3 = i2x(g1); - b3 = i2x(b1); - x3 = i2x(x1); - x4 = i2x(x0) + (dx4 * (y1 - y0)); - - xInv( (y2 - y1), iF, iS); - dx3 = xInvMulx( (x2 - x1), iF, iS); - du3 = xInvMulx( (u2 - u1), iF, iS); - dv3 = xInvMulx( (v2 - v1), iF, iS); - dr3 = xInvMulx( (r2 - r1), iF, iS); - dg3 = xInvMulx( (g2 - g1), iF, iS); - db3 = xInvMulx( (b2 - b1), iF, iS); - } - } - temp = ymin - ya; - if (temp > 0) - { - ya = ymin; - x3 += dx3*temp; x4 += dx4*temp; - u3 += du3*temp; v3 += dv3*temp; - r3 += dr3*temp; g3 += dg3*temp; b3 += db3*temp; - } - if (yb > ymax) yb = ymax; - if (ya>=yb) continue; - - x3+= fixed_HALF; x4+= fixed_HALF; - u3+= fixed_HALF; v4+= fixed_HALF; - r3+= fixed_HALF; g3+= fixed_HALF; b3+= fixed_HALF; - u16* PixelBase = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(0, ya)]; - - for(;yaxmax) || (xb 0) - { - xa = xmin; - u4 = u3 + du4*temp; v4 = v3 + dv4*temp; - r4 = r3 + dr4*temp; g4 = g3 + dg4*temp; b4 = b3 + db4*temp; + s32 xmin, xmax, ymin, ymax; + xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2]; + ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3]; + + if ((ymin - ya) > 0) { + x3 += (dx3 * (ymin - ya)); + x4 += (dx4 * (ymin - ya)); + u3 += (du3 * (ymin - ya)); + v3 += (dv3 * (ymin - ya)); + r3 += (dr3 * (ymin - ya)); + g3 += (dg3 * (ymin - ya)); + b3 += (db3 * (ymin - ya)); + ya = ymin; } - else + + if (yb > ymax) yb = ymax; + + int loop1 = yb - ya; + if (loop1 <= 0) + continue; + + u16* PixelBase = &((u16*)gpu_unai.vram)[FRAME_OFFSET(0, ya)]; + int li=gpu_unai.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + + for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, + x3 += dx3, x4 += dx4, + u3 += du3, v3 += dv3, + r3 += dr3, g3 += dg3, b3 += db3 ) { + if (ya&li) continue; + if ((ya&pi)==pif) continue; + + u32 u4, v4; + u32 r4, g4, b4; + + xa = FixedCeilToInt(x3); + xb = FixedCeilToInt(x4); u4 = u3; v4 = v3; r4 = r3; g4 = g3; b4 = b3; + + fixed itmp = i2x(xa) - x3; + if (itmp != 0) { + u4 += (du4 * itmp) >> FIXED_BITS; + v4 += (dv4 * itmp) >> FIXED_BITS; + r4 += (dr4 * itmp) >> FIXED_BITS; + g4 += (dg4 * itmp) >> FIXED_BITS; + b4 += (db4 * itmp) >> FIXED_BITS; + } + + u4 += fixed_HALF; + v4 += fixed_HALF; + r4 += fixed_HALF; + g4 += fixed_HALF; + b4 += fixed_HALF; + + if ((xmin - xa) > 0) { + u4 += du4 * (xmin - xa); + v4 += dv4 * (xmin - xa); + r4 += dr4 * (xmin - xa); + g4 += dg4 * (xmin - xa); + b4 += db4 * (xmin - xa); + xa = xmin; + } + + // Set packed Gouraud color and u,v coords for inner driver + gpu_unai.u = u4; + gpu_unai.v = v4; + gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4); + + if (xb > xmax) xb = xmax; + if ((xb - xa) > 0) + gpuPolySpanDriver(gpu_unai, PixelBase + xa, (xb - xa)); } - if(xb > xmax) xb = xmax; - xb-=xa; - if(xb>0) gpuPolySpanDriver(PixelBase + xa,xb); } - } + } while (++cur_pass < total_passes); } diff --git a/plugins/gpu_unai/gpu_raster_sprite.h b/plugins/gpu_unai/gpu_raster_sprite.h index a700db3..0afdbf5 100644 --- a/plugins/gpu_unai/gpu_raster_sprite.h +++ b/plugins/gpu_unai/gpu_raster_sprite.h @@ -21,73 +21,70 @@ /////////////////////////////////////////////////////////////////////////////// // GPU internal sprite drawing functions -/////////////////////////////////////////////////////////////////////////////// -void gpuDrawS(const PS gpuSpriteSpanDriver) +void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver) { - s32 x0, x1; - s32 y0, y1; - s32 u0; - s32 v0; - - x1 = x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]) + DrawingOffset[0]; - y1 = y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]) + DrawingOffset[1]; - x1+= PacketBuffer.S2[6]; - y1+= PacketBuffer.S2[7]; - - { - s32 xmin, xmax; - s32 ymin, ymax; - xmin = DrawingArea[0]; xmax = DrawingArea[2]; - ymin = DrawingArea[1]; ymax = DrawingArea[3]; - - { - int rx0 = Max2(xmin,Min2(x0,x1)); - int ry0 = Max2(ymin,Min2(y0,y1)); - int rx1 = Min2(xmax,Max2(x0,x1)); - int ry1 = Min2(ymax,Max2(y0,y1)); - if( rx0>=rx1 || ry0>=ry1) return; - } - - u0 = PacketBuffer.U1[8]; - v0 = PacketBuffer.U1[9]; - - r4 = s32(PacketBuffer.U1[0]); - g4 = s32(PacketBuffer.U1[1]); - b4 = s32(PacketBuffer.U1[2]); - - { - s32 temp; - temp = ymin - y0; - if (temp > 0) { y0 = ymin; v0 += temp; } - if (y1 > ymax) y1 = ymax; - if (y1 <= y0) return; - - temp = xmin - x0; - if (temp > 0) { x0 = xmin; u0 += temp; } - if (x1 > xmax) x1 = xmax; - x1 -= x0; - if (x1 <= 0) return; - } - } - - { - u16 *Pixel = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, y0)]; - const int li=linesInterlace; - const u32 masku=TextureWindow[2]; - const u32 maskv=TextureWindow[3]; - - for (;y0 0) { y0 = ymin; v0 += temp; } + if (y1 > ymax) y1 = ymax; + if (y1 <= y0) return; + + temp = xmin - x0; + if (temp > 0) { x0 = xmin; u0 += temp; } + if (x1 > xmax) x1 = xmax; + x1 -= x0; + if (x1 <= 0) return; + + gpu_unai.r5 = packet.U1[0] >> 3; + gpu_unai.g5 = packet.U1[1] >> 3; + gpu_unai.b5 = packet.U1[2] >> 3; + + u16 *Pixel = &((u16*)gpu_unai.vram)[FRAME_OFFSET(x0, y0)]; + const int li=gpu_unai.ilace_mask; + const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + unsigned int tmode = gpu_unai.TEXT_MODE >> 5; + const u32 v0_mask = gpu_unai.TextureWindow[3]; + u8* pTxt_base = (u8*)gpu_unai.TBA; + + // Texture is accessed byte-wise, so adjust idx if 16bpp + if (tmode == 3) u0 <<= 1; + + for (; y0 xmax - 16 || x0 < xmin || - ((u0 | v0) & 15) || !(TextureWindow[2] & TextureWindow[3] & 8)) { + ((u0 | v0) & 15) || !(gpu_unai.TextureWindow[2] & gpu_unai.TextureWindow[3] & 8)) { // send corner cases to general handler - PacketBuffer.U4[3] = 0x00100010; - gpuDrawS(gpuSpriteSpanFn<0x20>); + packet.U4[3] = 0x00100010; + gpuDrawS(packet, gpuSpriteSpanFn<0x20>); return; } @@ -121,54 +121,45 @@ void gpuDrawS16(void) else if (ymax - y0 < 16) h = ymax - y0; - draw_spr16_full(&GPU_FrameBuffer[FRAME_OFFSET(x0, y0)], &TBA[FRAME_OFFSET(u0/4, v0)], CBA, h); + draw_spr16_full(&gpu_unai.vram[FRAME_OFFSET(x0, y0)], &gpu_unai.TBA[FRAME_OFFSET(u0/4, v0)], gpu_unai.CBA, h); } #endif // __arm__ -/////////////////////////////////////////////////////////////////////////////// -void gpuDrawT(const PT gpuTileSpanDriver) +void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver) { - s32 x0, y0; - s32 x1, y1; - - x1 = x0 = GPU_EXPANDSIGN(PacketBuffer.S2[2]) + DrawingOffset[0]; - y1 = y0 = GPU_EXPANDSIGN(PacketBuffer.S2[3]) + DrawingOffset[1]; - x1+= PacketBuffer.S2[4]; - y1+= PacketBuffer.S2[5]; - - { - s32 xmin, xmax; - s32 ymin, ymax; - xmin = DrawingArea[0]; xmax = DrawingArea[2]; - ymin = DrawingArea[1]; ymax = DrawingArea[3]; - - { - int rx0 = Max2(xmin,Min2(x0,x1)); - int ry0 = Max2(ymin,Min2(y0,y1)); - int rx1 = Min2(xmax,Max2(x0,x1)); - int ry1 = Min2(ymax,Max2(y0,y1)); - if(rx0>=rx1 || ry0>=ry1) return; - } - - if (y0 < ymin) y0 = ymin; - if (y1 > ymax) y1 = ymax; - if (y1 <= y0) return; - - if (x0 < xmin) x0 = xmin; - if (x1 > xmax) x1 = xmax; - x1 -= x0; - if (x1 <= 0) return; - } - - { - u16 *Pixel = &((u16*)GPU_FrameBuffer)[FRAME_OFFSET(x0, y0)]; - const u16 Data = GPU_RGB16(PacketBuffer.U4[0]); - const int li=linesInterlace; - - for (; y0 ymax) y1 = ymax; + if (y1 <= y0) return; + + if (x0 < xmin) x0 = xmin; + if (x1 > xmax) x1 = xmax; + x1 -= x0; + if (x1 <= 0) return; + + const u16 Data = GPU_RGB16(packet.U4[0]); + u16 *Pixel = &((u16*)gpu_unai.vram)[FRAME_OFFSET(x0, y0)]; + const int li=gpu_unai.ilace_mask; + const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); + const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + + for (; y0 gmail com) * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * +***************************************************************************/ + +#ifndef GPU_UNAI_H +#define GPU_UNAI_H + +#include "gpu.h" + +// Header shared between both standalone gpu_unai (gpu.cpp) and new +// gpulib-compatible gpu_unai (gpulib_if.cpp) +// -> Anything here should be for gpu_unai's private use. <- + +/////////////////////////////////////////////////////////////////////////////// +// Compile Options + +//#define ENABLE_GPU_NULL_SUPPORT // Enables NullGPU support +//#define ENABLE_GPU_LOG_SUPPORT // Enables gpu logger, very slow only for windows debugging +//#define ENABLE_GPU_ARMV7 // Enables ARMv7 optimized assembly + +//Poly routine options (default is integer math and accurate division) +//#define GPU_UNAI_USE_FLOATMATH // Use float math in poly routines +//#define GPU_UNAI_USE_FLOAT_DIV_MULTINV // If GPU_UNAI_USE_FLOATMATH is defined, + // use multiply-by-inverse for division +//#define GPU_UNAI_USE_INT_DIV_MULTINV // If GPU_UNAI_USE_FLOATMATH is *not* + // defined, use old inaccurate division + + +#define GPU_INLINE static inline __attribute__((always_inline)) +#define INLINE static inline __attribute__((always_inline)) + +#define u8 uint8_t +#define s8 int8_t +#define u16 uint16_t +#define s16 int16_t +#define u32 uint32_t +#define s32 int32_t +#define s64 int64_t + +union PtrUnion +{ + u32 *U4; + s32 *S4; + u16 *U2; + s16 *S2; + u8 *U1; + s8 *S1; + void *ptr; +}; + +union GPUPacket +{ + u32 U4[16]; + s32 S4[16]; + u16 U2[32]; + s16 S2[32]; + u8 U1[64]; + s8 S1[64]; +}; + +template static inline void SwapValues(T &x, T &y) +{ + T tmp(x); x = y; y = tmp; +} + +template +static inline T Min2 (const T a, const T b) +{ + return (a +static inline T Min3 (const T a, const T b, const T c) +{ + return Min2(Min2(a,b),c); +} + +template +static inline T Max2 (const T a, const T b) +{ + return (a>b)?a:b; +} + +template +static inline T Max3 (const T a, const T b, const T c) +{ + return Max2(Max2(a,b),c); +} + + +/////////////////////////////////////////////////////////////////////////////// +// GPU Raster Macros + +// Convert 24bpp color parameter of GPU command to 16bpp (15bpp + mask bit) +#define GPU_RGB16(rgb) ((((rgb)&0xF80000)>>9)|(((rgb)&0xF800)>>6)|(((rgb)&0xF8)>>3)) + +// Sign-extend 11-bit coordinate command param +#define GPU_EXPANDSIGN(x) (((s32)(x)<<(32-11))>>(32-11)) + +// Max difference between any two X or Y primitive coordinates +#define CHKMAX_X 1024 +#define CHKMAX_Y 512 + +#define FRAME_BUFFER_SIZE (1024*512*2) +#define FRAME_WIDTH 1024 +#define FRAME_HEIGHT 512 +#define FRAME_OFFSET(x,y) (((y)<<10)+(x)) +#define FRAME_BYTE_STRIDE 2048 +#define FRAME_BYTES_PER_PIXEL 2 + +static inline s32 GPU_DIV(s32 rs, s32 rt) +{ + return rt ? (rs / rt) : (0); +} + +// 'Unsafe' version of above that doesn't check for div-by-zero +#define GPU_FAST_DIV(rs, rt) ((signed)(rs) / (signed)(rt)) + +struct gpu_unai_t { + u32 GPU_GP1; + GPUPacket PacketBuffer; + u16 *vram; + + //////////////////////////////////////////////////////////////////////////// + // Variables used only by older standalone version of gpu_unai (gpu.cpp) +#ifndef USE_GPULIB + u32 GPU_GP0; + u32 tex_window; // Current texture window vals (set by GP0(E2h) cmd) + s32 PacketCount; + s32 PacketIndex; + bool fb_dirty; // Framebuffer is dirty (according to GPU) + + // Display status + // NOTE: Standalone older gpu_unai didn't care about horiz display range + u16 DisplayArea[6]; // [0] : Start of display area (in VRAM) X + // [1] : Start of display area (in VRAM) Y + // [2] : Display mode resolution HORIZONTAL + // [3] : Display mode resolution VERTICAL + // [4] : Vertical display range (on TV) START + // [5] : Vertical display range (on TV) END + + //////////////////////////////////////////////////////////////////////////// + // Dma Transfers info + struct { + s32 px,py; + s32 x_end,y_end; + u16* pvram; + u32 *last_dma; // Last dma pointer + bool FrameToRead; // Load image in progress + bool FrameToWrite; // Store image in progress + } dma; + + //////////////////////////////////////////////////////////////////////////// + // Frameskip + struct { + int skipCount; // Frame skip (0,1,2,3...) + bool isSkip; // Skip frame (according to GPU) + bool skipFrame; // Skip this frame (according to frame skip) + bool wasSkip; // Skip frame old value (according to GPU) + bool skipGPU; // Skip GPU primitives + } frameskip; +#endif + // END of standalone gpu_unai variables + //////////////////////////////////////////////////////////////////////////// + + u32 TextureWindowCur; // Current setting from last GP0(0xE2) cmd (raw form) + u8 TextureWindow[4]; // [0] : Texture window offset X + // [1] : Texture window offset Y + // [2] : Texture window mask X + // [3] : Texture window mask Y + + u16 DrawingArea[4]; // [0] : Drawing area top left X + // [1] : Drawing area top left Y + // [2] : Drawing area bottom right X + // [3] : Drawing area bottom right Y + + s16 DrawingOffset[2]; // [0] : Drawing offset X (signed) + // [1] : Drawing offset Y (signed) + + u16* TBA; // Ptr to current texture in VRAM + u16* CBA; // Ptr to current CLUT in VRAM + + //////////////////////////////////////////////////////////////////////////// + // Inner Loop parameters + + // 22.10 Fixed-pt texture coords, mask, scanline advance + // NOTE: U,V are no longer packed together into one u32, this proved to be + // too imprecise, leading to pixel dropouts. Example: NFS3's skybox. + u32 u, v; + u32 u_msk, v_msk; + s32 u_inc, v_inc; + + // Color for Gouraud-shaded prims + // Packed fixed-pt 8.3:8.3:8.2 rgb triplet + // layout: rrrrrrrrXXXggggggggXXXbbbbbbbbXX + // ^ bit 31 ^ bit 0 + u32 gCol; + u32 gInc; // Increment along scanline for gCol + + // Color for flat-shaded, texture-blended prims + u8 r5, g5, b5; // 5-bit light for undithered prims + u8 r8, g8, b8; // 8-bit light for dithered prims + + // Color for flat-shaded, untextured prims + u16 PixelData; // bgr555 color for untextured flat-shaded polys + + // End of inner Loop parameters + //////////////////////////////////////////////////////////////////////////// + + + u8 blit_mask; // Determines what pixels to skip when rendering. + // Only useful on low-resolution devices using + // a simple pixel-dropping downscaler for PS1 + // high-res modes. See 'pixel_skip' option. + + u8 ilace_mask; // Determines what lines to skip when rendering. + // Normally 0 when PS1 240 vertical res is in + // use and ilace_force is 0. When running in + // PS1 480 vertical res on a low-resolution + // device (320x240), will usually be set to 1 + // so odd lines are not rendered. (Unless future + // full-screen scaling option is in use ..TODO) + + bool prog_ilace_flag; // Tracks successive frames for 'prog_ilace' option + + u8 BLEND_MODE; + u8 TEXT_MODE; + u8 Masking; + + u16 PixelMSB; + + gpu_unai_config_t config; + + u8 LightLUT[32*32]; // 5-bit lighting LUT (gpu_inner_light.h) + u32 DitherMatrix[64]; // Matrix of dither coefficients +}; + +static gpu_unai_t gpu_unai; + +// Global config that frontend can alter.. Values are read in GPU_init(). +// TODO: if frontend menu modifies a setting, add a function that can notify +// GPU plugin to use new setting. +gpu_unai_config_t gpu_unai_config_ext; + +/////////////////////////////////////////////////////////////////////////////// +// Internal inline funcs to get option status: (Allows flexibility) +static inline bool LightingEnabled() +{ + return gpu_unai.config.lighting; +} + +static inline bool FastLightingEnabled() +{ + return gpu_unai.config.fast_lighting; +} + +static inline bool BlendingEnabled() +{ + return gpu_unai.config.blending; +} + +static inline bool DitheringEnabled() +{ + return gpu_unai.config.dithering; +} + +// For now, this is just for development/experimentation purposes.. +// If modified to return true, it will allow ignoring the status register +// bit 9 setting (dither enable). It will still restrict dithering only +// to Gouraud-shaded or texture-blended polys. +static inline bool ForcedDitheringEnabled() +{ + return false; +} + +static inline bool ProgressiveInterlaceEnabled() +{ +#ifdef USE_GPULIB + // Using this old option greatly decreases quality of image. Disabled + // for now when using new gpulib, since it also adds more work in loops. + return false; +#else + return gpu_unai.config.prog_ilace; +#endif +} + +// For now, 320x240 output resolution is assumed, using simple line-skipping +// and pixel-skipping downscaler. +// TODO: Flesh these out so they return useful values based on whether +// running on higher-res device or a resampling downscaler is enabled. +static inline bool PixelSkipEnabled() +{ + return gpu_unai.config.pixel_skip; +} + +static inline bool LineSkipEnabled() +{ + return true; +} + +#endif // GPU_UNAI_H diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index e9a199c..8b5174e 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -2,6 +2,7 @@ * Copyright (C) 2010 PCSX4ALL Team * * Copyright (C) 2010 Unai * * Copyright (C) 2011 notaz * +* Copyright (C) 2016 Senquack (dansilsby gmail com) * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -19,140 +20,81 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * ***************************************************************************/ +#include #include #include #include #include "../gpulib/gpu.h" -#include "arm_features.h" - -#define u8 uint8_t -#define s8 int8_t -#define u16 uint16_t -#define s16 int16_t -#define u32 uint32_t -#define s32 int32_t -#define s64 int64_t - -#define INLINE static - -#define FRAME_BUFFER_SIZE (1024*512*2) -#define FRAME_WIDTH 1024 -#define FRAME_HEIGHT 512 -#define FRAME_OFFSET(x,y) (((y)<<10)+(x)) - -#define isSkip 0 /* skip frame (info coming from GPU) */ -#define alt_fps 0 -static int linesInterlace; /* internal lines interlace */ -static int force_interlace; - -static bool light = true; /* lighting */ -static bool blend = true; /* blending */ -static bool FrameToRead = false; /* load image in progress */ -static bool FrameToWrite = false; /* store image in progress */ - -static bool enableAbbeyHack = false; /* Abe's Odyssey hack */ - -static u8 BLEND_MODE; -static u8 TEXT_MODE; -static u8 Masking; - -static u16 PixelMSB; -static u16 PixelData; - -/////////////////////////////////////////////////////////////////////////////// -// GPU Global data -/////////////////////////////////////////////////////////////////////////////// - -// Dma Transfers info -static s32 px,py; -static s32 x_end,y_end; -static u16* pvram; - -static s32 PacketCount; -static s32 PacketIndex; - -// Rasterizer status -static u32 TextureWindow [4]; -static u32 DrawingArea [4]; -static u32 DrawingOffset [2]; - -static u16* TBA; -static u16* CBA; - -// Inner Loops -static s32 u4, du4; -static s32 v4, dv4; -static s32 r4, dr4; -static s32 g4, dg4; -static s32 b4, db4; -static u32 lInc; -static u32 tInc, tMsk; - -union GPUPacket -{ - u32 U4[16]; - s32 S4[16]; - u16 U2[32]; - s16 S2[32]; - u8 U1[64]; - s8 S1[64]; -}; - -static GPUPacket PacketBuffer; -static u16 *GPU_FrameBuffer; -static u32 GPU_GP1; - -/////////////////////////////////////////////////////////////////////////////// - -#include "../gpu_unai/gpu_fixedpoint.h" - -// Inner loop driver instanciation file -#include "../gpu_unai/gpu_inner.h" - -// GPU Raster Macros -#define GPU_RGB16(rgb) ((((rgb)&0xF80000)>>9)|(((rgb)&0xF800)>>6)|(((rgb)&0xF8)>>3)) +//#include "port.h" +#include "gpu_unai.h" -#define GPU_EXPANDSIGN(x) (((s32)(x)<<21)>>21) +// GPU fixed point math +#include "gpu_fixedpoint.h" -#define CHKMAX_X 1024 -#define CHKMAX_Y 512 - -#define GPU_SWAP(a,b,t) {(t)=(a);(a)=(b);(b)=(t);} +// Inner loop driver instantiation file +#include "gpu_inner.h" // GPU internal image drawing functions -#include "../gpu_unai/gpu_raster_image.h" +#include "gpu_raster_image.h" // GPU internal line drawing functions -#include "../gpu_unai/gpu_raster_line.h" +#include "gpu_raster_line.h" // GPU internal polygon drawing functions -#include "../gpu_unai/gpu_raster_polygon.h" +#include "gpu_raster_polygon.h" // GPU internal sprite drawing functions -#include "../gpu_unai/gpu_raster_sprite.h" +#include "gpu_raster_sprite.h" // GPU command buffer execution/store -#include "../gpu_unai/gpu_command.h" +#include "gpu_command.h" ///////////////////////////////////////////////////////////////////////////// int renderer_init(void) { - GPU_FrameBuffer = (u16 *)gpu.vram; - - // s_invTable - for(int i=1;i<=(1<>1); - #else - v *= double(0x80000000); - #endif - s_invTable[i-1]=s32(v); - } - - return 0; + memset((void*)&gpu_unai, 0, sizeof(gpu_unai)); + gpu_unai.vram = (u16*)gpu.vram; + + // Original standalone gpu_unai initialized TextureWindow[]. I added the + // same behavior here, since it seems unsafe to leave [2],[3] unset when + // using HLE and Rearmed gpu_neon sets this similarly on init. -senquack + gpu_unai.TextureWindow[0] = 0; + gpu_unai.TextureWindow[1] = 0; + gpu_unai.TextureWindow[2] = 255; + gpu_unai.TextureWindow[3] = 255; + //senquack - new vars must be updated whenever texture window is changed: + // (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h) + const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 + gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); + + // Configuration options + gpu_unai.config = gpu_unai_config_ext; + //senquack - disabled, not sure this is needed and would require modifying + // sprite-span functions, perhaps unnecessarily. No Abe Oddysey hack was + // present in latest PCSX4ALL sources we were using. + //gpu_unai.config.enableAbbeyHack = gpu_unai_config_ext.abe_hack; + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; + +#ifdef GPU_UNAI_USE_INT_DIV_MULTINV + // s_invTable + for(int i=1;i<=(1<>1); +#else + v *= double(0x80000000); +#endif + s_invTable[i-1]=s32(v); + } +#endif + + SetupLightLUT(); + SetupDitheringConstants(); + + return 0; } void renderer_finish(void) @@ -161,6 +103,111 @@ void renderer_finish(void) void renderer_notify_res_change(void) { + if (PixelSkipEnabled()) { + // Set blit_mask for high horizontal resolutions. This allows skipping + // rendering pixels that would never get displayed on low-resolution + // platforms that use simple pixel-dropping scaler. + + switch (gpu.screen.hres) + { + case 512: gpu_unai.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS + case 640: gpu_unai.blit_mask = 0xaa; break; // GPU_BlitWS + default: gpu_unai.blit_mask = 0; break; + } + } else { + gpu_unai.blit_mask = 0; + } + + if (LineSkipEnabled()) { + // Set rendering line-skip (only render every other line in high-res + // 480 vertical mode, or, optionally, force it for all video modes) + + if (gpu.screen.vres == 480) { + if (gpu_unai.config.ilace_force) { + gpu_unai.ilace_mask = 3; // Only need 1/4 of lines + } else { + gpu_unai.ilace_mask = 1; // Only need 1/2 of lines + } + } else { + // Vert resolution changed from 480 to lower one + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; + } + } else { + gpu_unai.ilace_mask = 0; + } + + /* + printf("res change hres: %d vres: %d depth: %d ilace_mask: %d\n", + gpu.screen.hres, gpu.screen.vres, gpu.status.rgb24 ? 24 : 15, + gpu_unai.ilace_mask); + */ +} + +// Handles GP0 draw settings commands 0xE1...0xE6 +static void gpuGP0Cmd_0xEx(gpu_unai_t &gpu_unai, u32 cmd_word) +{ + // Assume incoming GP0 command is 0xE1..0xE6, convert to 1..6 + u8 num = (cmd_word >> 24) & 7; + gpu.ex_regs[num] = cmd_word; // Update gpulib register + switch (num) { + case 1: { + // GP0(E1h) - Draw Mode setting (aka "Texpage") + u32 cur_texpage = gpu_unai.GPU_GP1 & 0x7FF; + u32 new_texpage = cmd_word & 0x7FF; + if (cur_texpage != new_texpage) { + gpu_unai.GPU_GP1 = (gpu_unai.GPU_GP1 & ~0x7FF) | new_texpage; + gpuSetTexture(gpu_unai.GPU_GP1); + } + } break; + + case 2: { + // GP0(E2h) - Texture Window setting + if (cmd_word != gpu_unai.TextureWindowCur) { + static const u8 TextureMask[32] = { + 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7, + 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7 + }; + gpu_unai.TextureWindowCur = cmd_word; + gpu_unai.TextureWindow[0] = ((cmd_word >> 10) & 0x1F) << 3; + gpu_unai.TextureWindow[1] = ((cmd_word >> 15) & 0x1F) << 3; + gpu_unai.TextureWindow[2] = TextureMask[(cmd_word >> 0) & 0x1F]; + gpu_unai.TextureWindow[3] = TextureMask[(cmd_word >> 5) & 0x1F]; + gpu_unai.TextureWindow[0] &= ~gpu_unai.TextureWindow[2]; + gpu_unai.TextureWindow[1] &= ~gpu_unai.TextureWindow[3]; + + // Inner loop vars must be updated whenever texture window is changed: + const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4 + gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1); + gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1); + + gpuSetTexture(gpu_unai.GPU_GP1); + } + } break; + + case 3: { + // GP0(E3h) - Set Drawing Area top left (X1,Y1) + gpu_unai.DrawingArea[0] = cmd_word & 0x3FF; + gpu_unai.DrawingArea[1] = (cmd_word >> 10) & 0x3FF; + } break; + + case 4: { + // GP0(E4h) - Set Drawing Area bottom right (X2,Y2) + gpu_unai.DrawingArea[2] = (cmd_word & 0x3FF) + 1; + gpu_unai.DrawingArea[3] = ((cmd_word >> 10) & 0x3FF) + 1; + } break; + + case 5: { + // GP0(E5h) - Set Drawing Offset (X,Y) + gpu_unai.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11); + gpu_unai.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11); + } break; + + case 6: { + // GP0(E6h) - Mask Bit Setting + gpu_unai.Masking = (cmd_word & 0x2) << 1; + gpu_unai.PixelMSB = (cmd_word & 0x1) << 8; + } break; + } } extern const unsigned char cmd_lengths[256]; @@ -171,9 +218,12 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) u32 *list_start = list; u32 *list_end = list + list_len; - linesInterlace = force_interlace; + //TODO: set ilace_mask when resolution changes instead of every time, + // eliminate #ifdef below. + gpu_unai.ilace_mask = gpu_unai.config.ilace_force; + #ifdef HAVE_PRE_ARMV7 /* XXX */ - linesInterlace |= gpu.status.interlace; + gpu_unai.ilace_mask |= gpu.status.interlace; #endif for (; list < list_end; list += 1 + len) @@ -186,126 +236,175 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) } #define PRIM cmd - PacketBuffer.U4[0] = list[0]; + gpu_unai.PacketBuffer.U4[0] = list[0]; for (i = 1; i <= len; i++) - PacketBuffer.U4[i] = list[i]; + gpu_unai.PacketBuffer.U4[i] = list[i]; + + PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer }; switch (cmd) { case 0x02: - gpuClearImage(); + gpuClearImage(packet); break; case 0x20: case 0x21: case 0x22: - case 0x23: - gpuDrawF3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB]); - break; + case 0x23: { // Monochrome 3-pt poly + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Blending_Mode | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB + ]; + gpuDrawPolyF(packet, driver, false); + } break; case 0x24: case 0x25: case 0x26: - case 0x27: - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture(PacketBuffer.U4[4] >> 16); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB]); - else - gpuDrawFT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB]); - break; + case 0x27: { // Textured 3-pt poly + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuSetTexture(gpu_unai.PacketBuffer.U4[4] >> 16); + + u32 driver_idx = + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB; + + if (!FastLightingEnabled()) { + driver_idx |= Lighting; + } else { + if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))) + driver_idx |= Lighting; + } + + PP driver = gpuPolySpanDrivers[driver_idx]; + gpuDrawPolyFT(packet, driver, false); + } break; case 0x28: case 0x29: case 0x2A: - case 0x2B: { - const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | PixelMSB]; - gpuDrawF3(gpuPolySpanDriver); - PacketBuffer.U4[1] = PacketBuffer.U4[4]; - gpuDrawF3(gpuPolySpanDriver); - break; - } + case 0x2B: { // Monochrome 4-pt poly + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Blending_Mode | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB + ]; + gpuDrawPolyF(packet, driver, true); // is_quad = true + } break; case 0x2C: case 0x2D: case 0x2E: - case 0x2F: { - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture(PacketBuffer.U4[4] >> 16); - PP gpuPolySpanDriver; - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | PixelMSB]; - else - gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | PixelMSB]; - gpuDrawFT3(gpuPolySpanDriver); - PacketBuffer.U4[1] = PacketBuffer.U4[7]; - PacketBuffer.U4[2] = PacketBuffer.U4[8]; - gpuDrawFT3(gpuPolySpanDriver); - break; - } + case 0x2F: { // Textured 4-pt poly + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuSetTexture(gpu_unai.PacketBuffer.U4[4] >> 16); + + u32 driver_idx = + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | gpu_unai.PixelMSB; + + if (!FastLightingEnabled()) { + driver_idx |= Lighting; + } else { + if (!((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))) + driver_idx |= Lighting; + } + + PP driver = gpuPolySpanDrivers[driver_idx]; + gpuDrawPolyFT(packet, driver, true); // is_quad = true + } break; case 0x30: case 0x31: case 0x32: - case 0x33: - gpuDrawG3(gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB]); - break; + case 0x33: { // Gouraud-shaded 3-pt poly + //NOTE: The '129' here is CF_GOURAUD | CF_LIGHT, however + // this is an untextured poly, so CF_LIGHT (texture blend) + // shouldn't apply. Until the original array of template + // instantiation ptrs is fixed, we're stuck with this. (TODO) + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | + gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB + ]; + gpuDrawPolyG(packet, driver, false); + } break; case 0x34: case 0x35: case 0x36: - case 0x37: - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (PacketBuffer.U4[5] >> 16); - gpuDrawGT3(gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB]); - break; + case 0x37: { // Gouraud-shaded, textured 3-pt poly + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16); + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB + ]; + gpuDrawPolyGT(packet, driver, false); + } break; case 0x38: case 0x39: case 0x3A: - case 0x3B: { - const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | Masking | Blending | 129 | PixelMSB]; - gpuDrawG3(gpuPolySpanDriver); - PacketBuffer.U4[0] = PacketBuffer.U4[6]; - PacketBuffer.U4[1] = PacketBuffer.U4[7]; - gpuDrawG3(gpuPolySpanDriver); - break; - } + case 0x3B: { // Gouraud-shaded 4-pt poly + // See notes regarding '129' for 0x30..0x33 further above -senquack + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | + gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB + ]; + gpuDrawPolyG(packet, driver, true); // is_quad = true + } break; case 0x3C: case 0x3D: case 0x3E: - case 0x3F: { - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (PacketBuffer.U4[5] >> 16); - const PP gpuPolySpanDriver = gpuPolySpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | ((Lighting)?129:0) | PixelMSB]; - gpuDrawGT3(gpuPolySpanDriver); - PacketBuffer.U4[0] = PacketBuffer.U4[9]; - PacketBuffer.U4[1] = PacketBuffer.U4[10]; - PacketBuffer.U4[2] = PacketBuffer.U4[11]; - gpuDrawGT3(gpuPolySpanDriver); - break; - } + case 0x3F: { // Gouraud-shaded, textured 4-pt poly + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuSetTexture (gpu_unai.PacketBuffer.U4[5] >> 16); + PP driver = gpuPolySpanDrivers[ + (gpu_unai.blit_mask?1024:0) | + Dithering | + Blending_Mode | gpu_unai.TEXT_MODE | + gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB + ]; + gpuDrawPolyGT(packet, driver, true); // is_quad = true + } break; case 0x40: case 0x41: case 0x42: - case 0x43: - gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); - break; - - case 0x48 ... 0x4F: - { + case 0x43: { // Monochrome line + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineF(packet, driver); + } break; + + case 0x48 ... 0x4F: { // Monochrome line strip u32 num_vertexes = 1; u32 *list_position = &(list[2]); - gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineF(packet, driver); while(1) { - PacketBuffer.U4[1] = PacketBuffer.U4[2]; - PacketBuffer.U4[2] = *list_position++; - gpuDrawLF(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); + gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[2]; + gpu_unai.PacketBuffer.U4[2] = *list_position++; + gpuDrawLineF(packet, driver); num_vertexes++; if(list_position >= list_end) { @@ -317,30 +416,38 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) } len += (num_vertexes - 2); - break; - } + } break; case 0x50: case 0x51: case 0x52: - case 0x53: - gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); - break; - - case 0x58 ... 0x5F: - { + case 0x53: { // Gouraud-shaded line + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + // Index MSB selects Gouraud-shaded PixelSpanDriver: + driver_idx |= (1 << 5); + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineG(packet, driver); + } break; + + case 0x58 ... 0x5F: { // Gouraud-shaded line strip u32 num_vertexes = 1; u32 *list_position = &(list[2]); - gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); + // Shift index right by one, as untextured prims don't use lighting + u32 driver_idx = (Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1; + // Index MSB selects Gouraud-shaded PixelSpanDriver: + driver_idx |= (1 << 5); + PSD driver = gpuPixelSpanDrivers[driver_idx]; + gpuDrawLineG(packet, driver); while(1) { - PacketBuffer.U4[0] = PacketBuffer.U4[2]; - PacketBuffer.U4[1] = PacketBuffer.U4[3]; - PacketBuffer.U4[2] = *list_position++; - PacketBuffer.U4[3] = *list_position++; - gpuDrawLG(gpuPixelDrivers [ (Blending_Mode | Masking | Blending | (PixelMSB>>3)) >> 1]); + gpu_unai.PacketBuffer.U4[0] = gpu_unai.PacketBuffer.U4[2]; + gpu_unai.PacketBuffer.U4[1] = gpu_unai.PacketBuffer.U4[3]; + gpu_unai.PacketBuffer.U4[2] = *list_position++; + gpu_unai.PacketBuffer.U4[3] = *list_position++; + gpuDrawLineG(packet, driver); num_vertexes++; if(list_position >= list_end) { @@ -352,91 +459,116 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) } len += (num_vertexes - 2) * 2; - break; - } + } break; case 0x60: case 0x61: case 0x62: - case 0x63: - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); - break; + case 0x63: { // Monochrome rectangle (variable size) + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + } break; case 0x64: case 0x65: case 0x66: - case 0x67: - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]); - else - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]); - break; + case 0x67: { // Textured rectangle (variable size) + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + // This fixes Silent Hill running animation on loading screens: + // (On PSX, color values 0x00-0x7F darken the source texture's color, + // 0x81-FF lighten textures (ultimately clamped to 0x1F), + // 0x80 leaves source texture color unchanged, HOWEVER, + // gpu_unai uses a simple lighting LUT whereby only the upper + // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as + // 0x80. + // + // NOTE: I've changed all textured sprite draw commands here and + // elsewhere to use proper behavior, but left poly commands + // alone, I don't want to slow rendering down too much. (TODO) + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + } break; case 0x68: case 0x69: case 0x6A: - case 0x6B: - PacketBuffer.U4[2] = 0x00010001; - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); - break; + case 0x6B: { // Monochrome rectangle (1x1 dot) + gpu_unai.PacketBuffer.U4[2] = 0x00010001; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + } break; case 0x70: case 0x71: case 0x72: - case 0x73: - PacketBuffer.U4[2] = 0x00080008; - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); - break; + case 0x73: { // Monochrome rectangle (8x8) + gpu_unai.PacketBuffer.U4[2] = 0x00080008; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + } break; case 0x74: case 0x75: case 0x76: - case 0x77: - PacketBuffer.U4[3] = 0x00080008; - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]); - else - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]); - break; + case 0x77: { // Textured rectangle (8x8) + gpu_unai.PacketBuffer.U4[3] = 0x00080008; + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + } break; case 0x78: case 0x79: case 0x7A: - case 0x7B: - PacketBuffer.U4[2] = 0x00100010; - gpuDrawT(gpuTileSpanDrivers [Blending_Mode | Masking | Blending | (PixelMSB>>3)]); - break; + case 0x7B: { // Monochrome rectangle (16x16) + gpu_unai.PacketBuffer.U4[2] = 0x00100010; + PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + gpuDrawT(packet, driver); + } break; case 0x7C: case 0x7D: #ifdef __arm__ - if ((GPU_GP1 & 0x180) == 0 && (Masking | PixelMSB) == 0) + if ((gpu_unai.GPU_GP1 & 0x180) == 0 && (gpu_unai.Masking | gpu_unai.PixelMSB) == 0) { - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - gpuDrawS16(); + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + gpuDrawS16(packet); break; } // fallthrough #endif case 0x7E: - case 0x7F: - PacketBuffer.U4[3] = 0x00100010; - gpuSetCLUT (PacketBuffer.U4[2] >> 16); - gpuSetTexture (GPU_GP1); - if ((PacketBuffer.U1[0]>0x5F) && (PacketBuffer.U1[1]>0x5F) && (PacketBuffer.U1[2]>0x5F)) - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | (enableAbbeyHack<<7) | PixelMSB]); - else - gpuDrawS(gpuSpriteSpanDrivers [Blending_Mode | TEXT_MODE | Masking | Blending | Lighting | (enableAbbeyHack<<7) | PixelMSB]); - break; + case 0x7F: { // Textured rectangle (16x16) + gpu_unai.PacketBuffer.U4[3] = 0x00100010; + gpuSetCLUT (gpu_unai.PacketBuffer.U4[2] >> 16); + u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1); + //senquack - Only color 808080h-878787h allows skipping lighting calculation: + //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F)) + // Strip lower 3 bits of each color and determine if lighting should be used: + if ((gpu_unai.PacketBuffer.U4[0] & 0xF8F8F8) != 0x808080) + driver_idx |= Lighting; + PS driver = gpuSpriteSpanDrivers[driver_idx]; + gpuDrawS(packet, driver); + } break; case 0x80: // vid -> vid - gpuMoveImage(); // prim handles updateLace && skip + gpuMoveImage(packet); break; + #ifdef TEST case 0xA0: // sys -> vid { @@ -445,70 +577,25 @@ int do_cmd_list(u32 *list, int list_len, int *last_cmd) u32 load_size = load_width * load_height; len += load_size / 2; - break; - } + } break; + case 0xC0: break; #else case 0xA0: // sys ->vid case 0xC0: // vid -> sys + // Handled by gpulib goto breakloop; #endif - case 0xE1: { - const u32 temp = PacketBuffer.U4[0]; - GPU_GP1 = (GPU_GP1 & ~0x000007FF) | (temp & 0x000007FF); - gpuSetTexture(temp); - gpu.ex_regs[1] = temp; - break; - } - case 0xE2: { - static const u8 TextureMask[32] = { - 255, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7, - 127, 7, 15, 7, 31, 7, 15, 7, 63, 7, 15, 7, 31, 7, 15, 7 - }; - const u32 temp = PacketBuffer.U4[0]; - TextureWindow[0] = ((temp >> 10) & 0x1F) << 3; - TextureWindow[1] = ((temp >> 15) & 0x1F) << 3; - TextureWindow[2] = TextureMask[(temp >> 0) & 0x1F]; - TextureWindow[3] = TextureMask[(temp >> 5) & 0x1F]; - gpuSetTexture(GPU_GP1); - gpu.ex_regs[2] = temp; - break; - } - case 0xE3: { - const u32 temp = PacketBuffer.U4[0]; - DrawingArea[0] = temp & 0x3FF; - DrawingArea[1] = (temp >> 10) & 0x3FF; - gpu.ex_regs[3] = temp; - break; - } - case 0xE4: { - const u32 temp = PacketBuffer.U4[0]; - DrawingArea[2] = (temp & 0x3FF) + 1; - DrawingArea[3] = ((temp >> 10) & 0x3FF) + 1; - gpu.ex_regs[4] = temp; - break; - } - case 0xE5: { - const u32 temp = PacketBuffer.U4[0]; - DrawingOffset[0] = ((s32)temp<<(32-11))>>(32-11); - DrawingOffset[1] = ((s32)temp<<(32-22))>>(32-11); - gpu.ex_regs[5] = temp; - break; - } - case 0xE6: { - const u32 temp = PacketBuffer.U4[0]; - Masking = (temp & 0x2) << 1; - PixelMSB =(temp & 0x1) << 8; - gpu.ex_regs[6] = temp; - break; - } + case 0xE1 ... 0xE6: { // Draw settings + gpuGP0Cmd_0xEx(gpu_unai, gpu_unai.PacketBuffer.U4[0]); + } break; } } breakloop: gpu.ex_regs[1] &= ~0x1ff; - gpu.ex_regs[1] |= GPU_GP1 & 0x1ff; + gpu.ex_regs[1] |= gpu_unai.GPU_GP1 & 0x1ff; *last_cmd = cmd; return list - list_start; @@ -532,20 +619,17 @@ void renderer_set_interlace(int enable, int is_odd) { } -#ifndef TEST - #include "../../frontend/plugin_lib.h" - +// Handle any gpulib settings applicable to gpu_unai: void renderer_set_config(const struct rearmed_cbs *cbs) { - force_interlace = cbs->gpu_unai.lineskip; - enableAbbeyHack = cbs->gpu_unai.abe_hack; - light = !cbs->gpu_unai.no_light; - blend = !cbs->gpu_unai.no_blend; - - GPU_FrameBuffer = (u16 *)gpu.vram; + gpu_unai.vram = (u16*)gpu.vram; + gpu_unai.config.ilace_force = cbs->gpu_unai.ilace_force; + gpu_unai.config.pixel_skip = cbs->gpu_unai.pixel_skip; + gpu_unai.config.lighting = cbs->gpu_unai.lighting; + gpu_unai.config.fast_lighting = cbs->gpu_unai.fast_lighting; + gpu_unai.config.blending = cbs->gpu_unai.blending; + gpu_unai.config.dithering = cbs->gpu_unai.dithering; } -#endif - // vim:shiftwidth=2:expandtab -- cgit v1.2.3