aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMax Horn2005-04-27 08:43:23 +0000
committerMax Horn2005-04-27 08:43:23 +0000
commit3a4c1f057ec070d579d0be4307dc95afbdfb3160 (patch)
treeff7334b4fbd8a77621d6e1358adb1479e0b0fd55
parentb407f0040edf88090ebe76cab20191c5dcb2065b (diff)
downloadscummvm-rg350-3a4c1f057ec070d579d0be4307dc95afbdfb3160.tar.gz
scummvm-rg350-3a4c1f057ec070d579d0be4307dc95afbdfb3160.tar.bz2
scummvm-rg350-3a4c1f057ec070d579d0be4307dc95afbdfb3160.zip
Use a pointer ref for RGBtoYUV, instead of an array, so that we can choose to allocate RGBtoYUV on the heap; added a comment that tries to explain why RGBtoYUV and LUT16to32 are evil and slow and how they could be removed
svn-id: r17834
-rw-r--r--common/scaler.cpp29
-rw-r--r--common/scaler/intern.h2
2 files changed, 29 insertions, 2 deletions
diff --git a/common/scaler.cpp b/common/scaler.cpp
index 084c1849f7..db37cc72bd 100644
--- a/common/scaler.cpp
+++ b/common/scaler.cpp
@@ -41,7 +41,34 @@ extern "C" {
#endif
-uint RGBtoYUV[65536];
+// FIXME/TODO: The following two tables suck up 512 KB.
+// They should at least be allocated on the heap, to reduce the size of the
+// binary.
+//
+// Note: a memory lookup table is *not* necessarily faster than computing
+// these things on the fly, because of its size. Both tables together, plus
+// the code, plus the input/output GFX data, won't fit in the cache on many
+// systems, so main memory has to be accessed, which is about the worst thing
+// that can happen to code which tries to be fast...
+//
+// So we should think about ways to get these smaller / removed. The LUT16to32
+// is only used by the HQX asm right now; maybe somebody can modify the code
+// there to work w/o it (and do some benchmarking, too?). To do that, just
+// do the conversion on the fly, or even do w/o it (as the C++ code manages to),
+// by making different versions of the code based on gBitFormat (or by writing
+// bit masks into registers which are computed based on gBitFormat).
+//
+// RGBtoYUV is also used by the C(++) version of the HQX code. Maybe we can
+// use the same technique which is employed by our MPEG code to reduce the
+// size of the lookup tables at the cost of some additional computations? That
+// might actually result in a speedup, too, if done right (and the code
+// might actually be suitable for AltiVec/MMX/SSE speedup).
+//
+// Of course, the above is largely a conjecture, and the actual speed
+// differences are likely to vary a lot between different architectures and
+// CPUs.
+uint RGBtoYUVstorage[65536];
+uint *RGBtoYUV = RGBtoYUVstorage;
uint LUT16to32[65536];
}
diff --git a/common/scaler/intern.h b/common/scaler/intern.h
index 0c8a8fd859..bb20a43748 100644
--- a/common/scaler/intern.h
+++ b/common/scaler/intern.h
@@ -153,7 +153,7 @@ static inline bool diffYUV(int yuv1, int yuv2) {
* 16bit RGB to YUV conversion table. This table is setup by InitLUT().
* Used by the hq scaler family.
*/
-extern "C" uint RGBtoYUV[65536];
+extern "C" uint *RGBtoYUV;
/** Auxiliary macro to simplify creating those template function wrappers. */
#define MAKE_WRAPPER(FUNC) \