From 5683f076331d2831eb4720b65bb53e8d01ca33ee Mon Sep 17 00:00:00 2001 From: Einar Johan Trøan Sømåen Date: Sat, 21 Jul 2012 18:19:07 +0200 Subject: WINTERMUTE: Rename CamelCased filenames to prefixed_under_score-filenames This is mostly a lead-up to namespacing the Ad/Base folders, and then possibly removing the prefixes from the files, it also has the added benefit of getting rid of the odd case-typos that makes for issues on platforms that don't ignore case. --- engines/wintermute/utils/convert_utf.h | 149 +++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 engines/wintermute/utils/convert_utf.h (limited to 'engines/wintermute/utils/convert_utf.h') diff --git a/engines/wintermute/utils/convert_utf.h b/engines/wintermute/utils/convert_utf.h new file mode 100644 index 0000000000..03a8bb2bae --- /dev/null +++ b/engines/wintermute/utils/convert_utf.h @@ -0,0 +1,149 @@ +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +/* --------------------------------------------------------------------- + + Conversions between UTF32, UTF-16, and UTF-8. Header file. + + Several funtions are included here, forming a complete set of + conversions between the three formats. UTF-7 is not included + here, but is handled in a separate source file. + + Each of these routines takes pointers to input buffers and output + buffers. The input buffers are const. + + Each routine converts the text between *sourceStart and sourceEnd, + putting the result into the buffer between *targetStart and + targetEnd. Note: the end pointers are *after* the last item: e.g. + *(sourceEnd - 1) is the last item. + + The return result indicates whether the conversion was successful, + and if not, whether the problem was in the source or target buffers. + (Only the first encountered problem is indicated.) + + After the conversion, *sourceStart and *targetStart are both + updated to point to the end of last text successfully converted in + the respective buffers. + + Input parameters: + sourceStart - pointer to a pointer to the source buffer. + The contents of this are modified on return so that + it points at the next thing to be converted. + targetStart - similarly, pointer to pointer to the target buffer. + sourceEnd, targetEnd - respectively pointers to the ends of the + two buffers, for overflow checking only. + + These conversion functions take a ConversionFlags argument. When this + flag is set to strict, both irregular sequences and isolated surrogates + will cause an error. When the flag is set to lenient, both irregular + sequences and isolated surrogates are converted. + + Whether the flag is strict or lenient, all illegal sequences will cause + an error return. This includes sequences such as: , , + or in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code + must check for illegal sequences. + + When the flag is set to lenient, characters over 0x10FFFF are converted + to the replacement character; otherwise (when the flag is set to strict) + they constitute an error. + + Output parameters: + The value "sourceIllegal" is returned from some routines if the input + sequence is malformed. When "sourceIllegal" is returned, the source + value will point to the illegal value that caused the problem. E.g., + in UTF-8 when a sequence is malformed, it points to the start of the + malformed sequence. + + Author: Mark E. Davis, 1994. + Rev History: Rick McGowan, fixes & updates May 2001. + Fixes & updates, Sept 2001. + +------------------------------------------------------------------------ */ + +/* --------------------------------------------------------------------- + The following 4 definitions are compiler-specific. + The C standard does not guarantee that wchar_t has at least + 16 bits, so wchar_t is no less portable than unsigned short! + All should be unsigned values to avoid sign extension during + bit mask & shift operations. +------------------------------------------------------------------------ */ + +typedef unsigned long UTF32; /* at least 32 bits */ +typedef unsigned short UTF16; /* at least 16 bits */ +typedef unsigned char UTF8; /* typically 8 bits */ +typedef unsigned char Boolean; /* 0 or 1 */ + +/* Some fundamental constants */ +#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD +#define UNI_MAX_BMP (UTF32)0x0000FFFF +#define UNI_MAX_UTF16 (UTF32)0x0010FFFF +#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF +#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF + +typedef enum { + conversionOK, /* conversion successful */ + sourceExhausted, /* partial character in source, but hit end */ + targetExhausted, /* insuff. room in target for conversion */ + sourceIllegal /* source sequence is illegal/malformed */ +} ConversionResult; + +typedef enum { + strictConversion = 0, + lenientConversion +} ConversionFlags; + +/* This is for C++ and does no harm in C */ +#ifdef __cplusplus +extern "C" { +#endif + + ConversionResult ConvertUTF8toUTF16( + const UTF8 **sourceStart, const UTF8 *sourceEnd, + UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags); + + ConversionResult ConvertUTF16toUTF8( + const UTF16 **sourceStart, const UTF16 *sourceEnd, + UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags); + + ConversionResult ConvertUTF8toUTF32( + const UTF8 **sourceStart, const UTF8 *sourceEnd, + UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags); + + ConversionResult ConvertUTF32toUTF8( + const UTF32 **sourceStart, const UTF32 *sourceEnd, + UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags); + + ConversionResult ConvertUTF16toUTF32( + const UTF16 **sourceStart, const UTF16 *sourceEnd, + UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags); + + ConversionResult ConvertUTF32toUTF16( + const UTF32 **sourceStart, const UTF32 *sourceEnd, + UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags); + + Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); + +#ifdef __cplusplus +} +#endif + +/* --------------------------------------------------------------------- */ -- cgit v1.2.3 From 8a212c3b6cb5100d750fb3025e79af64e4ed86e4 Mon Sep 17 00:00:00 2001 From: Einar Johan Trøan Sømåen Date: Fri, 31 Aug 2012 17:29:16 +0200 Subject: WINTERMUTE: Add namespacing to the UTF-conversion code --- engines/wintermute/utils/convert_utf.h | 61 +++++++++++++++++----------------- 1 file changed, 30 insertions(+), 31 deletions(-) (limited to 'engines/wintermute/utils/convert_utf.h') diff --git a/engines/wintermute/utils/convert_utf.h b/engines/wintermute/utils/convert_utf.h index 03a8bb2bae..171d86343a 100644 --- a/engines/wintermute/utils/convert_utf.h +++ b/engines/wintermute/utils/convert_utf.h @@ -20,6 +20,9 @@ * remains attached. */ +// NOTE: Modifications have been made to the code for inclusion +// into ScummVM. + /* --------------------------------------------------------------------- Conversions between UTF32, UTF-16, and UTF-8. Header file. @@ -86,11 +89,14 @@ All should be unsigned values to avoid sign extension during bit mask & shift operations. ------------------------------------------------------------------------ */ +#include "common/system.h" + +namespace Wintermute { -typedef unsigned long UTF32; /* at least 32 bits */ -typedef unsigned short UTF16; /* at least 16 bits */ -typedef unsigned char UTF8; /* typically 8 bits */ -typedef unsigned char Boolean; /* 0 or 1 */ +typedef uint32 UTF32; /* at least 32 bits */ +typedef uint16 UTF16; /* at least 16 bits */ +typedef uint8 UTF8; /* typically 8 bits */ +typedef uint8 Boolean; /* 0 or 1 */ /* Some fundamental constants */ #define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD @@ -111,39 +117,32 @@ typedef enum { lenientConversion } ConversionFlags; -/* This is for C++ and does no harm in C */ -#ifdef __cplusplus -extern "C" { -#endif - - ConversionResult ConvertUTF8toUTF16( - const UTF8 **sourceStart, const UTF8 *sourceEnd, - UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags); +ConversionResult ConvertUTF8toUTF16( + const UTF8 **sourceStart, const UTF8 *sourceEnd, + UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags); - ConversionResult ConvertUTF16toUTF8( - const UTF16 **sourceStart, const UTF16 *sourceEnd, - UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags); +ConversionResult ConvertUTF16toUTF8( + const UTF16 **sourceStart, const UTF16 *sourceEnd, + UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags); - ConversionResult ConvertUTF8toUTF32( - const UTF8 **sourceStart, const UTF8 *sourceEnd, - UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags); +ConversionResult ConvertUTF8toUTF32( + const UTF8 **sourceStart, const UTF8 *sourceEnd, + UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags); - ConversionResult ConvertUTF32toUTF8( - const UTF32 **sourceStart, const UTF32 *sourceEnd, - UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags); +ConversionResult ConvertUTF32toUTF8( + const UTF32 **sourceStart, const UTF32 *sourceEnd, + UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags); - ConversionResult ConvertUTF16toUTF32( - const UTF16 **sourceStart, const UTF16 *sourceEnd, - UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags); +ConversionResult ConvertUTF16toUTF32( + const UTF16 **sourceStart, const UTF16 *sourceEnd, + UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags); - ConversionResult ConvertUTF32toUTF16( - const UTF32 **sourceStart, const UTF32 *sourceEnd, - UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags); +ConversionResult ConvertUTF32toUTF16( + const UTF32 **sourceStart, const UTF32 *sourceEnd, + UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags); - Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); +Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); -#ifdef __cplusplus -} -#endif +} // End of namespace Wintermute /* --------------------------------------------------------------------- */ -- cgit v1.2.3 From e067520bb904615ed75217808ba27235eb244336 Mon Sep 17 00:00:00 2001 From: Einar Johan Trøan Sømåen Date: Fri, 31 Aug 2012 21:01:53 +0200 Subject: WINTERMUTE: Use tabs in enums --- engines/wintermute/utils/convert_utf.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'engines/wintermute/utils/convert_utf.h') diff --git a/engines/wintermute/utils/convert_utf.h b/engines/wintermute/utils/convert_utf.h index 171d86343a..a5f34456f5 100644 --- a/engines/wintermute/utils/convert_utf.h +++ b/engines/wintermute/utils/convert_utf.h @@ -106,15 +106,15 @@ typedef uint8 Boolean; /* 0 or 1 */ #define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF typedef enum { - conversionOK, /* conversion successful */ - sourceExhausted, /* partial character in source, but hit end */ - targetExhausted, /* insuff. room in target for conversion */ - sourceIllegal /* source sequence is illegal/malformed */ + conversionOK, /* conversion successful */ + sourceExhausted, /* partial character in source, but hit end */ + targetExhausted, /* insuff. room in target for conversion */ + sourceIllegal /* source sequence is illegal/malformed */ } ConversionResult; typedef enum { - strictConversion = 0, - lenientConversion + strictConversion = 0, + lenientConversion } ConversionFlags; ConversionResult ConvertUTF8toUTF16( -- cgit v1.2.3