1 files changed, 160 insertions, 0 deletions
diff --git a/textscreen/txt_utf8.c b/textscreen/txt_utf8.c
new file mode 100644
index 00000000..1306f265
--- /dev/null
+++ b/textscreen/txt_utf8.c
@@ -0,0 +1,160 @@
+// Emacs style mode select   -*- C++ -*- 
+//-----------------------------------------------------------------------------
+//
+// Copyright(C) 2012 Simon Howard
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// as published by the Free Software Foundation; either version 2
+// of the License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+// 02111-1307, USA.
+//
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "txt_utf8.h"
+
+// Encode a Unicode character as UTF-8, storing it in the buffer 'p'
+// and returning the new, incremented position.
+
+char *TXT_EncodeUTF8(char *p, unsigned int c)
+{
+    if (c < 0x80)                             // 1 character (ASCII):
+    {
+        p[0] = c;
+        return p + 1;
+    }
+    else if (c < 0x800)                       // 2 character:
+    {
+        p[0] = 0xc0 | (c >> 6);
+        p[1] = 0x80 | (c & 0x3f);
+        return p + 2;
+    }
+    else if (c < 0x10000)                     // 3 chacater:
+    {
+        p[0] = 0xe0 | (c >> 12);
+        p[1] = 0x80 | ((c >> 6) & 0x3f);
+        p[2] = 0x80 | (c & 0x3f);
+        return p + 3;
+    }
+    else if (c < 0x200000)                    // 4 character:
+    {
+        p[0] = 0xf0 | (c >> 18);
+        p[1] = 0x80 | ((c >> 12) & 0x3f);
+        p[2] = 0x80 | ((c >> 6) & 0x3f);
+        p[3] = 0x80 | (c & 0x3f);
+        return p + 4;
+    }
+    else
+    {
+        // Too big!
+
+        return p;
+    }
+}
+
+// Decode UTF-8 character, incrementing *ptr over the decoded bytes.
+
+unsigned int TXT_DecodeUTF8(const char **ptr)
+{
+    const char *p = *ptr;
+    unsigned int c;
+
+    // UTF-8 decode.
+
+    if ((*p & 0x80) == 0)                     // 1 character (ASCII):
+    {
+        c = *p;
+        *ptr += 1;
+    }
+    else if ((p[0] & 0xe0) == 0xc0            // 2 character:
+          && (p[1] & 0xc0) == 0x80)
+    {
+        c = ((p[0] & 0x1f) << 6)
+          |  (p[1] & 0x3f);
+        *ptr += 2;
+    }
+    else if ((p[0] & 0xf0) == 0xe0            // 3 character:
+          && (p[1] & 0xc0) == 0x80
+          && (p[2] & 0xc0) == 0x80)
+    {
+        c = ((p[0] & 0x0f) << 12)
+          | ((p[1] & 0x3f) << 6)
+          |  (p[2] & 0x3f);
+        *ptr += 3;
+    }
+    else if ((p[0] & 0xf8) == 0xf0            // 4 character:
+          && (p[1] & 0xc0) == 0x80
+          && (p[2] & 0xc0) == 0x80
+          && (p[3] & 0xc0) == 0x80)
+    {
+        c = ((p[0] & 0x07) << 18)
+          | ((p[1] & 0x3f) << 12)
+          | ((p[2] & 0x3f) << 6)
+          |  (p[3] & 0x3f);
+        *ptr += 4;
+    }
+    else
+    {
+        // Decode failure.
+        // Don't bother with 5/6 byte sequences.
+
+        c = 0;
+    }
+
+    return c;
+}
+
+// Count the number of characters in a UTF-8 string.
+
+unsigned int TXT_UTF8_Strlen(const char *s)
+{
+    const char *p;
+    unsigned int result = 0;
+    unsigned int c;
+
+    for (p = s; *p != '\0';)
+    {
+        c = TXT_DecodeUTF8(&p);
+
+        if (c == 0)
+        {
+            break;
+        }
+
+        ++result;
+    }
+
+    return result;
+}
+
+// Skip past the first n characters in a UTF-8 string.
+
+char *TXT_UTF8_SkipChars(const char *s, unsigned int n)
+{
+    unsigned int i;
+    const char *p;
+
+    p = s;
+
+    for (i = 0; i < n; ++i)
+    {
+        if (TXT_DecodeUTF8(&p) == 0)
+        {
+            break;
+        }
+    }
+
+    return (char *) p;
+}
+