// Reconstructed from the post-image of patch 3de09db9220e5f39015447628a43ca58edc452da
// ("utf8 dammit", Vladislav Khorev, Wed, 26 Nov 2014 22:40:06 +0000), which replaces the
// std::wstring_convert based body of UTF16to8 in rudict/rudict/utf8utf16.cpp with a
// hand-written encoder. The target file includes "utf8utf16.h"; the definition of
// UTF8to16 that follows in that file is not visible here and is left untouched.

// Appends the UTF-8 encoding of `codepoint` to `out`.
// The caller passes only Unicode scalar values (<= 0x10FFFF, not a surrogate).
static void AppendUtf8(std::string & out, unsigned long codepoint)
{
    if (codepoint <= 0x7f)
    {
        out.push_back(static_cast<char>(codepoint));
    }
    else if (codepoint <= 0x7ff)
    {
        out.push_back(static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f)));
        out.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
    }
    else if (codepoint <= 0xffff)
    {
        out.push_back(static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f)));
        out.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
        out.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
    }
    else
    {
        out.push_back(static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07)));
        out.push_back(static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f)));
        out.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
        out.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
    }
}

// Converts a NUL-terminated wide string to UTF-8.
//
// Each element of `in` is treated as a UTF-16 code unit; a high surrogate
// (0xD800..0xDBFF) immediately followed by a low surrogate (0xDC00..0xDFFF) is
// combined into one supplementary code point. Elements in 0x10000..0x10FFFF
// (possible where wchar_t is wider than 16 bits) are encoded directly.
//
// Ill-formed input never produces ill-formed output: every unpaired surrogate
// and every element outside 0..0x10FFFF is replaced by U+FFFD.
//
// @param in  NUL-terminated wide string; a null pointer yields an empty result.
// @return    The UTF-8 encoded bytes, without a terminating NUL.
std::string UTF16to8(const wchar_t * in)
{
    const unsigned long kReplacement = 0xfffd;

    std::string out;
    if (in == nullptr)
        return out;

    // Pending high surrogate waiting for its low half; 0 means "none"
    // (a real high surrogate is always >= 0xD800, so 0 is a safe sentinel).
    unsigned long pendingHigh = 0;

    for (; *in != 0; ++in)
    {
        // Widen through an unsigned type so a signed wchar_t holding a negative
        // value is seen as out of range instead of matching a small code point.
        const unsigned long unit = static_cast<unsigned long>(*in);
        const bool isHigh = unit >= 0xd800 && unit <= 0xdbff;
        const bool isLow = unit >= 0xdc00 && unit <= 0xdfff;

        if (pendingHigh != 0)
        {
            if (isLow)
            {
                AppendUtf8(out, 0x10000 + ((pendingHigh - 0xd800) << 10) + (unit - 0xdc00));
                pendingHigh = 0;
                continue;
            }
            // The high surrogate was not followed by a low one: it is unpaired.
            AppendUtf8(out, kReplacement);
            pendingHigh = 0;
        }

        if (isHigh)
            pendingHigh = unit;
        else if (isLow || unit > 0x10ffff)
            AppendUtf8(out, kReplacement);
        else
            AppendUtf8(out, unit);
    }

    // A high surrogate at the very end of the string has no partner.
    if (pendingHigh != 0)
        AppendUtf8(out, kReplacement);

    return out;
}