utf8 dammit
This commit is contained in:
parent
cae8c7c0f4
commit
3de09db922
@ -1,15 +1,47 @@
|
||||
#include "utf8utf16.h"
|
||||
|
||||
#include <string>
|
||||
#include <locale>
|
||||
#include <codecvt>
|
||||
|
||||
|
||||
// Appends the UTF-8 encoding of one code point to `out`.
// The caller guarantees `codepoint` is a Unicode scalar value
// (<= 0x10ffff and not a surrogate).
static void AppendCodepointAsUtf8(std::string & out, unsigned int codepoint)
{
	if (codepoint <= 0x7f)
	{
		out.push_back(static_cast<char>(codepoint));
	}
	else if (codepoint <= 0x7ff)
	{
		out.push_back(static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f)));
		out.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
	}
	else if (codepoint <= 0xffff)
	{
		out.push_back(static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f)));
		out.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
		out.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
	}
	else
	{
		out.push_back(static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07)));
		out.push_back(static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f)));
		out.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
		out.push_back(static_cast<char>(0x80 | (codepoint & 0x3f)));
	}
}

// Converts a NUL-terminated wide string to UTF-8.
//
// Each input unit is either a UTF-16 code unit (surrogate pairs are combined
// into one code point) or, where wchar_t is wider than 16 bits, a full code
// point. Ill-formed input never produces ill-formed output: an unpaired
// surrogate or a value above 0x10ffff is replaced by U+FFFD.
//
// in: NUL-terminated wide string; a null pointer yields an empty string.
// Returns the UTF-8 encoded bytes.
std::string UTF16to8(const wchar_t * in)
{
	std::string out;
	if (in == nullptr)
		return out;

	const unsigned int replacement = 0xfffd;

	// Upper part of a code point whose high surrogate has been read and whose
	// low surrogate is still expected. Always >= 0x10000 when set, so zero
	// doubles as "nothing pending".
	unsigned int pendingHigh = 0;

	for (; *in != 0; ++in)
	{
		const unsigned int unit = static_cast<unsigned int>(*in);
		const bool isHigh = unit >= 0xd800 && unit <= 0xdbff;
		const bool isLow = unit >= 0xdc00 && unit <= 0xdfff;

		if (pendingHigh != 0)
		{
			if (isLow)
			{
				AppendCodepointAsUtf8(out, pendingHigh + (unit - 0xdc00));
				pendingHigh = 0;
				continue;
			}
			// High surrogate without its partner; the current unit is then
			// handled on its own below.
			AppendCodepointAsUtf8(out, replacement);
			pendingHigh = 0;
		}

		if (isHigh)
			pendingHigh = ((unit - 0xd800) << 10) + 0x10000;
		else if (isLow || unit > 0x10ffff)
			AppendCodepointAsUtf8(out, replacement);
		else
			AppendCodepointAsUtf8(out, unit);
	}

	// Input ended in the middle of a surrogate pair.
	if (pendingHigh != 0)
		AppendCodepointAsUtf8(out, replacement);

	return out;
}
|
||||
|
||||
std::wstring UTF8to16(const char * in)
|
||||
|
Loading…
Reference in New Issue
Block a user