libs/sqplus/testSqPlus2unit/test_StringConv.cpp

359 lines
12 KiB
C++
Raw Permalink Normal View History

2017-01-10 04:08:49 +00:00
#include "testEnv.hpp"
#ifdef SQPLUS_AUTOCONVERT_OTHER_CHAR
# include "../sqplus/SqPlusOCharBuf.h"
namespace {

// Small native class exposed to Squirrel scripts by the string conversion
// test. It receives one narrow and one wide string argument, keeps each in
// a buffer member and hands them back through the getters, so the test can
// inspect what arrived on the C++ side of a call.
class StringConvTest {
public:
    StringConvTest() {}

    // Stores arg1 in m_a1. Returns the strlen() of arg1, or 0 when arg1 is
    // a null pointer.
    int AsciiArgFunc( const char *arg1 ){
        m_a1 = arg1;
        if (!arg1) {
            return 0;
        }
        return (int)strlen(arg1);
    }

    // Stores arg2 in m_a2. Returns the wcslen() of arg2, or 0 when arg2 is
    // a null pointer.
    int WideArgFunc( const wchar_t *arg2 ){
        m_a2 = arg2;
        if (!arg2) {
            return 0;
        }
        return (int)wcslen(arg2);
    }

    // Getters rely on the buffer types converting to a plain C string
    // pointer of the matching character width.
    const char *GetArg1() {
        const char *narrow = m_a1;
        return narrow;
    }
    const wchar_t *GetArg2() {
        const wchar_t *wide = m_a2;
        return wide;
    }

    SQACharBuf m_a1;    // last value given to AsciiArgFunc
    SQWCharBuf m_a2;    // last value given to WideArgFunc
};

} // anonymous namespace

DECLARE_INSTANCE_TYPE(StringConvTest);
// Dumps the bytes of a NUL-terminated 8-bit string as hex and compares them
// with a NUL-terminated reference byte sequence.
//
//   prefix  label printed in front of the dump
//   str     string under test (a null pointer is reported as a failure)
//   ref     expected bytes, terminated by 0x00
//
// Returns true only when both sequences have the same length and every byte
// matches. Mismatching bytes, bytes beyond the end of the reference, and a
// string that ends before the reference does are all flagged in the output.
static bool
checkElems_8bit(const char *prefix,
                const char *str, const unsigned char *ref)
{
    printf("%s\t[ ", prefix);
    if (!str) {
        printf("(null) ]\n");
        return false;
    }

    bool result = true;
    bool refEnded = false;
    for (const char *p = str; *p; ++p) {
        const unsigned char actual = static_cast<unsigned char>(*p);
        printf("%02x ", actual);

        // Once the reference terminator is reached it must not be advanced
        // any further, otherwise we would read past the reference array.
        if (!refEnded && *ref == 0)
            refEnded = true;
        if (refEnded) {
            printf(" (<- unexpected, reference ended) ");
            result = false;
            continue;
        }

        if (actual != *ref) {
            printf(" (<- expected %02x) ", *ref);
            result = false;
        }
        ++ref;
    }

    // The string ended; the reference has to end at the same position.
    if (!refEnded && *ref != 0) {
        printf(" (<- truncated, expected %02x next) ", *ref);
        result = false;
    }

    printf("]\n");
    return result;
}
// Dumps the low 16 bits of every element of a NUL-terminated wide string as
// hex and compares them with a reference given as little-endian byte pairs
// (low byte first), terminated by the pair 0x00 0x00.
//
//   prefix  label printed in front of the dump
//   wstr    wide string under test (a null pointer is reported as a failure)
//   ref     expected code units, two bytes each, low byte first
//
// Only bits 0..15 of each wchar_t take part in the comparison. Returns true
// only when both sequences have the same number of units and every unit
// matches. Extra units and a prematurely ending string are flagged.
static bool
checkElems_16bit(const char *prefix,
                 const wchar_t *wstr, const unsigned char *ref)
{
    printf("%s\t[ ", prefix);
    if (!wstr) {
        printf("(null) ]\n");
        return false;
    }

    bool result = true;
    bool refEnded = false;
    for (const wchar_t *p = wstr; *p; ++p) {
        const unsigned char msb = static_cast<unsigned char>((*p & 0xff00) >> 8);
        const unsigned char lsb = static_cast<unsigned char>(*p & 0x00ff);
        printf("%02x%02x ", msb, lsb);

        // Never step over the reference terminator: that would read past
        // the end of the reference array.
        if (!refEnded && ref[0] == 0 && ref[1] == 0)
            refEnded = true;
        if (refEnded) {
            printf(" (<- unexpected, reference ended) ");
            result = false;
            continue;
        }

        if (msb != ref[1] || lsb != ref[0]) {
            printf(" (<- expected %02x%02x) ", ref[1], ref[0]);
            result = false;
        }
        ref += 2;
    }

    // The string ended; the reference has to end at the same position.
    if (!refEnded && (ref[0] != 0 || ref[1] != 0)) {
        printf(" (<- truncated, expected %02x%02x next) ", ref[1], ref[0]);
        result = false;
    }

    printf("]\n");
    return result;
}
#if 0
// 32-bit counterpart of checkElems_16bit (currently compiled out by the
// surrounding "#if 0").
//
// Dumps every element of a NUL-terminated wide string as four hex bytes and
// compares them with a reference given as little-endian byte quadruples
// (lowest byte first), terminated by four 0x00 bytes.
//
//   prefix  label printed in front of the dump
//   wstr    wide string under test (a null pointer is reported as a failure)
//   ref     expected code units, four bytes each, lowest byte first
//
// Returns true only when both sequences have the same number of units and
// every unit matches. Extra units and a prematurely ending string are
// flagged in the output.
static bool
checkElems_32bit(const char *prefix,
                 const wchar_t *wstr, const unsigned char *ref)
{
    printf("%s\t[ ", prefix);
    if (!wstr) {
        printf("(null) ]\n");
        return false;
    }

    bool result = true;
    bool refEnded = false;
    for (const wchar_t *p = wstr; *p; ++p) {
        const unsigned char b0 = static_cast<unsigned char>((*p & 0xff000000) >> 24);
        const unsigned char b1 = static_cast<unsigned char>((*p & 0x00ff0000) >> 16);
        const unsigned char b2 = static_cast<unsigned char>((*p & 0x0000ff00) >> 8);
        const unsigned char b3 = static_cast<unsigned char>(*p & 0x000000ff);
        printf("%02x%02x%02x%02x ", b0, b1, b2, b3);

        // Never step over the reference terminator: that would read past
        // the end of the reference array.
        if (!refEnded &&
            ref[0] == 0 && ref[1] == 0 && ref[2] == 0 && ref[3] == 0)
            refEnded = true;
        if (refEnded) {
            printf(" (<- unexpected, reference ended) ");
            result = false;
            continue;
        }

        if (b0 != ref[3] || b1 != ref[2] || b2 != ref[1] || b3 != ref[0]) {
            printf(" (<- expected %02x%02x%02x%02x) ",
                   ref[3], ref[2], ref[1], ref[0]);
            result = false;
        }
        ref += 4;
    }

    // The string ended; the reference has to end at the same position.
    if (!refEnded &&
        (ref[0] != 0 || ref[1] != 0 || ref[2] != 0 || ref[3] != 0)) {
        printf(" (<- truncated, expected %02x%02x%02x%02x next) ",
               ref[3], ref[2], ref[1], ref[0]);
        result = false;
    }

    printf("]\n");
    return result;
}
#endif
// Reference bytes for the text "abc_<EURO SIGN>_3_<POUND SIGN>_" encoded as
// UTF-8, NUL-terminated. Test_StringConv compares GetArg1() against this
// table in the configurations where 8-bit strings are UTF-8 (cases 1 and 3).
static const unsigned char testString_utf_8[] = {
0x61, // 'a'
0x62, // 'b'
0x63, // 'c'
0x5f, // '_'
0xe2, 0x82, 0xac, // Unicode Character 'EURO SIGN' (U+20AC, three-byte sequence)
0x5f, // '_'
0x33, // '3'
0x5f, // '_'
0xc2, 0xa3, // Unicode Character 'POUND SIGN' (U+00A3, two-byte sequence)
0x5f, // '_'
0x00 // NUL
};
// Reference bytes expected from GetArg1() when 8-bit strings are Latin-1
// and SQChar is wchar_t (case 2 of Test_StringConv), NUL-terminated.
// It has the same layout as testString_utf_8, except that a '?' sits where
// the EURO SIGN would be.
static const unsigned char testString_latin_1[] = {
0x61, // 'a'
0x62, // 'b'
0x63, // 'c'
0x5f, // '_'
0x3f, // '?' (conversion could not keep the original character; presumably
      //      the substitute for EURO SIGN, which is outside Latin-1)
0x5f, // '_'
0x33, // '3'
0x5f, // '_'
0xa3, // Latin-1 Character 'POUND SIGN'
0x5f, // '_'
0x00 // NUL
};
// Reference for the text "abc_<EURO SIGN>_3_<POUND SIGN>_" as 16-bit code
// units, stored as byte pairs with the low byte first and terminated by a
// 0x00 0x00 pair. This is the layout checkElems_16bit() expects; it is used
// as the expected GetArg2() value in cases 1, 2 and 3 of Test_StringConv.
static const unsigned char testString_utf_16_le[] = {
0x61, 0x00, // 'a'
0x62, 0x00, // 'b'
0x63, 0x00, // 'c'
0x5f, 0x00, // '_'
0xac, 0x20, // Unicode Character 'EURO SIGN' (U+20AC)
0x5f, 0x00, // '_'
0x33, 0x00, // '3'
0x5f, 0x00, // '_'
0xa3, 0x00, // Unicode Character 'POUND SIGN' (U+00A3)
0x5f, 0x00, // '_'
0x00, 0x00 // NUL
};
// Reference bytes for an 8-bit string that is deliberately not valid UTF-8,
// NUL-terminated. Case 3 of Test_StringConv compares GetArg1() of the "a3b"
// object against this table.
static const unsigned char brokenString_utf_8[] = {
0x61, // 'a'
0x62, // 'b'
0x63, // 'c'
0x5f, // '_'
0x20, 0xac, // Invalid UTF-8: ' ' followed by a lone 0xac byte
0x5f, // '_'
0x33, // '3'
0x5f, // '_'
0xc2, 0xa3, // Unicode Character 'POUND SIGN'
0x5f, // '_'
0x00 // NUL
};
// Reference for the wide string expected after converting
// brokenString_utf_8 to wchar_t (case 3 of Test_StringConv, GetArg2() of
// the "a3b" object). Stored as byte pairs with the low byte first and
// terminated by a 0x00 0x00 pair.
static const unsigned char brokenString_utf_16_le[] = {
0x61, 0x00, // 'a'
0x62, 0x00, // 'b'
0x63, 0x00, // 'c'
0x5f, 0x00, // '_'
0x20, 0x00, // ' ' (first byte of the invalid UTF-8 sequence)
0xff, 0xff, // broken character (presumably the converter's marker for the
            // undecodable 0xac byte -- confirm against the converter)
0x5f, 0x00, // '_'
0x33, 0x00, // '3'
0x5f, 0x00, // '_'
0xa3, 0x00, // Unicode Character 'POUND SIGN'
0x5f, 0x00, // '_'
0x00, 0x00 // NUL
};
// Reference bytes for the text "abc_<YEN SIGN>_3_<POUND SIGN>_" in Latin-1,
// NUL-terminated. Every character of this text fits into Latin-1. Case 4 of
// Test_StringConv compares GetArg1() against this table.
static const unsigned char testString2_latin_1[] = {
0x61, // 'a'
0x62, // 'b'
0x63, // 'c'
0x5f, // '_'
0xa5, // Latin-1 Character 'YEN SIGN'
0x5f, // '_'
0x33, // '3'
0x5f, // '_'
0xa3, // Latin-1 Character 'POUND SIGN'
0x5f, // '_'
0x00, // NUL
};
// Wide counterpart of testString2_latin_1: the same text as 16-bit code
// units, stored as byte pairs with the low byte first and terminated by a
// 0x00 0x00 pair. Case 4 of Test_StringConv compares GetArg2() against it.
static const unsigned char testString2_utf_16_le[] = {
0x61, 0x00, // 'a'
0x62, 0x00, // 'b'
0x63, 0x00, // 'c'
0x5f, 0x00, // '_'
0xa5, 0x00, // Unicode Character 'YEN SIGN' (U+00A5)
0x5f, 0x00, // '_'
0x33, 0x00, // '3'
0x5f, 0x00, // '_'
0xa3, 0x00, // Unicode Character 'POUND SIGN' (U+00A3)
0x5f, 0x00, // '_'
0x00, 0x00 // NUL
};
#endif // SQPLUS_AUTOCONVERT_OTHER_CHAR
// Test of the automatic char <-> wchar_t string conversion that is active
// when SQPLUS_AUTOCONVERT_OTHER_CHAR is defined (otherwise the test only
// prints that it was skipped).
//
// Steps:
//  1. Define a Squirrel helper that prints the elements of a string as hex.
//  2. Bind StringConvTest to the VM and check a round trip with pure ASCII.
//  3. Run one of four sub-cases, selected at compile time by SQUNICODE and
//     SQPLUS_USE_LATIN1. Each runs a test_StringConv_caseN.nut script and
//     compares what arrived in the native object with the reference tables.
SQPLUS_TEST(Test_StringConv)
{
    SQPLUS_TEST_TRACE();

    using namespace SqPlus;
    SquirrelObject root = SquirrelVM::GetRootTable();

    // A Squirrel helper function to print an array of elems.
    // NOTE: continuation lines of the script literals must start in column 0,
    // leading whitespace would become part of the script text.
# if defined(SQUNICODE)
    RUN_SCRIPT(_SC("\n\
function PrintElems16(name, v){ \n\
local s = name + \"\\t[ \" \n\
foreach( e in v ) s += format(\"%04x \", e & 0xffff) \n\
s+=\"]\\n\" \n\
print(s) \n\
} \n\
PrintElems16(\"car\", \"car\") \n\
"));
# else
    RUN_SCRIPT(_SC("\n\
function PrintElems8(name, v){ \n\
local s = name + \"\\t[ \" \n\
foreach( e in v ) s += format(\"%02x \", e & 0xff) \n\
s+=\"]\\n\" \n\
print(s) \n\
} \n\
PrintElems8(\"car\", \"car\") \n\
"));
#endif

#ifdef SQPLUS_AUTOCONVERT_OTHER_CHAR
    printf( "SQPLUS_USE_LATIN1:%d\n", SQPLUS_USE_LATIN1 );

    SQClassDef<StringConvTest>(_SC("StringConvTest"))
        .func(&StringConvTest::AsciiArgFunc, _SC("AsciiArgFunc"))
        .func(&StringConvTest::WideArgFunc, _SC("WideArgFunc"))
        .func(&StringConvTest::GetArg1, _SC("GetArg1"))
        .func(&StringConvTest::GetArg2, _SC("GetArg2"))
        ;

    // Test that basic string (only 7-bit chars) can be set and read
    // back for both Ascii and wide strings
    SQPLUS_TEST_TRACE_SUB(7bit_chars);
    RUN_SCRIPT(_SC("\n\
print(\"StringConvTest - with only ASCII characters\\n\"); \
local a = StringConvTest(); \n\
a.AsciiArgFunc(\"Hello\"); \n\
print(a.GetArg1()); \n\
a.WideArgFunc(\"World\"); \n\
print(a.GetArg2()); \n\
assert(a.GetArg1()==\"Hello\"); \n\
assert(a.GetArg2()==\"World\"); \n\
"));
    printf("-- \n\n");

    RUN_SCRIPT(_SC("print(\"StringConvTest - with non-ASCII characters\\n\");"));

    //
    // This script tests conversion with strings that use more than 7 bits.
    // The character \u20AC is the Euro sign (outside of Latin1 code page)
    //
# if defined(SQUNICODE) && SQPLUS_USE_LATIN1 == 0
    // (case 1)
    // SQChar is wchar_t, and 8 bit strings are UTF-8.
    // Conversions should work.
    SQPLUS_TEST_TRACE_SUB(With_SQUNICODE);
    RUN_SCRIPT(L"dofile(\"test_StringConv_case1.nut\", true)");

    SquirrelObject a1 = root.GetValue(_SC("a1"));
    StringConvTest *a1p = static_cast<StringConvTest*>(
        a1.GetInstanceUP(ClassType<StringConvTest>::type())
        );
    // Report a missing instance as a test failure instead of crashing on it.
    CHECK(a1p != 0);
    if (a1p) {
        CHECK(checkElems_8bit("a1p->GetArg1()",
                              a1p->GetArg1(), testString_utf_8));
        CHECK(checkElems_16bit("a1p->GetArg2()",
                               a1p->GetArg2(), testString_utf_16_le));
    }

# elif defined(SQUNICODE) && SQPLUS_USE_LATIN1 == 1
    // (case 2)
    // SQChar is wchar_t, and 8 bit strings are Latin1.
    // Going from wchar_t => char => wchar_t will not work.
    SQPLUS_TEST_TRACE_SUB(With_SQUNICODE_LATIN1);
    RUN_SCRIPT(L"dofile(\"test_StringConv_case2.nut\", true)");

    SquirrelObject a2 = root.GetValue(_SC("a2"));
    StringConvTest *a2p = static_cast<StringConvTest*>(
        a2.GetInstanceUP(ClassType<StringConvTest>::type())
        );
    // Report a missing instance as a test failure instead of crashing on it.
    CHECK(a2p != 0);
    if (a2p) {
        CHECK(checkElems_8bit("a2p->GetArg1()",
                              a2p->GetArg1(), testString_latin_1));
        CHECK(checkElems_16bit("a2p->GetArg2()",
                               a2p->GetArg2(), testString_utf_16_le));
    }

# elif !defined(SQUNICODE) && SQPLUS_USE_LATIN1 == 0
    // (case 3)
    // SQChar is char, and 8 bit strings are treated as UTF-8 in conversion.
    // Conversion from char=>wchar_t fails when the string is not valid UTF-8.
    // This mode could be useful when one knows that all 8-bit strings are
    // valid UTF-8.
    SQPLUS_TEST_TRACE_SUB(Without_SQUNICODE);
    RUN_SCRIPT("dofile(\"test_StringConv_case3.nut\", true);");

    SquirrelObject a3 = root.GetValue(_SC("a3"));
    StringConvTest *a3p = static_cast<StringConvTest*>(
        a3.GetInstanceUP(ClassType<StringConvTest>::type())
        );
    // Report a missing instance as a test failure instead of crashing on it.
    CHECK(a3p != 0);
    if (a3p) {
        CHECK(checkElems_8bit("a3p->GetArg1()",
                              a3p->GetArg1(), testString_utf_8));
        CHECK(checkElems_16bit("a3p->GetArg2()",
                               a3p->GetArg2(), testString_utf_16_le));
    }

    SquirrelObject a3b = root.GetValue(_SC("a3b"));
    StringConvTest *a3bp = static_cast<StringConvTest*>(
        a3b.GetInstanceUP(ClassType<StringConvTest>::type())
        );
    CHECK(a3bp != 0);
    if (a3bp) {
        CHECK(checkElems_8bit("a3bp->GetArg1()",
                              a3bp->GetArg1(), brokenString_utf_8));
        CHECK(checkElems_16bit("a3bp->GetArg2()",
                               a3bp->GetArg2(), brokenString_utf_16_le));
    }

# elif !defined(SQUNICODE) && SQPLUS_USE_LATIN1 == 1
    // (case 4)
    // SQChar is char, and 8 bit strings treated like Latin1.
    SQPLUS_TEST_TRACE_SUB(Without_SQUNICODE_LATIN1);
    RUN_SCRIPT("dofile(\"test_StringConv_case4.nut\", true);");

    SquirrelObject a4 = root.GetValue(_SC("a4"));
    StringConvTest *a4p = static_cast<StringConvTest*>(
        a4.GetInstanceUP(ClassType<StringConvTest>::type())
        );
    // Report a missing instance as a test failure instead of crashing on it.
    CHECK(a4p != 0);
    if (a4p) {
        CHECK(checkElems_8bit("a4p->GetArg1()",
                              a4p->GetArg1(), testString2_latin_1));
        CHECK(checkElems_16bit("a4p->GetArg2()",
                               a4p->GetArg2(), testString2_utf_16_le));

        // Conversion wchar_t => char won't work for characters outside of latin-1.
        //
        // Decode the little-endian byte pairs into real wchar_t elements
        // instead of casting the byte table to wchar_t*: wchar_t is not
        // 16 bits wide on every platform (the cast would then yield wrong
        // characters and read past the table) and an unsigned char array
        // carries no wchar_t alignment guarantee. The buffer is static so
        // that it stays valid for as long as the original table did.
        static wchar_t euroStr[sizeof(testString_utf_16_le) / 2];
        for (unsigned i = 0; i < sizeof(testString_utf_16_le) / 2; ++i) {
            euroStr[i] = static_cast<wchar_t>(
                testString_utf_16_le[2 * i] |
                (testString_utf_16_le[2 * i + 1] << 8));
        }
        a4p->WideArgFunc(euroStr); // containing 'EURO SIGN'
        RUN_SCRIPT("test4();");
    }
# endif

#else // !defined(SQPLUS_AUTOCONVERT_OTHER_CHAR)
    printf("Skipped (SQPLUS_AUTOCONVERT_OTHER_CHAR is not defined)\n");
#endif
}