diff --git a/rudict/rudict/http/request_handler.cpp b/rudict/rudict/http/request_handler.cpp
index 18efacd..74a9b43 100644
--- a/rudict/rudict/http/request_handler.cpp
+++ b/rudict/rudict/http/request_handler.cpp
@@ -17,7 +17,7 @@
#include "reply.hpp"
#include "request.hpp"
-#include "boost/algorithm/string.hpp"
+#include "boost/algorithm/string.hpp"
#include "boost/property_tree/json_parser.hpp"
#include "../utf8utf16.h"
#include "../noun.h"
@@ -62,7 +62,7 @@ namespace http {
boost::to_lower(request_path);
- std::wstring requestedStr = UTF8to16(request_path.c_str());
+ std::wstring requestedStr = string_to_wstring(request_path);
/*
requestedStr = L"Вы запросили: " + requestedStr;
@@ -71,13 +71,13 @@ namespace http {
rep.content = "
" + rep.content + "";
*/
- boost::property_tree::wptree propertyTree = PrepareReport(requestedStr);
-
- std::wstringstream output_stream;
-
- boost::property_tree::write_json(output_stream, propertyTree);
-
- std::string outputJsonCode = UTF16to8(output_stream.str().c_str());
+ boost::property_tree::wptree propertyTree = PrepareReport(requestedStr);
+
+ std::wstringstream output_stream;
+
+ boost::property_tree::write_json(output_stream, propertyTree);
+
+ std::string outputJsonCode = wstring_to_string(output_stream.str());
rep.status = reply::ok;
diff --git a/rudict/rudict/noun.cpp b/rudict/rudict/noun.cpp
index 38b2ea6..22e012a 100644
--- a/rudict/rudict/noun.cpp
+++ b/rudict/rudict/noun.cpp
@@ -197,8 +197,8 @@ bool NounIsInDictionary(std::wstring nounNominative)
std::cout <c_str()) << std::endl;
-std::cout <<"$$" << UTF16to8(nounNominative.c_str()) << std::endl;
+std::cout << "$$" << wstring_to_string(*frequentWordSet.begin()) << std::endl;
+std::cout << "$$" << wstring_to_string(nounNominative) << std::endl;
std::cout << "count" << frequentWordSet.count(nounNominative) << std::endl;
@@ -366,9 +366,9 @@ std::wstring RestoreNounByTuple(std::wstring nounBase, NounTuple nounTuple)
std::vector RecognizeNoun(std::wstring noun)
{
-std::cout << "!" << UTF16to8(noun.c_str()) << std::endl;
+ std::cout << "!" << wstring_to_string(noun) << std::endl;
-std::cout << "?" << UTF16to8(frequentWordSet.begin()->c_str()) < result;
@@ -384,14 +384,14 @@ std::cout << nounEndingDivisionArr.size() << std::endl;
std::vector possibleTupleArr = GetPossibleNounTupleArr(nounEnding);
-std::cout << "BASE" << UTF16to8(nounBase.c_str()) << std::endl;
+ std::cout << "BASE" << wstring_to_string(nounBase) << std::endl;
for (auto nounTuple : possibleTupleArr)
{
std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple);
-std::cout <<"Nominative" << UTF16to8(nounNominative.c_str()) << std::endl;
+ std::cout << "Nominative" << wstring_to_string(nounNominative) << std::endl;
auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative);
@@ -435,7 +435,7 @@ std::ifstream f("/home/devuser/workplace/rudict/frequent_words.txt");
std::cout<<"File found!" << std::endl;
while (getline(f, line))
{
- wline = UTF8to16(line.c_str());
+ wline = string_to_wstring(line);
frequentWordSet.insert(wline);
}
f.close();
diff --git a/rudict/rudict/utf8utf16.cpp b/rudict/rudict/utf8utf16.cpp
index af4f3b7..3ce2bb7 100644
--- a/rudict/rudict/utf8utf16.cpp
+++ b/rudict/rudict/utf8utf16.cpp
@@ -1,81 +1,95 @@
-#include "utf8utf16.h"
-
-#include
-
+#include "utf8utf16.h"
+
+
+#include
+#include
+#include
+// Converts wide text to UTF-8 through boost::locale; malformed input is skipped by default.
+std::string wstring_to_string(std::wstring in)
+{
+    /* Legacy hand-rolled UTF-16 -> UTF-8 encoder from UTF16to8(const wchar_t * in); reference only.
+    std::string out;
+    unsigned int codepoint = 0;
+    for (in; *in != 0; ++in)
+    {
+        if (*in >= 0xd800 && *in <= 0xdbff)
+            codepoint = ((*in - 0xd800) << 10) + 0x10000;
+        else
+        {
+            if (*in >= 0xdc00 && *in <= 0xdfff)
+                codepoint |= *in - 0xdc00;
+            else
+                codepoint = *in;
+
+            if (codepoint <= 0x7f)
+                out.append(1, static_cast<char>(codepoint));
+            else if (codepoint <= 0x7ff)
+            {
+                out.append(1, static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f)));
+                out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
+            }
+            else if (codepoint <= 0xffff)
+            {
+                out.append(1, static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f)));
+                out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
+                out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
+            }
+            else
+            {
+                out.append(1, static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07)));
+                out.append(1, static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f)));
+                out.append(1, static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)));
+                out.append(1, static_cast<char>(0x80 | (codepoint & 0x3f)));
+            }
+            codepoint = 0;
+        }
+    }
+    return out;*/
+    // The output character type cannot be deduced from the argument, so it is given explicitly.
+    std::string out = boost::locale::conv::utf_to_utf<char>(in);
+
+    return out;
-std::string UTF16to8(const wchar_t * in)
-{
- std::string out;
- unsigned int codepoint = 0;
- for (in; *in != 0; ++in)
- {
- if (*in >= 0xd800 && *in <= 0xdbff)
- codepoint = ((*in - 0xd800) << 10) + 0x10000;
- else
- {
- if (*in >= 0xdc00 && *in <= 0xdfff)
- codepoint |= *in - 0xdc00;
- else
- codepoint = *in;
-
- if (codepoint <= 0x7f)
- out.append(1, static_cast(codepoint));
- else if (codepoint <= 0x7ff)
- {
- out.append(1, static_cast(0xc0 | ((codepoint >> 6) & 0x1f)));
- out.append(1, static_cast(0x80 | (codepoint & 0x3f)));
- }
- else if (codepoint <= 0xffff)
- {
- out.append(1, static_cast(0xe0 | ((codepoint >> 12) & 0x0f)));
- out.append(1, static_cast(0x80 | ((codepoint >> 6) & 0x3f)));
- out.append(1, static_cast(0x80 | (codepoint & 0x3f)));
- }
- else
- {
- out.append(1, static_cast(0xf0 | ((codepoint >> 18) & 0x07)));
- out.append(1, static_cast(0x80 | ((codepoint >> 12) & 0x3f)));
- out.append(1, static_cast(0x80 | ((codepoint >> 6) & 0x3f)));
- out.append(1, static_cast(0x80 | (codepoint & 0x3f)));
- }
- codepoint = 0;
- }
- }
- return out;
}
-std::wstring UTF8to16(const char * in)
-{
-
- std::wstring out;
- if (in == NULL)
- return out;
-
- unsigned int codepoint;
- while (*in != 0)
- {
- unsigned char ch = static_cast(*in);
- if (ch <= 0x7f)
- codepoint = ch;
- else if (ch <= 0xbf)
- codepoint = (codepoint << 6) | (ch & 0x3f);
- else if (ch <= 0xdf)
- codepoint = ch & 0x1f;
- else if (ch <= 0xef)
- codepoint = ch & 0x0f;
- else
- codepoint = ch & 0x07;
- ++in;
- if (((*in & 0xc0) != 0x80) && (codepoint <= 0x10ffff))
- {
- if (codepoint > 0xffff)
- {
- out.append(1, static_cast(0xd800 + (codepoint >> 10)));
- out.append(1, static_cast(0xdc00 + (codepoint & 0x03ff)));
- }
- else if (codepoint < 0xd800 || codepoint >= 0xe000)
- out.append(1, static_cast(codepoint));
- }
- }
- return out;
-}
+// Converts UTF-8 text to a wide string through boost::locale; malformed input is skipped by default.
+std::wstring string_to_wstring(std::string in)
+{
+    /* Legacy decoder from UTF8to16(const char * in); always emits UTF-16 surrogate pairs. Reference only.
+    std::wstring out;
+    if (in == NULL)
+        return out;
+
+    unsigned int codepoint;
+    while (*in != 0)
+    {
+        unsigned char ch = static_cast<unsigned char>(*in);
+        if (ch <= 0x7f)
+            codepoint = ch;
+        else if (ch <= 0xbf)
+            codepoint = (codepoint << 6) | (ch & 0x3f);
+        else if (ch <= 0xdf)
+            codepoint = ch & 0x1f;
+        else if (ch <= 0xef)
+            codepoint = ch & 0x0f;
+        else
+            codepoint = ch & 0x07;
+        ++in;
+        if (((*in & 0xc0) != 0x80) && (codepoint <= 0x10ffff))
+        {
+            if (codepoint > 0xffff)
+            {
+                out.append(1, static_cast<wchar_t>(0xd800 + (codepoint >> 10)));
+                out.append(1, static_cast<wchar_t>(0xdc00 + (codepoint & 0x03ff)));
+            }
+            else if (codepoint < 0xd800 || codepoint >= 0xe000)
+                out.append(1, static_cast<wchar_t>(codepoint));
+        }
+    }
+    return out;
+    */
+    // The output character type cannot be deduced from the argument, so it is given explicitly.
+    std::wstring out = boost::locale::conv::utf_to_utf<wchar_t>(in);
+
+    return out;
+}
diff --git a/rudict/rudict/utf8utf16.h b/rudict/rudict/utf8utf16.h
index ab1e77c..870083e 100644
--- a/rudict/rudict/utf8utf16.h
+++ b/rudict/rudict/utf8utf16.h
@@ -1,17 +1,18 @@
-#ifndef UTF8UTF16_H_INCLUDED
-#define UTF8UTF16_H_INCLUDED
-
-
-#include
-#include
-#include
-#include
-#include
-#include
-
-std::wstring UTF8to16(const char * in);
-std::string UTF16to8(const wchar_t * in);
-
-
-
-#endif //UTF8UTF16_H_INCLUDED
+#ifndef UTF8UTF16_H_INCLUDED
+#define UTF8UTF16_H_INCLUDED
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+// Wide string -> narrow std::string; defined in utf8utf16.cpp on top of boost::locale::conv::utf_to_utf.
+std::string wstring_to_string(std::wstring in);
+// Narrow std::string -> wide string; defined in utf8utf16.cpp on top of boost::locale::conv::utf_to_utf.
+std::wstring string_to_wstring(std::string in);
+
+
+
+#endif //UTF8UTF16_H_INCLUDED