utf8 dammit
This commit is contained in:
parent
b1878507f9
commit
7cf1a99d98
@ -62,7 +62,7 @@ namespace http {
|
|||||||
boost::to_lower(request_path);
|
boost::to_lower(request_path);
|
||||||
|
|
||||||
|
|
||||||
std::wstring requestedStr = UTF8to16(request_path.c_str());
|
std::wstring requestedStr = string_to_wstring(request_path);
|
||||||
/*
|
/*
|
||||||
requestedStr = L"Вы запросили: " + requestedStr;
|
requestedStr = L"Вы запросили: " + requestedStr;
|
||||||
|
|
||||||
@ -77,7 +77,7 @@ namespace http {
|
|||||||
|
|
||||||
boost::property_tree::write_json(output_stream, propertyTree);
|
boost::property_tree::write_json(output_stream, propertyTree);
|
||||||
|
|
||||||
std::string outputJsonCode = UTF16to8(output_stream.str().c_str());
|
std::string outputJsonCode = wstring_to_string(output_stream.str());
|
||||||
|
|
||||||
rep.status = reply::ok;
|
rep.status = reply::ok;
|
||||||
|
|
||||||
|
@ -197,8 +197,8 @@ bool NounIsInDictionary(std::wstring nounNominative)
|
|||||||
|
|
||||||
std::cout <<frequentWordSet.size() << std::endl;
|
std::cout <<frequentWordSet.size() << std::endl;
|
||||||
|
|
||||||
std::cout << "$$" << UTF16to8(frequentWordSet.begin()->c_str()) << std::endl;
|
std::cout << "$$" << wstring_to_string(*frequentWordSet.begin()) << std::endl;
|
||||||
std::cout <<"$$" << UTF16to8(nounNominative.c_str()) << std::endl;
|
std::cout << "$$" << wstring_to_string(nounNominative) << std::endl;
|
||||||
|
|
||||||
std::cout << "count" << frequentWordSet.count(nounNominative) << std::endl;
|
std::cout << "count" << frequentWordSet.count(nounNominative) << std::endl;
|
||||||
|
|
||||||
@ -366,9 +366,9 @@ std::wstring RestoreNounByTuple(std::wstring nounBase, NounTuple nounTuple)
|
|||||||
std::vector<NounStruct> RecognizeNoun(std::wstring noun)
|
std::vector<NounStruct> RecognizeNoun(std::wstring noun)
|
||||||
{
|
{
|
||||||
|
|
||||||
std::cout << "!" << UTF16to8(noun.c_str()) << std::endl;
|
std::cout << "!" << wstring_to_string(noun) << std::endl;
|
||||||
|
|
||||||
std::cout << "?" << UTF16to8(frequentWordSet.begin()->c_str()) <<std::endl;
|
std::cout << "?" << wstring_to_string(*frequentWordSet.begin()) << std::endl;
|
||||||
|
|
||||||
|
|
||||||
std::vector<NounStruct> result;
|
std::vector<NounStruct> result;
|
||||||
@ -384,14 +384,14 @@ std::cout << nounEndingDivisionArr.size() << std::endl;
|
|||||||
|
|
||||||
std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(nounEnding);
|
std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(nounEnding);
|
||||||
|
|
||||||
std::cout << "BASE" << UTF16to8(nounBase.c_str()) << std::endl;
|
std::cout << "BASE" << wstring_to_string(nounBase) << std::endl;
|
||||||
|
|
||||||
|
|
||||||
for (auto nounTuple : possibleTupleArr)
|
for (auto nounTuple : possibleTupleArr)
|
||||||
{
|
{
|
||||||
std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple);
|
std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple);
|
||||||
|
|
||||||
std::cout <<"Nominative" << UTF16to8(nounNominative.c_str()) << std::endl;
|
std::cout << "Nominative" << wstring_to_string(nounNominative) << std::endl;
|
||||||
|
|
||||||
auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative);
|
auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative);
|
||||||
|
|
||||||
@ -435,7 +435,7 @@ std::ifstream f("/home/devuser/workplace/rudict/frequent_words.txt");
|
|||||||
std::cout<<"File found!" << std::endl;
|
std::cout<<"File found!" << std::endl;
|
||||||
while (getline(f, line))
|
while (getline(f, line))
|
||||||
{
|
{
|
||||||
wline = UTF8to16(line.c_str());
|
wline = string_to_wstring(line);
|
||||||
frequentWordSet.insert(wline);
|
frequentWordSet.insert(wline);
|
||||||
}
|
}
|
||||||
f.close();
|
f.close();
|
||||||
|
@ -1,10 +1,13 @@
|
|||||||
#include "utf8utf16.h"
|
#include "utf8utf16.h"
|
||||||
|
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <boost/locale.hpp>
|
||||||
|
#include <locale>
|
||||||
|
|
||||||
|
std::string wstring_to_string(std::wstring in)
|
||||||
std::string UTF16to8(const wchar_t * in)
|
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
std::string out;
|
std::string out;
|
||||||
unsigned int codepoint = 0;
|
unsigned int codepoint = 0;
|
||||||
for (in; *in != 0; ++in)
|
for (in; *in != 0; ++in)
|
||||||
@ -41,12 +44,17 @@ std::string UTF16to8(const wchar_t * in)
|
|||||||
codepoint = 0;
|
codepoint = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return out;*/
|
||||||
|
|
||||||
|
std::string out = boost::locale::conv::utf_to_utf<char>(in);
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::wstring UTF8to16(const char * in)
|
std::wstring string_to_wstring(std::string in)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
std::wstring out;
|
std::wstring out;
|
||||||
if (in == NULL)
|
if (in == NULL)
|
||||||
return out;
|
return out;
|
||||||
@ -78,4 +86,10 @@ std::wstring UTF8to16(const char * in)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return out;
|
return out;
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
std::wstring out = boost::locale::conv::utf_to_utf<wchar_t>(in);
|
||||||
|
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
@ -9,8 +9,9 @@
|
|||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <locale>
|
#include <locale>
|
||||||
|
|
||||||
std::wstring UTF8to16(const char * in);
|
std::string wstring_to_string(std::wstring in);
|
||||||
std::string UTF16to8(const wchar_t * in);
|
|
||||||
|
std::wstring string_to_wstring(std::string in);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user