This commit is contained in:
Vladislav Khorev 2014-11-27 08:43:39 +00:00
parent 654cdad2e7
commit 6e25c55c14
2 changed files with 48 additions and 4 deletions

View File

@ -1,6 +1,6 @@
CC=g++ CC=g++-4.8.3
CFLAGS=-Wall -O3 -std=gnu++0x CFLAGS=-Wall -O3 -std=gnu++11 -finput-charset=UTF-8 -fexec-charset=UTF-8
LDFLAGS= -lpthread -lrt -lmysqlconn-static -L/usr/local/lib LDFLAGS= -lpthread -lrt -L/usr/local/lib
BoostPath=../../../boost_1_56_0 BoostPath=../../../boost_1_56_0

View File

@ -194,8 +194,27 @@ std::map<NounTuple, StringSet> getNounEndingTable()
bool NounIsInDictionary(std::wstring nounNominative) bool NounIsInDictionary(std::wstring nounNominative)
{ {
std::cout <<frequentWordSet.size() << std::endl;
std::cout << "$$" << UTF16to8(frequentWordSet.begin()->c_str()) << std::endl;
std::cout <<"$$" << UTF16to8(nounNominative.c_str()) << std::endl;
std::cout << "count" << frequentWordSet.count(nounNominative) << std::endl;
if (nounNominative == frequentWordSet.begin()->c_str())
{
std::cout <<"true!" << std::endl;
}
else
{
std::cout << "false!" << std::endl;
}
if (frequentWordSet.count(nounNominative) != 0) if (frequentWordSet.count(nounNominative) != 0)
{ {
return true; return true;
} }
return false; return false;
@ -338,31 +357,47 @@ std::wstring RestoreNounByTuple(std::wstring nounBase, NounTuple nounTuple)
} }
std::vector<NounStruct> RecognizeNoun(std::wstring noun) std::vector<NounStruct> RecognizeNoun(std::wstring noun)
{ {
std::cout << "!" << UTF16to8(noun.c_str()) << std::endl;
std::cout << "?" << UTF16to8(frequentWordSet.begin()->c_str()) <<std::endl;
std::vector<NounStruct> result; std::vector<NounStruct> result;
auto nounEndingDivisionArr = getPossibleNounEndingDivisionArr(noun); auto nounEndingDivisionArr = getPossibleNounEndingDivisionArr(noun);
std::cout << nounEndingDivisionArr.size() << std::endl;
for (auto nounEndingDivision : nounEndingDivisionArr) for (auto nounEndingDivision : nounEndingDivisionArr)
{ {
std::wstring nounBase = nounEndingDivision.first; std::wstring nounBase = nounEndingDivision.first;
std::wstring nounEnding = nounEndingDivision.second; std::wstring nounEnding = nounEndingDivision.second;
std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(nounEnding); std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(nounEnding);
std::cout << "BASE" << UTF16to8(nounBase.c_str()) << std::endl;
for (auto nounTuple : possibleTupleArr) for (auto nounTuple : possibleTupleArr)
{ {
std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple); std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple);
std::cout <<"Nominative" << UTF16to8(nounNominative.c_str()) << std::endl;
auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative); auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative);
std::cout <<"setsize" << possibleNounDetectionSet.size() << std::endl;
if (possibleNounDetectionSet.count(std::get<0>(nounTuple)) != 0) if (possibleNounDetectionSet.count(std::get<0>(nounTuple)) != 0)
{ {
std::cout<<"if1" << std::endl;
if (NounIsInDictionary(nounNominative)) if (NounIsInDictionary(nounNominative))
{ {
std::cout <<"result1 go!" << std::endl;
result.push_back({ nounTuple, nounNominative }); result.push_back({ nounTuple, nounNominative });
} }
} }
@ -377,8 +412,12 @@ std::vector<NounStruct> RecognizeNoun(std::wstring noun)
void LoadFrequentWordSet() void LoadFrequentWordSet()
{ {
#ifdef _WIN32
std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_words.txt"); std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_words.txt");
#else
std::ifstream f("/home/devuser/workplace/rudict/frequent_words.txt");
#endif
//f.imbue(std::locale(std::locale::empty(), new std::codecvt_utf8<wchar_t>)); //f.imbue(std::locale(std::locale::empty(), new std::codecvt_utf8<wchar_t>));
std::string line; std::string line;
@ -386,6 +425,7 @@ void LoadFrequentWordSet()
if (f.is_open()) if (f.is_open())
{ {
std::cout<<"File found!" << std::endl;
while (getline(f, line)) while (getline(f, line))
{ {
wline = UTF8to16(line.c_str()); wline = UTF8to16(line.c_str());
@ -393,4 +433,8 @@ void LoadFrequentWordSet()
} }
f.close(); f.close();
} }
else
{
std::cout <<"file not found!" << std::endl;
}
} }