rudict
This commit is contained in:
parent
f9c1f9f153
commit
7bcafe1afd
20002
rudict/frequent_words.txt
Normal file
20002
rudict/frequent_words.txt
Normal file
File diff suppressed because it is too large
Load Diff
22
rudict/rudict.sln
Normal file
22
rudict/rudict.sln
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
|
||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio Express 2013 for Windows Desktop
|
||||||
|
VisualStudioVersion = 12.0.30723.0
|
||||||
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rudict", "rudict\rudict.vcxproj", "{6AAE716F-0F7C-4A75-94C9-83C86F18BEA3}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|Win32 = Debug|Win32
|
||||||
|
Release|Win32 = Release|Win32
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{6AAE716F-0F7C-4A75-94C9-83C86F18BEA3}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||||
|
{6AAE716F-0F7C-4A75-94C9-83C86F18BEA3}.Debug|Win32.Build.0 = Debug|Win32
|
||||||
|
{6AAE716F-0F7C-4A75-94C9-83C86F18BEA3}.Release|Win32.ActiveCfg = Release|Win32
|
||||||
|
{6AAE716F-0F7C-4A75-94C9-83C86F18BEA3}.Release|Win32.Build.0 = Release|Win32
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
|
HideSolutionNode = FALSE
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
25
rudict/rudict/main.cpp
Normal file
25
rudict/rudict/main.cpp
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
|
||||||
|
#include "noun.h"
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
LoadFrequentWordSet();
|
||||||
|
|
||||||
|
std::wstring s(L"ïîÿñíèöåé");
|
||||||
|
|
||||||
|
std::vector<NounStruct> recognizedResultArr = RecognizeNoun(s);
|
||||||
|
|
||||||
|
for (auto& i : recognizedResultArr)
|
||||||
|
{
|
||||||
|
std::wcout << i.noun << L"\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
system("pause");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
340
rudict/rudict/noun.cpp
Normal file
340
rudict/rudict/noun.cpp
Normal file
@ -0,0 +1,340 @@
|
|||||||
|
#include "noun.h"
|
||||||
|
|
||||||
|
#include <iostream> //Xperimental -- for debug only
|
||||||
|
|
||||||
|
|
||||||
|
std::set<std::wstring> frequentWordSet;
|
||||||
|
|
||||||
|
// Returns every distinct noun ending the recognizer tries to strip from a word,
// including the empty ending.
//
// Each ending appears exactly once: a repeated entry would make
// getPossibleNounEndingDivisionArr() produce the same stem/ending split twice
// and therefore duplicate the final recognition results.
std::vector<std::wstring> GetAllNounEndingArr()
{
    std::vector<std::wstring> result
    {
        L"",
        L"à",
        L"è",
        L"å",
        L"ó",
        L"îé",
        L"û",
        L"îì",
        L"ü",
        L"ÿ",
        L"þ",
        L"åì",
        L"î",
        L"üþ",
        L"àì",
        L"àìè",
        L"àõ",
        L"îâ",
        L"åé",
        L"ÿì",
        L"ÿõ",
        L"ÿìè",
    };

    return result;
}
|
||||||
|
|
||||||
|
std::map<NounTuple, std::wstring> getNounEndingTable()
|
||||||
|
{
|
||||||
|
std::map<NounTuple, std::wstring> result;
|
||||||
|
|
||||||
|
//Singular
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = L"à";
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_SINGULAR }] = L"è";
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_SINGULAR }] = L"å";
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = L"ó";
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = L"îé"; //Xperimental -- need also deal with "îþ" ending
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = L"å";
|
||||||
|
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = L"à";
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_SINGULAR }] = L"û";
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_SINGULAR }] = L"å";
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = L"ó";
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = L"îé"; //Xperimental -- need also deal with "îþ" ending
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = L"å";
|
||||||
|
|
||||||
|
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = L"";
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = L"à";
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = L"ó";
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = L"";
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = L"îì";
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = L"å";
|
||||||
|
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = L"ü";
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = L"ÿ";
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = L"þ";
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = L"ü";
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = L"åì";
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = L"å";
|
||||||
|
|
||||||
|
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = L"î";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_SINGULAR }] = L"à";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_SINGULAR }] = L"ó";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = L"î";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = L"îì";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = L"å";
|
||||||
|
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = L"å";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_SINGULAR }] = L"ÿ";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_SINGULAR }] = L"þ";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = L"å";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = L"åì";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = L"å";
|
||||||
|
|
||||||
|
result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = L"ü";
|
||||||
|
result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_SINGULAR }] = L"è";
|
||||||
|
result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_SINGULAR }] = L"è";
|
||||||
|
result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = L"ü";
|
||||||
|
result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = L"üþ";
|
||||||
|
result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = L"è";
|
||||||
|
|
||||||
|
//Plural
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_PLURAL }] = L"è";
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_PLURAL }] = L""; //Xperimental -- need special modificator for suffix
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_PLURAL }] = L"àì";
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = L"è";
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = L"àìè";
|
||||||
|
result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = L"àõ";
|
||||||
|
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_PLURAL }] = L"è";
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_PLURAL }] = L"";
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_PLURAL }] = L"àì";
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = L"";
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = L"àìè";
|
||||||
|
result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = L"àõ";
|
||||||
|
|
||||||
|
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = L"û";
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = L"îâ";
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = L"àì";
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = L"û";
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = L"àìè";
|
||||||
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = L"àõ";
|
||||||
|
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = L"è";
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = L"åé";
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = L"ÿì";
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = L"è";
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = L"è";
|
||||||
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = L"ÿõ";
|
||||||
|
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_PLURAL }] = L"à";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_PLURAL }] = L"";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_PLURAL }] = L"àì";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = L"à";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = L"àìè";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = L"àõ";
|
||||||
|
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_PLURAL }] = L"ÿ";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_PLURAL }] = L"åé";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_PLURAL }] = L"ÿì";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = L"ÿ";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = L"ÿìè";
|
||||||
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = L"ÿõ";
|
||||||
|
|
||||||
|
result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_PLURAL }] = L"è";
|
||||||
|
result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_PLURAL }] = L"åé";
|
||||||
|
result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_PLURAL }] = L"ÿì";
|
||||||
|
result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = L"è";
|
||||||
|
result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = L"ÿìè";
|
||||||
|
result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = L"ÿõ";
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool NounIsInDictionary(std::wstring nounNominative)
|
||||||
|
{
|
||||||
|
if (frequentWordSet.count(nounNominative) != 0)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::set<NounDeclencion> GetPossibleNounDeclencionSet(std::wstring nounNominative)
|
||||||
|
{
|
||||||
|
|
||||||
|
if (nounNominative.size() <= 1)
|
||||||
|
{
|
||||||
|
//Xperimental -- need to say that word is too short!
|
||||||
|
return{};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
wchar_t lastChar = nounNominative[nounNominative.size()-1];
|
||||||
|
wchar_t prevLastChar = nounNominative[nounNominative.size() - 2];
|
||||||
|
|
||||||
|
if (lastChar == L'à')
|
||||||
|
{
|
||||||
|
return{ ND_1_HARD, ND_1_SOFT };
|
||||||
|
}
|
||||||
|
if (lastChar == L'î')
|
||||||
|
{
|
||||||
|
return{ ND_2_NEUTER_O };
|
||||||
|
}
|
||||||
|
if (lastChar == L'å')
|
||||||
|
{
|
||||||
|
return{ ND_2_NEUTER_E };
|
||||||
|
}
|
||||||
|
if (lastChar == L'ü')
|
||||||
|
{
|
||||||
|
return{ ND_2_SOFT_MALE };
|
||||||
|
}
|
||||||
|
|
||||||
|
return{ ND_2_HARD_MALE };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true when `c` is one of the characters in the hard-coded consonant
// list below, false otherwise.
bool charIsConsolant(wchar_t c)
{
    const std::wstring consonantChars = L"éöêíãøùçõôâïðëäæ÷ñìòá";

    return consonantChars.find(c) != std::wstring::npos;
}
|
||||||
|
|
||||||
|
// Returns true when `c` is one of the characters in the hard-coded vowel list
// below, false otherwise.
bool charIsVowel(wchar_t c)
{
    const std::wstring vowelChars = L"àîóûýÿ¸þèå";

    return vowelChars.find(c) != std::wstring::npos;
}
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<std::pair<std::wstring, std::wstring>> getPossibleNounEndingDivisionArr(std::wstring noun)
|
||||||
|
{
|
||||||
|
std::vector<std::pair<std::wstring, std::wstring>> result;
|
||||||
|
|
||||||
|
auto allNounEndingArr = GetAllNounEndingArr();
|
||||||
|
|
||||||
|
for (auto ending : allNounEndingArr)
|
||||||
|
{
|
||||||
|
if (boost::ends_with(noun, ending))
|
||||||
|
{
|
||||||
|
std::wstring nounBase = boost::replace_last_copy(noun, ending, "");
|
||||||
|
|
||||||
|
if (charIsConsolant(nounBase[nounBase.size() - 1]))
|
||||||
|
{
|
||||||
|
result.push_back({ nounBase, ending});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding)
|
||||||
|
{
|
||||||
|
std::vector<NounTuple> result;
|
||||||
|
|
||||||
|
auto nounEndingTable = getNounEndingTable();
|
||||||
|
|
||||||
|
for (auto i : nounEndingTable)
|
||||||
|
{
|
||||||
|
if (i.second == nounEnding)
|
||||||
|
{
|
||||||
|
result.push_back(i.first);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<NounTuple> FilterNounTupleArrByNounDeclentionSet(std::vector<NounTuple> nounTupleArr, std::set<NounDeclencion> filter)
|
||||||
|
{
|
||||||
|
std::vector<NounTuple> result;
|
||||||
|
|
||||||
|
for (auto nounTuple : nounTupleArr)
|
||||||
|
{
|
||||||
|
if (filter.count(std::get<0>(nounTuple)) != 0)
|
||||||
|
{
|
||||||
|
result.push_back(nounTuple);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::wstring RestoreNounByTuple(std::wstring nounBase, NounTuple nounTuple)
|
||||||
|
{
|
||||||
|
auto nounEndingTable = getNounEndingTable();
|
||||||
|
|
||||||
|
NounTuple nominativeNounTuple{ std::get<0>(nounTuple), NGC_P1_NOMINATIVE, NPF_SINGULAR };
|
||||||
|
|
||||||
|
return nounBase + nounEndingTable[nominativeNounTuple];
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<NounStruct> RecognizeNoun(std::wstring noun)
|
||||||
|
{
|
||||||
|
|
||||||
|
std::vector<NounStruct> result;
|
||||||
|
|
||||||
|
auto nounEndingDivisionArr = getPossibleNounEndingDivisionArr(noun);
|
||||||
|
|
||||||
|
for (auto nounEndingDivision : nounEndingDivisionArr)
|
||||||
|
{
|
||||||
|
std::wstring nounBase = nounEndingDivision.first;
|
||||||
|
std::wstring nounEnding = nounEndingDivision.second;
|
||||||
|
|
||||||
|
std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(nounEnding);
|
||||||
|
|
||||||
|
|
||||||
|
for (auto nounTuple : possibleTupleArr)
|
||||||
|
{
|
||||||
|
std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple);
|
||||||
|
|
||||||
|
auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative);
|
||||||
|
|
||||||
|
if (possibleNounDetectionSet.count(std::get<0>(nounTuple)) != 0)
|
||||||
|
{
|
||||||
|
if (NounIsInDictionary(nounNominative))
|
||||||
|
{
|
||||||
|
result.push_back({ nounTuple, nounNominative });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LoadFrequentWordSet()
|
||||||
|
{
|
||||||
|
std::wifstream f("C:/Workplace/ChineseJournal/rudict/frequent_words.txt");
|
||||||
|
|
||||||
|
f.imbue(std::locale(std::locale::empty(), new std::codecvt_utf8<wchar_t>));
|
||||||
|
|
||||||
|
std::wstring line;
|
||||||
|
|
||||||
|
if (f.is_open())
|
||||||
|
{
|
||||||
|
while (getline(f, line))
|
||||||
|
{
|
||||||
|
frequentWordSet.insert(line);
|
||||||
|
}
|
||||||
|
f.close();
|
||||||
|
}
|
||||||
|
}
|
73
rudict/rudict/noun.h
Normal file
73
rudict/rudict/noun.h
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
#ifndef NOUN_H_INCLUDED
|
||||||
|
#define NOUN_H_INCLUDED
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
#include <vector>
|
||||||
|
#include <fstream>
|
||||||
|
#include <codecvt>
|
||||||
|
|
||||||
|
#include "boost/algorithm/string.hpp"
|
||||||
|
|
||||||
|
// Declension classes distinguished by the recognizer.
// The example words are transliterations of the examples given in the original
// comments (NOTE(review): decoded from mis-encoded text -- confirm).
enum NounDeclencion
{
    ND_1_HARD      = 0, // e.g. "papa"    (plural "papy")
    ND_1_SOFT      = 1, // e.g. "doska"   (plural "doski")
    ND_2_HARD_MALE = 2, // e.g. "traktor" (plural "traktory")
    ND_2_SOFT_MALE = 3, // e.g. "topol'"  (plural "topoli")
    ND_2_NEUTER_O  = 4, // e.g. "brevno"  (plural "bryovna")
    ND_2_NEUTER_E  = 5, // e.g. "pole"    (plural "polya")
    ND_3           = 6  // e.g. "loshad'"
};
|
||||||
|
|
||||||
|
// Grammatical cases, numbered P1..P6 in the enumerator names.
enum NounGrammaticalCase
{
    NGC_P1_NOMINATIVE    = 0,
    NGC_P2_GENITIVE      = 1,
    NGC_P3_DATIVE        = 2,
    NGC_P4_ACCUSATIVE    = 3,
    NGC_P5_INSTRUMENTAL  = 4,
    NGC_P6_PREPOSITIONAL = 5
};
|
||||||
|
|
||||||
|
// Grammatical number of a noun form.
enum NounNumber
{
    NPF_SINGULAR = 0,
    NPF_PLURAL   = 1
};
|
||||||
|
|
||||||
|
// Identifies one noun form: (declension, grammatical case, number).
using NounTuple = std::tuple<NounDeclencion, NounGrammaticalCase, NounNumber>;
|
||||||
|
|
||||||
|
std::vector<std::wstring> GetAllNounEndingArr();
|
||||||
|
std::map<NounTuple, std::wstring> getNounEndingTable();
|
||||||
|
|
||||||
|
bool NounIsInDictionary(std::wstring nounNominative);
|
||||||
|
|
||||||
|
std::set<NounDeclencion> GetPossibleNounDeclencionSet(std::wstring nounNominative);
|
||||||
|
|
||||||
|
bool charIsConsolant(wchar_t c);
|
||||||
|
|
||||||
|
bool charIsVowel(wchar_t c);
|
||||||
|
|
||||||
|
|
||||||
|
struct NounStruct
|
||||||
|
{
|
||||||
|
NounTuple nounTuple;
|
||||||
|
std::wstring noun;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<std::pair<std::wstring, std::wstring>> getPossibleNounEndingDivisionArr(std::wstring noun);
|
||||||
|
|
||||||
|
std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding);
|
||||||
|
|
||||||
|
//std::vector<NounTuple> FilterNounTupleArrByNounDeclentionSet(std::vector<NounTuple> nounTupleArr, std::set<NounDeclencion> filter);
|
||||||
|
|
||||||
|
std::wstring RestoreNounByTuple(std::wstring nounBase, NounTuple nounTuple);
|
||||||
|
|
||||||
|
std::vector<NounStruct> RecognizeNoun(std::wstring noun);
|
||||||
|
|
||||||
|
void LoadFrequentWordSet();
|
||||||
|
|
||||||
|
#endif //NOUN_H_INCLUDED
|
80
rudict/rudict/rudict.vcxproj
Normal file
80
rudict/rudict/rudict.vcxproj
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
|
<Configuration>Debug</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
<ProjectConfiguration Include="Release|Win32">
|
||||||
|
<Configuration>Release</Configuration>
|
||||||
|
<Platform>Win32</Platform>
|
||||||
|
</ProjectConfiguration>
|
||||||
|
</ItemGroup>
|
||||||
|
<PropertyGroup Label="Globals">
|
||||||
|
<ProjectGuid>{6AAE716F-0F7C-4A75-94C9-83C86F18BEA3}</ProjectGuid>
|
||||||
|
<RootNamespace>rudict</RootNamespace>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>true</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v120</PlatformToolset>
|
||||||
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||||
|
<ConfigurationType>Application</ConfigurationType>
|
||||||
|
<UseDebugLibraries>false</UseDebugLibraries>
|
||||||
|
<PlatformToolset>v120</PlatformToolset>
|
||||||
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
|
<CharacterSet>MultiByte</CharacterSet>
|
||||||
|
</PropertyGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||||
|
<ImportGroup Label="ExtensionSettings">
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||||
|
</ImportGroup>
|
||||||
|
<PropertyGroup Label="UserMacros" />
|
||||||
|
<PropertyGroup />
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>Disabled</Optimization>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<AdditionalIncludeDirectories>C:/Workplace/boost_1_56_0</AdditionalIncludeDirectories>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
<AdditionalLibraryDirectories>C:/Workplace/boost_1_56_0/stage/lib</AdditionalLibraryDirectories>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
|
<ClCompile>
|
||||||
|
<WarningLevel>Level3</WarningLevel>
|
||||||
|
<Optimization>MaxSpeed</Optimization>
|
||||||
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
|
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||||
|
<SDLCheck>true</SDLCheck>
|
||||||
|
<AdditionalIncludeDirectories>C:/Workplace/boost_1_56_0</AdditionalIncludeDirectories>
|
||||||
|
</ClCompile>
|
||||||
|
<Link>
|
||||||
|
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||||
|
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||||
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
|
<AdditionalLibraryDirectories>C:/Workplace/boost_1_56_0/stage/lib</AdditionalLibraryDirectories>
|
||||||
|
</Link>
|
||||||
|
</ItemDefinitionGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="main.cpp" />
|
||||||
|
<ClCompile Include="noun.cpp" />
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClInclude Include="noun.h" />
|
||||||
|
</ItemGroup>
|
||||||
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
</ImportGroup>
|
||||||
|
</Project>
|
30
rudict/rudict/rudict.vcxproj.filters
Normal file
30
rudict/rudict/rudict.vcxproj.filters
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
|
<ItemGroup>
|
||||||
|
<Filter Include="Source Files">
|
||||||
|
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||||
|
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="Header Files">
|
||||||
|
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||||
|
<Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
|
||||||
|
</Filter>
|
||||||
|
<Filter Include="Resource Files">
|
||||||
|
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||||
|
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||||
|
</Filter>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClCompile Include="main.cpp">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="noun.cpp">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<ClInclude Include="noun.h">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
</ItemGroup>
|
||||||
|
</Project>
|
Loading…
Reference in New Issue
Block a user