linux stuff

This commit is contained in:
Vladislav Khorev 2014-11-27 08:57:13 +00:00
parent 6e25c55c14
commit 6797548c79
2 changed files with 400 additions and 398 deletions

View File

@ -5582,7 +5582,7 @@
констатировать констатировать
легендарный легендарный
людской людской
Люсин люсин
обитатель обитатель
перехватить перехватить
пожить пожить
@ -10874,7 +10874,7 @@
упрекнуть упрекнуть
файл файл
форменный форменный
xуй хуй
читательский читательский
энтузиаст энтузиаст
ярый ярый

View File

@ -1,199 +1,199 @@
#include "noun.h" #include "noun.h"
#include <iostream> //Xperimental -- for debug only #include <iostream> //Xperimental -- for debug only
#include "utf8utf16.h" #include "utf8utf16.h"
std::wstring NounDeclencionToWString(NounDeclencion nounDeclencion) std::wstring NounDeclencionToWString(NounDeclencion nounDeclencion)
{ {
switch (nounDeclencion) switch (nounDeclencion)
{ {
case ND_1_HARD: return L"First declencion (hard type), female"; case ND_1_HARD: return L"First declencion (hard type), female";
case ND_1_SOFT: return L"First declencion (soft type), female"; case ND_1_SOFT: return L"First declencion (soft type), female";
case ND_2_HARD_MALE: return L"Second declencion (hard type), male"; case ND_2_HARD_MALE: return L"Second declencion (hard type), male";
case ND_2_SOFT_MALE: return L"Second declencion (soft type), male"; case ND_2_SOFT_MALE: return L"Second declencion (soft type), male";
case ND_2_NEUTER_O: return L"Second declencion, E-ending, neuter"; case ND_2_NEUTER_O: return L"Second declencion, E-ending, neuter";
case ND_2_NEUTER_E: return L"Second declencion, O-ending, neuter"; case ND_2_NEUTER_E: return L"Second declencion, O-ending, neuter";
case ND_3: return L"Third declencion, female"; case ND_3: return L"Third declencion, female";
} }
return L""; return L"";
} }
std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase) std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase)
{ {
switch (nounGrammaticalCase) switch (nounGrammaticalCase)
{ {
case NGC_P1_NOMINATIVE: return L"Nominative case"; case NGC_P1_NOMINATIVE: return L"Nominative case";
case NGC_P2_GENITIVE: return L"Genitive case"; case NGC_P2_GENITIVE: return L"Genitive case";
case NGC_P3_DATIVE: return L"Dative case"; case NGC_P3_DATIVE: return L"Dative case";
case NGC_P4_ACCUSATIVE: return L"Accusative case"; case NGC_P4_ACCUSATIVE: return L"Accusative case";
case NGC_P5_INSTRUMENTAL: return L"Instrumental case"; case NGC_P5_INSTRUMENTAL: return L"Instrumental case";
case NGC_P6_PREPOSITIONAL: return L"Prepositional case"; case NGC_P6_PREPOSITIONAL: return L"Prepositional case";
} }
return L""; return L"";
} }
std::wstring NounNumberToWString(NounNumber nounNumber) std::wstring NounNumberToWString(NounNumber nounNumber)
{ {
switch (nounNumber) switch (nounNumber)
{ {
case NPF_SINGULAR: return L"Singular form"; case NPF_SINGULAR: return L"Singular form";
case NPF_PLURAL: return L"Plural form"; case NPF_PLURAL: return L"Plural form";
} }
return L""; return L"";
} }
// Global word list: filled by LoadFrequentWordSet() and queried by
// NounIsInDictionary() to decide whether a restored nominative form is a known word.
std::set<std::wstring> frequentWordSet; std::set<std::wstring> frequentWordSet;
// Returns every case ending that getPossibleNounEndingDivisionArr() tries to
// strip from a word, including the empty (zero) ending.
//
// Each ending appears exactly once: a repeated entry would make the caller
// produce the same stem/ending split twice. The order of the entries determines
// the order in which splits are produced.
std::vector<std::wstring> GetAllNounEndingArr()
{
    return
    {
        L"",
        L"а",
        L"и",
        L"е",
        L"у",
        L"ой",
        L"ы",
        L"ом",
        L"ь",
        L"я",
        L"ю",
        L"ем",
        L"о",
        L"ью",
        L"ам",
        L"ами",
        L"ах",
        L"ов",
        L"ей",
        L"ям",
        L"ях",
        L"ями",
        // Instrumental variants that getNounEndingTable() lists for the first
        // declension; without them those table rows can never be matched.
        L"ою",
        L"ею",
    };
}
std::map<NounTuple, StringSet> getNounEndingTable() std::map<NounTuple, StringSet> getNounEndingTable()
{ {
std::map<NounTuple, StringSet> result; std::map<NounTuple, StringSet> result;
//Singular //Singular
result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{ L"а" }; result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{ L"а" };
result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"}; result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"};
result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"}; result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"};
result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"}; result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"};
result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою" }; result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою" };
result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"а"}; result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"а"};
result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"ы"}; result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"ы"};
result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"}; result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"};
result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"}; result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"};
result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою", L"ей", L"ею" }; result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою", L"ей", L"ею" };
result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L""}; result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L""};
result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"}; result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"};
result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"}; result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"};
result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L""}; result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L""};
result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"}; result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"};
result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"о"}; result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"о"};
result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"}; result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"};
result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"}; result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"};
result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"о"}; result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"о"};
result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"}; result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"};
result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"е"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"е"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"е"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"е"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"}; result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"};
result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"и"}; result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"и"};
result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ью"}; result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ью"};
result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"и"}; result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"и"};
//Plural //Plural
result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; //Xperimental -- need special modificator for suffix result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; //Xperimental -- need special modificator for suffix
result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"}; result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"};
result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""};
result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L""}; result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L""};
result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"ы"}; result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"ы"};
result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ов"}; result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ов"};
result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"ы"}; result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"ы"};
result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"и"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"и"};
result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"}; result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"};
result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"а"}; result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"а"};
result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""};
result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"а"}; result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"а"};
result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"я"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"я"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"я"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"я"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"};
result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"}; result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"};
result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"}; result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"};
result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"}; result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"};
result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"}; result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"};
result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"}; result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"};
result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"}; result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"};
return result; return result;
} }
bool NounIsInDictionary(std::wstring nounNominative) bool NounIsInDictionary(std::wstring nounNominative)
{ {
std::cout <<frequentWordSet.size() << std::endl; std::cout <<frequentWordSet.size() << std::endl;
@ -202,7 +202,9 @@ std::cout <<"$$" << UTF16to8(nounNominative.c_str()) << std::endl;
std::cout << "count" << frequentWordSet.count(nounNominative) << std::endl; std::cout << "count" << frequentWordSet.count(nounNominative) << std::endl;
if (nounNominative == frequentWordSet.begin()->c_str()) std::wstring huy = *(frequentWordSet.find(L"баран"));
if (nounNominative == huy)
{ {
std::cout <<"true!" << std::endl; std::cout <<"true!" << std::endl;
@ -212,227 +214,227 @@ else
std::cout << "false!" << std::endl; std::cout << "false!" << std::endl;
} }
if (frequentWordSet.count(nounNominative) != 0) if (frequentWordSet.count(nounNominative) != 0)
{ {
return true; return true;
} }
return false; return false;
} }
std::set<NounDeclencion> GetPossibleNounDeclencionSet(std::wstring nounNominative) std::set<NounDeclencion> GetPossibleNounDeclencionSet(std::wstring nounNominative)
{ {
if (nounNominative.size() <= 1) if (nounNominative.size() <= 1)
{ {
//Xperimental -- need to say that word is too short! //Xperimental -- need to say that word is too short!
return{}; return{};
} }
wchar_t lastChar = nounNominative[nounNominative.size()-1]; wchar_t lastChar = nounNominative[nounNominative.size()-1];
wchar_t prevLastChar = nounNominative[nounNominative.size() - 2]; wchar_t prevLastChar = nounNominative[nounNominative.size() - 2];
if (lastChar == L'а') if (lastChar == L'а')
{ {
return{ ND_1_HARD, ND_1_SOFT }; return{ ND_1_HARD, ND_1_SOFT };
} }
if (lastChar == L'о') if (lastChar == L'о')
{ {
return{ ND_2_NEUTER_O }; return{ ND_2_NEUTER_O };
} }
if (lastChar == L'е') if (lastChar == L'е')
{ {
return{ ND_2_NEUTER_E }; return{ ND_2_NEUTER_E };
} }
if (lastChar == L'ь') if (lastChar == L'ь')
{ {
return{ ND_2_SOFT_MALE }; return{ ND_2_SOFT_MALE };
} }
return{ ND_2_HARD_MALE }; return{ ND_2_HARD_MALE };
} }
// Tells whether a character is one of the lowercase Russian consonant letters
// listed below.
//
// c - character to classify.
// Returns true only when c occurs in the list; upper-case letters, the signs
// "ь"/"ъ" and any non-Cyrillic character yield false.
bool charIsConsolant(wchar_t c)
{
    const std::wstring consonantLetters = L"йцкнгшщзхфвпрлджчсмтб";
    return consonantLetters.find(c) != std::wstring::npos;
}
// Tells whether a character is one of the lowercase Russian vowel letters
// listed below.
//
// c - character to classify.
// Returns true only when c occurs in the list; upper-case letters and any
// other character yield false.
bool charIsVowel(wchar_t c)
{
    const std::wstring vowelLetters = L"аоуыэяёюие";
    return vowelLetters.find(c) != std::wstring::npos;
}
std::vector<std::pair<std::wstring, std::wstring>> getPossibleNounEndingDivisionArr(std::wstring noun) std::vector<std::pair<std::wstring, std::wstring>> getPossibleNounEndingDivisionArr(std::wstring noun)
{ {
std::vector<std::pair<std::wstring, std::wstring>> result; std::vector<std::pair<std::wstring, std::wstring>> result;
auto allNounEndingArr = GetAllNounEndingArr(); auto allNounEndingArr = GetAllNounEndingArr();
for (auto ending : allNounEndingArr) for (auto ending : allNounEndingArr)
{ {
if (boost::ends_with(noun, ending)) if (boost::ends_with(noun, ending))
{ {
std::wstring nounBase = boost::replace_last_copy(noun, ending, ""); std::wstring nounBase = boost::replace_last_copy(noun, ending, "");
if (charIsConsolant(nounBase[nounBase.size() - 1])) if (charIsConsolant(nounBase[nounBase.size() - 1]))
{ {
result.push_back({ nounBase, ending}); result.push_back({ nounBase, ending});
} }
} }
} }
return result; return result;
} }
std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding) std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding)
{ {
std::vector<NounTuple> result; std::vector<NounTuple> result;
auto nounEndingTable = getNounEndingTable(); auto nounEndingTable = getNounEndingTable();
for (auto i : nounEndingTable) for (auto i : nounEndingTable)
{ {
if (i.second.count(nounEnding) != 0) if (i.second.count(nounEnding) != 0)
{ {
result.push_back(i.first); result.push_back(i.first);
} }
} }
return result; return result;
} }
std::vector<NounTuple> FilterNounTupleArrByNounDeclentionSet(std::vector<NounTuple> nounTupleArr, std::set<NounDeclencion> filter) std::vector<NounTuple> FilterNounTupleArrByNounDeclentionSet(std::vector<NounTuple> nounTupleArr, std::set<NounDeclencion> filter)
{ {
std::vector<NounTuple> result; std::vector<NounTuple> result;
for (auto nounTuple : nounTupleArr) for (auto nounTuple : nounTupleArr)
{ {
if (filter.count(std::get<0>(nounTuple)) != 0) if (filter.count(std::get<0>(nounTuple)) != 0)
{ {
result.push_back(nounTuple); result.push_back(nounTuple);
} }
} }
return result; return result;
} }
std::wstring RestoreNounByTuple(std::wstring nounBase, NounTuple nounTuple) std::wstring RestoreNounByTuple(std::wstring nounBase, NounTuple nounTuple)
{ {
auto nounEndingTable = getNounEndingTable(); auto nounEndingTable = getNounEndingTable();
NounTuple nominativeNounTuple{ std::get<0>(nounTuple), NGC_P1_NOMINATIVE, NPF_SINGULAR }; NounTuple nominativeNounTuple{ std::get<0>(nounTuple), NGC_P1_NOMINATIVE, NPF_SINGULAR };
auto nounEndingSet = nounEndingTable[nominativeNounTuple]; auto nounEndingSet = nounEndingTable[nominativeNounTuple];
if (nounEndingSet.size() != 1) if (nounEndingSet.size() != 1)
{ {
//throw std::exception("There is problem - noun have more than 1 form!"); //throw std::exception("There is problem - noun have more than 1 form!");
} }
return nounBase + *(nounEndingTable[nominativeNounTuple].begin()); return nounBase + *(nounEndingTable[nominativeNounTuple].begin());
} }
std::vector<NounStruct> RecognizeNoun(std::wstring noun) std::vector<NounStruct> RecognizeNoun(std::wstring noun)
{ {
std::cout << "!" << UTF16to8(noun.c_str()) << std::endl; std::cout << "!" << UTF16to8(noun.c_str()) << std::endl;
std::cout << "?" << UTF16to8(frequentWordSet.begin()->c_str()) <<std::endl; std::cout << "?" << UTF16to8(frequentWordSet.begin()->c_str()) <<std::endl;
std::vector<NounStruct> result; std::vector<NounStruct> result;
auto nounEndingDivisionArr = getPossibleNounEndingDivisionArr(noun); auto nounEndingDivisionArr = getPossibleNounEndingDivisionArr(noun);
std::cout << nounEndingDivisionArr.size() << std::endl; std::cout << nounEndingDivisionArr.size() << std::endl;
for (auto nounEndingDivision : nounEndingDivisionArr) for (auto nounEndingDivision : nounEndingDivisionArr)
{ {
std::wstring nounBase = nounEndingDivision.first; std::wstring nounBase = nounEndingDivision.first;
std::wstring nounEnding = nounEndingDivision.second; std::wstring nounEnding = nounEndingDivision.second;
std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(nounEnding); std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(nounEnding);
std::cout << "BASE" << UTF16to8(nounBase.c_str()) << std::endl; std::cout << "BASE" << UTF16to8(nounBase.c_str()) << std::endl;
for (auto nounTuple : possibleTupleArr) for (auto nounTuple : possibleTupleArr)
{ {
std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple); std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple);
std::cout <<"Nominative" << UTF16to8(nounNominative.c_str()) << std::endl; std::cout <<"Nominative" << UTF16to8(nounNominative.c_str()) << std::endl;
auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative); auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative);
std::cout <<"setsize" << possibleNounDetectionSet.size() << std::endl; std::cout <<"setsize" << possibleNounDetectionSet.size() << std::endl;
if (possibleNounDetectionSet.count(std::get<0>(nounTuple)) != 0) if (possibleNounDetectionSet.count(std::get<0>(nounTuple)) != 0)
{ {
std::cout<<"if1" << std::endl; std::cout<<"if1" << std::endl;
if (NounIsInDictionary(nounNominative)) if (NounIsInDictionary(nounNominative))
{ {
std::cout <<"result1 go!" << std::endl; std::cout <<"result1 go!" << std::endl;
result.push_back({ nounTuple, nounNominative }); result.push_back({ nounTuple, nounNominative });
} }
} }
} }
} }
return result; return result;
} }
void LoadFrequentWordSet() void LoadFrequentWordSet()
{ {
#ifdef _WIN32 #ifdef _WIN32
std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_words.txt"); std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_words.txt");
#else #else
std::ifstream f("/home/devuser/workplace/rudict/frequent_words.txt"); std::ifstream f("/home/devuser/workplace/rudict/frequent_words.txt");
#endif #endif
//f.imbue(std::locale(std::locale::empty(), new std::codecvt_utf8<wchar_t>)); //f.imbue(std::locale(std::locale::empty(), new std::codecvt_utf8<wchar_t>));
std::string line; std::string line;
std::wstring wline; std::wstring wline;
if (f.is_open()) if (f.is_open())
{ {
std::cout<<"File found!" << std::endl; std::cout<<"File found!" << std::endl;
while (getline(f, line)) while (getline(f, line))
{ {
wline = UTF8to16(line.c_str()); wline = UTF8to16(line.c_str());
frequentWordSet.insert(wline); frequentWordSet.insert(wline);
} }
f.close(); f.close();
} }
else else
{ {
std::cout <<"file not found!" << std::endl; std::cout <<"file not found!" << std::endl;