diff --git a/rudict/frequent_words.txt b/rudict/frequent_words.txt index 345fd66..33a21ca 100644 --- a/rudict/frequent_words.txt +++ b/rudict/frequent_words.txt @@ -5582,7 +5582,7 @@ констатировать легендарный людской -Люсин +люсин обитатель перехватить пожить @@ -10874,7 +10874,7 @@ упрекнуть файл форменный -xуй +хуй читательский энтузиаст ярый diff --git a/rudict/rudict/noun.cpp b/rudict/rudict/noun.cpp index 8a2abdf..faadf75 100644 --- a/rudict/rudict/noun.cpp +++ b/rudict/rudict/noun.cpp @@ -1,199 +1,199 @@ -#include "noun.h" - -#include //Xperimental -- for debug only - -#include "utf8utf16.h" - - -std::wstring NounDeclencionToWString(NounDeclencion nounDeclencion) -{ - switch (nounDeclencion) - { - case ND_1_HARD: return L"First declencion (hard type), female"; - case ND_1_SOFT: return L"First declencion (soft type), female"; - case ND_2_HARD_MALE: return L"Second declencion (hard type), male"; - case ND_2_SOFT_MALE: return L"Second declencion (soft type), male"; - case ND_2_NEUTER_O: return L"Second declencion, E-ending, neuter"; - case ND_2_NEUTER_E: return L"Second declencion, O-ending, neuter"; - case ND_3: return L"Third declencion, female"; - } - - return L""; -} - - -std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase) -{ - switch (nounGrammaticalCase) - { - case NGC_P1_NOMINATIVE: return L"Nominative case"; - case NGC_P2_GENITIVE: return L"Genitive case"; - case NGC_P3_DATIVE: return L"Dative case"; - case NGC_P4_ACCUSATIVE: return L"Accusative case"; - case NGC_P5_INSTRUMENTAL: return L"Instrumental case"; - case NGC_P6_PREPOSITIONAL: return L"Prepositional case"; - } - - return L""; -} - -std::wstring NounNumberToWString(NounNumber nounNumber) -{ - switch (nounNumber) - { - case NPF_SINGULAR: return L"Singular form"; - case NPF_PLURAL: return L"Plural form"; - } - - return L""; -} - -std::set frequentWordSet; - -std::vector GetAllNounEndingArr() -{ - std::vector result - { - L"", - L"а", - L"и", - L"е", - L"у", - L"ой", - L"ы", - L"ом", - L"ь", - L"я", - L"ю", - L"ем", - L"о", - L"ью", - L"ам", - L"ами", - L"ах", - L"ов", - L"ей", - L"ям", - L"ях", - L"я", - L"ями", - }; - - return result; -} - -std::map getNounEndingTable() -{ - std::map result; - - //Singular - result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{ L"а" }; - result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"}; - result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"}; - result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"}; - result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою" }; - result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; - - result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"а"}; - result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"ы"}; - result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"}; - result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"}; - result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою", L"ей", L"ею" }; - result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; - - - result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L""}; - result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"}; - result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"}; - result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L""}; - result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"}; - result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; - - result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; - result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"}; - result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"}; - result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; - result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"}; - result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; - - - result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"о"}; - result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"}; - result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"}; - result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"о"}; - result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"}; - result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; - - result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"е"}; - result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"}; - result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"}; - result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"е"}; - result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"}; - result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; - - result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; - result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"}; - result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"и"}; - result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; - result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ью"}; - result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"и"}; - - //Plural - result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; - result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; //Xperimental -- need special modificator for suffix - result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; - result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"}; - result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; - result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; - - result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; - result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; - result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; - result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L""}; - result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; - result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; - - - result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"ы"}; - result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ов"}; - result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; - result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"ы"}; - result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; - result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; - - result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; - result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"}; - result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"}; - result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"}; - result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"и"}; - result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"}; - - result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"а"}; - result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; - result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; - result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"а"}; - result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; - result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; - - result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"я"}; - result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"}; - result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"}; - result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"я"}; - result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"}; - result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"}; - - result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; - result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"}; - result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"}; - result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"}; - result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"}; - result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"}; - - return result; -} - -bool NounIsInDictionary(std::wstring nounNominative) -{ +#include "noun.h" + +#include //Xperimental -- for debug only + +#include "utf8utf16.h" + + +std::wstring NounDeclencionToWString(NounDeclencion nounDeclencion) +{ + switch (nounDeclencion) + { + case ND_1_HARD: return L"First declencion (hard type), female"; + case ND_1_SOFT: return L"First declencion (soft type), female"; + case ND_2_HARD_MALE: return L"Second declencion (hard type), male"; + case ND_2_SOFT_MALE: return L"Second declencion (soft type), male"; + case ND_2_NEUTER_O: return L"Second declencion, E-ending, neuter"; + case ND_2_NEUTER_E: return L"Second declencion, O-ending, neuter"; + case ND_3: return L"Third declencion, female"; + } + + return L""; +} + + +std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase) +{ + switch (nounGrammaticalCase) + { + case NGC_P1_NOMINATIVE: return L"Nominative case"; + case NGC_P2_GENITIVE: return L"Genitive case"; + case NGC_P3_DATIVE: return L"Dative case"; + case NGC_P4_ACCUSATIVE: return L"Accusative case"; + case NGC_P5_INSTRUMENTAL: return L"Instrumental case"; + case NGC_P6_PREPOSITIONAL: return L"Prepositional case"; + } + + return L""; +} + +std::wstring NounNumberToWString(NounNumber nounNumber) +{ + switch (nounNumber) + { + case NPF_SINGULAR: return L"Singular form"; + case NPF_PLURAL: return L"Plural form"; + } + + return L""; +} + +std::set frequentWordSet; + +std::vector GetAllNounEndingArr() +{ + std::vector result + { + L"", + L"а", + L"и", + L"е", + L"у", + L"ой", + L"ы", + L"ом", + L"ь", + L"я", + L"ю", + L"ем", + L"о", + L"ью", + L"ам", + L"ами", + L"ах", + L"ов", + L"ей", + L"ям", + L"ях", + L"я", + L"ями", + }; + + return result; +} + +std::map getNounEndingTable() +{ + std::map result; + + //Singular + result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{ L"а" }; + result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"}; + result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"}; + result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"}; + result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою" }; + result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; + + result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"а"}; + result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"ы"}; + result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"}; + result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"}; + result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою", L"ей", L"ею" }; + result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; + + + result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L""}; + result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"}; + result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"}; + result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L""}; + result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"}; + result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; + + result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; + result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"}; + result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"}; + result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; + result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"}; + result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; + + + result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"о"}; + result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"}; + result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"}; + result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"о"}; + result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"}; + result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; + + result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"е"}; + result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"}; + result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"}; + result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"е"}; + result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"}; + result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"}; + + result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; + result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"}; + result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"и"}; + result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"}; + result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ью"}; + result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"и"}; + + //Plural + result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; + result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; //Xperimental -- need special modificator for suffix + result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; + result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"}; + result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; + result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; + + result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; + result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; + result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; + result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L""}; + result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; + result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; + + + result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"ы"}; + result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ов"}; + result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; + result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"ы"}; + result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; + result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; + + result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; + result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"}; + result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"}; + result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"}; + result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"и"}; + result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"}; + + result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"а"}; + result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; + result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"}; + result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"а"}; + result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"}; + result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"}; + + result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"я"}; + result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"}; + result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"}; + result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"я"}; + result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"}; + result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"}; + + result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"}; + result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"}; + result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"}; + result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"}; + result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"}; + result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"}; + + return result; +} + +bool NounIsInDictionary(std::wstring nounNominative) +{ std::cout <c_str()) +std::wstring huy = *(frequentWordSet.find(L"баран")); + +if (nounNominative == huy) { std::cout <<"true!" << std::endl; @@ -212,227 +214,227 @@ else std::cout << "false!" << std::endl; } - if (frequentWordSet.count(nounNominative) != 0) - { + if (frequentWordSet.count(nounNominative) != 0) + { - return true; - } - return false; -} - - -std::set GetPossibleNounDeclencionSet(std::wstring nounNominative) -{ - - if (nounNominative.size() <= 1) - { - //Xperimental -- need to say that word is too short! - return{}; - } - - - wchar_t lastChar = nounNominative[nounNominative.size()-1]; - wchar_t prevLastChar = nounNominative[nounNominative.size() - 2]; - - if (lastChar == L'а') - { - return{ ND_1_HARD, ND_1_SOFT }; - } - if (lastChar == L'о') - { - return{ ND_2_NEUTER_O }; - } - if (lastChar == L'е') - { - return{ ND_2_NEUTER_E }; - } - if (lastChar == L'ь') - { - return{ ND_2_SOFT_MALE }; - } - - return{ ND_2_HARD_MALE }; -} - -bool charIsConsolant(wchar_t c) -{ - std::wstring consolants = L"йцкнгшщзхфвпрлджчсмтб"; - - for (wchar_t ic : consolants) - { - if (c == ic) - { - return true; - } - } - - return false; -} - -bool charIsVowel(wchar_t c) -{ - std::wstring vovels = L"аоуыэяёюие"; - - for (wchar_t ic : vovels) - { - if (c == ic) - { - return true; - } - } - - return false; -} - - -std::vector> getPossibleNounEndingDivisionArr(std::wstring noun) -{ - std::vector> result; - - auto allNounEndingArr = GetAllNounEndingArr(); - - for (auto ending : allNounEndingArr) - { - if (boost::ends_with(noun, ending)) - { - std::wstring nounBase = boost::replace_last_copy(noun, ending, ""); - - if (charIsConsolant(nounBase[nounBase.size() - 1])) - { - result.push_back({ nounBase, ending}); - } - } - } - - return result; -} - - -std::vector GetPossibleNounTupleArr(std::wstring nounEnding) -{ - std::vector result; - - auto nounEndingTable = getNounEndingTable(); - - for (auto i : nounEndingTable) - { - if (i.second.count(nounEnding) != 0) - { - result.push_back(i.first); - } - } - - return result; -} - -std::vector FilterNounTupleArrByNounDeclentionSet(std::vector nounTupleArr, std::set filter) -{ - std::vector result; - - for (auto nounTuple : nounTupleArr) - { - if (filter.count(std::get<0>(nounTuple)) != 0) - { - result.push_back(nounTuple); - } - } - - return result; -} - -std::wstring RestoreNounByTuple(std::wstring nounBase, NounTuple nounTuple) -{ - auto nounEndingTable = getNounEndingTable(); - - NounTuple nominativeNounTuple{ std::get<0>(nounTuple), NGC_P1_NOMINATIVE, NPF_SINGULAR }; - - auto nounEndingSet = nounEndingTable[nominativeNounTuple]; - - if (nounEndingSet.size() != 1) - { - //throw std::exception("There is problem - noun have more than 1 form!"); - } - - return nounBase + *(nounEndingTable[nominativeNounTuple].begin()); -} - -std::vector RecognizeNoun(std::wstring noun) + return true; + } + return false; +} + + +std::set GetPossibleNounDeclencionSet(std::wstring nounNominative) +{ + + if (nounNominative.size() <= 1) + { + //Xperimental -- need to say that word is too short! + return{}; + } + + + wchar_t lastChar = nounNominative[nounNominative.size()-1]; + wchar_t prevLastChar = nounNominative[nounNominative.size() - 2]; + + if (lastChar == L'а') + { + return{ ND_1_HARD, ND_1_SOFT }; + } + if (lastChar == L'о') + { + return{ ND_2_NEUTER_O }; + } + if (lastChar == L'е') + { + return{ ND_2_NEUTER_E }; + } + if (lastChar == L'ь') + { + return{ ND_2_SOFT_MALE }; + } + + return{ ND_2_HARD_MALE }; +} + +bool charIsConsolant(wchar_t c) +{ + std::wstring consolants = L"йцкнгшщзхфвпрлджчсмтб"; + + for (wchar_t ic : consolants) + { + if (c == ic) + { + return true; + } + } + + return false; +} + +bool charIsVowel(wchar_t c) +{ + std::wstring vovels = L"аоуыэяёюие"; + + for (wchar_t ic : vovels) + { + if (c == ic) + { + return true; + } + } + + return false; +} + + +std::vector> getPossibleNounEndingDivisionArr(std::wstring noun) +{ + std::vector> result; + + auto allNounEndingArr = GetAllNounEndingArr(); + + for (auto ending : allNounEndingArr) + { + if (boost::ends_with(noun, ending)) + { + std::wstring nounBase = boost::replace_last_copy(noun, ending, ""); + + if (charIsConsolant(nounBase[nounBase.size() - 1])) + { + result.push_back({ nounBase, ending}); + } + } + } + + return result; +} + + +std::vector GetPossibleNounTupleArr(std::wstring nounEnding) +{ + std::vector result; + + auto nounEndingTable = getNounEndingTable(); + + for (auto i : nounEndingTable) + { + if (i.second.count(nounEnding) != 0) + { + result.push_back(i.first); + } + } + + return result; +} + +std::vector FilterNounTupleArrByNounDeclentionSet(std::vector nounTupleArr, std::set filter) +{ + std::vector result; + + for (auto nounTuple : nounTupleArr) + { + if (filter.count(std::get<0>(nounTuple)) != 0) + { + result.push_back(nounTuple); + } + } + + return result; +} + +std::wstring RestoreNounByTuple(std::wstring nounBase, NounTuple nounTuple) +{ + auto nounEndingTable = getNounEndingTable(); + + NounTuple nominativeNounTuple{ std::get<0>(nounTuple), NGC_P1_NOMINATIVE, NPF_SINGULAR }; + + auto nounEndingSet = nounEndingTable[nominativeNounTuple]; + + if (nounEndingSet.size() != 1) + { + //throw std::exception("There is problem - noun have more than 1 form!"); + } + + return nounBase + *(nounEndingTable[nominativeNounTuple].begin()); +} + +std::vector RecognizeNoun(std::wstring noun) { std::cout << "!" << UTF16to8(noun.c_str()) << std::endl; std::cout << "?" << UTF16to8(frequentWordSet.begin()->c_str()) < result; - - auto nounEndingDivisionArr = getPossibleNounEndingDivisionArr(noun); - + + + std::vector result; + + auto nounEndingDivisionArr = getPossibleNounEndingDivisionArr(noun); + std::cout << nounEndingDivisionArr.size() << std::endl; - for (auto nounEndingDivision : nounEndingDivisionArr) - { - std::wstring nounBase = nounEndingDivision.first; - std::wstring nounEnding = nounEndingDivision.second; - - std::vector possibleTupleArr = GetPossibleNounTupleArr(nounEnding); + for (auto nounEndingDivision : nounEndingDivisionArr) + { + std::wstring nounBase = nounEndingDivision.first; + std::wstring nounEnding = nounEndingDivision.second; + + std::vector possibleTupleArr = GetPossibleNounTupleArr(nounEnding); std::cout << "BASE" << UTF16to8(nounBase.c_str()) << std::endl; - - - for (auto nounTuple : possibleTupleArr) - { - std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple); - + + + for (auto nounTuple : possibleTupleArr) + { + std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple); + std::cout <<"Nominative" << UTF16to8(nounNominative.c_str()) << std::endl; - - auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative); - + + auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative); + std::cout <<"setsize" << possibleNounDetectionSet.size() << std::endl; - if (possibleNounDetectionSet.count(std::get<0>(nounTuple)) != 0) - { + if (possibleNounDetectionSet.count(std::get<0>(nounTuple)) != 0) + { std::cout<<"if1" << std::endl; - if (NounIsInDictionary(nounNominative)) - { + if (NounIsInDictionary(nounNominative)) + { std::cout <<"result1 go!" << std::endl; - result.push_back({ nounTuple, nounNominative }); - } - } - - } - - - } - - return result; -} - -void LoadFrequentWordSet() -{ + result.push_back({ nounTuple, nounNominative }); + } + } + + } + + + } + + return result; +} + +void LoadFrequentWordSet() +{ #ifdef _WIN32 - std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_words.txt"); - + std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_words.txt"); + #else std::ifstream f("/home/devuser/workplace/rudict/frequent_words.txt"); #endif - //f.imbue(std::locale(std::locale::empty(), new std::codecvt_utf8)); - - std::string line; - std::wstring wline; - - if (f.is_open()) - { + //f.imbue(std::locale(std::locale::empty(), new std::codecvt_utf8)); + + std::string line; + std::wstring wline; + + if (f.is_open()) + { std::cout<<"File found!" << std::endl; - while (getline(f, line)) - { - wline = UTF8to16(line.c_str()); - frequentWordSet.insert(wline); - } - f.close(); - } + while (getline(f, line)) + { + wline = UTF8to16(line.c_str()); + frequentWordSet.insert(wline); + } + f.close(); + } else { std::cout <<"file not found!" << std::endl;