linux stuff
This commit is contained in:
parent
6e25c55c14
commit
6797548c79
@ -5582,7 +5582,7 @@
|
|||||||
констатировать
|
констатировать
|
||||||
легендарный
|
легендарный
|
||||||
людской
|
людской
|
||||||
Люсин
|
люсин
|
||||||
обитатель
|
обитатель
|
||||||
перехватить
|
перехватить
|
||||||
пожить
|
пожить
|
||||||
@ -10874,7 +10874,7 @@
|
|||||||
упрекнуть
|
упрекнуть
|
||||||
файл
|
файл
|
||||||
форменный
|
форменный
|
||||||
xуй
|
хуй
|
||||||
читательский
|
читательский
|
||||||
энтузиаст
|
энтузиаст
|
||||||
ярый
|
ярый
|
||||||
|
@ -1,199 +1,199 @@
|
|||||||
#include "noun.h"
|
#include "noun.h"
|
||||||
|
|
||||||
#include <iostream> //Xperimental -- for debug only
|
#include <iostream> //Xperimental -- for debug only
|
||||||
|
|
||||||
#include "utf8utf16.h"
|
#include "utf8utf16.h"
|
||||||
|
|
||||||
|
|
||||||
std::wstring NounDeclencionToWString(NounDeclencion nounDeclencion)
|
std::wstring NounDeclencionToWString(NounDeclencion nounDeclencion)
|
||||||
{
|
{
|
||||||
switch (nounDeclencion)
|
switch (nounDeclencion)
|
||||||
{
|
{
|
||||||
case ND_1_HARD: return L"First declencion (hard type), female";
|
case ND_1_HARD: return L"First declencion (hard type), female";
|
||||||
case ND_1_SOFT: return L"First declencion (soft type), female";
|
case ND_1_SOFT: return L"First declencion (soft type), female";
|
||||||
case ND_2_HARD_MALE: return L"Second declencion (hard type), male";
|
case ND_2_HARD_MALE: return L"Second declencion (hard type), male";
|
||||||
case ND_2_SOFT_MALE: return L"Second declencion (soft type), male";
|
case ND_2_SOFT_MALE: return L"Second declencion (soft type), male";
|
||||||
case ND_2_NEUTER_O: return L"Second declencion, E-ending, neuter";
|
case ND_2_NEUTER_O: return L"Second declencion, E-ending, neuter";
|
||||||
case ND_2_NEUTER_E: return L"Second declencion, O-ending, neuter";
|
case ND_2_NEUTER_E: return L"Second declencion, O-ending, neuter";
|
||||||
case ND_3: return L"Third declencion, female";
|
case ND_3: return L"Third declencion, female";
|
||||||
}
|
}
|
||||||
|
|
||||||
return L"";
|
return L"";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase)
|
std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase)
|
||||||
{
|
{
|
||||||
switch (nounGrammaticalCase)
|
switch (nounGrammaticalCase)
|
||||||
{
|
{
|
||||||
case NGC_P1_NOMINATIVE: return L"Nominative case";
|
case NGC_P1_NOMINATIVE: return L"Nominative case";
|
||||||
case NGC_P2_GENITIVE: return L"Genitive case";
|
case NGC_P2_GENITIVE: return L"Genitive case";
|
||||||
case NGC_P3_DATIVE: return L"Dative case";
|
case NGC_P3_DATIVE: return L"Dative case";
|
||||||
case NGC_P4_ACCUSATIVE: return L"Accusative case";
|
case NGC_P4_ACCUSATIVE: return L"Accusative case";
|
||||||
case NGC_P5_INSTRUMENTAL: return L"Instrumental case";
|
case NGC_P5_INSTRUMENTAL: return L"Instrumental case";
|
||||||
case NGC_P6_PREPOSITIONAL: return L"Prepositional case";
|
case NGC_P6_PREPOSITIONAL: return L"Prepositional case";
|
||||||
}
|
}
|
||||||
|
|
||||||
return L"";
|
return L"";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::wstring NounNumberToWString(NounNumber nounNumber)
|
std::wstring NounNumberToWString(NounNumber nounNumber)
|
||||||
{
|
{
|
||||||
switch (nounNumber)
|
switch (nounNumber)
|
||||||
{
|
{
|
||||||
case NPF_SINGULAR: return L"Singular form";
|
case NPF_SINGULAR: return L"Singular form";
|
||||||
case NPF_PLURAL: return L"Plural form";
|
case NPF_PLURAL: return L"Plural form";
|
||||||
}
|
}
|
||||||
|
|
||||||
return L"";
|
return L"";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::set<std::wstring> frequentWordSet;
|
std::set<std::wstring> frequentWordSet;
|
||||||
|
|
||||||
// Returns every ending that may be split off a word form when looking for its stem.
//
// The list has to contain each ending that occurs in getNounEndingTable(),
// exactly once:
//  - a duplicated entry makes callers that iterate this list produce the same
//    stem/ending split twice;
//  - a missing entry makes the corresponding table rows unreachable.
// The empty string stands for the zero ending.
std::vector<std::wstring> GetAllNounEndingArr()
{
    std::vector<std::wstring> result
    {
        L"",
        L"а",
        L"и",
        L"е",
        L"у",
        L"ой",
        L"ы",
        L"ом",
        L"ь",
        L"я",
        L"ю",
        L"ем",
        L"о",
        L"ью",
        L"ам",
        L"ами",
        L"ах",
        L"ов",
        L"ей",
        L"ям",
        L"ях",
        L"ями",
        // Instrumental variants present in the ending table that were missing here.
        L"ою",
        L"ею",
    };

    return result;
}
|
||||||
|
|
||||||
std::map<NounTuple, StringSet> getNounEndingTable()
|
std::map<NounTuple, StringSet> getNounEndingTable()
|
||||||
{
|
{
|
||||||
std::map<NounTuple, StringSet> result;
|
std::map<NounTuple, StringSet> result;
|
||||||
|
|
||||||
//Singular
|
//Singular
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{ L"а" };
|
result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{ L"а" };
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"};
|
result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"};
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"};
|
result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"};
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"};
|
result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"};
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою" };
|
result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою" };
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
||||||
|
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"а"};
|
result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"а"};
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"ы"};
|
result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"ы"};
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"};
|
result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"е"};
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"};
|
result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"у"};
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою", L"ей", L"ею" };
|
result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{ L"ой", L"ою", L"ей", L"ею" };
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
||||||
|
|
||||||
|
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L""};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L""};
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"};
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"};
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L""};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L""};
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"};
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
||||||
|
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"};
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"};
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"};
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
||||||
|
|
||||||
|
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"о"};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"о"};
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"а"};
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"у"};
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"о"};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"о"};
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ом"};
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
||||||
|
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"е"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"е"};
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"я"};
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"ю"};
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"е"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"е"};
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ем"};
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"е"};
|
||||||
|
|
||||||
result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
|
result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
|
||||||
result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"};
|
result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_SINGULAR }] = StringSet{L"и"};
|
||||||
result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"и"};
|
result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_SINGULAR }] = StringSet{L"и"};
|
||||||
result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
|
result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_SINGULAR }] = StringSet{L"ь"};
|
||||||
result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ью"};
|
result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_SINGULAR }] = StringSet{L"ью"};
|
||||||
result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"и"};
|
result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_SINGULAR }] = StringSet{L"и"};
|
||||||
|
|
||||||
//Plural
|
//Plural
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
result[NounTuple{ ND_1_SOFT, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; //Xperimental -- need special modificator for suffix
|
result[NounTuple{ ND_1_SOFT, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""}; //Xperimental -- need special modificator for suffix
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
|
result[NounTuple{ ND_1_SOFT, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
result[NounTuple{ ND_1_SOFT, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
|
result[NounTuple{ ND_1_SOFT, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
|
||||||
result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
|
result[NounTuple{ ND_1_SOFT, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
|
||||||
|
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
result[NounTuple{ ND_1_HARD, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""};
|
result[NounTuple{ ND_1_HARD, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""};
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
|
result[NounTuple{ ND_1_HARD, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L""};
|
result[NounTuple{ ND_1_HARD, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L""};
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
|
result[NounTuple{ ND_1_HARD, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
|
||||||
result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
|
result[NounTuple{ ND_1_HARD, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
|
||||||
|
|
||||||
|
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"ы"};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"ы"};
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ов"};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ов"};
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"ы"};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"ы"};
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
|
||||||
result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
|
result[NounTuple{ ND_2_HARD_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
|
||||||
|
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"};
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"};
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"и"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"и"};
|
||||||
result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"};
|
result[NounTuple{ ND_2_SOFT_MALE, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"};
|
||||||
|
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"а"};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"а"};
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L""};
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ам"};
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"а"};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"а"};
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ами"};
|
||||||
result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
|
result[NounTuple{ ND_2_NEUTER_O, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ах"};
|
||||||
|
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"я"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"я"};
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"};
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"};
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"я"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"я"};
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"};
|
||||||
result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"};
|
result[NounTuple{ ND_2_NEUTER_E, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"};
|
||||||
|
|
||||||
result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
result[NounTuple{ ND_3, NGC_P1_NOMINATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
||||||
result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"};
|
result[NounTuple{ ND_3, NGC_P2_GENITIVE, NPF_PLURAL }] = StringSet{L"ей"};
|
||||||
result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"};
|
result[NounTuple{ ND_3, NGC_P3_DATIVE, NPF_PLURAL }] = StringSet{L"ям"};
|
||||||
result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
result[NounTuple{ ND_3, NGC_P4_ACCUSATIVE, NPF_PLURAL }] = StringSet{L"и"};
|
||||||
result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"};
|
result[NounTuple{ ND_3, NGC_P5_INSTRUMENTAL, NPF_PLURAL }] = StringSet{L"ями"};
|
||||||
result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"};
|
result[NounTuple{ ND_3, NGC_P6_PREPOSITIONAL, NPF_PLURAL }] = StringSet{L"ях"};
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool NounIsInDictionary(std::wstring nounNominative)
|
bool NounIsInDictionary(std::wstring nounNominative)
|
||||||
{
|
{
|
||||||
|
|
||||||
std::cout <<frequentWordSet.size() << std::endl;
|
std::cout <<frequentWordSet.size() << std::endl;
|
||||||
|
|
||||||
@ -202,7 +202,9 @@ std::cout <<"$$" << UTF16to8(nounNominative.c_str()) << std::endl;
|
|||||||
|
|
||||||
std::cout << "count" << frequentWordSet.count(nounNominative) << std::endl;
|
std::cout << "count" << frequentWordSet.count(nounNominative) << std::endl;
|
||||||
|
|
||||||
if (nounNominative == frequentWordSet.begin()->c_str())
|
std::wstring huy = *(frequentWordSet.find(L"баран"));
|
||||||
|
|
||||||
|
if (nounNominative == huy)
|
||||||
{
|
{
|
||||||
std::cout <<"true!" << std::endl;
|
std::cout <<"true!" << std::endl;
|
||||||
|
|
||||||
@ -212,227 +214,227 @@ else
|
|||||||
std::cout << "false!" << std::endl;
|
std::cout << "false!" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (frequentWordSet.count(nounNominative) != 0)
|
if (frequentWordSet.count(nounNominative) != 0)
|
||||||
{
|
{
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::set<NounDeclencion> GetPossibleNounDeclencionSet(std::wstring nounNominative)
|
std::set<NounDeclencion> GetPossibleNounDeclencionSet(std::wstring nounNominative)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (nounNominative.size() <= 1)
|
if (nounNominative.size() <= 1)
|
||||||
{
|
{
|
||||||
//Xperimental -- need to say that word is too short!
|
//Xperimental -- need to say that word is too short!
|
||||||
return{};
|
return{};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
wchar_t lastChar = nounNominative[nounNominative.size()-1];
|
wchar_t lastChar = nounNominative[nounNominative.size()-1];
|
||||||
wchar_t prevLastChar = nounNominative[nounNominative.size() - 2];
|
wchar_t prevLastChar = nounNominative[nounNominative.size() - 2];
|
||||||
|
|
||||||
if (lastChar == L'а')
|
if (lastChar == L'а')
|
||||||
{
|
{
|
||||||
return{ ND_1_HARD, ND_1_SOFT };
|
return{ ND_1_HARD, ND_1_SOFT };
|
||||||
}
|
}
|
||||||
if (lastChar == L'о')
|
if (lastChar == L'о')
|
||||||
{
|
{
|
||||||
return{ ND_2_NEUTER_O };
|
return{ ND_2_NEUTER_O };
|
||||||
}
|
}
|
||||||
if (lastChar == L'е')
|
if (lastChar == L'е')
|
||||||
{
|
{
|
||||||
return{ ND_2_NEUTER_E };
|
return{ ND_2_NEUTER_E };
|
||||||
}
|
}
|
||||||
if (lastChar == L'ь')
|
if (lastChar == L'ь')
|
||||||
{
|
{
|
||||||
return{ ND_2_SOFT_MALE };
|
return{ ND_2_SOFT_MALE };
|
||||||
}
|
}
|
||||||
|
|
||||||
return{ ND_2_HARD_MALE };
|
return{ ND_2_HARD_MALE };
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tells whether a character is one of the lowercase Russian consonant letters
// listed below.
//
// c -- character to test.
// Returns true when c is in the list; uppercase letters, 'ь', 'ъ' and any
// non-listed character give false.
bool charIsConsolant(wchar_t c)
{
    // Built once instead of on every call.
    static const std::wstring consonants = L"йцкнгшщзхфвпрлджчсмтб";

    return consonants.find(c) != std::wstring::npos;
}
|
||||||
|
|
||||||
// Tells whether a character is one of the lowercase Russian vowel letters
// listed below.
//
// c -- character to test.
// Returns true when c is in the list; uppercase letters and any non-listed
// character give false.
bool charIsVowel(wchar_t c)
{
    // Built once instead of on every call.
    static const std::wstring vowels = L"аоуыэяёюие";

    return vowels.find(c) != std::wstring::npos;
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<std::pair<std::wstring, std::wstring>> getPossibleNounEndingDivisionArr(std::wstring noun)
|
std::vector<std::pair<std::wstring, std::wstring>> getPossibleNounEndingDivisionArr(std::wstring noun)
|
||||||
{
|
{
|
||||||
std::vector<std::pair<std::wstring, std::wstring>> result;
|
std::vector<std::pair<std::wstring, std::wstring>> result;
|
||||||
|
|
||||||
auto allNounEndingArr = GetAllNounEndingArr();
|
auto allNounEndingArr = GetAllNounEndingArr();
|
||||||
|
|
||||||
for (auto ending : allNounEndingArr)
|
for (auto ending : allNounEndingArr)
|
||||||
{
|
{
|
||||||
if (boost::ends_with(noun, ending))
|
if (boost::ends_with(noun, ending))
|
||||||
{
|
{
|
||||||
std::wstring nounBase = boost::replace_last_copy(noun, ending, "");
|
std::wstring nounBase = boost::replace_last_copy(noun, ending, "");
|
||||||
|
|
||||||
if (charIsConsolant(nounBase[nounBase.size() - 1]))
|
if (charIsConsolant(nounBase[nounBase.size() - 1]))
|
||||||
{
|
{
|
||||||
result.push_back({ nounBase, ending});
|
result.push_back({ nounBase, ending});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding)
|
std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding)
|
||||||
{
|
{
|
||||||
std::vector<NounTuple> result;
|
std::vector<NounTuple> result;
|
||||||
|
|
||||||
auto nounEndingTable = getNounEndingTable();
|
auto nounEndingTable = getNounEndingTable();
|
||||||
|
|
||||||
for (auto i : nounEndingTable)
|
for (auto i : nounEndingTable)
|
||||||
{
|
{
|
||||||
if (i.second.count(nounEnding) != 0)
|
if (i.second.count(nounEnding) != 0)
|
||||||
{
|
{
|
||||||
result.push_back(i.first);
|
result.push_back(i.first);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<NounTuple> FilterNounTupleArrByNounDeclentionSet(std::vector<NounTuple> nounTupleArr, std::set<NounDeclencion> filter)
|
std::vector<NounTuple> FilterNounTupleArrByNounDeclentionSet(std::vector<NounTuple> nounTupleArr, std::set<NounDeclencion> filter)
|
||||||
{
|
{
|
||||||
std::vector<NounTuple> result;
|
std::vector<NounTuple> result;
|
||||||
|
|
||||||
for (auto nounTuple : nounTupleArr)
|
for (auto nounTuple : nounTupleArr)
|
||||||
{
|
{
|
||||||
if (filter.count(std::get<0>(nounTuple)) != 0)
|
if (filter.count(std::get<0>(nounTuple)) != 0)
|
||||||
{
|
{
|
||||||
result.push_back(nounTuple);
|
result.push_back(nounTuple);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::wstring RestoreNounByTuple(std::wstring nounBase, NounTuple nounTuple)
|
std::wstring RestoreNounByTuple(std::wstring nounBase, NounTuple nounTuple)
|
||||||
{
|
{
|
||||||
auto nounEndingTable = getNounEndingTable();
|
auto nounEndingTable = getNounEndingTable();
|
||||||
|
|
||||||
NounTuple nominativeNounTuple{ std::get<0>(nounTuple), NGC_P1_NOMINATIVE, NPF_SINGULAR };
|
NounTuple nominativeNounTuple{ std::get<0>(nounTuple), NGC_P1_NOMINATIVE, NPF_SINGULAR };
|
||||||
|
|
||||||
auto nounEndingSet = nounEndingTable[nominativeNounTuple];
|
auto nounEndingSet = nounEndingTable[nominativeNounTuple];
|
||||||
|
|
||||||
if (nounEndingSet.size() != 1)
|
if (nounEndingSet.size() != 1)
|
||||||
{
|
{
|
||||||
//throw std::exception("There is problem - noun have more than 1 form!");
|
//throw std::exception("There is problem - noun have more than 1 form!");
|
||||||
}
|
}
|
||||||
|
|
||||||
return nounBase + *(nounEndingTable[nominativeNounTuple].begin());
|
return nounBase + *(nounEndingTable[nominativeNounTuple].begin());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<NounStruct> RecognizeNoun(std::wstring noun)
|
std::vector<NounStruct> RecognizeNoun(std::wstring noun)
|
||||||
{
|
{
|
||||||
|
|
||||||
std::cout << "!" << UTF16to8(noun.c_str()) << std::endl;
|
std::cout << "!" << UTF16to8(noun.c_str()) << std::endl;
|
||||||
|
|
||||||
std::cout << "?" << UTF16to8(frequentWordSet.begin()->c_str()) <<std::endl;
|
std::cout << "?" << UTF16to8(frequentWordSet.begin()->c_str()) <<std::endl;
|
||||||
|
|
||||||
|
|
||||||
std::vector<NounStruct> result;
|
std::vector<NounStruct> result;
|
||||||
|
|
||||||
auto nounEndingDivisionArr = getPossibleNounEndingDivisionArr(noun);
|
auto nounEndingDivisionArr = getPossibleNounEndingDivisionArr(noun);
|
||||||
|
|
||||||
std::cout << nounEndingDivisionArr.size() << std::endl;
|
std::cout << nounEndingDivisionArr.size() << std::endl;
|
||||||
|
|
||||||
for (auto nounEndingDivision : nounEndingDivisionArr)
|
for (auto nounEndingDivision : nounEndingDivisionArr)
|
||||||
{
|
{
|
||||||
std::wstring nounBase = nounEndingDivision.first;
|
std::wstring nounBase = nounEndingDivision.first;
|
||||||
std::wstring nounEnding = nounEndingDivision.second;
|
std::wstring nounEnding = nounEndingDivision.second;
|
||||||
|
|
||||||
std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(nounEnding);
|
std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(nounEnding);
|
||||||
|
|
||||||
std::cout << "BASE" << UTF16to8(nounBase.c_str()) << std::endl;
|
std::cout << "BASE" << UTF16to8(nounBase.c_str()) << std::endl;
|
||||||
|
|
||||||
|
|
||||||
for (auto nounTuple : possibleTupleArr)
|
for (auto nounTuple : possibleTupleArr)
|
||||||
{
|
{
|
||||||
std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple);
|
std::wstring nounNominative = RestoreNounByTuple(nounBase, nounTuple);
|
||||||
|
|
||||||
std::cout <<"Nominative" << UTF16to8(nounNominative.c_str()) << std::endl;
|
std::cout <<"Nominative" << UTF16to8(nounNominative.c_str()) << std::endl;
|
||||||
|
|
||||||
auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative);
|
auto possibleNounDetectionSet = GetPossibleNounDeclencionSet(nounNominative);
|
||||||
|
|
||||||
|
|
||||||
std::cout <<"setsize" << possibleNounDetectionSet.size() << std::endl;
|
std::cout <<"setsize" << possibleNounDetectionSet.size() << std::endl;
|
||||||
|
|
||||||
if (possibleNounDetectionSet.count(std::get<0>(nounTuple)) != 0)
|
if (possibleNounDetectionSet.count(std::get<0>(nounTuple)) != 0)
|
||||||
{
|
{
|
||||||
|
|
||||||
std::cout<<"if1" << std::endl;
|
std::cout<<"if1" << std::endl;
|
||||||
if (NounIsInDictionary(nounNominative))
|
if (NounIsInDictionary(nounNominative))
|
||||||
{
|
{
|
||||||
std::cout <<"result1 go!" << std::endl;
|
std::cout <<"result1 go!" << std::endl;
|
||||||
result.push_back({ nounTuple, nounNominative });
|
result.push_back({ nounTuple, nounNominative });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void LoadFrequentWordSet()
|
void LoadFrequentWordSet()
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_words.txt");
|
std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_words.txt");
|
||||||
|
|
||||||
#else
|
#else
|
||||||
std::ifstream f("/home/devuser/workplace/rudict/frequent_words.txt");
|
std::ifstream f("/home/devuser/workplace/rudict/frequent_words.txt");
|
||||||
#endif
|
#endif
|
||||||
//f.imbue(std::locale(std::locale::empty(), new std::codecvt_utf8<wchar_t>));
|
//f.imbue(std::locale(std::locale::empty(), new std::codecvt_utf8<wchar_t>));
|
||||||
|
|
||||||
std::string line;
|
std::string line;
|
||||||
std::wstring wline;
|
std::wstring wline;
|
||||||
|
|
||||||
if (f.is_open())
|
if (f.is_open())
|
||||||
{
|
{
|
||||||
std::cout<<"File found!" << std::endl;
|
std::cout<<"File found!" << std::endl;
|
||||||
while (getline(f, line))
|
while (getline(f, line))
|
||||||
{
|
{
|
||||||
wline = UTF8to16(line.c_str());
|
wline = UTF8to16(line.c_str());
|
||||||
frequentWordSet.insert(wline);
|
frequentWordSet.insert(wline);
|
||||||
}
|
}
|
||||||
f.close();
|
f.close();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::cout <<"file not found!" << std::endl;
|
std::cout <<"file not found!" << std::endl;
|
||||||
|
Loading…
Reference in New Issue
Block a user