diff --git a/rudict/frequent_nouns_2000.csv b/rudict/frequent_nouns_2000.csv index 56dbf6b..3611bb8 100644 --- a/rudict/frequent_nouns_2000.csv +++ b/rudict/frequent_nouns_2000.csv @@ -1,4 +1,4 @@ -Номер;Слово;Род / число;Есть единственное число;Есть множественное число;Есть стандартное множественное число;Есть стандартное множественное число, полученное путем удаления последней гласной из базы;Есть стандартное окончание множественного числа;Есть альтернативное окончание множественного числа;Специальные мн. числа;Может быть одушевленным;Может быть неодушевленным;Специальная форма падежа;1 +Номер;Слово;Род / число;Есть единственное число;Есть множественное число;Есть стандартное множественное число;Есть стандартное множественное число, полученное путем удаления последней гласной из базы;Есть стандартное окончание множественного числа;"Есть альтернативное окончание множественного числа ""а/я""";Специальные мн. числа;Может быть одушевленным;Может быть неодушевленным;Специальная форма падежа;1 307;любовь;ж;1;0;0;0;0;0;;0;1;;1 338;помощь;ж;1;0;0;0;0;0;;0;1;;1 589;борьба;ж;1;0;0;0;0;0;;0;1;;1 @@ -73,7 +73,7 @@ 292;улица;ж;1;1;1;0;1;0;;0;1;;1 299;мысль;ж;1;1;1;0;1;0;;0;1;;1 300;дорога;ж;1;1;1;0;1;0;;0;1;;1 -301;мать;ж;1;1;0;0;1;0;матери;1;0;;1 +301;мать;ж;1;1;0;0;0;0;матери;1;0;;1 309;мама;ж;1;1;1;0;1;0;;1;0;;1 315;школа;ж;1;1;1;0;1;0;;0;1;;1 318;цель;ж;1;1;1;0;1;0;;0;1;;1 @@ -144,7 +144,7 @@ 808;площадь;ж;1;1;1;0;1;0;;0;1;;1 823;линия;ж;1;1;1;0;1;0;;0;1;;1 830;губа;ж;1;1;1;0;1;0;;0;1;;1 -834;дочь;ж;1;1;0;0;1;0;дочери;0;1;;1 +834;дочь;ж;1;1;0;0;0;0;дочери;1;0;;1 836;среда;ж;1;1;1;0;1;0;;0;1;;1 862;грудь;ж;1;1;1;0;1;0;;0;1;;1 869;услуга;ж;1;1;1;0;1;0;;0;1;;1 @@ -370,7 +370,7 @@ 479;проект;м;1;1;1;0;1;0;;0;1;;1 484;директор;м;1;1;1;0;1;1;;1;0;;1 494;срок;м;1;1;1;0;1;0;;0;1;;1 -495;палец;м;1;1;0;0;0;0;пальцы;0;1;;1 +495;палец;м;1;1;0;1;0;0;;0;1;;1 496;опыт;м;1;1;1;0;1;0;;0;1;;1 512;лес;м;1;1;0;0;0;1;;0;1;;1 517;состав;м;1;1;1;0;1;0;;0;1;;1 @@ -457,7 +457,7 @@ 839;солдат;м;1;1;1;0;1;0;;1;0;;1 840;художник;м;1;1;1;0;1;0;;1;0;;1 842;волос;м;1;1;1;0;1;0;;0;1;;1 -847;ветер;м;1;1;0;1;0;0;ветра;0;1;;1 +847;ветер;м;1;1;0;1;1;1;;0;1;;1 849;парень;м;1;1;0;1;1;0;;1;0;;1 853;генерал;м;1;1;1;0;1;0;;1;0;;1 855;огонь;м;1;1;0;1;1;0;;0;1;;1 @@ -570,7 +570,7 @@ 1396;март;м;1;1;1;0;1;0;;0;1;;1 1398;клиент;м;1;1;1;0;1;0;;1;0;;1 1403;фронт;м;1;1;1;0;1;0;;0;1;;1 -1406;стул;м;1;1;0;0;0;0;стулья;0;1;;1 +1406;стул;м;1;1;0;0;1;0;стулья;0;1;;1 1420;музей;м;1;1;1;0;1;0;;0;1;;1 1423;след;м;1;1;1;0;1;0;;0;1;;1 1425;полковник;м;1;1;1;0;1;0;;1;0;;1 @@ -659,18 +659,18 @@ 1854;эксперт;м;1;1;1;0;1;0;;1;0;;1 1861;автобус;м;1;1;1;0;1;0;;0;1;;1 1866;кадр;м;1;1;1;0;1;0;;1;1;;1 -1874;лев;м;1;1;0;1;1;0;львы;1;0;;1 +1874;лев;м;1;1;0;1;1;0;;1;0;;1 1876;порог;м;1;1;1;0;1;0;;0;1;;1 1881;ремонт;м;1;1;1;0;1;0;;0;1;;1 1891;памятник;м;1;1;1;0;1;0;;0;1;;1 1892;корень;м;1;1;0;1;1;0;;0;1;;1 1905;штаб;м;1;1;1;0;1;0;;0;1;;1 -1907;владелец;м;1;1;0;1;1;0;владельцы;1;0;;1 +1907;владелец;м;1;1;0;1;1;0;;1;0;;1 1908;компьютер;м;1;1;1;0;1;0;;0;1;;1 1911;инженер;м;1;1;1;0;1;0;;1;0;;1 1926;выпуск;м;1;1;1;0;1;0;;0;1;;1 1927;кулак;м;1;1;1;0;1;0;;0;1;;1 -1928;лед;м;1;1;0;1;1;0;льды;0;1;;1 +1928;лед;м;1;1;0;1;1;0;;0;1;;1 1938;корпус;м;1;1;1;0;1;1;;0;1;;1 1940;недостаток;м;1;1;0;1;1;0;;0;1;;1 1942;сапог;м;1;1;1;0;1;0;;0;1;;1 diff --git a/rudict/frequent_nouns_2000.xlsx b/rudict/frequent_nouns_2000.xlsx index a78ef5d..2524d0a 100644 Binary files a/rudict/frequent_nouns_2000.xlsx and b/rudict/frequent_nouns_2000.xlsx differ diff --git a/rudict/grammar_case.csv b/rudict/grammar_case.csv index b4b984f..ca9bdfe 100644 --- a/rudict/grammar_case.csv +++ b/rudict/grammar_case.csv @@ -7,7 +7,7 @@ FIRST_A_IFORM_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;а;рука ;;NGC_P6_PREPOSITIONAL;е;говорю о руке ;NC_PLURAL;NGC_P1_NOMINATIVE;и;руки ;;NGC_P2_GENITIVE;;подарок для рук -;;NGC_P3_DATIVE;е;подарок рукам +;;NGC_P3_DATIVE;ам;подарок рукам ;;NGC_P4_ACCUSATIVE;и;обвиняю руки ;;NGC_P5_INSTRUMENTAL;ами;говорю с руками ;;NGC_P6_PREPOSITIONAL;ах;говорю о руках diff --git a/rudict/grammar_case.xlsx b/rudict/grammar_case.xlsx index 2caad6c..ffc9621 100644 Binary files a/rudict/grammar_case.xlsx and b/rudict/grammar_case.xlsx differ diff --git a/rudict/rudict/main.cpp b/rudict/rudict/main.cpp index 1d9cd7b..ea6df40 100644 --- a/rudict/rudict/main.cpp +++ b/rudict/rudict/main.cpp @@ -11,6 +11,7 @@ int main() SetupDeclentionMap(); LoadNounDeclencionCaseTable(); LoadFrequentWordSet(); + FillDivisionCaseMaps(); //RecognizeNoun(L"стульями"); //Косяк: "вечер" diff --git a/rudict/rudict/noun.cpp b/rudict/rudict/noun.cpp index 33e1590..fa10204 100644 --- a/rudict/rudict/noun.cpp +++ b/rudict/rudict/noun.cpp @@ -77,7 +77,7 @@ NounDeclencion WStringToNounDeclencion(std::wstring str) } if (str == L"FIRST_A_IFORM_ANIMATE") { - return FIRST_A_IFORM_INANIMATE; + return FIRST_A_IFORM_ANIMATE; } if (str == L"FIRST_A_UFORM_INANIMATE") { @@ -274,6 +274,11 @@ bool NounSpecialPluralFormIsInDictionary(std::wstring nounNominativePlural) { return true; } + + if (!noun.haveSingleForm && noun.nominativeForm == nounNominativePlural) + { + return true; + } } return false; @@ -301,6 +306,11 @@ NounRecord GetNounRecordFromDictionary_BySpecialPluralForm(std::wstring nounNomi { return noun; } + + if (!noun.haveSingleForm && noun.nominativeForm == nounNominativePlural) + { + return noun; + } } return{}; @@ -339,6 +349,26 @@ bool charIsVowel(wchar_t c) } +bool charIsMissingVowelSoftenerConsolant(wchar_t c) +{ + //This test belongs to missing vowel case. + //лев -> львы (because л, then е replaced by soft sign) + //немец -> немцы (because not л, the е is not replaced, just missing) + + std::wstring consolants = L"л"; + + for (wchar_t ic : consolants) + { + if (c == ic) + { + return true; + } + } + + return false; +} + + std::set getPossibleNounEndingDivisionSet(std::wstring noun) { std::set result; @@ -354,6 +384,7 @@ std::set getPossibleNounEndingDivisionSet(std::wstring noun) if (charIsConsolant(nounBase[nounBase.size() - 1]) || nounBase[nounBase.size() - 1] == L'ь' || nounBase[nounBase.size() - 1] == L'ъ') { result.insert({ nounBase, ending, NounEndingDivision::DC_COMMON }); + result.insert({ nounBase, ending, NounEndingDivision::DC_SPECIAL_PLURAL_FORM }); } //Check missed vowel (simple case) @@ -362,11 +393,22 @@ std::set getPossibleNounEndingDivisionSet(std::wstring noun) result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_O }); } - if (charIsConsolant(nounBase[nounBase.size() - 1]) && nounBase[nounBase.size() - 2] == L'ь') + if (charIsConsolant(nounBase[nounBase.size() - 1]) && nounBase[nounBase.size() - 2] == L'ь' && charIsMissingVowelSoftenerConsolant(nounBase[nounBase.size() - 3])) { result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E }); } - + + if (charIsConsolant(nounBase[nounBase.size() - 1]) && charIsConsolant(nounBase[nounBase.size() - 2]) && !charIsMissingVowelSoftenerConsolant(nounBase[nounBase.size() - 2])) + { + result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E }); + } + + + if (ending == L"а" || ending == L"я") + { + result.insert({ nounBase, ending, NounEndingDivision::DC_SPECIAL_PLURAL_A }); + } + } } @@ -452,51 +494,93 @@ bool charIsUFormConsolant(wchar_t c) } -bool FirstAIFormSingularCondition(const NounRecord& nounRecord) //Same for both animate and inanimate +bool AIFormEndingIsCorrect(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))); + return (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) && nounRecord.haveSingleForm || + (GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) && !nounRecord.haveSingleForm; +} + + + +bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord) +{ + return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))); +} + +bool FirstAIFormAnimateSingularCondition(const NounRecord& nounRecord) +{ + return nounRecord.haveSingleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))); } bool FirstAIFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))); + return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && ( + (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) || + !nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) + ); } bool FirstAIFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))); + return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && ( + (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) || + !nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) + ); } -bool FirstAUFormSingularCondition(const NounRecord& nounRecord) //Same for both animate and inanimate +bool FirstAUFormInanimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord))); + return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord))); } +bool FirstAUFormAnimateSingularCondition(const NounRecord& nounRecord) +{ + return nounRecord.haveSingleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord))); +} + + bool FirstAUFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord))); + return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && ( + (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord))) || + !nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'ы' && charIsUFormConsolant(GetPrevLastChar(nounRecord))) + ); } bool FirstAUFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord))); + return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && ( + (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord))) || + !nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'ы' && charIsUFormConsolant(GetPrevLastChar(nounRecord))) + ); } -bool FirstYaFormSingularCondition(const NounRecord& nounRecord) //Same for both animate and inanimate +bool FirstYaFormInanimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'я'); + return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'я'); +} + +bool FirstYaFormAnimateSingularCondition(const NounRecord& nounRecord) +{ + return nounRecord.haveSingleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'я'); } bool FirstYaFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'я'); + return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && ( + (GetLastChar(nounRecord) == L'я') || + !nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и') + ); } bool FirstYaFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'я'); + return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && ( + (GetLastChar(nounRecord) == L'я') || + !nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и') + ); } @@ -509,7 +593,11 @@ bool SecondMaleIFormInanimateSingularCondition(const NounRecord& nounRecord) bool SecondMaleIFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && charIsIFormConsolant(GetLastChar(nounRecord)); + return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && ( + charIsIFormConsolant(GetLastChar(nounRecord)) || + !nounRecord.haveSingleForm && charIsIFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'и' + ); + } @@ -520,7 +608,10 @@ bool SecondMaleIFormAnimateSingularCondition(const NounRecord& nounRecord) bool SecondMaleIFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && charIsIFormConsolant(GetLastChar(nounRecord)); + return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && ( + charIsIFormConsolant(GetLastChar(nounRecord)) || + !nounRecord.haveSingleForm && charIsIFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'и' + ); } @@ -531,7 +622,10 @@ bool SecondMaleUFormInanimateSingularCondition(const NounRecord& nounRecord) bool SecondMaleUFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && charIsUFormConsolant(GetLastChar(nounRecord)); + return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && ( + charIsUFormConsolant(GetLastChar(nounRecord)) || + !nounRecord.haveSingleForm && charIsUFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'ы' + ); } @@ -542,7 +636,10 @@ bool SecondMaleUFormAnimateSingularCondition(const NounRecord& nounRecord) bool SecondMaleUFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && charIsUFormConsolant(GetLastChar(nounRecord)); + return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && ( + charIsUFormConsolant(GetLastChar(nounRecord)) || + !nounRecord.haveSingleForm && charIsUFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'ы' + ); } @@ -553,7 +650,10 @@ bool SecondMaleSSFormInanimateSingularCondition(const NounRecord& nounRecord) bool SecondMaleSSFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'ь'; + return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && ( + GetLastChar(nounRecord) == L'ь' || + !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и' + ); } @@ -564,7 +664,10 @@ bool SecondMaleSSFormAnimateSingularCondition(const NounRecord& nounRecord) bool SecondMaleSSFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'ь'; + return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && ( + GetLastChar(nounRecord) == L'ь' || + !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и' + ); } bool SecondNeutralEFormSingularCondition(const NounRecord& nounRecord) @@ -574,7 +677,10 @@ bool SecondNeutralEFormSingularCondition(const NounRecord& nounRecord) bool SecondNeutralEFormPluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && GetLastChar(nounRecord) == L'е'; + return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && ( + GetLastChar(nounRecord) == L'е' || + !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'я' + ); } bool SecondNeutralOFormSingularCondition(const NounRecord& nounRecord) @@ -584,22 +690,37 @@ bool SecondNeutralOFormSingularCondition(const NounRecord& nounRecord) bool SecondNeutralOFormPluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && GetLastChar(nounRecord) == L'о'; + return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && ( + GetLastChar(nounRecord) == L'о' || + !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'а' + ); } -bool ThirdFormSingularCondition(const NounRecord& nounRecord) //Same for both animate and inanimate +bool ThirdFormInanimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.gender == NG_FEMALE && GetLastChar(nounRecord) == L'ь'; + return nounRecord.haveSingleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'ь'; } +bool ThirdFormAnimateSingularCondition(const NounRecord& nounRecord) +{ + return nounRecord.haveSingleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'ь'; +} + + bool ThirdFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'ь'; + return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeInanimate && ( + GetLastChar(nounRecord) == L'ь' || + !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и' + ); } bool ThirdFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'ь'; + return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeAnimate && ( + GetLastChar(nounRecord) == L'ь' || + !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и' + ); } @@ -608,18 +729,18 @@ std::map, std::function < bool(const NounRe void SetupDeclentionMap() { - DeclentionConditionMap[{FIRST_A_IFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormSingularCondition, std::placeholders::_1); - DeclentionConditionMap[{FIRST_A_IFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormSingularCondition, std::placeholders::_1); + DeclentionConditionMap[{FIRST_A_IFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormInanimateSingularCondition, std::placeholders::_1); + DeclentionConditionMap[{FIRST_A_IFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormAnimateSingularCondition, std::placeholders::_1); DeclentionConditionMap[{FIRST_A_IFORM_INANIMATE, NC_PLURAL}] = std::bind(FirstAIFormInanimatePluralCondition, std::placeholders::_1); DeclentionConditionMap[{FIRST_A_IFORM_ANIMATE, NC_PLURAL}] = std::bind(FirstAIFormAnimatePluralCondition, std::placeholders::_1); - DeclentionConditionMap[{FIRST_A_UFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormSingularCondition, std::placeholders::_1); - DeclentionConditionMap[{FIRST_A_UFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormSingularCondition, std::placeholders::_1); + DeclentionConditionMap[{FIRST_A_UFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormInanimateSingularCondition, std::placeholders::_1); + DeclentionConditionMap[{FIRST_A_UFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormAnimateSingularCondition, std::placeholders::_1); DeclentionConditionMap[{FIRST_A_UFORM_INANIMATE, NC_PLURAL}] = std::bind(FirstAUFormInanimatePluralCondition, std::placeholders::_1); DeclentionConditionMap[{FIRST_A_UFORM_ANIMATE, NC_PLURAL}] = std::bind(FirstAUFormAnimatePluralCondition, std::placeholders::_1); - DeclentionConditionMap[{FIRST_YA_FORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormSingularCondition, std::placeholders::_1); - DeclentionConditionMap[{FIRST_YA_FORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormSingularCondition, std::placeholders::_1); + DeclentionConditionMap[{FIRST_YA_FORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormInanimateSingularCondition, std::placeholders::_1); + DeclentionConditionMap[{FIRST_YA_FORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormAnimateSingularCondition, std::placeholders::_1); DeclentionConditionMap[{FIRST_YA_FORM_INANIMATE, NC_PLURAL}] = std::bind(FirstYaFormInanimatePluralCondition, std::placeholders::_1); DeclentionConditionMap[{FIRST_YA_FORM_ANIMATE, NC_PLURAL}] = std::bind(FirstYaFormAnimatePluralCondition, std::placeholders::_1); @@ -645,8 +766,8 @@ void SetupDeclentionMap() DeclentionConditionMap[{SECOND_NEUTRAL_O_FORM, NC_PLURAL}] = std::bind(SecondNeutralOFormPluralCondition, std::placeholders::_1); - DeclentionConditionMap[{THIRD_FORM_INANIMATE, NC_SINGULAR}] = std::bind(ThirdFormSingularCondition, std::placeholders::_1); - DeclentionConditionMap[{THIRD_FORM_ANIMATE, NC_SINGULAR}] = std::bind(ThirdFormSingularCondition, std::placeholders::_1); + DeclentionConditionMap[{THIRD_FORM_INANIMATE, NC_SINGULAR}] = std::bind(ThirdFormInanimateSingularCondition, std::placeholders::_1); + DeclentionConditionMap[{THIRD_FORM_ANIMATE, NC_SINGULAR}] = std::bind(ThirdFormAnimateSingularCondition, std::placeholders::_1); DeclentionConditionMap[{THIRD_FORM_INANIMATE, NC_PLURAL}] = std::bind(ThirdFormInanimatePluralCondition, std::placeholders::_1); DeclentionConditionMap[{THIRD_FORM_ANIMATE, NC_PLURAL}] = std::bind(ThirdFormAnimatePluralCondition, std::placeholders::_1); @@ -686,17 +807,17 @@ bool NounScructIsAlreadyInArray(const NounStruct& nounStruct, const std::vector< bool IsDeclencionSecondType(NounDeclencion nounDeclention) { switch (nounDeclention) - { - case SECOND_MALE_IFORM_INANIMATE: - case SECOND_MALE_IFORM_ANIMATE: - case SECOND_MALE_UFORM_INANIMATE: - case SECOND_MALE_UFORM_ANIMATE: - case SECOND_MALE_SSFORM_INANIMATE: - case SECOND_MALE_SSFORM_ANIMATE: - return true; - break; - default: - return false; + { + case SECOND_MALE_IFORM_INANIMATE: + case SECOND_MALE_IFORM_ANIMATE: + case SECOND_MALE_UFORM_INANIMATE: + case SECOND_MALE_UFORM_ANIMATE: + case SECOND_MALE_SSFORM_INANIMATE: + case SECOND_MALE_SSFORM_ANIMATE: + return true; + break; + default: + return false; break; } } @@ -704,18 +825,18 @@ bool IsDeclencionSecondType(NounDeclencion nounDeclention) bool IsDeclencionAnimated(NounDeclencion nounDeclention) { switch (nounDeclention) - { - case FIRST_A_IFORM_ANIMATE: - case FIRST_A_UFORM_ANIMATE: - case FIRST_YA_FORM_ANIMATE: - case SECOND_MALE_IFORM_ANIMATE: - case SECOND_MALE_UFORM_ANIMATE: - case SECOND_MALE_SSFORM_ANIMATE: - case THIRD_FORM_ANIMATE: - return true; - break; - default: - return false; + { + case FIRST_A_IFORM_ANIMATE: + case FIRST_A_UFORM_ANIMATE: + case FIRST_YA_FORM_ANIMATE: + case SECOND_MALE_IFORM_ANIMATE: + case SECOND_MALE_UFORM_ANIMATE: + case SECOND_MALE_SSFORM_ANIMATE: + case THIRD_FORM_ANIMATE: + return true; + break; + default: + return false; break; } } @@ -735,6 +856,107 @@ bool LostVowelETest(NounTuple nounTuple, NounRecord nounRecord) return nounRecord.haveStandardMultipleFormWithMissingLastVowel; } +std::map> DivisionCaseNounModificatorMap; +std::map> DivisionCaseEndingModificatorMap; + + +std::map < NounEndingDivision::DivisionCase, std::function < bool(NounTuple) >> DivisionCaseNounTupleFilterMap; + +std::map < NounEndingDivision::DivisionCase, std::function < bool(NounTuple, NounRecord) >> DivisionCaseNounTupleRecordFilterMap; + + +void FillDivisionCaseMaps() +{ + DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; }; + + DivisionCaseNounModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](std::wstring s) + { + return std::wstring(s.begin(), s.end() - 2) + L"o" + s[s.size() - 1]; + }; + + DivisionCaseNounModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = [](std::wstring s) + { + if (s[s.size() - 2] == L'ь') + { + return std::wstring(s.begin(), s.end() - 2) + L"е" + s[s.size() - 1]; + } + else + { + return std::wstring(s.begin(), s.end() - 1) + L"е" + s[s.size() - 1]; + } + }; + + DivisionCaseNounModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON]; + + DivisionCaseNounModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON]; + + DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; }; + DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON]; + DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON]; + DivisionCaseEndingModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = [](std::wstring s) + { + if (s == L"а") return L"ы"; + if (s == L"я") return L"и"; + std::cout << "Error in DivisionCaseEndingModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_A]" << std::endl; + return L""; + }; + + DivisionCaseEndingModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON]; + + + DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t) { return true; }; + + DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](NounTuple t) + { + return (std::get<1>(t) == NC_PLURAL) || + ((std::get<2>(t) != NGC_P1_NOMINATIVE) && + (!(std::get<2>(t) == NGC_P4_ACCUSATIVE && !IsDeclencionAnimated(std::get<0>(t))))); + }; + + DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_O]; + + + DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = [](NounTuple t) + { + return (std::get<1>(t) == NC_PLURAL) && + ( (std::get<2>(t) == NGC_P1_NOMINATIVE) || + ((std::get<2>(t) == NGC_P4_ACCUSATIVE && !IsDeclencionAnimated(std::get<0>(t))))); + }; + + DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = [](NounTuple t) + { + return (std::get<1>(t) == NC_PLURAL); + }; + + DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t, NounRecord r) + { + return (r.haveStandardMultipleForm || std::get<1>(t) == NC_SINGULAR) && StandardTest(t, r); + }; + + DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](NounTuple t, NounRecord r) + { + return LostVowelOTest(t, r) && StandardTest(t, r); + }; + + DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_LOST_VOWEL_E] = [](NounTuple t, NounRecord r) + { + return LostVowelETest(t, r) && StandardTest(t, r); + }; + + DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = [](NounTuple t, NounRecord r) + { + return r.haveAlternativeMultipleFormEnding && StandardTest(t, r); + }; + + + DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = [](NounTuple t, NounRecord r) + { + return r.specialMultipleForm != L"" && StandardTest(t, r); + }; + + +} + std::vector RecognizeNoun(std::wstring noun) @@ -750,91 +972,48 @@ std::vector RecognizeNoun(std::wstring noun) std::wstring nounEnding = nounEndingDivision.ending; NounEndingDivision::DivisionCase dc = nounEndingDivision.divisionCase; + std::wstring modifiedNounBase = DivisionCaseNounModificatorMap[dc](nounBase); + std::wstring modifiedNounEnding = DivisionCaseEndingModificatorMap[dc](nounEnding); - if (dc == NounEndingDivision::DC_LOST_VOWEL_O) - { - nounBase.insert(nounBase.begin() + nounBase.size() - 1, L'о'); - } - - if (dc == NounEndingDivision::DC_LOST_VOWEL_E) - { - nounBase[nounBase.size() - 2] = L'е'; - } - - std::vector possibleTupleArr = GetPossibleNounTupleArr(nounEnding); + std::vector possibleTupleArr = GetPossibleNounTupleArr(modifiedNounEnding); //Standard check for (NounTuple nounTuple : possibleTupleArr) { - bool animated = IsDeclencionAnimated(std::get<0>(nounTuple)); - bool additionalTest = true; - - if (dc == NounEndingDivision::DC_LOST_VOWEL_O) - { - additionalTest = (std::get<1>(nounTuple) == NC_PLURAL) || - ((std::get<0>(nounTuple) != NGC_P1_NOMINATIVE) && - (!(std::get<0>(nounTuple) == NGC_P4_ACCUSATIVE && !animated))); - } - if (dc == NounEndingDivision::DC_LOST_VOWEL_E) - { - additionalTest = (std::get<1>(nounTuple) == NC_PLURAL) || - ((std::get<0>(nounTuple) != NGC_P1_NOMINATIVE) && - (!(std::get<0>(nounTuple) == NGC_P4_ACCUSATIVE && !animated))); - } - - if (!additionalTest) - { - continue; - } - - std::wstring nounNominaviteSingular = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_SINGULAR); - - if (NounIsInDictionary(nounNominaviteSingular)) + if (DivisionCaseNounTupleFilterMap[dc](nounTuple)) { - NounRecord nounRecord = GetNounRecordFromDictionary(nounNominaviteSingular); + std::wstring nounNominaviteSingular = GetNounNoninative(modifiedNounBase, std::get<0>(nounTuple), NC_SINGULAR); - bool secondAdditionalTest = true; - - if (dc == NounEndingDivision::DC_LOST_VOWEL_O) - { - secondAdditionalTest = LostVowelOTest(nounTuple, nounRecord) && StandardTest(nounTuple, nounRecord); - } - else if (dc == NounEndingDivision::DC_LOST_VOWEL_E) - { - secondAdditionalTest = LostVowelETest(nounTuple, nounRecord) && StandardTest(nounTuple, nounRecord); - } - else - { - secondAdditionalTest = (nounRecord.haveStandardMultipleForm || std::get<1>(nounTuple) == NC_SINGULAR) && StandardTest(nounTuple, nounRecord); - } - - - if (secondAdditionalTest) + if (NounIsInDictionary(nounNominaviteSingular)) { - result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), animated, nounRecord }); + NounRecord nounRecord = GetNounRecordFromDictionary(nounNominaviteSingular); - //Check for additional form - if (IsDeclencionSecondType(std::get<0>(nounTuple))) + if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord)) { - if (std::get<1>(nounTuple) == NC_SINGULAR && nounRecord.haveAlternativeMultipleFormEnding) - { - //Add additional form! - result.push_back({ NGC_P1_NOMINATIVE, NC_PLURAL, animated, nounRecord }); - - if (nounRecord.canBeInanimate) - { - result.push_back({ NGC_P4_ACCUSATIVE, NC_PLURAL, animated, nounRecord }); - } - } + result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord }); } - } + + + std::wstring nounNominavitePlural = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_PLURAL); + + if (NounSpecialPluralFormIsInDictionary(nounNominavitePlural)) + { + NounRecord nounRecord = GetNounRecordFromDictionary_BySpecialPluralForm(nounNominavitePlural); + + if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord)) + { + result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord }); + } + } + } } + /* //Special plural form check for (auto nounTuple : possibleTupleArr) { @@ -869,7 +1048,7 @@ std::vector RecognizeNoun(std::wstring noun) } } - } + }*/ diff --git a/rudict/rudict/noun.h b/rudict/rudict/noun.h index 399e82c..8eecefa 100644 --- a/rudict/rudict/noun.h +++ b/rudict/rudict/noun.h @@ -126,6 +126,8 @@ struct NounEndingDivision DC_COMMON = 0, DC_LOST_VOWEL_O, DC_LOST_VOWEL_E, + DC_SPECIAL_PLURAL_A, + DC_SPECIAL_PLURAL_FORM } divisionCase; bool operator<(const NounEndingDivision& other) const @@ -184,6 +186,7 @@ bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple); bool IsDeclencionSecondType(NounDeclencion nounDeclention); bool IsDeclencionAnimated(NounDeclencion nounDeclention); +void FillDivisionCaseMaps(); std::vector RecognizeNoun(std::wstring noun);