fixing minor bugs

This commit is contained in:
Vladislav Khorev 2014-12-01 23:58:28 +00:00
parent 9f1ec812d2
commit 1a9bd97ff8
7 changed files with 319 additions and 136 deletions

View File

@ -1,4 +1,4 @@
Номер;Слово;Род / число;Есть единственное число;Есть множественное число;Есть стандартное множественное число;Есть стандартное множественное число, полученное путем удаления последней гласной из базы;Есть стандартное окончание множественного числа;Есть альтернативное окончание множественного числа;Специальные мн. числа;Может быть одушевленным;Может быть неодушевленным;Специальная форма падежа;1
Номер;Слово;Род / число;Есть единственное число;Есть множественное число;Есть стандартное множественное число;Есть стандартное множественное число, полученное путем удаления последней гласной из базы;Есть стандартное окончание множественного числа;"Есть альтернативное окончание множественного числа ""а/я""";Специальные мн. числа;Может быть одушевленным;Может быть неодушевленным;Специальная форма падежа;1
307;любовь;ж;1;0;0;0;0;0;;0;1;;1
338;помощь;ж;1;0;0;0;0;0;;0;1;;1
589;борьба;ж;1;0;0;0;0;0;;0;1;;1
@ -73,7 +73,7 @@
292;улица;ж;1;1;1;0;1;0;;0;1;;1
299;мысль;ж;1;1;1;0;1;0;;0;1;;1
300;дорога;ж;1;1;1;0;1;0;;0;1;;1
301;мать;ж;1;1;0;0;1;0;матери;1;0;;1
301;мать;ж;1;1;0;0;0;0;матери;1;0;;1
309;мама;ж;1;1;1;0;1;0;;1;0;;1
315;школа;ж;1;1;1;0;1;0;;0;1;;1
318;цель;ж;1;1;1;0;1;0;;0;1;;1
@ -144,7 +144,7 @@
808;площадь;ж;1;1;1;0;1;0;;0;1;;1
823;линия;ж;1;1;1;0;1;0;;0;1;;1
830;губа;ж;1;1;1;0;1;0;;0;1;;1
834;дочь;ж;1;1;0;0;1;0;дочери;0;1;;1
834;дочь;ж;1;1;0;0;0;0;дочери;1;0;;1
836;среда;ж;1;1;1;0;1;0;;0;1;;1
862;грудь;ж;1;1;1;0;1;0;;0;1;;1
869;услуга;ж;1;1;1;0;1;0;;0;1;;1
@ -370,7 +370,7 @@
479;проект;м;1;1;1;0;1;0;;0;1;;1
484;директор;м;1;1;1;0;1;1;;1;0;;1
494;срок;м;1;1;1;0;1;0;;0;1;;1
495;палец;м;1;1;0;0;0;0;пальцы;0;1;;1
495;палец;м;1;1;0;1;0;0;;0;1;;1
496;опыт;м;1;1;1;0;1;0;;0;1;;1
512;лес;м;1;1;0;0;0;1;;0;1;;1
517;состав;м;1;1;1;0;1;0;;0;1;;1
@ -457,7 +457,7 @@
839;солдат;м;1;1;1;0;1;0;;1;0;;1
840;художник;м;1;1;1;0;1;0;;1;0;;1
842;волос;м;1;1;1;0;1;0;;0;1;;1
847;ветер;м;1;1;0;1;0;0;ветра;0;1;;1
847;ветер;м;1;1;0;1;1;1;;0;1;;1
849;парень;м;1;1;0;1;1;0;;1;0;;1
853;генерал;м;1;1;1;0;1;0;;1;0;;1
855;огонь;м;1;1;0;1;1;0;;0;1;;1
@ -570,7 +570,7 @@
1396;март;м;1;1;1;0;1;0;;0;1;;1
1398;клиент;м;1;1;1;0;1;0;;1;0;;1
1403;фронт;м;1;1;1;0;1;0;;0;1;;1
1406;стул;м;1;1;0;0;0;0;стулья;0;1;;1
1406;стул;м;1;1;0;0;1;0;стулья;0;1;;1
1420;музей;м;1;1;1;0;1;0;;0;1;;1
1423;след;м;1;1;1;0;1;0;;0;1;;1
1425;полковник;м;1;1;1;0;1;0;;1;0;;1
@ -659,18 +659,18 @@
1854;эксперт;м;1;1;1;0;1;0;;1;0;;1
1861;автобус;м;1;1;1;0;1;0;;0;1;;1
1866;кадр;м;1;1;1;0;1;0;;1;1;;1
1874;лев;м;1;1;0;1;1;0;львы;1;0;;1
1874;лев;м;1;1;0;1;1;0;;1;0;;1
1876;порог;м;1;1;1;0;1;0;;0;1;;1
1881;ремонт;м;1;1;1;0;1;0;;0;1;;1
1891;памятник;м;1;1;1;0;1;0;;0;1;;1
1892;корень;м;1;1;0;1;1;0;;0;1;;1
1905;штаб;м;1;1;1;0;1;0;;0;1;;1
1907;владелец;м;1;1;0;1;1;0;владельцы;1;0;;1
1907;владелец;м;1;1;0;1;1;0;;1;0;;1
1908;компьютер;м;1;1;1;0;1;0;;0;1;;1
1911;инженер;м;1;1;1;0;1;0;;1;0;;1
1926;выпуск;м;1;1;1;0;1;0;;0;1;;1
1927;кулак;м;1;1;1;0;1;0;;0;1;;1
1928;лед;м;1;1;0;1;1;0;льды;0;1;;1
1928;лед;м;1;1;0;1;1;0;;0;1;;1
1938;корпус;м;1;1;1;0;1;1;;0;1;;1
1940;недостаток;м;1;1;0;1;1;0;;0;1;;1
1942;сапог;м;1;1;1;0;1;0;;0;1;;1

1 Номер Слово Род / число Есть единственное число Есть множественное число Есть стандартное множественное число Есть стандартное множественное число, полученное путем удаления последней гласной из базы Есть стандартное окончание множественного числа Есть альтернативное окончание множественного числа Есть альтернативное окончание множественного числа "а/я" Специальные мн. числа Может быть одушевленным Может быть неодушевленным Специальная форма падежа 1
2 307 любовь ж 1 0 0 0 0 0 0 1 1
3 338 помощь ж 1 0 0 0 0 0 0 1 1
4 589 борьба ж 1 0 0 0 0 0 0 1 1
73 292 улица ж 1 1 1 0 1 0 0 1 1
74 299 мысль ж 1 1 1 0 1 0 0 1 1
75 300 дорога ж 1 1 1 0 1 0 0 1 1
76 301 мать ж 1 1 0 0 1 0 0 матери 1 0 1
77 309 мама ж 1 1 1 0 1 0 1 0 1
78 315 школа ж 1 1 1 0 1 0 0 1 1
79 318 цель ж 1 1 1 0 1 0 0 1 1
144 808 площадь ж 1 1 1 0 1 0 0 1 1
145 823 линия ж 1 1 1 0 1 0 0 1 1
146 830 губа ж 1 1 1 0 1 0 0 1 1
147 834 дочь ж 1 1 0 0 1 0 0 дочери 0 1 1 0 1
148 836 среда ж 1 1 1 0 1 0 0 1 1
149 862 грудь ж 1 1 1 0 1 0 0 1 1
150 869 услуга ж 1 1 1 0 1 0 0 1 1
370 479 проект м 1 1 1 0 1 0 0 1 1
371 484 директор м 1 1 1 0 1 1 1 0 1
372 494 срок м 1 1 1 0 1 0 0 1 1
373 495 палец м 1 1 0 0 1 0 0 пальцы 0 1 1
374 496 опыт м 1 1 1 0 1 0 0 1 1
375 512 лес м 1 1 0 0 0 1 0 1 1
376 517 состав м 1 1 1 0 1 0 0 1 1
457 839 солдат м 1 1 1 0 1 0 1 0 1
458 840 художник м 1 1 1 0 1 0 1 0 1
459 842 волос м 1 1 1 0 1 0 0 1 1
460 847 ветер м 1 1 0 1 0 1 0 1 ветра 0 1 1
461 849 парень м 1 1 0 1 1 0 1 0 1
462 853 генерал м 1 1 1 0 1 0 1 0 1
463 855 огонь м 1 1 0 1 1 0 0 1 1
570 1396 март м 1 1 1 0 1 0 0 1 1
571 1398 клиент м 1 1 1 0 1 0 1 0 1
572 1403 фронт м 1 1 1 0 1 0 0 1 1
573 1406 стул м 1 1 0 0 0 1 0 стулья 0 1 1
574 1420 музей м 1 1 1 0 1 0 0 1 1
575 1423 след м 1 1 1 0 1 0 0 1 1
576 1425 полковник м 1 1 1 0 1 0 1 0 1
659 1854 эксперт м 1 1 1 0 1 0 1 0 1
660 1861 автобус м 1 1 1 0 1 0 0 1 1
661 1866 кадр м 1 1 1 0 1 0 1 1 1
662 1874 лев м 1 1 0 1 1 0 львы 1 0 1
663 1876 порог м 1 1 1 0 1 0 0 1 1
664 1881 ремонт м 1 1 1 0 1 0 0 1 1
665 1891 памятник м 1 1 1 0 1 0 0 1 1
666 1892 корень м 1 1 0 1 1 0 0 1 1
667 1905 штаб м 1 1 1 0 1 0 0 1 1
668 1907 владелец м 1 1 0 1 1 0 владельцы 1 0 1
669 1908 компьютер м 1 1 1 0 1 0 0 1 1
670 1911 инженер м 1 1 1 0 1 0 1 0 1
671 1926 выпуск м 1 1 1 0 1 0 0 1 1
672 1927 кулак м 1 1 1 0 1 0 0 1 1
673 1928 лед м 1 1 0 1 1 0 льды 0 1 1
674 1938 корпус м 1 1 1 0 1 1 0 1 1
675 1940 недостаток м 1 1 0 1 1 0 0 1 1
676 1942 сапог м 1 1 1 0 1 0 0 1 1

Binary file not shown.

View File

@ -7,7 +7,7 @@ FIRST_A_IFORM_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;а;рука
;;NGC_P6_PREPOSITIONAL;е;говорю о руке
;NC_PLURAL;NGC_P1_NOMINATIVE;и;руки
;;NGC_P2_GENITIVE;;подарок для рук
;;NGC_P3_DATIVE;е;подарок рукам
;;NGC_P3_DATIVE;ам;подарок рукам
;;NGC_P4_ACCUSATIVE;и;обвиняю руки
;;NGC_P5_INSTRUMENTAL;ами;говорю с руками
;;NGC_P6_PREPOSITIONAL;ах;говорю о руках

1 Склонение Род Падеж Окончание Пример
7 NGC_P6_PREPOSITIONAL е говорю о руке
8 NC_PLURAL NGC_P1_NOMINATIVE и руки
9 NGC_P2_GENITIVE подарок для рук
10 NGC_P3_DATIVE е ам подарок рукам
11 NGC_P4_ACCUSATIVE и обвиняю руки
12 NGC_P5_INSTRUMENTAL ами говорю с руками
13 NGC_P6_PREPOSITIONAL ах говорю о руках

Binary file not shown.

View File

@ -11,6 +11,7 @@ int main()
SetupDeclentionMap();
LoadNounDeclencionCaseTable();
LoadFrequentWordSet();
FillDivisionCaseMaps();
//RecognizeNoun(L"стульями");
//Known bug: "вечер"

View File

@ -77,7 +77,7 @@ NounDeclencion WStringToNounDeclencion(std::wstring str)
}
if (str == L"FIRST_A_IFORM_ANIMATE")
{
return FIRST_A_IFORM_INANIMATE;
return FIRST_A_IFORM_ANIMATE;
}
if (str == L"FIRST_A_UFORM_INANIMATE")
{
@ -274,6 +274,11 @@ bool NounSpecialPluralFormIsInDictionary(std::wstring nounNominativePlural)
{
return true;
}
if (!noun.haveSingleForm && noun.nominativeForm == nounNominativePlural)
{
return true;
}
}
return false;
@ -301,6 +306,11 @@ NounRecord GetNounRecordFromDictionary_BySpecialPluralForm(std::wstring nounNomi
{
return noun;
}
if (!noun.haveSingleForm && noun.nominativeForm == nounNominativePlural)
{
return noun;
}
}
return{};
@ -339,6 +349,26 @@ bool charIsVowel(wchar_t c)
}
//Part of the "missing vowel" analysis: tells whether the consonant `c` is one
//after which the dropped 'е' is written as a soft sign instead of vanishing.
//  лев   -> львы  ('л' precedes the vowel, so 'е' turns into 'ь')
//  немец -> немцы (not 'л', so 'е' simply disappears)
//Returns true only for the consonants listed below (currently just 'л').
bool charIsMissingVowelSoftenerConsolant(wchar_t c)
{
    const std::wstring softeningConsonants = L"л";
    return softeningConsonants.find(c) != std::wstring::npos;
}
std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun)
{
std::set<NounEndingDivision> result;
@ -354,6 +384,7 @@ std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun)
if (charIsConsolant(nounBase[nounBase.size() - 1]) || nounBase[nounBase.size() - 1] == L'ь' || nounBase[nounBase.size() - 1] == L'ъ')
{
result.insert({ nounBase, ending, NounEndingDivision::DC_COMMON });
result.insert({ nounBase, ending, NounEndingDivision::DC_SPECIAL_PLURAL_FORM });
}
//Check missed vowel (simple case)
@ -362,11 +393,22 @@ std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun)
result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_O });
}
if (charIsConsolant(nounBase[nounBase.size() - 1]) && nounBase[nounBase.size() - 2] == L'ь')
if (charIsConsolant(nounBase[nounBase.size() - 1]) && nounBase[nounBase.size() - 2] == L'ь' && charIsMissingVowelSoftenerConsolant(nounBase[nounBase.size() - 3]))
{
result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E });
}
if (charIsConsolant(nounBase[nounBase.size() - 1]) && charIsConsolant(nounBase[nounBase.size() - 2]) && !charIsMissingVowelSoftenerConsolant(nounBase[nounBase.size() - 2]))
{
result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E });
}
if (ending == L"а" || ending == L"я")
{
result.insert({ nounBase, ending, NounEndingDivision::DC_SPECIAL_PLURAL_A });
}
}
}
@ -452,51 +494,93 @@ bool charIsUFormConsolant(wchar_t c)
}
bool FirstAIFormSingularCondition(const NounRecord& nounRecord) //Same for both animate and inanimate
bool AIFormEndingIsCorrect(const NounRecord& nounRecord)
{
return nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord)));
return (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) && nounRecord.haveSingleForm ||
(GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) && !nounRecord.haveSingleForm;
}
//True when the record has a singular form, can be inanimate, and its last
//character (per GetLastChar) is 'а' preceded by a character accepted by
//charIsIFormConsolant.
bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord)
{
    if (!nounRecord.haveSingleForm || !nounRecord.canBeInanimate)
    {
        return false;
    }

    if (GetLastChar(nounRecord) != L'а')
    {
        return false;
    }

    return charIsIFormConsolant(GetPrevLastChar(nounRecord));
}
//True when the record has a singular form, can be animate, and its last
//character (per GetLastChar) is 'а' preceded by a character accepted by
//charIsIFormConsolant.
bool FirstAIFormAnimateSingularCondition(const NounRecord& nounRecord)
{
    const bool eligible = nounRecord.haveSingleForm && nounRecord.canBeAnimate;
    if (!eligible)
    {
        return false;
    }

    const bool endsWithA = GetLastChar(nounRecord) == L'а';
    return endsWithA && charIsIFormConsolant(GetPrevLastChar(nounRecord));
}
bool FirstAIFormInanimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord)));
return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (
(GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) ||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord)))
);
}
bool FirstAIFormAnimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord)));
return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (
(GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) ||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord)))
);
}
bool FirstAUFormSingularCondition(const NounRecord& nounRecord) //Same for both animate and inanimate
bool FirstAUFormInanimateSingularCondition(const NounRecord& nounRecord)
{
return nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord)));
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord)));
}
//True when the record has a singular form, can be animate, and its last
//character (per GetLastChar) is 'а' preceded by a character accepted by
//charIsUFormConsolant.
bool FirstAUFormAnimateSingularCondition(const NounRecord& nounRecord)
{
    if (!(nounRecord.haveSingleForm && nounRecord.canBeAnimate))
    {
        return false;
    }

    return GetLastChar(nounRecord) == L'а'
        ? charIsUFormConsolant(GetPrevLastChar(nounRecord))
        : false;
}
bool FirstAUFormInanimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord)));
return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (
(GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord))) ||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'ы' && charIsUFormConsolant(GetPrevLastChar(nounRecord)))
);
}
bool FirstAUFormAnimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord)));
return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (
(GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord))) ||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'ы' && charIsUFormConsolant(GetPrevLastChar(nounRecord)))
);
}
bool FirstYaFormSingularCondition(const NounRecord& nounRecord) //Same for both animate and inanimate
bool FirstYaFormInanimateSingularCondition(const NounRecord& nounRecord)
{
return nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'я');
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'я');
}
//True when the record has a singular form, can be animate, and its last
//character (per GetLastChar) is 'я'.
bool FirstYaFormAnimateSingularCondition(const NounRecord& nounRecord)
{
    if (!nounRecord.haveSingleForm)
    {
        return false;
    }
    if (!nounRecord.canBeAnimate)
    {
        return false;
    }
    return GetLastChar(nounRecord) == L'я';
}
bool FirstYaFormInanimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'я');
return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (
(GetLastChar(nounRecord) == L'я') ||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и')
);
}
bool FirstYaFormAnimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'я');
return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (
(GetLastChar(nounRecord) == L'я') ||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и')
);
}
@ -509,7 +593,11 @@ bool SecondMaleIFormInanimateSingularCondition(const NounRecord& nounRecord)
bool SecondMaleIFormInanimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && charIsIFormConsolant(GetLastChar(nounRecord));
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && (
charIsIFormConsolant(GetLastChar(nounRecord)) ||
!nounRecord.haveSingleForm && charIsIFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'и'
);
}
@ -520,7 +608,10 @@ bool SecondMaleIFormAnimateSingularCondition(const NounRecord& nounRecord)
bool SecondMaleIFormAnimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && charIsIFormConsolant(GetLastChar(nounRecord));
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && (
charIsIFormConsolant(GetLastChar(nounRecord)) ||
!nounRecord.haveSingleForm && charIsIFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'и'
);
}
@ -531,7 +622,10 @@ bool SecondMaleUFormInanimateSingularCondition(const NounRecord& nounRecord)
bool SecondMaleUFormInanimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && charIsUFormConsolant(GetLastChar(nounRecord));
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && (
charIsUFormConsolant(GetLastChar(nounRecord)) ||
!nounRecord.haveSingleForm && charIsUFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'ы'
);
}
@ -542,7 +636,10 @@ bool SecondMaleUFormAnimateSingularCondition(const NounRecord& nounRecord)
bool SecondMaleUFormAnimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && charIsUFormConsolant(GetLastChar(nounRecord));
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && (
charIsUFormConsolant(GetLastChar(nounRecord)) ||
!nounRecord.haveSingleForm && charIsUFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'ы'
);
}
@ -553,7 +650,10 @@ bool SecondMaleSSFormInanimateSingularCondition(const NounRecord& nounRecord)
bool SecondMaleSSFormInanimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'ь';
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && (
GetLastChar(nounRecord) == L'ь' ||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и'
);
}
@ -564,7 +664,10 @@ bool SecondMaleSSFormAnimateSingularCondition(const NounRecord& nounRecord)
bool SecondMaleSSFormAnimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'ь';
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && (
GetLastChar(nounRecord) == L'ь' ||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и'
);
}
bool SecondNeutralEFormSingularCondition(const NounRecord& nounRecord)
@ -574,7 +677,10 @@ bool SecondNeutralEFormSingularCondition(const NounRecord& nounRecord)
bool SecondNeutralEFormPluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && GetLastChar(nounRecord) == L'е';
return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && (
GetLastChar(nounRecord) == L'е' ||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'я'
);
}
bool SecondNeutralOFormSingularCondition(const NounRecord& nounRecord)
@ -584,22 +690,37 @@ bool SecondNeutralOFormSingularCondition(const NounRecord& nounRecord)
bool SecondNeutralOFormPluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && GetLastChar(nounRecord) == L'о';
return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && (
GetLastChar(nounRecord) == L'о' ||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'а'
);
}
bool ThirdFormSingularCondition(const NounRecord& nounRecord) //Same for both animate and inanimate
bool ThirdFormInanimateSingularCondition(const NounRecord& nounRecord)
{
return nounRecord.haveSingleForm && nounRecord.gender == NG_FEMALE && GetLastChar(nounRecord) == L'ь';
return nounRecord.haveSingleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'ь';
}
//True when the record has a singular form, is feminine (NG_FEMALE), can be
//animate, and its last character (per GetLastChar) is the soft sign 'ь'.
bool ThirdFormAnimateSingularCondition(const NounRecord& nounRecord)
{
    if (!nounRecord.haveSingleForm || !(nounRecord.gender == NG_FEMALE))
    {
        return false;
    }
    if (!nounRecord.canBeAnimate)
    {
        return false;
    }
    return GetLastChar(nounRecord) == L'ь';
}
bool ThirdFormInanimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'ь';
return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeInanimate && (
GetLastChar(nounRecord) == L'ь' ||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и'
);
}
bool ThirdFormAnimatePluralCondition(const NounRecord& nounRecord)
{
return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'ь';
return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeAnimate && (
GetLastChar(nounRecord) == L'ь' ||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и'
);
}
@ -608,18 +729,18 @@ std::map<std::pair<NounDeclencion, NounCount>, std::function < bool(const NounRe
void SetupDeclentionMap()
{
DeclentionConditionMap[{FIRST_A_IFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_A_IFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_A_IFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormInanimateSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_A_IFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormAnimateSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_A_IFORM_INANIMATE, NC_PLURAL}] = std::bind(FirstAIFormInanimatePluralCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_A_IFORM_ANIMATE, NC_PLURAL}] = std::bind(FirstAIFormAnimatePluralCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_A_UFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_A_UFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_A_UFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormInanimateSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_A_UFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormAnimateSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_A_UFORM_INANIMATE, NC_PLURAL}] = std::bind(FirstAUFormInanimatePluralCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_A_UFORM_ANIMATE, NC_PLURAL}] = std::bind(FirstAUFormAnimatePluralCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_YA_FORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_YA_FORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_YA_FORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormInanimateSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_YA_FORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormAnimateSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_YA_FORM_INANIMATE, NC_PLURAL}] = std::bind(FirstYaFormInanimatePluralCondition, std::placeholders::_1);
DeclentionConditionMap[{FIRST_YA_FORM_ANIMATE, NC_PLURAL}] = std::bind(FirstYaFormAnimatePluralCondition, std::placeholders::_1);
@ -645,8 +766,8 @@ void SetupDeclentionMap()
DeclentionConditionMap[{SECOND_NEUTRAL_O_FORM, NC_PLURAL}] = std::bind(SecondNeutralOFormPluralCondition, std::placeholders::_1);
DeclentionConditionMap[{THIRD_FORM_INANIMATE, NC_SINGULAR}] = std::bind(ThirdFormSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{THIRD_FORM_ANIMATE, NC_SINGULAR}] = std::bind(ThirdFormSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{THIRD_FORM_INANIMATE, NC_SINGULAR}] = std::bind(ThirdFormInanimateSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{THIRD_FORM_ANIMATE, NC_SINGULAR}] = std::bind(ThirdFormAnimateSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{THIRD_FORM_INANIMATE, NC_PLURAL}] = std::bind(ThirdFormInanimatePluralCondition, std::placeholders::_1);
DeclentionConditionMap[{THIRD_FORM_ANIMATE, NC_PLURAL}] = std::bind(ThirdFormAnimatePluralCondition, std::placeholders::_1);
@ -735,6 +856,107 @@ bool LostVowelETest(NounTuple nounTuple, NounRecord nounRecord)
return nounRecord.haveStandardMultipleFormWithMissingLastVowel;
}
//Lookup tables keyed by NounEndingDivision::DivisionCase. They are filled by
//FillDivisionCaseMaps() and read by RecognizeNoun().

//Transforms the base part of a split word form before it is turned into a nominative for the dictionary lookup.
std::map<NounEndingDivision::DivisionCase, std::function<std::wstring(std::wstring)>> DivisionCaseNounModificatorMap;
//Transforms the ending part of a split word form before the possible noun tuples are looked up for it.
std::map<NounEndingDivision::DivisionCase, std::function<std::wstring(std::wstring)>> DivisionCaseEndingModificatorMap;
//Decides from the noun tuple alone whether the tuple is worth checking for the given division case.
std::map < NounEndingDivision::DivisionCase, std::function < bool(NounTuple) >> DivisionCaseNounTupleFilterMap;
//Decides from the noun tuple together with the dictionary record whether the match is accepted.
std::map < NounEndingDivision::DivisionCase, std::function < bool(NounTuple, NounRecord) >> DivisionCaseNounTupleRecordFilterMap;
//Populates the four per-division-case lookup tables used by RecognizeNoun():
//  DivisionCaseNounModificatorMap       - rebuilds the base used for the dictionary lookup,
//  DivisionCaseEndingModificatorMap     - rewrites the ending used for the noun tuple lookup,
//  DivisionCaseNounTupleFilterMap       - pre-filter that looks at the noun tuple only,
//  DivisionCaseNounTupleRecordFilterMap - final filter that also looks at the dictionary record.
//RecognizeNoun() calls the stored functions through operator[], so this function
//has to run before the first recognition (main() calls it during start-up).
//
//Noun tuple layout as used below: <0> declension, <1> count, <2> grammatical case.
void FillDivisionCaseMaps()
{
    //---- Base modificators -------------------------------------------------

    DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; };

    DivisionCaseNounModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](std::wstring s)
    {
        //Nothing to restore in an empty base.
        if (s.empty())
        {
            return s;
        }

        //Put the dropped 'о' back in front of the last base letter: "куск" -> "кусок".
        //Every letter of the base is kept; only the vowel is inserted.
        return std::wstring(s.begin(), s.end() - 1) + L"о" + s[s.size() - 1];
    };

    DivisionCaseNounModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = [](std::wstring s)
    {
        //Nothing to restore in an empty base.
        if (s.empty())
        {
            return s;
        }

        if (s.size() >= 2 && s[s.size() - 2] == L'ь')
        {
            //The soft sign stands where 'е' was: "льв" -> "лев".
            return std::wstring(s.begin(), s.end() - 2) + L"е" + s[s.size() - 1];
        }

        //The vowel vanished without a trace: "немц" -> "немец".
        return std::wstring(s.begin(), s.end() - 1) + L"е" + s[s.size() - 1];
    };

    DivisionCaseNounModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON];

    DivisionCaseNounModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON];

    //---- Ending modificators -----------------------------------------------

    DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; };

    DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];

    DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];

    //The alternative plural ending "а"/"я" is replaced by "ы"/"и" so that the
    //noun tuple lookup is done with that ending instead.
    DivisionCaseEndingModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = [](std::wstring s) -> std::wstring
    {
        if (s == L"а") return L"ы";
        if (s == L"я") return L"и";

        //Any other ending is unexpected for this division case: report it and return an empty ending.
        std::cout << "Error in DivisionCaseEndingModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_A]" << std::endl;
        return L"";
    };

    DivisionCaseEndingModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];

    //---- Tuple-only filters ------------------------------------------------

    DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t) { return true; };

    //Accepts every plural tuple; a singular tuple is accepted unless it is
    //nominative, or accusative of a non-animated declension.
    DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](NounTuple t)
    {
        return (std::get<1>(t) == NC_PLURAL) ||
            ((std::get<2>(t) != NGC_P1_NOMINATIVE) &&
            (!(std::get<2>(t) == NGC_P4_ACCUSATIVE && !IsDeclencionAnimated(std::get<0>(t)))));
    };

    DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_O];

    //Accepts only plural tuples that are nominative, or accusative of a
    //non-animated declension.
    DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = [](NounTuple t)
    {
        return (std::get<1>(t) == NC_PLURAL) &&
            ( (std::get<2>(t) == NGC_P1_NOMINATIVE) ||
            ((std::get<2>(t) == NGC_P4_ACCUSATIVE && !IsDeclencionAnimated(std::get<0>(t)))));
    };

    //Special plural forms are only looked for among plural tuples.
    DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = [](NounTuple t)
    {
        return (std::get<1>(t) == NC_PLURAL);
    };

    //---- Tuple + record filters --------------------------------------------

    //Plural tuples require the record to have a standard plural form; singular tuples do not.
    DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t, NounRecord r)
    {
        return (r.haveStandardMultipleForm || std::get<1>(t) == NC_SINGULAR) && StandardTest(t, r);
    };

    DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](NounTuple t, NounRecord r)
    {
        return LostVowelOTest(t, r) && StandardTest(t, r);
    };

    DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_LOST_VOWEL_E] = [](NounTuple t, NounRecord r)
    {
        return LostVowelETest(t, r) && StandardTest(t, r);
    };

    DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = [](NounTuple t, NounRecord r)
    {
        return r.haveAlternativeMultipleFormEnding && StandardTest(t, r);
    };

    //Only records that carry a non-empty special plural form qualify.
    DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = [](NounTuple t, NounRecord r)
    {
        return r.specialMultipleForm != L"" && StandardTest(t, r);
    };
}
std::vector<NounStruct> RecognizeNoun(std::wstring noun)
@ -750,91 +972,48 @@ std::vector<NounStruct> RecognizeNoun(std::wstring noun)
std::wstring nounEnding = nounEndingDivision.ending;
NounEndingDivision::DivisionCase dc = nounEndingDivision.divisionCase;
std::wstring modifiedNounBase = DivisionCaseNounModificatorMap[dc](nounBase);
std::wstring modifiedNounEnding = DivisionCaseEndingModificatorMap[dc](nounEnding);
if (dc == NounEndingDivision::DC_LOST_VOWEL_O)
{
nounBase.insert(nounBase.begin() + nounBase.size() - 1, L'о');
}
if (dc == NounEndingDivision::DC_LOST_VOWEL_E)
{
nounBase[nounBase.size() - 2] = L'е';
}
std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(nounEnding);
std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(modifiedNounEnding);
//Standard check
for (NounTuple nounTuple : possibleTupleArr)
{
bool animated = IsDeclencionAnimated(std::get<0>(nounTuple));
bool additionalTest = true;
if (dc == NounEndingDivision::DC_LOST_VOWEL_O)
{
additionalTest = (std::get<1>(nounTuple) == NC_PLURAL) ||
((std::get<0>(nounTuple) != NGC_P1_NOMINATIVE) &&
(!(std::get<0>(nounTuple) == NGC_P4_ACCUSATIVE && !animated)));
}
if (dc == NounEndingDivision::DC_LOST_VOWEL_E)
{
additionalTest = (std::get<1>(nounTuple) == NC_PLURAL) ||
((std::get<0>(nounTuple) != NGC_P1_NOMINATIVE) &&
(!(std::get<0>(nounTuple) == NGC_P4_ACCUSATIVE && !animated)));
}
if (!additionalTest)
{
continue;
}
std::wstring nounNominaviteSingular = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_SINGULAR);
if (NounIsInDictionary(nounNominaviteSingular))
if (DivisionCaseNounTupleFilterMap[dc](nounTuple))
{
NounRecord nounRecord = GetNounRecordFromDictionary(nounNominaviteSingular);
std::wstring nounNominaviteSingular = GetNounNoninative(modifiedNounBase, std::get<0>(nounTuple), NC_SINGULAR);
bool secondAdditionalTest = true;
if (dc == NounEndingDivision::DC_LOST_VOWEL_O)
{
secondAdditionalTest = LostVowelOTest(nounTuple, nounRecord) && StandardTest(nounTuple, nounRecord);
}
else if (dc == NounEndingDivision::DC_LOST_VOWEL_E)
{
secondAdditionalTest = LostVowelETest(nounTuple, nounRecord) && StandardTest(nounTuple, nounRecord);
}
else
{
secondAdditionalTest = (nounRecord.haveStandardMultipleForm || std::get<1>(nounTuple) == NC_SINGULAR) && StandardTest(nounTuple, nounRecord);
}
if (secondAdditionalTest)
if (NounIsInDictionary(nounNominaviteSingular))
{
result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), animated, nounRecord });
NounRecord nounRecord = GetNounRecordFromDictionary(nounNominaviteSingular);
//Check for additional form
if (IsDeclencionSecondType(std::get<0>(nounTuple)))
if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord))
{
if (std::get<1>(nounTuple) == NC_SINGULAR && nounRecord.haveAlternativeMultipleFormEnding)
{
//Add additional form!
result.push_back({ NGC_P1_NOMINATIVE, NC_PLURAL, animated, nounRecord });
if (nounRecord.canBeInanimate)
{
result.push_back({ NGC_P4_ACCUSATIVE, NC_PLURAL, animated, nounRecord });
}
}
result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord });
}
}
std::wstring nounNominavitePlural = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_PLURAL);
if (NounSpecialPluralFormIsInDictionary(nounNominavitePlural))
{
NounRecord nounRecord = GetNounRecordFromDictionary_BySpecialPluralForm(nounNominavitePlural);
if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord))
{
result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord });
}
}
}
}
/*
//Special plural form check
for (auto nounTuple : possibleTupleArr)
{
@ -869,7 +1048,7 @@ std::vector<NounStruct> RecognizeNoun(std::wstring noun)
}
}
}
}*/

View File

@ -126,6 +126,8 @@ struct NounEndingDivision
DC_COMMON = 0,
DC_LOST_VOWEL_O,
DC_LOST_VOWEL_E,
DC_SPECIAL_PLURAL_A,
DC_SPECIAL_PLURAL_FORM
} divisionCase;
bool operator<(const NounEndingDivision& other) const
@ -184,6 +186,7 @@ bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple);
bool IsDeclencionSecondType(NounDeclencion nounDeclention);
bool IsDeclencionAnimated(NounDeclencion nounDeclention);
void FillDivisionCaseMaps();
std::vector<NounStruct> RecognizeNoun(std::wstring noun);