Added й form
This commit is contained in:
parent
1a9bd97ff8
commit
33bbd2863e
@ -143,6 +143,30 @@ SECOND_MALE_SSFORM_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ь;парень
|
|||||||
;;NGC_P4_ACCUSATIVE;ей;обвиняю парней
|
;;NGC_P4_ACCUSATIVE;ей;обвиняю парней
|
||||||
;;NGC_P5_INSTRUMENTAL;ями;говорю с парнями
|
;;NGC_P5_INSTRUMENTAL;ями;говорю с парнями
|
||||||
;;NGC_P6_PREPOSITIONAL;ях;говорю о парнях
|
;;NGC_P6_PREPOSITIONAL;ях;говорю о парнях
|
||||||
|
SECOND_I_SHORT_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;й;каравай
|
||||||
|
;;NGC_P2_GENITIVE;я;подарок для каравая
|
||||||
|
;;NGC_P3_DATIVE;ю;подарок караваю
|
||||||
|
;;NGC_P4_ACCUSATIVE;й;обвиняю каравай
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ем;говорю с караваем
|
||||||
|
;;NGC_P6_PREPOSITIONAL;е;говорю о каравае
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;и;караваи
|
||||||
|
;;NGC_P2_GENITIVE;ев;подарок для караваев
|
||||||
|
;;NGC_P3_DATIVE;ям;подарок караваям
|
||||||
|
;;NGC_P4_ACCUSATIVE;и;обвиняю караваи
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ями;говорю с караваями
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ях;говорю о караваях
|
||||||
|
SECOND_I_SHORT_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;й;герой
|
||||||
|
;;NGC_P2_GENITIVE;я;подарок для героя
|
||||||
|
;;NGC_P3_DATIVE;ю;подарок герою
|
||||||
|
;;NGC_P4_ACCUSATIVE;я;обвиняю героя
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ем;говорю с героем
|
||||||
|
;;NGC_P6_PREPOSITIONAL;е;говорю о герое
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;и;герои
|
||||||
|
;;NGC_P2_GENITIVE;ев;подарок для героев
|
||||||
|
;;NGC_P3_DATIVE;ям;подарок героям
|
||||||
|
;;NGC_P4_ACCUSATIVE;ев;обвиняю героев
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ями;говорю с героями
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ях;говорю о героях
|
||||||
SECOND_NEUTRAL_E_FORM;NC_SINGULAR;NGC_P1_NOMINATIVE;е;поле
|
SECOND_NEUTRAL_E_FORM;NC_SINGULAR;NGC_P1_NOMINATIVE;е;поле
|
||||||
;;NGC_P2_GENITIVE;я;подарок для поля
|
;;NGC_P2_GENITIVE;я;подарок для поля
|
||||||
;;NGC_P3_DATIVE;ю;подарок полю
|
;;NGC_P3_DATIVE;ю;подарок полю
|
||||||
|
|
Binary file not shown.
@ -12,6 +12,7 @@ int main()
|
|||||||
LoadNounDeclencionCaseTable();
|
LoadNounDeclencionCaseTable();
|
||||||
LoadFrequentWordSet();
|
LoadFrequentWordSet();
|
||||||
FillDivisionCaseMaps();
|
FillDivisionCaseMaps();
|
||||||
|
CalculatePluralForm();
|
||||||
|
|
||||||
//RecognizeNoun(L"стульями");
|
//RecognizeNoun(L"стульями");
|
||||||
//Косяк: "вечер"
|
//Косяк: "вечер"
|
||||||
|
@ -120,6 +120,17 @@ NounDeclencion WStringToNounDeclencion(std::wstring str)
|
|||||||
{
|
{
|
||||||
return SECOND_MALE_SSFORM_ANIMATE;
|
return SECOND_MALE_SSFORM_ANIMATE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (str == L"SECOND_I_SHORT_INANIMATE")
|
||||||
|
{
|
||||||
|
return SECOND_I_SHORT_INANIMATE;
|
||||||
|
}
|
||||||
|
if (str == L"SECOND_I_SHORT_ANIMATE")
|
||||||
|
{
|
||||||
|
return SECOND_I_SHORT_ANIMATE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (str == L"SECOND_NEUTRAL_E_FORM")
|
if (str == L"SECOND_NEUTRAL_E_FORM")
|
||||||
{
|
{
|
||||||
return SECOND_NEUTRAL_E_FORM;
|
return SECOND_NEUTRAL_E_FORM;
|
||||||
@ -225,6 +236,10 @@ std::vector<std::wstring> GetAllNounEndingArr()
|
|||||||
std::vector<std::wstring> result
|
std::vector<std::wstring> result
|
||||||
{
|
{
|
||||||
L"",
|
L"",
|
||||||
|
|
||||||
|
L"й",
|
||||||
|
L"ев",
|
||||||
|
|
||||||
L"а",
|
L"а",
|
||||||
L"и",
|
L"и",
|
||||||
L"е",
|
L"е",
|
||||||
@ -266,18 +281,143 @@ bool NounIsInDictionary(std::wstring nounNominative)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool NounSpecialPluralFormIsInDictionary(std::wstring nounNominativePlural)
|
std::wstring convertToStandardPluralForm(std::wstring s)
|
||||||
{
|
{
|
||||||
for (auto& noun : NounRecordArr)
|
std::wstring pluralForm = s;
|
||||||
|
|
||||||
|
if (pluralForm[pluralForm.size() - 1] == L'а' && charIsIFormConsolant(pluralForm[pluralForm.size() - 2]))
|
||||||
{
|
{
|
||||||
if (noun.specialMultipleForm == nounNominativePlural)
|
pluralForm[pluralForm.size() - 1] = L'и';
|
||||||
|
}
|
||||||
|
else if (pluralForm[pluralForm.size() - 1] == L'а' && charIsUFormConsolant(pluralForm[pluralForm.size() - 2]))
|
||||||
|
{
|
||||||
|
pluralForm[pluralForm.size() - 1] = L'ы';
|
||||||
|
}
|
||||||
|
else if (pluralForm[pluralForm.size() - 1] == L'я')
|
||||||
|
{
|
||||||
|
pluralForm[pluralForm.size() - 1] = L'и';
|
||||||
|
}
|
||||||
|
else if (charIsIFormConsolant(pluralForm[pluralForm.size() - 1]))
|
||||||
|
{
|
||||||
|
pluralForm += L'и';
|
||||||
|
}
|
||||||
|
else if (charIsUFormConsolant(pluralForm[pluralForm.size() - 1]))
|
||||||
|
{
|
||||||
|
pluralForm += L'ы';
|
||||||
|
}
|
||||||
|
else if (pluralForm[pluralForm.size() - 1] == L'ь')
|
||||||
|
{
|
||||||
|
pluralForm[pluralForm.size() - 1] = L'и';
|
||||||
|
}
|
||||||
|
else if (pluralForm[pluralForm.size() - 1] == L'й')
|
||||||
|
{
|
||||||
|
pluralForm[pluralForm.size() - 1] = L'и';
|
||||||
|
}
|
||||||
|
else if (pluralForm[pluralForm.size() - 1] == L'о')
|
||||||
|
{
|
||||||
|
pluralForm[pluralForm.size() - 1] = L'а';
|
||||||
|
}
|
||||||
|
else if (pluralForm[pluralForm.size() - 1] == L'е')
|
||||||
|
{
|
||||||
|
pluralForm[pluralForm.size() - 1] = L'я';
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::cout << "Error in convertToStandardPluralForm" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return pluralForm;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts a standard nominative plural form into its alternative-ending
// variant by rewriting only the final character:
//   trailing 'и' -> 'я'
//   trailing 'ы' -> 'а'
// Any other input, including an empty string, is returned unchanged.
//
// s: the standard plural form; taken by value, so the caller's string is
//    never modified.
// Returns the (possibly) rewritten copy.
std::wstring convertFromStandardToAlternativePluralForm(std::wstring s)
{
	// An empty string has no last character; indexing s[s.size() - 1] on it
	// would read out of bounds, so bail out early.
	if (s.empty())
	{
		return s;
	}

	wchar_t& lastChar = s.back();

	// The two cases are mutually exclusive: 'и' maps to 'я', which can never
	// match 'ы', so an else-if chain gives the same result as two separate ifs.
	if (lastChar == L'и')
	{
		lastChar = L'я';
	}
	else if (lastChar == L'ы')
	{
		lastChar = L'а';
	}

	return s;
}
|
||||||
|
|
||||||
|
std::set<std::wstring> getPluralForm(NounRecord noun)
|
||||||
|
{
|
||||||
|
std::set<std::wstring> result;
|
||||||
|
|
||||||
|
if (noun.specialMultipleForm != L"")
|
||||||
|
{
|
||||||
|
result.insert(noun.specialMultipleForm);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (noun.haveSingleForm)
|
||||||
|
{
|
||||||
|
if (noun.haveStandardMultipleForm)
|
||||||
{
|
{
|
||||||
return true;
|
std::wstring pluralForm = convertToStandardPluralForm(noun.nominativeForm);
|
||||||
|
|
||||||
|
if (noun.haveStandardMultipleFormEnding)
|
||||||
|
{
|
||||||
|
result.insert(pluralForm);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (noun.haveAlternativeMultipleFormEnding)
|
||||||
|
{
|
||||||
|
result.insert(convertFromStandardToAlternativePluralForm(pluralForm));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (noun.haveStandardMultipleFormWithMissingLastVowel)
|
||||||
|
{
|
||||||
|
std::wstring pluralForm = convertToStandardPluralForm(noun.nominativeForm);
|
||||||
|
|
||||||
|
wchar_t prevsschar = pluralForm[pluralForm.size() - 4];
|
||||||
|
|
||||||
|
if (charIsMissingVowelSoftenerConsolant(prevsschar))
|
||||||
|
{
|
||||||
|
pluralForm[pluralForm.size() - 3] = L'ь';
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pluralForm.erase(pluralForm.begin() + pluralForm.size() - 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (noun.haveStandardMultipleFormEnding)
|
||||||
|
{
|
||||||
|
result.insert(pluralForm);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (noun.haveAlternativeMultipleFormEnding)
|
||||||
|
{
|
||||||
|
result.insert(convertFromStandardToAlternativePluralForm(pluralForm));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!noun.haveSingleForm && noun.nominativeForm == nounNominativePlural)
|
|
||||||
|
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result.insert(noun.nominativeForm);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural)
|
||||||
|
{
|
||||||
|
for (auto& noun : NounRecordArr)
|
||||||
|
{
|
||||||
|
if (noun.haveMultipleForm)
|
||||||
{
|
{
|
||||||
return true;
|
if (noun.precalculatedNominativePluralSet.count(nounNominativePlural) != 0)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -298,29 +438,26 @@ NounRecord GetNounRecordFromDictionary(std::wstring nounNominative)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NounRecord GetNounRecordFromDictionary_BySpecialPluralForm(std::wstring nounNominativePlural)
|
NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural)
|
||||||
{
|
{
|
||||||
for (auto& noun : NounRecordArr)
|
for (auto& noun : NounRecordArr)
|
||||||
{
|
{
|
||||||
if (noun.specialMultipleForm == nounNominativePlural)
|
if (noun.haveMultipleForm)
|
||||||
{
|
{
|
||||||
return noun;
|
if (noun.precalculatedNominativePluralSet.count(nounNominativePlural) != 0)
|
||||||
}
|
{
|
||||||
|
return noun;
|
||||||
if (!noun.haveSingleForm && noun.nominativeForm == nounNominativePlural)
|
}
|
||||||
{
|
|
||||||
return noun;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return{};
|
return{};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool charIsConsolant(wchar_t c)
|
bool charIsConsolant(wchar_t c) //except й
|
||||||
{
|
{
|
||||||
std::wstring consolants = L"йцкнгшщзхфвпрлджчсмтб";
|
std::wstring consolants = L"цкнгшщзхфвпрлджчсмтб";
|
||||||
|
|
||||||
for (wchar_t ic : consolants)
|
for (wchar_t ic : consolants)
|
||||||
{
|
{
|
||||||
@ -381,10 +518,13 @@ std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun)
|
|||||||
{
|
{
|
||||||
std::wstring nounBase = boost::replace_last_copy(noun, ending, "");
|
std::wstring nounBase = boost::replace_last_copy(noun, ending, "");
|
||||||
|
|
||||||
if (charIsConsolant(nounBase[nounBase.size() - 1]) || nounBase[nounBase.size() - 1] == L'ь' || nounBase[nounBase.size() - 1] == L'ъ')
|
|
||||||
|
|
||||||
|
if ((charIsVowel(nounBase[nounBase.size() - 1])) || //Might be exact the й case
|
||||||
|
(charIsConsolant(nounBase[nounBase.size() - 1]) || nounBase[nounBase.size() - 1] == L'ь' || nounBase[nounBase.size() - 1] == L'ъ'))
|
||||||
{
|
{
|
||||||
result.insert({ nounBase, ending, NounEndingDivision::DC_COMMON });
|
result.insert({ nounBase, ending, NounEndingDivision::DC_COMMON });
|
||||||
result.insert({ nounBase, ending, NounEndingDivision::DC_SPECIAL_PLURAL_FORM });
|
//result.insert({ nounBase, ending, NounEndingDivision::DC_SPECIAL_PLURAL_FORM });
|
||||||
}
|
}
|
||||||
|
|
||||||
//Check missed vowel (simple case)
|
//Check missed vowel (simple case)
|
||||||
@ -470,7 +610,7 @@ std::wstring GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclenc
|
|||||||
return L"";
|
return L"";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::wstring i_form_consolants = L"гкжшчщ";
|
std::wstring i_form_consolants = L"гхкжшчщ";
|
||||||
std::wstring u_form_consolants = L"бпдтвфзснмлрц";
|
std::wstring u_form_consolants = L"бпдтвфзснмлрц";
|
||||||
|
|
||||||
wchar_t GetLastChar(const NounRecord& nounRecord)
|
wchar_t GetLastChar(const NounRecord& nounRecord)
|
||||||
@ -494,14 +634,6 @@ bool charIsUFormConsolant(wchar_t c)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool AIFormEndingIsCorrect(const NounRecord& nounRecord)
|
|
||||||
{
|
|
||||||
return (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) && nounRecord.haveSingleForm ||
|
|
||||||
(GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) && !nounRecord.haveSingleForm;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord)
|
bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord)
|
||||||
{
|
{
|
||||||
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord)));
|
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord)));
|
||||||
@ -670,6 +802,31 @@ bool SecondMaleSSFormAnimatePluralCondition(const NounRecord& nounRecord)
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Predicate for the {SECOND_I_SHORT_INANIMATE, NC_SINGULAR} declension entry.
// Returns true when the record has a singular form, can be inanimate, and
// GetLastChar(nounRecord) is 'й' (presumably the last letter of the
// nominative form - confirm against GetLastChar).
bool SecondIShortInanimateSingularCondition(const NounRecord& nounRecord)
|
||||||
|
{
|
||||||
|
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'й';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Predicate for the {SECOND_I_SHORT_ANIMATE, NC_SINGULAR} declension entry.
// Returns true when the record has a singular form, can be animate, and
// GetLastChar(nounRecord) is 'й' (presumably the last letter of the
// nominative form - confirm against GetLastChar).
bool SecondIShortAnimateSingularCondition(const NounRecord& nounRecord)
|
||||||
|
{
|
||||||
|
return nounRecord.haveSingleForm && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'й';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Predicate for the {SECOND_I_SHORT_INANIMATE, NC_PLURAL} declension entry.
// Returns true when the record has a singular form, can be inanimate, and
// GetLastChar(nounRecord) is 'й'.
// NOTE(review): this is the same test as the singular condition and does not
// look at any plural-related flag of the record - confirm this is intended.
bool SecondIShortInanimatePluralCondition(const NounRecord& nounRecord)
|
||||||
|
{
|
||||||
|
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'й';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Predicate for the {SECOND_I_SHORT_ANIMATE, NC_PLURAL} declension entry.
// Returns true when the record has a singular form, can be animate, and
// GetLastChar(nounRecord) is 'й'.
// NOTE(review): this is the same test as the singular condition and does not
// look at any plural-related flag of the record - confirm this is intended.
bool SecondIShortAnimatePluralCondition(const NounRecord& nounRecord)
|
||||||
|
{
|
||||||
|
return nounRecord.haveSingleForm && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'й';
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
bool SecondNeutralEFormSingularCondition(const NounRecord& nounRecord)
|
bool SecondNeutralEFormSingularCondition(const NounRecord& nounRecord)
|
||||||
{
|
{
|
||||||
return nounRecord.haveSingleForm && nounRecord.gender == NG_NEUTRAL && GetLastChar(nounRecord) == L'е';
|
return nounRecord.haveSingleForm && nounRecord.gender == NG_NEUTRAL && GetLastChar(nounRecord) == L'е';
|
||||||
@ -760,6 +917,12 @@ void SetupDeclentionMap()
|
|||||||
DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_SINGULAR}] = std::bind(SecondMaleSSFormAnimateSingularCondition, std::placeholders::_1);
|
DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_SINGULAR}] = std::bind(SecondMaleSSFormAnimateSingularCondition, std::placeholders::_1);
|
||||||
DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_PLURAL}] = std::bind(SecondMaleSSFormAnimatePluralCondition, std::placeholders::_1);
|
DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_PLURAL}] = std::bind(SecondMaleSSFormAnimatePluralCondition, std::placeholders::_1);
|
||||||
|
|
||||||
|
DeclentionConditionMap[{SECOND_I_SHORT_INANIMATE, NC_SINGULAR}] = std::bind(SecondIShortInanimateSingularCondition, std::placeholders::_1);
|
||||||
|
DeclentionConditionMap[{SECOND_I_SHORT_INANIMATE, NC_PLURAL}] = std::bind(SecondIShortInanimatePluralCondition, std::placeholders::_1);
|
||||||
|
DeclentionConditionMap[{SECOND_I_SHORT_ANIMATE, NC_SINGULAR}] = std::bind(SecondIShortAnimateSingularCondition, std::placeholders::_1);
|
||||||
|
DeclentionConditionMap[{SECOND_I_SHORT_ANIMATE, NC_PLURAL}] = std::bind(SecondIShortAnimatePluralCondition, std::placeholders::_1);
|
||||||
|
|
||||||
|
|
||||||
DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_SINGULAR}] = std::bind(SecondNeutralEFormSingularCondition, std::placeholders::_1);
|
DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_SINGULAR}] = std::bind(SecondNeutralEFormSingularCondition, std::placeholders::_1);
|
||||||
DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_PLURAL}] = std::bind(SecondNeutralEFormPluralCondition, std::placeholders::_1);
|
DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_PLURAL}] = std::bind(SecondNeutralEFormPluralCondition, std::placeholders::_1);
|
||||||
DeclentionConditionMap[{SECOND_NEUTRAL_O_FORM, NC_SINGULAR}] = std::bind(SecondNeutralOFormSingularCondition, std::placeholders::_1);
|
DeclentionConditionMap[{SECOND_NEUTRAL_O_FORM, NC_SINGULAR}] = std::bind(SecondNeutralOFormSingularCondition, std::placeholders::_1);
|
||||||
@ -814,6 +977,8 @@ bool IsDeclencionSecondType(NounDeclencion nounDeclention)
|
|||||||
case SECOND_MALE_UFORM_ANIMATE:
|
case SECOND_MALE_UFORM_ANIMATE:
|
||||||
case SECOND_MALE_SSFORM_INANIMATE:
|
case SECOND_MALE_SSFORM_INANIMATE:
|
||||||
case SECOND_MALE_SSFORM_ANIMATE:
|
case SECOND_MALE_SSFORM_ANIMATE:
|
||||||
|
case SECOND_I_SHORT_INANIMATE:
|
||||||
|
case SECOND_I_SHORT_ANIMATE:
|
||||||
return true;
|
return true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -832,6 +997,7 @@ bool IsDeclencionAnimated(NounDeclencion nounDeclention)
|
|||||||
case SECOND_MALE_IFORM_ANIMATE:
|
case SECOND_MALE_IFORM_ANIMATE:
|
||||||
case SECOND_MALE_UFORM_ANIMATE:
|
case SECOND_MALE_UFORM_ANIMATE:
|
||||||
case SECOND_MALE_SSFORM_ANIMATE:
|
case SECOND_MALE_SSFORM_ANIMATE:
|
||||||
|
case SECOND_I_SHORT_ANIMATE:
|
||||||
case THIRD_FORM_ANIMATE:
|
case THIRD_FORM_ANIMATE:
|
||||||
return true;
|
return true;
|
||||||
break;
|
break;
|
||||||
@ -888,8 +1054,6 @@ void FillDivisionCaseMaps()
|
|||||||
|
|
||||||
DivisionCaseNounModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON];
|
DivisionCaseNounModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON];
|
||||||
|
|
||||||
DivisionCaseNounModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON];
|
|
||||||
|
|
||||||
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; };
|
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; };
|
||||||
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];
|
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];
|
||||||
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];
|
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];
|
||||||
@ -901,8 +1065,6 @@ void FillDivisionCaseMaps()
|
|||||||
return L"";
|
return L"";
|
||||||
};
|
};
|
||||||
|
|
||||||
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];
|
|
||||||
|
|
||||||
|
|
||||||
DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t) { return true; };
|
DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t) { return true; };
|
||||||
|
|
||||||
@ -923,10 +1085,6 @@ void FillDivisionCaseMaps()
|
|||||||
((std::get<2>(t) == NGC_P4_ACCUSATIVE && !IsDeclencionAnimated(std::get<0>(t)))));
|
((std::get<2>(t) == NGC_P4_ACCUSATIVE && !IsDeclencionAnimated(std::get<0>(t)))));
|
||||||
};
|
};
|
||||||
|
|
||||||
DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = [](NounTuple t)
|
|
||||||
{
|
|
||||||
return (std::get<1>(t) == NC_PLURAL);
|
|
||||||
};
|
|
||||||
|
|
||||||
DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t, NounRecord r)
|
DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t, NounRecord r)
|
||||||
{
|
{
|
||||||
@ -948,12 +1106,6 @@ void FillDivisionCaseMaps()
|
|||||||
return r.haveAlternativeMultipleFormEnding && StandardTest(t, r);
|
return r.haveAlternativeMultipleFormEnding && StandardTest(t, r);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = [](NounTuple t, NounRecord r)
|
|
||||||
{
|
|
||||||
return r.specialMultipleForm != L"" && StandardTest(t, r);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -984,74 +1136,41 @@ std::vector<NounStruct> RecognizeNoun(std::wstring noun)
|
|||||||
if (DivisionCaseNounTupleFilterMap[dc](nounTuple))
|
if (DivisionCaseNounTupleFilterMap[dc](nounTuple))
|
||||||
{
|
{
|
||||||
|
|
||||||
std::wstring nounNominaviteSingular = GetNounNoninative(modifiedNounBase, std::get<0>(nounTuple), NC_SINGULAR);
|
if (std::get<1>(nounTuple) == NC_SINGULAR)
|
||||||
|
|
||||||
if (NounIsInDictionary(nounNominaviteSingular))
|
|
||||||
{
|
{
|
||||||
|
|
||||||
NounRecord nounRecord = GetNounRecordFromDictionary(nounNominaviteSingular);
|
std::wstring nounNominaviteSingular = GetNounNoninative(modifiedNounBase, std::get<0>(nounTuple), NC_SINGULAR);
|
||||||
|
|
||||||
if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord))
|
if (NounIsInDictionary(nounNominaviteSingular))
|
||||||
{
|
{
|
||||||
result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord });
|
|
||||||
|
NounRecord nounRecord = GetNounRecordFromDictionary(nounNominaviteSingular);
|
||||||
|
|
||||||
|
if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord))
|
||||||
|
{
|
||||||
|
result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
|
||||||
std::wstring nounNominavitePlural = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_PLURAL);
|
|
||||||
|
|
||||||
if (NounSpecialPluralFormIsInDictionary(nounNominavitePlural))
|
|
||||||
{
|
{
|
||||||
NounRecord nounRecord = GetNounRecordFromDictionary_BySpecialPluralForm(nounNominavitePlural);
|
|
||||||
|
|
||||||
if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord))
|
std::wstring nounNominavitePlural = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_PLURAL);
|
||||||
|
|
||||||
|
if (NounPluralFormIsInDictionary(nounNominavitePlural))
|
||||||
{
|
{
|
||||||
result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord });
|
NounRecord nounRecord = GetNounRecordFromDictionary_ByPluralForm(nounNominavitePlural);
|
||||||
|
|
||||||
|
if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord))
|
||||||
|
{
|
||||||
|
result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
//Special plural form check
|
|
||||||
for (auto nounTuple : possibleTupleArr)
|
|
||||||
{
|
|
||||||
if (std::get<1>(nounTuple) == NC_PLURAL)
|
|
||||||
{
|
|
||||||
std::wstring nounNominavitePlural = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_PLURAL);
|
|
||||||
|
|
||||||
if (NounSpecialPluralFormIsInDictionary(nounNominavitePlural))
|
|
||||||
{
|
|
||||||
NounRecord nounRecord = GetNounRecordFromDictionary_BySpecialPluralForm(nounNominavitePlural);
|
|
||||||
|
|
||||||
|
|
||||||
if (nounRecord.canBeAnimate)
|
|
||||||
{
|
|
||||||
NounStruct ns{ std::get<2>(nounTuple), std::get<1>(nounTuple), true, nounRecord };
|
|
||||||
|
|
||||||
if (!NounScructIsAlreadyInArray(ns, result))
|
|
||||||
{
|
|
||||||
result.push_back(ns);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nounRecord.canBeInanimate)
|
|
||||||
{
|
|
||||||
NounStruct ns{ std::get<2>(nounTuple), std::get<1>(nounTuple), false, nounRecord };
|
|
||||||
|
|
||||||
if (!NounScructIsAlreadyInArray(ns, result))
|
|
||||||
{
|
|
||||||
result.push_back(ns);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}*/
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1078,6 +1197,15 @@ NounDeclencion CalculateNounDeclention(NounRecord nounRecord)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Fills precalculatedNominativePluralSet for every record in NounRecordArr
// with the result of getPluralForm(record). The plural-form lookups
// (NounPluralFormIsInDictionary, GetNounRecordFromDictionary_ByPluralForm)
// read that set, so this has to run before them.
void CalculatePluralForm()
|
||||||
|
{
|
||||||
|
// Iterate by reference so the assignment updates the stored record.
for (auto& nounRecord : NounRecordArr)
|
||||||
|
{
|
||||||
|
nounRecord.precalculatedNominativePluralSet = getPluralForm(nounRecord);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void LoadNounDeclencionCaseTable()
|
void LoadNounDeclencionCaseTable()
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -35,6 +35,8 @@ struct NounRecord
|
|||||||
bool canBeAnimate;
|
bool canBeAnimate;
|
||||||
bool canBeInanimate;
|
bool canBeInanimate;
|
||||||
|
|
||||||
|
std::set<std::wstring> precalculatedNominativePluralSet;
|
||||||
|
|
||||||
NounRecord();
|
NounRecord();
|
||||||
NounRecord(std::wstring line);
|
NounRecord(std::wstring line);
|
||||||
};
|
};
|
||||||
@ -53,6 +55,8 @@ enum NounDeclencion
|
|||||||
SECOND_MALE_UFORM_ANIMATE,
|
SECOND_MALE_UFORM_ANIMATE,
|
||||||
SECOND_MALE_SSFORM_INANIMATE,
|
SECOND_MALE_SSFORM_INANIMATE,
|
||||||
SECOND_MALE_SSFORM_ANIMATE,
|
SECOND_MALE_SSFORM_ANIMATE,
|
||||||
|
SECOND_I_SHORT_INANIMATE,
|
||||||
|
SECOND_I_SHORT_ANIMATE,
|
||||||
SECOND_NEUTRAL_E_FORM,
|
SECOND_NEUTRAL_E_FORM,
|
||||||
SECOND_NEUTRAL_O_FORM,
|
SECOND_NEUTRAL_O_FORM,
|
||||||
THIRD_FORM_INANIMATE,
|
THIRD_FORM_INANIMATE,
|
||||||
@ -126,8 +130,7 @@ struct NounEndingDivision
|
|||||||
DC_COMMON = 0,
|
DC_COMMON = 0,
|
||||||
DC_LOST_VOWEL_O,
|
DC_LOST_VOWEL_O,
|
||||||
DC_LOST_VOWEL_E,
|
DC_LOST_VOWEL_E,
|
||||||
DC_SPECIAL_PLURAL_A,
|
DC_SPECIAL_PLURAL_A
|
||||||
DC_SPECIAL_PLURAL_FORM
|
|
||||||
} divisionCase;
|
} divisionCase;
|
||||||
|
|
||||||
bool operator<(const NounEndingDivision& other) const
|
bool operator<(const NounEndingDivision& other) const
|
||||||
@ -152,18 +155,23 @@ struct NounEndingDivision
|
|||||||
|
|
||||||
std::vector<std::wstring> GetAllNounEndingArr();
|
std::vector<std::wstring> GetAllNounEndingArr();
|
||||||
|
|
||||||
|
|
||||||
|
std::set<std::wstring> getPluralForm(NounRecord nounRecord);
|
||||||
|
|
||||||
bool NounIsInDictionary(std::wstring nounNominative);
|
bool NounIsInDictionary(std::wstring nounNominative);
|
||||||
bool NounSpecialPluralFormIsInDictionary(std::wstring nounNominativePlural);
|
bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural);
|
||||||
|
|
||||||
|
|
||||||
NounRecord GetNounRecordFromDictionary(std::wstring nounNominative);
|
NounRecord GetNounRecordFromDictionary(std::wstring nounNominative);
|
||||||
NounRecord GetNounRecordFromDictionary_BySpecialPluralForm(std::wstring nounNominativePlural);
|
NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural);
|
||||||
|
|
||||||
bool charIsConsolant(wchar_t c);
|
bool charIsConsolant(wchar_t c); //except й
|
||||||
|
|
||||||
bool charIsVowel(wchar_t c);
|
bool charIsVowel(wchar_t c);
|
||||||
|
|
||||||
|
|
||||||
|
bool charIsMissingVowelSoftenerConsolant(wchar_t c);
|
||||||
|
|
||||||
struct NounStruct
|
struct NounStruct
|
||||||
{
|
{
|
||||||
NounGrammaticalCase nounGrammaticalCase;
|
NounGrammaticalCase nounGrammaticalCase;
|
||||||
@ -179,6 +187,12 @@ std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding);
|
|||||||
|
|
||||||
std::wstring GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount);
|
std::wstring GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount);
|
||||||
|
|
||||||
|
wchar_t GetLastChar(const NounRecord& nounRecord);
|
||||||
|
wchar_t GetPrevLastChar(const NounRecord& nounRecord);
|
||||||
|
bool charIsIFormConsolant(wchar_t c);
|
||||||
|
bool charIsUFormConsolant(wchar_t c);
|
||||||
|
|
||||||
|
|
||||||
void SetupDeclentionMap();
|
void SetupDeclentionMap();
|
||||||
|
|
||||||
bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple);
|
bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple);
|
||||||
@ -192,8 +206,11 @@ std::vector<NounStruct> RecognizeNoun(std::wstring noun);
|
|||||||
|
|
||||||
NounDeclencion CalculateNounDeclention(NounRecord nounRecord);
|
NounDeclencion CalculateNounDeclention(NounRecord nounRecord);
|
||||||
|
|
||||||
|
void CalculatePluralForm();
|
||||||
|
|
||||||
void LoadFrequentWordSet();
|
void LoadFrequentWordSet();
|
||||||
|
|
||||||
|
|
||||||
void LoadNounDeclencionCaseTable();
|
void LoadNounDeclencionCaseTable();
|
||||||
|
|
||||||
#endif //NOUN_H_INCLUDED
|
#endif //NOUN_H_INCLUDED
|
||||||
|
Loading…
Reference in New Issue
Block a user