Added й form

This commit is contained in:
Vladislav Khorev 2014-12-02 01:48:08 +00:00
parent 1a9bd97ff8
commit 33bbd2863e
5 changed files with 271 additions and 101 deletions

View File

@ -143,6 +143,30 @@ SECOND_MALE_SSFORM_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ь;парень
;;NGC_P4_ACCUSATIVE;ей;обвиняю парней ;;NGC_P4_ACCUSATIVE;ей;обвиняю парней
;;NGC_P5_INSTRUMENTAL;ями;говорю с парнями ;;NGC_P5_INSTRUMENTAL;ями;говорю с парнями
;;NGC_P6_PREPOSITIONAL;ях;говорю о парнях ;;NGC_P6_PREPOSITIONAL;ях;говорю о парнях
SECOND_I_SHORT_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;й;каравай
;;NGC_P2_GENITIVE;я;подарок для каравая
;;NGC_P3_DATIVE;ю;подарок караваю
;;NGC_P4_ACCUSATIVE;й;обвиняю каравай
;;NGC_P5_INSTRUMENTAL;ем;говорю с караваем
;;NGC_P6_PREPOSITIONAL;е;говорю о каравае
;NC_PLURAL;NGC_P1_NOMINATIVE;и;караваи
;;NGC_P2_GENITIVE;ев;подарок для караваев
;;NGC_P3_DATIVE;ям;подарок караваям
;;NGC_P4_ACCUSATIVE;и;обвиняю караваи
;;NGC_P5_INSTRUMENTAL;ями;говорю с караваями
;;NGC_P6_PREPOSITIONAL;ях;говорю о караваях
SECOND_I_SHORT_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;й;герой
;;NGC_P2_GENITIVE;я;подарок для героя
;;NGC_P3_DATIVE;ю;подарок герою
;;NGC_P4_ACCUSATIVE;я;обвиняю героя
;;NGC_P5_INSTRUMENTAL;ем;говорю с героем
;;NGC_P6_PREPOSITIONAL;е;говорю о герое
;NC_PLURAL;NGC_P1_NOMINATIVE;и;герои
;;NGC_P2_GENITIVE;ев;подарок для героев
;;NGC_P3_DATIVE;ям;подарок героям
;;NGC_P4_ACCUSATIVE;ев;обвиняю героев
;;NGC_P5_INSTRUMENTAL;ями;говорю с героями
;;NGC_P6_PREPOSITIONAL;ях;говорю о героях
SECOND_NEUTRAL_E_FORM;NC_SINGULAR;NGC_P1_NOMINATIVE;е;поле SECOND_NEUTRAL_E_FORM;NC_SINGULAR;NGC_P1_NOMINATIVE;е;поле
;;NGC_P2_GENITIVE;я;подарок для поля ;;NGC_P2_GENITIVE;я;подарок для поля
;;NGC_P3_DATIVE;ю;подарок полю ;;NGC_P3_DATIVE;ю;подарок полю

1 Склонение Род Падеж Окончание Пример
143 NGC_P4_ACCUSATIVE ей обвиняю парней
144 NGC_P5_INSTRUMENTAL ями говорю с парнями
145 NGC_P6_PREPOSITIONAL ях говорю о парнях
146 SECOND_I_SHORT_INANIMATE NC_SINGULAR NGC_P1_NOMINATIVE й каравай
147 NGC_P2_GENITIVE я подарок для каравая
148 NGC_P3_DATIVE ю подарок караваю
149 NGC_P4_ACCUSATIVE й обвиняю каравай
150 NGC_P5_INSTRUMENTAL ем говорю с караваем
151 NGC_P6_PREPOSITIONAL е говорю о каравае
152 NC_PLURAL NGC_P1_NOMINATIVE и караваи
153 NGC_P2_GENITIVE ев подарок для караваев
154 NGC_P3_DATIVE ям подарок караваям
155 NGC_P4_ACCUSATIVE и обвиняю караваи
156 NGC_P5_INSTRUMENTAL ями говорю с караваями
157 NGC_P6_PREPOSITIONAL ях говорю о караваях
158 SECOND_I_SHORT_ANIMATE NC_SINGULAR NGC_P1_NOMINATIVE й герой
159 NGC_P2_GENITIVE я подарок для героя
160 NGC_P3_DATIVE ю подарок герою
161 NGC_P4_ACCUSATIVE я обвиняю героя
162 NGC_P5_INSTRUMENTAL ем говорю с героем
163 NGC_P6_PREPOSITIONAL е говорю о герое
164 NC_PLURAL NGC_P1_NOMINATIVE и герои
165 NGC_P2_GENITIVE ев подарок для героев
166 NGC_P3_DATIVE ям подарок героям
167 NGC_P4_ACCUSATIVE ев обвиняю героев
168 NGC_P5_INSTRUMENTAL ями говорю с героями
169 NGC_P6_PREPOSITIONAL ях говорю о героях
170 SECOND_NEUTRAL_E_FORM NC_SINGULAR NGC_P1_NOMINATIVE е поле
171 NGC_P2_GENITIVE я подарок для поля
172 NGC_P3_DATIVE ю подарок полю

Binary file not shown.

View File

@ -12,6 +12,7 @@ int main()
LoadNounDeclencionCaseTable(); LoadNounDeclencionCaseTable();
LoadFrequentWordSet(); LoadFrequentWordSet();
FillDivisionCaseMaps(); FillDivisionCaseMaps();
CalculatePluralForm();
//RecognizeNoun(L"стульями"); //RecognizeNoun(L"стульями");
//Косяк: "вечер" //Косяк: "вечер"

View File

@ -120,6 +120,17 @@ NounDeclencion WStringToNounDeclencion(std::wstring str)
{ {
return SECOND_MALE_SSFORM_ANIMATE; return SECOND_MALE_SSFORM_ANIMATE;
} }
if (str == L"SECOND_I_SHORT_INANIMATE")
{
return SECOND_I_SHORT_INANIMATE;
}
if (str == L"SECOND_I_SHORT_ANIMATE")
{
return SECOND_I_SHORT_ANIMATE;
}
if (str == L"SECOND_NEUTRAL_E_FORM") if (str == L"SECOND_NEUTRAL_E_FORM")
{ {
return SECOND_NEUTRAL_E_FORM; return SECOND_NEUTRAL_E_FORM;
@ -225,6 +236,10 @@ std::vector<std::wstring> GetAllNounEndingArr()
std::vector<std::wstring> result std::vector<std::wstring> result
{ {
L"", L"",
L"й",
L"ев",
L"а", L"а",
L"и", L"и",
L"е", L"е",
@ -266,18 +281,143 @@ bool NounIsInDictionary(std::wstring nounNominative)
} }
// convertToStandardPluralForm: derives a nominative plural spelling from the
// word `s` by looking only at its last one or two characters. The first
// matching rule wins; the (possibly modified) copy of `s` is returned.
// NOTE(review): every branch reads size() - 1 and the first two also read
// size() - 2 with no length check, so an empty or one-character argument is
// indexed out of range -- confirm callers never pass one.
bool NounSpecialPluralFormIsInDictionary(std::wstring nounNominativePlural) std::wstring convertToStandardPluralForm(std::wstring s)
{
std::wstring pluralForm = s;
// Trailing 'а' preceded by a character accepted by charIsIFormConsolant: 'а' -> 'и'.
if (pluralForm[pluralForm.size() - 1] == L'а' && charIsIFormConsolant(pluralForm[pluralForm.size() - 2]))
{
pluralForm[pluralForm.size() - 1] = L'и';
}
// Trailing 'а' preceded by a character accepted by charIsUFormConsolant: 'а' -> 'ы'.
else if (pluralForm[pluralForm.size() - 1] == L'а' && charIsUFormConsolant(pluralForm[pluralForm.size() - 2]))
{
pluralForm[pluralForm.size() - 1] = L'ы';
}
// Trailing 'я' is replaced by 'и'.
else if (pluralForm[pluralForm.size() - 1] == L'я')
{
pluralForm[pluralForm.size() - 1] = L'и';
}
// Word ends in a character accepted by charIsIFormConsolant: append 'и'.
else if (charIsIFormConsolant(pluralForm[pluralForm.size() - 1]))
{
pluralForm += L'и';
}
// Word ends in a character accepted by charIsUFormConsolant: append 'ы'.
else if (charIsUFormConsolant(pluralForm[pluralForm.size() - 1]))
{
pluralForm += L'ы';
}
// Trailing 'ь' is replaced by 'и'.
else if (pluralForm[pluralForm.size() - 1] == L'ь')
{
pluralForm[pluralForm.size() - 1] = L'и';
}
// Trailing 'й' is replaced by 'и'.
else if (pluralForm[pluralForm.size() - 1] == L'й')
{
pluralForm[pluralForm.size() - 1] = L'и';
}
// Trailing 'о' is replaced by 'а'.
else if (pluralForm[pluralForm.size() - 1] == L'о')
{
pluralForm[pluralForm.size() - 1] = L'а';
}
// Trailing 'е' is replaced by 'я'.
else if (pluralForm[pluralForm.size() - 1] == L'е')
{
pluralForm[pluralForm.size() - 1] = L'я';
}
// No rule matched (this includes a trailing 'а' whose preceding character
// satisfies neither consonant predicate): report on std::cout and fall
// through, returning the word unchanged.
else
{
std::cout << "Error in convertToStandardPluralForm" << std::endl;
}
return pluralForm;
}
// Turns a standard nominative plural ending into its alternative counterpart:
// a trailing 'и' becomes 'я' and a trailing 'ы' becomes 'а'.
//
// s: plural form to convert (taken by value and edited in place).
// Returns the converted word; any word that does not end in 'и' or 'ы',
// including the empty string, is returned unchanged.
std::wstring convertFromStandardToAlternativePluralForm(std::wstring s)
{
    // An empty word has no last character; s.size() - 1 would wrap around
    // and index far out of range, so return early.
    if (s.empty())
    {
        return s;
    }

    wchar_t& lastChar = s.back();
    if (lastChar == L'и')
    {
        lastChar = L'я';
    }
    else if (lastChar == L'ы')
    {
        lastChar = L'а';
    }
    return s;
}
// Collects every nominative plural spelling permitted by a dictionary record.
//
// - A non-empty specialMultipleForm is always included.
// - If the noun has no singular form, nominativeForm itself is included.
// - Otherwise the plural is derived with convertToStandardPluralForm and,
//   depending on the record's flags, added with the standard ending, the
//   alternative ending (convertFromStandardToAlternativePluralForm), or both.
//   When haveStandardMultipleFormWithMissingLastVowel is set, the same is done
//   for the stem variant whose third-from-last character is dropped or
//   replaced by 'ь'.
//
// noun: the dictionary record to inspect.
// Returns the set of plural spellings (possibly empty).
std::set<std::wstring> getPluralForm(NounRecord noun)
{
    std::set<std::wstring> result;

    if (noun.specialMultipleForm != L"")
    {
        result.insert(noun.specialMultipleForm);
    }

    if (!noun.haveSingleForm)
    {
        // No singular form: the dictionary entry is already the plural.
        result.insert(noun.nominativeForm);
        return result;
    }

    // Inserts the ending variants that the record's flags enable.
    auto insertEnabledVariants = [&result, &noun](const std::wstring& pluralForm)
    {
        if (noun.haveStandardMultipleFormEnding)
        {
            result.insert(pluralForm);
        }
        if (noun.haveAlternativeMultipleFormEnding)
        {
            result.insert(convertFromStandardToAlternativePluralForm(pluralForm));
        }
    };

    if (noun.haveStandardMultipleForm)
    {
        insertEnabledVariants(convertToStandardPluralForm(noun.nominativeForm));
    }

    if (noun.haveStandardMultipleFormWithMissingLastVowel)
    {
        std::wstring pluralForm = convertToStandardPluralForm(noun.nominativeForm);

        // The rule touches the characters at size() - 3 and size() - 4, so the
        // word must be at least four characters long; shorter input used to be
        // indexed out of range.
        if (pluralForm.size() >= 4)
        {
            const std::wstring::size_type vowelIndex = pluralForm.size() - 3;
            if (charIsMissingVowelSoftenerConsolant(pluralForm[vowelIndex - 1]))
            {
                // After a softening consonant the vowel is replaced by 'ь'.
                pluralForm[vowelIndex] = L'ь';
            }
            else
            {
                // Otherwise the vowel is simply dropped.
                pluralForm.erase(vowelIndex, 1);
            }
            insertEnabledVariants(pluralForm);
        }
        else
        {
            std::cout << "Error in getPluralForm" << std::endl;
        }
    }

    return result;
}
bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural)
{ {
for (auto& noun : NounRecordArr) for (auto& noun : NounRecordArr)
{ {
if (noun.specialMultipleForm == nounNominativePlural) if (noun.haveMultipleForm)
{
if (noun.precalculatedNominativePluralSet.count(nounNominativePlural) != 0)
{ {
return true; return true;
} }
if (!noun.haveSingleForm && noun.nominativeForm == nounNominativePlural)
{
return true;
} }
} }
@ -298,29 +438,26 @@ NounRecord GetNounRecordFromDictionary(std::wstring nounNominative)
} }
NounRecord GetNounRecordFromDictionary_BySpecialPluralForm(std::wstring nounNominativePlural) NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural)
{ {
for (auto& noun : NounRecordArr) for (auto& noun : NounRecordArr)
{ {
if (noun.specialMultipleForm == nounNominativePlural) if (noun.haveMultipleForm)
{ {
return noun; if (noun.precalculatedNominativePluralSet.count(nounNominativePlural) != 0)
}
if (!noun.haveSingleForm && noun.nominativeForm == nounNominativePlural)
{ {
return noun; return noun;
} }
} }
}
return{}; return{};
} }
bool charIsConsolant(wchar_t c) bool charIsConsolant(wchar_t c) //except й
{ {
std::wstring consolants = L"йцкнгшщзхфвпрлджчсмтб"; std::wstring consolants = L"цкнгшщзхфвпрлджчсмтб";
for (wchar_t ic : consolants) for (wchar_t ic : consolants)
{ {
@ -381,10 +518,13 @@ std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun)
{ {
std::wstring nounBase = boost::replace_last_copy(noun, ending, ""); std::wstring nounBase = boost::replace_last_copy(noun, ending, "");
if (charIsConsolant(nounBase[nounBase.size() - 1]) || nounBase[nounBase.size() - 1] == L'ь' || nounBase[nounBase.size() - 1] == L'ъ')
if ((charIsVowel(nounBase[nounBase.size() - 1])) || //Might be exact the й case
(charIsConsolant(nounBase[nounBase.size() - 1]) || nounBase[nounBase.size() - 1] == L'ь' || nounBase[nounBase.size() - 1] == L'ъ'))
{ {
result.insert({ nounBase, ending, NounEndingDivision::DC_COMMON }); result.insert({ nounBase, ending, NounEndingDivision::DC_COMMON });
result.insert({ nounBase, ending, NounEndingDivision::DC_SPECIAL_PLURAL_FORM }); //result.insert({ nounBase, ending, NounEndingDivision::DC_SPECIAL_PLURAL_FORM });
} }
//Check missed vowel (simple case) //Check missed vowel (simple case)
@ -470,7 +610,7 @@ std::wstring GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclenc
return L""; return L"";
} }
std::wstring i_form_consolants = L"гкжшчщ"; std::wstring i_form_consolants = L"гхкжшчщ";
std::wstring u_form_consolants = L"бпдтвфзснмлрц"; std::wstring u_form_consolants = L"бпдтвфзснмлрц";
wchar_t GetLastChar(const NounRecord& nounRecord) wchar_t GetLastChar(const NounRecord& nounRecord)
@ -494,14 +634,6 @@ bool charIsUFormConsolant(wchar_t c)
} }
bool AIFormEndingIsCorrect(const NounRecord& nounRecord)
{
return (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) && nounRecord.haveSingleForm ||
(GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) && !nounRecord.haveSingleForm;
}
bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord) bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord)
{ {
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))); return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord)));
@ -670,6 +802,31 @@ bool SecondMaleSSFormAnimatePluralCondition(const NounRecord& nounRecord)
); );
} }
// Predicate for {SECOND_I_SHORT_INANIMATE, NC_SINGULAR}: holds when the record
// has a singular form, can be inanimate, and GetLastChar reports 'й'.
bool SecondIShortInanimateSingularCondition(const NounRecord& nounRecord)
{
    if (!nounRecord.haveSingleForm || !nounRecord.canBeInanimate)
    {
        return false;
    }
    return GetLastChar(nounRecord) == L'й';
}
// Predicate for {SECOND_I_SHORT_ANIMATE, NC_SINGULAR}: holds when the record
// has a singular form, can be animate, and GetLastChar reports 'й'.
bool SecondIShortAnimateSingularCondition(const NounRecord& nounRecord)
{
    if (!nounRecord.haveSingleForm || !nounRecord.canBeAnimate)
    {
        return false;
    }
    return GetLastChar(nounRecord) == L'й';
}
// Predicate for {SECOND_I_SHORT_INANIMATE, NC_PLURAL}: holds when the record
// has a singular form, can be inanimate, and GetLastChar reports 'й'.
// NOTE(review): this is the same test as the singular predicate (it checks
// haveSingleForm, not a plural flag) -- confirm that is intended.
bool SecondIShortInanimatePluralCondition(const NounRecord& nounRecord)
{
    if (!nounRecord.haveSingleForm || !nounRecord.canBeInanimate)
    {
        return false;
    }
    return GetLastChar(nounRecord) == L'й';
}
// Predicate for {SECOND_I_SHORT_ANIMATE, NC_PLURAL}: holds when the record
// has a singular form, can be animate, and GetLastChar reports 'й'.
// NOTE(review): this is the same test as the singular predicate (it checks
// haveSingleForm, not a plural flag) -- confirm that is intended.
bool SecondIShortAnimatePluralCondition(const NounRecord& nounRecord)
{
    if (!nounRecord.haveSingleForm || !nounRecord.canBeAnimate)
    {
        return false;
    }
    return GetLastChar(nounRecord) == L'й';
}
bool SecondNeutralEFormSingularCondition(const NounRecord& nounRecord) bool SecondNeutralEFormSingularCondition(const NounRecord& nounRecord)
{ {
return nounRecord.haveSingleForm && nounRecord.gender == NG_NEUTRAL && GetLastChar(nounRecord) == L'е'; return nounRecord.haveSingleForm && nounRecord.gender == NG_NEUTRAL && GetLastChar(nounRecord) == L'е';
@ -760,6 +917,12 @@ void SetupDeclentionMap()
DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_SINGULAR}] = std::bind(SecondMaleSSFormAnimateSingularCondition, std::placeholders::_1); DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_SINGULAR}] = std::bind(SecondMaleSSFormAnimateSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_PLURAL}] = std::bind(SecondMaleSSFormAnimatePluralCondition, std::placeholders::_1); DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_PLURAL}] = std::bind(SecondMaleSSFormAnimatePluralCondition, std::placeholders::_1);
DeclentionConditionMap[{SECOND_I_SHORT_INANIMATE, NC_SINGULAR}] = std::bind(SecondIShortInanimateSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{SECOND_I_SHORT_INANIMATE, NC_PLURAL}] = std::bind(SecondIShortInanimatePluralCondition, std::placeholders::_1);
DeclentionConditionMap[{SECOND_I_SHORT_ANIMATE, NC_SINGULAR}] = std::bind(SecondIShortAnimateSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{SECOND_I_SHORT_ANIMATE, NC_PLURAL}] = std::bind(SecondIShortAnimatePluralCondition, std::placeholders::_1);
DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_SINGULAR}] = std::bind(SecondNeutralEFormSingularCondition, std::placeholders::_1); DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_SINGULAR}] = std::bind(SecondNeutralEFormSingularCondition, std::placeholders::_1);
DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_PLURAL}] = std::bind(SecondNeutralEFormPluralCondition, std::placeholders::_1); DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_PLURAL}] = std::bind(SecondNeutralEFormPluralCondition, std::placeholders::_1);
DeclentionConditionMap[{SECOND_NEUTRAL_O_FORM, NC_SINGULAR}] = std::bind(SecondNeutralOFormSingularCondition, std::placeholders::_1); DeclentionConditionMap[{SECOND_NEUTRAL_O_FORM, NC_SINGULAR}] = std::bind(SecondNeutralOFormSingularCondition, std::placeholders::_1);
@ -814,6 +977,8 @@ bool IsDeclencionSecondType(NounDeclencion nounDeclention)
case SECOND_MALE_UFORM_ANIMATE: case SECOND_MALE_UFORM_ANIMATE:
case SECOND_MALE_SSFORM_INANIMATE: case SECOND_MALE_SSFORM_INANIMATE:
case SECOND_MALE_SSFORM_ANIMATE: case SECOND_MALE_SSFORM_ANIMATE:
case SECOND_I_SHORT_INANIMATE:
case SECOND_I_SHORT_ANIMATE:
return true; return true;
break; break;
default: default:
@ -832,6 +997,7 @@ bool IsDeclencionAnimated(NounDeclencion nounDeclention)
case SECOND_MALE_IFORM_ANIMATE: case SECOND_MALE_IFORM_ANIMATE:
case SECOND_MALE_UFORM_ANIMATE: case SECOND_MALE_UFORM_ANIMATE:
case SECOND_MALE_SSFORM_ANIMATE: case SECOND_MALE_SSFORM_ANIMATE:
case SECOND_I_SHORT_ANIMATE:
case THIRD_FORM_ANIMATE: case THIRD_FORM_ANIMATE:
return true; return true;
break; break;
@ -888,8 +1054,6 @@ void FillDivisionCaseMaps()
DivisionCaseNounModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON]; DivisionCaseNounModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_A] = DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON];
DivisionCaseNounModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON];
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; }; DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; };
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON]; DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON]; DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];
@ -901,8 +1065,6 @@ void FillDivisionCaseMaps()
return L""; return L"";
}; };
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];
DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t) { return true; }; DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t) { return true; };
@ -923,10 +1085,6 @@ void FillDivisionCaseMaps()
((std::get<2>(t) == NGC_P4_ACCUSATIVE && !IsDeclencionAnimated(std::get<0>(t))))); ((std::get<2>(t) == NGC_P4_ACCUSATIVE && !IsDeclencionAnimated(std::get<0>(t)))));
}; };
DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = [](NounTuple t)
{
return (std::get<1>(t) == NC_PLURAL);
};
DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t, NounRecord r) DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t, NounRecord r)
{ {
@ -949,12 +1107,6 @@ void FillDivisionCaseMaps()
}; };
DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_SPECIAL_PLURAL_FORM] = [](NounTuple t, NounRecord r)
{
return r.specialMultipleForm != L"" && StandardTest(t, r);
};
} }
@ -982,6 +1134,9 @@ std::vector<NounStruct> RecognizeNoun(std::wstring noun)
{ {
if (DivisionCaseNounTupleFilterMap[dc](nounTuple)) if (DivisionCaseNounTupleFilterMap[dc](nounTuple))
{
if (std::get<1>(nounTuple) == NC_SINGULAR)
{ {
std::wstring nounNominaviteSingular = GetNounNoninative(modifiedNounBase, std::get<0>(nounTuple), NC_SINGULAR); std::wstring nounNominaviteSingular = GetNounNoninative(modifiedNounBase, std::get<0>(nounTuple), NC_SINGULAR);
@ -997,61 +1152,25 @@ std::vector<NounStruct> RecognizeNoun(std::wstring noun)
} }
} }
}
else
{
std::wstring nounNominavitePlural = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_PLURAL); std::wstring nounNominavitePlural = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_PLURAL);
if (NounSpecialPluralFormIsInDictionary(nounNominavitePlural)) if (NounPluralFormIsInDictionary(nounNominavitePlural))
{ {
NounRecord nounRecord = GetNounRecordFromDictionary_BySpecialPluralForm(nounNominavitePlural); NounRecord nounRecord = GetNounRecordFromDictionary_ByPluralForm(nounNominavitePlural);
if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord)) if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord))
{ {
result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord }); result.push_back({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord });
} }
} }
}
} }
} }
/*
//Special plural form check
for (auto nounTuple : possibleTupleArr)
{
if (std::get<1>(nounTuple) == NC_PLURAL)
{
std::wstring nounNominavitePlural = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_PLURAL);
if (NounSpecialPluralFormIsInDictionary(nounNominavitePlural))
{
NounRecord nounRecord = GetNounRecordFromDictionary_BySpecialPluralForm(nounNominavitePlural);
if (nounRecord.canBeAnimate)
{
NounStruct ns{ std::get<2>(nounTuple), std::get<1>(nounTuple), true, nounRecord };
if (!NounScructIsAlreadyInArray(ns, result))
{
result.push_back(ns);
}
}
if (nounRecord.canBeInanimate)
{
NounStruct ns{ std::get<2>(nounTuple), std::get<1>(nounTuple), false, nounRecord };
if (!NounScructIsAlreadyInArray(ns, result))
{
result.push_back(ns);
}
}
}
}
}*/
} }
@ -1078,6 +1197,15 @@ NounDeclencion CalculateNounDeclention(NounRecord nounRecord)
} }
void CalculatePluralForm()
{
for (auto& nounRecord : NounRecordArr)
{
nounRecord.precalculatedNominativePluralSet = getPluralForm(nounRecord);
}
}
void LoadNounDeclencionCaseTable() void LoadNounDeclencionCaseTable()
{ {

View File

@ -35,6 +35,8 @@ struct NounRecord
bool canBeAnimate; bool canBeAnimate;
bool canBeInanimate; bool canBeInanimate;
std::set<std::wstring> precalculatedNominativePluralSet;
NounRecord(); NounRecord();
NounRecord(std::wstring line); NounRecord(std::wstring line);
}; };
@ -53,6 +55,8 @@ enum NounDeclencion
SECOND_MALE_UFORM_ANIMATE, SECOND_MALE_UFORM_ANIMATE,
SECOND_MALE_SSFORM_INANIMATE, SECOND_MALE_SSFORM_INANIMATE,
SECOND_MALE_SSFORM_ANIMATE, SECOND_MALE_SSFORM_ANIMATE,
SECOND_I_SHORT_INANIMATE,
SECOND_I_SHORT_ANIMATE,
SECOND_NEUTRAL_E_FORM, SECOND_NEUTRAL_E_FORM,
SECOND_NEUTRAL_O_FORM, SECOND_NEUTRAL_O_FORM,
THIRD_FORM_INANIMATE, THIRD_FORM_INANIMATE,
@ -126,8 +130,7 @@ struct NounEndingDivision
DC_COMMON = 0, DC_COMMON = 0,
DC_LOST_VOWEL_O, DC_LOST_VOWEL_O,
DC_LOST_VOWEL_E, DC_LOST_VOWEL_E,
DC_SPECIAL_PLURAL_A, DC_SPECIAL_PLURAL_A
DC_SPECIAL_PLURAL_FORM
} divisionCase; } divisionCase;
bool operator<(const NounEndingDivision& other) const bool operator<(const NounEndingDivision& other) const
@ -152,18 +155,23 @@ struct NounEndingDivision
std::vector<std::wstring> GetAllNounEndingArr(); std::vector<std::wstring> GetAllNounEndingArr();
std::set<std::wstring> getPluralForm(NounRecord nounRecord);
bool NounIsInDictionary(std::wstring nounNominative); bool NounIsInDictionary(std::wstring nounNominative);
bool NounSpecialPluralFormIsInDictionary(std::wstring nounNominativePlural); bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural);
NounRecord GetNounRecordFromDictionary(std::wstring nounNominative); NounRecord GetNounRecordFromDictionary(std::wstring nounNominative);
NounRecord GetNounRecordFromDictionary_BySpecialPluralForm(std::wstring nounNominativePlural); NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural);
bool charIsConsolant(wchar_t c); bool charIsConsolant(wchar_t c); //except й
bool charIsVowel(wchar_t c); bool charIsVowel(wchar_t c);
bool charIsMissingVowelSoftenerConsolant(wchar_t c);
struct NounStruct struct NounStruct
{ {
NounGrammaticalCase nounGrammaticalCase; NounGrammaticalCase nounGrammaticalCase;
@ -179,6 +187,12 @@ std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding);
std::wstring GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount); std::wstring GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount);
wchar_t GetLastChar(const NounRecord& nounRecord);
wchar_t GetPrevLastChar(const NounRecord& nounRecord);
bool charIsIFormConsolant(wchar_t c);
bool charIsUFormConsolant(wchar_t c);
void SetupDeclentionMap(); void SetupDeclentionMap();
bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple); bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple);
@ -192,8 +206,11 @@ std::vector<NounStruct> RecognizeNoun(std::wstring noun);
NounDeclencion CalculateNounDeclention(NounRecord nounRecord); NounDeclencion CalculateNounDeclention(NounRecord nounRecord);
void CalculatePluralForm();
void LoadFrequentWordSet(); void LoadFrequentWordSet();
void LoadNounDeclencionCaseTable(); void LoadNounDeclencionCaseTable();
#endif //NOUN_H_INCLUDED #endif //NOUN_H_INCLUDED