1229 lines
34 KiB
C++
1229 lines
34 KiB
C++
#include "noun.h"
|
||
|
||
#include <iostream> //Xperimental -- for debug only
|
||
|
||
#include "utf8utf16.h"
|
||
|
||
#include "boost/regex.hpp"
|
||
#include "boost/algorithm/string/regex.hpp"
|
||
|
||
namespace NN
|
||
{
|
||
|
||
// Global noun dictionary; the dictionary lookup helpers in this file scan it linearly.
std::vector<NounRecord> NounRecordArr;
|
||
|
||
// Ending tables, one record per declension; GetNounNoninative indexes it by the NounDeclencion value.
std::vector<NounDeclencionCaseTableRecord> nounDeclencionCaseTable;
|
||
|
||
NounRecord::NounRecord()
|
||
: gender(NG_MALE)
|
||
, haveSingleForm(false)
|
||
, haveMultipleForm(false)
|
||
, haveStandardMultipleForm(false)
|
||
, haveStandardMultipleFormWithMissingLastVowel(false)
|
||
, haveStandardMultipleFormEnding(false)
|
||
, haveAlternativeMultipleFormEnding(false)
|
||
, canBeAnimate(false)
|
||
, canBeInanimate(false)
|
||
{
|
||
|
||
}
|
||
|
||
|
||
// Builds a record from one ';'-separated dictionary line.
//
// Fields (0-based), as consumed below:
//   [1]  nominative form
//   [2]  gender: L"м" -> male, L"ж" -> female, anything else -> neutral
//   [3]  has a singular form          [4]  has a plural form
//   [5]  has the standard plural      [6]  standard plural drops the last vowel
//   [7]  standard plural ending used  [8]  alternative plural ending used
//   [9]  special plural form (text)
//   [10] can be animate               [11] can be inanimate
// Field [0] is not read here. A flag is true only when its text is exactly L"1".
//
// A line with fewer than 12 fields used to be indexed past the end of the
// split result. Missing trailing fields are now treated as empty strings,
// which yields false flags, an empty text and neutral gender.
NounRecord::NounRecord(std::wstring line)
{
    const std::size_t expectedFieldCount = 12;

    std::vector<std::wstring> lineArr;
    boost::split_regex(lineArr, line, boost::wregex(L";"));

    if (lineArr.size() < expectedFieldCount)
    {
        lineArr.resize(expectedFieldCount);
    }

    const auto flagAt = [&lineArr](std::size_t index)
    {
        return lineArr[index] == L"1";
    };

    nominativeForm = lineArr[1];

    if (lineArr[2] == L"м")
    {
        gender = NG_MALE;
    }
    else if (lineArr[2] == L"ж")
    {
        gender = NG_FEMALE;
    }
    else
    {
        gender = NG_NEUTRAL;
    }

    haveSingleForm = flagAt(3);
    haveMultipleForm = flagAt(4);
    haveStandardMultipleForm = flagAt(5);
    haveStandardMultipleFormWithMissingLastVowel = flagAt(6);
    haveStandardMultipleFormEnding = flagAt(7);
    haveAlternativeMultipleFormEnding = flagAt(8);

    specialMultipleForm = lineArr[9];

    canBeAnimate = flagAt(10);
    canBeInanimate = flagAt(11);
}
|
||
|
||
|
||
// Converts the textual name of a declension (spelled exactly like the
// enumerator) into its NounDeclencion value. An unknown name is reported on
// std::cout and FIRST_A_IFORM_INANIMATE is returned as the fallback.
NounDeclencion WStringToNounDeclencion(std::wstring str)
{
    struct NamedDeclencion
    {
        const wchar_t* name;
        NounDeclencion value;
    };

    static const NamedDeclencion knownDeclencions[] =
    {
        { L"FIRST_A_IFORM_INANIMATE", FIRST_A_IFORM_INANIMATE },
        { L"FIRST_A_IFORM_ANIMATE", FIRST_A_IFORM_ANIMATE },
        { L"FIRST_A_UFORM_INANIMATE", FIRST_A_UFORM_INANIMATE },
        { L"FIRST_A_UFORM_ANIMATE", FIRST_A_UFORM_ANIMATE },
        { L"FIRST_YA_FORM_INANIMATE", FIRST_YA_FORM_INANIMATE },
        { L"FIRST_YA_FORM_ANIMATE", FIRST_YA_FORM_ANIMATE },

        { L"SECOND_MALE_IFORM_INANIMATE", SECOND_MALE_IFORM_INANIMATE },
        { L"SECOND_MALE_IFORM_ANIMATE", SECOND_MALE_IFORM_ANIMATE },
        { L"SECOND_MALE_UFORM_INANIMATE", SECOND_MALE_UFORM_INANIMATE },
        { L"SECOND_MALE_UFORM_ANIMATE", SECOND_MALE_UFORM_ANIMATE },
        { L"SECOND_MALE_SSFORM_INANIMATE", SECOND_MALE_SSFORM_INANIMATE },
        { L"SECOND_MALE_SSFORM_ANIMATE", SECOND_MALE_SSFORM_ANIMATE },

        { L"SECOND_I_SHORT_INANIMATE", SECOND_I_SHORT_INANIMATE },
        { L"SECOND_I_SHORT_ANIMATE", SECOND_I_SHORT_ANIMATE },

        { L"SECOND_NEUTRAL_E_FORM", SECOND_NEUTRAL_E_FORM },
        { L"SECOND_NEUTRAL_O_FORM", SECOND_NEUTRAL_O_FORM },
        { L"THIRD_FORM_INANIMATE", THIRD_FORM_INANIMATE },
        { L"THIRD_FORM_ANIMATE", THIRD_FORM_ANIMATE },
    };

    for (const NamedDeclencion& candidate : knownDeclencions)
    {
        if (str == candidate.name)
        {
            return candidate.value;
        }
    }

    std::cout << "Error in WStringToNounDeclencion!" << std::endl;
    return FIRST_A_IFORM_INANIMATE;
}
|
||
|
||
|
||
// Returns every noun ending the analyser recognises, including the empty
// ending (a word used without any suffix).
//
// The list used to contain L"я" twice; each ending now appears exactly once,
// so code that iterates the list does not process the same ending twice.
// The relative order of the remaining entries is unchanged.
std::vector<std::wstring> GetAllNounEndingArr()
{
    return
    {
        L"",

        L"й",
        L"ев",

        L"а",
        L"и",
        L"е",
        L"у",
        L"ой",
        L"ы",
        L"ом",
        L"ь",
        L"я",
        L"ю",
        L"ем",
        L"о",
        L"ью",
        L"ам",
        L"ами",
        L"ах",
        L"ов",
        L"ей",
        L"ям",
        L"ях",
        L"ями",
    };
}
|
||
|
||
bool NounIsInDictionary(std::wstring nounNominative)
|
||
{
|
||
for (auto& noun : NounRecordArr)
|
||
{
|
||
if (noun.nominativeForm == nounNominative)
|
||
{
|
||
return true;
|
||
}
|
||
}
|
||
|
||
return false;
|
||
|
||
}
|
||
|
||
std::wstring convertToStandardPluralForm(std::wstring s)
|
||
{
|
||
std::wstring pluralForm = s;
|
||
|
||
if (pluralForm[pluralForm.size() - 1] == L'а' && charIsIFormConsolant(pluralForm[pluralForm.size() - 2]))
|
||
{
|
||
pluralForm[pluralForm.size() - 1] = L'и';
|
||
}
|
||
else if (pluralForm[pluralForm.size() - 1] == L'а' && charIsUFormConsolant(pluralForm[pluralForm.size() - 2]))
|
||
{
|
||
pluralForm[pluralForm.size() - 1] = L'ы';
|
||
}
|
||
else if (pluralForm[pluralForm.size() - 1] == L'я')
|
||
{
|
||
pluralForm[pluralForm.size() - 1] = L'и';
|
||
}
|
||
else if (charIsIFormConsolant(pluralForm[pluralForm.size() - 1]))
|
||
{
|
||
pluralForm += L'и';
|
||
}
|
||
else if (charIsUFormConsolant(pluralForm[pluralForm.size() - 1]))
|
||
{
|
||
pluralForm += L'ы';
|
||
}
|
||
else if (pluralForm[pluralForm.size() - 1] == L'ь')
|
||
{
|
||
pluralForm[pluralForm.size() - 1] = L'и';
|
||
}
|
||
else if (pluralForm[pluralForm.size() - 1] == L'й')
|
||
{
|
||
pluralForm[pluralForm.size() - 1] = L'и';
|
||
}
|
||
else if (pluralForm[pluralForm.size() - 1] == L'о')
|
||
{
|
||
pluralForm[pluralForm.size() - 1] = L'а';
|
||
}
|
||
else if (pluralForm[pluralForm.size() - 1] == L'е')
|
||
{
|
||
pluralForm[pluralForm.size() - 1] = L'я';
|
||
}
|
||
else
|
||
{
|
||
std::cout << "Error in convertToStandardPluralForm" << std::endl;
|
||
}
|
||
|
||
return pluralForm;
|
||
}
|
||
|
||
|
||
// Turns a standard plural into its alternative-ending variant:
// a final и becomes я, a final ы becomes а; any other word is returned as is.
//
// An empty string is returned unchanged (the original indexed s[size() - 1],
// which is out of bounds for an empty string).
std::wstring convertFromStandardToAlternativePluralForm(std::wstring s)
{
    if (s.empty())
    {
        return s;
    }

    wchar_t& last = s.back();

    if (last == L'и')
    {
        last = L'я';
    }
    else if (last == L'ы')
    {
        last = L'а';
    }

    return s;
}
|
||
|
||
std::set<std::wstring> getPluralForm(NounRecord noun)
|
||
{
|
||
std::set<std::wstring> result;
|
||
|
||
if (noun.specialMultipleForm != L"")
|
||
{
|
||
result.insert(noun.specialMultipleForm);
|
||
}
|
||
|
||
if (noun.haveSingleForm)
|
||
{
|
||
if (noun.haveStandardMultipleForm)
|
||
{
|
||
std::wstring pluralForm = convertToStandardPluralForm(noun.nominativeForm);
|
||
|
||
if (noun.haveStandardMultipleFormEnding)
|
||
{
|
||
result.insert(pluralForm);
|
||
}
|
||
|
||
|
||
if (noun.haveAlternativeMultipleFormEnding)
|
||
{
|
||
result.insert(convertFromStandardToAlternativePluralForm(pluralForm));
|
||
}
|
||
|
||
}
|
||
|
||
if (noun.haveStandardMultipleFormWithMissingLastVowel)
|
||
{
|
||
std::wstring pluralForm = convertToStandardPluralForm(noun.nominativeForm);
|
||
|
||
wchar_t prevsschar = pluralForm[pluralForm.size() - 4];
|
||
|
||
if (charIsMissingVowelSoftenerConsolant(prevsschar))
|
||
{
|
||
pluralForm[pluralForm.size() - 3] = L'ь';
|
||
}
|
||
else
|
||
{
|
||
pluralForm.erase(pluralForm.begin() + pluralForm.size() - 3);
|
||
}
|
||
|
||
if (noun.haveStandardMultipleFormEnding)
|
||
{
|
||
result.insert(pluralForm);
|
||
}
|
||
|
||
|
||
if (noun.haveAlternativeMultipleFormEnding)
|
||
{
|
||
result.insert(convertFromStandardToAlternativePluralForm(pluralForm));
|
||
}
|
||
}
|
||
|
||
|
||
|
||
}
|
||
else
|
||
{
|
||
result.insert(noun.nominativeForm);
|
||
}
|
||
|
||
|
||
return result;
|
||
}
|
||
|
||
bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural)
|
||
{
|
||
for (auto& noun : NounRecordArr)
|
||
{
|
||
if (noun.haveMultipleForm)
|
||
{
|
||
if (noun.precalculatedNominativePluralSet.count(nounNominativePlural) != 0)
|
||
{
|
||
return true;
|
||
}
|
||
}
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
// Returns a copy of the first dictionary record whose nominative form equals
// the argument, or a default-constructed record when there is none.
NounRecord GetNounRecordFromDictionary(std::wstring nounNominative)
{
    auto it = NounRecordArr.begin();

    while (it != NounRecordArr.end() && !(it->nominativeForm == nounNominative))
    {
        ++it;
    }

    if (it == NounRecordArr.end())
    {
        return {};
    }

    return *it;
}
|
||
|
||
// Returns a copy of the first dictionary record that has a plural and lists
// the given word among its precalculated nominative plural forms, or a
// default-constructed record when there is none.
NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural)
{
    for (auto it = NounRecordArr.begin(); it != NounRecordArr.end(); ++it)
    {
        if (!it->haveMultipleForm)
        {
            continue;
        }

        if (it->precalculatedNominativePluralSet.count(nounNominativePlural) > 0)
        {
            return *it;
        }
    }

    return {};
}
|
||
|
||
|
||
|
||
|
||
// Used by the dropped-vowel rules: tells whether the consonant in front of
// the dropped vowel turns that vowel into a soft sign instead of removing it.
//   лев   -> львы  (л: the е is replaced by ь)
//   немец -> немцы (not л: the е simply disappears)
bool charIsMissingVowelSoftenerConsolant(wchar_t c)
{
    // Kept as a string so further letters can be added to the set.
    const std::wstring softeners = L"л";

    return softeners.find(c) != std::wstring::npos;
}
|
||
|
||
|
||
std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun)
|
||
{
|
||
std::set<NounEndingDivision> result;
|
||
|
||
auto allNounEndingArr = GetAllNounEndingArr();
|
||
|
||
for (auto ending : allNounEndingArr)
|
||
{
|
||
if (boost::ends_with(noun, ending))
|
||
{
|
||
std::wstring nounBase = boost::replace_last_copy(noun, ending, "");
|
||
|
||
|
||
|
||
if ((charIsVowel(nounBase[nounBase.size() - 1])) || //Might be exact the й case
|
||
(charIsConsolant(nounBase[nounBase.size() - 1]) || nounBase[nounBase.size() - 1] == L'ь' || nounBase[nounBase.size() - 1] == L'ъ'))
|
||
{
|
||
result.insert({ nounBase, ending, NounEndingDivision::DC_COMMON });
|
||
}
|
||
|
||
//Check missed vowel (simple case)
|
||
if (charIsConsolant(nounBase[nounBase.size() - 1]) && charIsConsolant(nounBase[nounBase.size() - 2]))
|
||
{
|
||
result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_O });
|
||
}
|
||
|
||
if (charIsConsolant(nounBase[nounBase.size() - 1]) && nounBase[nounBase.size() - 2] == L'ь' && charIsMissingVowelSoftenerConsolant(nounBase[nounBase.size() - 3]))
|
||
{
|
||
result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E });
|
||
}
|
||
|
||
if (charIsConsolant(nounBase[nounBase.size() - 1]) && charIsConsolant(nounBase[nounBase.size() - 2]) && !charIsMissingVowelSoftenerConsolant(nounBase[nounBase.size() - 2]))
|
||
{
|
||
result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E });
|
||
}
|
||
|
||
}
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
|
||
|
||
std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding)
|
||
{
|
||
std::vector<NounTuple> result;
|
||
|
||
for (auto& noun : nounDeclencionCaseTable)
|
||
{
|
||
for (int i = 0; i < NGC_SIZE * NC_SIZE; i++)
|
||
{
|
||
if (noun.grammaticalCaseTable[i].ending.count(nounEnding) != 0)
|
||
{
|
||
result.push_back(NounTuple{ noun.nounDeclencion, noun.grammaticalCaseTable[i].count, noun.grammaticalCaseTable[i].grammaticalCase });
|
||
}
|
||
}
|
||
|
||
}
|
||
return result;
|
||
}
|
||
|
||
std::vector<NounTuple> FilterNounTupleArrByNounDeclentionSet(std::vector<NounTuple> nounTupleArr, std::set<NounDeclencion> filter)
|
||
{
|
||
std::vector<NounTuple> result;
|
||
|
||
for (auto nounTuple : nounTupleArr)
|
||
{
|
||
if (filter.count(std::get<0>(nounTuple)) != 0)
|
||
{
|
||
result.push_back(nounTuple);
|
||
}
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
|
||
|
||
|
||
std::set<std::wstring> GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount)
|
||
{
|
||
std::set<std::wstring> result;
|
||
|
||
NounDeclencionCaseTableRecord nounDeclencionCaseTableRecord = nounDeclencionCaseTable[static_cast<int>(nounDeclencion)];
|
||
|
||
for (auto& grammaticalTableRecord : nounDeclencionCaseTableRecord.grammaticalCaseTable)
|
||
{
|
||
if (grammaticalTableRecord.grammaticalCase == NGC_P1_NOMINATIVE && grammaticalTableRecord.count == nounCount)
|
||
{
|
||
for (auto& e : grammaticalTableRecord.ending)
|
||
{
|
||
result.insert(nounBase + e);
|
||
}
|
||
|
||
}
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclencion nounDeclencion)
|
||
{
|
||
|
||
if (charIsIFormConsolant(nounBase[nounBase.size() - 1]))
|
||
{
|
||
return nounBase + L"я";
|
||
}
|
||
|
||
if (charIsUFormConsolant(nounBase[nounBase.size() - 1]))
|
||
{
|
||
return nounBase + L"а";
|
||
}
|
||
|
||
if (charIsVowel(nounBase[nounBase.size() - 1]))
|
||
{
|
||
return nounBase + L"я";
|
||
}
|
||
|
||
std::cout << "Error in GetNounNoninative" << std::endl;
|
||
|
||
return L"";
|
||
}
|
||
|
||
|
||
wchar_t GetLastChar(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.nominativeForm[nounRecord.nominativeForm.size() - 1];
|
||
}
|
||
|
||
wchar_t GetPrevLastChar(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.nominativeForm[nounRecord.nominativeForm.size() - 2];
|
||
}
|
||
|
||
|
||
bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord)));
|
||
}
|
||
|
||
bool FirstAIFormAnimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord)));
|
||
}
|
||
|
||
bool FirstAIFormInanimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (
|
||
(GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) ||
|
||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord)))
|
||
);
|
||
}
|
||
|
||
bool FirstAIFormAnimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (
|
||
(GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord))) ||
|
||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord)))
|
||
);
|
||
}
|
||
|
||
|
||
bool FirstAUFormInanimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord)));
|
||
}
|
||
|
||
bool FirstAUFormAnimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord)));
|
||
}
|
||
|
||
|
||
bool FirstAUFormInanimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (
|
||
(GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord))) ||
|
||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'ы' && charIsUFormConsolant(GetPrevLastChar(nounRecord)))
|
||
);
|
||
}
|
||
|
||
bool FirstAUFormAnimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (
|
||
(GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord))) ||
|
||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'ы' && charIsUFormConsolant(GetPrevLastChar(nounRecord)))
|
||
);
|
||
}
|
||
|
||
|
||
bool FirstYaFormInanimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (GetLastChar(nounRecord) == L'я');
|
||
}
|
||
|
||
bool FirstYaFormAnimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.canBeAnimate && (GetLastChar(nounRecord) == L'я');
|
||
}
|
||
|
||
bool FirstYaFormInanimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && (
|
||
(GetLastChar(nounRecord) == L'я') ||
|
||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и')
|
||
);
|
||
}
|
||
|
||
bool FirstYaFormAnimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && (
|
||
(GetLastChar(nounRecord) == L'я') ||
|
||
!nounRecord.haveSingleForm && (GetLastChar(nounRecord) == L'и')
|
||
);
|
||
}
|
||
|
||
|
||
// Second declension
|
||
|
||
bool SecondMaleIFormInanimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && charIsIFormConsolant(GetLastChar(nounRecord));
|
||
}
|
||
|
||
bool SecondMaleIFormInanimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && (
|
||
charIsIFormConsolant(GetLastChar(nounRecord)) ||
|
||
!nounRecord.haveSingleForm && charIsIFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'и'
|
||
);
|
||
|
||
}
|
||
|
||
|
||
bool SecondMaleIFormAnimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && charIsIFormConsolant(GetLastChar(nounRecord));
|
||
}
|
||
|
||
bool SecondMaleIFormAnimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && (
|
||
charIsIFormConsolant(GetLastChar(nounRecord)) ||
|
||
!nounRecord.haveSingleForm && charIsIFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'и'
|
||
);
|
||
}
|
||
|
||
|
||
bool SecondMaleUFormInanimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && charIsUFormConsolant(GetLastChar(nounRecord));
|
||
}
|
||
|
||
bool SecondMaleUFormInanimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && (
|
||
charIsUFormConsolant(GetLastChar(nounRecord)) ||
|
||
!nounRecord.haveSingleForm && charIsUFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'ы'
|
||
);
|
||
}
|
||
|
||
|
||
bool SecondMaleUFormAnimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && charIsUFormConsolant(GetLastChar(nounRecord));
|
||
}
|
||
|
||
bool SecondMaleUFormAnimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && (
|
||
charIsUFormConsolant(GetLastChar(nounRecord)) ||
|
||
!nounRecord.haveSingleForm && charIsUFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'ы'
|
||
);
|
||
}
|
||
|
||
|
||
bool SecondMaleSSFormInanimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'ь';
|
||
}
|
||
|
||
bool SecondMaleSSFormInanimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && (
|
||
GetLastChar(nounRecord) == L'ь' ||
|
||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и'
|
||
);
|
||
}
|
||
|
||
|
||
bool SecondMaleSSFormAnimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'ь';
|
||
}
|
||
|
||
bool SecondMaleSSFormAnimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && (
|
||
GetLastChar(nounRecord) == L'ь' ||
|
||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и'
|
||
);
|
||
}
|
||
|
||
|
||
bool SecondIShortInanimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'й';
|
||
}
|
||
|
||
bool SecondIShortAnimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'й';
|
||
}
|
||
|
||
bool SecondIShortInanimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'й';
|
||
}
|
||
|
||
bool SecondIShortAnimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'й';
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
bool SecondNeutralEFormSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.gender == NG_NEUTRAL && GetLastChar(nounRecord) == L'е';
|
||
}
|
||
|
||
bool SecondNeutralEFormPluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && (
|
||
GetLastChar(nounRecord) == L'е' ||
|
||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'я'
|
||
);
|
||
}
|
||
|
||
bool SecondNeutralOFormSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.gender == NG_NEUTRAL && GetLastChar(nounRecord) == L'о';
|
||
}
|
||
|
||
bool SecondNeutralOFormPluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && (
|
||
GetLastChar(nounRecord) == L'о' ||
|
||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'а'
|
||
);
|
||
}
|
||
|
||
bool ThirdFormInanimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'ь';
|
||
}
|
||
|
||
bool ThirdFormAnimateSingularCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveSingleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'ь';
|
||
}
|
||
|
||
|
||
bool ThirdFormInanimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeInanimate && (
|
||
GetLastChar(nounRecord) == L'ь' ||
|
||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и'
|
||
);
|
||
}
|
||
|
||
bool ThirdFormAnimatePluralCondition(const NounRecord& nounRecord)
|
||
{
|
||
return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeAnimate && (
|
||
GetLastChar(nounRecord) == L'ь' ||
|
||
!nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и'
|
||
);
|
||
}
|
||
|
||
|
||
// Predicate per (declension, count) pair; filled by SetupDeclentionMap and consulted by NounFitsDeclention.
std::map<std::pair<NounDeclencion, NounCount>, std::function < bool(const NounRecord&) >> DeclentionConditionMap;
|
||
|
||
void SetupDeclentionMap()
|
||
{
|
||
|
||
DeclentionConditionMap[{FIRST_A_IFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormInanimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{FIRST_A_IFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormAnimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{FIRST_A_IFORM_INANIMATE, NC_PLURAL}] = std::bind(FirstAIFormInanimatePluralCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{FIRST_A_IFORM_ANIMATE, NC_PLURAL}] = std::bind(FirstAIFormAnimatePluralCondition, std::placeholders::_1);
|
||
|
||
DeclentionConditionMap[{FIRST_A_UFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormInanimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{FIRST_A_UFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormAnimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{FIRST_A_UFORM_INANIMATE, NC_PLURAL}] = std::bind(FirstAUFormInanimatePluralCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{FIRST_A_UFORM_ANIMATE, NC_PLURAL}] = std::bind(FirstAUFormAnimatePluralCondition, std::placeholders::_1);
|
||
|
||
DeclentionConditionMap[{FIRST_YA_FORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormInanimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{FIRST_YA_FORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormAnimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{FIRST_YA_FORM_INANIMATE, NC_PLURAL}] = std::bind(FirstYaFormInanimatePluralCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{FIRST_YA_FORM_ANIMATE, NC_PLURAL}] = std::bind(FirstYaFormAnimatePluralCondition, std::placeholders::_1);
|
||
|
||
//Second form
|
||
DeclentionConditionMap[{SECOND_MALE_IFORM_INANIMATE, NC_SINGULAR}] = std::bind(SecondMaleIFormInanimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_MALE_IFORM_INANIMATE, NC_PLURAL}] = std::bind(SecondMaleIFormInanimatePluralCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_MALE_IFORM_ANIMATE, NC_SINGULAR}] = std::bind(SecondMaleIFormAnimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_MALE_IFORM_ANIMATE, NC_PLURAL}] = std::bind(SecondMaleIFormAnimatePluralCondition, std::placeholders::_1);
|
||
|
||
DeclentionConditionMap[{SECOND_MALE_UFORM_INANIMATE, NC_SINGULAR}] = std::bind(SecondMaleUFormInanimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_MALE_UFORM_INANIMATE, NC_PLURAL}] = std::bind(SecondMaleUFormInanimatePluralCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_MALE_UFORM_ANIMATE, NC_SINGULAR}] = std::bind(SecondMaleUFormAnimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_MALE_UFORM_ANIMATE, NC_PLURAL}] = std::bind(SecondMaleUFormAnimatePluralCondition, std::placeholders::_1);
|
||
|
||
DeclentionConditionMap[{SECOND_MALE_SSFORM_INANIMATE, NC_SINGULAR}] = std::bind(SecondMaleSSFormInanimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_MALE_SSFORM_INANIMATE, NC_PLURAL}] = std::bind(SecondMaleSSFormInanimatePluralCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_SINGULAR}] = std::bind(SecondMaleSSFormAnimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_PLURAL}] = std::bind(SecondMaleSSFormAnimatePluralCondition, std::placeholders::_1);
|
||
|
||
DeclentionConditionMap[{SECOND_I_SHORT_INANIMATE, NC_SINGULAR}] = std::bind(SecondIShortInanimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_I_SHORT_INANIMATE, NC_PLURAL}] = std::bind(SecondIShortInanimatePluralCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_I_SHORT_ANIMATE, NC_SINGULAR}] = std::bind(SecondIShortAnimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_I_SHORT_ANIMATE, NC_PLURAL}] = std::bind(SecondIShortAnimatePluralCondition, std::placeholders::_1);
|
||
|
||
|
||
DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_SINGULAR}] = std::bind(SecondNeutralEFormSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_PLURAL}] = std::bind(SecondNeutralEFormPluralCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_NEUTRAL_O_FORM, NC_SINGULAR}] = std::bind(SecondNeutralOFormSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{SECOND_NEUTRAL_O_FORM, NC_PLURAL}] = std::bind(SecondNeutralOFormPluralCondition, std::placeholders::_1);
|
||
|
||
|
||
DeclentionConditionMap[{THIRD_FORM_INANIMATE, NC_SINGULAR}] = std::bind(ThirdFormInanimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{THIRD_FORM_ANIMATE, NC_SINGULAR}] = std::bind(ThirdFormAnimateSingularCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{THIRD_FORM_INANIMATE, NC_PLURAL}] = std::bind(ThirdFormInanimatePluralCondition, std::placeholders::_1);
|
||
DeclentionConditionMap[{THIRD_FORM_ANIMATE, NC_PLURAL}] = std::bind(ThirdFormAnimatePluralCondition, std::placeholders::_1);
|
||
|
||
|
||
}
|
||
|
||
|
||
// Tells whether the record satisfies the condition registered in
// DeclentionConditionMap for the tuple's (declension, count) pair.
//
// The original used operator[] for the lookup: for a pair that was never
// registered (or when SetupDeclentionMap has not run) it inserted an empty
// std::function into the global map and then threw std::bad_function_call
// when calling it. The lookup is now read-only and an unregistered pair
// simply does not fit.
bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple)
{
    const NounDeclencion nounDeclencion = std::get<0>(nounTuple);
    const NounCount nounCount = std::get<1>(nounTuple);

    const auto entry = DeclentionConditionMap.find(std::pair<NounDeclencion, NounCount>(nounDeclencion, nounCount));

    if (entry == DeclentionConditionMap.end() || !entry->second)
    {
        return false;
    }

    return entry->second(nounRecord);
}
|
||
|
||
bool NounScructIsAlreadyInArray(const NounStruct& nounStruct, const std::vector<NounStruct>& arr)
|
||
{
|
||
for (auto& ns : arr)
|
||
{
|
||
if (ns.nounGrammaticalCase == nounStruct.nounGrammaticalCase &&
|
||
ns.nounRecord.nominativeForm == nounStruct.nounRecord.nominativeForm)
|
||
{
|
||
return true;
|
||
}
|
||
}
|
||
|
||
return false;
|
||
}
|
||
// True for the SECOND_MALE_* and SECOND_I_SHORT_* declensions, false for
// every other value (the SECOND_NEUTRAL_* declensions are not in the list).
bool IsDeclencionSecondType(NounDeclencion nounDeclention)
{
    const bool isMaleForm =
        nounDeclention == SECOND_MALE_IFORM_INANIMATE ||
        nounDeclention == SECOND_MALE_IFORM_ANIMATE ||
        nounDeclention == SECOND_MALE_UFORM_INANIMATE ||
        nounDeclention == SECOND_MALE_UFORM_ANIMATE ||
        nounDeclention == SECOND_MALE_SSFORM_INANIMATE ||
        nounDeclention == SECOND_MALE_SSFORM_ANIMATE;

    const bool isIShortForm =
        nounDeclention == SECOND_I_SHORT_INANIMATE ||
        nounDeclention == SECOND_I_SHORT_ANIMATE;

    return isMaleForm || isIShortForm;
}
|
||
|
||
// True for the declensions whose name ends in _ANIMATE, false for every
// other value.
bool IsDeclencionAnimated(NounDeclencion nounDeclention)
{
    const bool isFirstAnimate =
        nounDeclention == FIRST_A_IFORM_ANIMATE ||
        nounDeclention == FIRST_A_UFORM_ANIMATE ||
        nounDeclention == FIRST_YA_FORM_ANIMATE;

    const bool isSecondAnimate =
        nounDeclention == SECOND_MALE_IFORM_ANIMATE ||
        nounDeclention == SECOND_MALE_UFORM_ANIMATE ||
        nounDeclention == SECOND_MALE_SSFORM_ANIMATE ||
        nounDeclention == SECOND_I_SHORT_ANIMATE;

    return isFirstAnimate || isSecondAnimate || nounDeclention == THIRD_FORM_ANIMATE;
}
|
||
|
||
// Record filter for the common division case: forwards to NounFitsDeclention
// with the arguments swapped into its parameter order.
bool StandardTest(NounTuple nounTuple, NounRecord nounRecord)
{
    const bool fits = NounFitsDeclention(nounRecord, nounTuple);

    return fits;
}
|
||
|
||
// Record filter for the "lost vowel о" division case: accepts records flagged
// as having a standard plural with the last vowel missing. The tuple is not
// consulted.
bool LostVowelOTest(NounTuple nounTuple, NounRecord nounRecord)
{
    const bool dropsLastVowel = nounRecord.haveStandardMultipleFormWithMissingLastVowel;

    return dropsLastVowel;
}
|
||
|
||
// Record filter for the "lost vowel е" division case: accepts records flagged
// as having a standard plural with the last vowel missing. The tuple is not
// consulted.
bool LostVowelETest(NounTuple nounTuple, NounRecord nounRecord)
{
    const bool dropsLastVowel = nounRecord.haveStandardMultipleFormWithMissingLastVowel;

    return dropsLastVowel;
}
|
||
|
||
// String transform per division case, assigned in FillDivisionCaseMaps (presumably applied to the noun base - confirm at the call site).
std::map<NounEndingDivision::DivisionCase, std::function<std::wstring(std::wstring)>> DivisionCaseNounModificatorMap;
|
||
// String transform per division case, assigned in FillDivisionCaseMaps (presumably applied to the ending - confirm at the call site).
std::map<NounEndingDivision::DivisionCase, std::function<std::wstring(std::wstring)>> DivisionCaseEndingModificatorMap;
|
||
|
||
// Per division case: predicate over a NounTuple alone, assigned in FillDivisionCaseMaps.
std::map < NounEndingDivision::DivisionCase, std::function < bool(NounTuple) >> DivisionCaseNounTupleFilterMap;
|
||
|
||
// Per division case: predicate over a NounTuple together with a dictionary record, assigned in FillDivisionCaseMaps.
std::map < NounEndingDivision::DivisionCase, std::function < bool(NounTuple, NounRecord) >> DivisionCaseNounTupleRecordFilterMap;
|
||
|
||
|
||
void FillDivisionCaseMaps()
|
||
{
|
||
DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; };
|
||
|
||
DivisionCaseNounModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](std::wstring s)
|
||
{
|
||
return std::wstring(s.begin(), s.end() - 2) + L"o" + s[s.size() - 1];
|
||
};
|
||
|
||
DivisionCaseNounModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = [](std::wstring s)
|
||
{
|
||
if (s[s.size() - 2] == L'ь')
|
||
{
|
||
return std::wstring(s.begin(), s.end() - 2) + L"е" + s[s.size() - 1];
|
||
}
|
||
else
|
||
{
|
||
return std::wstring(s.begin(), s.end() - 1) + L"е" + s[s.size() - 1];
|
||
}
|
||
};
|
||
|
||
|
||
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; };
|
||
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];
|
||
DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];
|
||
|
||
|
||
|
||
DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t) { return true; };
|
||
|
||
DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](NounTuple t)
|
||
{
|
||
return (std::get<1>(t) == NC_PLURAL) ||
|
||
((std::get<2>(t) != NGC_P1_NOMINATIVE) &&
|
||
(!(std::get<2>(t) == NGC_P4_ACCUSATIVE && !IsDeclencionAnimated(std::get<0>(t)))));
|
||
};
|
||
|
||
DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_O];
|
||
|
||
|
||
|
||
|
||
DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t, NounRecord r)
|
||
{
|
||
return (r.haveStandardMultipleForm || std::get<1>(t) == NC_SINGULAR) && StandardTest(t, r);
|
||
};
|
||
|
||
DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](NounTuple t, NounRecord r)
|
||
{
|
||
return LostVowelOTest(t, r) && StandardTest(t, r);
|
||
};
|
||
|
||
DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_LOST_VOWEL_E] = [](NounTuple t, NounRecord r)
|
||
{
|
||
return LostVowelETest(t, r) && StandardTest(t, r);
|
||
};
|
||
|
||
|
||
|
||
}
|
||
|
||
|
||
|
||
std::set<NounStruct> RecognizeNoun(std::wstring noun)
|
||
{
|
||
|
||
std::set<NounStruct> result;
|
||
|
||
auto nounEndingDivisionArr = getPossibleNounEndingDivisionSet(noun);
|
||
|
||
for (auto nounEndingDivision : nounEndingDivisionArr)
|
||
{
|
||
std::wstring nounBase = nounEndingDivision.base;
|
||
std::wstring nounEnding = nounEndingDivision.ending;
|
||
NounEndingDivision::DivisionCase dc = nounEndingDivision.divisionCase;
|
||
|
||
std::wstring modifiedNounBase = DivisionCaseNounModificatorMap[dc](nounBase);
|
||
std::wstring modifiedNounEnding = DivisionCaseEndingModificatorMap[dc](nounEnding);
|
||
|
||
std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(modifiedNounEnding);
|
||
|
||
//Standard check
|
||
for (NounTuple nounTuple : possibleTupleArr)
|
||
{
|
||
|
||
if (DivisionCaseNounTupleFilterMap[dc](nounTuple))
|
||
{
|
||
|
||
if (std::get<1>(nounTuple) == NC_SINGULAR)
|
||
{
|
||
|
||
std::set<std::wstring> nounNominaviteSingularSet = GetNounNoninative(modifiedNounBase, std::get<0>(nounTuple), NC_SINGULAR);
|
||
|
||
for (auto& nn : nounNominaviteSingularSet)
|
||
{
|
||
|
||
if (NounIsInDictionary(nn))
|
||
{
|
||
|
||
NounRecord nounRecord = GetNounRecordFromDictionary(nn);
|
||
|
||
if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord))
|
||
{
|
||
result.insert({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord });
|
||
}
|
||
}
|
||
|
||
}
|
||
}
|
||
else
|
||
{
|
||
|
||
std::set<std::wstring> nounNominavitePluralSet = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_PLURAL);
|
||
|
||
//Check all plural forms
|
||
|
||
for (auto& nn : nounNominavitePluralSet)
|
||
{
|
||
|
||
if (NounPluralFormIsInDictionary(nn))
|
||
{
|
||
NounRecord nounRecord = GetNounRecordFromDictionary_ByPluralForm(nn);
|
||
|
||
if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord))
|
||
{
|
||
result.insert({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord });
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
|
||
return result;
|
||
}
|
||
|
||
// Returns the declension of the first DeclentionConditionMap entry whose
// predicate accepts nounRecord (the first component of that entry's key).
// If no entry matches, prints an error line to std::cout and returns a
// value-initialized NounDeclencion.
NounDeclencion CalculateNounDeclention(NounRecord nounRecord)
{
    // Experimental: only the first matching entry is used; it has not been
    // checked whether more than one declension can match a record.
    for (auto& declentionCondition : DeclentionConditionMap)
    {
        if (!declentionCondition.second(nounRecord))
        {
            continue;
        }

        return declentionCondition.first.first;
    }

    std::cout << "Error in CalculateNounDeclention" << std::endl;

    return {};
}
|
||
|
||
|
||
void CalculatePluralForm()
|
||
{
|
||
for (auto& nounRecord : NounRecordArr)
|
||
{
|
||
nounRecord.precalculatedNominativePluralSet = getPluralForm(nounRecord);
|
||
}
|
||
}
|
||
|
||
|
||
void LoadNounDeclencionCaseTable()
|
||
{
|
||
|
||
nounDeclencionCaseTable.clear();
|
||
|
||
#ifdef _WIN32
|
||
std::ifstream f("C:/Workplace/ChineseJournal/rudict/grammar_case.csv");
|
||
|
||
#else
|
||
std::ifstream f("/home/devuser/workplace/rudict/grammar_case.csv");
|
||
#endif
|
||
|
||
std::string line;
|
||
std::wstring wline;
|
||
|
||
if (f.is_open())
|
||
{
|
||
|
||
std::cout << "File found!" << std::endl;
|
||
|
||
std::vector<GrammaticalTableRecord> currentGrammaticalCaseTable;
|
||
std::wstring currentNounDeclencion;
|
||
std::wstring currentNounCount;
|
||
|
||
getline(f, line); //Skip one line
|
||
|
||
while (getline(f, line))
|
||
{
|
||
std::vector<std::string> lineArr;
|
||
|
||
boost::split_regex(lineArr, line, boost::regex(";"));
|
||
|
||
if (lineArr[0] != "")
|
||
{
|
||
if (currentNounDeclencion == L"")
|
||
{
|
||
currentNounDeclencion = string_to_wstring(lineArr[0]);
|
||
}
|
||
else
|
||
{
|
||
nounDeclencionCaseTable.push_back(NounDeclencionCaseTableRecord{ WStringToNounDeclencion(currentNounDeclencion), currentGrammaticalCaseTable });
|
||
|
||
currentNounDeclencion = string_to_wstring(lineArr[0]);
|
||
|
||
currentGrammaticalCaseTable.clear();
|
||
}
|
||
|
||
}
|
||
|
||
if (lineArr[1] != "")
|
||
{
|
||
currentNounCount = string_to_wstring(lineArr[1]);
|
||
}
|
||
|
||
std::wstring endings = string_to_wstring(lineArr[3]);
|
||
|
||
std::set<std::wstring> endingsSet;
|
||
boost::split_regex(endingsSet, endings, boost::regex(", "));
|
||
|
||
currentGrammaticalCaseTable.push_back({
|
||
WStringToNounCount(currentNounCount),
|
||
WStringToNounGrammaticalCase(string_to_wstring(lineArr[2])),
|
||
endingsSet
|
||
});
|
||
|
||
}
|
||
//Add last one
|
||
if (currentNounDeclencion != L"")
|
||
{
|
||
nounDeclencionCaseTable.push_back(NounDeclencionCaseTableRecord{ WStringToNounDeclencion(currentNounDeclencion), currentGrammaticalCaseTable });
|
||
}
|
||
|
||
f.close();
|
||
}
|
||
else
|
||
{
|
||
std::cout << "file not found!" << std::endl;
|
||
}
|
||
|
||
|
||
}
|
||
|
||
void LoadFrequentWordSet()
|
||
{
|
||
|
||
|
||
#ifdef _WIN32
|
||
std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_nouns_2000.csv");
|
||
|
||
#else
|
||
std::ifstream f("/home/devuser/workplace/rudict/frequent_nouns_2000.csv");
|
||
#endif
|
||
|
||
std::string line;
|
||
std::wstring wline;
|
||
|
||
if (f.is_open())
|
||
{
|
||
|
||
getline(f, line); //Skip one line
|
||
|
||
std::cout << "File found!" << std::endl;
|
||
while (getline(f, line))
|
||
{
|
||
|
||
wline = string_to_wstring(line);
|
||
NounRecord nounRecord(wline);
|
||
|
||
NounRecordArr.push_back(nounRecord);
|
||
|
||
}
|
||
f.close();
|
||
}
|
||
else
|
||
{
|
||
std::cout << "file not found!" << std::endl;
|
||
}
|
||
|
||
|
||
}
|
||
|
||
} //namespace NN
|