chinese-journal/rudict/rudict/noun.cpp

#include "noun.h"

#include <iostream> //Xperimental -- for debug only

#include "utf8utf16.h"

#include "boost/regex.hpp"
#include "boost/algorithm/string/regex.hpp"

namespace NN
{

// Noun dictionary. LoadFrequentWordSet() appends to it; the lookup helpers in this file scan it linearly.
std::vector<NounRecord> NounRecordArr;

// Ending tables, one record per declension. Cleared and refilled by LoadNounDeclencionCaseTable().
std::vector<NounDeclencionCaseTableRecord> nounDeclencionCaseTable;

// Default record: male gender, every boolean flag cleared.
// The string/set members are not listed here and are therefore default-constructed (empty).
NounRecord::NounRecord()
	: gender(NG_MALE)
	, haveSingleForm(false)
	, haveMultipleForm(false)
	, haveStandardMultipleForm(false)
	, haveStandardMultipleFormWithMissingLastVowel(false)
	, haveStandardMultipleFormEnding(false)
	, haveAlternativeMultipleFormEnding(false)
	, canBeAnimate(false)
	, canBeInanimate(false)
{

}


// Builds a record from one ';'-separated dictionary line.
//
// Fields read (0-based; field 0 is ignored here):
//   1  nominative form
//   2  gender: "м" -> male, "ж" -> female, anything else -> neutral
//   3  has a singular form            4  has a plural form
//   5  has a standard plural          6  standard plural with a dropped last vowel
//   7  standard plural ending         8  alternative plural ending
//   9  special (irregular) plural form, may be empty
//   10 can be animate                 11 can be inanimate
//
// A flag is true only when its field is exactly "1". Lines with fewer than
// 12 fields are padded with empty fields instead of being indexed out of range.
NounRecord::NounRecord(std::wstring line)
{
	const std::size_t requiredFieldCount = 12;

	std::vector<std::wstring> lineArr;

	boost::split_regex(lineArr, line, boost::wregex(L";"));

	// Short or blank lines would otherwise read past the end of the vector.
	if (lineArr.size() < requiredFieldCount)
	{
		lineArr.resize(requiredFieldCount);
	}

	nominativeForm = lineArr[1];

	if (lineArr[2] == L"м")
	{
		gender = NG_MALE;
	}
	else if (lineArr[2] == L"ж")
	{
		gender = NG_FEMALE;
	}
	else
	{
		gender = NG_NEUTRAL;
	}

	haveSingleForm = (lineArr[3] == L"1");
	haveMultipleForm = (lineArr[4] == L"1");

	haveStandardMultipleForm = (lineArr[5] == L"1");
	haveStandardMultipleFormWithMissingLastVowel = (lineArr[6] == L"1");
	haveStandardMultipleFormEnding = (lineArr[7] == L"1");
	haveAlternativeMultipleFormEnding = (lineArr[8] == L"1");

	specialMultipleForm = lineArr[9];

	canBeAnimate = (lineArr[10] == L"1");
	canBeInanimate = (lineArr[11] == L"1");
}


// Maps the textual name of a declension (as spelled in the enum) to its enum value.
// Unknown names are reported on stdout and mapped to FIRST_A_IFORM_INANIMATE.
NounDeclencion WStringToNounDeclencion(std::wstring str)
{
	struct NamedDeclencion
	{
		const wchar_t* name;
		NounDeclencion value;
	};

	static const NamedDeclencion knownDeclencions[] =
	{
		{ L"FIRST_A_IFORM_INANIMATE", FIRST_A_IFORM_INANIMATE },
		{ L"FIRST_A_IFORM_ANIMATE", FIRST_A_IFORM_ANIMATE },
		{ L"FIRST_A_UFORM_INANIMATE", FIRST_A_UFORM_INANIMATE },
		{ L"FIRST_A_UFORM_ANIMATE", FIRST_A_UFORM_ANIMATE },
		{ L"FIRST_YA_FORM_INANIMATE", FIRST_YA_FORM_INANIMATE },
		{ L"FIRST_YA_FORM_ANIMATE", FIRST_YA_FORM_ANIMATE },

		{ L"SECOND_MALE_IFORM_INANIMATE", SECOND_MALE_IFORM_INANIMATE },
		{ L"SECOND_MALE_IFORM_ANIMATE", SECOND_MALE_IFORM_ANIMATE },
		{ L"SECOND_MALE_UFORM_INANIMATE", SECOND_MALE_UFORM_INANIMATE },
		{ L"SECOND_MALE_UFORM_ANIMATE", SECOND_MALE_UFORM_ANIMATE },
		{ L"SECOND_MALE_SSFORM_INANIMATE", SECOND_MALE_SSFORM_INANIMATE },
		{ L"SECOND_MALE_SSFORM_ANIMATE", SECOND_MALE_SSFORM_ANIMATE },

		{ L"SECOND_I_SHORT_INANIMATE", SECOND_I_SHORT_INANIMATE },
		{ L"SECOND_I_SHORT_ANIMATE", SECOND_I_SHORT_ANIMATE },

		{ L"SECOND_NEUTRAL_E_FORM", SECOND_NEUTRAL_E_FORM },
		{ L"SECOND_NEUTRAL_O_FORM", SECOND_NEUTRAL_O_FORM },
		{ L"THIRD_FORM_INANIMATE", THIRD_FORM_INANIMATE },
		{ L"THIRD_FORM_ANIMATE", THIRD_FORM_ANIMATE },
	};

	for (const NamedDeclencion& candidate : knownDeclencions)
	{
		if (str == candidate.name)
		{
			return candidate.value;
		}
	}

	std::cout << "Error in WStringToNounDeclencion!" << std::endl;
	return FIRST_A_IFORM_INANIMATE;
}


// Returns every ending that is tried when a word is split into base + ending.
// The empty ending comes first. Each ending appears exactly once
// (the list used to contain "я" twice, which made callers repeat the same split).
std::vector<std::wstring> GetAllNounEndingArr()
{
	return std::vector<std::wstring>
	{
		L"",

		L"й",
		L"ев",

		L"а",
		L"и",
		L"е",
		L"у",
		L"ой",
		L"ы",
		L"ом",
		L"ь",
		L"я",
		L"ю",
		L"ем",
		L"о",
		L"ью",
		L"ам",
		L"ами",
		L"ах",
		L"ов",
		L"ей",
		L"ям",
		L"ях",
		L"ями",
	};
}

// Linear search of NounRecordArr for a record whose nominative form equals the argument.
bool NounIsInDictionary(std::wstring nounNominative)
{
	bool found = false;

	for (auto it = NounRecordArr.begin(); !found && it != NounRecordArr.end(); ++it)
	{
		found = (it->nominativeForm == nounNominative);
	}

	return found;
}

// Derives the regular nominative plural from a nominative singular by
// looking at the last letter (and, for a final 'а', the letter before it):
//   'а' after an i-form consonant -> 'и';  'а' after a u-form consonant -> 'ы';
//   'я', 'ь', 'й' -> 'и';  'о' -> 'а';  'е' -> 'я';
//   a final i-form / u-form consonant gets 'и' / 'ы' appended.
// If no rule applies (including empty input) the word is returned unchanged
// and an error line is printed.
std::wstring convertToStandardPluralForm(std::wstring s)
{
	std::wstring pluralForm = s;

	if (pluralForm.empty())
	{
		// Nothing to inspect; indexing size() - 1 would be out of range.
		std::cout << "Error in convertToStandardPluralForm" << std::endl;
		return pluralForm;
	}

	const std::size_t lastIndex = pluralForm.size() - 1;
	const wchar_t last = pluralForm[lastIndex];

	// A one-letter word has no preceding letter; the 'а' rules then simply do not apply.
	const bool hasPrev = lastIndex >= 1;
	const wchar_t prev = hasPrev ? pluralForm[lastIndex - 1] : L'\0';

	if (last == L'а' && hasPrev && charIsIFormConsolant(prev))
	{
		pluralForm[lastIndex] = L'и';
	}
	else if (last == L'а' && hasPrev && charIsUFormConsolant(prev))
	{
		pluralForm[lastIndex] = L'ы';
	}
	else if (last == L'я')
	{
		pluralForm[lastIndex] = L'и';
	}
	else if (charIsIFormConsolant(last))
	{
		pluralForm += L'и';
	}
	else if (charIsUFormConsolant(last))
	{
		pluralForm += L'ы';
	}
	else if (last == L'ь')
	{
		pluralForm[lastIndex] = L'и';
	}
	else if (last == L'й')
	{
		pluralForm[lastIndex] = L'и';
	}
	else if (last == L'о')
	{
		pluralForm[lastIndex] = L'а';
	}
	else if (last == L'е')
	{
		pluralForm[lastIndex] = L'я';
	}
	else
	{
		std::cout << "Error in convertToStandardPluralForm" << std::endl;
	}

	return pluralForm;
}


// Turns a standard plural ending into the alternative one:
// final 'и' -> 'я', final 'ы' -> 'а'. Any other word (including the empty
// string) is returned unchanged.
std::wstring convertFromStandardToAlternativePluralForm(std::wstring s)
{
	if (s.empty())
	{
		// No last character to replace.
		return s;
	}

	wchar_t& last = s[s.size() - 1];

	if (last == L'и')
	{
		last = L'я';
	}
	else if (last == L'ы')
	{
		last = L'а';
	}

	return s;
}

// Collects every nominative plural spelling the record allows.
//  - A non-empty specialMultipleForm is always included.
//  - A record without a singular form contributes its nominativeForm as is.
//  - Otherwise the standard plural is derived from the singular; when
//    haveStandardMultipleFormWithMissingLastVowel is set, a variant is added in
//    which the third letter from the end is dropped, or replaced by 'ь' when the
//    letter before it is a "softener" consonant (лев -> львы, немец -> немцы).
//  For each derived plural, the standard and/or alternative ending is added
//  according to haveStandardMultipleFormEnding / haveAlternativeMultipleFormEnding.
std::set<std::wstring> getPluralForm(NounRecord noun)
{
	std::set<std::wstring> result;

	// Adds the ending variants of one derived plural that the record permits.
	auto insertPermittedEndings = [&result, &noun](const std::wstring& pluralForm)
	{
		if (noun.haveStandardMultipleFormEnding)
		{
			result.insert(pluralForm);
		}

		if (noun.haveAlternativeMultipleFormEnding)
		{
			result.insert(convertFromStandardToAlternativePluralForm(pluralForm));
		}
	};

	if (noun.specialMultipleForm != L"")
	{
		result.insert(noun.specialMultipleForm);
	}

	if (!noun.haveSingleForm)
	{
		result.insert(noun.nominativeForm);
		return result;
	}

	if (noun.haveStandardMultipleForm)
	{
		insertPermittedEndings(convertToStandardPluralForm(noun.nominativeForm));
	}

	if (noun.haveStandardMultipleFormWithMissingLastVowel)
	{
		std::wstring pluralForm = convertToStandardPluralForm(noun.nominativeForm);

		// The rule looks at the letters 3 and 4 positions from the end, so the
		// plural needs at least four letters; shorter words cannot follow it.
		if (pluralForm.size() >= 4)
		{
			const std::size_t vowelIndex = pluralForm.size() - 3;

			if (charIsMissingVowelSoftenerConsolant(pluralForm[vowelIndex - 1]))
			{
				pluralForm[vowelIndex] = L'ь';
			}
			else
			{
				pluralForm.erase(vowelIndex, 1);
			}

			insertPermittedEndings(pluralForm);
		}
		else
		{
			std::cout << "Error in getPluralForm" << std::endl;
		}
	}

	return result;
}

// True when some record that has a plural form lists the given spelling
// in its precalculated nominative plural set.
bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural)
{
	for (auto it = NounRecordArr.begin(); it != NounRecordArr.end(); ++it)
	{
		if (!it->haveMultipleForm)
		{
			continue;
		}

		const bool listed = it->precalculatedNominativePluralSet.count(nounNominativePlural) != 0;

		if (listed)
		{
			return true;
		}
	}

	return false;
}

// Returns a copy of the first record whose nominative form equals the argument,
// or a default-constructed record when there is none.
NounRecord GetNounRecordFromDictionary(std::wstring nounNominative)
{
	for (auto it = NounRecordArr.begin(); it != NounRecordArr.end(); ++it)
	{
		const bool sameWord = (it->nominativeForm == nounNominative);

		if (sameWord)
		{
			return *it;
		}
	}

	return NounRecord();
}

// Returns a copy of the first record that has a plural form and lists the given
// spelling in its precalculated plural set, or a default-constructed record.
NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural)
{
	for (auto it = NounRecordArr.begin(); it != NounRecordArr.end(); ++it)
	{
		if (!it->haveMultipleForm)
		{
			continue;
		}

		if (it->precalculatedNominativePluralSet.count(nounNominativePlural) != 0)
		{
			return *it;
		}
	}

	return NounRecord();
}


// Used by the missing-vowel rule: after one of these consonants the dropped
// vowel is replaced by a soft sign instead of simply disappearing.
//   лев   -> львы  (л: е becomes ь)
//   немец -> немцы (not л: е is just dropped)
bool charIsMissingVowelSoftenerConsolant(wchar_t c)
{
	static const std::wstring softenerConsolants = L"л";

	return softenerConsolants.find(c) != std::wstring::npos;
}


std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun)
{
	std::set<NounEndingDivision> result;

	auto allNounEndingArr = GetAllNounEndingArr();

	for (auto ending : allNounEndingArr)
	{
		if (boost::ends_with(noun, ending))
		{
			std::wstring nounBase = boost::replace_last_copy(noun, ending, "");


			if ((charIsVowel(nounBase[nounBase.size() - 1])) || //Might be exact the й case
				(charIsConsolant(nounBase[nounBase.size() - 1]) || nounBase[nounBase.size() - 1] == L'ь' || nounBase[nounBase.size() - 1] == L'ъ'))
			{
				result.insert({ nounBase, ending, NounEndingDivision::DC_COMMON });
			}

			//Check missed vowel (simple case)
			if (charIsConsolant(nounBase[nounBase.size() - 1]) && charIsConsolant(nounBase[nounBase.size() - 2]))
			{
				result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_O });
			}

			if (charIsConsolant(nounBase[nounBase.size() - 1]) && nounBase[nounBase.size() - 2] == L'ь' && charIsMissingVowelSoftenerConsolant(nounBase[nounBase.size() - 3]))
			{
				result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E });
			}

			if (charIsConsolant(nounBase[nounBase.size() - 1]) && charIsConsolant(nounBase[nounBase.size() - 2]) && !charIsMissingVowelSoftenerConsolant(nounBase[nounBase.size() - 2]))
			{
				result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E });
			}

		}
	}

	return result;
}


// Lists every (declension, count, grammatical case) combination whose ending
// set in nounDeclencionCaseTable contains `nounEnding`.
// Iterates the rows each declension actually has rather than assuming
// NGC_SIZE * NC_SIZE rows, so an incomplete table is not read out of range.
std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding)
{
	std::vector<NounTuple> result;

	for (const auto& declencionRecord : nounDeclencionCaseTable)
	{
		for (const auto& caseRecord : declencionRecord.grammaticalCaseTable)
		{
			if (caseRecord.ending.count(nounEnding) != 0)
			{
				result.push_back(NounTuple{ declencionRecord.nounDeclencion, caseRecord.count, caseRecord.grammaticalCase });
			}
		}
	}

	return result;
}

std::vector<NounTuple> FilterNounTupleArrByNounDeclentionSet(std::vector<NounTuple> nounTupleArr, std::set<NounDeclencion> filter)
{
	std::vector<NounTuple> result;

	for (auto nounTuple : nounTupleArr)
	{
		if (filter.count(std::get<0>(nounTuple)) != 0)
		{
			result.push_back(nounTuple);
		}
	}

	return result;
}


std::set<std::wstring> GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount)
{
	std::set<std::wstring> result;

	NounDeclencionCaseTableRecord nounDeclencionCaseTableRecord = nounDeclencionCaseTable[static_cast<int>(nounDeclencion)];

	for (auto& grammaticalTableRecord : nounDeclencionCaseTableRecord.grammaticalCaseTable)
	{
		if (grammaticalTableRecord.grammaticalCase == NGC_P1_NOMINATIVE && grammaticalTableRecord.count == nounCount)
		{
			for (auto& e : grammaticalTableRecord.ending)
			{
				result.insert(nounBase + e);
			}

		}
	}

	return result;
}

// Builds the alternative ("-а / -я") nominative plural of a base:
// "я" after an i-form consonant or a vowel, "а" after a u-form consonant.
// Returns an empty string and prints an error when the base is empty or its
// last letter fits none of these classes. `nounDeclencion` is currently unused.
std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclencion nounDeclencion)
{
	if (!nounBase.empty())
	{
		const wchar_t last = nounBase[nounBase.size() - 1];

		if (charIsIFormConsolant(last))
		{
			return nounBase + L"я";
		}

		if (charIsUFormConsolant(last))
		{
			return nounBase + L"а";
		}

		if (charIsVowel(last))
		{
			return nounBase + L"я";
		}
	}

	// The message used to name GetNounNoninative, which pointed at the wrong function.
	std::cout << "Error in GetNounNoninativeSpecialPluralA" << std::endl;

	return L"";
}


// Last letter of the record's nominative form, or L'\0' when the form is empty
// (L'\0' equals none of the letters the declension predicates compare against).
wchar_t GetLastChar(const NounRecord& nounRecord)
{
	const std::wstring& form = nounRecord.nominativeForm;

	return form.empty() ? L'\0' : form[form.size() - 1];
}

// Second-to-last letter of the record's nominative form, or L'\0' when the form
// has fewer than two letters (indexing size() - 2 would then be out of range).
wchar_t GetPrevLastChar(const NounRecord& nounRecord)
{
	const std::wstring& form = nounRecord.nominativeForm;

	return form.size() < 2 ? L'\0' : form[form.size() - 2];
}


// Singular, may be inanimate, nominative ends in 'а' preceded by a letter accepted by charIsIFormConsolant.
bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || !nounRecord.canBeInanimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord));
}

// Singular, may be animate, nominative ends in 'а' preceded by a letter accepted by charIsIFormConsolant.
bool FirstAIFormAnimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || !nounRecord.canBeAnimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord));
}

// Has a plural, may be inanimate, and either the nominative ends in <i-form consonant>'а',
// or the record has no singular and its nominative ends in <i-form consonant>'и'.
bool FirstAIFormInanimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || !nounRecord.canBeInanimate)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord)))
	{
		return true;
	}

	if (nounRecord.haveSingleForm)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord));
}

// Has a plural, may be animate, and either the nominative ends in <i-form consonant>'а',
// or the record has no singular and its nominative ends in <i-form consonant>'и'.
bool FirstAIFormAnimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || !nounRecord.canBeAnimate)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'а' && charIsIFormConsolant(GetPrevLastChar(nounRecord)))
	{
		return true;
	}

	if (nounRecord.haveSingleForm)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'и' && charIsIFormConsolant(GetPrevLastChar(nounRecord));
}


// Singular, may be inanimate, nominative ends in 'а' preceded by a letter accepted by charIsUFormConsolant.
bool FirstAUFormInanimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || !nounRecord.canBeInanimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord));
}

// Singular, may be animate, nominative ends in 'а' preceded by a letter accepted by charIsUFormConsolant.
bool FirstAUFormAnimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || !nounRecord.canBeAnimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord));
}


// Has a plural, may be inanimate, and either the nominative ends in <u-form consonant>'а',
// or the record has no singular and its nominative ends in <u-form consonant>'ы'.
bool FirstAUFormInanimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || !nounRecord.canBeInanimate)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord)))
	{
		return true;
	}

	if (nounRecord.haveSingleForm)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'ы' && charIsUFormConsolant(GetPrevLastChar(nounRecord));
}

// Has a plural, may be animate, and either the nominative ends in <u-form consonant>'а',
// or the record has no singular and its nominative ends in <u-form consonant>'ы'.
bool FirstAUFormAnimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || !nounRecord.canBeAnimate)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'а' && charIsUFormConsolant(GetPrevLastChar(nounRecord)))
	{
		return true;
	}

	if (nounRecord.haveSingleForm)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'ы' && charIsUFormConsolant(GetPrevLastChar(nounRecord));
}


// Singular, may be inanimate, nominative ends in 'я'.
bool FirstYaFormInanimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || !nounRecord.canBeInanimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'я';
}

// Singular, may be animate, nominative ends in 'я'.
bool FirstYaFormAnimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || !nounRecord.canBeAnimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'я';
}

// Has a plural, may be inanimate, and the nominative ends in 'я',
// or the record has no singular and its nominative ends in 'и'.
bool FirstYaFormInanimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || !nounRecord.canBeInanimate)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'я')
	{
		return true;
	}

	return !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и';
}

// Has a plural, may be animate, and the nominative ends in 'я',
// or the record has no singular and its nominative ends in 'и'.
bool FirstYaFormAnimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || !nounRecord.canBeAnimate)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'я')
	{
		return true;
	}

	return !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и';
}


// Second declension

// Singular, male, may be inanimate, nominative ends in a letter accepted by charIsIFormConsolant.
bool SecondMaleIFormInanimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeInanimate)
	{
		return false;
	}

	return charIsIFormConsolant(GetLastChar(nounRecord));
}

// Has a plural, male, may be inanimate, and the nominative ends in an i-form consonant,
// or the record has no singular and its nominative ends in <i-form consonant>'и'.
bool SecondMaleIFormInanimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeInanimate)
	{
		return false;
	}

	if (charIsIFormConsolant(GetLastChar(nounRecord)))
	{
		return true;
	}

	if (nounRecord.haveSingleForm)
	{
		return false;
	}

	return charIsIFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'и';
}


// Singular, male, may be animate, nominative ends in a letter accepted by charIsIFormConsolant.
bool SecondMaleIFormAnimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeAnimate)
	{
		return false;
	}

	return charIsIFormConsolant(GetLastChar(nounRecord));
}

// Has a plural, male, may be animate, and the nominative ends in an i-form consonant,
// or the record has no singular and its nominative ends in <i-form consonant>'и'.
bool SecondMaleIFormAnimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeAnimate)
	{
		return false;
	}

	if (charIsIFormConsolant(GetLastChar(nounRecord)))
	{
		return true;
	}

	if (nounRecord.haveSingleForm)
	{
		return false;
	}

	return charIsIFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'и';
}


// Singular, male, may be inanimate, nominative ends in a letter accepted by charIsUFormConsolant.
bool SecondMaleUFormInanimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeInanimate)
	{
		return false;
	}

	return charIsUFormConsolant(GetLastChar(nounRecord));
}

// Has a plural, male, may be inanimate, and the nominative ends in a u-form consonant,
// or the record has no singular and its nominative ends in <u-form consonant>'ы'.
bool SecondMaleUFormInanimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeInanimate)
	{
		return false;
	}

	if (charIsUFormConsolant(GetLastChar(nounRecord)))
	{
		return true;
	}

	if (nounRecord.haveSingleForm)
	{
		return false;
	}

	return charIsUFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'ы';
}


// Singular, male, may be animate, nominative ends in a letter accepted by charIsUFormConsolant.
bool SecondMaleUFormAnimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeAnimate)
	{
		return false;
	}

	return charIsUFormConsolant(GetLastChar(nounRecord));
}

// Has a plural, male, may be animate, and the nominative ends in a u-form consonant,
// or the record has no singular and its nominative ends in <u-form consonant>'ы'.
bool SecondMaleUFormAnimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeAnimate)
	{
		return false;
	}

	if (charIsUFormConsolant(GetLastChar(nounRecord)))
	{
		return true;
	}

	if (nounRecord.haveSingleForm)
	{
		return false;
	}

	return charIsUFormConsolant(GetPrevLastChar(nounRecord)) && GetLastChar(nounRecord) == L'ы';
}


// Singular, male, may be inanimate, nominative ends in the soft sign 'ь'.
bool SecondMaleSSFormInanimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeInanimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'ь';
}

// Has a plural, male, may be inanimate, and the nominative ends in 'ь',
// or the record has no singular and its nominative ends in 'и'.
bool SecondMaleSSFormInanimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeInanimate)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'ь')
	{
		return true;
	}

	return !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и';
}


// Singular, male, may be animate, nominative ends in the soft sign 'ь'.
bool SecondMaleSSFormAnimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeAnimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'ь';
}

// Has a plural, male, may be animate, and the nominative ends in 'ь',
// or the record has no singular and its nominative ends in 'и'.
bool SecondMaleSSFormAnimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || nounRecord.gender != NG_MALE || !nounRecord.canBeAnimate)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'ь')
	{
		return true;
	}

	return !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и';
}


// Singular, may be inanimate, nominative ends in 'й'. Gender is not checked.
bool SecondIShortInanimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || !nounRecord.canBeInanimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'й';
}

// Singular, may be animate, nominative ends in 'й'. Gender is not checked.
bool SecondIShortAnimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || !nounRecord.canBeAnimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'й';
}

bool SecondIShortInanimatePluralCondition(const NounRecord& nounRecord)
{
	return nounRecord.haveSingleForm && nounRecord.canBeInanimate && GetLastChar(nounRecord) == L'й';
}

bool SecondIShortAnimatePluralCondition(const NounRecord& nounRecord)
{
	return nounRecord.haveSingleForm && nounRecord.canBeAnimate && GetLastChar(nounRecord) == L'й';
}


// Singular, neutral gender, nominative ends in 'е'.
bool SecondNeutralEFormSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || nounRecord.gender != NG_NEUTRAL)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'е';
}

// Has a plural, neutral gender, and the nominative ends in 'е',
// or the record has no singular and its nominative ends in 'я'.
bool SecondNeutralEFormPluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || nounRecord.gender != NG_NEUTRAL)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'е')
	{
		return true;
	}

	return !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'я';
}

// Singular, neutral gender, nominative ends in 'о'.
bool SecondNeutralOFormSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || nounRecord.gender != NG_NEUTRAL)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'о';
}

// Has a plural, neutral gender, and the nominative ends in 'о',
// or the record has no singular and its nominative ends in 'а'.
bool SecondNeutralOFormPluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || nounRecord.gender != NG_NEUTRAL)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'о')
	{
		return true;
	}

	return !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'а';
}

// Singular, female, may be inanimate, nominative ends in the soft sign 'ь'.
bool ThirdFormInanimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || nounRecord.gender != NG_FEMALE || !nounRecord.canBeInanimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'ь';
}

// Singular, female, may be animate, nominative ends in the soft sign 'ь'.
bool ThirdFormAnimateSingularCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveSingleForm || nounRecord.gender != NG_FEMALE || !nounRecord.canBeAnimate)
	{
		return false;
	}

	return GetLastChar(nounRecord) == L'ь';
}


// Has a plural, female, may be inanimate, and the nominative ends in 'ь',
// or the record has no singular and its nominative ends in 'и'.
bool ThirdFormInanimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || nounRecord.gender != NG_FEMALE || !nounRecord.canBeInanimate)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'ь')
	{
		return true;
	}

	return !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и';
}

// Has a plural, female, may be animate, and the nominative ends in 'ь',
// or the record has no singular and its nominative ends in 'и'.
bool ThirdFormAnimatePluralCondition(const NounRecord& nounRecord)
{
	if (!nounRecord.haveMultipleForm || nounRecord.gender != NG_FEMALE || !nounRecord.canBeAnimate)
	{
		return false;
	}

	if (GetLastChar(nounRecord) == L'ь')
	{
		return true;
	}

	return !nounRecord.haveSingleForm && GetLastChar(nounRecord) == L'и';
}


// (declension, count) -> predicate telling whether a dictionary record fits that declension in that count.
// Filled by SetupDeclentionMap(); read by NounFitsDeclention() and CalculateNounDeclention().
std::map<std::pair<NounDeclencion, NounCount>, std::function < bool(const NounRecord&) >> DeclentionConditionMap;

void SetupDeclentionMap()
{

	DeclentionConditionMap[{FIRST_A_IFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormInanimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{FIRST_A_IFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAIFormAnimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{FIRST_A_IFORM_INANIMATE, NC_PLURAL}] = std::bind(FirstAIFormInanimatePluralCondition, std::placeholders::_1);
	DeclentionConditionMap[{FIRST_A_IFORM_ANIMATE, NC_PLURAL}] = std::bind(FirstAIFormAnimatePluralCondition, std::placeholders::_1);

	DeclentionConditionMap[{FIRST_A_UFORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormInanimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{FIRST_A_UFORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstAUFormAnimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{FIRST_A_UFORM_INANIMATE, NC_PLURAL}] = std::bind(FirstAUFormInanimatePluralCondition, std::placeholders::_1);
	DeclentionConditionMap[{FIRST_A_UFORM_ANIMATE, NC_PLURAL}] = std::bind(FirstAUFormAnimatePluralCondition, std::placeholders::_1);

	DeclentionConditionMap[{FIRST_YA_FORM_INANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormInanimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{FIRST_YA_FORM_ANIMATE, NC_SINGULAR}] = std::bind(FirstYaFormAnimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{FIRST_YA_FORM_INANIMATE, NC_PLURAL}] = std::bind(FirstYaFormInanimatePluralCondition, std::placeholders::_1);
	DeclentionConditionMap[{FIRST_YA_FORM_ANIMATE, NC_PLURAL}] = std::bind(FirstYaFormAnimatePluralCondition, std::placeholders::_1);

	//Second form
	DeclentionConditionMap[{SECOND_MALE_IFORM_INANIMATE, NC_SINGULAR}] = std::bind(SecondMaleIFormInanimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_MALE_IFORM_INANIMATE, NC_PLURAL}] = std::bind(SecondMaleIFormInanimatePluralCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_MALE_IFORM_ANIMATE, NC_SINGULAR}] = std::bind(SecondMaleIFormAnimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_MALE_IFORM_ANIMATE, NC_PLURAL}] = std::bind(SecondMaleIFormAnimatePluralCondition, std::placeholders::_1);

	DeclentionConditionMap[{SECOND_MALE_UFORM_INANIMATE, NC_SINGULAR}] = std::bind(SecondMaleUFormInanimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_MALE_UFORM_INANIMATE, NC_PLURAL}] = std::bind(SecondMaleUFormInanimatePluralCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_MALE_UFORM_ANIMATE, NC_SINGULAR}] = std::bind(SecondMaleUFormAnimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_MALE_UFORM_ANIMATE, NC_PLURAL}] = std::bind(SecondMaleUFormAnimatePluralCondition, std::placeholders::_1);

	DeclentionConditionMap[{SECOND_MALE_SSFORM_INANIMATE, NC_SINGULAR}] = std::bind(SecondMaleSSFormInanimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_MALE_SSFORM_INANIMATE, NC_PLURAL}] = std::bind(SecondMaleSSFormInanimatePluralCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_SINGULAR}] = std::bind(SecondMaleSSFormAnimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_MALE_SSFORM_ANIMATE, NC_PLURAL}] = std::bind(SecondMaleSSFormAnimatePluralCondition, std::placeholders::_1);

	DeclentionConditionMap[{SECOND_I_SHORT_INANIMATE, NC_SINGULAR}] = std::bind(SecondIShortInanimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_I_SHORT_INANIMATE, NC_PLURAL}] = std::bind(SecondIShortInanimatePluralCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_I_SHORT_ANIMATE, NC_SINGULAR}] = std::bind(SecondIShortAnimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_I_SHORT_ANIMATE, NC_PLURAL}] = std::bind(SecondIShortAnimatePluralCondition, std::placeholders::_1);


	DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_SINGULAR}] = std::bind(SecondNeutralEFormSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_NEUTRAL_E_FORM, NC_PLURAL}] = std::bind(SecondNeutralEFormPluralCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_NEUTRAL_O_FORM, NC_SINGULAR}] = std::bind(SecondNeutralOFormSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{SECOND_NEUTRAL_O_FORM, NC_PLURAL}] = std::bind(SecondNeutralOFormPluralCondition, std::placeholders::_1);


	DeclentionConditionMap[{THIRD_FORM_INANIMATE, NC_SINGULAR}] = std::bind(ThirdFormInanimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{THIRD_FORM_ANIMATE, NC_SINGULAR}] = std::bind(ThirdFormAnimateSingularCondition, std::placeholders::_1);
	DeclentionConditionMap[{THIRD_FORM_INANIMATE, NC_PLURAL}] = std::bind(ThirdFormInanimatePluralCondition, std::placeholders::_1);
	DeclentionConditionMap[{THIRD_FORM_ANIMATE, NC_PLURAL}] = std::bind(ThirdFormAnimatePluralCondition, std::placeholders::_1);


}


// Tells whether the record satisfies the predicate registered for the tuple's
// (declension, count) pair.
// The map is looked up with find(): operator[] would insert an empty
// std::function for an unregistered pair and calling it throws
// std::bad_function_call. An unregistered pair (e.g. SetupDeclentionMap() was
// not called) is reported on stdout and treated as "does not fit".
bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple)
{
	const NounDeclencion nounDeclencion = std::get<0>(nounTuple);
	const NounCount nounCount = std::get<1>(nounTuple);

	const auto entry = DeclentionConditionMap.find(std::make_pair(nounDeclencion, nounCount));

	if (entry == DeclentionConditionMap.end() || !entry->second)
	{
		std::cout << "Error in NounFitsDeclention" << std::endl;
		return false;
	}

	return entry->second(nounRecord);
}

bool NounScructIsAlreadyInArray(const NounStruct& nounStruct, const std::vector<NounStruct>& arr)
{
	for (auto& ns : arr)
	{
		if (ns.nounGrammaticalCase == nounStruct.nounGrammaticalCase &&
			ns.nounRecord.nominativeForm == nounStruct.nounRecord.nominativeForm)
		{
			return true;
		}
	}

	return false;
}
// True for the SECOND_MALE_* and SECOND_I_SHORT_* declensions.
// The SECOND_NEUTRAL_* declensions are not included.
bool IsDeclencionSecondType(NounDeclencion nounDeclention)
{
	return nounDeclention == SECOND_MALE_IFORM_INANIMATE
		|| nounDeclention == SECOND_MALE_IFORM_ANIMATE
		|| nounDeclention == SECOND_MALE_UFORM_INANIMATE
		|| nounDeclention == SECOND_MALE_UFORM_ANIMATE
		|| nounDeclention == SECOND_MALE_SSFORM_INANIMATE
		|| nounDeclention == SECOND_MALE_SSFORM_ANIMATE
		|| nounDeclention == SECOND_I_SHORT_INANIMATE
		|| nounDeclention == SECOND_I_SHORT_ANIMATE;
}

// True for every declension whose enumerator name ends in _ANIMATE.
bool IsDeclencionAnimated(NounDeclencion nounDeclention)
{
	return nounDeclention == FIRST_A_IFORM_ANIMATE
		|| nounDeclention == FIRST_A_UFORM_ANIMATE
		|| nounDeclention == FIRST_YA_FORM_ANIMATE
		|| nounDeclention == SECOND_MALE_IFORM_ANIMATE
		|| nounDeclention == SECOND_MALE_UFORM_ANIMATE
		|| nounDeclention == SECOND_MALE_SSFORM_ANIMATE
		|| nounDeclention == SECOND_I_SHORT_ANIMATE
		|| nounDeclention == THIRD_FORM_ANIMATE;
}

// Record-level check shared by all division cases: forwards to NounFitsDeclention()
// (note the swapped argument order).
bool StandardTest(NounTuple nounTuple, NounRecord nounRecord)
{
	return NounFitsDeclention(nounRecord, nounTuple);
}

// Extra check for the DC_LOST_VOWEL_O division: the record must be flagged as
// having a plural with a missing last vowel. `nounTuple` is not used.
bool LostVowelOTest(NounTuple nounTuple, NounRecord nounRecord)
{
	return nounRecord.haveStandardMultipleFormWithMissingLastVowel;
}

// Extra check for the DC_LOST_VOWEL_E division: the record must be flagged as
// having a plural with a missing last vowel. `nounTuple` is not used.
bool LostVowelETest(NounTuple nounTuple, NounRecord nounRecord)
{
	return nounRecord.haveStandardMultipleFormWithMissingLastVowel;
}

// Per-division-case hooks used by RecognizeNoun(); all four maps are filled by FillDivisionCaseMaps().

// Rewrites the base of a split word (e.g. puts a dropped vowel back).
std::map<NounEndingDivision::DivisionCase, std::function<std::wstring(std::wstring)>> DivisionCaseNounModificatorMap;
// Rewrites the ending of a split word before it is looked up in the case tables.
std::map<NounEndingDivision::DivisionCase, std::function<std::wstring(std::wstring)>> DivisionCaseEndingModificatorMap;

// Decides from the (declension, count, case) tuple alone whether the division case can apply.
std::map < NounEndingDivision::DivisionCase, std::function < bool(NounTuple) >> DivisionCaseNounTupleFilterMap;

// Decides from the tuple and the dictionary record whether the match is accepted.
std::map < NounEndingDivision::DivisionCase, std::function < bool(NounTuple, NounRecord) >> DivisionCaseNounTupleRecordFilterMap;


// Installs the per-division-case hooks used by RecognizeNoun().
//
// Base modifiers:
//   DC_COMMON        - base is used as is;
//   DC_LOST_VOWEL_O  - 'о' is put back before the last letter (куск -> кусок);
//   DC_LOST_VOWEL_E  - 'е' is put back before the last letter (немц -> немец);
//                      if the letter before the last one is 'ь', the 'е'
//                      replaces it (льв -> лев).
//   Bases shorter than two letters are returned unchanged.
// Ending modifiers: identity for every case.
// Tuple filters: DC_COMMON accepts everything; the lost-vowel cases accept any
//   plural, and a singular unless it is nominative or an inanimate accusative.
// Record filters: see the individual lambdas.
void FillDivisionCaseMaps()
{
	DivisionCaseNounModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; };

	DivisionCaseNounModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](std::wstring s) -> std::wstring
	{
		if (s.size() < 2)
		{
			return s;
		}

		// Keep everything but the last letter, then insert the vowel before it.
		// (Cutting two letters here used to lose the letter before the vowel, and
		// the restored vowel must be the Cyrillic 'о' to match dictionary entries.)
		return std::wstring(s.begin(), s.end() - 1) + L"о" + s[s.size() - 1];
	};

	DivisionCaseNounModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = [](std::wstring s) -> std::wstring
	{
		if (s.size() < 2)
		{
			return s;
		}

		if (s[s.size() - 2] == L'ь')
		{
			// The soft sign stands where the vowel was: replace it.
			return std::wstring(s.begin(), s.end() - 2) + L"е" + s[s.size() - 1];
		}
		else
		{
			return std::wstring(s.begin(), s.end() - 1) + L"е" + s[s.size() - 1];
		}
	};


	DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON] = [](std::wstring s){ return s; };
	DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_O] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];
	DivisionCaseEndingModificatorMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseEndingModificatorMap[NounEndingDivision::DC_COMMON];


	DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t) { return true; };

	DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](NounTuple t)
	{
		return (std::get<1>(t) == NC_PLURAL) ||
			((std::get<2>(t) != NGC_P1_NOMINATIVE) &&
			(!(std::get<2>(t) == NGC_P4_ACCUSATIVE && !IsDeclencionAnimated(std::get<0>(t)))));
	};

	DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_E] = DivisionCaseNounTupleFilterMap[NounEndingDivision::DC_LOST_VOWEL_O];


	// Common case: a plural match additionally requires the record to have a standard plural.
	DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t, NounRecord r)
	{
		return (r.haveStandardMultipleForm || std::get<1>(t) == NC_SINGULAR) && StandardTest(t, r);
	};

	DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](NounTuple t, NounRecord r)
	{
		return LostVowelOTest(t, r) && StandardTest(t, r);
	};

	DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_LOST_VOWEL_E] = [](NounTuple t, NounRecord r)
	{
		return LostVowelETest(t, r) && StandardTest(t, r);
	};
}


std::set<NounStruct> RecognizeNoun(std::wstring noun)
{

	std::set<NounStruct> result;

	auto nounEndingDivisionArr = getPossibleNounEndingDivisionSet(noun);

	for (auto nounEndingDivision : nounEndingDivisionArr)
	{
		std::wstring nounBase = nounEndingDivision.base;
		std::wstring nounEnding = nounEndingDivision.ending;
		NounEndingDivision::DivisionCase dc = nounEndingDivision.divisionCase;

		std::wstring modifiedNounBase = DivisionCaseNounModificatorMap[dc](nounBase);
		std::wstring modifiedNounEnding = DivisionCaseEndingModificatorMap[dc](nounEnding);

		std::vector<NounTuple> possibleTupleArr = GetPossibleNounTupleArr(modifiedNounEnding);

		//Standard check
		for (NounTuple nounTuple : possibleTupleArr)
		{

			if (DivisionCaseNounTupleFilterMap[dc](nounTuple))
			{

				if (std::get<1>(nounTuple) == NC_SINGULAR)
				{

					std::set<std::wstring> nounNominaviteSingularSet = GetNounNoninative(modifiedNounBase, std::get<0>(nounTuple), NC_SINGULAR);

					for (auto& nn : nounNominaviteSingularSet)
					{

						if (NounIsInDictionary(nn))
						{

							NounRecord nounRecord = GetNounRecordFromDictionary(nn);

							if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord))
							{
								result.insert({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord });
							}
						}

					}
				}
				else
				{

					std::set<std::wstring> nounNominavitePluralSet = GetNounNoninative(nounBase, std::get<0>(nounTuple), NC_PLURAL);

					//Check all plural forms

					for (auto& nn : nounNominavitePluralSet)
					{

						if (NounPluralFormIsInDictionary(nn))
						{
							NounRecord nounRecord = GetNounRecordFromDictionary_ByPluralForm(nn);

							if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord))
							{
								result.insert({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord });
							}
						}
					}
				}

			}
		}
	}


	return result;
}

// Returns the declension of the first entry in DeclentionConditionMap (in map key
// order) whose predicate accepts the record. When nothing matches, an error is
// printed and a value-initialized NounDeclencion is returned.
NounDeclencion CalculateNounDeclention(NounRecord nounRecord)
{
	//Xperimental -- need to find if here might be more than 1 declention

	for (auto entry = DeclentionConditionMap.begin(); entry != DeclentionConditionMap.end(); ++entry)
	{
		const bool accepted = entry->second(nounRecord);

		if (accepted)
		{
			return entry->first.first;
		}
	}

	std::cout << "Error in CalculateNounDeclention" << std::endl;

	return NounDeclencion();
}


void CalculatePluralForm()
{
	for (auto& nounRecord : NounRecordArr)
	{
		nounRecord.precalculatedNominativePluralSet = getPluralForm(nounRecord);
	}
}


// Reloads nounDeclencionCaseTable from grammar_case.csv (hard-coded, platform-specific path).
//
// The first line is skipped as a header. Every other line is ';'-separated:
//   0 declension name - non-empty only on the first row of a declension;
//   1 count           - non-empty only when it changes, otherwise carried over;
//   2 grammatical case;
//   3 endings, separated by ", ".
// A non-empty column 0 closes the previous declension and starts a new one.
// Blank lines and lines with fewer than four columns are skipped, and a trailing
// '\r' (CRLF file read on a non-Windows system) is removed before splitting.
void LoadNounDeclencionCaseTable()
{

	nounDeclencionCaseTable.clear();

#ifdef _WIN32
	std::ifstream f("C:/Workplace/ChineseJournal/rudict/grammar_case.csv");

#else
	std::ifstream f("/home/devuser/workplace/rudict/grammar_case.csv");
#endif

	if (!f.is_open())
	{
		std::cout << "file not found!" << std::endl;
		return;
	}

	std::cout << "File found!" << std::endl;

	std::vector<GrammaticalTableRecord> currentGrammaticalCaseTable;
	std::wstring currentNounDeclencion;
	std::wstring currentNounCount;

	std::string line;

	getline(f, line); //Skip one line

	while (getline(f, line))
	{
		if (!line.empty() && line[line.size() - 1] == '\r')
		{
			line.erase(line.size() - 1);
		}

		std::vector<std::string> lineArr;

		boost::split_regex(lineArr, line, boost::regex(";"));

		// Columns 0..3 are read below; anything shorter cannot be a table row.
		if (lineArr.size() < 4)
		{
			continue;
		}

		if (lineArr[0] != "")
		{
			if (currentNounDeclencion != L"")
			{
				// A new declension starts: store the one collected so far.
				nounDeclencionCaseTable.push_back(NounDeclencionCaseTableRecord{ WStringToNounDeclencion(currentNounDeclencion), currentGrammaticalCaseTable });

				currentGrammaticalCaseTable.clear();
			}

			currentNounDeclencion = string_to_wstring(lineArr[0]);
		}

		if (lineArr[1] != "")
		{
			currentNounCount = string_to_wstring(lineArr[1]);
		}

		std::wstring endings = string_to_wstring(lineArr[3]);

		// Wide input needs a wide regex.
		std::set<std::wstring> endingsSet;
		boost::split_regex(endingsSet, endings, boost::wregex(L", "));

		currentGrammaticalCaseTable.push_back({
			WStringToNounCount(currentNounCount),
			WStringToNounGrammaticalCase(string_to_wstring(lineArr[2])),
			endingsSet
		});
	}

	//Add last one
	if (currentNounDeclencion != L"")
	{
		nounDeclencionCaseTable.push_back(NounDeclencionCaseTableRecord{ WStringToNounDeclencion(currentNounDeclencion), currentGrammaticalCaseTable });
	}

	f.close();
}

// Appends one NounRecord per data line of frequent_nouns_2000.csv (hard-coded,
// platform-specific path) to NounRecordArr. The first line is skipped as a header.
// Blank lines are skipped instead of being turned into records, and a trailing
// '\r' (CRLF file read on a non-Windows system) is removed so it does not end up
// in the last field. NounRecordArr is not cleared first.
void LoadFrequentWordSet()
{

#ifdef _WIN32
	std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_nouns_2000.csv");

#else
	std::ifstream f("/home/devuser/workplace/rudict/frequent_nouns_2000.csv");
#endif

	if (!f.is_open())
	{
		std::cout << "file not found!" << std::endl;
		return;
	}

	std::string line;

	getline(f, line); //Skip one line

	std::cout << "File found!" << std::endl;

	while (getline(f, line))
	{
		if (!line.empty() && line[line.size() - 1] == '\r')
		{
			line.erase(line.size() - 1);
		}

		if (line.empty())
		{
			continue;
		}

		const std::wstring wline = string_to_wstring(line);

		NounRecordArr.push_back(NounRecord(wline));
	}

	f.close();
}

} //namespace NN