// Noun declarations for namespace NN: dictionary records, the declension enumeration, and the noun-recognition API.
#ifndef NOUN_H_INCLUDED
|
|
#define NOUN_H_INCLUDED
|
|
|
|
#include <fstream>
#include <map>
#include <set>
#include <string>
#include <tuple>
#include <vector>

#include "boost/algorithm/string.hpp"

#include "grammarCase.h"
|
|
|
|
namespace NN
|
|
{
|
|
|
|
struct NounRecord : public TranslationUnitSetMix
|
|
{
|
|
std::wstring nominativeForm;
|
|
NounGender gender;
|
|
|
|
bool haveSingleForm;
|
|
bool haveMultipleForm;
|
|
|
|
bool haveStandardMultipleForm;
|
|
bool haveStandardMultipleFormWithMissingLastVowel;
|
|
|
|
bool haveStandardMultipleFormEnding;
|
|
bool haveAlternativeMultipleFormEnding;
|
|
|
|
std::wstring specialMultipleForm;
|
|
|
|
bool canBeAnimate;
|
|
bool canBeInanimate;
|
|
|
|
std::set<std::wstring> precalculatedNominativePluralSet;
|
|
|
|
NounRecord();
|
|
NounRecord(std::wstring line);
|
|
|
|
bool operator<(const NounRecord& n) const
|
|
{
|
|
return nominativeForm < n.nominativeForm;
|
|
}
|
|
};
|
|
|
|
// Enumerates the noun declension classes.
// Enumerators are numbered consecutively from 0 in declaration order
// (FIRST_A_IFORM_INANIMATE == 0 ... THIRD_FORM_ANIMATE == 17).
// The group headings below are read off the enumerator prefixes.
// NOTE: the spelling "Declencion" is part of the public name and is kept as is.
enum NounDeclencion
{
    // FIRST_* group
    FIRST_A_IFORM_INANIMATE,
    FIRST_A_IFORM_ANIMATE,
    FIRST_A_UFORM_INANIMATE,
    FIRST_A_UFORM_ANIMATE,
    FIRST_YA_FORM_INANIMATE,
    FIRST_YA_FORM_ANIMATE,

    // SECOND_MALE_* group
    SECOND_MALE_IFORM_INANIMATE,
    SECOND_MALE_IFORM_ANIMATE,
    SECOND_MALE_UFORM_INANIMATE,
    SECOND_MALE_UFORM_ANIMATE,
    SECOND_MALE_SSFORM_INANIMATE,
    SECOND_MALE_SSFORM_ANIMATE,

    // SECOND_I_SHORT_* group
    SECOND_I_SHORT_INANIMATE,
    SECOND_I_SHORT_ANIMATE,

    // SECOND_NEUTRAL_* group (no animate/inanimate split)
    SECOND_NEUTRAL_E_FORM,
    SECOND_NEUTRAL_O_FORM,

    // THIRD_* group
    THIRD_FORM_INANIMATE,
    THIRD_FORM_ANIMATE
};
|
|
|
|
// Declaration of a namespace-scope vector of NounRecord; the definition lives in another translation unit.
// NOTE(review): presumably the loaded noun dictionary — confirm where it is filled.
extern std::vector<NounRecord> NounRecordArr;
|
|
|
|
|
|
|
|
struct NounDeclencionCaseTableRecord
|
|
{
|
|
NounDeclencion nounDeclencion;
|
|
std::vector<GrammaticalTableRecord> grammaticalCaseTable;
|
|
};
|
|
|
|
// Declaration of a namespace-scope vector of NounDeclencionCaseTableRecord; defined in another translation unit.
// NOTE(review): presumably filled by LoadNounDeclencionCaseTable() — confirm.
extern std::vector<NounDeclencionCaseTableRecord> nounDeclencionCaseTable;
|
|
|
|
|
|
// Maps a wide string (taken by value) to a NounDeclencion value. Declaration only:
// the accepted spellings and the behaviour for unrecognised input are set by the definition, which is not visible here.
NounDeclencion WStringToNounDeclencion(std::wstring str);
|
|
|
|
typedef std::tuple <
|
|
NounDeclencion,
|
|
NounCount,
|
|
NounGrammaticalCase
|
|
> NounTuple;
|
|
|
|
// Shorthand for an ordered set of wide strings.
using StringSet = std::set<std::wstring>;
|
|
|
|
// One way of splitting a word into a `base` and an `ending`, tagged with a DivisionCase.
// NOTE(review): the DC_LOST_VOWEL_* enumerators presumably mark splits where a vowel
// (o / e) dropped out of the base — confirm against getPossibleNounEndingDivisionSet.
struct NounEndingDivision
{
    std::wstring base;
    std::wstring ending;

    enum DivisionCase
    {
        DC_COMMON = 0,
        DC_LOST_VOWEL_O,
        DC_LOST_VOWEL_E
    };

    // Initialised so that a default-constructed value never feeds an indeterminate
    // enum into operator< (for example when the object is used as a std::set key).
    DivisionCase divisionCase = DC_COMMON;

    // Lexicographic strict weak ordering: base first, then ending, then divisionCase.
    bool operator<(const NounEndingDivision& other) const
    {
        return std::tie(base, ending, divisionCase)
             < std::tie(other.base, other.ending, other.divisionCase);
    }
};
|
|
|
|
// Takes no arguments and returns a vector of wide strings by value.
// NOTE(review): by its name, presumably every known noun ending — confirm against the definition.
std::vector<std::wstring> GetAllNounEndingArr();
|
|
|
|
|
|
// Returns a set of wide strings for the given record; the record is passed by value, so the caller's object is not modified.
// NOTE(review): presumably the plural form(s) of the noun — confirm against the definition.
std::set<std::wstring> getPluralForm(NounRecord nounRecord);
|
|
|
|
// Predicate over a wide string (taken by value).
// NOTE(review): presumably true when a record with this nominative form exists in the dictionary — confirm.
bool NounIsInDictionary(std::wstring nounNominative);
|
|
// Predicate over a wide string (taken by value).
// NOTE(review): presumably true when some dictionary record has this nominative plural form — confirm.
bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural);
|
|
|
|
|
|
// Returns a NounRecord by value for the given wide string.
// NOTE(review): what happens when no matching record exists is not visible from this header —
// check the definition before calling without a prior NounIsInDictionary() test.
NounRecord GetNounRecordFromDictionary(std::wstring nounNominative);
|
|
// Returns a NounRecord by value for the given wide string.
// NOTE(review): presumably looks the record up by its nominative plural form; the behaviour when
// nothing matches is not visible from this header — check the definition.
NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural);
|
|
|
|
// Predicate over a single wide character; the set of characters it accepts is fixed by the definition (not visible here).
// NOTE(review): "Consolant" looks like a misspelling of "Consonant"; the name is part of the public interface, so it is left unchanged.
bool charIsMissingVowelSoftenerConsolant(wchar_t c);
|
|
|
|
struct NounStruct
|
|
{
|
|
NounGrammaticalCase nounGrammaticalCase;
|
|
NounCount nounCount;
|
|
bool animated;
|
|
NounRecord nounRecord;
|
|
|
|
bool operator<(const NounStruct& other) const
|
|
{
|
|
if (nounGrammaticalCase != other.nounGrammaticalCase)
|
|
{
|
|
return nounGrammaticalCase < other.nounGrammaticalCase;
|
|
}
|
|
else
|
|
{
|
|
if (nounCount != other.nounCount)
|
|
{
|
|
return nounCount < other.nounCount;
|
|
}
|
|
else
|
|
{
|
|
if (animated != other.animated)
|
|
{
|
|
return animated < other.animated;
|
|
}
|
|
else
|
|
{
|
|
return nounRecord < other.nounRecord;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
|
|
// Returns a set of NounEndingDivision values for `noun` (taken by value); the set is ordered by NounEndingDivision::operator<.
// NOTE(review): presumably every candidate split of the word into base + ending — confirm against the definition.
std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun);
|
|
|
|
// Returns a vector of NounTuple (declension, count, case) values for the given ending string.
// NOTE(review): presumably all combinations that can produce `nounEnding` — confirm against the definition.
std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding);
|
|
|
|
// Returns a set of wide strings computed from a base string, a declension and a count.
// NOTE(review): "Noninative" looks like a misspelling of "Nominative" (kept because the name is public);
// presumably yields the candidate nominative form(s) — confirm against the definition.
std::set<std::wstring> GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount);
|
|
// Returns a single wide string computed from a base string and a declension.
// NOTE(review): "Noninative" is presumably "Nominative"; what the "SpecialPluralA" case covers is defined by the implementation — confirm.
std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclencion nounDeclencion);
|
|
|
|
// Returns one wide character derived from the record, which is taken by const reference and so not modified.
// NOTE(review): presumably the last character of nominativeForm; behaviour for an empty string is not visible here — confirm.
wchar_t GetLastChar(const NounRecord& nounRecord);
|
|
// Returns one wide character derived from the record, which is taken by const reference and so not modified.
// NOTE(review): presumably the second-to-last character of nominativeForm; behaviour for strings shorter than two characters is not visible here — confirm.
wchar_t GetPrevLastChar(const NounRecord& nounRecord);
|
|
|
|
// Takes no arguments and returns nothing, so any effect is on state outside the signature.
// NOTE(review): presumably a one-time initialisation of declension lookup data — confirm what must be loaded first.
void SetupDeclentionMap();
|
|
|
|
// Predicate over a record and a (declension, count, case) tuple; both are passed by value.
// NOTE(review): presumably true when the record is compatible with the tuple — the exact rules live in the definition.
bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple);
|
|
|
|
// Predicate over a NounDeclencion value.
// NOTE(review): presumably true for the SECOND_* enumerators — confirm exactly which ones against the definition.
bool IsDeclencionSecondType(NounDeclencion nounDeclention);
|
|
// Predicate over a NounDeclencion value.
// NOTE(review): presumably true for the *_ANIMATE enumerators; how the SECOND_NEUTRAL_* values (which carry no animacy suffix) are treated is not visible here — confirm.
bool IsDeclencionAnimated(NounDeclencion nounDeclention);
|
|
|
|
// Takes no arguments and returns nothing, so any effect is on state outside the signature.
// NOTE(review): presumably populates lookup maps related to NounEndingDivision::DivisionCase — confirm.
void FillDivisionCaseMaps();
|
|
|
|
// Returns a set of NounStruct values for `noun` (taken by value); the set is ordered by NounStruct::operator<.
// NOTE(review): presumably every (case, count, animacy, record) reading of the word, empty when nothing matches — confirm.
std::set<NounStruct> RecognizeNoun(std::wstring noun);
|
|
|
|
// Returns a NounDeclencion value for the given record (passed by value).
// NOTE(review): how animacy is chosen when a record has both canBeAnimate and canBeInanimate set is not visible here — confirm.
NounDeclencion CalculateNounDeclention(NounRecord nounRecord);
|
|
|
|
// Takes no arguments and returns nothing, so any effect is on state outside the signature.
// NOTE(review): presumably fills precalculatedNominativePluralSet for the entries of NounRecordArr — confirm.
void CalculatePluralForm();
|
|
|
|
// Takes no arguments and returns nothing, so any effect is on state outside the signature.
// NOTE(review): the data source and the container being filled are not visible from this header — confirm.
void LoadFrequentWordSet();
|
|
|
|
// Takes no arguments and returns nothing, so any effect is on state outside the signature.
// NOTE(review): presumably populates nounDeclencionCaseTable; the data source is not visible from this header — confirm.
void LoadNounDeclencionCaseTable();
|
|
|
|
|
|
} //namespace NN
|
|
|
|
#endif //NOUN_H_INCLUDED
|