chinese-journal/rudict/rudict/noun.h
2014-12-13 11:34:06 +00:00

206 lines
4.3 KiB
C++

#ifndef NOUN_H_INCLUDED
#define NOUN_H_INCLUDED
#include <fstream>
#include <map>
#include <set>
#include <string>
#include <tuple>
#include <vector>
#include "boost/algorithm/string.hpp"
#include "grammarCase.h"
namespace NN
{
// One noun entry. Extends TranslationUnitSetMix, which is declared outside
// this header. Both constructors are only declared here; their definitions
// (and therefore the way the fields below get filled) live in another file.
struct NounRecord : public TranslationUnitSetMix
{
    // Nominative form of the noun. This is the only field operator< looks at.
    std::wstring nominativeForm;

    NounGender gender;

    // Flags about which singular/plural forms the noun has.
    // NOTE(review): exact meaning of each flag is inferred from its name only;
    // confirm against the constructor that parses the dictionary line.
    bool haveSingleForm;
    bool haveMultipleForm;
    bool haveStandardMultipleForm;
    bool haveStandardMultipleFormWithMissingLastVowel;
    bool haveStandardMultipleFormEnding;
    bool haveAlternativeMultipleFormEnding;

    // Presumably an irregular plural spelled out explicitly -- TODO confirm.
    std::wstring specialMultipleForm;

    bool canBeAnimate;
    bool canBeInanimate;

    // Set of nominative plural forms stored on the record.
    // NOTE(review): presumably filled by CalculatePluralForm() -- confirm.
    std::set<std::wstring> precalculatedNominativePluralSet;

    NounRecord();

    // Builds a record from a single text line.
    // NOTE(review): not marked explicit, so std::wstring converts implicitly
    // to NounRecord; left as is because callers may rely on that.
    NounRecord(std::wstring line);

    // Orders records by nominativeForm alone; all other fields are ignored,
    // so two records with the same nominative form compare equivalent.
    bool operator<(const NounRecord& other) const
    {
        return nominativeForm < other.nominativeForm;
    }
};
// Declension classes distinguished by this module. Numeric values are
// spelled out so the numbering (0..17) is visible at a glance; they are the
// same values the implicit numbering produced.
// NOTE(review): the linguistic meaning of each class is inferred from the
// enumerator names only -- confirm against the declension case table data.
enum NounDeclencion
{
    // "FIRST_*" group
    FIRST_A_IFORM_INANIMATE      = 0,
    FIRST_A_IFORM_ANIMATE        = 1,
    FIRST_A_UFORM_INANIMATE      = 2,
    FIRST_A_UFORM_ANIMATE        = 3,
    FIRST_YA_FORM_INANIMATE      = 4,
    FIRST_YA_FORM_ANIMATE        = 5,

    // "SECOND_*" group
    SECOND_MALE_IFORM_INANIMATE  = 6,
    SECOND_MALE_IFORM_ANIMATE    = 7,
    SECOND_MALE_UFORM_INANIMATE  = 8,
    SECOND_MALE_UFORM_ANIMATE    = 9,
    SECOND_MALE_SSFORM_INANIMATE = 10,
    SECOND_MALE_SSFORM_ANIMATE   = 11,
    SECOND_I_SHORT_INANIMATE     = 12,
    SECOND_I_SHORT_ANIMATE       = 13,
    SECOND_NEUTRAL_E_FORM        = 14,
    SECOND_NEUTRAL_O_FORM        = 15,

    // "THIRD_*" group
    THIRD_FORM_INANIMATE         = 16,
    THIRD_FORM_ANIMATE           = 17
};
// Global list of NounRecord entries. Only declared here (extern); the
// definition lives in another translation unit.
// NOTE(review): presumably this is the loaded noun dictionary -- confirm.
extern std::vector<NounRecord> NounRecordArr;
// Associates one NounDeclencion value with a list of GrammaticalTableRecord
// rows. GrammaticalTableRecord is not declared in this header (presumably it
// comes from grammarCase.h -- confirm).
struct NounDeclencionCaseTableRecord
{
// Declension class this entry belongs to.
NounDeclencion nounDeclencion;
// Rows of the case table for that declension class.
std::vector<GrammaticalTableRecord> grammaticalCaseTable;
};
// Global list of per-declension case tables. Only declared here (extern);
// the definition lives in another translation unit.
// NOTE(review): presumably populated by LoadNounDeclencionCaseTable() -- confirm.
extern std::vector<NounDeclencionCaseTableRecord> nounDeclencionCaseTable;
// Converts a wide string to a NounDeclencion value.
// NOTE(review): the accepted spellings and the behaviour for an unknown
// string are not visible in this header -- see the definition.
NounDeclencion WStringToNounDeclencion(std::wstring str);
// (declension, count, grammatical case) triple.
// Element order matters: code that uses std::get<0..2> depends on it.
using NounTuple = std::tuple<NounDeclencion, NounCount, NounGrammaticalCase>;
// Shorthand for an ordered set of wide strings.
using StringSet = std::set<std::wstring>;
// One way of splitting a word into a base part and an ending part, tagged
// with a DivisionCase.
//
// The struct has no constructor and no default member initialisers, so a
// default-constructed object has an indeterminate divisionCase; set all three
// fields before comparing or inserting into an ordered container.
struct NounEndingDivision
{
    std::wstring base;
    std::wstring ending;

    // NOTE(review): DC_LOST_VOWEL_O / DC_LOST_VOWEL_E presumably mark
    // divisions where a vowel was dropped from the base -- confirm against
    // getPossibleNounEndingDivisionSet().
    enum DivisionCase
    {
        DC_COMMON = 0,
        DC_LOST_VOWEL_O,
        DC_LOST_VOWEL_E
    } divisionCase;

    // Lexicographic ordering: base first, then ending, then divisionCase.
    // std::tie builds tuples of references (no copies) and compares them
    // element by element, which is the same order the previous nested
    // if/else chain implemented.
    bool operator<(const NounEndingDivision& other) const
    {
        return std::tie(base, ending, divisionCase)
             < std::tie(other.base, other.ending, other.divisionCase);
    }
};
// Returns a list of noun endings as wide strings.
// NOTE(review): the source of the endings is not visible in this header --
// see the definition.
std::vector<std::wstring> GetAllNounEndingArr();
// Returns plural form(s) for the given record as a set of wide strings.
// The record is taken by value (copied on each call).
std::set<std::wstring> getPluralForm(NounRecord nounRecord);
// Reports whether a noun, given by its nominative form (per the parameter
// name), is present in the dictionary.
bool NounIsInDictionary(std::wstring nounNominative);
// Reports whether a noun, given by its nominative plural form (per the
// parameter name), is present in the dictionary.
bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural);
// Returns the dictionary record for the given nominative form, by value.
// NOTE(review): behaviour for a noun that is not in the dictionary is not
// visible here; presumably callers check NounIsInDictionary() first -- confirm.
NounRecord GetNounRecordFromDictionary(std::wstring nounNominative);
// Returns the dictionary record for the given nominative plural form, by value.
// NOTE(review): behaviour for a missing entry is not visible here; presumably
// callers check NounPluralFormIsInDictionary() first -- confirm.
NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural);
// Single-character classification predicate.
// NOTE(review): "Consolant" looks like a misspelling of "consonant"; the
// identifier is left unchanged. The set of characters accepted is defined in
// the implementation file.
bool charIsMissingVowelSoftenerConsolant(wchar_t c);
// A noun record together with a grammatical case, a count and an "animated"
// flag. RecognizeNoun() returns a std::set of these, which is why a strict
// ordering is provided.
struct NounStruct
{
    NounGrammaticalCase nounGrammaticalCase;
    NounCount nounCount;
    bool animated;
    NounRecord nounRecord;

    // Lexicographic ordering: grammatical case, then count, then the
    // animated flag, then the record (NounRecord::operator<, i.e. by
    // nominative form). std::tie compares tuples of references element by
    // element using operator< only, matching the order of the previous
    // nested if/else chain.
    // NOTE(review): assumes NounGrammaticalCase and NounCount are enums (or
    // otherwise have an operator< consistent with !=) -- confirm in
    // grammarCase.h.
    bool operator<(const NounStruct& other) const
    {
        return std::tie(nounGrammaticalCase, nounCount, animated, nounRecord)
             < std::tie(other.nounGrammaticalCase, other.nounCount, other.animated, other.nounRecord);
    }
};
// Returns a set of NounEndingDivision values for the given word.
// NOTE(review): presumably every candidate base/ending split -- confirm in
// the definition.
std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun);
// Returns a list of NounTuple values for the given ending string.
std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding);
// Returns a set of wide strings built from a base, a declension and a count.
// NOTE(review): "Noninative" looks like a misspelling of "Nominative"; the
// identifier is left unchanged. Presumably returns candidate nominative
// forms -- confirm.
std::set<std::wstring> GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount);
// Single-string counterpart of the function above that takes no count.
// NOTE(review): presumably covers a special plural ending in "a", going by
// the name -- confirm.
std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclencion nounDeclencion);
// Character helpers operating on a record.
// NOTE(review): presumably the last and next-to-last characters of
// nominativeForm; behaviour for too-short strings is not visible here.
wchar_t GetLastChar(const NounRecord& nounRecord);
wchar_t GetPrevLastChar(const NounRecord& nounRecord);
// Takes no arguments and returns nothing.
// NOTE(review): presumably initialises module-level state -- confirm.
void SetupDeclentionMap();
// Predicate relating a record to a (declension, count, case) tuple.
// Both arguments are taken by value.
bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple);
// Predicates on a declension value. Note the spelling varies across this
// header ("Declencion" / "Declention"); identifiers are left unchanged.
bool IsDeclencionSecondType(NounDeclencion nounDeclention);
bool IsDeclencionAnimated(NounDeclencion nounDeclention);
// Takes no arguments and returns nothing.
// NOTE(review): presumably initialises module-level state -- confirm.
void FillDivisionCaseMaps();
// Returns a set of NounStruct values for the given word.
// NOTE(review): presumably one entry per possible interpretation of the word
// form -- confirm.
std::set<NounStruct> RecognizeNoun(std::wstring noun);
// Returns the declension value computed for a record (taken by value).
NounDeclencion CalculateNounDeclention(NounRecord nounRecord);
// The three functions below take no arguments and return nothing.
// NOTE(review): presumably they load or derive module-level data (e.g.
// NounRecordArr / nounDeclencionCaseTable); required call order, if any, is
// not visible in this header -- confirm.
void CalculatePluralForm();
void LoadFrequentWordSet();
void LoadNounDeclencionCaseTable();
} //namespace NN
#endif //NOUN_H_INCLUDED