diff --git a/rudict/frequent_nouns_2000.txt b/rudict/frequent_nouns_2000.txt index 6f9c373..c607473 100644 Binary files a/rudict/frequent_nouns_2000.txt and b/rudict/frequent_nouns_2000.txt differ diff --git a/rudict/rudict/adjective.cpp b/rudict/rudict/adjective.cpp index b3d559b..b16fc49 100644 --- a/rudict/rudict/adjective.cpp +++ b/rudict/rudict/adjective.cpp @@ -17,9 +17,8 @@ namespace AJ AdjectiveRecord::AdjectiveRecord() - : standardShortFormAvailable(false) { - + SetAllFields(); } AdjectiveRecord::AdjectiveRecord(std::wstring line) @@ -30,14 +29,35 @@ namespace AJ SetWord(lineArr[1]); - standardShortFormAvailable = lineArr[2] == L"1" ? true : false; - - specialShortForm = lineArr[3]; + SetAllFields(lineArr); FillTranslationUnit(lineArr, 4); } + bool AdjectiveRecord::getStandardShortFormAvailable() const + { + return properties.get(L"standardShortFormAvailable"); + } + + std::wstring AdjectiveRecord::getSpecialShortForm() const + { + return properties.get(L"specialShortForm"); + } + + void AdjectiveRecord::SetAllFields() + { + properties.put(L"standardShortFormAvailable", false); + properties.put(L"specialShortForm", L""); + } + + void AdjectiveRecord::SetAllFields(const std::vector& lineArr) + { + properties.put(L"standardShortFormAvailable", lineArr[2] == L"1" ? true : false); + properties.put(L"specialShortForm", lineArr[3]); + } + + /* bool AdjectiveStruct::operator<(const AdjectiveStruct& n) const { if (grammaticalCase != n.grammaticalCase) @@ -70,6 +90,81 @@ namespace AJ } } } + */ + + /*NounGrammaticalCase grammaticalCase; + + NounCount count; + NounGender gender; + + bool isDeclentionAnimated;*/ + + AdjectiveStruct::AdjectiveStruct() + /* + : grammaticalCase(NGC_P1_NOMINATIVE) + , count(NC_SINGULAR) + , gender(NG_MALE) + , isDeclentionAnimated(false)*/ + + { + + } + + AdjectiveStruct::AdjectiveStruct(NounGrammaticalCase iGrammaticalCase, NounCount iCount, NounGender iGender, bool iIsDeclentionAnimated) + /* + : grammaticalCase(iGrammaticalCase) + , count(iCount) + , gender(iGender) + , isDeclentionAnimated(iIsDeclentionAnimated) + */ + { + modificatorTree.put(L"grammaticalCase", NounGrammaticalCaseToWString(iGrammaticalCase)); + modificatorTree.put(L"animated", iIsDeclentionAnimated); + modificatorTree.put(L"count", NounCountToWString(iCount)); + modificatorTree.put(L"gender", iGender); + } + /* + boost::property_tree::wptree AdjectiveStruct::GetModificators() const + { + + boost::property_tree::wptree result; + + result.put(L"grammaticalCase", NounGrammaticalCaseToWString(grammaticalCase)); + result.put(L"animated", isDeclentionAnimated); + result.put(L"count", NounCountToWString(count)); + result.put(L"gender", gender); + + return result; + + } + */ + + AdjectivePair::AdjectivePair() + { + } + + AdjectivePair::AdjectivePair(AdjectiveStruct iModificator, AdjectiveRecord iWordRecord) + : modificator(iModificator) + , wordRecord(iWordRecord) + { + } + + WordModificatorInterface& AdjectivePair::wordModificator() + { + return modificator; + } + + WordHolder& AdjectivePair::word() + { + return wordRecord; + } + + std::wstring AdjectivePair::getType() + { + return L"adjective"; + } + + std::vector GetAllAdjectiveEndingArr() { @@ -354,12 +449,12 @@ namespace AJ bool IFormTest(const AdjectiveRecord& r) { - return charIsIFormConsolant(r.nominativeMaleForm[r.nominativeMaleForm.size() - 3]); + return charIsIFormConsolant(r.GetPrevPrevLastChar()); } bool UFormTest(const AdjectiveRecord& r) { - return charIsUFormConsolant(r.nominativeMaleForm[r.nominativeMaleForm.size() - 3]); + return charIsUFormConsolant(r.GetPrevPrevLastChar()); } @@ -430,10 +525,9 @@ namespace AJ } - std::set RecognizeAdjective(std::wstring noun) + void RecognizeAdjective(std::wstring noun, std::vector>& wordPairArr) { - std::set result; - + auto adjectiveEndingDivisionArr = getPossibleAdjectiveEndingDivisionSet(noun); for (auto adjectiveEndingDivision : adjectiveEndingDivisionArr) @@ -463,7 +557,11 @@ namespace AJ if (DivisionCaseAdjectiveTupleRecordFilterMap[dc](tuple, record)) { - result.insert({ std::get<2>(tuple), std::get<1>(tuple), GetGenderFromDeclencion(std::get<0>(tuple)), IsDeclencionAnimated(std::get<0>(tuple)), record }); + AdjectiveStruct modificator(std::get<2>(tuple), std::get<1>(tuple), GetGenderFromDeclencion(std::get<0>(tuple)), IsDeclencionAnimated(std::get<0>(tuple))); + + AdjectivePair* wPair = new AdjectivePair(modificator, record); + + wordPairArr.push_back(std::shared_ptr(wPair)); } } @@ -474,7 +572,6 @@ namespace AJ } - return result; } void LoadAdjectiveDeclencionCaseTable() diff --git a/rudict/rudict/adjective.h b/rudict/rudict/adjective.h index 7ffbee7..2b8bb72 100644 --- a/rudict/rudict/adjective.h +++ b/rudict/rudict/adjective.h @@ -14,11 +14,15 @@ namespace AJ { - struct AdjectiveRecord : public TranslationUnitSetMix, public WordHolder + struct AdjectiveRecord : public WordHolder { - bool standardShortFormAvailable; - std::wstring specialShortForm; + bool getStandardShortFormAvailable() const; + + std::wstring getSpecialShortForm() const; + + void SetAllFields(); + void SetAllFields(const std::vector& lineArr); AdjectiveRecord(); AdjectiveRecord(std::wstring line); @@ -29,18 +33,40 @@ namespace AJ extern std::vector AdjectiveRecordArr; - struct AdjectiveStruct + struct AdjectiveStruct : public WordModificatorInterface { + /* NounGrammaticalCase grammaticalCase; NounCount count; NounGender gender; bool isDeclentionAnimated; + */ + AdjectiveStruct(); + AdjectiveStruct(NounGrammaticalCase iGrammaticalCase, NounCount iCount, NounGender iGender, bool iIsDeclentionAnimated); - AdjectiveRecord adjectiveRecord; + //boost::property_tree::wptree GetModificators() const; + }; - bool operator<(const AdjectiveStruct& n) const; + struct AdjectivePair : public WordPairInterface + { + protected: + + AdjectiveStruct modificator; + AdjectiveRecord wordRecord; + + public: + + AdjectivePair(); + + AdjectivePair(AdjectiveStruct iModificator, AdjectiveRecord iWordRecord); + + virtual WordModificatorInterface& wordModificator(); + + virtual WordHolder& word(); + + virtual std::wstring getType(); }; @@ -126,8 +152,7 @@ namespace AJ void FillDivisionCaseMaps(); - std::set RecognizeAdjective(std::wstring noun); - + void RecognizeAdjective(std::wstring noun, std::vector>& wordPairArr); void LoadAdjectiveDeclencionCaseTable(); diff --git a/rudict/rudict/grammarCase.cpp b/rudict/rudict/grammarCase.cpp index 8eb3cc5..c4ee610 100644 --- a/rudict/rudict/grammarCase.cpp +++ b/rudict/rudict/grammarCase.cpp @@ -5,7 +5,7 @@ //std::vector translationUnitArr; -void TranslationUnitSetMix::FillTranslationUnit(const std::vector& lineArr, int startFrom) +void WordHolder::FillTranslationUnit(const std::vector& lineArr, int startFrom) { for (int i = 0; i < 3; i++) { @@ -22,7 +22,7 @@ void TranslationUnitSetMix::FillTranslationUnit(const std::vector& } } -boost::property_tree::wptree TranslationUnitSetMix::CreateTranslationPropertyTree() const +boost::property_tree::wptree WordHolder::CreateTranslationPropertyTree() const { boost::property_tree::wptree ptree; @@ -44,6 +44,10 @@ bool WordHolder::operator<(const WordHolder& other) const return word < other.word; } +boost::property_tree::wptree WordHolder::GetProperties() const +{ + return properties; +} void WordHolder::SetWord(const std::wstring& w) { @@ -57,7 +61,7 @@ std::wstring WordHolder::GetWord() const wchar_t WordHolder::GetLastChar() const { - if (word.size() > 0) + if (word.size() == 0) { std::cout << "Error in GetLastChar" << std::endl; } @@ -67,7 +71,7 @@ wchar_t WordHolder::GetLastChar() const wchar_t WordHolder::GetPrevLastChar() const { - if (word.size() > 1) + if (word.size() < 2) { std::cout << "Error in GetPrevLastChar" << std::endl; } @@ -75,6 +79,91 @@ wchar_t WordHolder::GetPrevLastChar() const return word[word.size() - 2]; } +wchar_t WordHolder::GetPrevPrevLastChar() const +{ + if (word.size() < 3) + { + std::cout << "Error in GetPrevPrevLastChar" << std::endl; + } + + return word[word.size() - 3]; +} + +std::wstring NounGenderToWString(NounGender g) +{ + if (g == NG_MALE) + { + return L"NG_MALE"; + } + else if (g == NG_FEMALE) + { + return L"NG_FEMALE"; + } + else if (g == NG_NEUTRAL) + { + return L"NG_NEUTRAL"; + } + + std::cout << "Error in NounGenderToWString" << std::endl; + + return L""; +} + +std::wstring NormalizeNounGenderString(std::wstring str) +{ + if (str == L"м") + { + return L"NG_MALE"; + } + else if (str == L"ж") + { + return L"NG_FEMALE"; + } + else if (str == L"с") + { + return L"NG_NEUTRAL"; + } + + std::cout << "Error in NormalizeNounGenderString" << std::endl; + + return L"NG_MALE"; +} + +NounGender WStringToNounGender(std::wstring str) +{ + if (str == L"NG_MALE") + { + return NG_MALE; + } + else if (str == L"NG_FEMALE") + { + return NG_FEMALE; + } + else if (str == L"NG_NEUTRAL") + { + return NG_NEUTRAL; + } + + std::cout << "Warning - passing old variables to WStringToNounGender" << std::endl; + + if (str == L"м") + { + return NG_MALE; + } + else if (str == L"ж") + { + return NG_FEMALE; + } + else if (str == L"с") + { + return NG_NEUTRAL; + } + + std::cout << "Error in NounGenderToWString" << std::endl; + + return NG_MALE; +} + NounCount WStringToNounCount(std::wstring str) { diff --git a/rudict/rudict/grammarCase.h b/rudict/rudict/grammarCase.h index d4b396c..380cc85 100644 --- a/rudict/rudict/grammarCase.h +++ b/rudict/rudict/grammarCase.h @@ -1,74 +1,70 @@ -#ifndef GRAMMAR_CASE_H_INCLUDED -#define GRAMMAR_CASE_H_INCLUDED - +#ifndef GRAMMAR_CASE_H_INCLUDED +#define GRAMMAR_CASE_H_INCLUDED + #include #include #include #include #include -#include "boost/algorithm/string.hpp" +#include "boost/algorithm/string.hpp" #include "boost/property_tree/ptree.hpp" - - -enum NounGender -{ - NG_MALE = 0, - NG_FEMALE, - NG_NEUTRAL -}; - -enum NounGrammaticalCase -{ - NGC_P1_NOMINATIVE = 0, - NGC_P2_GENITIVE, - NGC_P3_DATIVE, - NGC_P4_ACCUSATIVE, - NGC_P5_INSTRUMENTAL, - NGC_P6_PREPOSITIONAL, - NGC_SIZE -}; - -enum NounCount -{ - NC_SINGULAR = 0, - NC_PLURAL, - NC_SIZE -}; - - -struct GrammaticalTableRecord -{ - NounCount count; - - NounGrammaticalCase grammaticalCase; - - std::set ending; -}; - -struct TranslationUnit -{ - std::wstring meaning; - std::wstring comment; - std::wstring example; -}; - -class TranslationUnitSetMix -{ -protected: - std::vector translationUnitArr; - - void FillTranslationUnit(const std::vector& lineArr, int startFrom); - -public: - - boost::property_tree::wptree CreateTranslationPropertyTree() const; -}; - -class WordHolder -{ + + +enum NounGender +{ + NG_MALE = 0, + NG_FEMALE, + NG_NEUTRAL +}; + +enum NounGrammaticalCase +{ + NGC_P1_NOMINATIVE = 0, + NGC_P2_GENITIVE, + NGC_P3_DATIVE, + NGC_P4_ACCUSATIVE, + NGC_P5_INSTRUMENTAL, + NGC_P6_PREPOSITIONAL, + NGC_SIZE +}; + +enum NounCount +{ + NC_SINGULAR = 0, + NC_PLURAL, + NC_SIZE +}; + + +struct GrammaticalTableRecord +{ + NounCount count; + + NounGrammaticalCase grammaticalCase; + + std::set ending; +}; + +struct TranslationUnit +{ + std::wstring meaning; + std::wstring comment; + std::wstring example; +}; + +class WordHolder +{ private: std::wstring word; + +protected: + boost::property_tree::wptree properties; + + std::vector translationUnitArr; + + void FillTranslationUnit(const std::vector& lineArr, int startFrom); + public: void SetWord(const std::wstring& w); @@ -78,24 +74,64 @@ public: wchar_t GetLastChar() const; wchar_t GetPrevLastChar() const; - + + wchar_t GetPrevPrevLastChar() const; + bool operator<(const WordHolder& other) const; - -}; - - -std::wstring NounCountToWString(NounCount nounCount); -NounCount WStringToNounCount(std::wstring str); -std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase); -NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str); - - -bool charIsConsolant(wchar_t c); //except й - -bool charIsVowel(wchar_t c); - -bool charIsIFormConsolant(wchar_t c); -bool charIsUFormConsolant(wchar_t c); - - -#endif //GRAMMAR_CASE_H_INCLUDED + + boost::property_tree::wptree GetProperties() const; + + boost::property_tree::wptree CreateTranslationPropertyTree() const; + +}; + +class WordModificatorInterface +{ +protected: + boost::property_tree::wptree modificatorTree; +public: + + virtual ~WordModificatorInterface() + { + } + + boost::property_tree::wptree GetModificators() const + { + return modificatorTree; + } +}; + +struct WordPairInterface +{ + virtual WordModificatorInterface& wordModificator() = 0; + + virtual WordHolder& word() = 0; + + virtual std::wstring getType() = 0; + + virtual ~WordPairInterface() + { + + } +}; + +std::wstring NounGenderToWString(NounGender g); + +std::wstring NormalizeNounGenderString(std::wstring str); +NounGender WStringToNounGender(std::wstring str); + +std::wstring NounCountToWString(NounCount nounCount); +NounCount WStringToNounCount(std::wstring str); +std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase); +NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str); + + +bool charIsConsolant(wchar_t c); //except й + +bool charIsVowel(wchar_t c); + +bool charIsIFormConsolant(wchar_t c); +bool charIsUFormConsolant(wchar_t c); + + +#endif //GRAMMAR_CASE_H_INCLUDED diff --git a/rudict/rudict/http/request_handler.cpp b/rudict/rudict/http/request_handler.cpp index 6074bdc..3e3f773 100644 --- a/rudict/rudict/http/request_handler.cpp +++ b/rudict/rudict/http/request_handler.cpp @@ -143,173 +143,43 @@ namespace http { boost::property_tree::wptree result; std::cout <<"PrepareReport" << std::endl; + if (request.size() < 2) { result.put(L"error", L"String is too short"); } + int id = 0; - int id; - - //Noun! - - std::set nounStructArr = NN::RecognizeNoun(request); - - id = 0; - + std::vector> wordPairSet; + + NN::RecognizeNoun(request, wordPairSet); + AJ::RecognizeAdjective(request, wordPairSet); + VB::RecognizeVerb(request, wordPairSet); + OT::RecognizeWord(request, wordPairSet); + PP::RecognizeWord(request, wordPairSet); + boost::property_tree::wptree wordArr; - for (auto& nounStruct : nounStructArr) + for (auto& wordPair : wordPairSet) { boost::property_tree::wptree nounTree; nounTree.put(L"id", id); - nounTree.put(L"word", "noun"); - nounTree.put(L"noun.grammaticalCase", NounGrammaticalCaseToWString(nounStruct.nounGrammaticalCase)); - nounTree.put(L"noun.animated", nounStruct.animated); - nounTree.put(L"noun.count", NounCountToWString(nounStruct.nounCount)); - nounTree.put(L"noun.canBeAnimate", nounStruct.nounRecord.canBeAnimate); - nounTree.put(L"noun.canBeInanimate", nounStruct.nounRecord.canBeInanimate); - nounTree.put(L"noun.gender", nounStruct.nounRecord.gender); - nounTree.put(L"noun.haveMultipleForm", nounStruct.nounRecord.haveMultipleForm); - nounTree.put(L"noun.haveSingleForm", nounStruct.nounRecord.haveSingleForm); - nounTree.put(L"noun.haveStandardMultipleForm", nounStruct.nounRecord.haveStandardMultipleForm); - nounTree.put(L"noun.haveStandardMultipleFormWithMissingLastVowel", nounStruct.nounRecord.haveStandardMultipleFormWithMissingLastVowel); - nounTree.put(L"noun.specialMultipleForm", nounStruct.nounRecord.specialMultipleForm); + nounTree.put(L"type", wordPair->getType()); + nounTree.put(L"word", wordPair->word().GetWord()); - nounTree.put(L"noun.nominativeSingularForm", nounStruct.nounRecord.nominativeForm); + nounTree.put_child(L"properties", wordPair->word().GetProperties()); - boost::property_tree::wptree allTranslationsTree = nounStruct.nounRecord.CreateTranslationPropertyTree(); - - nounTree.put_child(L"translations", allTranslationsTree); + nounTree.put_child(L"modificators", wordPair->wordModificator().GetModificators()); + + nounTree.put_child(L"translations", wordPair->word().CreateTranslationPropertyTree()); wordArr.push_back(std::make_pair(L"", nounTree)); id++; } - //result.put_child(L"words", nounArr); - - //Adjective! - - std::set adjectiveStructArr = AJ::RecognizeAdjective(request); - - //id = 0; - - - for (auto& adjectiveStruct : adjectiveStructArr) - { - boost::property_tree::wptree adjectiveTree; - - adjectiveTree.put(L"id", id); - adjectiveTree.put(L"word", "adjective"); - adjectiveTree.put(L"adjective.grammaticalCase", NounGrammaticalCaseToWString(adjectiveStruct.grammaticalCase)); - adjectiveTree.put(L"adjective.animated", adjectiveStruct.isDeclentionAnimated); - adjectiveTree.put(L"adjective.count", NounCountToWString(adjectiveStruct.count)); - - adjectiveTree.put(L"adjective.gender", adjectiveStruct.gender); - - adjectiveTree.put(L"adjective.nominativeSingularForm", adjectiveStruct.adjectiveRecord.nominativeMaleForm); - - boost::property_tree::wptree allTranslationsTree = adjectiveStruct.adjectiveRecord.CreateTranslationPropertyTree(); - - adjectiveTree.put_child(L"translations", allTranslationsTree); - - wordArr.push_back(std::make_pair(L"", adjectiveTree)); - - id++; - } - - - std::set verbStructArr = VB::RecognizeVerb(request); - - //id = 0; - - - for (auto& verbStruct : verbStructArr) - { - boost::property_tree::wptree verbTree; - - - verbTree.put(L"id", id); - verbTree.put(L"word", "verb"); - - verbTree.put(L"verb.infinitiveForm", verbStruct.verbRecord.infinitive); - verbTree.put(L"verb.canBeImperfect", verbStruct.verbRecord.canBeImperfect); - verbTree.put(L"verb.canBePerfect", verbStruct.verbRecord.canBePerfect); - verbTree.put(L"verb.canBeNotPrefixed", verbStruct.verbRecord.canBeNotPrefixed); - verbTree.put(L"verb.canBePrefixed", verbStruct.verbRecord.canBePrefixed); - - verbTree.put(L"verb.selfEnding", verbStruct.selfEnding); - - verbTree.put(L"verb.verbParams", VB::VerbParamsToWString(verbStruct.verbParams)); - - boost::property_tree::wptree allTranslationsTree = verbStruct.verbRecord.CreateTranslationPropertyTree(); - - verbTree.put_child(L"translations", allTranslationsTree); - - - wordArr.push_back(std::make_pair(L"", verbTree)); - - id++; - } - - std::set otherWordStructArr = OT::RecognizeWord(request); - - //id = 0; - - - for (auto& otherWordRecord : otherWordStructArr) - { - boost::property_tree::wptree otherWordTree; - - - otherWordTree.put(L"id", id); - otherWordTree.put(L"word", "otherWord"); - - otherWordTree.put(L"otherWord.word", otherWordRecord.word); - otherWordTree.put(L"otherWord.type", otherWordRecord.type); - - boost::property_tree::wptree allTranslationsTree = otherWordRecord.CreateTranslationPropertyTree(); - - otherWordTree.put_child(L"translations", allTranslationsTree); - - - wordArr.push_back(std::make_pair(L"", otherWordTree)); - - id++; - } - - std::set prepositionStructArr = PP::RecognizeWord(request); - - //id = 0; - - - for (auto& prepositionStruct : prepositionStructArr) - { - boost::property_tree::wptree prepositionTree; - - - prepositionTree.put(L"id", id); - prepositionTree.put(L"word", "preposition"); - - prepositionTree.put(L"preposition.infinitiveForm", prepositionStruct.word); - prepositionTree.put(L"preposition.availableForGenitive", prepositionStruct.availableForGenitive); - prepositionTree.put(L"preposition.availableForDative", prepositionStruct.availableForDative); - prepositionTree.put(L"preposition.availableForAccusative", prepositionStruct.availableForAccusative); - prepositionTree.put(L"preposition.availableForInstrumental", prepositionStruct.availableForInstrumental); - prepositionTree.put(L"preposition.availableForPrepositional", prepositionStruct.availableForPrepositional); - - boost::property_tree::wptree allTranslationsTree = prepositionStruct.CreateTranslationPropertyTree(); - - prepositionTree.put_child(L"translations", allTranslationsTree); - - - wordArr.push_back(std::make_pair(L"", prepositionTree)); - - id++; - } - result.put_child(L"words", wordArr); diff --git a/rudict/rudict/noun.cpp b/rudict/rudict/noun.cpp index a83fb56..8ce8225 100644 --- a/rudict/rudict/noun.cpp +++ b/rudict/rudict/noun.cpp @@ -15,20 +15,10 @@ std::vector NounRecordArr; std::vector nounDeclencionCaseTable; NounRecord::NounRecord() - : gender(NG_MALE) - , haveSingleForm(false) - , haveMultipleForm(false) - , haveStandardMultipleForm(false) - , haveStandardMultipleFormWithMissingLastVowel(false) - , haveStandardMultipleFormEnding(false) - , haveAlternativeMultipleFormEnding(false) - , canBeAnimate(false) - , canBeInanimate(false) { - + SetAllFields(); } - NounRecord::NounRecord(std::wstring line) { std::vector lineArr; @@ -36,40 +26,149 @@ NounRecord::NounRecord(std::wstring line) boost::split_regex(lineArr, line, boost::wregex(L" ")); SetWord(lineArr[1]); - - if (lineArr[2] == L"м") - { - gender = NG_MALE; - } - else if (lineArr[2] == L"ж") - { - gender = NG_FEMALE; - } - else - { - gender = NG_NEUTRAL; - } - - haveSingleForm = lineArr[3] == L"1" ? true : false; - haveMultipleForm = lineArr[4] == L"1" ? true : false; - - haveStandardMultipleForm = lineArr[5] == L"1" ? true : false; - - haveStandardMultipleFormWithMissingLastVowel = lineArr[6] == L"1" ? true : false; - - haveStandardMultipleFormEnding = lineArr[7] == L"1" ? true : false; - - haveAlternativeMultipleFormEnding = lineArr[8] == L"1" ? true : false; - - specialMultipleForm = lineArr[9]; - - canBeAnimate = lineArr[10] == L"1" ? true : false; - canBeInanimate = lineArr[11] == L"1" ? true : false; - + SetAllFields(lineArr); FillTranslationUnit(lineArr, 12); } +NounGender NounRecord::getGender() const +{ + return WStringToNounGender(properties.get(L"gender")); +} + +bool NounRecord::getHaveSingleForm() const +{ + return properties.get(L"haveSingleForm"); +} + +bool NounRecord::getHaveMultipleForm() const +{ + return properties.get(L"haveMultipleForm"); +} + +bool NounRecord::getHaveStandardMultipleForm() const +{ + return properties.get(L"haveStandardMultipleForm"); +} + +bool NounRecord::getHaveStandardMultipleFormWithMissingLastVowel() const +{ + return properties.get(L"haveStandardMultipleFormWithMissingLastVowel"); +} + +bool NounRecord::getHaveStandardMultipleFormEnding() const +{ + return properties.get(L"haveStandardMultipleFormEnding"); +} + +bool NounRecord::getHaveAlternativeMultipleFormEnding() const +{ + return properties.get(L"haveAlternativeMultipleFormEnding"); +} + +std::wstring NounRecord::getSpecialMultipleForm() const +{ + return properties.get(L"specialMultipleForm"); +} + +bool NounRecord::getCanBeAnimate() const +{ + return properties.get(L"canBeAnimate"); +} + +bool NounRecord::getCanBeInanimate() const +{ + return properties.get(L"canBeInanimate"); +} + +void NounRecord::SetAllFields() +{ + properties.put(L"gender", L"NG_MALE"); + properties.put(L"haveSingleForm", false); + properties.put(L"haveMultipleForm", false); + properties.put(L"haveStandardMultipleForm", false); + properties.put(L"haveStandardMultipleFormWithMissingLastVowel", false); + properties.put(L"haveStandardMultipleFormEnding", false); + properties.put(L"haveAlternativeMultipleFormEnding", false); + + properties.put(L"specialMultipleForm", ""); + + properties.put(L"canBeAnimate", false); + properties.put(L"canBeInanimate", false); +} + +void NounRecord::SetAllFields(const std::vector& lineArr) +{ + + + properties.put(L"gender", NormalizeNounGenderString(lineArr[2])); + + properties.put(L"haveSingleForm", lineArr[3] == L"1" ? true : false); + properties.put(L"haveMultipleForm", lineArr[4] == L"1" ? true : false); + properties.put(L"haveStandardMultipleForm", lineArr[5] == L"1" ? true : false); + properties.put(L"haveStandardMultipleFormWithMissingLastVowel", lineArr[6] == L"1" ? true : false); + properties.put(L"haveStandardMultipleFormEnding", lineArr[7] == L"1" ? true : false); + properties.put(L"haveAlternativeMultipleFormEnding", lineArr[8] == L"1" ? true : false); + + properties.put(L"specialMultipleForm", lineArr[9]); + + properties.put(L"canBeAnimate", lineArr[10] == L"1" ? true : false); + properties.put(L"canBeInanimate", lineArr[11] == L"1" ? true : false); +} + + +NounStruct::NounStruct() +/* + : nounGrammaticalCase(NGC_P1_NOMINATIVE) + , nounCount(NC_SINGULAR) + , animated(false)*/ +{ + +} + +NounStruct::NounStruct(NounGrammaticalCase iNounGrammaticalCase, NounCount iNounCount, bool iAnimated) +/* + : nounGrammaticalCase(iNounGrammaticalCase) + , nounCount(iNounCount) + , animated(iAnimated) + */ +{ + modificatorTree.put(L"grammaticalCase", NounGrammaticalCaseToWString(iNounGrammaticalCase)); + modificatorTree.put(L"animated", iAnimated); + modificatorTree.put(L"count", NounCountToWString(iNounCount)); +} + +/* +boost::property_tree::wptree NounStruct::GetModificators() const +{ + return modificatorTree; +}*/ + +NounPair::NounPair() +{ +} + +NounPair::NounPair(NounStruct iNounModificator, NounRecord iNoun) + : nounModificator(iNounModificator) + , noun(iNoun) +{ +} + +WordModificatorInterface& NounPair::wordModificator() +{ + return nounModificator; +} + +WordHolder& NounPair::word() +{ + return noun; +} + +std::wstring NounPair::getType() +{ + return L"noun"; +} + NounDeclencion WStringToNounDeclencion(std::wstring str) { @@ -176,6 +275,7 @@ bool NounEndingDivision::operator<(const NounEndingDivision& other) const } } +/* bool NounStruct::operator<(const NounStruct& other) const { if (nounGrammaticalCase != other.nounGrammaticalCase) @@ -190,17 +290,13 @@ bool NounStruct::operator<(const NounStruct& other) const } else { - if (animated != other.animated) - { + return animated < other.animated; - } - else - { - return nounRecord < other.nounRecord; - } + } } } +*/ std::vector GetAllNounEndingArr() { @@ -319,31 +415,31 @@ std::set getPluralForm(NounRecord noun) { std::set result; - if (noun.specialMultipleForm != L"") + if (noun.getSpecialMultipleForm() != L"") { - result.insert(noun.specialMultipleForm); + result.insert(noun.getSpecialMultipleForm()); } - if (noun.haveSingleForm) + if (noun.getHaveSingleForm()) { - if (noun.haveStandardMultipleForm) + if (noun.getHaveStandardMultipleForm()) { std::wstring pluralForm = convertToStandardPluralForm(noun.GetWord()); - if (noun.haveStandardMultipleFormEnding) + if (noun.getHaveStandardMultipleFormEnding()) { result.insert(pluralForm); } - if (noun.haveAlternativeMultipleFormEnding) + if (noun.getHaveAlternativeMultipleFormEnding()) { result.insert(convertFromStandardToAlternativePluralForm(pluralForm)); } } - if (noun.haveStandardMultipleFormWithMissingLastVowel) + if (noun.getHaveStandardMultipleFormWithMissingLastVowel()) { std::wstring pluralForm = convertToStandardPluralForm(noun.GetWord()); @@ -358,13 +454,13 @@ std::set getPluralForm(NounRecord noun) pluralForm.erase(pluralForm.begin() + pluralForm.size() - 3); } - if (noun.haveStandardMultipleFormEnding) + if (noun.getHaveStandardMultipleFormEnding()) { result.insert(pluralForm); } - if (noun.haveAlternativeMultipleFormEnding) + if (noun.getHaveAlternativeMultipleFormEnding()) { result.insert(convertFromStandardToAlternativePluralForm(pluralForm)); } @@ -386,7 +482,7 @@ bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural) { for (auto& noun : NounRecordArr) { - if (noun.haveMultipleForm) + if (noun.getHaveMultipleForm()) { if (noun.precalculatedNominativePluralSet.count(nounNominativePlural) != 0) { @@ -416,7 +512,7 @@ NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativeP { for (auto& noun : NounRecordArr) { - if (noun.haveMultipleForm) + if (noun.getHaveMultipleForm()) { if (noun.precalculatedNominativePluralSet.count(nounNominativePlural) != 0) { @@ -579,82 +675,82 @@ std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclenci bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (nounRecord.GetLastChar() == L'а' && charIsIFormConsolant(nounRecord.GetPrevLastChar())); + return nounRecord.getHaveSingleForm() && nounRecord.getCanBeInanimate() && (nounRecord.GetLastChar() == L'а' && charIsIFormConsolant(nounRecord.GetPrevLastChar())); } bool FirstAIFormAnimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.canBeAnimate && (nounRecord.GetLastChar() == L'а' && charIsIFormConsolant(nounRecord.GetPrevLastChar())); + return nounRecord.getHaveSingleForm() && nounRecord.getCanBeAnimate() && (nounRecord.GetLastChar() == L'а' && charIsIFormConsolant(nounRecord.GetPrevLastChar())); } bool FirstAIFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getCanBeInanimate() && ( (nounRecord.GetLastChar() == L'а' && charIsIFormConsolant(nounRecord.GetPrevLastChar())) || - !nounRecord.haveSingleForm && (nounRecord.GetLastChar() == L'и' && charIsIFormConsolant(nounRecord.GetPrevLastChar())) + !nounRecord.getHaveSingleForm() && (nounRecord.GetLastChar() == L'и' && charIsIFormConsolant(nounRecord.GetPrevLastChar())) ); } bool FirstAIFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getCanBeAnimate() && ( (nounRecord.GetLastChar() == L'а' && charIsIFormConsolant(nounRecord.GetPrevLastChar())) || - !nounRecord.haveSingleForm && (nounRecord.GetLastChar() == L'и' && charIsIFormConsolant(nounRecord.GetPrevLastChar())) + !nounRecord.getHaveSingleForm() && (nounRecord.GetLastChar() == L'и' && charIsIFormConsolant(nounRecord.GetPrevLastChar())) ); } bool FirstAUFormInanimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (nounRecord.GetLastChar() == L'а' && charIsUFormConsolant(nounRecord.GetPrevLastChar())); + return nounRecord.getHaveSingleForm() && nounRecord.getCanBeInanimate() && (nounRecord.GetLastChar() == L'а' && charIsUFormConsolant(nounRecord.GetPrevLastChar())); } bool FirstAUFormAnimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.canBeAnimate && (nounRecord.GetLastChar() == L'а' && charIsUFormConsolant(nounRecord.GetPrevLastChar())); + return nounRecord.getHaveSingleForm() && nounRecord.getCanBeAnimate() && (nounRecord.GetLastChar() == L'а' && charIsUFormConsolant(nounRecord.GetPrevLastChar())); } bool FirstAUFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getCanBeInanimate() && ( (nounRecord.GetLastChar() == L'а' && charIsUFormConsolant(nounRecord.GetPrevLastChar())) || - !nounRecord.haveSingleForm && (nounRecord.GetLastChar() == L'ы' && charIsUFormConsolant(nounRecord.GetPrevLastChar())) + !nounRecord.getHaveSingleForm() && (nounRecord.GetLastChar() == L'ы' && charIsUFormConsolant(nounRecord.GetPrevLastChar())) ); } bool FirstAUFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getCanBeAnimate() && ( (nounRecord.GetLastChar() == L'а' && charIsUFormConsolant(nounRecord.GetPrevLastChar())) || - !nounRecord.haveSingleForm && (nounRecord.GetLastChar() == L'ы' && charIsUFormConsolant(nounRecord.GetPrevLastChar())) + !nounRecord.getHaveSingleForm() && (nounRecord.GetLastChar() == L'ы' && charIsUFormConsolant(nounRecord.GetPrevLastChar())) ); } bool FirstYaFormInanimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.canBeInanimate && (nounRecord.GetLastChar() == L'я'); + return nounRecord.getHaveSingleForm() && nounRecord.getCanBeInanimate() && (nounRecord.GetLastChar() == L'я'); } bool FirstYaFormAnimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.canBeAnimate && (nounRecord.GetLastChar() == L'я'); + return nounRecord.getHaveSingleForm() && nounRecord.getCanBeAnimate() && (nounRecord.GetLastChar() == L'я'); } bool FirstYaFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeInanimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getCanBeInanimate() && ( (nounRecord.GetLastChar() == L'я') || - !nounRecord.haveSingleForm && (nounRecord.GetLastChar() == L'и') + !nounRecord.getHaveSingleForm() && (nounRecord.GetLastChar() == L'и') ); } bool FirstYaFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.canBeAnimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getCanBeAnimate() && ( (nounRecord.GetLastChar() == L'я') || - !nounRecord.haveSingleForm && (nounRecord.GetLastChar() == L'и') + !nounRecord.getHaveSingleForm() && (nounRecord.GetLastChar() == L'и') ); } @@ -663,14 +759,14 @@ bool FirstYaFormAnimatePluralCondition(const NounRecord& nounRecord) bool SecondMaleIFormInanimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && charIsIFormConsolant(nounRecord.GetLastChar()); + return nounRecord.getHaveSingleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeInanimate() && charIsIFormConsolant(nounRecord.GetLastChar()); } bool SecondMaleIFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeInanimate() && ( charIsIFormConsolant(nounRecord.GetLastChar()) || - !nounRecord.haveSingleForm && charIsIFormConsolant(nounRecord.GetPrevLastChar()) && nounRecord.GetLastChar() == L'и' + !nounRecord.getHaveSingleForm() && charIsIFormConsolant(nounRecord.GetPrevLastChar()) && nounRecord.GetLastChar() == L'и' ); } @@ -678,92 +774,92 @@ bool SecondMaleIFormInanimatePluralCondition(const NounRecord& nounRecord) bool SecondMaleIFormAnimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && charIsIFormConsolant(nounRecord.GetLastChar()); + return nounRecord.getHaveSingleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeAnimate() && charIsIFormConsolant(nounRecord.GetLastChar()); } bool SecondMaleIFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeAnimate() && ( charIsIFormConsolant(nounRecord.GetLastChar()) || - !nounRecord.haveSingleForm && charIsIFormConsolant(nounRecord.GetPrevLastChar()) && nounRecord.GetLastChar() == L'и' + !nounRecord.getHaveSingleForm() && charIsIFormConsolant(nounRecord.GetPrevLastChar()) && nounRecord.GetLastChar() == L'и' ); } bool SecondMaleUFormInanimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && charIsUFormConsolant(nounRecord.GetLastChar()); + return nounRecord.getHaveSingleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeInanimate() && charIsUFormConsolant(nounRecord.GetLastChar()); } bool SecondMaleUFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeInanimate() && ( charIsUFormConsolant(nounRecord.GetLastChar()) || - !nounRecord.haveSingleForm && charIsUFormConsolant(nounRecord.GetPrevLastChar()) && nounRecord.GetLastChar() == L'ы' + !nounRecord.getHaveSingleForm() && charIsUFormConsolant(nounRecord.GetPrevLastChar()) && nounRecord.GetLastChar() == L'ы' ); } bool SecondMaleUFormAnimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && charIsUFormConsolant(nounRecord.GetLastChar()); + return nounRecord.getHaveSingleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeAnimate() && charIsUFormConsolant(nounRecord.GetLastChar()); } bool SecondMaleUFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeAnimate() && ( charIsUFormConsolant(nounRecord.GetLastChar()) || - !nounRecord.haveSingleForm && charIsUFormConsolant(nounRecord.GetPrevLastChar()) && nounRecord.GetLastChar() == L'ы' + !nounRecord.getHaveSingleForm() && charIsUFormConsolant(nounRecord.GetPrevLastChar()) && nounRecord.GetLastChar() == L'ы' ); } bool SecondMaleSSFormInanimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && nounRecord.GetLastChar() == L'ь'; + return nounRecord.getHaveSingleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeInanimate() && nounRecord.GetLastChar() == L'ь'; } bool SecondMaleSSFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeInanimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeInanimate() && ( nounRecord.GetLastChar() == L'ь' || - !nounRecord.haveSingleForm && nounRecord.GetLastChar() == L'и' + !nounRecord.getHaveSingleForm() && nounRecord.GetLastChar() == L'и' ); } bool SecondMaleSSFormAnimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && nounRecord.GetLastChar() == L'ь'; + return nounRecord.getHaveSingleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeAnimate() && nounRecord.GetLastChar() == L'ь'; } bool SecondMaleSSFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_MALE && nounRecord.canBeAnimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getGender() == NG_MALE && nounRecord.getCanBeAnimate() && ( nounRecord.GetLastChar() == L'ь' || - !nounRecord.haveSingleForm && nounRecord.GetLastChar() == L'и' + !nounRecord.getHaveSingleForm() && nounRecord.GetLastChar() == L'и' ); } bool SecondIShortInanimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.canBeInanimate && nounRecord.GetLastChar() == L'й'; + return nounRecord.getHaveSingleForm() && nounRecord.getCanBeInanimate() && nounRecord.GetLastChar() == L'й'; } bool SecondIShortAnimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.canBeAnimate && nounRecord.GetLastChar() == L'й'; + return nounRecord.getHaveSingleForm() && nounRecord.getCanBeAnimate() && nounRecord.GetLastChar() == L'й'; } bool SecondIShortInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.canBeInanimate && nounRecord.GetLastChar() == L'й'; + return nounRecord.getHaveSingleForm() && nounRecord.getCanBeInanimate() && nounRecord.GetLastChar() == L'й'; } bool SecondIShortAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.canBeAnimate && nounRecord.GetLastChar() == L'й'; + return nounRecord.getHaveSingleForm() && nounRecord.getCanBeAnimate() && nounRecord.GetLastChar() == L'й'; } @@ -772,54 +868,54 @@ bool SecondIShortAnimatePluralCondition(const NounRecord& nounRecord) bool SecondNeutralEFormSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.gender == NG_NEUTRAL && nounRecord.GetLastChar() == L'е'; + return nounRecord.getHaveSingleForm() && nounRecord.getGender() == NG_NEUTRAL && nounRecord.GetLastChar() == L'е'; } bool SecondNeutralEFormPluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getGender() == NG_NEUTRAL && ( nounRecord.GetLastChar() == L'е' || - !nounRecord.haveSingleForm && nounRecord.GetLastChar() == L'я' + !nounRecord.getHaveSingleForm() && nounRecord.GetLastChar() == L'я' ); } bool SecondNeutralOFormSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.gender == NG_NEUTRAL && nounRecord.GetLastChar() == L'о'; + return nounRecord.getHaveSingleForm() && nounRecord.getGender() == NG_NEUTRAL && nounRecord.GetLastChar() == L'о'; } bool SecondNeutralOFormPluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_NEUTRAL && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getGender() == NG_NEUTRAL && ( nounRecord.GetLastChar() == L'о' || - !nounRecord.haveSingleForm && nounRecord.GetLastChar() == L'а' + !nounRecord.getHaveSingleForm() && nounRecord.GetLastChar() == L'а' ); } bool ThirdFormInanimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeInanimate && nounRecord.GetLastChar() == L'ь'; + return nounRecord.getHaveSingleForm() && nounRecord.getGender() == NG_FEMALE && nounRecord.getCanBeInanimate() && nounRecord.GetLastChar() == L'ь'; } bool ThirdFormAnimateSingularCondition(const NounRecord& nounRecord) { - return nounRecord.haveSingleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeAnimate && nounRecord.GetLastChar() == L'ь'; + return nounRecord.getHaveSingleForm() && nounRecord.getGender() == NG_FEMALE && nounRecord.getCanBeAnimate() && nounRecord.GetLastChar() == L'ь'; } bool ThirdFormInanimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeInanimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getGender() == NG_FEMALE && nounRecord.getCanBeInanimate() && ( nounRecord.GetLastChar() == L'ь' || - !nounRecord.haveSingleForm && nounRecord.GetLastChar() == L'и' + !nounRecord.getHaveSingleForm() && nounRecord.GetLastChar() == L'и' ); } bool ThirdFormAnimatePluralCondition(const NounRecord& nounRecord) { - return nounRecord.haveMultipleForm && nounRecord.gender == NG_FEMALE && nounRecord.canBeAnimate && ( + return nounRecord.getHaveMultipleForm() && nounRecord.getGender() == NG_FEMALE && nounRecord.getCanBeAnimate() && ( nounRecord.GetLastChar() == L'ь' || - !nounRecord.haveSingleForm && nounRecord.GetLastChar() == L'и' + !nounRecord.getHaveSingleForm() && nounRecord.GetLastChar() == L'и' ); } @@ -897,19 +993,7 @@ bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple) return false; } -bool NounScructIsAlreadyInArray(const NounStruct& nounStruct, const std::vector& arr) -{ - for (auto& ns : arr) - { - if (ns.nounGrammaticalCase == nounStruct.nounGrammaticalCase && - ns.nounRecord.GetWord() == nounStruct.nounRecord.GetWord()) - { - return true; - } - } - return false; -} bool IsDeclencionSecondType(NounDeclencion nounDeclention) { switch (nounDeclention) @@ -957,12 +1041,12 @@ bool StandardTest(NounTuple nounTuple, NounRecord nounRecord) bool LostVowelOTest(NounTuple nounTuple, NounRecord nounRecord) { - return nounRecord.haveStandardMultipleFormWithMissingLastVowel; + return nounRecord.getHaveStandardMultipleFormWithMissingLastVowel(); } bool LostVowelETest(NounTuple nounTuple, NounRecord nounRecord) { - return nounRecord.haveStandardMultipleFormWithMissingLastVowel; + return nounRecord.getHaveStandardMultipleFormWithMissingLastVowel(); } std::map> DivisionCaseNounModificatorMap; @@ -1017,7 +1101,7 @@ void FillDivisionCaseMaps() DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_COMMON] = [](NounTuple t, NounRecord r) { - return (r.haveStandardMultipleForm || std::get<1>(t) == NC_SINGULAR) && StandardTest(t, r); + return (r.getHaveStandardMultipleForm() || std::get<1>(t) == NC_SINGULAR) && StandardTest(t, r); }; DivisionCaseNounTupleRecordFilterMap[NounEndingDivision::DC_LOST_VOWEL_O] = [](NounTuple t, NounRecord r) @@ -1035,11 +1119,18 @@ void FillDivisionCaseMaps() } - -std::set RecognizeNoun(std::wstring noun) +void AddNounPairIfNotAdded(std::vector>& result, const NounTuple& nounTuple, const NounRecord& nounRecord) { + WordPairInterface* nPair = new NounPair(NounStruct(std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple))), nounRecord); - std::set result; + result.push_back(std::shared_ptr(nPair)); + +} + + + +void RecognizeNoun(std::wstring noun, std::vector>& wordPairArr) +{ auto nounEndingDivisionArr = getPossibleNounEndingDivisionSet(noun); @@ -1076,7 +1167,7 @@ std::set RecognizeNoun(std::wstring noun) if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord)) { - result.insert({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord }); + AddNounPairIfNotAdded(wordPairArr, nounTuple, nounRecord); } } @@ -1098,7 +1189,7 @@ std::set RecognizeNoun(std::wstring noun) if (DivisionCaseNounTupleRecordFilterMap[dc](nounTuple, nounRecord)) { - result.insert({ std::get<2>(nounTuple), std::get<1>(nounTuple), IsDeclencionAnimated(std::get<0>(nounTuple)), nounRecord }); + AddNounPairIfNotAdded(wordPairArr, nounTuple, nounRecord); } } } @@ -1110,7 +1201,6 @@ std::set RecognizeNoun(std::wstring noun) - return result; } NounDeclencion CalculateNounDeclention(NounRecord nounRecord) diff --git a/rudict/rudict/noun.h b/rudict/rudict/noun.h index e2d34e4..4af06e2 100644 --- a/rudict/rudict/noun.h +++ b/rudict/rudict/noun.h @@ -14,24 +14,27 @@ namespace NN { - struct NounRecord : public TranslationUnitSetMix, public WordHolder + struct NounRecord : public WordHolder { + public: - NounGender gender; + NounGender getGender() const; - bool haveSingleForm; - bool haveMultipleForm; + bool getHaveSingleForm() const; + bool getHaveMultipleForm() const; + bool getHaveStandardMultipleForm() const; + bool getHaveStandardMultipleFormWithMissingLastVowel() const; - bool haveStandardMultipleForm; - bool haveStandardMultipleFormWithMissingLastVowel; + bool getHaveStandardMultipleFormEnding() const; + bool getHaveAlternativeMultipleFormEnding() const; - bool haveStandardMultipleFormEnding; - bool haveAlternativeMultipleFormEnding; + std::wstring getSpecialMultipleForm() const; - std::wstring specialMultipleForm; - - bool canBeAnimate; - bool canBeInanimate; + bool getCanBeAnimate() const; + bool getCanBeInanimate() const; + + void SetAllFields(); + void SetAllFields(const std::vector& lineArr); std::set precalculatedNominativePluralSet; @@ -116,17 +119,46 @@ namespace NN bool charIsMissingVowelSoftenerConsolant(wchar_t c); - struct NounStruct + struct NounStruct : public WordModificatorInterface { + private: + /* NounGrammaticalCase nounGrammaticalCase; NounCount nounCount; bool animated; - NounRecord nounRecord; + */ + public: + NounStruct(); - bool operator<(const NounStruct& other) const; + NounStruct(NounGrammaticalCase iNounGrammaticalCase, NounCount iNounCount, bool iAnimated); + + //bool operator<(const NounStruct& other) const; + + //boost::property_tree::wptree GetModificators() const; }; + struct NounPair : public WordPairInterface + { + protected: + + NounStruct nounModificator; + NounRecord noun; + + public: + + NounPair(); + + NounPair(NounStruct iNounModificator, NounRecord iNoun); + + virtual WordModificatorInterface& wordModificator(); + + virtual WordHolder& word(); + + virtual std::wstring getType(); + }; + + std::set getPossibleNounEndingDivisionSet(std::wstring noun); @@ -144,7 +176,7 @@ namespace NN void FillDivisionCaseMaps(); - std::set RecognizeNoun(std::wstring noun); + void RecognizeNoun(std::wstring noun, std::vector>& wordPairArr); NounDeclencion CalculateNounDeclention(NounRecord nounRecord); diff --git a/rudict/rudict/other.cpp b/rudict/rudict/other.cpp index a615b68..2afea06 100644 --- a/rudict/rudict/other.cpp +++ b/rudict/rudict/other.cpp @@ -15,6 +15,7 @@ namespace OT OtherWordRecord::OtherWordRecord() { + SetAllFields(); } OtherWordRecord::OtherWordRecord(std::wstring line) @@ -25,28 +26,74 @@ namespace OT SetWord(lineArr[1]); - type = lineArr[2]; + SetAllFields(lineArr); FillTranslationUnit(lineArr, 3); } - std::set RecognizeWord(std::wstring word) + std::wstring OtherWordRecord::getType() const + { + return properties.get(L"type"); + } + + void OtherWordRecord::SetAllFields() + { + properties.put(L"type", ""); + } + + void OtherWordRecord::SetAllFields(const std::vector& lineArr) + { + properties.put(L"type", lineArr[2]); + } + + + /* + boost::property_tree::wptree OtherWordModificator::GetModificators() const + { + return boost::property_tree::wptree(); + } + */ + + OtherWordPair::OtherWordPair() + { + } + + OtherWordPair::OtherWordPair(OtherWordModificator iModificator, OtherWordRecord iWordRecord) + : modificator(iModificator) + , wordRecord(iWordRecord) + { + } + + WordModificatorInterface& OtherWordPair::wordModificator() + { + return modificator; + } + + WordHolder& OtherWordPair::word() + { + return wordRecord; + } + + std::wstring OtherWordPair::getType() + { + return L"other"; + } + + + void RecognizeWord(std::wstring word, std::vector>& wordPair) { - std::set result; for (auto& wordRecord : OtherWordRecordArr) { if (wordRecord.GetWord() == word) { - result.insert(wordRecord); + wordPair.push_back(std::shared_ptr(new OtherWordPair(OtherWordModificator(), wordRecord))); } } - return result; } - void LoadWordSet(std::string filename) { diff --git a/rudict/rudict/other.h b/rudict/rudict/other.h index 40f994a..74227c6 100644 --- a/rudict/rudict/other.h +++ b/rudict/rudict/other.h @@ -15,9 +15,12 @@ namespace OT { - struct OtherWordRecord : public TranslationUnitSetMix, public WordHolder + struct OtherWordRecord : public WordHolder { - std::wstring type; + std::wstring getType() const; + + void SetAllFields(); + void SetAllFields(const std::vector& lineArr); OtherWordRecord(); @@ -25,10 +28,38 @@ namespace OT }; + + class OtherWordModificator : public WordModificatorInterface + { + //public: + // virtual boost::property_tree::wptree GetModificators() const; + }; + + + struct OtherWordPair : public WordPairInterface + { + protected: + + OtherWordModificator modificator; + OtherWordRecord wordRecord; + + public: + + OtherWordPair(); + + OtherWordPair(OtherWordModificator iModificator, OtherWordRecord iWordRecord); + + virtual WordModificatorInterface& wordModificator(); + + virtual WordHolder& word(); + + virtual std::wstring getType(); + }; + + extern std::vector OtherWordRecordArr; - std::set RecognizeWord(std::wstring word); - + void RecognizeWord(std::wstring word, std::vector>& wordPair); void LoadWordSet(std::string filename); } diff --git a/rudict/rudict/preposition.cpp b/rudict/rudict/preposition.cpp index 27be91c..f86c7aa 100644 --- a/rudict/rudict/preposition.cpp +++ b/rudict/rudict/preposition.cpp @@ -14,12 +14,8 @@ namespace PP std::vector PrepositionRecordArr; PrepositionRecord::PrepositionRecord() - : availableForGenitive(false) - , availableForDative(false) - , availableForAccusative(false) - , availableForInstrumental(false) - , availableForPrepositional(false) { + SetAllFields(); } PrepositionRecord::PrepositionRecord(std::wstring line) @@ -30,29 +26,103 @@ namespace PP SetWord(lineArr[1]); - availableForGenitive = lineArr[2] == L"1" ? true : false; - availableForDative = lineArr[3] == L"1" ? true : false; - availableForAccusative = lineArr[4] == L"1" ? true : false; - availableForInstrumental = lineArr[5] == L"1" ? true : false; - availableForPrepositional = lineArr[6] == L"1" ? true : false; + SetAllFields(lineArr); FillTranslationUnit(lineArr, 7); } - std::set RecognizeWord(std::wstring word) + bool PrepositionRecord::getAvailableForGenitive() const + { + return properties.get(L"availableForGenitive"); + } + + bool PrepositionRecord::getAvailableForDative() const + { + return properties.get(L"availableForDative"); + } + + bool PrepositionRecord::getAvailableForAccusative() const + { + return properties.get(L"availableForAccusative"); + } + + bool PrepositionRecord::getAvailableForInstrumental() const + { + return properties.get(L"availableForInstrumental"); + } + + bool PrepositionRecord::getAvailableForPrepositional() const + { + return properties.get(L"availableForPrepositional"); + } + + void PrepositionRecord::SetAllFields() + { + properties.put(L"availableForGenitive", L""); + properties.put(L"availableForDative", L""); + properties.put(L"availableForAccusative", L""); + properties.put(L"availableForInstrumental", L""); + properties.put(L"availableForPrepositional", L""); + } + + void PrepositionRecord::SetAllFields(const std::vector& lineArr) + { + properties.put(L"availableForGenitive", lineArr[2] == L"1" ? true : false); + properties.put(L"availableForDative", lineArr[3] == L"1" ? true : false); + properties.put(L"availableForAccusative", lineArr[4] == L"1" ? true : false); + properties.put(L"availableForInstrumental", lineArr[5] == L"1" ? true : false); + properties.put(L"availableForPrepositional", lineArr[6] == L"1" ? true : false); + } + + /* + boost::property_tree::wptree PrepositionModificator::GetModificators() const + { + return boost::property_tree::wptree(); + }*/ + + + + PrepositionPair::PrepositionPair() + { + } + + PrepositionPair::PrepositionPair(PrepositionModificator iModificator, PrepositionRecord iWordRecord) + : modificator(iModificator) + , wordRecord(iWordRecord) + { + } + + WordModificatorInterface& PrepositionPair::wordModificator() + { + return modificator; + } + + WordHolder& PrepositionPair::word() + { + return wordRecord; + } + + std::wstring PrepositionPair::getType() + { + return L"preposition"; + } + + + + void RecognizeWord(std::wstring word, std::vector>& wordPair) { - std::set result; for (auto& wordRecord : PrepositionRecordArr) { if (wordRecord.GetWord() == word) { - result.insert(wordRecord); + + wordPair.push_back(std::shared_ptr(new PrepositionPair(PrepositionModificator(), wordRecord))); + } } - return result; } diff --git a/rudict/rudict/preposition.h b/rudict/rudict/preposition.h index a6247ec..75327ff 100644 --- a/rudict/rudict/preposition.h +++ b/rudict/rudict/preposition.h @@ -15,14 +15,17 @@ namespace PP { - struct PrepositionRecord : public TranslationUnitSetMix, public WordHolder + struct PrepositionRecord : public WordHolder { - bool availableForGenitive; - bool availableForDative; - bool availableForAccusative; - bool availableForInstrumental; - bool availableForPrepositional; + bool getAvailableForGenitive() const; + bool getAvailableForDative() const; + bool getAvailableForAccusative() const; + bool getAvailableForInstrumental() const; + bool getAvailableForPrepositional() const; + + void SetAllFields(); + void SetAllFields(const std::vector& lineArr); PrepositionRecord(); @@ -30,9 +33,35 @@ namespace PP }; + class PrepositionModificator : public WordModificatorInterface + { + }; + + + struct PrepositionPair : public WordPairInterface + { + protected: + + PrepositionModificator modificator; + PrepositionRecord wordRecord; + + public: + + PrepositionPair(); + + PrepositionPair(PrepositionModificator iModificator, PrepositionRecord iWordRecord); + + virtual WordModificatorInterface& wordModificator(); + + virtual WordHolder& word(); + + virtual std::wstring getType(); + }; + + extern std::vector PrepositionRecordArr; - std::set RecognizeWord(std::wstring word); + void RecognizeWord(std::wstring word, std::vector>& wordPair); void LoadWordSet(std::string filename); diff --git a/rudict/rudict/verb.cpp b/rudict/rudict/verb.cpp index 4cf9817..1131efb 100644 --- a/rudict/rudict/verb.cpp +++ b/rudict/rudict/verb.cpp @@ -17,12 +17,8 @@ namespace VB VerbRecord::VerbRecord() - : canBePrefixed(false) - , canBeNotPrefixed(false) - , canBePerfect(false) - , canBeImperfect(false) { - + SetAllFields(); } VerbRecord::VerbRecord(std::wstring line) @@ -33,16 +29,110 @@ namespace VB SetWord(lineArr[1]); - canBePrefixed = lineArr[2] == L"1" ? true : false; - canBeNotPrefixed = lineArr[3] == L"1" ? true : false; - - canBePerfect = lineArr[4] == L"1" ? true : false; - canBeImperfect = lineArr[5] == L"1" ? true : false; + SetAllFields(lineArr); FillTranslationUnit(lineArr, 6); } + bool VerbRecord::getCanBePrefixed() const + { + return properties.get(L"canBePrefixed"); + } + + bool VerbRecord::getCanBeNotPrefixed() const + { + return properties.get(L"canBeNotPrefixed"); + } + + bool VerbRecord::getCanBePerfect() const + { + return properties.get(L"canBePerfect"); + } + + bool VerbRecord::getCanBeImperfect() const + { + return properties.get(L"canBeImperfect"); + } + + void VerbRecord::SetAllFields() + { + properties.put(L"canBePrefixed", false); + properties.put(L"canBeNotPrefixed", false); + properties.put(L"canBePerfect", false); + properties.put(L"canBeImperfect", false); + } + + void VerbRecord::SetAllFields(const std::vector& lineArr) + { + properties.put(L"canBePrefixed", lineArr[2] == L"1" ? true : false); + properties.put(L"canBeNotPrefixed", lineArr[3] == L"1" ? true : false); + properties.put(L"canBePerfect", lineArr[4] == L"1" ? true : false); + properties.put(L"canBeImperfect", lineArr[5] == L"1" ? true : false); + } + + + VerbStruct::VerbStruct() + { + } + + VerbStruct::VerbStruct(VerbParams iVerbParams, std::wstring iSelfEnding) + //: verbParams(verbParams) + //, selfEnding(iSelfEnding) + { + modificatorTree.put(L"selfEnding", iSelfEnding); + + modificatorTree.put(L"verbParams", VB::VerbParamsToWString(iVerbParams)); + } + + /* + boost::property_tree::wptree VerbStruct::GetModificators() const + { + boost::property_tree::wptree result; + + result.put(L"selfEnding", selfEnding); + + result.put(L"verbParams", VB::VerbParamsToWString(verbParams)); + + return result; + } + */ + /* + struct VerbPair : public WordPairInterface + { + protected: + + VerbStruct modificator; + VerbRecord wordRecord; + + public: + */ + VerbPair::VerbPair() + { + } + + VerbPair::VerbPair(VerbStruct iModificator, VerbRecord iWordRecord) + : modificator(iModificator) + , wordRecord(iWordRecord) + { + } + + WordModificatorInterface& VerbPair::wordModificator() + { + return modificator; + } + + WordHolder& VerbPair::word() + { + return wordRecord; + } + + std::wstring VerbPair::getType() + { + return L"verb"; + } + + /* bool VerbStruct::operator<(const VerbStruct& v) const { if (verbParams != v.verbParams) @@ -60,7 +150,7 @@ namespace VB return verbRecord < v.verbRecord; } } - } + }*/ VerbParams WStringToVerbParams(std::wstring str) { @@ -375,10 +465,8 @@ namespace VB return modifiedBase + VerbParamsTable[0].verbConjunctionTable[static_cast(verbConjunction)].ending; } - std::set RecognizeVerb(std::wstring verb) + void RecognizeVerb(std::wstring verb, std::vector>& wordPair) { - std::set result; - std::wstring selfEnding = cutSelfEndingIfPossible(verb); auto verbEndingDivisionArr = getPossibleVerbEndingDivisionSet(verb); @@ -400,23 +488,15 @@ namespace VB if (VerbIsInDictionary(verbInfinitive)) { - VerbRecord vr = GetVerbRecordFromDictionary(verbInfinitive); - VerbStruct vs; + VerbPair* verbPair = new VerbPair(VerbStruct(verbParams.first, selfEnding), GetVerbRecordFromDictionary(verbInfinitive)); - vs.verbParams = verbParams.first; - - vs.selfEnding = selfEnding; - - vs.verbRecord = vr; - - result.insert(vs); + wordPair.push_back(std::shared_ptr(verbPair)); } } } - return result; } diff --git a/rudict/rudict/verb.h b/rudict/rudict/verb.h index bd26920..76cf953 100644 --- a/rudict/rudict/verb.h +++ b/rudict/rudict/verb.h @@ -13,12 +13,16 @@ namespace VB { - struct VerbRecord : public TranslationUnitSetMix, public WordHolder + struct VerbRecord : public WordHolder { - bool canBePrefixed; - bool canBeNotPrefixed; - bool canBePerfect; - bool canBeImperfect; + + bool getCanBePrefixed() const; + bool getCanBeNotPrefixed() const; + bool getCanBePerfect() const; + bool getCanBeImperfect() const; + + void SetAllFields(); + void SetAllFields(const std::vector& lineArr); VerbRecord(); @@ -27,7 +31,6 @@ namespace VB }; - extern std::vector VerbRecordArr; enum VerbParams { @@ -45,6 +48,46 @@ namespace VB VP_SIZE }; + + struct VerbStruct : public WordModificatorInterface + { + //VerbParams verbParams; + //std::wstring selfEnding; + + //VerbRecord verbRecord; + + VerbStruct(); + + VerbStruct(VerbParams iVerbParams, std::wstring iSelfEnding); + + //boost::property_tree::wptree GetModificators() const; + + //bool operator<(const VerbStruct& v) const; + + }; + + struct VerbPair : public WordPairInterface + { + protected: + + VerbStruct modificator; + VerbRecord wordRecord; + + public: + + VerbPair(); + + VerbPair(VerbStruct iModificator, VerbRecord iWordRecord); + + virtual WordModificatorInterface& wordModificator(); + + virtual WordHolder& word(); + + virtual std::wstring getType(); + }; + + extern std::vector VerbRecordArr; + enum VerbConjunction { VC_FIRST_ET = 0, @@ -76,17 +119,6 @@ namespace VB extern std::vector VerbParamsTable; - struct VerbStruct - { - VerbParams verbParams; - std::wstring selfEnding; - - VerbRecord verbRecord; - - - bool operator<(const VerbStruct& v) const; - - }; VerbParams WStringToVerbParams(std::wstring str); @@ -116,7 +148,7 @@ namespace VB std::wstring GetVerbInfinitive(std::wstring modifiedBase, VerbConjunction verbConjunction); - std::set RecognizeVerb(std::wstring verb); + void RecognizeVerb(std::wstring verb, std::vector>& wordPair); void LoadVerbConjunctionTable();