diff --git a/rudict/frequent_adjectives_2000.csv b/rudict/frequent_adjectives_2000.csv new file mode 100644 index 0000000..c6fa589 --- /dev/null +++ b/rudict/frequent_adjectives_2000.csv @@ -0,0 +1,282 @@ +Номер;Прилагательное; может быть кратким просто так или с добавлением Е;специальная краткая форма, мужской род +56;другой;0; +73;новый;1; +89;самый;0; +96;большой;0; +130;последний;0; +141;российский;0; +173;русский;0; +193;высокий;1; +199;хороший;1; +207;главный;1; +215;лучший;0; +229;маленький;0; +231;некоторый;0; +233;любой;0; +241;молодой;1; +249;государственный;1; +253;никакой;0; +254;советский;0; +256;настоящий;1; +264;старый;1; +278;разный;0; +279;нужный;1; +281;иной;0; +290;белый;1; +291;собственный;1; +293;чёрный;1; +297;основной;0; +311;далекий;1; +334;подобный;1; +335;следующий;0; +352;равный;1; +364;живой;1; +369;известный;1; +373;военный;1; +377;важный;1; +379;великий;1; +381;простой;1; +385;огромный;1; +387;политический;0; +409;московский;0; +424;готовый;1; +439;данный;0; +442;красный;1; +453;современный;1; +472;социальный;1; +480;ранний;0; +482;особый;0; +483;целый;1; +487;плохой;1; +490;сильный;1; +492;скорый;1; +502;внутренний;1; +514;экономический;0; +516;правый;1; +519;федеральный;1; +524;близкий;1; +526;похожий;1; +532;различный;1; +539;необходимый;1; +544;единственный;1; +545;лёгкий;0;лёгок +555;человеческий;0; +561;международный;1; +564;дорогой;1; +572;небольшой;0; +577;местный;0; +586;бывший;0; +601;американский;0; +615;мировой;0; +617;тяжелый;1; +627;возможный;1; +630;отдельный;1; +631;средний;1; +632;красивый;1; +640;короткий;1;короток +654;серьёзный;1; +660;интересный;1; +662;добрый;1; +665;национальный;1; +667;длинный;1; +670;страшный;1; +671;прошлый;0; +673;общественный;1; +703;детский;0; +707;единый;1; +709;определённый;1; +719;чужой;1; +721;странный;1; +723;чистый;1; +732;поздний;1; +738;специальный;1; +745;научный;1; +754;сложный;1; +762;реальный;1; +775;способный;1; +778;малый;1; +779;старший;0; +783;личный;1; +786;свободный;1; 
+788;обычный;1; +790;прекрасный;1; +791;высший;0; +803;тёмный;1; +810;гражданский;0; +857;боевой;0; +870;рабочий;1; +872;глубокий;1; +885;долгий;0;долог +892;прямой;1; +897;открытый;1; +904;знакомый;1; +908;нынешний;0; +921;исторический;0; +923;народный;1; +929;знаменитый;1; +933;больший;0; +940;пустой;1; +941;очередной;1; +949;судебный;1; +958;зелёный;1; +966;немецкий;0; +967;золотой;1; +969;технический;0; +970;нормальный;1; +974;некий;0; +976;городской;0; +978;соответствующий;0; +992;любимый;1; +993;родной;1; +994;западный;1; +1000;быстрый;1; +1008;холодный;1; +1012;конкретный;1; +1018;иностранный;0; +1020;ученый;1; +1022;левый;1; +1023;счастливый;1; +1035;святой;1; +1037;точный;1; +1053;частый;1; +1058;значительный;1; +1067;связанный;0;связан +1073;уверенный;0;уверен +1075;тонкий;0;тонок +1076;центральный;1; +1083;будущий;0; +1104;физический;0; +1117;частный;1; +1119;мелкий;0;мелок +1121;английский;1; +1122;постоянный;1; +1131;тихий;1; +1158;европейский;0; +1162;ближайший;0; +1163;отечественный;0; +1164;теплый;0; +1166;духовный;1; +1168;прежний;0; +1190;профессиональный;1; +1191;французский;0; +1192;женский;0; +1193;крайний;0; +1208;божий;0; +1215;дальнейший;0; +1225;естественный;1; +1229;информационный;0; +1234;железный;1; +1240;горячий;1; +1248;веселый;1; +1271;серый;1; +1276;опасный;1; +1284;прочий;0; +1302;слабый;1; +1306;яркий;0;ярок +1311;больной;1; +1314;летний;0; +1315;дополнительный;1; +1326;лесной;0; +1343;умный;1; +1346;северный;0; +1352;ясный;1; +1353;милый;1; +1355;светлый;1; +1359;редкий;0;редок +1362;верный;1; +1364;юридический;0; +1377;административный;1; +1388;узкий;0;узок +1397;художественный;1; +1404;древний;1; +1408;массовый;0; +1410;генеральный;1; +1411;замечательный;1; +1421;задний;1; +1422;региональный;1; +1429;здоровый;1; +1438;активный;1; +1439;литературный;1; +1452;острый;1; +1455;богатый;1; +1459;творческий;0; +1462;мягкий;0;мягок +1463;ночной;0; +1468;налоговый;0; +1469;толстый;0;толст +1480;верхний;0; +1482;вечный;1; +1484;лишний;0; 
+1485;морской;0; +1487;нижний;1; +1488;спокойный;0;спокоен +1493;сухой;1; +1498;синий;0; +1503;сегодняшний;0; +1509;медицинский;0; +1519;свежий;1; +1520;трудный;1; +1521;уголовный;1; +1582;желтый;1;желт +1596;мощный;1; +1617;русский;0; +1618;деревянный;1; +1628;полезный;1; +1632;дальний;1; +1644;домашний;0; +1647;традиционный;1; +1654;жесткий;1; +1655;крепкий;1; +1659;виноватый;1; +1663;культурный;1; +1666;приятный;1; +1682;круглый;0;кругл +1687;понятный;1; +1688;голубой;0; +1690;удивительный;1; +1692;знакомый;1; +1694;мужской;0; +1696;правовой;0; +1705;мертвый;0;мертв +1714;совместный;1; +1718;семейный;0; +1734;природный;1; +1748;православный;1; +1750;учебный;1; +1756;эффективный;1; +1780;материальный;1; +1781;невозможный;1; +1814;соседний;0; +1815;психологический;0; +1823;музыкальный;1; +1834;голый;1; +1838;обязательный;1; +1845;исполнительный;1; +1848;партийный;0; +1859;существенный;1; +1869;восточный;1; +1880;рабочий;0; +1882;сельский;0; +1883;характерный;1; +1886;неожиданный;1; +1896;видный;1; +1904;честный;1; +1910;резкий;0;резок +1912;младший;0; +1920;трудовой;0; +1922;строгий;1; +1931;южный;1; +1932;практический;0; +1933;многочисленный;1; +1935;согласный;1; +1943;спортивный;1; +1947;истинный;1; +1956;злой;0;зол +1961;индивидуальный;1; +1965;мокрый;1; +1969;дикий;1; +1981;коммерческий;0; +1983;театральный;1; +1988;воздушный;1; +1989;дешевый;1; +1993;пьяный;1; diff --git a/rudict/frequent_adjectives_2000.xlsx b/rudict/frequent_adjectives_2000.xlsx new file mode 100644 index 0000000..7292d3f Binary files /dev/null and b/rudict/frequent_adjectives_2000.xlsx differ diff --git a/rudict/frequent_nouns_2000.xlsx b/rudict/frequent_nouns_2000.xlsx index ad0a648..92b425a 100644 Binary files a/rudict/frequent_nouns_2000.xlsx and b/rudict/frequent_nouns_2000.xlsx differ diff --git a/rudict/frequent_words.xlsx b/rudict/frequent_words.xlsx index 012068c..451254c 100644 Binary files a/rudict/frequent_words.xlsx and b/rudict/frequent_words.xlsx differ diff --git 
a/rudict/grammar_case_adjective.csv b/rudict/grammar_case_adjective.csv new file mode 100644 index 0000000..c5ba587 --- /dev/null +++ b/rudict/grammar_case_adjective.csv @@ -0,0 +1,121 @@ +Склонение;Род;Падеж;Окончание;Пример +IFORM_MALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ий;рабочий +;;NGC_P2_GENITIVE;его;подарок для рабочего +;;NGC_P3_DATIVE;ему;подарок рабочему +;;NGC_P4_ACCUSATIVE;ий;обвиняю рабочий +;;NGC_P5_INSTRUMENTAL;им;говорю с рабочим +;;NGC_P6_PREPOSITIONAL;ем;говорю о рабочем +;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие +;;NGC_P2_GENITIVE;их;подарок для рабочих +;;NGC_P3_DATIVE;им;подарок рабочим +;;NGC_P4_ACCUSATIVE;ие;обвиняю рабочие +;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими +;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих +IFORM_MALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ий;рабочий +;;NGC_P2_GENITIVE;его;подарок для рабочего +;;NGC_P3_DATIVE;ему;подарок рабочему +;;NGC_P4_ACCUSATIVE;его;обвиняю рабочего +;;NGC_P5_INSTRUMENTAL;им;говорю с рабочим +;;NGC_P6_PREPOSITIONAL;ем;говорю о рабочем +;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие +;;NGC_P2_GENITIVE;их;подарок для рабочих +;;NGC_P3_DATIVE;им;подарок рабочим +;;NGC_P4_ACCUSATIVE;их;обвиняю рабочих +;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими +;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих +UOFORM_MALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ой, ый;деловой +;;NGC_P2_GENITIVE;ого;подарок для делового +;;NGC_P3_DATIVE;ому;подарок деловому +;;NGC_P4_ACCUSATIVE;ой;обвиняю деловой +;;NGC_P5_INSTRUMENTAL;ым;говорю с деловым +;;NGC_P6_PREPOSITIONAL;ом;говорю о деловом +;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые +;;NGC_P2_GENITIVE;ых;подарок для деловых +;;NGC_P3_DATIVE;ым;подарок деловым +;;NGC_P4_ACCUSATIVE;ые;обвиняю деловые +;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми +;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых +UOFORM_MALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ой, ый;деловой +;;NGC_P2_GENITIVE;ого;подарок для делового +;;NGC_P3_DATIVE;ому;подарок деловому +;;NGC_P4_ACCUSATIVE;ого;обвиняю делового 
+;;NGC_P5_INSTRUMENTAL;ым;говорю с деловым +;;NGC_P6_PREPOSITIONAL;ом;говорю о деловом +;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые +;;NGC_P2_GENITIVE;ых;подарок для деловых +;;NGC_P3_DATIVE;ым;подарок деловым +;;NGC_P4_ACCUSATIVE;ых;обвиняю деловых +;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми +;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых +IFORM_FEMALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;рабочая +;;NGC_P2_GENITIVE;ей;подарок для рабочей +;;NGC_P3_DATIVE;ей;подарок рабочей +;;NGC_P4_ACCUSATIVE;ую;обвиняю рабочую +;;NGC_P5_INSTRUMENTAL;ей;говорю с рабочей +;;NGC_P6_PREPOSITIONAL;ей;говорю о рабочей +;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие +;;NGC_P2_GENITIVE;их;подарок для рабочих +;;NGC_P3_DATIVE;им;подарок рабочим +;;NGC_P4_ACCUSATIVE;ие;обвиняю рабочие +;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими +;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих +IFORM_FEMALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;рабочая +;;NGC_P2_GENITIVE;ей;подарок для рабочей +;;NGC_P3_DATIVE;ей;подарок рабочей +;;NGC_P4_ACCUSATIVE;ую;обвиняю рабочую +;;NGC_P5_INSTRUMENTAL;ей;говорю с рабочей +;;NGC_P6_PREPOSITIONAL;ей;говорю о рабочей +;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие +;;NGC_P2_GENITIVE;их;подарок для рабочих +;;NGC_P3_DATIVE;им;подарок рабочим +;;NGC_P4_ACCUSATIVE;их;обвиняю рабочих +;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими +;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих +UOFORM_FEMALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;деловая +;;NGC_P2_GENITIVE;ой;подарок для деловой +;;NGC_P3_DATIVE;ой;подарок деловой +;;NGC_P4_ACCUSATIVE;ую;обвиняю деловую +;;NGC_P5_INSTRUMENTAL;ой;говорю с деловой +;;NGC_P6_PREPOSITIONAL;ой;говорю о деловой +;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые +;;NGC_P2_GENITIVE;ых;подарок для деловых +;;NGC_P3_DATIVE;ым;подарок деловым +;;NGC_P4_ACCUSATIVE;ые;обвиняю деловые +;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми +;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых +UOFORM_FEMALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;деловая +;;NGC_P2_GENITIVE;ой;подарок для деловой 
+;;NGC_P3_DATIVE;ой;подарок деловой +;;NGC_P4_ACCUSATIVE;ую;обвиняю деловую +;;NGC_P5_INSTRUMENTAL;ой;говорю с деловой +;;NGC_P6_PREPOSITIONAL;ой;говорю о деловой +;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые +;;NGC_P2_GENITIVE;ых;подарок для деловых +;;NGC_P3_DATIVE;ым;подарок деловым +;;NGC_P4_ACCUSATIVE;ых;обвиняю деловых +;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми +;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых +IFORM_NEUTRAL;NC_SINGULAR;NGC_P1_NOMINATIVE;ее;рабочее +;;NGC_P2_GENITIVE;его;подарок для рабочего +;;NGC_P3_DATIVE;ему;подарок рабочему +;;NGC_P4_ACCUSATIVE;ее;обвиняю рабочее +;;NGC_P5_INSTRUMENTAL;им;говорю с рабочим +;;NGC_P6_PREPOSITIONAL;ем;говорю о рабочем +;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие +;;NGC_P2_GENITIVE;их;подарок для рабочих +;;NGC_P3_DATIVE;им;подарок рабочим +;;NGC_P4_ACCUSATIVE;ие;обвиняю рабочие +;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими +;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих +UOFORM_NEUTRAL;NC_SINGULAR;NGC_P1_NOMINATIVE;ое;деловое +;;NGC_P2_GENITIVE;ого;подарок для делового +;;NGC_P3_DATIVE;ому;подарок деловому +;;NGC_P4_ACCUSATIVE;ое;обвиняю деловое +;;NGC_P5_INSTRUMENTAL;ым;говорю с деловым +;;NGC_P6_PREPOSITIONAL;ом;говорю о деловом +;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые +;;NGC_P2_GENITIVE;ых;подарок для деловых +;;NGC_P3_DATIVE;ым;подарок деловым +;;NGC_P4_ACCUSATIVE;ые;обвиняю деловые +;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми +;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых diff --git a/rudict/grammar_case_adjective.xlsx b/rudict/grammar_case_adjective.xlsx new file mode 100644 index 0000000..810a100 Binary files /dev/null and b/rudict/grammar_case_adjective.xlsx differ diff --git a/rudict/rudict/adjective.cpp b/rudict/rudict/adjective.cpp new file mode 100644 index 0000000..e5c81a9 --- /dev/null +++ b/rudict/rudict/adjective.cpp @@ -0,0 +1,507 @@ +#include "adjective.h" +#include + +#include //Xperimental -- for debug only + +#include "utf8utf16.h" + +#include "boost/regex.hpp" +#include "boost/algorithm/string/regex.hpp" + 
+namespace AJ +{ + + std::vector AdjectiveRecordArr; + + std::vector adjectiveDeclencionCaseTable; + + + AdjectiveRecord::AdjectiveRecord() + : standardShortFormAvailable(false) + { + + } + + /* Builds a record from one ';'-separated dictionary row: field 1 is stored as the nominative masculine form, field 2 sets the standard-short-form flag when it equals "1", field 3 is stored as the special short form. A row with fewer fields leaves the missing members at their defaults (empty string / false) instead of indexing past the end of the split result. */ + AdjectiveRecord::AdjectiveRecord(std::wstring line) + : standardShortFormAvailable(false) + { + std::vector lineArr; + + boost::split_regex(lineArr, line, boost::wregex(L";")); + + if (lineArr.size() > 1) + { + nominativeMaleForm = lineArr[1]; + } + + if (lineArr.size() > 2) + { + standardShortFormAvailable = (lineArr[2] == L"1"); + } + + if (lineArr.size() > 3) + { + specialShortForm = lineArr[3]; + } + + } + + + std::vector GetAllAdjectiveEndingArr() + { + std::vector result; + + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + result.push_back(L""); + + return result; + } + + AdjectiveDeclencion WStringToAdjectiveDeclencion(std::wstring str) + { + if (str == L"IFORM_MALE_INANIMATE") + { + return IFORM_MALE_INANIMATE; + } + if (str == L"IFORM_MALE_ANIMATE") + { + return IFORM_MALE_ANIMATE; + } + if (str == L"UOFORM_MALE_INANIMATE") + { + return UOFORM_MALE_INANIMATE; + } + if (str == L"UOFORM_MALE_ANIMATE") + { + return UOFORM_MALE_ANIMATE; + } + + if (str == L"IFORM_FEMALE_INANIMATE") + { + return IFORM_FEMALE_INANIMATE; + } + if (str == L"IFORM_FEMALE_ANIMATE") + { + return IFORM_FEMALE_ANIMATE; + } + if (str == L"UOFORM_FEMALE_INANIMATE") + { + return UOFORM_FEMALE_INANIMATE; + } + if (str == L"UOFORM_FEMALE_ANIMATE") + { + return UOFORM_FEMALE_ANIMATE; + } + + if (str == L"IFORM_NEUTRAL") + { + return IFORM_NEUTRAL; + } + if (str == L"UOFORM_NEUTRAL") + { + return UOFORM_NEUTRAL; + } + + std::cout << "Error in 
WStringToAdjectiveDeclencion"; + return IFORM_MALE_INANIMATE; + } + + /* Returns every (base, ending) split of the word where the ending comes from GetAllAdjectiveEndingArr() and the last character of the base is accepted by charIsConsolant. A word that consists of the ending alone leaves an empty base; that split is skipped rather than indexed. */ + std::set getPossibleAdjectiveEndingDivisionSet(std::wstring noun) + { + std::set result; + + auto allAdjectiveEndingArr = GetAllAdjectiveEndingArr(); + + for (auto ending : allAdjectiveEndingArr) + { + if (boost::ends_with(noun, ending)) + { + std::wstring adjectiveBase = boost::replace_last_copy(noun, ending, ""); + + + + if (!adjectiveBase.empty() && charIsConsolant(adjectiveBase[adjectiveBase.size() - 1])) + { + result.insert({ adjectiveBase, ending, AdjectiveEndingDivision::DC_COMMON }); + } + + } + } + + return result; + } + + /* Collects a (declension, count, case) tuple for every loaded table row whose ending set contains the given ending. The loop is bounded by the number of rows actually loaded, so a table with fewer than NGC_SIZE * NC_SIZE rows is not read out of range. */ + std::vector GetPossibleTupleArr(std::wstring ending) + { + std::vector result; + + for (auto& adjective : adjectiveDeclencionCaseTable) + { + for (std::size_t i = 0; i < adjective.grammaticalCaseTable.size(); i++) + { + if (adjective.grammaticalCaseTable[i].ending.count(ending) != 0) + { + result.push_back(AdjectiveTuple{ adjective.adjectiveDeclencion, adjective.grammaticalCaseTable[i].count, adjective.grammaticalCaseTable[i].grammaticalCase }); + } + } + + } + + return result; + } + + bool AdjectiveIsInDictionary(std::wstring nominative) + { + for (auto& adjective : AdjectiveRecordArr) + { + if (adjective.nominativeMaleForm == nominative) + { + return true; + } + } + + return false; + } + + AdjectiveRecord GetAdjectiveRecordFromDictionary(std::wstring nominative) + { + for (auto& adjective : AdjectiveRecordArr) + { + if (adjective.nominativeMaleForm == nominative) + { + return adjective; + } + } + + return{}; + } + + + /* + std::set GetAdjectiveNominative(std::wstring base, AdjectiveDeclencion declencion, NounCount nounCount) + { + std::set result; + + AdjectiveDeclencionCaseTableRecord declencionCaseTableRecord = adjectiveDeclencionCaseTable[static_cast(declencion)]; + + for (auto& grammaticalTableRecord : declencionCaseTableRecord.grammaticalCaseTable) + { + if (grammaticalTableRecord.grammaticalCase == NGC_P1_NOMINATIVE && grammaticalTableRecord.count == nounCount) + { + for (auto& e : grammaticalTableRecord.ending) 
+ { + result.insert(base + e); + } + + } + } + + return result; + }*/ + + /* Builds the candidate nominative forms for a base by appending the ending(s) selected by the base's last character: one candidate when charIsIFormConsolant accepts it, two when charIsUFormConsolant accepts it, none otherwise. An empty base has no last character and yields no candidates. */ + std::set GetNominativeMaleSingular(std::wstring base) + { + if (base.empty()) + { + return{}; + } + + if (charIsIFormConsolant(base[base.size() - 1])) + { + return{ base + L"" }; + } + + if (charIsUFormConsolant(base[base.size() - 1])) + { + return{ base + L"", base + L"" }; + } + + return{}; + + } + + bool IsDeclencionAnimated(AdjectiveDeclencion declention) + { + switch (declention) + { + case AJ::IFORM_MALE_ANIMATE: + case AJ::UOFORM_MALE_ANIMATE: + case AJ::IFORM_FEMALE_ANIMATE: + case AJ::UOFORM_FEMALE_ANIMATE: + return true; + break; + default: + return false; + break; + } + } + + + NounGender GetGenderFromDeclencion(AdjectiveDeclencion declention) + { + switch (declention) + { + case AJ::IFORM_MALE_INANIMATE: + case AJ::IFORM_MALE_ANIMATE: + case AJ::UOFORM_MALE_INANIMATE: + case AJ::UOFORM_MALE_ANIMATE: + return NG_MALE; + break; + case AJ::IFORM_FEMALE_INANIMATE: + case AJ::IFORM_FEMALE_ANIMATE: + case AJ::UOFORM_FEMALE_INANIMATE: + case AJ::UOFORM_FEMALE_ANIMATE: + return NG_FEMALE; + break; + default: + return NG_NEUTRAL; + break; + } + } + + + /* True when the character two positions before the last one of the stored form is accepted by charIsIFormConsolant. Forms shorter than three characters have no such position and never match. */ + bool IFormTest(const AdjectiveRecord& r) + { + return r.nominativeMaleForm.size() >= 3 && charIsIFormConsolant(r.nominativeMaleForm[r.nominativeMaleForm.size() - 3]); + } + + /* Same check as IFormTest, using charIsUFormConsolant. */ + bool UOFormTest(const AdjectiveRecord& r) + { + return r.nominativeMaleForm.size() >= 3 && charIsUFormConsolant(r.nominativeMaleForm[r.nominativeMaleForm.size() - 3]); + } + + + std::map> DeclentionConditionMap; + + void SetupDeclentionMap() + { + + DeclentionConditionMap[IFORM_MALE_INANIMATE] = std::bind(IFormTest, std::placeholders::_1); + DeclentionConditionMap[IFORM_MALE_ANIMATE] = std::bind(IFormTest, std::placeholders::_1); + DeclentionConditionMap[UOFORM_MALE_INANIMATE] = std::bind(UOFormTest, std::placeholders::_1); + DeclentionConditionMap[UOFORM_MALE_ANIMATE] = std::bind(UOFormTest, std::placeholders::_1); + + DeclentionConditionMap[IFORM_FEMALE_INANIMATE] = std::bind(IFormTest, std::placeholders::_1); + 
DeclentionConditionMap[IFORM_FEMALE_ANIMATE] = std::bind(IFormTest, std::placeholders::_1); + DeclentionConditionMap[UOFORM_FEMALE_INANIMATE] = std::bind(UOFormTest, std::placeholders::_1); + DeclentionConditionMap[UOFORM_FEMALE_ANIMATE] = std::bind(UOFormTest, std::placeholders::_1); + + DeclentionConditionMap[IFORM_NEUTRAL] = std::bind(IFormTest, std::placeholders::_1); + DeclentionConditionMap[UOFORM_NEUTRAL] = std::bind(UOFormTest, std::placeholders::_1); + + + } + + bool AdjectiveFitsDeclention(AdjectiveRecord record, AdjectiveTuple tuple) + { + + AdjectiveDeclencion declencion = std::get<0>(tuple); + + bool standardDeclention = DeclentionConditionMap[declencion](record); + + if (standardDeclention) + { + return true; + } + + return false; + } + + std::map < AdjectiveEndingDivision::DivisionCase, std::function < bool(AdjectiveTuple) >> DivisionCaseAdjectiveTupleFilterMap; + + std::map < AdjectiveEndingDivision::DivisionCase, std::function < bool(AdjectiveTuple, AdjectiveRecord) >> DivisionCaseAdjectiveTupleRecordFilterMap; + + + void FillDivisionCaseMaps() + { + + DivisionCaseAdjectiveTupleFilterMap[AdjectiveEndingDivision::DC_COMMON] = [](AdjectiveTuple tuple) + { + return true; + }; + + DivisionCaseAdjectiveTupleRecordFilterMap[AdjectiveEndingDivision::DC_COMMON] = [](AdjectiveTuple tuple, AdjectiveRecord record) + { + return AdjectiveFitsDeclention(record, tuple); + }; + + } + + std::set RecognizeAdjective(std::wstring noun) + { + std::set result; + + auto adjectiveEndingDivisionArr = getPossibleAdjectiveEndingDivisionSet(noun); + + for (auto adjectiveEndingDivision : adjectiveEndingDivisionArr) + { + std::wstring base = adjectiveEndingDivision.base; + std::wstring ending = adjectiveEndingDivision.ending; + AdjectiveEndingDivision::DivisionCase dc = adjectiveEndingDivision.divisionCase; + + std::wstring modifiedBase = base; + std::wstring modifiedEnding = ending; + + std::vector possibleTupleArr = GetPossibleTupleArr(modifiedEnding); + + for (AdjectiveTuple 
tuple : possibleTupleArr) + { + if (DivisionCaseAdjectiveTupleFilterMap[dc](tuple)) + { + std::set nominaviteSingularSet = GetNominativeMaleSingular(modifiedBase); + + for (auto& nn : nominaviteSingularSet) + { + + if (AdjectiveIsInDictionary(nn)) + { + + AdjectiveRecord record = GetAdjectiveRecordFromDictionary(nn); + + if (DivisionCaseAdjectiveTupleRecordFilterMap[dc](tuple, record)) + { + result.insert({ std::get<2>(tuple), std::get<1>(tuple), GetGenderFromDeclencion(std::get<0>(tuple)), IsDeclencionAnimated(std::get<0>(tuple)), record }); + } + } + + } + + } + } + + } + + return result; + } + + void LoadAdjectiveDeclencionCaseTable() + { + + adjectiveDeclencionCaseTable.clear(); + +#ifdef _WIN32 + std::ifstream f("C:/Workplace/ChineseJournal/rudict/grammar_case_adjective.csv"); + +#else + std::ifstream f("/home/devuser/workplace/rudict/grammar_case_adjective.csv"); +#endif + + std::string line; + std::wstring wline; + + if (f.is_open()) + { + + std::cout << "File found!" << std::endl; + + std::vector currentGrammaticalCaseTable; + std::wstring currentAdjectiveDeclencion; + std::wstring currentAdjectiveCount; + + getline(f, line); //Skip one line + + while (getline(f, line)) + { + std::vector lineArr; + + boost::split_regex(lineArr, line, boost::regex(";")); + + if (lineArr[0] != "") + { + if (currentAdjectiveDeclencion == L"") + { + currentAdjectiveDeclencion = string_to_wstring(lineArr[0]); + } + else + { + adjectiveDeclencionCaseTable.push_back(AdjectiveDeclencionCaseTableRecord{ WStringToAdjectiveDeclencion(currentAdjectiveDeclencion), currentGrammaticalCaseTable }); + + currentAdjectiveDeclencion = string_to_wstring(lineArr[0]); + + currentGrammaticalCaseTable.clear(); + } + + } + + if (lineArr[1] != "") + { + currentAdjectiveCount = string_to_wstring(lineArr[1]); + } + + std::wstring endings = string_to_wstring(lineArr[3]); + + std::set endingsSet; + boost::split_regex(endingsSet, endings, boost::regex(", ")); + + currentGrammaticalCaseTable.push_back({ + 
WStringToNounCount(currentAdjectiveCount), + WStringToNounGrammaticalCase(string_to_wstring(lineArr[2])), + endingsSet + }); + + } + //Add last one + if (currentAdjectiveDeclencion != L"") + { + adjectiveDeclencionCaseTable.push_back(AdjectiveDeclencionCaseTableRecord{ WStringToAdjectiveDeclencion(currentAdjectiveDeclencion), currentGrammaticalCaseTable }); + } + + f.close(); + } + else + { + std::cout << "file not found!" << std::endl; + } + + + } + + void LoadFrequentAdjectiveSet() + { + + +#ifdef _WIN32 + std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_adjectives_2000.csv"); + +#else + std::ifstream f("/home/devuser/workplace/rudict/frequent_adjectives_2000.csv"); +#endif + + std::string line; + std::wstring wline; + + if (f.is_open()) + { + + getline(f, line); //Skip one line + + std::cout << "File found!" << std::endl; + while (getline(f, line)) + { + + wline = string_to_wstring(line); + AdjectiveRecord adjectiveRecord(wline); + + AdjectiveRecordArr.push_back(adjectiveRecord); + + } + f.close(); + } + else + { + std::cout << "file not found!" 
<< std::endl; + } + + + } + + +} //namespace AJ + diff --git a/rudict/rudict/adjective.h b/rudict/rudict/adjective.h new file mode 100644 index 0000000..728932b --- /dev/null +++ b/rudict/rudict/adjective.h @@ -0,0 +1,186 @@ +#ifndef ADJECTIVE_H_INCLUDED +#define ADJECTIVE_H_INCLUDED + +#include +#include +#include +#include +#include + +#include "boost/algorithm/string.hpp" + +#include "grammarCase.h" + +namespace AJ +{ + + struct AdjectiveRecord + { + std::wstring nominativeMaleForm; + + bool standardShortFormAvailable; + + std::wstring specialShortForm; + + AdjectiveRecord(); + AdjectiveRecord(std::wstring line); + + bool operator<(const AdjectiveRecord& n) const + { + return nominativeMaleForm < n.nominativeMaleForm; + } + }; + + + extern std::vector AdjectiveRecordArr; + + + struct AdjectiveStruct + { + NounGrammaticalCase grammaticalCase; + + NounCount count; + NounGender gender; + + bool isDeclentionAnimated; + + AdjectiveRecord adjectiveRecord; + + bool operator<(const AdjectiveStruct& n) const + { + if (grammaticalCase != n.grammaticalCase) + { + return grammaticalCase < n.grammaticalCase; + } + else + { + if (count != n.count) + { + return count < n.count; + } + else + { + if (gender != n.gender) + { + return gender < n.gender; + } + else + { + if (isDeclentionAnimated != n.isDeclentionAnimated) + { + return isDeclentionAnimated < n.isDeclentionAnimated; + } + else + { + return adjectiveRecord < n.adjectiveRecord; + } + } + } + } + } + }; + + + std::vector GetAllAdjectiveEndingArr(); + + + + enum AdjectiveDeclencion + { + IFORM_MALE_INANIMATE = 0, + IFORM_MALE_ANIMATE, + UOFORM_MALE_INANIMATE, + UOFORM_MALE_ANIMATE, + IFORM_FEMALE_INANIMATE, + IFORM_FEMALE_ANIMATE, + UOFORM_FEMALE_INANIMATE, + UOFORM_FEMALE_ANIMATE, + IFORM_NEUTRAL, + UOFORM_NEUTRAL, + }; + + + struct AdjectiveDeclencionCaseTableRecord + { + AdjectiveDeclencion adjectiveDeclencion; + std::vector grammaticalCaseTable; + }; + + extern std::vector adjectiveDeclencionCaseTable; + + + 
AdjectiveDeclencion WStringToAdjectiveDeclencion(std::wstring str); + + + struct AdjectiveEndingDivision + { + std::wstring base; + std::wstring ending; + + enum DivisionCase + { + DC_COMMON = 0 + } divisionCase; + + bool operator<(const AdjectiveEndingDivision& other) const + { + if (base != other.base) + { + return base < other.base; + } + else + { + if (ending != other.ending) + { + return ending < other.ending; + } + else + { + return divisionCase < other.divisionCase; + } + } + } + }; + + std::set getPossibleAdjectiveEndingDivisionSet(std::wstring noun); + + + typedef std::tuple < + AdjectiveDeclencion, + NounCount, + NounGrammaticalCase + > AdjectiveTuple; + + std::vector GetPossibleTupleArr(std::wstring ending); + + bool AdjectiveIsInDictionary(std::wstring nominative); + AdjectiveRecord GetAdjectiveRecordFromDictionary(std::wstring nominative); + + + + //std::set GetAdjectiveNominative(std::wstring base, AdjectiveDeclencion declencion, NounCount nounCount); + std::set GetNominativeMaleSingular(std::wstring base); + + void SetupDeclentionMap(); + bool AdjectiveFitsDeclention(AdjectiveRecord record, AdjectiveTuple tuple); + + bool IsDeclencionAnimated(AdjectiveDeclencion declention); + NounGender GetGenderFromDeclencion(AdjectiveDeclencion declention); + + void FillDivisionCaseMaps(); + + + std::set RecognizeAdjective(std::wstring noun); + + + void LoadAdjectiveDeclencionCaseTable(); + + + void LoadFrequentAdjectiveSet(); + + + +} //namespace AJ + + +#endif //ADJECTIVE_H_INCLUDED diff --git a/rudict/rudict/grammarCase.cpp b/rudict/rudict/grammarCase.cpp new file mode 100644 index 0000000..fcc29a8 --- /dev/null +++ b/rudict/rudict/grammarCase.cpp @@ -0,0 +1,128 @@ +#include "grammarCase.h" + +#include + +NounCount WStringToNounCount(std::wstring str) +{ + if (str == L"NC_SINGULAR") + { + return NC_SINGULAR; + } + if (str == L"NC_PLURAL") + { + return NC_PLURAL; + } + + std::cout << "Error in WStringToNounCount!" 
<< std::endl; + return NC_SINGULAR; +} + +std::wstring NounCountToWString(NounCount nounCount) +{ + if (nounCount == NC_SINGULAR) + { + return L"NC_SINGULAR"; + } + if (nounCount == NC_PLURAL) + { + return L"NC_PLURAL"; + } + + std::cout << "Error in NounCountToWString!" << std::endl; + return L""; +} + + +std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase) +{ + switch (nounGrammaticalCase) + { + case NGC_P1_NOMINATIVE: return L"NGC_P1_NOMINATIVE"; + case NGC_P2_GENITIVE: return L"NGC_P2_GENITIVE"; + case NGC_P3_DATIVE: return L"NGC_P3_DATIVE"; + case NGC_P4_ACCUSATIVE: return L"NGC_P4_ACCUSATIVE"; + case NGC_P5_INSTRUMENTAL: return L"NGC_P5_INSTRUMENTAL"; + case NGC_P6_PREPOSITIONAL: return L"NGC_P6_PREPOSITIONAL"; + } + + return L""; +} + +NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str) +{ + + if (str == L"NGC_P1_NOMINATIVE") + { + return NGC_P1_NOMINATIVE; + } + if (str == L"NGC_P2_GENITIVE") + { + return NGC_P2_GENITIVE; + } + if (str == L"NGC_P3_DATIVE") + { + return NGC_P3_DATIVE; + } + if (str == L"NGC_P4_ACCUSATIVE") + { + return NGC_P4_ACCUSATIVE; + } + if (str == L"NGC_P5_INSTRUMENTAL") + { + return NGC_P5_INSTRUMENTAL; + } + if (str == L"NGC_P6_PREPOSITIONAL") + { + return NGC_P6_PREPOSITIONAL; + } + + std::cout << "Error in WStringToNounGrammaticalCase!" 
<< std::endl; + return NGC_P1_NOMINATIVE; +} + + + + +bool charIsConsolant(wchar_t c) //except +{ + std::wstring consolants = L""; + + for (wchar_t ic : consolants) + { + if (c == ic) + { + return true; + } + } + + return false; +} + +bool charIsVowel(wchar_t c) +{ + std::wstring vovels = L""; + + for (wchar_t ic : vovels) + { + if (c == ic) + { + return true; + } + } + + return false; +} + + +std::wstring i_form_consolants = L""; +std::wstring u_form_consolants = L""; + +bool charIsIFormConsolant(wchar_t c) +{ + return i_form_consolants.find(c) != i_form_consolants.npos; +} + +/* Returns true when c occurs in u_form_consolants; the not-found sentinel is taken from the same string that is searched. */ +bool charIsUFormConsolant(wchar_t c) +{ + return u_form_consolants.find(c) != u_form_consolants.npos; +} diff --git a/rudict/rudict/grammarCase.h b/rudict/rudict/grammarCase.h new file mode 100644 index 0000000..cbb5294 --- /dev/null +++ b/rudict/rudict/grammarCase.h @@ -0,0 +1,58 @@ +#ifndef GRAMMAR_CASE_H_INCLUDED +#define GRAMMAR_CASE_H_INCLUDED + + +#include +#include + +enum NounGender +{ + NG_MALE = 0, + NG_FEMALE, + NG_NEUTRAL +}; + +enum NounGrammaticalCase +{ + NGC_P1_NOMINATIVE = 0, + NGC_P2_GENITIVE, + NGC_P3_DATIVE, + NGC_P4_ACCUSATIVE, + NGC_P5_INSTRUMENTAL, + NGC_P6_PREPOSITIONAL, + NGC_SIZE +}; + +enum NounCount +{ + NC_SINGULAR = 0, + NC_PLURAL, + NC_SIZE +}; + + +struct GrammaticalTableRecord +{ + NounCount count; + + NounGrammaticalCase grammaticalCase; + + std::set ending; +}; + + +std::wstring NounCountToWString(NounCount nounCount); +NounCount WStringToNounCount(std::wstring str); +std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase); +NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str); + + +bool charIsConsolant(wchar_t c); //except + +bool charIsVowel(wchar_t c); + +bool charIsIFormConsolant(wchar_t c); +bool charIsUFormConsolant(wchar_t c); + + +#endif //GRAMMAR_CASE_H_INCLUDED diff --git a/rudict/rudict/http/request_handler.cpp b/rudict/rudict/http/request_handler.cpp index 464d9ab..79421bd 100644 --- 
a/rudict/rudict/http/request_handler.cpp +++ b/rudict/rudict/http/request_handler.cpp @@ -21,6 +21,7 @@ #include "boost/property_tree/json_parser.hpp" #include "../utf8utf16.h" #include "../noun.h" +#include "../adjective.h" namespace http { namespace server { @@ -144,12 +145,16 @@ namespace http { result.put(L"error", L"String is too short"); } - std::set nounStructArr = RecognizeNoun(request); + int id; + + //Noun! + + std::set nounStructArr = NN::RecognizeNoun(request); std::cout <<"nounstructarr" << nounStructArr.size() << std::endl; - int id = 0; + id = 0; boost::property_tree::wptree nounArr; @@ -178,6 +183,35 @@ namespace http { result.put_child(L"nouns", nounArr); + //Adjective! + + std::set adjectiveStructArr = AJ::RecognizeAdjective(request); + + id = 0; + + boost::property_tree::wptree adjectiveArr; + + for (auto& adjectiveStruct : adjectiveStructArr) + { + boost::property_tree::wptree adjectivrTree; + + adjectivrTree.put(L"id", id); + adjectivrTree.put(L"grammaticalCase", NounGrammaticalCaseToWString(adjectiveStruct.grammaticalCase)); + adjectivrTree.put(L"animated", adjectiveStruct.isDeclentionAnimated); + adjectivrTree.put(L"count", NounCountToWString(adjectiveStruct.count)); + + adjectivrTree.put(L"gender", adjectiveStruct.gender); + + adjectivrTree.put(L"nominativeSingularForm", adjectiveStruct.adjectiveRecord.nominativeMaleForm); + + adjectiveArr.push_back(std::make_pair(L"", adjectivrTree)); + + id++; + } + + result.put_child(L"adjectives", adjectiveArr); + + return result; diff --git a/rudict/rudict/main.cpp b/rudict/rudict/main.cpp index f82aef6..7f9026e 100644 --- a/rudict/rudict/main.cpp +++ b/rudict/rudict/main.cpp @@ -6,17 +6,25 @@ #include "noun.h" +#include "adjective.h" + int main() { - SetupDeclentionMap(); - LoadNounDeclencionCaseTable(); - LoadFrequentWordSet(); - FillDivisionCaseMaps(); - CalculatePluralForm(); + NN::SetupDeclentionMap(); + NN::LoadNounDeclencionCaseTable(); + NN::LoadFrequentWordSet(); + NN::FillDivisionCaseMaps(); + 
NN::CalculatePluralForm(); + + AJ::LoadAdjectiveDeclencionCaseTable(); + AJ::LoadFrequentAdjectiveSet(); + AJ::FillDivisionCaseMaps(); + AJ::SetupDeclentionMap(); //RecognizeNoun(L"стульями"); //Косяк: "вечер" + //AJ::RecognizeAdjective(L"золотыми"); try { diff --git a/rudict/rudict/noun.cpp b/rudict/rudict/noun.cpp index ed73771..63ba41d 100644 --- a/rudict/rudict/noun.cpp +++ b/rudict/rudict/noun.cpp @@ -7,6 +7,8 @@ #include "boost/regex.hpp" #include "boost/algorithm/string/regex.hpp" +namespace NN +{ std::vector NounRecordArr; @@ -153,83 +155,6 @@ NounDeclencion WStringToNounDeclencion(std::wstring str) } -NounCount WStringToNounCount(std::wstring str) -{ - if (str == L"NC_SINGULAR") - { - return NC_SINGULAR; - } - if (str == L"NC_PLURAL") - { - return NC_PLURAL; - } - - std::cout << "Error in WStringToNounCount!" << std::endl; - return NC_SINGULAR; -} - -std::wstring NounCountToWString(NounCount nounCount) -{ - if (nounCount == NC_SINGULAR) - { - return L"NC_SINGULAR"; - } - if (nounCount == NC_PLURAL) - { - return L"NC_PLURAL"; - } - - std::cout << "Error in NounCountToWString!" 
<< std::endl; - return L""; -} - - -std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase) -{ - switch (nounGrammaticalCase) - { - case NGC_P1_NOMINATIVE: return L"NGC_P1_NOMINATIVE"; - case NGC_P2_GENITIVE: return L"NGC_P2_GENITIVE"; - case NGC_P3_DATIVE: return L"NGC_P3_DATIVE"; - case NGC_P4_ACCUSATIVE: return L"NGC_P4_ACCUSATIVE"; - case NGC_P5_INSTRUMENTAL: return L"NGC_P5_INSTRUMENTAL"; - case NGC_P6_PREPOSITIONAL: return L"NGC_P6_PREPOSITIONAL"; - } - - return L""; -} - -NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str) -{ - - if (str == L"NGC_P1_NOMINATIVE") - { - return NGC_P1_NOMINATIVE; - } - if (str == L"NGC_P2_GENITIVE") - { - return NGC_P2_GENITIVE; - } - if (str == L"NGC_P3_DATIVE") - { - return NGC_P3_DATIVE; - } - if (str == L"NGC_P4_ACCUSATIVE") - { - return NGC_P4_ACCUSATIVE; - } - if (str == L"NGC_P5_INSTRUMENTAL") - { - return NGC_P5_INSTRUMENTAL; - } - if (str == L"NGC_P6_PREPOSITIONAL") - { - return NGC_P6_PREPOSITIONAL; - } - - std::cout << "Error in WStringToNounGrammaticalCase!" 
<< std::endl; - return NGC_P1_NOMINATIVE; -} std::vector GetAllNounEndingArr() { @@ -458,35 +383,6 @@ NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativeP } -bool charIsConsolant(wchar_t c) //except й -{ - std::wstring consolants = L"цкнгшщзхфвпрлджчсмтб"; - - for (wchar_t ic : consolants) - { - if (c == ic) - { - return true; - } - } - - return false; -} - -bool charIsVowel(wchar_t c) -{ - std::wstring vovels = L"аоуыэяёюие"; - - for (wchar_t ic : vovels) - { - if (c == ic) - { - return true; - } - } - - return false; -} bool charIsMissingVowelSoftenerConsolant(wchar_t c) @@ -545,13 +441,6 @@ std::set getPossibleNounEndingDivisionSet(std::wstring noun) result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E }); } - - /* - if (ending == L"а" || ending == L"я") - { - result.insert({ nounBase, ending, NounEndingDivision::DC_SPECIAL_PLURAL_A }); - }*/ - } } @@ -575,34 +464,6 @@ std::vector GetPossibleNounTupleArr(std::wstring nounEnding) } } - - /* - //Xperimental -- additionally check for plural form with a-ending - if (nounEnding == L"а" || nounEnding == L"я") - { - - result.push_back(NounTuple{ SECOND_MALE_IFORM_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE }); - result.push_back(NounTuple{ SECOND_MALE_IFORM_INANIMATE, NC_PLURAL, NGC_P4_ACCUSATIVE }); - - result.push_back(NounTuple{ SECOND_MALE_UFORM_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE }); - result.push_back(NounTuple{ SECOND_MALE_UFORM_INANIMATE, NC_PLURAL, NGC_P4_ACCUSATIVE }); - - result.push_back(NounTuple{ SECOND_MALE_SSFORM_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE }); - result.push_back(NounTuple{ SECOND_MALE_SSFORM_INANIMATE, NC_PLURAL, NGC_P4_ACCUSATIVE }); - - result.push_back(NounTuple{ SECOND_I_SHORT_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE }); - result.push_back(NounTuple{ SECOND_I_SHORT_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE }); - - - result.push_back(NounTuple{ SECOND_MALE_IFORM_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE }); - - result.push_back(NounTuple{ 
SECOND_MALE_UFORM_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE }); - - result.push_back(NounTuple{ SECOND_MALE_SSFORM_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE }); - - result.push_back(NounTuple{ SECOND_I_SHORT_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE }); - }*/ - return result; } @@ -668,8 +529,6 @@ std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclenci return L""; } -std::wstring i_form_consolants = L"гхкжшчщ"; -std::wstring u_form_consolants = L"бпдтвфзснмлрц"; wchar_t GetLastChar(const NounRecord& nounRecord) { @@ -681,16 +540,6 @@ wchar_t GetPrevLastChar(const NounRecord& nounRecord) return nounRecord.nominativeForm[nounRecord.nominativeForm.size() - 2]; } -bool charIsIFormConsolant(wchar_t c) -{ - return i_form_consolants.find(c) != i_form_consolants.npos; -} - -bool charIsUFormConsolant(wchar_t c) -{ - return u_form_consolants.find(c) != i_form_consolants.npos; -} - bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord) { @@ -1375,4 +1224,6 @@ void LoadFrequentWordSet() } -} \ No newline at end of file +} + +} //namespace NN \ No newline at end of file diff --git a/rudict/rudict/noun.h b/rudict/rudict/noun.h index 2f58fc6..c323904 100644 --- a/rudict/rudict/noun.h +++ b/rudict/rudict/noun.h @@ -9,239 +9,197 @@ #include "boost/algorithm/string.hpp" -enum NounGender +#include "grammarCase.h" + +namespace NN { - NG_MALE = 0, - NG_FEMALE, - NG_NEUTRAL -}; -struct NounRecord -{ - std::wstring nominativeForm; - NounGender gender; - - bool haveSingleForm; - bool haveMultipleForm; - - bool haveStandardMultipleForm; - bool haveStandardMultipleFormWithMissingLastVowel; - - bool haveStandardMultipleFormEnding; - bool haveAlternativeMultipleFormEnding; - - std::wstring specialMultipleForm; - - bool canBeAnimate; - bool canBeInanimate; - - std::set precalculatedNominativePluralSet; - - NounRecord(); - NounRecord(std::wstring line); - - bool operator<(const NounRecord& n) const + struct NounRecord { - return nominativeForm < n.nominativeForm; 
- } -}; + std::wstring nominativeForm; + NounGender gender; -enum NounDeclencion -{ - FIRST_A_IFORM_INANIMATE = 0, - FIRST_A_IFORM_ANIMATE, - FIRST_A_UFORM_INANIMATE, - FIRST_A_UFORM_ANIMATE, - FIRST_YA_FORM_INANIMATE, - FIRST_YA_FORM_ANIMATE, - SECOND_MALE_IFORM_INANIMATE, - SECOND_MALE_IFORM_ANIMATE, - SECOND_MALE_UFORM_INANIMATE, - SECOND_MALE_UFORM_ANIMATE, - SECOND_MALE_SSFORM_INANIMATE, - SECOND_MALE_SSFORM_ANIMATE, - SECOND_I_SHORT_INANIMATE, - SECOND_I_SHORT_ANIMATE, - SECOND_NEUTRAL_E_FORM, - SECOND_NEUTRAL_O_FORM, - THIRD_FORM_INANIMATE, - THIRD_FORM_ANIMATE, -}; + bool haveSingleForm; + bool haveMultipleForm; -enum NounGrammaticalCase -{ - NGC_P1_NOMINATIVE = 0, - NGC_P2_GENITIVE, - NGC_P3_DATIVE, - NGC_P4_ACCUSATIVE, - NGC_P5_INSTRUMENTAL, - NGC_P6_PREPOSITIONAL, - NGC_SIZE -}; + bool haveStandardMultipleForm; + bool haveStandardMultipleFormWithMissingLastVowel; -enum NounCount -{ - NC_SINGULAR = 0, - NC_PLURAL, - NC_SIZE -}; + bool haveStandardMultipleFormEnding; + bool haveAlternativeMultipleFormEnding; -struct GrammaticalTableRecord -{ - NounCount count; + std::wstring specialMultipleForm; - NounGrammaticalCase grammaticalCase; + bool canBeAnimate; + bool canBeInanimate; - std::set ending; -}; + std::set precalculatedNominativePluralSet; -struct NounDeclencionCaseTableRecord -{ - NounDeclencion nounDeclencion; - std::vector grammaticalCaseTable; -}; + NounRecord(); + NounRecord(std::wstring line); - - -extern std::vector NounRecordArr; - -extern std::vector nounDeclencionCaseTable; - - -NounDeclencion WStringToNounDeclencion(std::wstring str); -std::wstring NounCountToWString(NounCount nounCount); -NounCount WStringToNounCount(std::wstring str); -std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase); -NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str); - - -//std::wstring NounNumberToWString(NounNumber nounNumber); - -typedef std::tuple< - NounDeclencion, - NounCount, - NounGrammaticalCase -> NounTuple; - 
-typedef std::set StringSet; - -struct NounEndingDivision -{ - std::wstring base; - std::wstring ending; - - enum DivisionCase - { - DC_COMMON = 0, - DC_LOST_VOWEL_O, - DC_LOST_VOWEL_E - } divisionCase; - - bool operator<(const NounEndingDivision& other) const - { - if (base != other.base) + bool operator<(const NounRecord& n) const { - return base < other.base; + return nominativeForm < n.nominativeForm; } - else + }; + + enum NounDeclencion + { + FIRST_A_IFORM_INANIMATE = 0, + FIRST_A_IFORM_ANIMATE, + FIRST_A_UFORM_INANIMATE, + FIRST_A_UFORM_ANIMATE, + FIRST_YA_FORM_INANIMATE, + FIRST_YA_FORM_ANIMATE, + SECOND_MALE_IFORM_INANIMATE, + SECOND_MALE_IFORM_ANIMATE, + SECOND_MALE_UFORM_INANIMATE, + SECOND_MALE_UFORM_ANIMATE, + SECOND_MALE_SSFORM_INANIMATE, + SECOND_MALE_SSFORM_ANIMATE, + SECOND_I_SHORT_INANIMATE, + SECOND_I_SHORT_ANIMATE, + SECOND_NEUTRAL_E_FORM, + SECOND_NEUTRAL_O_FORM, + THIRD_FORM_INANIMATE, + THIRD_FORM_ANIMATE, + }; + + extern std::vector NounRecordArr; + + + + struct NounDeclencionCaseTableRecord + { + NounDeclencion nounDeclencion; + std::vector grammaticalCaseTable; + }; + + extern std::vector nounDeclencionCaseTable; + + + NounDeclencion WStringToNounDeclencion(std::wstring str); + + typedef std::tuple < + NounDeclencion, + NounCount, + NounGrammaticalCase + > NounTuple; + + typedef std::set StringSet; + + struct NounEndingDivision + { + std::wstring base; + std::wstring ending; + + enum DivisionCase { - if (ending != other.ending) + DC_COMMON = 0, + DC_LOST_VOWEL_O, + DC_LOST_VOWEL_E + } divisionCase; + + bool operator<(const NounEndingDivision& other) const + { + if (base != other.base) { - return ending < other.ending; + return base < other.base; } else { - return divisionCase < other.divisionCase; - } - } - } -}; - -std::vector GetAllNounEndingArr(); - - -std::set getPluralForm(NounRecord nounRecord); - -bool NounIsInDictionary(std::wstring nounNominative); -bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural); - - 
-NounRecord GetNounRecordFromDictionary(std::wstring nounNominative); -NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural); - -bool charIsConsolant(wchar_t c); //except й - -bool charIsVowel(wchar_t c); - - -bool charIsMissingVowelSoftenerConsolant(wchar_t c); - -struct NounStruct -{ - NounGrammaticalCase nounGrammaticalCase; - NounCount nounCount; - bool animated; - NounRecord nounRecord; - - bool operator<(const NounStruct& other) const - { - if (nounGrammaticalCase != other.nounGrammaticalCase) - { - return nounGrammaticalCase < other.nounGrammaticalCase; - } - else - { - if (nounCount != other.nounCount) - { - return nounCount < other.nounCount; - } - else - { - if (animated != other.animated) + if (ending != other.ending) { - return animated < other.animated; + return ending < other.ending; } else { - return nounRecord < other.nounRecord; + return divisionCase < other.divisionCase; } } } - } -}; + }; + + std::vector GetAllNounEndingArr(); -std::set getPossibleNounEndingDivisionSet(std::wstring noun); + std::set getPluralForm(NounRecord nounRecord); -std::vector GetPossibleNounTupleArr(std::wstring nounEnding); - -std::set GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount); -std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclencion nounDeclencion); - -wchar_t GetLastChar(const NounRecord& nounRecord); -wchar_t GetPrevLastChar(const NounRecord& nounRecord); -bool charIsIFormConsolant(wchar_t c); -bool charIsUFormConsolant(wchar_t c); + bool NounIsInDictionary(std::wstring nounNominative); + bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural); -void SetupDeclentionMap(); + NounRecord GetNounRecordFromDictionary(std::wstring nounNominative); + NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural); -bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple); + bool 
charIsMissingVowelSoftenerConsolant(wchar_t c); -bool IsDeclencionSecondType(NounDeclencion nounDeclention); -bool IsDeclencionAnimated(NounDeclencion nounDeclention); + struct NounStruct + { + NounGrammaticalCase nounGrammaticalCase; + NounCount nounCount; + bool animated; + NounRecord nounRecord; -void FillDivisionCaseMaps(); - -std::set RecognizeNoun(std::wstring noun); - -NounDeclencion CalculateNounDeclention(NounRecord nounRecord); - -void CalculatePluralForm(); - -void LoadFrequentWordSet(); + bool operator<(const NounStruct& other) const + { + if (nounGrammaticalCase != other.nounGrammaticalCase) + { + return nounGrammaticalCase < other.nounGrammaticalCase; + } + else + { + if (nounCount != other.nounCount) + { + return nounCount < other.nounCount; + } + else + { + if (animated != other.animated) + { + return animated < other.animated; + } + else + { + return nounRecord < other.nounRecord; + } + } + } + } + }; -void LoadNounDeclencionCaseTable(); + std::set getPossibleNounEndingDivisionSet(std::wstring noun); + + std::vector GetPossibleNounTupleArr(std::wstring nounEnding); + + std::set GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount); + std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclencion nounDeclencion); + + wchar_t GetLastChar(const NounRecord& nounRecord); + wchar_t GetPrevLastChar(const NounRecord& nounRecord); + + void SetupDeclentionMap(); + + bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple); + + bool IsDeclencionSecondType(NounDeclencion nounDeclention); + bool IsDeclencionAnimated(NounDeclencion nounDeclention); + + void FillDivisionCaseMaps(); + + std::set RecognizeNoun(std::wstring noun); + + NounDeclencion CalculateNounDeclention(NounRecord nounRecord); + + void CalculatePluralForm(); + + void LoadFrequentWordSet(); + + void LoadNounDeclencionCaseTable(); + + +} //namespace NN #endif //NOUN_H_INCLUDED diff --git a/rudict/rudict/rudict.vcxproj 
b/rudict/rudict/rudict.vcxproj index dee4e59..fb0ff38 100644 --- a/rudict/rudict/rudict.vcxproj +++ b/rudict/rudict/rudict.vcxproj @@ -70,6 +70,8 @@ + + @@ -82,6 +84,8 @@ + + diff --git a/rudict/rudict/rudict.vcxproj.filters b/rudict/rudict/rudict.vcxproj.filters index 1bbf3dc..9e7a6a0 100644 --- a/rudict/rudict/rudict.vcxproj.filters +++ b/rudict/rudict/rudict.vcxproj.filters @@ -48,6 +48,12 @@ Source Files + + Source Files + + + Source Files + @@ -83,5 +89,11 @@ Source Files + + Source Files + + + Source Files + \ No newline at end of file