Adjectives added
This commit is contained in:
parent
54456546c7
commit
a3e371c51e
282
rudict/frequent_adjectives_2000.csv
Normal file
282
rudict/frequent_adjectives_2000.csv
Normal file
@ -0,0 +1,282 @@
|
||||
Номер;Прилагательное; может быть кратким просто так или с добавлением Е;специальная краткая форма, мужской род
|
||||
56;другой;0;
|
||||
73;новый;1;
|
||||
89;самый;0;
|
||||
96;большой;0;
|
||||
130;последний;0;
|
||||
141;российский;0;
|
||||
173;русский;0;
|
||||
193;высокий;1;
|
||||
199;хороший;1;
|
||||
207;главный;1;
|
||||
215;лучший;0;
|
||||
229;маленький;0;
|
||||
231;некоторый;0;
|
||||
233;любой;0;
|
||||
241;молодой;1;
|
||||
249;государственный;1;
|
||||
253;никакой;0;
|
||||
254;советский;0;
|
||||
256;настоящий;1;
|
||||
264;старый;1;
|
||||
278;разный;0;
|
||||
279;нужный;1;
|
||||
281;иной;0;
|
||||
290;белый;1;
|
||||
291;собственный;1;
|
||||
293;чёрный;1;
|
||||
297;основной;0;
|
||||
311;далекий;1;
|
||||
334;подобный;1;
|
||||
335;следующий;0;
|
||||
352;равный;1;
|
||||
364;живой;1;
|
||||
369;известный;1;
|
||||
373;военный;1;
|
||||
377;важный;1;
|
||||
379;великий;1;
|
||||
381;простой;1;
|
||||
385;огромный;1;
|
||||
387;политический;0;
|
||||
409;московский;0;
|
||||
424;готовый;1;
|
||||
439;данный;0;
|
||||
442;красный;1;
|
||||
453;современный;1;
|
||||
472;социальный;1;
|
||||
480;ранний;0;
|
||||
482;особый;0;
|
||||
483;целый;1;
|
||||
487;плохой;1;
|
||||
490;сильный;1;
|
||||
492;скорый;1;
|
||||
502;внутренний;1;
|
||||
514;экономический;0;
|
||||
516;правый;1;
|
||||
519;федеральный;1;
|
||||
524;близкий;1;
|
||||
526;похожий;1;
|
||||
532;различный;1;
|
||||
539;необходимый;1;
|
||||
544;единственный;1;
|
||||
545;лёгкий;0;лёгок
|
||||
555;человеческий;0;
|
||||
561;международный;1;
|
||||
564;дорогой;1;
|
||||
572;небольшой;0;
|
||||
577;местный;0;
|
||||
586;бывший;0;
|
||||
601;американский;0;
|
||||
615;мировой;0;
|
||||
617;тяжелый;1;
|
||||
627;возможный;1;
|
||||
630;отдельный;1;
|
||||
631;средний;1;
|
||||
632;красивый;1;
|
||||
640;короткий;1;короток
|
||||
654;серьёзный;1;
|
||||
660;интересный;1;
|
||||
662;добрый;1;
|
||||
665;национальный;1;
|
||||
667;длинный;1;
|
||||
670;страшный;1;
|
||||
671;прошлый;0;
|
||||
673;общественный;1;
|
||||
703;детский;0;
|
||||
707;единый;1;
|
||||
709;определённый;1;
|
||||
719;чужой;1;
|
||||
721;странный;1;
|
||||
723;чистый;1;
|
||||
732;поздний;1;
|
||||
738;специальный;1;
|
||||
745;научный;1;
|
||||
754;сложный;1;
|
||||
762;реальный;1;
|
||||
775;способный;1;
|
||||
778;малый;1;
|
||||
779;старший;0;
|
||||
783;личный;1;
|
||||
786;свободный;1;
|
||||
788;обычный;1;
|
||||
790;прекрасный;1;
|
||||
791;высший;0;
|
||||
803;тёмный;1;
|
||||
810;гражданский;0;
|
||||
857;боевой;0;
|
||||
870;рабочий;1;
|
||||
872;глубокий;1;
|
||||
885;долгий;0;долог
|
||||
892;прямой;1;
|
||||
897;открытый;1;
|
||||
904;знакомый;1;
|
||||
908;нынешний;0;
|
||||
921;исторический;0;
|
||||
923;народный;1;
|
||||
929;знаменитый;1;
|
||||
933;больший;0;
|
||||
940;пустой;1;
|
||||
941;очередной;1;
|
||||
949;судебный;1;
|
||||
958;зелёный;1;
|
||||
966;немецкий;0;
|
||||
967;золотой;1;
|
||||
969;технический;0;
|
||||
970;нормальный;1;
|
||||
974;некий;0;
|
||||
976;городской;0;
|
||||
978;соответствующий;0;
|
||||
992;любимый;1;
|
||||
993;родной;1;
|
||||
994;западный;1;
|
||||
1000;быстрый;1;
|
||||
1008;холодный;1;
|
||||
1012;конкретный;1;
|
||||
1018;иностранный;0;
|
||||
1020;ученый;1;
|
||||
1022;левый;1;
|
||||
1023;счастливый;1;
|
||||
1035;святой;1;
|
||||
1037;точный;1;
|
||||
1053;частый;1;
|
||||
1058;значительный;1;
|
||||
1067;связанный;0;связан
|
||||
1073;уверенный;0;уверен
|
||||
1075;тонкий;0;тонок
|
||||
1076;центральный;1;
|
||||
1083;будущий;0;
|
||||
1104;физический;0;
|
||||
1117;частный;1;
|
||||
1119;мелкий;0;мелок
|
||||
1121;английский;1;
|
||||
1122;постоянный;1;
|
||||
1131;тихий;1;
|
||||
1158;европейский;0;
|
||||
1162;ближайший;0;
|
||||
1163;отечественный;0;
|
||||
1164;теплый;0;
|
||||
1166;духовный;1;
|
||||
1168;прежний;0;
|
||||
1190;профессиональный;1;
|
||||
1191;французский;0;
|
||||
1192;женский;0;
|
||||
1193;крайний;0;
|
||||
1208;божий;0;
|
||||
1215;дальнейший;0;
|
||||
1225;естественный;1;
|
||||
1229;информационный;0;
|
||||
1234;железный;1;
|
||||
1240;горячий;1;
|
||||
1248;веселый;1;
|
||||
1271;серый;1;
|
||||
1276;опасный;1;
|
||||
1284;прочий;0;
|
||||
1302;слабый;1;
|
||||
1306;яркий;0;ярок
|
||||
1311;больной;1;
|
||||
1314;летний;0;
|
||||
1315;дополнительный;1;
|
||||
1326;лесной;0;
|
||||
1343;умный;1;
|
||||
1346;северный;0;
|
||||
1352;ясный;1;
|
||||
1353;милый;1;
|
||||
1355;светлый;1;
|
||||
1359;редкий;0;редок
|
||||
1362;верный;1;
|
||||
1364;юридический;0;
|
||||
1377;административный;1;
|
||||
1388;узкий;0;узок
|
||||
1397;художественный;1;
|
||||
1404;древний;1;
|
||||
1408;массовый;0;
|
||||
1410;генеральный;1;
|
||||
1411;замечательный;1;
|
||||
1421;задний;1;
|
||||
1422;региональный;1;
|
||||
1429;здоровый;1;
|
||||
1438;активный;1;
|
||||
1439;литературный;1;
|
||||
1452;острый;1;
|
||||
1455;богатый;1;
|
||||
1459;творческий;0;
|
||||
1462;мягкий;0;мягок
|
||||
1463;ночной;0;
|
||||
1468;налоговый;0;
|
||||
1469;толстый;0;толст
|
||||
1480;верхний;0;
|
||||
1482;вечный;1;
|
||||
1484;лишний;0;
|
||||
1485;морской;0;
|
||||
1487;нижний;1;
|
||||
1488;спокойный;0;спокоен
|
||||
1493;сухой;1;
|
||||
1498;синий;0;
|
||||
1503;сегодняшний;0;
|
||||
1509;медицинский;0;
|
||||
1519;свежий;1;
|
||||
1520;трудный;1;
|
||||
1521;уголовный;1;
|
||||
1582;желтый;1;желт
|
||||
1596;мощный;1;
|
||||
1617;русский;0;
|
||||
1618;деревянный;1;
|
||||
1628;полезный;1;
|
||||
1632;дальний;1;
|
||||
1644;домашний;0;
|
||||
1647;традиционный;1;
|
||||
1654;жесткий;1;
|
||||
1655;крепкий;1;
|
||||
1659;виноватый;1;
|
||||
1663;культурный;1;
|
||||
1666;приятный;1;
|
||||
1682;круглый;0;кругл
|
||||
1687;понятный;1;
|
||||
1688;голубой;0;
|
||||
1690;удивительный;1;
|
||||
1692;знакомый;1;
|
||||
1694;мужской;0;
|
||||
1696;правовой;0;
|
||||
1705;мертвый;0;мертв
|
||||
1714;совместный;1;
|
||||
1718;семейный;0;
|
||||
1734;природный;1;
|
||||
1748;православный;1;
|
||||
1750;учебный;1;
|
||||
1756;эффективный;1;
|
||||
1780;материальный;1;
|
||||
1781;невозможный;1;
|
||||
1814;соседний;0;
|
||||
1815;психологический;0;
|
||||
1823;музыкальный;1;
|
||||
1834;голый;1;
|
||||
1838;обязательный;1;
|
||||
1845;исполнительный;1;
|
||||
1848;партийный;0;
|
||||
1859;существенный;1;
|
||||
1869;восточный;1;
|
||||
1880;рабочий;0;
|
||||
1882;сельский;0;
|
||||
1883;характерный;1;
|
||||
1886;неожиданный;1;
|
||||
1896;видный;1;
|
||||
1904;честный;1;
|
||||
1910;резкий;0;резок
|
||||
1912;младший;0;
|
||||
1920;трудовой;0;
|
||||
1922;строгий;1;
|
||||
1931;южный;1;
|
||||
1932;практический;0;
|
||||
1933;многочисленный;1;
|
||||
1935;согласный;1;
|
||||
1943;спортивный;1;
|
||||
1947;истинный;1;
|
||||
1956;злой;0;зол
|
||||
1961;индивидуальный;1;
|
||||
1965;мокрый;1;
|
||||
1969;дикий;1;
|
||||
1981;коммерческий;0;
|
||||
1983;театральный;1;
|
||||
1988;воздушный;1;
|
||||
1989;дешевый;1;
|
||||
1993;пьяный;1;
|
|
BIN
rudict/frequent_adjectives_2000.xlsx
Normal file
BIN
rudict/frequent_adjectives_2000.xlsx
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
121
rudict/grammar_case_adjective.csv
Normal file
121
rudict/grammar_case_adjective.csv
Normal file
@ -0,0 +1,121 @@
|
||||
Склонение;Род;Падеж;Окончание;Пример
|
||||
IFORM_MALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ий;рабочий
|
||||
;;NGC_P2_GENITIVE;его;подарок для рабочего
|
||||
;;NGC_P3_DATIVE;ему;подарок рабочему
|
||||
;;NGC_P4_ACCUSATIVE;ий;обвиняю рабочий
|
||||
;;NGC_P5_INSTRUMENTAL;им;говорю с рабочим
|
||||
;;NGC_P6_PREPOSITIONAL;ем;говорю о рабочем
|
||||
;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие
|
||||
;;NGC_P2_GENITIVE;их;подарок для рабочих
|
||||
;;NGC_P3_DATIVE;им;подарок рабочим
|
||||
;;NGC_P4_ACCUSATIVE;ие;обвиняю рабочие
|
||||
;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими
|
||||
;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих
|
||||
IFORM_MALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ий;рабочий
|
||||
;;NGC_P2_GENITIVE;его;подарок для рабочего
|
||||
;;NGC_P3_DATIVE;ему;подарок рабочему
|
||||
;;NGC_P4_ACCUSATIVE;его;обвиняю рабочего
|
||||
;;NGC_P5_INSTRUMENTAL;им;говорю с рабочим
|
||||
;;NGC_P6_PREPOSITIONAL;ем;говорю о рабочем
|
||||
;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие
|
||||
;;NGC_P2_GENITIVE;их;подарок для рабочих
|
||||
;;NGC_P3_DATIVE;им;подарок рабочим
|
||||
;;NGC_P4_ACCUSATIVE;их;обвиняю рабочих
|
||||
;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими
|
||||
;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих
|
||||
UOFORM_MALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ой, ый;деловой
|
||||
;;NGC_P2_GENITIVE;ого;подарок для делового
|
||||
;;NGC_P3_DATIVE;ому;подарок деловому
|
||||
;;NGC_P4_ACCUSATIVE;ой;обвиняю деловой
|
||||
;;NGC_P5_INSTRUMENTAL;ым;говорю с деловым
|
||||
;;NGC_P6_PREPOSITIONAL;ом;говорю о деловом
|
||||
;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые
|
||||
;;NGC_P2_GENITIVE;ых;подарок для деловых
|
||||
;;NGC_P3_DATIVE;ым;подарок деловым
|
||||
;;NGC_P4_ACCUSATIVE;ые;обвиняю деловые
|
||||
;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми
|
||||
;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых
|
||||
UOFORM_MALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ой, ый;деловой
|
||||
;;NGC_P2_GENITIVE;ого;подарок для делового
|
||||
;;NGC_P3_DATIVE;ому;подарок деловому
|
||||
;;NGC_P4_ACCUSATIVE;ого;обвиняю делового
|
||||
;;NGC_P5_INSTRUMENTAL;ым;говорю с деловым
|
||||
;;NGC_P6_PREPOSITIONAL;ом;говорю о деловом
|
||||
;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые
|
||||
;;NGC_P2_GENITIVE;ых;подарок для деловых
|
||||
;;NGC_P3_DATIVE;ым;подарок деловым
|
||||
;;NGC_P4_ACCUSATIVE;ых;обвиняю деловых
|
||||
;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми
|
||||
;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых
|
||||
IFORM_FEMALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;рабочая
|
||||
;;NGC_P2_GENITIVE;ей;подарок для рабочей
|
||||
;;NGC_P3_DATIVE;ей;подарок рабочей
|
||||
;;NGC_P4_ACCUSATIVE;ую;обвиняю рабочую
|
||||
;;NGC_P5_INSTRUMENTAL;ей;говорю с рабочей
|
||||
;;NGC_P6_PREPOSITIONAL;ей;говорю о рабочей
|
||||
;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие
|
||||
;;NGC_P2_GENITIVE;их;подарок для рабочих
|
||||
;;NGC_P3_DATIVE;им;подарок рабочим
|
||||
;;NGC_P4_ACCUSATIVE;ие;обвиняю рабочие
|
||||
;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими
|
||||
;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих
|
||||
IFORM_FEMALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;рабочая
|
||||
;;NGC_P2_GENITIVE;ей;подарок для рабочей
|
||||
;;NGC_P3_DATIVE;ей;подарок рабочей
|
||||
;;NGC_P4_ACCUSATIVE;ую;обвиняю рабочую
|
||||
;;NGC_P5_INSTRUMENTAL;ей;говорю с рабочей
|
||||
;;NGC_P6_PREPOSITIONAL;ей;говорю о рабочей
|
||||
;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие
|
||||
;;NGC_P2_GENITIVE;их;подарок для рабочих
|
||||
;;NGC_P3_DATIVE;им;подарок рабочим
|
||||
;;NGC_P4_ACCUSATIVE;их;обвиняю рабочих
|
||||
;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими
|
||||
;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих
|
||||
UOFORM_FEMALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;деловая
|
||||
;;NGC_P2_GENITIVE;ой;подарок для деловой
|
||||
;;NGC_P3_DATIVE;ой;подарок деловой
|
||||
;;NGC_P4_ACCUSATIVE;ую;обвиняю деловую
|
||||
;;NGC_P5_INSTRUMENTAL;ой;говорю с деловой
|
||||
;;NGC_P6_PREPOSITIONAL;ой;говорю о деловой
|
||||
;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые
|
||||
;;NGC_P2_GENITIVE;ых;подарок для деловых
|
||||
;;NGC_P3_DATIVE;ым;подарок деловым
|
||||
;;NGC_P4_ACCUSATIVE;ые;обвиняю деловые
|
||||
;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми
|
||||
;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых
|
||||
UOFORM_FEMALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;деловая
|
||||
;;NGC_P2_GENITIVE;ой;подарок для деловой
|
||||
;;NGC_P3_DATIVE;ой;подарок деловой
|
||||
;;NGC_P4_ACCUSATIVE;ую;обвиняю деловую
|
||||
;;NGC_P5_INSTRUMENTAL;ой;говорю с деловой
|
||||
;;NGC_P6_PREPOSITIONAL;ой;говорю о деловой
|
||||
;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые
|
||||
;;NGC_P2_GENITIVE;ых;подарок для деловых
|
||||
;;NGC_P3_DATIVE;ым;подарок деловым
|
||||
;;NGC_P4_ACCUSATIVE;ых;обвиняю деловых
|
||||
;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми
|
||||
;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых
|
||||
IFORM_NEUTRAL;NC_SINGULAR;NGC_P1_NOMINATIVE;ее;рабочее
|
||||
;;NGC_P2_GENITIVE;его;подарок для рабочего
|
||||
;;NGC_P3_DATIVE;ему;подарок рабочему
|
||||
;;NGC_P4_ACCUSATIVE;ее;обвиняю рабочее
|
||||
;;NGC_P5_INSTRUMENTAL;им;говорю с рабочим
|
||||
;;NGC_P6_PREPOSITIONAL;ем;говорю о рабочем
|
||||
;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие
|
||||
;;NGC_P2_GENITIVE;их;подарок для рабочих
|
||||
;;NGC_P3_DATIVE;им;подарок рабочим
|
||||
;;NGC_P4_ACCUSATIVE;ие;обвиняю рабочие
|
||||
;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими
|
||||
;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих
|
||||
UOFORM_NEUTRAL;NC_SINGULAR;NGC_P1_NOMINATIVE;ое;деловое
|
||||
;;NGC_P2_GENITIVE;ого;подарок для делового
|
||||
;;NGC_P3_DATIVE;ому;подарок деловому
|
||||
;;NGC_P4_ACCUSATIVE;ое;обвиняю деловое
|
||||
;;NGC_P5_INSTRUMENTAL;ым;говорю с деловым
|
||||
;;NGC_P6_PREPOSITIONAL;ом;говорю о деловом
|
||||
;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые
|
||||
;;NGC_P2_GENITIVE;ых;подарок для деловых
|
||||
;;NGC_P3_DATIVE;ым;подарок деловым
|
||||
;;NGC_P4_ACCUSATIVE;ые;обвиняю деловые
|
||||
;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми
|
||||
;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых
|
|
BIN
rudict/grammar_case_adjective.xlsx
Normal file
BIN
rudict/grammar_case_adjective.xlsx
Normal file
Binary file not shown.
507
rudict/rudict/adjective.cpp
Normal file
507
rudict/rudict/adjective.cpp
Normal file
@ -0,0 +1,507 @@
|
||||
#include "adjective.h"
|
||||
#include <string>
|
||||
|
||||
#include <iostream> //Xperimental -- for debug only
|
||||
|
||||
#include "utf8utf16.h"
|
||||
|
||||
#include "boost/regex.hpp"
|
||||
#include "boost/algorithm/string/regex.hpp"
|
||||
|
||||
namespace AJ
|
||||
{
|
||||
|
||||
std::vector<AdjectiveRecord> AdjectiveRecordArr;
|
||||
|
||||
std::vector<AdjectiveDeclencionCaseTableRecord> adjectiveDeclencionCaseTable;
|
||||
|
||||
|
||||
// Default constructor: produces an empty record (both strings empty,
// no standard short form).
AdjectiveRecord::AdjectiveRecord()
    : nominativeMaleForm()
    , standardShortFormAvailable(false)
    , specialShortForm()
{
}
|
||||
|
||||
// Builds a record from one ';'-separated dictionary line.
//
// Field 0 is the frequency number (ignored here), field 1 the masculine
// nominative form, field 2 is "1" when the standard short form is available,
// field 3 an optional special short form.
//
// Lines with fewer fields than expected no longer index past the end of the
// split result: missing fields simply keep their default (empty / false) value.
AdjectiveRecord::AdjectiveRecord(std::wstring line)
    : standardShortFormAvailable(false)
{
    std::vector<std::wstring> fields;

    boost::split_regex(fields, line, boost::wregex(L";"));

    if (fields.size() > 1)
    {
        nominativeMaleForm = fields[1];
    }

    if (fields.size() > 2)
    {
        standardShortFormAvailable = (fields[2] == L"1");
    }

    if (fields.size() > 3)
    {
        specialShortForm = fields[3];
    }
}
|
||||
|
||||
|
||||
// Returns every adjective ending the recognizer tries to strip from a word.
//
// The endings are spelled with universal character names so the resulting
// wide strings are the intended Cyrillic code points regardless of the
// encoding the source file is saved or compiled in. The order of the list is
// the same as before.
std::vector<std::wstring> GetAllAdjectiveEndingArr()
{
    static const wchar_t* const kEndings[] = {
        L"\u0438\u0439",        // -ij
        L"\u0435\u0433\u043e",  // -ego
        L"\u0435\u043c\u0443",  // -emu
        L"\u0438\u043c",        // -im
        L"\u0435\u043c",        // -em
        L"\u0438\u0435",        // -ie
        L"\u0438\u0445",        // -ih
        L"\u0438\u043c\u0438",  // -imi
        L"\u043e\u0439",        // -oj
        L"\u044b\u0439",        // -yj
        L"\u043e\u0433\u043e",  // -ogo
        L"\u043e\u043c\u0443",  // -omu
        L"\u044b\u043c",        // -ym
        L"\u043e\u043c",        // -om
        L"\u044b\u0435",        // -ye
        L"\u044b\u0445",        // -yh
        L"\u044b\u043c\u0438",  // -ymi
        L"\u0430\u044f",        // -aja
        L"\u0435\u0439",        // -ej
        L"\u0443\u044e",        // -uju
        L"\u0435\u0435",        // -ee
        L"\u043e\u0435",        // -oe
    };

    const size_t endingCount = sizeof(kEndings) / sizeof(kEndings[0]);

    return std::vector<std::wstring>(kEndings, kEndings + endingCount);
}
|
||||
|
||||
// Converts the textual declension name used in the grammar CSV into the
// AdjectiveDeclencion enumerator of the same name.
//
// An unknown name is reported on std::cout and mapped to IFORM_MALE_INANIMATE.
AdjectiveDeclencion WStringToAdjectiveDeclencion(std::wstring str)
{
    struct NamedDeclencion
    {
        const wchar_t* name;
        AdjectiveDeclencion value;
    };

    static const NamedDeclencion kKnownDeclencions[] = {
        { L"IFORM_MALE_INANIMATE", IFORM_MALE_INANIMATE },
        { L"IFORM_MALE_ANIMATE", IFORM_MALE_ANIMATE },
        { L"UOFORM_MALE_INANIMATE", UOFORM_MALE_INANIMATE },
        { L"UOFORM_MALE_ANIMATE", UOFORM_MALE_ANIMATE },
        { L"IFORM_FEMALE_INANIMATE", IFORM_FEMALE_INANIMATE },
        { L"IFORM_FEMALE_ANIMATE", IFORM_FEMALE_ANIMATE },
        { L"UOFORM_FEMALE_INANIMATE", UOFORM_FEMALE_INANIMATE },
        { L"UOFORM_FEMALE_ANIMATE", UOFORM_FEMALE_ANIMATE },
        { L"IFORM_NEUTRAL", IFORM_NEUTRAL },
        { L"UOFORM_NEUTRAL", UOFORM_NEUTRAL },
    };

    for (const NamedDeclencion& known : kKnownDeclencions)
    {
        if (str == known.name)
        {
            return known.value;
        }
    }

    std::cout << "Error in WStringToAdjectiveDeclencion";
    return IFORM_MALE_INANIMATE;
}
|
||||
|
||||
std::set<AdjectiveEndingDivision> getPossibleAdjectiveEndingDivisionSet(std::wstring noun)
|
||||
{
|
||||
std::set<AdjectiveEndingDivision> result;
|
||||
|
||||
auto allAdjectiveEndingArr = GetAllAdjectiveEndingArr();
|
||||
|
||||
for (auto ending : allAdjectiveEndingArr)
|
||||
{
|
||||
if (boost::ends_with(noun, ending))
|
||||
{
|
||||
std::wstring adjectiveBase = boost::replace_last_copy(noun, ending, "");
|
||||
|
||||
|
||||
|
||||
if (charIsConsolant(adjectiveBase[adjectiveBase.size() - 1]))
|
||||
{
|
||||
result.insert({ adjectiveBase, ending, AdjectiveEndingDivision::DC_COMMON });
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<AdjectiveTuple> GetPossibleTupleArr(std::wstring ending)
|
||||
{
|
||||
std::vector<AdjectiveTuple> result;
|
||||
|
||||
for (auto& adjective : adjectiveDeclencionCaseTable)
|
||||
{
|
||||
for (int i = 0; i < NGC_SIZE * NC_SIZE; i++)
|
||||
{
|
||||
if (adjective.grammaticalCaseTable[i].ending.count(ending) != 0)
|
||||
{
|
||||
result.push_back(AdjectiveTuple{ adjective.adjectiveDeclencion, adjective.grammaticalCaseTable[i].count, adjective.grammaticalCaseTable[i].grammaticalCase });
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool AdjectiveIsInDictionary(std::wstring nominative)
|
||||
{
|
||||
for (auto& adjective : AdjectiveRecordArr)
|
||||
{
|
||||
if (adjective.nominativeMaleForm == nominative)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns a copy of the first record in AdjectiveRecordArr whose masculine
// nominative form equals `nominative`, or a default-constructed record when
// there is none.
AdjectiveRecord GetAdjectiveRecordFromDictionary(std::wstring nominative)
{
    for (size_t i = 0; i < AdjectiveRecordArr.size(); ++i)
    {
        const AdjectiveRecord& candidate = AdjectiveRecordArr[i];

        if (candidate.nominativeMaleForm == nominative)
        {
            return candidate;
        }
    }

    return AdjectiveRecord();
}
|
||||
|
||||
|
||||
/*
|
||||
std::set<std::wstring> GetAdjectiveNominative(std::wstring base, AdjectiveDeclencion declencion, NounCount nounCount)
|
||||
{
|
||||
std::set<std::wstring> result;
|
||||
|
||||
AdjectiveDeclencionCaseTableRecord declencionCaseTableRecord = adjectiveDeclencionCaseTable[static_cast<int>(declencion)];
|
||||
|
||||
for (auto& grammaticalTableRecord : declencionCaseTableRecord.grammaticalCaseTable)
|
||||
{
|
||||
if (grammaticalTableRecord.grammaticalCase == NGC_P1_NOMINATIVE && grammaticalTableRecord.count == nounCount)
|
||||
{
|
||||
for (auto& e : grammaticalTableRecord.ending)
|
||||
{
|
||||
result.insert(base + e);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}*/
|
||||
|
||||
std::set<std::wstring> GetNominativeMaleSingular(std::wstring base)
|
||||
{
|
||||
std::wstring result;
|
||||
|
||||
if (charIsIFormConsolant(base[base.size() - 1]))
|
||||
{
|
||||
return{ base + L"èé" };
|
||||
}
|
||||
|
||||
if (charIsUFormConsolant(base[base.size() - 1]))
|
||||
{
|
||||
return{ base + L"ûé", base + L"îé" };
|
||||
}
|
||||
|
||||
return{};
|
||||
|
||||
}
|
||||
|
||||
// True for the four *_ANIMATE declensions (male/female, I-form/UO-form);
// false for every other value.
bool IsDeclencionAnimated(AdjectiveDeclencion declention)
{
    return declention == AJ::IFORM_MALE_ANIMATE
        || declention == AJ::UOFORM_MALE_ANIMATE
        || declention == AJ::IFORM_FEMALE_ANIMATE
        || declention == AJ::UOFORM_FEMALE_ANIMATE;
}
|
||||
|
||||
|
||||
// Maps a declension to its gender: the four *_MALE_* values give NG_MALE,
// the four *_FEMALE_* values give NG_FEMALE, anything else gives NG_NEUTRAL.
NounGender GetGenderFromDeclencion(AdjectiveDeclencion declention)
{
    const bool isMale =
        declention == AJ::IFORM_MALE_INANIMATE ||
        declention == AJ::IFORM_MALE_ANIMATE ||
        declention == AJ::UOFORM_MALE_INANIMATE ||
        declention == AJ::UOFORM_MALE_ANIMATE;

    if (isMale)
    {
        return NG_MALE;
    }

    const bool isFemale =
        declention == AJ::IFORM_FEMALE_INANIMATE ||
        declention == AJ::IFORM_FEMALE_ANIMATE ||
        declention == AJ::UOFORM_FEMALE_INANIMATE ||
        declention == AJ::UOFORM_FEMALE_ANIMATE;

    if (isFemale)
    {
        return NG_FEMALE;
    }

    return NG_NEUTRAL;
}
|
||||
|
||||
|
||||
bool IFormTest(const AdjectiveRecord& r)
|
||||
{
|
||||
return charIsIFormConsolant(r.nominativeMaleForm[r.nominativeMaleForm.size() - 3]);
|
||||
}
|
||||
|
||||
bool UOFormTest(const AdjectiveRecord& r)
|
||||
{
|
||||
return charIsUFormConsolant(r.nominativeMaleForm[r.nominativeMaleForm.size() - 3]);
|
||||
}
|
||||
|
||||
|
||||
std::map<AdjectiveDeclencion, std::function < bool(const AdjectiveRecord&) >> DeclentionConditionMap;
|
||||
|
||||
void SetupDeclentionMap()
|
||||
{
|
||||
|
||||
DeclentionConditionMap[IFORM_MALE_INANIMATE] = std::bind(IFormTest, std::placeholders::_1);
|
||||
DeclentionConditionMap[IFORM_MALE_ANIMATE] = std::bind(IFormTest, std::placeholders::_1);
|
||||
DeclentionConditionMap[UOFORM_MALE_INANIMATE] = std::bind(UOFormTest, std::placeholders::_1);
|
||||
DeclentionConditionMap[UOFORM_MALE_ANIMATE] = std::bind(UOFormTest, std::placeholders::_1);
|
||||
|
||||
DeclentionConditionMap[IFORM_FEMALE_INANIMATE] = std::bind(IFormTest, std::placeholders::_1);
|
||||
DeclentionConditionMap[IFORM_FEMALE_ANIMATE] = std::bind(IFormTest, std::placeholders::_1);
|
||||
DeclentionConditionMap[UOFORM_FEMALE_INANIMATE] = std::bind(UOFormTest, std::placeholders::_1);
|
||||
DeclentionConditionMap[UOFORM_FEMALE_ANIMATE] = std::bind(UOFormTest, std::placeholders::_1);
|
||||
|
||||
DeclentionConditionMap[IFORM_NEUTRAL] = std::bind(IFormTest, std::placeholders::_1);
|
||||
DeclentionConditionMap[UOFORM_NEUTRAL] = std::bind(UOFormTest, std::placeholders::_1);
|
||||
|
||||
|
||||
}
|
||||
|
||||
// Tells whether `record` satisfies the predicate registered in
// DeclentionConditionMap for the declension stored in element 0 of `tuple`.
//
// The map is looked up with find() rather than operator[]: a declension with
// no registered predicate (for example when SetupDeclentionMap has not been
// called) now yields false, instead of inserting an empty std::function into
// the map and throwing std::bad_function_call when it is invoked.
bool AdjectiveFitsDeclention(AdjectiveRecord record, AdjectiveTuple tuple)
{
    const AdjectiveDeclencion declencion = std::get<0>(tuple);

    auto condition = DeclentionConditionMap.find(declencion);

    if (condition == DeclentionConditionMap.end() || !condition->second)
    {
        return false;
    }

    return condition->second(record);
}
|
||||
|
||||
std::map < AdjectiveEndingDivision::DivisionCase, std::function < bool(AdjectiveTuple) >> DivisionCaseAdjectiveTupleFilterMap;
|
||||
|
||||
std::map < AdjectiveEndingDivision::DivisionCase, std::function < bool(AdjectiveTuple, AdjectiveRecord) >> DivisionCaseAdjectiveTupleRecordFilterMap;
|
||||
|
||||
|
||||
void FillDivisionCaseMaps()
|
||||
{
|
||||
|
||||
DivisionCaseAdjectiveTupleFilterMap[AdjectiveEndingDivision::DC_COMMON] = [](AdjectiveTuple tuple)
|
||||
{
|
||||
return true;
|
||||
};
|
||||
|
||||
DivisionCaseAdjectiveTupleRecordFilterMap[AdjectiveEndingDivision::DC_COMMON] = [](AdjectiveTuple tuple, AdjectiveRecord record)
|
||||
{
|
||||
return AdjectiveFitsDeclention(record, tuple);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
std::set<AdjectiveStruct> RecognizeAdjective(std::wstring noun)
|
||||
{
|
||||
std::set<AdjectiveStruct> result;
|
||||
|
||||
auto adjectiveEndingDivisionArr = getPossibleAdjectiveEndingDivisionSet(noun);
|
||||
|
||||
for (auto adjectiveEndingDivision : adjectiveEndingDivisionArr)
|
||||
{
|
||||
std::wstring base = adjectiveEndingDivision.base;
|
||||
std::wstring ending = adjectiveEndingDivision.ending;
|
||||
AdjectiveEndingDivision::DivisionCase dc = adjectiveEndingDivision.divisionCase;
|
||||
|
||||
std::wstring modifiedBase = base;
|
||||
std::wstring modifiedEnding = ending;
|
||||
|
||||
std::vector<AdjectiveTuple> possibleTupleArr = GetPossibleTupleArr(modifiedEnding);
|
||||
|
||||
for (AdjectiveTuple tuple : possibleTupleArr)
|
||||
{
|
||||
if (DivisionCaseAdjectiveTupleFilterMap[dc](tuple))
|
||||
{
|
||||
std::set<std::wstring> nominaviteSingularSet = GetNominativeMaleSingular(modifiedBase);
|
||||
|
||||
for (auto& nn : nominaviteSingularSet)
|
||||
{
|
||||
|
||||
if (AdjectiveIsInDictionary(nn))
|
||||
{
|
||||
|
||||
AdjectiveRecord record = GetAdjectiveRecordFromDictionary(nn);
|
||||
|
||||
if (DivisionCaseAdjectiveTupleRecordFilterMap[dc](tuple, record))
|
||||
{
|
||||
result.insert({ std::get<2>(tuple), std::get<1>(tuple), GetGenderFromDeclencion(std::get<0>(tuple)), IsDeclencionAnimated(std::get<0>(tuple)), record });
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void LoadAdjectiveDeclencionCaseTable()
|
||||
{
|
||||
|
||||
adjectiveDeclencionCaseTable.clear();
|
||||
|
||||
#ifdef _WIN32
|
||||
std::ifstream f("C:/Workplace/ChineseJournal/rudict/grammar_case_adjective.csv");
|
||||
|
||||
#else
|
||||
std::ifstream f("/home/devuser/workplace/rudict/grammar_case_adjective.csv");
|
||||
#endif
|
||||
|
||||
std::string line;
|
||||
std::wstring wline;
|
||||
|
||||
if (f.is_open())
|
||||
{
|
||||
|
||||
std::cout << "File found!" << std::endl;
|
||||
|
||||
std::vector<GrammaticalTableRecord> currentGrammaticalCaseTable;
|
||||
std::wstring currentAdjectiveDeclencion;
|
||||
std::wstring currentAdjectiveCount;
|
||||
|
||||
getline(f, line); //Skip one line
|
||||
|
||||
while (getline(f, line))
|
||||
{
|
||||
std::vector<std::string> lineArr;
|
||||
|
||||
boost::split_regex(lineArr, line, boost::regex(";"));
|
||||
|
||||
if (lineArr[0] != "")
|
||||
{
|
||||
if (currentAdjectiveDeclencion == L"")
|
||||
{
|
||||
currentAdjectiveDeclencion = string_to_wstring(lineArr[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
adjectiveDeclencionCaseTable.push_back(AdjectiveDeclencionCaseTableRecord{ WStringToAdjectiveDeclencion(currentAdjectiveDeclencion), currentGrammaticalCaseTable });
|
||||
|
||||
currentAdjectiveDeclencion = string_to_wstring(lineArr[0]);
|
||||
|
||||
currentGrammaticalCaseTable.clear();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (lineArr[1] != "")
|
||||
{
|
||||
currentAdjectiveCount = string_to_wstring(lineArr[1]);
|
||||
}
|
||||
|
||||
std::wstring endings = string_to_wstring(lineArr[3]);
|
||||
|
||||
std::set<std::wstring> endingsSet;
|
||||
boost::split_regex(endingsSet, endings, boost::regex(", "));
|
||||
|
||||
currentGrammaticalCaseTable.push_back({
|
||||
WStringToNounCount(currentAdjectiveCount),
|
||||
WStringToNounGrammaticalCase(string_to_wstring(lineArr[2])),
|
||||
endingsSet
|
||||
});
|
||||
|
||||
}
|
||||
//Add last one
|
||||
if (currentAdjectiveDeclencion != L"")
|
||||
{
|
||||
adjectiveDeclencionCaseTable.push_back(AdjectiveDeclencionCaseTableRecord{ WStringToAdjectiveDeclencion(currentAdjectiveDeclencion), currentGrammaticalCaseTable });
|
||||
}
|
||||
|
||||
f.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "file not found!" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
void LoadFrequentAdjectiveSet()
|
||||
{
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_adjectives_2000.csv");
|
||||
|
||||
#else
|
||||
std::ifstream f("/home/devuser/workplace/rudict/frequent_adjectives_2000.csv");
|
||||
#endif
|
||||
|
||||
std::string line;
|
||||
std::wstring wline;
|
||||
|
||||
if (f.is_open())
|
||||
{
|
||||
|
||||
getline(f, line); //Skip one line
|
||||
|
||||
std::cout << "File found!" << std::endl;
|
||||
while (getline(f, line))
|
||||
{
|
||||
|
||||
wline = string_to_wstring(line);
|
||||
AdjectiveRecord adjectiveRecord(wline);
|
||||
|
||||
AdjectiveRecordArr.push_back(adjectiveRecord);
|
||||
|
||||
}
|
||||
f.close();
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "file not found!" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
} //namespace AJ
|
||||
|
186
rudict/rudict/adjective.h
Normal file
186
rudict/rudict/adjective.h
Normal file
@ -0,0 +1,186 @@
|
||||
#ifndef ADJECTIVE_H_INCLUDED
|
||||
#define ADJECTIVE_H_INCLUDED
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
|
||||
#include "boost/algorithm/string.hpp"
|
||||
|
||||
#include "grammarCase.h"
|
||||
|
||||
namespace AJ
|
||||
{
|
||||
|
||||
// One dictionary entry for an adjective, as read from the frequent-adjectives
// CSV.
struct AdjectiveRecord
{
    // Masculine nominative form (CSV column 1); also the ordering key.
    std::wstring nominativeMaleForm;

    // True when CSV column 2 is "1".
    bool standardShortFormAvailable;

    // CSV column 3; empty for most entries.
    std::wstring specialShortForm;

    AdjectiveRecord();
    AdjectiveRecord(std::wstring line);

    // Records are ordered by their masculine nominative form only.
    bool operator<(const AdjectiveRecord& n) const
    {
        return nominativeMaleForm.compare(n.nominativeMaleForm) < 0;
    }
};
|
||||
|
||||
|
||||
extern std::vector<AdjectiveRecord> AdjectiveRecordArr;
|
||||
|
||||
|
||||
struct AdjectiveStruct
|
||||
{
|
||||
NounGrammaticalCase grammaticalCase;
|
||||
|
||||
NounCount count;
|
||||
NounGender gender;
|
||||
|
||||
bool isDeclentionAnimated;
|
||||
|
||||
AdjectiveRecord adjectiveRecord;
|
||||
|
||||
bool operator<(const AdjectiveStruct& n) const
|
||||
{
|
||||
if (grammaticalCase != n.grammaticalCase)
|
||||
{
|
||||
return grammaticalCase < n.grammaticalCase;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (count != n.count)
|
||||
{
|
||||
return count < n.count;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (gender != n.gender)
|
||||
{
|
||||
return gender < n.gender;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (isDeclentionAnimated != n.isDeclentionAnimated)
|
||||
{
|
||||
return isDeclentionAnimated < n.isDeclentionAnimated;
|
||||
}
|
||||
else
|
||||
{
|
||||
return adjectiveRecord < n.adjectiveRecord;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
std::vector<std::wstring> GetAllAdjectiveEndingArr();
|
||||
|
||||
|
||||
|
||||
// Adjective declension classes. The names match the first column of the
// grammar CSV; the numeric values are consecutive starting at 0.
enum AdjectiveDeclencion
{
    IFORM_MALE_INANIMATE    = 0,
    IFORM_MALE_ANIMATE      = 1,
    UOFORM_MALE_INANIMATE   = 2,
    UOFORM_MALE_ANIMATE     = 3,
    IFORM_FEMALE_INANIMATE  = 4,
    IFORM_FEMALE_ANIMATE    = 5,
    UOFORM_FEMALE_INANIMATE = 6,
    UOFORM_FEMALE_ANIMATE   = 7,
    IFORM_NEUTRAL           = 8,
    UOFORM_NEUTRAL          = 9
};
|
||||
|
||||
|
||||
struct AdjectiveDeclencionCaseTableRecord
|
||||
{
|
||||
AdjectiveDeclencion adjectiveDeclencion;
|
||||
std::vector<GrammaticalTableRecord> grammaticalCaseTable;
|
||||
};
|
||||
|
||||
extern std::vector<AdjectiveDeclencionCaseTableRecord> adjectiveDeclencionCaseTable;
|
||||
|
||||
|
||||
AdjectiveDeclencion WStringToAdjectiveDeclencion(std::wstring str);
|
||||
|
||||
|
||||
// One way of splitting a word form into a base and an adjective ending.
struct AdjectiveEndingDivision
{
    std::wstring base;
    std::wstring ending;

    // Kind of split; only the plain one exists so far.
    enum DivisionCase
    {
        DC_COMMON = 0
    };

    DivisionCase divisionCase;

    // Lexicographic order over (base, ending, divisionCase).
    bool operator<(const AdjectiveEndingDivision& other) const
    {
        if (base != other.base)
        {
            return base < other.base;
        }

        if (ending != other.ending)
        {
            return ending < other.ending;
        }

        return divisionCase < other.divisionCase;
    }
};
|
||||
|
||||
std::set<AdjectiveEndingDivision> getPossibleAdjectiveEndingDivisionSet(std::wstring noun);
|
||||
|
||||
|
||||
typedef std::tuple <
|
||||
AdjectiveDeclencion,
|
||||
NounCount,
|
||||
NounGrammaticalCase
|
||||
> AdjectiveTuple;
|
||||
|
||||
std::vector<AdjectiveTuple> GetPossibleTupleArr(std::wstring ending);
|
||||
|
||||
bool AdjectiveIsInDictionary(std::wstring nominative);
|
||||
AdjectiveRecord GetAdjectiveRecordFromDictionary(std::wstring nominative);
|
||||
|
||||
|
||||
|
||||
//std::set<std::wstring> GetAdjectiveNominative(std::wstring base, AdjectiveDeclencion declencion, NounCount nounCount);
|
||||
std::set<std::wstring> GetNominativeMaleSingular(std::wstring base);
|
||||
|
||||
void SetupDeclentionMap();
|
||||
bool AdjectiveFitsDeclention(AdjectiveRecord record, AdjectiveTuple tuple);
|
||||
|
||||
bool IsDeclencionAnimated(AdjectiveDeclencion declention);
|
||||
NounGender GetGenderFromDeclencion(AdjectiveDeclencion declention);
|
||||
|
||||
void FillDivisionCaseMaps();
|
||||
|
||||
|
||||
std::set<AdjectiveStruct> RecognizeAdjective(std::wstring noun);
|
||||
|
||||
|
||||
void LoadAdjectiveDeclencionCaseTable();
|
||||
|
||||
|
||||
void LoadFrequentAdjectiveSet();
|
||||
|
||||
|
||||
|
||||
} //namespace AJ
|
||||
|
||||
|
||||
#endif //ADJECTIVE_H_INCLUDED
|
128
rudict/rudict/grammarCase.cpp
Normal file
128
rudict/rudict/grammarCase.cpp
Normal file
@ -0,0 +1,128 @@
|
||||
#include "grammarCase.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
// Parses the enumerator name of a NounCount ("NC_SINGULAR" / "NC_PLURAL").
// Any other text is reported on std::cout and treated as NC_SINGULAR.
NounCount WStringToNounCount(std::wstring str)
{
    if (str == L"NC_PLURAL")
    {
        return NC_PLURAL;
    }

    if (str != L"NC_SINGULAR")
    {
        // Unrecognised name: report it, then fall back to singular below.
        std::cout << "Error in WStringToNounCount!" << std::endl;
    }

    return NC_SINGULAR;
}
|
||||
|
||||
std::wstring NounCountToWString(NounCount nounCount)
|
||||
{
|
||||
if (nounCount == NC_SINGULAR)
|
||||
{
|
||||
return L"NC_SINGULAR";
|
||||
}
|
||||
if (nounCount == NC_PLURAL)
|
||||
{
|
||||
return L"NC_PLURAL";
|
||||
}
|
||||
|
||||
std::cout << "Error in NounCountToWString!" << std::endl;
|
||||
return L"";
|
||||
}
|
||||
|
||||
|
||||
std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase)
|
||||
{
|
||||
switch (nounGrammaticalCase)
|
||||
{
|
||||
case NGC_P1_NOMINATIVE: return L"NGC_P1_NOMINATIVE";
|
||||
case NGC_P2_GENITIVE: return L"NGC_P2_GENITIVE";
|
||||
case NGC_P3_DATIVE: return L"NGC_P3_DATIVE";
|
||||
case NGC_P4_ACCUSATIVE: return L"NGC_P4_ACCUSATIVE";
|
||||
case NGC_P5_INSTRUMENTAL: return L"NGC_P5_INSTRUMENTAL";
|
||||
case NGC_P6_PREPOSITIONAL: return L"NGC_P6_PREPOSITIONAL";
|
||||
}
|
||||
|
||||
return L"";
|
||||
}
|
||||
|
||||
// Parses the enumerator name of a grammatical case ("NGC_P1_NOMINATIVE" ...
// "NGC_P6_PREPOSITIONAL"). Any other text is reported on std::cout and
// treated as NGC_P1_NOMINATIVE.
NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str)
{
    // Accepted names with the value each one maps to.
    static const struct
    {
        const wchar_t* name;
        NounGrammaticalCase value;
    } knownCases[] = {
        { L"NGC_P1_NOMINATIVE",    NGC_P1_NOMINATIVE },
        { L"NGC_P2_GENITIVE",      NGC_P2_GENITIVE },
        { L"NGC_P3_DATIVE",        NGC_P3_DATIVE },
        { L"NGC_P4_ACCUSATIVE",    NGC_P4_ACCUSATIVE },
        { L"NGC_P5_INSTRUMENTAL",  NGC_P5_INSTRUMENTAL },
        { L"NGC_P6_PREPOSITIONAL", NGC_P6_PREPOSITIONAL },
    };

    for (const auto& entry : knownCases)
    {
        if (str == entry.name)
        {
            return entry.value;
        }
    }

    std::cout << "Error in WStringToNounGrammaticalCase!" << std::endl;
    return NGC_P1_NOMINATIVE;
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Returns true when `c` is one of the lowercase Russian consonant letters.
// The letter "short i" (U+0439) is deliberately not part of the set.
bool charIsConsolant(wchar_t c) // except "short i" (U+0439)
{
    // Spelled with universal character names so the set does not depend on
    // the encoding this source file is saved or compiled in.
    // Letters, in order: ts k n g sh shch z kh f v p r l d zh ch s m t b.
    const std::wstring consolants =
        L"\u0446\u043A\u043D\u0433\u0448\u0449\u0437\u0445\u0444\u0432"
        L"\u043F\u0440\u043B\u0434\u0436\u0447\u0441\u043C\u0442\u0431";

    return consolants.find(c) != std::wstring::npos;
}
|
||||
|
||||
// Returns true when `c` is one of the ten lowercase Russian vowel letters.
bool charIsVowel(wchar_t c)
{
    // Spelled with universal character names so the set does not depend on
    // the encoding this source file is saved or compiled in.
    // Letters, in order: a o u y e ya yo yu i ye.
    const std::wstring vovels =
        L"\u0430\u043E\u0443\u044B\u044D\u044F\u0451\u044E\u0438\u0435";

    return vovels.find(c) != std::wstring::npos;
}
|
||||
|
||||
|
||||
// Consonant sets used to tell "I-form" stems from "U-form" stems.
// NOTE(review): presumably these are the stem-final consonants that select
// the corresponding ending variant -- confirm against the declension tables.
// Both literals use universal character names so they do not depend on the
// encoding this source file is saved or compiled in.

// Letters, in order: g kh k zh sh ch shch.
std::wstring i_form_consolants = L"\u0433\u0445\u043A\u0436\u0448\u0447\u0449";

// Letters, in order: b p d t v f z s n m l r ts.
std::wstring u_form_consolants =
    L"\u0431\u043F\u0434\u0442\u0432\u0444\u0437\u0441\u043D\u043C\u043B\u0440\u0446";

// Returns true when `c` is in i_form_consolants.
bool charIsIFormConsolant(wchar_t c)
{
    return i_form_consolants.find(c) != std::wstring::npos;
}

// Returns true when `c` is in u_form_consolants.
bool charIsUFormConsolant(wchar_t c)
{
    return u_form_consolants.find(c) != std::wstring::npos;
}
|
58
rudict/rudict/grammarCase.h
Normal file
58
rudict/rudict/grammarCase.h
Normal file
@ -0,0 +1,58 @@
|
||||
#ifndef GRAMMAR_CASE_H_INCLUDED
|
||||
#define GRAMMAR_CASE_H_INCLUDED
|
||||
|
||||
|
||||
#include <set>
|
||||
#include <string>
|
||||
|
||||
// Grammatical gender.
enum NounGender
{
    NG_MALE    = 0,
    NG_FEMALE  = 1,
    NG_NEUTRAL = 2
};
|
||||
|
||||
// The six grammatical cases, numbered from zero in declaration order.
// NGC_SIZE is not a case; it equals the number of cases.
enum NounGrammaticalCase
{
    NGC_P1_NOMINATIVE    = 0,
    NGC_P2_GENITIVE      = 1,
    NGC_P3_DATIVE        = 2,
    NGC_P4_ACCUSATIVE    = 3,
    NGC_P5_INSTRUMENTAL  = 4,
    NGC_P6_PREPOSITIONAL = 5,
    NGC_SIZE             = 6
};
|
||||
|
||||
// Grammatical number. NC_SIZE is not a number; it equals the count of
// real enumerators.
enum NounCount
{
    NC_SINGULAR = 0,
    NC_PLURAL   = 1,
    NC_SIZE     = 2
};
|
||||
|
||||
|
||||
struct GrammaticalTableRecord
|
||||
{
|
||||
NounCount count;
|
||||
|
||||
NounGrammaticalCase grammaticalCase;
|
||||
|
||||
std::set<std::wstring> ending;
|
||||
};
|
||||
|
||||
|
||||
std::wstring NounCountToWString(NounCount nounCount);
|
||||
NounCount WStringToNounCount(std::wstring str);
|
||||
std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase);
|
||||
NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str);
|
||||
|
||||
|
||||
// Tests `c` against the consonant letter set defined in grammarCase.cpp.
bool charIsConsolant(wchar_t c); // except the letter "short i" (U+0439)
|
||||
|
||||
bool charIsVowel(wchar_t c);
|
||||
|
||||
bool charIsIFormConsolant(wchar_t c);
|
||||
bool charIsUFormConsolant(wchar_t c);
|
||||
|
||||
|
||||
#endif //GRAMMAR_CASE_H_INCLUDED
|
@ -21,6 +21,7 @@
|
||||
#include "boost/property_tree/json_parser.hpp"
|
||||
#include "../utf8utf16.h"
|
||||
#include "../noun.h"
|
||||
#include "../adjective.h"
|
||||
|
||||
namespace http {
|
||||
namespace server {
|
||||
@ -144,12 +145,16 @@ namespace http {
|
||||
result.put(L"error", L"String is too short");
|
||||
}
|
||||
|
||||
std::set<NounStruct> nounStructArr = RecognizeNoun(request);
|
||||
|
||||
int id;
|
||||
|
||||
//Noun!
|
||||
|
||||
std::set<NN::NounStruct> nounStructArr = NN::RecognizeNoun(request);
|
||||
|
||||
std::cout <<"nounstructarr" << nounStructArr.size() << std::endl;
|
||||
|
||||
int id = 0;
|
||||
id = 0;
|
||||
|
||||
boost::property_tree::wptree nounArr;
|
||||
|
||||
@ -178,6 +183,35 @@ namespace http {
|
||||
|
||||
result.put_child(L"nouns", nounArr);
|
||||
|
||||
//Adjective!
|
||||
|
||||
std::set<AJ::AdjectiveStruct> adjectiveStructArr = AJ::RecognizeAdjective(request);
|
||||
|
||||
id = 0;
|
||||
|
||||
boost::property_tree::wptree adjectiveArr;
|
||||
|
||||
for (auto& adjectiveStruct : adjectiveStructArr)
|
||||
{
|
||||
boost::property_tree::wptree adjectivrTree;
|
||||
|
||||
adjectivrTree.put(L"id", id);
|
||||
adjectivrTree.put(L"grammaticalCase", NounGrammaticalCaseToWString(adjectiveStruct.grammaticalCase));
|
||||
adjectivrTree.put(L"animated", adjectiveStruct.isDeclentionAnimated);
|
||||
adjectivrTree.put(L"count", NounCountToWString(adjectiveStruct.count));
|
||||
|
||||
adjectivrTree.put(L"gender", adjectiveStruct.gender);
|
||||
|
||||
adjectivrTree.put(L"nominativeSingularForm", adjectiveStruct.adjectiveRecord.nominativeMaleForm);
|
||||
|
||||
adjectiveArr.push_back(std::make_pair(L"", adjectivrTree));
|
||||
|
||||
id++;
|
||||
}
|
||||
|
||||
result.put_child(L"adjectives", adjectiveArr);
|
||||
|
||||
|
||||
|
||||
return result;
|
||||
|
||||
|
@ -6,17 +6,25 @@
|
||||
|
||||
#include "noun.h"
|
||||
|
||||
#include "adjective.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
SetupDeclentionMap();
|
||||
LoadNounDeclencionCaseTable();
|
||||
LoadFrequentWordSet();
|
||||
FillDivisionCaseMaps();
|
||||
CalculatePluralForm();
|
||||
NN::SetupDeclentionMap();
|
||||
NN::LoadNounDeclencionCaseTable();
|
||||
NN::LoadFrequentWordSet();
|
||||
NN::FillDivisionCaseMaps();
|
||||
NN::CalculatePluralForm();
|
||||
|
||||
AJ::LoadAdjectiveDeclencionCaseTable();
|
||||
AJ::LoadFrequentAdjectiveSet();
|
||||
AJ::FillDivisionCaseMaps();
|
||||
AJ::SetupDeclentionMap();
|
||||
|
||||
//RecognizeNoun(L"стульями");
|
||||
//Косяк: "вечер"
|
||||
|
||||
//AJ::RecognizeAdjective(L"золотыми");
|
||||
try
|
||||
{
|
||||
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include "boost/regex.hpp"
|
||||
#include "boost/algorithm/string/regex.hpp"
|
||||
|
||||
namespace NN
|
||||
{
|
||||
|
||||
std::vector<NounRecord> NounRecordArr;
|
||||
|
||||
@ -153,83 +155,6 @@ NounDeclencion WStringToNounDeclencion(std::wstring str)
|
||||
|
||||
}
|
||||
|
||||
NounCount WStringToNounCount(std::wstring str)
|
||||
{
|
||||
if (str == L"NC_SINGULAR")
|
||||
{
|
||||
return NC_SINGULAR;
|
||||
}
|
||||
if (str == L"NC_PLURAL")
|
||||
{
|
||||
return NC_PLURAL;
|
||||
}
|
||||
|
||||
std::cout << "Error in WStringToNounCount!" << std::endl;
|
||||
return NC_SINGULAR;
|
||||
}
|
||||
|
||||
std::wstring NounCountToWString(NounCount nounCount)
|
||||
{
|
||||
if (nounCount == NC_SINGULAR)
|
||||
{
|
||||
return L"NC_SINGULAR";
|
||||
}
|
||||
if (nounCount == NC_PLURAL)
|
||||
{
|
||||
return L"NC_PLURAL";
|
||||
}
|
||||
|
||||
std::cout << "Error in NounCountToWString!" << std::endl;
|
||||
return L"";
|
||||
}
|
||||
|
||||
|
||||
std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase)
|
||||
{
|
||||
switch (nounGrammaticalCase)
|
||||
{
|
||||
case NGC_P1_NOMINATIVE: return L"NGC_P1_NOMINATIVE";
|
||||
case NGC_P2_GENITIVE: return L"NGC_P2_GENITIVE";
|
||||
case NGC_P3_DATIVE: return L"NGC_P3_DATIVE";
|
||||
case NGC_P4_ACCUSATIVE: return L"NGC_P4_ACCUSATIVE";
|
||||
case NGC_P5_INSTRUMENTAL: return L"NGC_P5_INSTRUMENTAL";
|
||||
case NGC_P6_PREPOSITIONAL: return L"NGC_P6_PREPOSITIONAL";
|
||||
}
|
||||
|
||||
return L"";
|
||||
}
|
||||
|
||||
NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str)
|
||||
{
|
||||
|
||||
if (str == L"NGC_P1_NOMINATIVE")
|
||||
{
|
||||
return NGC_P1_NOMINATIVE;
|
||||
}
|
||||
if (str == L"NGC_P2_GENITIVE")
|
||||
{
|
||||
return NGC_P2_GENITIVE;
|
||||
}
|
||||
if (str == L"NGC_P3_DATIVE")
|
||||
{
|
||||
return NGC_P3_DATIVE;
|
||||
}
|
||||
if (str == L"NGC_P4_ACCUSATIVE")
|
||||
{
|
||||
return NGC_P4_ACCUSATIVE;
|
||||
}
|
||||
if (str == L"NGC_P5_INSTRUMENTAL")
|
||||
{
|
||||
return NGC_P5_INSTRUMENTAL;
|
||||
}
|
||||
if (str == L"NGC_P6_PREPOSITIONAL")
|
||||
{
|
||||
return NGC_P6_PREPOSITIONAL;
|
||||
}
|
||||
|
||||
std::cout << "Error in WStringToNounGrammaticalCase!" << std::endl;
|
||||
return NGC_P1_NOMINATIVE;
|
||||
}
|
||||
|
||||
std::vector<std::wstring> GetAllNounEndingArr()
|
||||
{
|
||||
@ -458,35 +383,6 @@ NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativeP
|
||||
}
|
||||
|
||||
|
||||
bool charIsConsolant(wchar_t c) //except й
|
||||
{
|
||||
std::wstring consolants = L"цкнгшщзхфвпрлджчсмтб";
|
||||
|
||||
for (wchar_t ic : consolants)
|
||||
{
|
||||
if (c == ic)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool charIsVowel(wchar_t c)
|
||||
{
|
||||
std::wstring vovels = L"аоуыэяёюие";
|
||||
|
||||
for (wchar_t ic : vovels)
|
||||
{
|
||||
if (c == ic)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool charIsMissingVowelSoftenerConsolant(wchar_t c)
|
||||
@ -545,13 +441,6 @@ std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun)
|
||||
result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E });
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
if (ending == L"а" || ending == L"я")
|
||||
{
|
||||
result.insert({ nounBase, ending, NounEndingDivision::DC_SPECIAL_PLURAL_A });
|
||||
}*/
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -575,34 +464,6 @@ std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
//Xperimental -- additionally check for plural form with a-ending
|
||||
if (nounEnding == L"а" || nounEnding == L"я")
|
||||
{
|
||||
|
||||
result.push_back(NounTuple{ SECOND_MALE_IFORM_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
||||
result.push_back(NounTuple{ SECOND_MALE_IFORM_INANIMATE, NC_PLURAL, NGC_P4_ACCUSATIVE });
|
||||
|
||||
result.push_back(NounTuple{ SECOND_MALE_UFORM_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
||||
result.push_back(NounTuple{ SECOND_MALE_UFORM_INANIMATE, NC_PLURAL, NGC_P4_ACCUSATIVE });
|
||||
|
||||
result.push_back(NounTuple{ SECOND_MALE_SSFORM_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
||||
result.push_back(NounTuple{ SECOND_MALE_SSFORM_INANIMATE, NC_PLURAL, NGC_P4_ACCUSATIVE });
|
||||
|
||||
result.push_back(NounTuple{ SECOND_I_SHORT_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
||||
result.push_back(NounTuple{ SECOND_I_SHORT_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
||||
|
||||
|
||||
result.push_back(NounTuple{ SECOND_MALE_IFORM_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
||||
|
||||
result.push_back(NounTuple{ SECOND_MALE_UFORM_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
||||
|
||||
result.push_back(NounTuple{ SECOND_MALE_SSFORM_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
||||
|
||||
result.push_back(NounTuple{ SECOND_I_SHORT_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
||||
}*/
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -668,8 +529,6 @@ std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclenci
|
||||
return L"";
|
||||
}
|
||||
|
||||
std::wstring i_form_consolants = L"гхкжшчщ";
|
||||
std::wstring u_form_consolants = L"бпдтвфзснмлрц";
|
||||
|
||||
wchar_t GetLastChar(const NounRecord& nounRecord)
|
||||
{
|
||||
@ -681,16 +540,6 @@ wchar_t GetPrevLastChar(const NounRecord& nounRecord)
|
||||
return nounRecord.nominativeForm[nounRecord.nominativeForm.size() - 2];
|
||||
}
|
||||
|
||||
bool charIsIFormConsolant(wchar_t c)
|
||||
{
|
||||
return i_form_consolants.find(c) != i_form_consolants.npos;
|
||||
}
|
||||
|
||||
bool charIsUFormConsolant(wchar_t c)
|
||||
{
|
||||
return u_form_consolants.find(c) != i_form_consolants.npos;
|
||||
}
|
||||
|
||||
|
||||
bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord)
|
||||
{
|
||||
@ -1376,3 +1225,5 @@ void LoadFrequentWordSet()
|
||||
|
||||
|
||||
}
|
||||
|
||||
} //namespace NN
|
@ -9,12 +9,10 @@
|
||||
|
||||
#include "boost/algorithm/string.hpp"
|
||||
|
||||
enum NounGender
|
||||
#include "grammarCase.h"
|
||||
|
||||
namespace NN
|
||||
{
|
||||
NG_MALE = 0,
|
||||
NG_FEMALE,
|
||||
NG_NEUTRAL
|
||||
};
|
||||
|
||||
struct NounRecord
|
||||
{
|
||||
@ -68,32 +66,9 @@ enum NounDeclencion
|
||||
THIRD_FORM_ANIMATE,
|
||||
};
|
||||
|
||||
enum NounGrammaticalCase
|
||||
{
|
||||
NGC_P1_NOMINATIVE = 0,
|
||||
NGC_P2_GENITIVE,
|
||||
NGC_P3_DATIVE,
|
||||
NGC_P4_ACCUSATIVE,
|
||||
NGC_P5_INSTRUMENTAL,
|
||||
NGC_P6_PREPOSITIONAL,
|
||||
NGC_SIZE
|
||||
};
|
||||
extern std::vector<NounRecord> NounRecordArr;
|
||||
|
||||
enum NounCount
|
||||
{
|
||||
NC_SINGULAR = 0,
|
||||
NC_PLURAL,
|
||||
NC_SIZE
|
||||
};
|
||||
|
||||
struct GrammaticalTableRecord
|
||||
{
|
||||
NounCount count;
|
||||
|
||||
NounGrammaticalCase grammaticalCase;
|
||||
|
||||
std::set<std::wstring> ending;
|
||||
};
|
||||
|
||||
struct NounDeclencionCaseTableRecord
|
||||
{
|
||||
@ -101,21 +76,10 @@ struct NounDeclencionCaseTableRecord
|
||||
std::vector<GrammaticalTableRecord> grammaticalCaseTable;
|
||||
};
|
||||
|
||||
|
||||
|
||||
extern std::vector<NounRecord> NounRecordArr;
|
||||
|
||||
extern std::vector<NounDeclencionCaseTableRecord> nounDeclencionCaseTable;
|
||||
|
||||
|
||||
NounDeclencion WStringToNounDeclencion(std::wstring str);
|
||||
std::wstring NounCountToWString(NounCount nounCount);
|
||||
NounCount WStringToNounCount(std::wstring str);
|
||||
std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase);
|
||||
NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str);
|
||||
|
||||
|
||||
//std::wstring NounNumberToWString(NounNumber nounNumber);
|
||||
|
||||
typedef std::tuple <
|
||||
NounDeclencion,
|
||||
@ -169,11 +133,6 @@ bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural);
|
||||
NounRecord GetNounRecordFromDictionary(std::wstring nounNominative);
|
||||
NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural);
|
||||
|
||||
bool charIsConsolant(wchar_t c); //except й
|
||||
|
||||
bool charIsVowel(wchar_t c);
|
||||
|
||||
|
||||
bool charIsMissingVowelSoftenerConsolant(wchar_t c);
|
||||
|
||||
struct NounStruct
|
||||
@ -220,9 +179,6 @@ std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclenci
|
||||
|
||||
wchar_t GetLastChar(const NounRecord& nounRecord);
|
||||
wchar_t GetPrevLastChar(const NounRecord& nounRecord);
|
||||
bool charIsIFormConsolant(wchar_t c);
|
||||
bool charIsUFormConsolant(wchar_t c);
|
||||
|
||||
|
||||
void SetupDeclentionMap();
|
||||
|
||||
@ -241,7 +197,9 @@ void CalculatePluralForm();
|
||||
|
||||
void LoadFrequentWordSet();
|
||||
|
||||
|
||||
void LoadNounDeclencionCaseTable();
|
||||
|
||||
|
||||
} //namespace NN
|
||||
|
||||
#endif //NOUN_H_INCLUDED
|
||||
|
@ -70,6 +70,8 @@
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="adjective.cpp" />
|
||||
<ClCompile Include="grammarCase.cpp" />
|
||||
<ClCompile Include="http\connection.cpp" />
|
||||
<ClCompile Include="http\connection_manager.cpp" />
|
||||
<ClCompile Include="http\mime_types.cpp" />
|
||||
@ -82,6 +84,8 @@
|
||||
<ClCompile Include="utf8utf16.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="adjective.h" />
|
||||
<ClInclude Include="grammarCase.h" />
|
||||
<ClInclude Include="http\connection.hpp" />
|
||||
<ClInclude Include="http\connection_manager.hpp" />
|
||||
<ClInclude Include="http\header.hpp" />
|
||||
|
@ -48,6 +48,12 @@
|
||||
<ClCompile Include="utf8utf16.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="adjective.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="grammarCase.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="noun.h">
|
||||
@ -83,5 +89,11 @@
|
||||
<ClInclude Include="utf8utf16.h">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="adjective.h">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="grammarCase.h">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
Reference in New Issue
Block a user