Adjectives added
This commit is contained in:
parent
54456546c7
commit
a3e371c51e
282
rudict/frequent_adjectives_2000.csv
Normal file
282
rudict/frequent_adjectives_2000.csv
Normal file
@ -0,0 +1,282 @@
|
|||||||
|
Номер;Прилагательное; может быть кратким просто так или с добавлением Е;специальная краткая форма, мужской род
|
||||||
|
56;другой;0;
|
||||||
|
73;новый;1;
|
||||||
|
89;самый;0;
|
||||||
|
96;большой;0;
|
||||||
|
130;последний;0;
|
||||||
|
141;российский;0;
|
||||||
|
173;русский;0;
|
||||||
|
193;высокий;1;
|
||||||
|
199;хороший;1;
|
||||||
|
207;главный;1;
|
||||||
|
215;лучший;0;
|
||||||
|
229;маленький;0;
|
||||||
|
231;некоторый;0;
|
||||||
|
233;любой;0;
|
||||||
|
241;молодой;1;
|
||||||
|
249;государственный;1;
|
||||||
|
253;никакой;0;
|
||||||
|
254;советский;0;
|
||||||
|
256;настоящий;1;
|
||||||
|
264;старый;1;
|
||||||
|
278;разный;0;
|
||||||
|
279;нужный;1;
|
||||||
|
281;иной;0;
|
||||||
|
290;белый;1;
|
||||||
|
291;собственный;1;
|
||||||
|
293;чёрный;1;
|
||||||
|
297;основной;0;
|
||||||
|
311;далекий;1;
|
||||||
|
334;подобный;1;
|
||||||
|
335;следующий;0;
|
||||||
|
352;равный;1;
|
||||||
|
364;живой;1;
|
||||||
|
369;известный;1;
|
||||||
|
373;военный;1;
|
||||||
|
377;важный;1;
|
||||||
|
379;великий;1;
|
||||||
|
381;простой;1;
|
||||||
|
385;огромный;1;
|
||||||
|
387;политический;0;
|
||||||
|
409;московский;0;
|
||||||
|
424;готовый;1;
|
||||||
|
439;данный;0;
|
||||||
|
442;красный;1;
|
||||||
|
453;современный;1;
|
||||||
|
472;социальный;1;
|
||||||
|
480;ранний;0;
|
||||||
|
482;особый;0;
|
||||||
|
483;целый;1;
|
||||||
|
487;плохой;1;
|
||||||
|
490;сильный;1;
|
||||||
|
492;скорый;1;
|
||||||
|
502;внутренний;1;
|
||||||
|
514;экономический;0;
|
||||||
|
516;правый;1;
|
||||||
|
519;федеральный;1;
|
||||||
|
524;близкий;1;
|
||||||
|
526;похожий;1;
|
||||||
|
532;различный;1;
|
||||||
|
539;необходимый;1;
|
||||||
|
544;единственный;1;
|
||||||
|
545;лёгкий;0;лёгок
|
||||||
|
555;человеческий;0;
|
||||||
|
561;международный;1;
|
||||||
|
564;дорогой;1;
|
||||||
|
572;небольшой;0;
|
||||||
|
577;местный;0;
|
||||||
|
586;бывший;0;
|
||||||
|
601;американский;0;
|
||||||
|
615;мировой;0;
|
||||||
|
617;тяжелый;1;
|
||||||
|
627;возможный;1;
|
||||||
|
630;отдельный;1;
|
||||||
|
631;средний;1;
|
||||||
|
632;красивый;1;
|
||||||
|
640;короткий;1;короток
|
||||||
|
654;серьёзный;1;
|
||||||
|
660;интересный;1;
|
||||||
|
662;добрый;1;
|
||||||
|
665;национальный;1;
|
||||||
|
667;длинный;1;
|
||||||
|
670;страшный;1;
|
||||||
|
671;прошлый;0;
|
||||||
|
673;общественный;1;
|
||||||
|
703;детский;0;
|
||||||
|
707;единый;1;
|
||||||
|
709;определённый;1;
|
||||||
|
719;чужой;1;
|
||||||
|
721;странный;1;
|
||||||
|
723;чистый;1;
|
||||||
|
732;поздний;1;
|
||||||
|
738;специальный;1;
|
||||||
|
745;научный;1;
|
||||||
|
754;сложный;1;
|
||||||
|
762;реальный;1;
|
||||||
|
775;способный;1;
|
||||||
|
778;малый;1;
|
||||||
|
779;старший;0;
|
||||||
|
783;личный;1;
|
||||||
|
786;свободный;1;
|
||||||
|
788;обычный;1;
|
||||||
|
790;прекрасный;1;
|
||||||
|
791;высший;0;
|
||||||
|
803;тёмный;1;
|
||||||
|
810;гражданский;0;
|
||||||
|
857;боевой;0;
|
||||||
|
870;рабочий;1;
|
||||||
|
872;глубокий;1;
|
||||||
|
885;долгий;0;долог
|
||||||
|
892;прямой;1;
|
||||||
|
897;открытый;1;
|
||||||
|
904;знакомый;1;
|
||||||
|
908;нынешний;0;
|
||||||
|
921;исторический;0;
|
||||||
|
923;народный;1;
|
||||||
|
929;знаменитый;1;
|
||||||
|
933;больший;0;
|
||||||
|
940;пустой;1;
|
||||||
|
941;очередной;1;
|
||||||
|
949;судебный;1;
|
||||||
|
958;зелёный;1;
|
||||||
|
966;немецкий;0;
|
||||||
|
967;золотой;1;
|
||||||
|
969;технический;0;
|
||||||
|
970;нормальный;1;
|
||||||
|
974;некий;0;
|
||||||
|
976;городской;0;
|
||||||
|
978;соответствующий;0;
|
||||||
|
992;любимый;1;
|
||||||
|
993;родной;1;
|
||||||
|
994;западный;1;
|
||||||
|
1000;быстрый;1;
|
||||||
|
1008;холодный;1;
|
||||||
|
1012;конкретный;1;
|
||||||
|
1018;иностранный;0;
|
||||||
|
1020;ученый;1;
|
||||||
|
1022;левый;1;
|
||||||
|
1023;счастливый;1;
|
||||||
|
1035;святой;1;
|
||||||
|
1037;точный;1;
|
||||||
|
1053;частый;1;
|
||||||
|
1058;значительный;1;
|
||||||
|
1067;связанный;0;связан
|
||||||
|
1073;уверенный;0;уверен
|
||||||
|
1075;тонкий;0;тонок
|
||||||
|
1076;центральный;1;
|
||||||
|
1083;будущий;0;
|
||||||
|
1104;физический;0;
|
||||||
|
1117;частный;1;
|
||||||
|
1119;мелкий;0;мелок
|
||||||
|
1121;английский;0;
|
||||||
|
1122;постоянный;1;
|
||||||
|
1131;тихий;1;
|
||||||
|
1158;европейский;0;
|
||||||
|
1162;ближайший;0;
|
||||||
|
1163;отечественный;0;
|
||||||
|
1164;теплый;0;
|
||||||
|
1166;духовный;1;
|
||||||
|
1168;прежний;0;
|
||||||
|
1190;профессиональный;1;
|
||||||
|
1191;французский;0;
|
||||||
|
1192;женский;0;
|
||||||
|
1193;крайний;0;
|
||||||
|
1208;божий;0;
|
||||||
|
1215;дальнейший;0;
|
||||||
|
1225;естественный;1;
|
||||||
|
1229;информационный;0;
|
||||||
|
1234;железный;1;
|
||||||
|
1240;горячий;1;
|
||||||
|
1248;веселый;1;
|
||||||
|
1271;серый;1;
|
||||||
|
1276;опасный;1;
|
||||||
|
1284;прочий;0;
|
||||||
|
1302;слабый;1;
|
||||||
|
1306;яркий;0;ярок
|
||||||
|
1311;больной;1;
|
||||||
|
1314;летний;0;
|
||||||
|
1315;дополнительный;1;
|
||||||
|
1326;лесной;0;
|
||||||
|
1343;умный;1;
|
||||||
|
1346;северный;0;
|
||||||
|
1352;ясный;1;
|
||||||
|
1353;милый;1;
|
||||||
|
1355;светлый;1;
|
||||||
|
1359;редкий;0;редок
|
||||||
|
1362;верный;1;
|
||||||
|
1364;юридический;0;
|
||||||
|
1377;административный;1;
|
||||||
|
1388;узкий;0;узок
|
||||||
|
1397;художественный;1;
|
||||||
|
1404;древний;1;
|
||||||
|
1408;массовый;0;
|
||||||
|
1410;генеральный;1;
|
||||||
|
1411;замечательный;1;
|
||||||
|
1421;задний;1;
|
||||||
|
1422;региональный;1;
|
||||||
|
1429;здоровый;1;
|
||||||
|
1438;активный;1;
|
||||||
|
1439;литературный;1;
|
||||||
|
1452;острый;1;
|
||||||
|
1455;богатый;1;
|
||||||
|
1459;творческий;0;
|
||||||
|
1462;мягкий;0;мягок
|
||||||
|
1463;ночной;0;
|
||||||
|
1468;налоговый;0;
|
||||||
|
1469;толстый;0;толст
|
||||||
|
1480;верхний;0;
|
||||||
|
1482;вечный;1;
|
||||||
|
1484;лишний;0;
|
||||||
|
1485;морской;0;
|
||||||
|
1487;нижний;1;
|
||||||
|
1488;спокойный;0;спокоен
|
||||||
|
1493;сухой;1;
|
||||||
|
1498;синий;0;
|
||||||
|
1503;сегодняшний;0;
|
||||||
|
1509;медицинский;0;
|
||||||
|
1519;свежий;1;
|
||||||
|
1520;трудный;1;
|
||||||
|
1521;уголовный;1;
|
||||||
|
1582;желтый;1;желт
|
||||||
|
1596;мощный;1;
|
||||||
|
1617;русский;0;
|
||||||
|
1618;деревянный;1;
|
||||||
|
1628;полезный;1;
|
||||||
|
1632;дальний;1;
|
||||||
|
1644;домашний;0;
|
||||||
|
1647;традиционный;1;
|
||||||
|
1654;жесткий;1;
|
||||||
|
1655;крепкий;1;
|
||||||
|
1659;виноватый;1;
|
||||||
|
1663;культурный;1;
|
||||||
|
1666;приятный;1;
|
||||||
|
1682;круглый;0;кругл
|
||||||
|
1687;понятный;1;
|
||||||
|
1688;голубой;0;
|
||||||
|
1690;удивительный;1;
|
||||||
|
1692;знакомый;1;
|
||||||
|
1694;мужской;0;
|
||||||
|
1696;правовой;0;
|
||||||
|
1705;мертвый;0;мертв
|
||||||
|
1714;совместный;1;
|
||||||
|
1718;семейный;0;
|
||||||
|
1734;природный;1;
|
||||||
|
1748;православный;1;
|
||||||
|
1750;учебный;1;
|
||||||
|
1756;эффективный;1;
|
||||||
|
1780;материальный;1;
|
||||||
|
1781;невозможный;1;
|
||||||
|
1814;соседний;0;
|
||||||
|
1815;психологический;0;
|
||||||
|
1823;музыкальный;1;
|
||||||
|
1834;голый;1;
|
||||||
|
1838;обязательный;1;
|
||||||
|
1845;исполнительный;1;
|
||||||
|
1848;партийный;0;
|
||||||
|
1859;существенный;1;
|
||||||
|
1869;восточный;1;
|
||||||
|
1880;рабочий;0;
|
||||||
|
1882;сельский;0;
|
||||||
|
1883;характерный;1;
|
||||||
|
1886;неожиданный;1;
|
||||||
|
1896;видный;1;
|
||||||
|
1904;честный;1;
|
||||||
|
1910;резкий;0;резок
|
||||||
|
1912;младший;0;
|
||||||
|
1920;трудовой;0;
|
||||||
|
1922;строгий;1;
|
||||||
|
1931;южный;1;
|
||||||
|
1932;практический;0;
|
||||||
|
1933;многочисленный;1;
|
||||||
|
1935;согласный;1;
|
||||||
|
1943;спортивный;1;
|
||||||
|
1947;истинный;1;
|
||||||
|
1956;злой;0;зол
|
||||||
|
1961;индивидуальный;1;
|
||||||
|
1965;мокрый;1;
|
||||||
|
1969;дикий;1;
|
||||||
|
1981;коммерческий;0;
|
||||||
|
1983;театральный;1;
|
||||||
|
1988;воздушный;1;
|
||||||
|
1989;дешевый;1;
|
||||||
|
1993;пьяный;1;
|
|
BIN
rudict/frequent_adjectives_2000.xlsx
Normal file
BIN
rudict/frequent_adjectives_2000.xlsx
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
121
rudict/grammar_case_adjective.csv
Normal file
121
rudict/grammar_case_adjective.csv
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
Склонение;Число;Падеж;Окончание;Пример
|
||||||
|
IFORM_MALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ий;рабочий
|
||||||
|
;;NGC_P2_GENITIVE;его;подарок для рабочего
|
||||||
|
;;NGC_P3_DATIVE;ему;подарок рабочему
|
||||||
|
;;NGC_P4_ACCUSATIVE;ий;обвиняю рабочий
|
||||||
|
;;NGC_P5_INSTRUMENTAL;им;говорю с рабочим
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ем;говорю о рабочем
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие
|
||||||
|
;;NGC_P2_GENITIVE;их;подарок для рабочих
|
||||||
|
;;NGC_P3_DATIVE;им;подарок рабочим
|
||||||
|
;;NGC_P4_ACCUSATIVE;ие;обвиняю рабочие
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими
|
||||||
|
;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих
|
||||||
|
IFORM_MALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ий;рабочий
|
||||||
|
;;NGC_P2_GENITIVE;его;подарок для рабочего
|
||||||
|
;;NGC_P3_DATIVE;ему;подарок рабочему
|
||||||
|
;;NGC_P4_ACCUSATIVE;его;обвиняю рабочего
|
||||||
|
;;NGC_P5_INSTRUMENTAL;им;говорю с рабочим
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ем;говорю о рабочем
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие
|
||||||
|
;;NGC_P2_GENITIVE;их;подарок для рабочих
|
||||||
|
;;NGC_P3_DATIVE;им;подарок рабочим
|
||||||
|
;;NGC_P4_ACCUSATIVE;их;обвиняю рабочих
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими
|
||||||
|
;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих
|
||||||
|
UOFORM_MALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ой, ый;деловой
|
||||||
|
;;NGC_P2_GENITIVE;ого;подарок для делового
|
||||||
|
;;NGC_P3_DATIVE;ому;подарок деловому
|
||||||
|
;;NGC_P4_ACCUSATIVE;ой, ый;обвиняю деловой
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ым;говорю с деловым
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ом;говорю о деловом
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые
|
||||||
|
;;NGC_P2_GENITIVE;ых;подарок для деловых
|
||||||
|
;;NGC_P3_DATIVE;ым;подарок деловым
|
||||||
|
;;NGC_P4_ACCUSATIVE;ые;обвиняю деловые
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых
|
||||||
|
UOFORM_MALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ой, ый;деловой
|
||||||
|
;;NGC_P2_GENITIVE;ого;подарок для делового
|
||||||
|
;;NGC_P3_DATIVE;ому;подарок деловому
|
||||||
|
;;NGC_P4_ACCUSATIVE;ого;обвиняю делового
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ым;говорю с деловым
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ом;говорю о деловом
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые
|
||||||
|
;;NGC_P2_GENITIVE;ых;подарок для деловых
|
||||||
|
;;NGC_P3_DATIVE;ым;подарок деловым
|
||||||
|
;;NGC_P4_ACCUSATIVE;ых;обвиняю деловых
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых
|
||||||
|
IFORM_FEMALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;рабочая
|
||||||
|
;;NGC_P2_GENITIVE;ей;подарок для рабочей
|
||||||
|
;;NGC_P3_DATIVE;ей;подарок рабочей
|
||||||
|
;;NGC_P4_ACCUSATIVE;ую;обвиняю рабочую
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ей;говорю с рабочей
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ей;говорю о рабочей
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие
|
||||||
|
;;NGC_P2_GENITIVE;их;подарок для рабочих
|
||||||
|
;;NGC_P3_DATIVE;им;подарок рабочим
|
||||||
|
;;NGC_P4_ACCUSATIVE;ие;обвиняю рабочие
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими
|
||||||
|
;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих
|
||||||
|
IFORM_FEMALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;рабочая
|
||||||
|
;;NGC_P2_GENITIVE;ей;подарок для рабочей
|
||||||
|
;;NGC_P3_DATIVE;ей;подарок рабочей
|
||||||
|
;;NGC_P4_ACCUSATIVE;ую;обвиняю рабочую
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ей;говорю с рабочей
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ей;говорю о рабочей
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие
|
||||||
|
;;NGC_P2_GENITIVE;их;подарок для рабочих
|
||||||
|
;;NGC_P3_DATIVE;им;подарок рабочим
|
||||||
|
;;NGC_P4_ACCUSATIVE;их;обвиняю рабочих
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими
|
||||||
|
;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих
|
||||||
|
UOFORM_FEMALE_INANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;деловая
|
||||||
|
;;NGC_P2_GENITIVE;ой;подарок для деловой
|
||||||
|
;;NGC_P3_DATIVE;ой;подарок деловой
|
||||||
|
;;NGC_P4_ACCUSATIVE;ую;обвиняю деловую
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ой;говорю с деловой
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ой;говорю о деловой
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые
|
||||||
|
;;NGC_P2_GENITIVE;ых;подарок для деловых
|
||||||
|
;;NGC_P3_DATIVE;ым;подарок деловым
|
||||||
|
;;NGC_P4_ACCUSATIVE;ые;обвиняю деловые
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых
|
||||||
|
UOFORM_FEMALE_ANIMATE;NC_SINGULAR;NGC_P1_NOMINATIVE;ая;деловая
|
||||||
|
;;NGC_P2_GENITIVE;ой;подарок для деловой
|
||||||
|
;;NGC_P3_DATIVE;ой;подарок деловой
|
||||||
|
;;NGC_P4_ACCUSATIVE;ую;обвиняю деловую
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ой;говорю с деловой
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ой;говорю о деловой
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые
|
||||||
|
;;NGC_P2_GENITIVE;ых;подарок для деловых
|
||||||
|
;;NGC_P3_DATIVE;ым;подарок деловым
|
||||||
|
;;NGC_P4_ACCUSATIVE;ых;обвиняю деловых
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых
|
||||||
|
IFORM_NEUTRAL;NC_SINGULAR;NGC_P1_NOMINATIVE;ее;рабочее
|
||||||
|
;;NGC_P2_GENITIVE;его;подарок для рабочего
|
||||||
|
;;NGC_P3_DATIVE;ему;подарок рабочему
|
||||||
|
;;NGC_P4_ACCUSATIVE;ее;обвиняю рабочее
|
||||||
|
;;NGC_P5_INSTRUMENTAL;им;говорю с рабочим
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ем;говорю о рабочем
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;ие;рабочие
|
||||||
|
;;NGC_P2_GENITIVE;их;подарок для рабочих
|
||||||
|
;;NGC_P3_DATIVE;им;подарок рабочим
|
||||||
|
;;NGC_P4_ACCUSATIVE;ие;обвиняю рабочие
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ими;говорю с рабочими
|
||||||
|
;;NGC_P6_PREPOSITIONAL;их;говорю о рабочих
|
||||||
|
UOFORM_NEUTRAL;NC_SINGULAR;NGC_P1_NOMINATIVE;ое;деловое
|
||||||
|
;;NGC_P2_GENITIVE;ого;подарок для делового
|
||||||
|
;;NGC_P3_DATIVE;ому;подарок деловому
|
||||||
|
;;NGC_P4_ACCUSATIVE;ое;обвиняю деловое
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ым;говорю с деловым
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ом;говорю о деловом
|
||||||
|
;NC_PLURAL;NGC_P1_NOMINATIVE;ые;деловые
|
||||||
|
;;NGC_P2_GENITIVE;ых;подарок для деловых
|
||||||
|
;;NGC_P3_DATIVE;ым;подарок деловым
|
||||||
|
;;NGC_P4_ACCUSATIVE;ые;обвиняю деловые
|
||||||
|
;;NGC_P5_INSTRUMENTAL;ыми;говорю с деловыми
|
||||||
|
;;NGC_P6_PREPOSITIONAL;ых;говорю о деловых
|
|
BIN
rudict/grammar_case_adjective.xlsx
Normal file
BIN
rudict/grammar_case_adjective.xlsx
Normal file
Binary file not shown.
507
rudict/rudict/adjective.cpp
Normal file
507
rudict/rudict/adjective.cpp
Normal file
@ -0,0 +1,507 @@
|
|||||||
|
#include "adjective.h"
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <iostream> //Xperimental -- for debug only
|
||||||
|
|
||||||
|
#include "utf8utf16.h"
|
||||||
|
|
||||||
|
#include "boost/regex.hpp"
|
||||||
|
#include "boost/algorithm/string/regex.hpp"
|
||||||
|
|
||||||
|
namespace AJ
|
||||||
|
{
|
||||||
|
|
||||||
|
std::vector<AdjectiveRecord> AdjectiveRecordArr;
|
||||||
|
|
||||||
|
std::vector<AdjectiveDeclencionCaseTableRecord> adjectiveDeclencionCaseTable;
|
||||||
|
|
||||||
|
|
||||||
|
// Creates an empty record: no word, no special short form, and the
// "standard short form available" flag cleared.
AdjectiveRecord::AdjectiveRecord()
    : standardShortFormAvailable(false)
{
}

// Builds a record from one ';'-separated data row of the frequent-adjective
// CSV. Columns, as named by that file's header row:
//   0 - frequency number (ignored here)
//   1 - adjective in nominative masculine singular
//   2 - "1" when the regular short form exists, anything else means "no"
//   3 - irregular masculine short form (may be empty)
// Missing columns leave the corresponding member at its default instead of
// reading past the end of the split result.
AdjectiveRecord::AdjectiveRecord(std::wstring line)
    : standardShortFormAvailable(false)
{
    // A CRLF file read on a platform that does not translate line endings
    // leaves '\r' at the end of the line; it must not leak into the last field.
    if (!line.empty() && line[line.size() - 1] == L'\r')
    {
        line.erase(line.size() - 1);
    }

    std::vector<std::wstring> lineArr;

    boost::split_regex(lineArr, line, boost::wregex(L";"));

    if (lineArr.size() > 1)
    {
        nominativeMaleForm = lineArr[1];
    }

    if (lineArr.size() > 2)
    {
        standardShortFormAvailable = (lineArr[2] == L"1");
    }

    if (lineArr.size() > 3)
    {
        specialShortForm = lineArr[3];
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns every adjective ending the recognizer tries to strip from a word.
//
// The endings are Cyrillic. They are spelled with universal character names
// so the resulting wide strings do not depend on the encoding the source file
// is saved or compiled in (the previous raw literals only produced Cyrillic
// when the file was interpreted as Windows-1251).
// The Latin transliteration of each ending is given alongside.
std::vector<std::wstring> GetAllAdjectiveEndingArr()
{
    return {
        L"\u0438\u0439",       // -iy
        L"\u0435\u0433\u043e", // -ego
        L"\u0435\u043c\u0443", // -emu
        L"\u0438\u043c",       // -im
        L"\u0435\u043c",       // -em
        L"\u0438\u0435",       // -ie
        L"\u0438\u0445",       // -ikh
        L"\u0438\u043c\u0438", // -imi
        L"\u043e\u0439",       // -oy
        L"\u044b\u0439",       // -yy
        L"\u043e\u0433\u043e", // -ogo
        L"\u043e\u043c\u0443", // -omu
        L"\u044b\u043c",       // -ym
        L"\u043e\u043c",       // -om
        L"\u044b\u0435",       // -ye
        L"\u044b\u0445",       // -ykh
        L"\u044b\u043c\u0438", // -ymi
        L"\u0430\u044f",       // -aya
        L"\u0435\u0439",       // -ey
        L"\u0443\u044e",       // -uyu
        L"\u0435\u0435",       // -ee
        L"\u043e\u0435"        // -oe
    };
}
|
||||||
|
|
||||||
|
// Maps the textual declension identifier used in the declension CSV to the
// matching AdjectiveDeclencion enumerator.
// An unknown identifier is reported on std::cout and mapped to
// IFORM_MALE_INANIMATE.
AdjectiveDeclencion WStringToAdjectiveDeclencion(std::wstring str)
{
    struct NamedDeclencion
    {
        const wchar_t* name;
        AdjectiveDeclencion value;
    };

    static const NamedDeclencion knownDeclencions[] = {
        { L"IFORM_MALE_INANIMATE", IFORM_MALE_INANIMATE },
        { L"IFORM_MALE_ANIMATE", IFORM_MALE_ANIMATE },
        { L"UOFORM_MALE_INANIMATE", UOFORM_MALE_INANIMATE },
        { L"UOFORM_MALE_ANIMATE", UOFORM_MALE_ANIMATE },
        { L"IFORM_FEMALE_INANIMATE", IFORM_FEMALE_INANIMATE },
        { L"IFORM_FEMALE_ANIMATE", IFORM_FEMALE_ANIMATE },
        { L"UOFORM_FEMALE_INANIMATE", UOFORM_FEMALE_INANIMATE },
        { L"UOFORM_FEMALE_ANIMATE", UOFORM_FEMALE_ANIMATE },
        { L"IFORM_NEUTRAL", IFORM_NEUTRAL },
        { L"UOFORM_NEUTRAL", UOFORM_NEUTRAL }
    };

    for (const NamedDeclencion& candidate : knownDeclencions)
    {
        if (str == candidate.name)
        {
            return candidate.value;
        }
    }

    // No identifier matched: report and fall back to the first enumerator.
    std::cout << "Error in WStringToAdjectiveDeclencion";
    return IFORM_MALE_INANIMATE;
}
|
||||||
|
|
||||||
|
std::set<AdjectiveEndingDivision> getPossibleAdjectiveEndingDivisionSet(std::wstring noun)
|
||||||
|
{
|
||||||
|
std::set<AdjectiveEndingDivision> result;
|
||||||
|
|
||||||
|
auto allAdjectiveEndingArr = GetAllAdjectiveEndingArr();
|
||||||
|
|
||||||
|
for (auto ending : allAdjectiveEndingArr)
|
||||||
|
{
|
||||||
|
if (boost::ends_with(noun, ending))
|
||||||
|
{
|
||||||
|
std::wstring adjectiveBase = boost::replace_last_copy(noun, ending, "");
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if (charIsConsolant(adjectiveBase[adjectiveBase.size() - 1]))
|
||||||
|
{
|
||||||
|
result.insert({ adjectiveBase, ending, AdjectiveEndingDivision::DC_COMMON });
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<AdjectiveTuple> GetPossibleTupleArr(std::wstring ending)
|
||||||
|
{
|
||||||
|
std::vector<AdjectiveTuple> result;
|
||||||
|
|
||||||
|
for (auto& adjective : adjectiveDeclencionCaseTable)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < NGC_SIZE * NC_SIZE; i++)
|
||||||
|
{
|
||||||
|
if (adjective.grammaticalCaseTable[i].ending.count(ending) != 0)
|
||||||
|
{
|
||||||
|
result.push_back(AdjectiveTuple{ adjective.adjectiveDeclencion, adjective.grammaticalCaseTable[i].count, adjective.grammaticalCaseTable[i].grammaticalCase });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool AdjectiveIsInDictionary(std::wstring nominative)
|
||||||
|
{
|
||||||
|
for (auto& adjective : AdjectiveRecordArr)
|
||||||
|
{
|
||||||
|
if (adjective.nominativeMaleForm == nominative)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a copy of the first record in AdjectiveRecordArr whose nominative
// masculine form equals the given word, or a default-constructed record when
// there is none.
AdjectiveRecord GetAdjectiveRecordFromDictionary(std::wstring nominative)
{
    for (auto entry = AdjectiveRecordArr.begin(); entry != AdjectiveRecordArr.end(); ++entry)
    {
        if (entry->nominativeMaleForm == nominative)
        {
            return *entry;
        }
    }

    return AdjectiveRecord();
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
std::set<std::wstring> GetAdjectiveNominative(std::wstring base, AdjectiveDeclencion declencion, NounCount nounCount)
|
||||||
|
{
|
||||||
|
std::set<std::wstring> result;
|
||||||
|
|
||||||
|
AdjectiveDeclencionCaseTableRecord declencionCaseTableRecord = adjectiveDeclencionCaseTable[static_cast<int>(declencion)];
|
||||||
|
|
||||||
|
for (auto& grammaticalTableRecord : declencionCaseTableRecord.grammaticalCaseTable)
|
||||||
|
{
|
||||||
|
if (grammaticalTableRecord.grammaticalCase == NGC_P1_NOMINATIVE && grammaticalTableRecord.count == nounCount)
|
||||||
|
{
|
||||||
|
for (auto& e : grammaticalTableRecord.ending)
|
||||||
|
{
|
||||||
|
result.insert(base + e);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}*/
|
||||||
|
|
||||||
|
std::set<std::wstring> GetNominativeMaleSingular(std::wstring base)
|
||||||
|
{
|
||||||
|
std::wstring result;
|
||||||
|
|
||||||
|
if (charIsIFormConsolant(base[base.size() - 1]))
|
||||||
|
{
|
||||||
|
return{ base + L"èé" };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (charIsUFormConsolant(base[base.size() - 1]))
|
||||||
|
{
|
||||||
|
return{ base + L"ûé", base + L"îé" };
|
||||||
|
}
|
||||||
|
|
||||||
|
return{};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// True for the four *_ANIMATE declensions (masculine and feminine, both
// ending families); false for every other value, including the neuter ones.
bool IsDeclencionAnimated(AdjectiveDeclencion declention)
{
    const bool animated =
        declention == AJ::IFORM_MALE_ANIMATE ||
        declention == AJ::UOFORM_MALE_ANIMATE ||
        declention == AJ::IFORM_FEMALE_ANIMATE ||
        declention == AJ::UOFORM_FEMALE_ANIMATE;

    return animated;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Derives the grammatical gender encoded in a declension value:
// the *_MALE_* values give NG_MALE, the *_FEMALE_* values give NG_FEMALE,
// and anything else gives NG_NEUTRAL.
NounGender GetGenderFromDeclencion(AdjectiveDeclencion declention)
{
    if (declention == AJ::IFORM_MALE_INANIMATE ||
        declention == AJ::IFORM_MALE_ANIMATE ||
        declention == AJ::UOFORM_MALE_INANIMATE ||
        declention == AJ::UOFORM_MALE_ANIMATE)
    {
        return NG_MALE;
    }

    if (declention == AJ::IFORM_FEMALE_INANIMATE ||
        declention == AJ::IFORM_FEMALE_ANIMATE ||
        declention == AJ::UOFORM_FEMALE_INANIMATE ||
        declention == AJ::UOFORM_FEMALE_ANIMATE)
    {
        return NG_FEMALE;
    }

    return NG_NEUTRAL;
}
|
||||||
|
|
||||||
|
|
||||||
|
bool IFormTest(const AdjectiveRecord& r)
|
||||||
|
{
|
||||||
|
return charIsIFormConsolant(r.nominativeMaleForm[r.nominativeMaleForm.size() - 3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool UOFormTest(const AdjectiveRecord& r)
|
||||||
|
{
|
||||||
|
return charIsUFormConsolant(r.nominativeMaleForm[r.nominativeMaleForm.size() - 3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::map<AdjectiveDeclencion, std::function < bool(const AdjectiveRecord&) >> DeclentionConditionMap;
|
||||||
|
|
||||||
|
void SetupDeclentionMap()
|
||||||
|
{
|
||||||
|
|
||||||
|
DeclentionConditionMap[IFORM_MALE_INANIMATE] = std::bind(IFormTest, std::placeholders::_1);
|
||||||
|
DeclentionConditionMap[IFORM_MALE_ANIMATE] = std::bind(IFormTest, std::placeholders::_1);
|
||||||
|
DeclentionConditionMap[UOFORM_MALE_INANIMATE] = std::bind(UOFormTest, std::placeholders::_1);
|
||||||
|
DeclentionConditionMap[UOFORM_MALE_ANIMATE] = std::bind(UOFormTest, std::placeholders::_1);
|
||||||
|
|
||||||
|
DeclentionConditionMap[IFORM_FEMALE_INANIMATE] = std::bind(IFormTest, std::placeholders::_1);
|
||||||
|
DeclentionConditionMap[IFORM_FEMALE_ANIMATE] = std::bind(IFormTest, std::placeholders::_1);
|
||||||
|
DeclentionConditionMap[UOFORM_FEMALE_INANIMATE] = std::bind(UOFormTest, std::placeholders::_1);
|
||||||
|
DeclentionConditionMap[UOFORM_FEMALE_ANIMATE] = std::bind(UOFormTest, std::placeholders::_1);
|
||||||
|
|
||||||
|
DeclentionConditionMap[IFORM_NEUTRAL] = std::bind(IFormTest, std::placeholders::_1);
|
||||||
|
DeclentionConditionMap[UOFORM_NEUTRAL] = std::bind(UOFormTest, std::placeholders::_1);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs the test registered in DeclentionConditionMap for the declension held
// in the first tuple element and returns its verdict for the record.
// The lookup uses operator[], so a declension that was never registered gets
// an empty std::function inserted, and invoking that throws
// std::bad_function_call.
bool AdjectiveFitsDeclention(AdjectiveRecord record, AdjectiveTuple tuple)
{
    auto& declencionTest = DeclentionConditionMap[std::get<0>(tuple)];

    return declencionTest(record);
}
|
||||||
|
|
||||||
|
std::map < AdjectiveEndingDivision::DivisionCase, std::function < bool(AdjectiveTuple) >> DivisionCaseAdjectiveTupleFilterMap;
|
||||||
|
|
||||||
|
std::map < AdjectiveEndingDivision::DivisionCase, std::function < bool(AdjectiveTuple, AdjectiveRecord) >> DivisionCaseAdjectiveTupleRecordFilterMap;
|
||||||
|
|
||||||
|
|
||||||
|
void FillDivisionCaseMaps()
|
||||||
|
{
|
||||||
|
|
||||||
|
DivisionCaseAdjectiveTupleFilterMap[AdjectiveEndingDivision::DC_COMMON] = [](AdjectiveTuple tuple)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
};
|
||||||
|
|
||||||
|
DivisionCaseAdjectiveTupleRecordFilterMap[AdjectiveEndingDivision::DC_COMMON] = [](AdjectiveTuple tuple, AdjectiveRecord record)
|
||||||
|
{
|
||||||
|
return AdjectiveFitsDeclention(record, tuple);
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::set<AdjectiveStruct> RecognizeAdjective(std::wstring noun)
|
||||||
|
{
|
||||||
|
std::set<AdjectiveStruct> result;
|
||||||
|
|
||||||
|
auto adjectiveEndingDivisionArr = getPossibleAdjectiveEndingDivisionSet(noun);
|
||||||
|
|
||||||
|
for (auto adjectiveEndingDivision : adjectiveEndingDivisionArr)
|
||||||
|
{
|
||||||
|
std::wstring base = adjectiveEndingDivision.base;
|
||||||
|
std::wstring ending = adjectiveEndingDivision.ending;
|
||||||
|
AdjectiveEndingDivision::DivisionCase dc = adjectiveEndingDivision.divisionCase;
|
||||||
|
|
||||||
|
std::wstring modifiedBase = base;
|
||||||
|
std::wstring modifiedEnding = ending;
|
||||||
|
|
||||||
|
std::vector<AdjectiveTuple> possibleTupleArr = GetPossibleTupleArr(modifiedEnding);
|
||||||
|
|
||||||
|
for (AdjectiveTuple tuple : possibleTupleArr)
|
||||||
|
{
|
||||||
|
if (DivisionCaseAdjectiveTupleFilterMap[dc](tuple))
|
||||||
|
{
|
||||||
|
std::set<std::wstring> nominaviteSingularSet = GetNominativeMaleSingular(modifiedBase);
|
||||||
|
|
||||||
|
for (auto& nn : nominaviteSingularSet)
|
||||||
|
{
|
||||||
|
|
||||||
|
if (AdjectiveIsInDictionary(nn))
|
||||||
|
{
|
||||||
|
|
||||||
|
AdjectiveRecord record = GetAdjectiveRecordFromDictionary(nn);
|
||||||
|
|
||||||
|
if (DivisionCaseAdjectiveTupleRecordFilterMap[dc](tuple, record))
|
||||||
|
{
|
||||||
|
result.insert({ std::get<2>(tuple), std::get<1>(tuple), GetGenderFromDeclencion(std::get<0>(tuple)), IsDeclencionAnimated(std::get<0>(tuple)), record });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void LoadAdjectiveDeclencionCaseTable()
|
||||||
|
{
|
||||||
|
|
||||||
|
adjectiveDeclencionCaseTable.clear();
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
std::ifstream f("C:/Workplace/ChineseJournal/rudict/grammar_case_adjective.csv");
|
||||||
|
|
||||||
|
#else
|
||||||
|
std::ifstream f("/home/devuser/workplace/rudict/grammar_case_adjective.csv");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
std::string line;
|
||||||
|
std::wstring wline;
|
||||||
|
|
||||||
|
if (f.is_open())
|
||||||
|
{
|
||||||
|
|
||||||
|
std::cout << "File found!" << std::endl;
|
||||||
|
|
||||||
|
std::vector<GrammaticalTableRecord> currentGrammaticalCaseTable;
|
||||||
|
std::wstring currentAdjectiveDeclencion;
|
||||||
|
std::wstring currentAdjectiveCount;
|
||||||
|
|
||||||
|
getline(f, line); //Skip one line
|
||||||
|
|
||||||
|
while (getline(f, line))
|
||||||
|
{
|
||||||
|
std::vector<std::string> lineArr;
|
||||||
|
|
||||||
|
boost::split_regex(lineArr, line, boost::regex(";"));
|
||||||
|
|
||||||
|
if (lineArr[0] != "")
|
||||||
|
{
|
||||||
|
if (currentAdjectiveDeclencion == L"")
|
||||||
|
{
|
||||||
|
currentAdjectiveDeclencion = string_to_wstring(lineArr[0]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
adjectiveDeclencionCaseTable.push_back(AdjectiveDeclencionCaseTableRecord{ WStringToAdjectiveDeclencion(currentAdjectiveDeclencion), currentGrammaticalCaseTable });
|
||||||
|
|
||||||
|
currentAdjectiveDeclencion = string_to_wstring(lineArr[0]);
|
||||||
|
|
||||||
|
currentGrammaticalCaseTable.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lineArr[1] != "")
|
||||||
|
{
|
||||||
|
currentAdjectiveCount = string_to_wstring(lineArr[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::wstring endings = string_to_wstring(lineArr[3]);
|
||||||
|
|
||||||
|
std::set<std::wstring> endingsSet;
|
||||||
|
boost::split_regex(endingsSet, endings, boost::regex(", "));
|
||||||
|
|
||||||
|
currentGrammaticalCaseTable.push_back({
|
||||||
|
WStringToNounCount(currentAdjectiveCount),
|
||||||
|
WStringToNounGrammaticalCase(string_to_wstring(lineArr[2])),
|
||||||
|
endingsSet
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
//Add last one
|
||||||
|
if (currentAdjectiveDeclencion != L"")
|
||||||
|
{
|
||||||
|
adjectiveDeclencionCaseTable.push_back(AdjectiveDeclencionCaseTableRecord{ WStringToAdjectiveDeclencion(currentAdjectiveDeclencion), currentGrammaticalCaseTable });
|
||||||
|
}
|
||||||
|
|
||||||
|
f.close();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::cout << "file not found!" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void LoadFrequentAdjectiveSet()
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
std::ifstream f("C:/Workplace/ChineseJournal/rudict/frequent_adjectives_2000.csv");
|
||||||
|
|
||||||
|
#else
|
||||||
|
std::ifstream f("/home/devuser/workplace/rudict/frequent_adjectives_2000.csv");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
std::string line;
|
||||||
|
std::wstring wline;
|
||||||
|
|
||||||
|
if (f.is_open())
|
||||||
|
{
|
||||||
|
|
||||||
|
getline(f, line); //Skip one line
|
||||||
|
|
||||||
|
std::cout << "File found!" << std::endl;
|
||||||
|
while (getline(f, line))
|
||||||
|
{
|
||||||
|
|
||||||
|
wline = string_to_wstring(line);
|
||||||
|
AdjectiveRecord adjectiveRecord(wline);
|
||||||
|
|
||||||
|
AdjectiveRecordArr.push_back(adjectiveRecord);
|
||||||
|
|
||||||
|
}
|
||||||
|
f.close();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::cout << "file not found!" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} //namespace AJ
|
||||||
|
|
186
rudict/rudict/adjective.h
Normal file
186
rudict/rudict/adjective.h
Normal file
@ -0,0 +1,186 @@
|
|||||||
|
#ifndef ADJECTIVE_H_INCLUDED
|
||||||
|
#define ADJECTIVE_H_INCLUDED
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <map>
|
||||||
|
#include <set>
|
||||||
|
#include <vector>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
|
#include "boost/algorithm/string.hpp"
|
||||||
|
|
||||||
|
#include "grammarCase.h"
|
||||||
|
|
||||||
|
namespace AJ
|
||||||
|
{
|
||||||
|
|
||||||
|
// One dictionary entry for an adjective.
struct AdjectiveRecord
{
	// Dictionary (lookup) form; also the sole ordering key, see operator<.
	std::wstring nominativeMaleForm;

	// NOTE(review): presumably mirrors the 0/1 column of the CSV dictionary
	// ("short form can be built in the regular way") - confirm in the constructor.
	bool standardShortFormAvailable;

	// NOTE(review): presumably the irregular short form from the last CSV column,
	// empty when there is none - confirm in the constructor.
	std::wstring specialShortForm;

	AdjectiveRecord();

	// Builds a record from one line of the dictionary file (defined in adjective.cpp).
	AdjectiveRecord(std::wstring line);

	// Orders records by nominativeMaleForm only, so a std::set of records
	// treats two records with the same nominative form as duplicates.
	bool operator<(const AdjectiveRecord& rhs) const
	{
		return nominativeMaleForm.compare(rhs.nominativeMaleForm) < 0;
	}
};
|
||||||
|
|
||||||
|
|
||||||
|
extern std::vector<AdjectiveRecord> AdjectiveRecordArr;
|
||||||
|
|
||||||
|
|
||||||
|
struct AdjectiveStruct
|
||||||
|
{
|
||||||
|
NounGrammaticalCase grammaticalCase;
|
||||||
|
|
||||||
|
NounCount count;
|
||||||
|
NounGender gender;
|
||||||
|
|
||||||
|
bool isDeclentionAnimated;
|
||||||
|
|
||||||
|
AdjectiveRecord adjectiveRecord;
|
||||||
|
|
||||||
|
bool operator<(const AdjectiveStruct& n) const
|
||||||
|
{
|
||||||
|
if (grammaticalCase != n.grammaticalCase)
|
||||||
|
{
|
||||||
|
return grammaticalCase < n.grammaticalCase;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (count != n.count)
|
||||||
|
{
|
||||||
|
return count < n.count;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (gender != n.gender)
|
||||||
|
{
|
||||||
|
return gender < n.gender;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (isDeclentionAnimated != n.isDeclentionAnimated)
|
||||||
|
{
|
||||||
|
return isDeclentionAnimated < n.isDeclentionAnimated;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return adjectiveRecord < n.adjectiveRecord;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<std::wstring> GetAllAdjectiveEndingArr();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Declension classes used for adjectives. The enumerators are consecutive
// integers starting at 0; WStringToAdjectiveDeclencion() maps a textual name
// to one of these values.
enum AdjectiveDeclencion
{
	// Masculine
	IFORM_MALE_INANIMATE    = 0,
	IFORM_MALE_ANIMATE      = 1,
	UOFORM_MALE_INANIMATE   = 2,
	UOFORM_MALE_ANIMATE     = 3,

	// Feminine
	IFORM_FEMALE_INANIMATE  = 4,
	IFORM_FEMALE_ANIMATE    = 5,
	UOFORM_FEMALE_INANIMATE = 6,
	UOFORM_FEMALE_ANIMATE   = 7,

	// Neuter (no animate/inanimate split)
	IFORM_NEUTRAL           = 8,
	UOFORM_NEUTRAL          = 9
};
|
||||||
|
|
||||||
|
|
||||||
|
struct AdjectiveDeclencionCaseTableRecord
|
||||||
|
{
|
||||||
|
AdjectiveDeclencion adjectiveDeclencion;
|
||||||
|
std::vector<GrammaticalTableRecord> grammaticalCaseTable;
|
||||||
|
};
|
||||||
|
|
||||||
|
extern std::vector<AdjectiveDeclencionCaseTableRecord> adjectiveDeclencionCaseTable;
|
||||||
|
|
||||||
|
|
||||||
|
AdjectiveDeclencion WStringToAdjectiveDeclencion(std::wstring str);
|
||||||
|
|
||||||
|
|
||||||
|
// One way of splitting a word form into a base (stem) and an ending.
struct AdjectiveEndingDivision
{
	std::wstring base;
	std::wstring ending;

	// How the split was obtained; only the plain split exists so far.
	enum DivisionCase
	{
		DC_COMMON = 0
	} divisionCase;

	// Lexicographic ordering over (base, ending, divisionCase) so that the
	// struct can be stored in a std::set.
	bool operator<(const AdjectiveEndingDivision& rhs) const
	{
		if (base != rhs.base)
		{
			return base < rhs.base;
		}

		if (ending != rhs.ending)
		{
			return ending < rhs.ending;
		}

		return divisionCase < rhs.divisionCase;
	}
};
|
||||||
|
|
||||||
|
std::set<AdjectiveEndingDivision> getPossibleAdjectiveEndingDivisionSet(std::wstring noun);
|
||||||
|
|
||||||
|
|
||||||
|
typedef std::tuple <
|
||||||
|
AdjectiveDeclencion,
|
||||||
|
NounCount,
|
||||||
|
NounGrammaticalCase
|
||||||
|
> AdjectiveTuple;
|
||||||
|
|
||||||
|
std::vector<AdjectiveTuple> GetPossibleTupleArr(std::wstring ending);
|
||||||
|
|
||||||
|
bool AdjectiveIsInDictionary(std::wstring nominative);
|
||||||
|
AdjectiveRecord GetAdjectiveRecordFromDictionary(std::wstring nominative);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//std::set<std::wstring> GetAdjectiveNominative(std::wstring base, AdjectiveDeclencion declencion, NounCount nounCount);
|
||||||
|
std::set<std::wstring> GetNominativeMaleSingular(std::wstring base);
|
||||||
|
|
||||||
|
void SetupDeclentionMap();
|
||||||
|
bool AdjectiveFitsDeclention(AdjectiveRecord record, AdjectiveTuple tuple);
|
||||||
|
|
||||||
|
bool IsDeclencionAnimated(AdjectiveDeclencion declention);
|
||||||
|
NounGender GetGenderFromDeclencion(AdjectiveDeclencion declention);
|
||||||
|
|
||||||
|
void FillDivisionCaseMaps();
|
||||||
|
|
||||||
|
|
||||||
|
std::set<AdjectiveStruct> RecognizeAdjective(std::wstring noun);
|
||||||
|
|
||||||
|
|
||||||
|
void LoadAdjectiveDeclencionCaseTable();
|
||||||
|
|
||||||
|
|
||||||
|
void LoadFrequentAdjectiveSet();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} //namespace AJ
|
||||||
|
|
||||||
|
|
||||||
|
#endif //ADJECTIVE_H_INCLUDED
|
128
rudict/rudict/grammarCase.cpp
Normal file
128
rudict/rudict/grammarCase.cpp
Normal file
@ -0,0 +1,128 @@
|
|||||||
|
#include "grammarCase.h"
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
// Parses the textual name of a grammatical number ("NC_SINGULAR" / "NC_PLURAL").
// Any other input prints a diagnostic to stdout and yields NC_SINGULAR.
NounCount WStringToNounCount(std::wstring str)
{
	const bool isSingular = (str == L"NC_SINGULAR");
	const bool isPlural = !isSingular && (str == L"NC_PLURAL");

	if (!isSingular && !isPlural)
	{
		std::cout << "Error in WStringToNounCount!" << std::endl;
	}

	// Unknown names fall back to singular.
	return isPlural ? NC_PLURAL : NC_SINGULAR;
}
|
||||||
|
|
||||||
|
std::wstring NounCountToWString(NounCount nounCount)
|
||||||
|
{
|
||||||
|
if (nounCount == NC_SINGULAR)
|
||||||
|
{
|
||||||
|
return L"NC_SINGULAR";
|
||||||
|
}
|
||||||
|
if (nounCount == NC_PLURAL)
|
||||||
|
{
|
||||||
|
return L"NC_PLURAL";
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Error in NounCountToWString!" << std::endl;
|
||||||
|
return L"";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase)
|
||||||
|
{
|
||||||
|
switch (nounGrammaticalCase)
|
||||||
|
{
|
||||||
|
case NGC_P1_NOMINATIVE: return L"NGC_P1_NOMINATIVE";
|
||||||
|
case NGC_P2_GENITIVE: return L"NGC_P2_GENITIVE";
|
||||||
|
case NGC_P3_DATIVE: return L"NGC_P3_DATIVE";
|
||||||
|
case NGC_P4_ACCUSATIVE: return L"NGC_P4_ACCUSATIVE";
|
||||||
|
case NGC_P5_INSTRUMENTAL: return L"NGC_P5_INSTRUMENTAL";
|
||||||
|
case NGC_P6_PREPOSITIONAL: return L"NGC_P6_PREPOSITIONAL";
|
||||||
|
}
|
||||||
|
|
||||||
|
return L"";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parses the textual name of a grammatical case (the inverse of
// NounGrammaticalCaseToWString). An unknown name prints a diagnostic to
// stdout and yields NGC_P1_NOMINATIVE.
NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str)
{
	struct NameToCase
	{
		const wchar_t* name;
		NounGrammaticalCase value;
	};

	static const NameToCase knownCases[] =
	{
		{ L"NGC_P1_NOMINATIVE",    NGC_P1_NOMINATIVE },
		{ L"NGC_P2_GENITIVE",      NGC_P2_GENITIVE },
		{ L"NGC_P3_DATIVE",        NGC_P3_DATIVE },
		{ L"NGC_P4_ACCUSATIVE",    NGC_P4_ACCUSATIVE },
		{ L"NGC_P5_INSTRUMENTAL",  NGC_P5_INSTRUMENTAL },
		{ L"NGC_P6_PREPOSITIONAL", NGC_P6_PREPOSITIONAL },
	};

	for (const NameToCase& entry : knownCases)
	{
		if (str == entry.name)
		{
			return entry.value;
		}
	}

	std::cout << "Error in WStringToNounGrammaticalCase!" << std::endl;
	return NGC_P1_NOMINATIVE;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Returns true when c is one of the lowercase Russian consonant letters
//   ц к н г ш щ з х ф в п р л д ж ч с м т б
// The semivowel "й" (U+0439) is deliberately not in the set, and uppercase
// letters are not matched.
//
// The letters are written as universal character names so that the result
// does not depend on the encoding the source file is saved in or on the
// compiler's assumed input charset (the literal had previously degraded into
// Latin-1 look-alikes such as "öêíã...").
bool charIsConsolant(wchar_t c)
{
	const std::wstring consolants =
		L"\u0446\u043A\u043D\u0433\u0448\u0449\u0437\u0445\u0444\u0432"
		L"\u043F\u0440\u043B\u0434\u0436\u0447\u0441\u043C\u0442\u0431";

	return consolants.find(c) != std::wstring::npos;
}
|
||||||
|
|
||||||
|
// Returns true when c is one of the lowercase Russian vowel letters
//   а о у ы э я ё ю и е
// Uppercase letters are not matched.
//
// The letters are written as universal character names so that the result
// does not depend on the encoding the source file is saved in or on the
// compiler's assumed input charset (the literal had previously degraded into
// Latin-1 look-alikes such as "àîóû...").
bool charIsVowel(wchar_t c)
{
	const std::wstring vovels =
		L"\u0430\u043E\u0443\u044B\u044D\u044F\u0451\u044E\u0438\u0435";

	return vovels.find(c) != std::wstring::npos;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Consonant classes used when choosing between declension variants.
// NOTE(review): presumably "I-form" consonants are those after which the
// ending is spelled with "и" and "U-form" those taking "ы" - confirm with the
// declension tables.
//
// Both sets are spelled with universal character names so they do not depend
// on the encoding of the source file (they had degraded into Latin-1
// look-alikes such as "ãõêæ...").

// г х к ж ш ч щ
std::wstring i_form_consolants = L"\u0433\u0445\u043A\u0436\u0448\u0447\u0449";

// б п д т в ф з с н м л р ц
std::wstring u_form_consolants =
	L"\u0431\u043F\u0434\u0442\u0432\u0444\u0437\u0441\u043D\u043C\u043B\u0440\u0446";

// Returns true when c belongs to i_form_consolants.
bool charIsIFormConsolant(wchar_t c)
{
	return i_form_consolants.find(c) != std::wstring::npos;
}

// Returns true when c belongs to u_form_consolants.
// The "not found" sentinel is std::wstring::npos; it is no longer taken from
// the unrelated i_form_consolants object.
bool charIsUFormConsolant(wchar_t c)
{
	return u_form_consolants.find(c) != std::wstring::npos;
}
|
58
rudict/rudict/grammarCase.h
Normal file
58
rudict/rudict/grammarCase.h
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
#ifndef GRAMMAR_CASE_H_INCLUDED
|
||||||
|
#define GRAMMAR_CASE_H_INCLUDED
|
||||||
|
|
||||||
|
|
||||||
|
#include <set>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
// Grammatical gender. Shared by the noun and adjective recognizers.
enum NounGender
{
	NG_MALE    = 0,
	NG_FEMALE  = 1,
	NG_NEUTRAL = 2
};
|
||||||
|
|
||||||
|
// The six grammatical cases, numbered consecutively from 0.
enum NounGrammaticalCase
{
	NGC_P1_NOMINATIVE    = 0,
	NGC_P2_GENITIVE      = 1,
	NGC_P3_DATIVE        = 2,
	NGC_P4_ACCUSATIVE    = 3,
	NGC_P5_INSTRUMENTAL  = 4,
	NGC_P6_PREPOSITIONAL = 5,

	// Not a case: equals the number of cases above.
	NGC_SIZE             = 6
};
|
||||||
|
|
||||||
|
// Grammatical number.
enum NounCount
{
	NC_SINGULAR = 0,
	NC_PLURAL   = 1,

	// Not a number: equals the count of enumerators above.
	NC_SIZE     = 2
};
|
||||||
|
|
||||||
|
|
||||||
|
struct GrammaticalTableRecord
|
||||||
|
{
|
||||||
|
NounCount count;
|
||||||
|
|
||||||
|
NounGrammaticalCase grammaticalCase;
|
||||||
|
|
||||||
|
std::set<std::wstring> ending;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
std::wstring NounCountToWString(NounCount nounCount);
|
||||||
|
NounCount WStringToNounCount(std::wstring str);
|
||||||
|
std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase);
|
||||||
|
NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str);
|
||||||
|
|
||||||
|
|
||||||
|
bool charIsConsolant(wchar_t c); //except the semivowel "й" (Cyrillic short i, U+0439)
|
||||||
|
|
||||||
|
bool charIsVowel(wchar_t c);
|
||||||
|
|
||||||
|
bool charIsIFormConsolant(wchar_t c);
|
||||||
|
bool charIsUFormConsolant(wchar_t c);
|
||||||
|
|
||||||
|
|
||||||
|
#endif //GRAMMAR_CASE_H_INCLUDED
|
@ -21,6 +21,7 @@
|
|||||||
#include "boost/property_tree/json_parser.hpp"
|
#include "boost/property_tree/json_parser.hpp"
|
||||||
#include "../utf8utf16.h"
|
#include "../utf8utf16.h"
|
||||||
#include "../noun.h"
|
#include "../noun.h"
|
||||||
|
#include "../adjective.h"
|
||||||
|
|
||||||
namespace http {
|
namespace http {
|
||||||
namespace server {
|
namespace server {
|
||||||
@ -144,12 +145,16 @@ namespace http {
|
|||||||
result.put(L"error", L"String is too short");
|
result.put(L"error", L"String is too short");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::set<NounStruct> nounStructArr = RecognizeNoun(request);
|
|
||||||
|
|
||||||
|
int id;
|
||||||
|
|
||||||
|
//Noun!
|
||||||
|
|
||||||
|
std::set<NN::NounStruct> nounStructArr = NN::RecognizeNoun(request);
|
||||||
|
|
||||||
std::cout <<"nounstructarr" << nounStructArr.size() << std::endl;
|
std::cout <<"nounstructarr" << nounStructArr.size() << std::endl;
|
||||||
|
|
||||||
int id = 0;
|
id = 0;
|
||||||
|
|
||||||
boost::property_tree::wptree nounArr;
|
boost::property_tree::wptree nounArr;
|
||||||
|
|
||||||
@ -178,6 +183,35 @@ namespace http {
|
|||||||
|
|
||||||
result.put_child(L"nouns", nounArr);
|
result.put_child(L"nouns", nounArr);
|
||||||
|
|
||||||
|
//Adjective!
|
||||||
|
|
||||||
|
std::set<AJ::AdjectiveStruct> adjectiveStructArr = AJ::RecognizeAdjective(request);
|
||||||
|
|
||||||
|
id = 0;
|
||||||
|
|
||||||
|
boost::property_tree::wptree adjectiveArr;
|
||||||
|
|
||||||
|
for (auto& adjectiveStruct : adjectiveStructArr)
|
||||||
|
{
|
||||||
|
boost::property_tree::wptree adjectivrTree;
|
||||||
|
|
||||||
|
adjectivrTree.put(L"id", id);
|
||||||
|
adjectivrTree.put(L"grammaticalCase", NounGrammaticalCaseToWString(adjectiveStruct.grammaticalCase));
|
||||||
|
adjectivrTree.put(L"animated", adjectiveStruct.isDeclentionAnimated);
|
||||||
|
adjectivrTree.put(L"count", NounCountToWString(adjectiveStruct.count));
|
||||||
|
|
||||||
|
adjectivrTree.put(L"gender", adjectiveStruct.gender);
|
||||||
|
|
||||||
|
adjectivrTree.put(L"nominativeSingularForm", adjectiveStruct.adjectiveRecord.nominativeMaleForm);
|
||||||
|
|
||||||
|
adjectiveArr.push_back(std::make_pair(L"", adjectivrTree));
|
||||||
|
|
||||||
|
id++;
|
||||||
|
}
|
||||||
|
|
||||||
|
result.put_child(L"adjectives", adjectiveArr);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
|
@ -6,17 +6,25 @@
|
|||||||
|
|
||||||
#include "noun.h"
|
#include "noun.h"
|
||||||
|
|
||||||
|
#include "adjective.h"
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
SetupDeclentionMap();
|
NN::SetupDeclentionMap();
|
||||||
LoadNounDeclencionCaseTable();
|
NN::LoadNounDeclencionCaseTable();
|
||||||
LoadFrequentWordSet();
|
NN::LoadFrequentWordSet();
|
||||||
FillDivisionCaseMaps();
|
NN::FillDivisionCaseMaps();
|
||||||
CalculatePluralForm();
|
NN::CalculatePluralForm();
|
||||||
|
|
||||||
|
AJ::LoadAdjectiveDeclencionCaseTable();
|
||||||
|
AJ::LoadFrequentAdjectiveSet();
|
||||||
|
AJ::FillDivisionCaseMaps();
|
||||||
|
AJ::SetupDeclentionMap();
|
||||||
|
|
||||||
//RecognizeNoun(L"стульями");
|
//RecognizeNoun(L"стульями");
|
||||||
//Косяк: "вечер"
|
//Косяк: "вечер"
|
||||||
|
|
||||||
|
//AJ::RecognizeAdjective(L"золотыми");
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -7,6 +7,8 @@
|
|||||||
#include "boost/regex.hpp"
|
#include "boost/regex.hpp"
|
||||||
#include "boost/algorithm/string/regex.hpp"
|
#include "boost/algorithm/string/regex.hpp"
|
||||||
|
|
||||||
|
namespace NN
|
||||||
|
{
|
||||||
|
|
||||||
std::vector<NounRecord> NounRecordArr;
|
std::vector<NounRecord> NounRecordArr;
|
||||||
|
|
||||||
@ -153,83 +155,6 @@ NounDeclencion WStringToNounDeclencion(std::wstring str)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NounCount WStringToNounCount(std::wstring str)
|
|
||||||
{
|
|
||||||
if (str == L"NC_SINGULAR")
|
|
||||||
{
|
|
||||||
return NC_SINGULAR;
|
|
||||||
}
|
|
||||||
if (str == L"NC_PLURAL")
|
|
||||||
{
|
|
||||||
return NC_PLURAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "Error in WStringToNounCount!" << std::endl;
|
|
||||||
return NC_SINGULAR;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::wstring NounCountToWString(NounCount nounCount)
|
|
||||||
{
|
|
||||||
if (nounCount == NC_SINGULAR)
|
|
||||||
{
|
|
||||||
return L"NC_SINGULAR";
|
|
||||||
}
|
|
||||||
if (nounCount == NC_PLURAL)
|
|
||||||
{
|
|
||||||
return L"NC_PLURAL";
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "Error in NounCountToWString!" << std::endl;
|
|
||||||
return L"";
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase)
|
|
||||||
{
|
|
||||||
switch (nounGrammaticalCase)
|
|
||||||
{
|
|
||||||
case NGC_P1_NOMINATIVE: return L"NGC_P1_NOMINATIVE";
|
|
||||||
case NGC_P2_GENITIVE: return L"NGC_P2_GENITIVE";
|
|
||||||
case NGC_P3_DATIVE: return L"NGC_P3_DATIVE";
|
|
||||||
case NGC_P4_ACCUSATIVE: return L"NGC_P4_ACCUSATIVE";
|
|
||||||
case NGC_P5_INSTRUMENTAL: return L"NGC_P5_INSTRUMENTAL";
|
|
||||||
case NGC_P6_PREPOSITIONAL: return L"NGC_P6_PREPOSITIONAL";
|
|
||||||
}
|
|
||||||
|
|
||||||
return L"";
|
|
||||||
}
|
|
||||||
|
|
||||||
NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str)
|
|
||||||
{
|
|
||||||
|
|
||||||
if (str == L"NGC_P1_NOMINATIVE")
|
|
||||||
{
|
|
||||||
return NGC_P1_NOMINATIVE;
|
|
||||||
}
|
|
||||||
if (str == L"NGC_P2_GENITIVE")
|
|
||||||
{
|
|
||||||
return NGC_P2_GENITIVE;
|
|
||||||
}
|
|
||||||
if (str == L"NGC_P3_DATIVE")
|
|
||||||
{
|
|
||||||
return NGC_P3_DATIVE;
|
|
||||||
}
|
|
||||||
if (str == L"NGC_P4_ACCUSATIVE")
|
|
||||||
{
|
|
||||||
return NGC_P4_ACCUSATIVE;
|
|
||||||
}
|
|
||||||
if (str == L"NGC_P5_INSTRUMENTAL")
|
|
||||||
{
|
|
||||||
return NGC_P5_INSTRUMENTAL;
|
|
||||||
}
|
|
||||||
if (str == L"NGC_P6_PREPOSITIONAL")
|
|
||||||
{
|
|
||||||
return NGC_P6_PREPOSITIONAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "Error in WStringToNounGrammaticalCase!" << std::endl;
|
|
||||||
return NGC_P1_NOMINATIVE;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::wstring> GetAllNounEndingArr()
|
std::vector<std::wstring> GetAllNounEndingArr()
|
||||||
{
|
{
|
||||||
@ -458,35 +383,6 @@ NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativeP
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool charIsConsolant(wchar_t c) //except й
|
|
||||||
{
|
|
||||||
std::wstring consolants = L"цкнгшщзхфвпрлджчсмтб";
|
|
||||||
|
|
||||||
for (wchar_t ic : consolants)
|
|
||||||
{
|
|
||||||
if (c == ic)
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool charIsVowel(wchar_t c)
|
|
||||||
{
|
|
||||||
std::wstring vovels = L"аоуыэяёюие";
|
|
||||||
|
|
||||||
for (wchar_t ic : vovels)
|
|
||||||
{
|
|
||||||
if (c == ic)
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool charIsMissingVowelSoftenerConsolant(wchar_t c)
|
bool charIsMissingVowelSoftenerConsolant(wchar_t c)
|
||||||
@ -545,13 +441,6 @@ std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun)
|
|||||||
result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E });
|
result.insert({ nounBase, ending, NounEndingDivision::DC_LOST_VOWEL_E });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
if (ending == L"а" || ending == L"я")
|
|
||||||
{
|
|
||||||
result.insert({ nounBase, ending, NounEndingDivision::DC_SPECIAL_PLURAL_A });
|
|
||||||
}*/
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -575,34 +464,6 @@ std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding)
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
//Xperimental -- additionally check for plural form with a-ending
|
|
||||||
if (nounEnding == L"а" || nounEnding == L"я")
|
|
||||||
{
|
|
||||||
|
|
||||||
result.push_back(NounTuple{ SECOND_MALE_IFORM_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
|
||||||
result.push_back(NounTuple{ SECOND_MALE_IFORM_INANIMATE, NC_PLURAL, NGC_P4_ACCUSATIVE });
|
|
||||||
|
|
||||||
result.push_back(NounTuple{ SECOND_MALE_UFORM_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
|
||||||
result.push_back(NounTuple{ SECOND_MALE_UFORM_INANIMATE, NC_PLURAL, NGC_P4_ACCUSATIVE });
|
|
||||||
|
|
||||||
result.push_back(NounTuple{ SECOND_MALE_SSFORM_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
|
||||||
result.push_back(NounTuple{ SECOND_MALE_SSFORM_INANIMATE, NC_PLURAL, NGC_P4_ACCUSATIVE });
|
|
||||||
|
|
||||||
result.push_back(NounTuple{ SECOND_I_SHORT_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
|
||||||
result.push_back(NounTuple{ SECOND_I_SHORT_INANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
|
||||||
|
|
||||||
|
|
||||||
result.push_back(NounTuple{ SECOND_MALE_IFORM_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
|
||||||
|
|
||||||
result.push_back(NounTuple{ SECOND_MALE_UFORM_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
|
||||||
|
|
||||||
result.push_back(NounTuple{ SECOND_MALE_SSFORM_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
|
||||||
|
|
||||||
result.push_back(NounTuple{ SECOND_I_SHORT_ANIMATE, NC_PLURAL, NGC_P1_NOMINATIVE });
|
|
||||||
}*/
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -668,8 +529,6 @@ std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclenci
|
|||||||
return L"";
|
return L"";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::wstring i_form_consolants = L"гхкжшчщ";
|
|
||||||
std::wstring u_form_consolants = L"бпдтвфзснмлрц";
|
|
||||||
|
|
||||||
wchar_t GetLastChar(const NounRecord& nounRecord)
|
wchar_t GetLastChar(const NounRecord& nounRecord)
|
||||||
{
|
{
|
||||||
@ -681,16 +540,6 @@ wchar_t GetPrevLastChar(const NounRecord& nounRecord)
|
|||||||
return nounRecord.nominativeForm[nounRecord.nominativeForm.size() - 2];
|
return nounRecord.nominativeForm[nounRecord.nominativeForm.size() - 2];
|
||||||
}
|
}
|
||||||
|
|
||||||
bool charIsIFormConsolant(wchar_t c)
|
|
||||||
{
|
|
||||||
return i_form_consolants.find(c) != i_form_consolants.npos;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool charIsUFormConsolant(wchar_t c)
|
|
||||||
{
|
|
||||||
return u_form_consolants.find(c) != i_form_consolants.npos;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord)
|
bool FirstAIFormInanimateSingularCondition(const NounRecord& nounRecord)
|
||||||
{
|
{
|
||||||
@ -1376,3 +1225,5 @@ void LoadFrequentWordSet()
|
|||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} //namespace NN
|
@ -9,15 +9,13 @@
|
|||||||
|
|
||||||
#include "boost/algorithm/string.hpp"
|
#include "boost/algorithm/string.hpp"
|
||||||
|
|
||||||
enum NounGender
|
#include "grammarCase.h"
|
||||||
{
|
|
||||||
NG_MALE = 0,
|
|
||||||
NG_FEMALE,
|
|
||||||
NG_NEUTRAL
|
|
||||||
};
|
|
||||||
|
|
||||||
struct NounRecord
|
namespace NN
|
||||||
{
|
{
|
||||||
|
|
||||||
|
struct NounRecord
|
||||||
|
{
|
||||||
std::wstring nominativeForm;
|
std::wstring nominativeForm;
|
||||||
NounGender gender;
|
NounGender gender;
|
||||||
|
|
||||||
@ -44,10 +42,10 @@ struct NounRecord
|
|||||||
{
|
{
|
||||||
return nominativeForm < n.nominativeForm;
|
return nominativeForm < n.nominativeForm;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
enum NounDeclencion
|
enum NounDeclencion
|
||||||
{
|
{
|
||||||
FIRST_A_IFORM_INANIMATE = 0,
|
FIRST_A_IFORM_INANIMATE = 0,
|
||||||
FIRST_A_IFORM_ANIMATE,
|
FIRST_A_IFORM_ANIMATE,
|
||||||
FIRST_A_UFORM_INANIMATE,
|
FIRST_A_UFORM_INANIMATE,
|
||||||
@ -66,67 +64,33 @@ enum NounDeclencion
|
|||||||
SECOND_NEUTRAL_O_FORM,
|
SECOND_NEUTRAL_O_FORM,
|
||||||
THIRD_FORM_INANIMATE,
|
THIRD_FORM_INANIMATE,
|
||||||
THIRD_FORM_ANIMATE,
|
THIRD_FORM_ANIMATE,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum NounGrammaticalCase
|
extern std::vector<NounRecord> NounRecordArr;
|
||||||
{
|
|
||||||
NGC_P1_NOMINATIVE = 0,
|
|
||||||
NGC_P2_GENITIVE,
|
|
||||||
NGC_P3_DATIVE,
|
|
||||||
NGC_P4_ACCUSATIVE,
|
|
||||||
NGC_P5_INSTRUMENTAL,
|
|
||||||
NGC_P6_PREPOSITIONAL,
|
|
||||||
NGC_SIZE
|
|
||||||
};
|
|
||||||
|
|
||||||
enum NounCount
|
|
||||||
{
|
|
||||||
NC_SINGULAR = 0,
|
|
||||||
NC_PLURAL,
|
|
||||||
NC_SIZE
|
|
||||||
};
|
|
||||||
|
|
||||||
struct GrammaticalTableRecord
|
|
||||||
{
|
|
||||||
NounCount count;
|
|
||||||
|
|
||||||
NounGrammaticalCase grammaticalCase;
|
struct NounDeclencionCaseTableRecord
|
||||||
|
{
|
||||||
std::set<std::wstring> ending;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct NounDeclencionCaseTableRecord
|
|
||||||
{
|
|
||||||
NounDeclencion nounDeclencion;
|
NounDeclencion nounDeclencion;
|
||||||
std::vector<GrammaticalTableRecord> grammaticalCaseTable;
|
std::vector<GrammaticalTableRecord> grammaticalCaseTable;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
extern std::vector<NounDeclencionCaseTableRecord> nounDeclencionCaseTable;
|
||||||
|
|
||||||
|
|
||||||
|
NounDeclencion WStringToNounDeclencion(std::wstring str);
|
||||||
|
|
||||||
extern std::vector<NounRecord> NounRecordArr;
|
typedef std::tuple <
|
||||||
|
|
||||||
extern std::vector<NounDeclencionCaseTableRecord> nounDeclencionCaseTable;
|
|
||||||
|
|
||||||
|
|
||||||
NounDeclencion WStringToNounDeclencion(std::wstring str);
|
|
||||||
std::wstring NounCountToWString(NounCount nounCount);
|
|
||||||
NounCount WStringToNounCount(std::wstring str);
|
|
||||||
std::wstring NounGrammaticalCaseToWString(NounGrammaticalCase nounGrammaticalCase);
|
|
||||||
NounGrammaticalCase WStringToNounGrammaticalCase(std::wstring str);
|
|
||||||
|
|
||||||
|
|
||||||
//std::wstring NounNumberToWString(NounNumber nounNumber);
|
|
||||||
|
|
||||||
typedef std::tuple<
|
|
||||||
NounDeclencion,
|
NounDeclencion,
|
||||||
NounCount,
|
NounCount,
|
||||||
NounGrammaticalCase
|
NounGrammaticalCase
|
||||||
> NounTuple;
|
> NounTuple;
|
||||||
|
|
||||||
typedef std::set<std::wstring> StringSet;
|
typedef std::set<std::wstring> StringSet;
|
||||||
|
|
||||||
struct NounEndingDivision
|
struct NounEndingDivision
|
||||||
{
|
{
|
||||||
std::wstring base;
|
std::wstring base;
|
||||||
std::wstring ending;
|
std::wstring ending;
|
||||||
|
|
||||||
@ -155,29 +119,24 @@ struct NounEndingDivision
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<std::wstring> GetAllNounEndingArr();
|
std::vector<std::wstring> GetAllNounEndingArr();
|
||||||
|
|
||||||
|
|
||||||
std::set<std::wstring> getPluralForm(NounRecord nounRecord);
|
std::set<std::wstring> getPluralForm(NounRecord nounRecord);
|
||||||
|
|
||||||
bool NounIsInDictionary(std::wstring nounNominative);
|
bool NounIsInDictionary(std::wstring nounNominative);
|
||||||
bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural);
|
bool NounPluralFormIsInDictionary(std::wstring nounNominativePlural);
|
||||||
|
|
||||||
|
|
||||||
NounRecord GetNounRecordFromDictionary(std::wstring nounNominative);
|
NounRecord GetNounRecordFromDictionary(std::wstring nounNominative);
|
||||||
NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural);
|
NounRecord GetNounRecordFromDictionary_ByPluralForm(std::wstring nounNominativePlural);
|
||||||
|
|
||||||
bool charIsConsolant(wchar_t c); //except й
|
bool charIsMissingVowelSoftenerConsolant(wchar_t c);
|
||||||
|
|
||||||
bool charIsVowel(wchar_t c);
|
struct NounStruct
|
||||||
|
{
|
||||||
|
|
||||||
bool charIsMissingVowelSoftenerConsolant(wchar_t c);
|
|
||||||
|
|
||||||
struct NounStruct
|
|
||||||
{
|
|
||||||
NounGrammaticalCase nounGrammaticalCase;
|
NounGrammaticalCase nounGrammaticalCase;
|
||||||
NounCount nounCount;
|
NounCount nounCount;
|
||||||
bool animated;
|
bool animated;
|
||||||
@ -208,40 +167,39 @@ struct NounStruct
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun);
|
std::set<NounEndingDivision> getPossibleNounEndingDivisionSet(std::wstring noun);
|
||||||
|
|
||||||
std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding);
|
std::vector<NounTuple> GetPossibleNounTupleArr(std::wstring nounEnding);
|
||||||
|
|
||||||
std::set<std::wstring> GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount);
|
std::set<std::wstring> GetNounNoninative(std::wstring nounBase, NounDeclencion nounDeclencion, NounCount nounCount);
|
||||||
std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclencion nounDeclencion);
|
std::wstring GetNounNoninativeSpecialPluralA(std::wstring nounBase, NounDeclencion nounDeclencion);
|
||||||
|
|
||||||
wchar_t GetLastChar(const NounRecord& nounRecord);
|
wchar_t GetLastChar(const NounRecord& nounRecord);
|
||||||
wchar_t GetPrevLastChar(const NounRecord& nounRecord);
|
wchar_t GetPrevLastChar(const NounRecord& nounRecord);
|
||||||
bool charIsIFormConsolant(wchar_t c);
|
|
||||||
bool charIsUFormConsolant(wchar_t c);
|
void SetupDeclentionMap();
|
||||||
|
|
||||||
|
bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple);
|
||||||
|
|
||||||
|
bool IsDeclencionSecondType(NounDeclencion nounDeclention);
|
||||||
|
bool IsDeclencionAnimated(NounDeclencion nounDeclention);
|
||||||
|
|
||||||
|
void FillDivisionCaseMaps();
|
||||||
|
|
||||||
|
std::set<NounStruct> RecognizeNoun(std::wstring noun);
|
||||||
|
|
||||||
|
NounDeclencion CalculateNounDeclention(NounRecord nounRecord);
|
||||||
|
|
||||||
|
void CalculatePluralForm();
|
||||||
|
|
||||||
|
void LoadFrequentWordSet();
|
||||||
|
|
||||||
|
void LoadNounDeclencionCaseTable();
|
||||||
|
|
||||||
|
|
||||||
void SetupDeclentionMap();
|
} //namespace NN
|
||||||
|
|
||||||
bool NounFitsDeclention(NounRecord nounRecord, NounTuple nounTuple);
|
|
||||||
|
|
||||||
bool IsDeclencionSecondType(NounDeclencion nounDeclention);
|
|
||||||
bool IsDeclencionAnimated(NounDeclencion nounDeclention);
|
|
||||||
|
|
||||||
void FillDivisionCaseMaps();
|
|
||||||
|
|
||||||
std::set<NounStruct> RecognizeNoun(std::wstring noun);
|
|
||||||
|
|
||||||
NounDeclencion CalculateNounDeclention(NounRecord nounRecord);
|
|
||||||
|
|
||||||
void CalculatePluralForm();
|
|
||||||
|
|
||||||
void LoadFrequentWordSet();
|
|
||||||
|
|
||||||
|
|
||||||
void LoadNounDeclencionCaseTable();
|
|
||||||
|
|
||||||
#endif //NOUN_H_INCLUDED
|
#endif //NOUN_H_INCLUDED
|
||||||
|
@ -70,6 +70,8 @@
|
|||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<ClCompile Include="adjective.cpp" />
|
||||||
|
<ClCompile Include="grammarCase.cpp" />
|
||||||
<ClCompile Include="http\connection.cpp" />
|
<ClCompile Include="http\connection.cpp" />
|
||||||
<ClCompile Include="http\connection_manager.cpp" />
|
<ClCompile Include="http\connection_manager.cpp" />
|
||||||
<ClCompile Include="http\mime_types.cpp" />
|
<ClCompile Include="http\mime_types.cpp" />
|
||||||
@ -82,6 +84,8 @@
|
|||||||
<ClCompile Include="utf8utf16.cpp" />
|
<ClCompile Include="utf8utf16.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<ClInclude Include="adjective.h" />
|
||||||
|
<ClInclude Include="grammarCase.h" />
|
||||||
<ClInclude Include="http\connection.hpp" />
|
<ClInclude Include="http\connection.hpp" />
|
||||||
<ClInclude Include="http\connection_manager.hpp" />
|
<ClInclude Include="http\connection_manager.hpp" />
|
||||||
<ClInclude Include="http\header.hpp" />
|
<ClInclude Include="http\header.hpp" />
|
||||||
|
@ -48,6 +48,12 @@
|
|||||||
<ClCompile Include="utf8utf16.cpp">
|
<ClCompile Include="utf8utf16.cpp">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="adjective.cpp">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="grammarCase.cpp">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="noun.h">
|
<ClInclude Include="noun.h">
|
||||||
@ -83,5 +89,11 @@
|
|||||||
<ClInclude Include="utf8utf16.h">
|
<ClInclude Include="utf8utf16.h">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="adjective.h">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="grammarCase.h">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
Loading…
Reference in New Issue
Block a user