From ece4de41cdfe173c31f7e26b38cb1c53d04cfcf7 Mon Sep 17 00:00:00 2001 From: johdirr Date: Mon, 19 Dec 2016 16:27:18 +0100 Subject: [PATCH 1/6] convert txt files to JSONs --- data/stopwords/stopwords-ar.json | 164 ++++ data/stopwords/stopwords-ar.txt | 162 ---- data/stopwords/stopwords-bg.json | 261 ++++++ data/stopwords/stopwords-bg.txt | 259 ------ data/stopwords/stopwords-cs.json | 258 ++++++ data/stopwords/stopwords-cs.txt | 256 ------ data/stopwords/stopwords-da.json | 104 +++ data/stopwords/stopwords-da.txt | 101 --- data/stopwords/stopwords-de.json | 896 ++++++++++++++++++++ data/stopwords/stopwords-de.txt | 894 -------------------- data/stopwords/stopwords-en.json | 548 +++++++++++++ data/stopwords/stopwords-en.txt | 546 ------------- data/stopwords/stopwords-es.json | 311 +++++++ data/stopwords/stopwords-es.txt | 308 ------- data/stopwords/stopwords-fi.json | 71 ++ data/stopwords/stopwords-fi.txt | 68 -- data/stopwords/stopwords-fr.json | 201 +++++ data/stopwords/stopwords-fr.txt | 220 ----- data/stopwords/stopwords-hu.json | 406 +++++++++ data/stopwords/stopwords-hu.txt | 403 --------- data/stopwords/stopwords-id.json | 1311 ++++++++++++++++++++++++++++++ data/stopwords/stopwords-id.txt | 1309 ----------------------------- data/stopwords/stopwords-it.json | 290 +++++++ data/stopwords/stopwords-it.txt | 287 ------- data/stopwords/stopwords-ko.json | 72 ++ data/stopwords/stopwords-ko.txt | 70 -- data/stopwords/stopwords-nb.json | 120 +++ data/stopwords/stopwords-nb.txt | 117 --- data/stopwords/stopwords-nl.json | 51 ++ data/stopwords/stopwords-nl.txt | 48 -- data/stopwords/stopwords-no.json | 122 +++ data/stopwords/stopwords-no.txt | 120 --- data/stopwords/stopwords-pl.json | 279 +++++++ data/stopwords/stopwords-pl.txt | 277 ------- data/stopwords/stopwords-pt.json | 611 ++++++++++++++ data/stopwords/stopwords-pt.txt | 609 -------------- data/stopwords/stopwords-ru.json | 424 ++++++++++ data/stopwords/stopwords-ru.txt | 421 ---------- data/stopwords/stopwords-sv.json | 546 +++++++++++++ data/stopwords/stopwords-sv.txt | 547 ------------- data/stopwords/stopwords-th.json | 1050 ++++++++++++++++++++++++ data/stopwords/stopwords-th.txt | 1047 ------------------------ data/stopwords/stopwords-tr.json | 225 +++++ data/stopwords/stopwords-tr.txt | 223 ----- data/stopwords/stopwords-zh.json | 127 +++ data/stopwords/stopwords-zh.txt | 125 --- 46 files changed, 8448 insertions(+), 8417 deletions(-) create mode 100644 data/stopwords/stopwords-ar.json delete mode 100644 data/stopwords/stopwords-ar.txt create mode 100644 data/stopwords/stopwords-bg.json delete mode 100644 data/stopwords/stopwords-bg.txt create mode 100644 data/stopwords/stopwords-cs.json delete mode 100755 data/stopwords/stopwords-cs.txt create mode 100644 data/stopwords/stopwords-da.json delete mode 100644 data/stopwords/stopwords-da.txt create mode 100644 data/stopwords/stopwords-de.json delete mode 100644 data/stopwords/stopwords-de.txt create mode 100644 data/stopwords/stopwords-en.json delete mode 100644 data/stopwords/stopwords-en.txt create mode 100644 data/stopwords/stopwords-es.json delete mode 100644 data/stopwords/stopwords-es.txt create mode 100644 data/stopwords/stopwords-fi.json delete mode 100644 data/stopwords/stopwords-fi.txt create mode 100644 data/stopwords/stopwords-fr.json delete mode 100644 data/stopwords/stopwords-fr.txt create mode 100644 data/stopwords/stopwords-hu.json delete mode 100644 data/stopwords/stopwords-hu.txt create mode 100644 data/stopwords/stopwords-id.json delete mode 100644 data/stopwords/stopwords-id.txt create mode 100644 data/stopwords/stopwords-it.json delete mode 100644 data/stopwords/stopwords-it.txt create mode 100644 data/stopwords/stopwords-ko.json delete mode 100644 data/stopwords/stopwords-ko.txt create mode 100644 data/stopwords/stopwords-nb.json delete mode 100644 data/stopwords/stopwords-nb.txt create mode 100644 data/stopwords/stopwords-nl.json delete mode 100644 data/stopwords/stopwords-nl.txt create mode 100644 data/stopwords/stopwords-no.json delete mode 100644 data/stopwords/stopwords-no.txt create mode 100644 data/stopwords/stopwords-pl.json delete mode 100644 data/stopwords/stopwords-pl.txt create mode 100644 data/stopwords/stopwords-pt.json delete mode 100644 data/stopwords/stopwords-pt.txt create mode 100644 data/stopwords/stopwords-ru.json delete mode 100644 data/stopwords/stopwords-ru.txt create mode 100644 data/stopwords/stopwords-sv.json delete mode 100644 data/stopwords/stopwords-sv.txt create mode 100644 data/stopwords/stopwords-th.json delete mode 100644 data/stopwords/stopwords-th.txt create mode 100644 data/stopwords/stopwords-tr.json delete mode 100644 data/stopwords/stopwords-tr.txt create mode 100644 data/stopwords/stopwords-zh.json delete mode 100644 data/stopwords/stopwords-zh.txt diff --git a/data/stopwords/stopwords-ar.json b/data/stopwords/stopwords-ar.json new file mode 100644 index 0000000..53965d0 --- /dev/null +++ b/data/stopwords/stopwords-ar.json @@ -0,0 +1,164 @@ +[ + "\ufeffفى\r", + "في\r", + "كل\r", + "لم\r", + "لن\r", + "له\r", + "من\r", + "هو\r", + "هي\r", + "قوة\r", + "كما\r", + "لها\r", + "منذ\r", + "وقد\r", + "ولا\r", + "نفسه\r", + "لقاء\r", + "مقابل\r", + "هناك\r", + "وقال\r", + "وكان\r", + "نهاية\r", + "وقالت\r", + "وكانت\r", + "للامم\r", + "فيه\r", + "كلم\r", + "لكن\r", + "وفي\r", + "وقف\r", + "ولم\r", + "ومن\r", + "وهو\r", + "وهي\r", + "يوم\r", + "فيها\r", + "منها\r", + "مليار\r", + "لوكالة\r", + "يكون\r", + "يمكن\r", + "مليون\r", + "حيث\r", + "اكد\r", + "الا\r", + "اما\r", + "امس\r", + "السابق\r", + "التى\r", + "التي\r", + "اكثر\r", + "ايار\r", + "ايضا\r", + "ثلاثة\r", + "الذاتي\r", + "الاخيرة\r", + "الثاني\r", + "الثانية\r", + "الذى\r", + "الذي\r", + "الان\r", + "امام\r", + "ايام\r", + "خلال\r", + "حوالى\r", + "الذين\r", + "الاول\r", + "الاولى\r", + "بين\r", + "ذلك\r", + "دون\r", + "حول\r", + "حين\r", + "الف\r", + "الى\r", + "انه\r", + "اول\r", + "ضمن\r", + "انها\r", + "جميع\r", + "الماضي\r", + "الوقت\r", + "المقبل\r", + "اليوم\r", + "ـ\r", + "ف\r", + "و\r", + "و6\r", + "قد\r", + "لا\r", + "ما\r", + "مع\r", + "مساء\r", + "هذا\r", + "واحد\r", + "واضاف\r", + "واضافت\r", + "فان\r", + "قبل\r", + "قال\r", + "كان\r", + "لدى\r", + "نحو\r", + "هذه\r", + "وان\r", + "واكد\r", + "كانت\r", + "واوضح\r", + "مايو\r", + "ب\r", + "ا\r", + "أ\r", + "،\r", + "عشر\r", + "عدد\r", + "عدة\r", + "عشرة\r", + "عدم\r", + "عام\r", + "عاما\r", + "عن\r", + "عند\r", + "عندما\r", + "على\r", + "عليه\r", + "عليها\r", + "زيارة\r", + "سنة\r", + "سنوات\r", + "تم\r", + "ضد\r", + "بعد\r", + "بعض\r", + "اعادة\r", + "اعلنت\r", + "بسبب\r", + "حتى\r", + "اذا\r", + "احد\r", + "اثر\r", + "برس\r", + "باسم\r", + "غدا\r", + "شخصا\r", + "صباح\r", + "اطار\r", + "اربعة\r", + "اخرى\r", + "بان\r", + "اجل\r", + "غير\r", + "بشكل\r", + "حاليا\r", + "بن\r", + "به\r", + "ثم\r", + "اف\r", + "ان\r", + "او\r", + "اي\r", + "بها\r", + "صفر" +] diff --git a/data/stopwords/stopwords-ar.txt b/data/stopwords/stopwords-ar.txt deleted file mode 100644 index 64e0e71..0000000 --- a/data/stopwords/stopwords-ar.txt +++ /dev/null @@ -1,162 +0,0 @@ -فى -في -كل -لم -لن -له -من -هو -هي -قوة -كما -لها -منذ -وقد -ولا -نفسه -لقاء -مقابل -هناك -وقال -وكان -نهاية -وقالت -وكانت -للامم -فيه -كلم -لكن -وفي -وقف -ولم -ومن -وهو -وهي -يوم -فيها -منها -مليار -لوكالة -يكون -يمكن -مليون -حيث -اكد -الا -اما -امس -السابق -التى -التي -اكثر -ايار -ايضا -ثلاثة -الذاتي -الاخيرة -الثاني -الثانية -الذى -الذي -الان -امام -ايام -خلال -حوالى -الذين -الاول -الاولى -بين -ذلك -دون -حول -حين -الف -الى -انه -اول -ضمن -انها -جميع -الماضي -الوقت -المقبل -اليوم -ـ -ف -و -و6 -قد -لا -ما -مع -مساء -هذا -واحد -واضاف -واضافت -فان -قبل -قال -كان -لدى -نحو -هذه -وان -واكد -كانت -واوضح -مايو -ب -ا -أ -، -عشر -عدد -عدة -عشرة -عدم -عام -عاما -عن -عند -عندما -على -عليه -عليها -زيارة -سنة -سنوات -تم -ضد -بعد -بعض -اعادة -اعلنت -بسبب -حتى -اذا -احد -اثر -برس -باسم -غدا -شخصا -صباح -اطار -اربعة -اخرى -بان -اجل -غير -بشكل -حاليا -بن -به -ثم -اف -ان -او -اي -بها -صفر \ No newline at end of file diff --git a/data/stopwords/stopwords-bg.json b/data/stopwords/stopwords-bg.json new file mode 100644 index 0000000..54d39ff --- /dev/null +++ b/data/stopwords/stopwords-bg.json @@ -0,0 +1,261 @@ +[ + "а\r", + "автентичен\r", + "аз\r", + "ако\r", + "ала\r", + "бе\r", + "без\r", + "беше\r", + "би\r", + "бивш\r", + "бивша\r", + "бившо\r", + "бил\r", + "била\r", + "били\r", + "било\r", + "благодаря\r", + "близо\r", + "бъдат\r", + "бъде\r", + "бяха\r", + "в\r", + "вас\r", + "ваш\r", + "ваша\r", + "вероятно\r", + "вече\r", + "взема\r", + "ви\r", + "вие\r", + "винаги\r", + "внимава\r", + "време\r", + "все\r", + "всеки\r", + "всички\r", + "всичко\r", + "всяка\r", + "във\r", + "въпреки\r", + "върху\r", + "г\r", + "ги\r", + "главен\r", + "главна\r", + "главно\r", + "глас\r", + "го\r", + "година\r", + "години\r", + "годишен\r", + "д\r", + "да\r", + "дали\r", + "два\r", + "двама\r", + "двамата\r", + "две\r", + "двете\r", + "ден\r", + "днес\r", + "дни\r", + "до\r", + "добра\r", + "добре\r", + "добро\r", + "добър\r", + "докато\r", + "докога\r", + "дори\r", + "досега\r", + "доста\r", + "друг\r", + "друга\r", + "други\r", + "е\r", + "евтин\r", + "едва\r", + "един\r", + "една\r", + "еднаква\r", + "еднакви\r", + "еднакъв\r", + "едно\r", + "екип\r", + "ето\r", + "живот\r", + "за\r", + "забавям\r", + "зад\r", + "заедно\r", + "заради\r", + "засега\r", + "заспал\r", + "затова\r", + "защо\r", + "защото\r", + "и\r", + "из\r", + "или\r", + "им\r", + "има\r", + "имат\r", + "иска\r", + "й\r", + "каза\r", + "как\r", + "каква\r", + "какво\r", + "както\r", + "какъв\r", + "като\r", + "кога\r", + "когато\r", + "което\r", + "които\r", + "кой\r", + "който\r", + "колко\r", + "която\r", + "къде\r", + "където\r", + "към\r", + "лесен\r", + "лесно\r", + "ли\r", + "лош\r", + "м\r", + "май\r", + "малко\r", + "ме\r", + "между\r", + "мек\r", + "мен\r", + "месец\r", + "ми\r", + "много\r", + "мнозина\r", + "мога\r", + "могат\r", + "може\r", + "мокър\r", + "моля\r", + "момента\r", + "му\r", + "н\r", + "на\r", + "над\r", + "назад\r", + "най\r", + "направи\r", + "напред\r", + "например\r", + "нас\r", + "не\r", + "него\r", + "нещо\r", + "нея\r", + "ни\r", + "ние\r", + "никой\r", + "нито\r", + "нищо\r", + "но\r", + "нов\r", + "нова\r", + "нови\r", + "новина\r", + "някои\r", + "някой\r", + "няколко\r", + "няма\r", + "обаче\r", + "около\r", + "освен\r", + "особено\r", + "от\r", + "отгоре\r", + "отново\r", + "още\r", + "пак\r", + "по\r", + "повече\r", + "повечето\r", + "под\r", + "поне\r", + "поради\r", + "после\r", + "почти\r", + "прави\r", + "пред\r", + "преди\r", + "през\r", + "при\r", + "пък\r", + "първата\r", + "първи\r", + "първо\r", + "пъти\r", + "равен\r", + "равна\r", + "с\r", + "са\r", + "сам\r", + "само\r", + "се\r", + "сега\r", + "си\r", + "син\r", + "скоро\r", + "след\r", + "следващ\r", + "сме\r", + "смях\r", + "според\r", + "сред\r", + "срещу\r", + "сте\r", + "съм\r", + "със\r", + "също\r", + "т\r", + "тази\r", + "така\r", + "такива\r", + "такъв\r", + "там\r", + "твой\r", + "те\r", + "тези\r", + "ти\r", + "т.н.\r", + "то\r", + "това\r", + "тогава\r", + "този\r", + "той\r", + "толкова\r", + "точно\r", + "три\r", + "трябва\r", + "тук\r", + "тъй\r", + "тя\r", + "тях\r", + "у\r", + "утре\r", + "харесва\r", + "хиляди\r", + "ч\r", + "часа\r", + "че\r", + "често\r", + "чрез\r", + "ще\r", + "щом\r", + "юмрук\r", + "я\r", + "як" +] diff --git a/data/stopwords/stopwords-bg.txt b/data/stopwords/stopwords-bg.txt deleted file mode 100644 index 9700c31..0000000 --- a/data/stopwords/stopwords-bg.txt +++ /dev/null @@ -1,259 +0,0 @@ -а -автентичен -аз -ако -ала -бе -без -беше -би -бивш -бивша -бившо -бил -била -били -било -благодаря -близо -бъдат -бъде -бяха -в -вас -ваш -ваша -вероятно -вече -взема -ви -вие -винаги -внимава -време -все -всеки -всички -всичко -всяка -във -въпреки -върху -г -ги -главен -главна -главно -глас -го -година -години -годишен -д -да -дали -два -двама -двамата -две -двете -ден -днес -дни -до -добра -добре -добро -добър -докато -докога -дори -досега -доста -друг -друга -други -е -евтин -едва -един -една -еднаква -еднакви -еднакъв -едно -екип -ето -живот -за -забавям -зад -заедно -заради -засега -заспал -затова -защо -защото -и -из -или -им -има -имат -иска -й -каза -как -каква -какво -както -какъв -като -кога -когато -което -които -кой -който -колко -която -къде -където -към -лесен -лесно -ли -лош -м -май -малко -ме -между -мек -мен -месец -ми -много -мнозина -мога -могат -може -мокър -моля -момента -му -н -на -над -назад -най -направи -напред -например -нас -не -него -нещо -нея -ни -ние -никой -нито -нищо -но -нов -нова -нови -новина -някои -някой -няколко -няма -обаче -около -освен -особено -от -отгоре -отново -още -пак -по -повече -повечето -под -поне -поради -после -почти -прави -пред -преди -през -при -пък -първата -първи -първо -пъти -равен -равна -с -са -сам -само -се -сега -си -син -скоро -след -следващ -сме -смях -според -сред -срещу -сте -съм -със -също -т -тази -така -такива -такъв -там -твой -те -тези -ти -т.н. -то -това -тогава -този -той -толкова -точно -три -трябва -тук -тъй -тя -тях -у -утре -харесва -хиляди -ч -часа -че -често -чрез -ще -щом -юмрук -я -як \ No newline at end of file diff --git a/data/stopwords/stopwords-cs.json b/data/stopwords/stopwords-cs.json new file mode 100644 index 0000000..0aefa68 --- /dev/null +++ b/data/stopwords/stopwords-cs.json @@ -0,0 +1,258 @@ +[ + "ačkoli\r", + "ahoj\r", + "ale\r", + "anebo\r", + "ano\r", + "asi\r", + "aspoň\r", + "během\r", + "bez\r", + "beze\r", + "blízko\r", + "bohužel\r", + "brzo\r", + "bude\r", + "budeme\r", + "budeš\r", + "budete\r", + "budou\r", + "budu\r", + "byl\r", + "byla\r", + "byli\r", + "bylo\r", + "byly\r", + "bys\r", + "čau\r", + "chce\r", + "chceme\r", + "chceš\r", + "chcete\r", + "chci\r", + "chtějí\r", + "chtít\r", + "chut'\r", + "chuti\r", + "co\r", + "čtrnáct\r", + "čtyři\r", + "dál\r", + "dále\r", + "daleko\r", + "děkovat\r", + "děkujeme\r", + "děkuji\r", + "den\r", + "deset\r", + "devatenáct\r", + "devět\r", + "do\r", + "dobrý\r", + "docela\r", + "dva\r", + "dvacet\r", + "dvanáct\r", + "dvě\r", + "hodně\r", + "já\r", + "jak\r", + "jde\r", + "je\r", + "jeden\r", + "jedenáct\r", + "jedna\r", + "jedno\r", + "jednou\r", + "jedou\r", + "jeho\r", + "její\r", + "jejich\r", + "jemu\r", + "jen\r", + "jenom\r", + "ještě\r", + "jestli\r", + "jestliže\r", + "jí\r", + "jich\r", + "jím\r", + "jimi\r", + "jinak\r", + "jsem\r", + "jsi\r", + "jsme\r", + "jsou\r", + "jste\r", + "kam\r", + "kde\r", + "kdo\r", + "kdy\r", + "když\r", + "ke\r", + "kolik\r", + "kromě\r", + "která\r", + "které\r", + "kteří\r", + "který\r", + "kvůli\r", + "má\r", + "mají\r", + "málo\r", + "mám\r", + "máme\r", + "máš\r", + "máte\r", + "mé\r", + "mě\r", + "mezi\r", + "mí\r", + "mít\r", + "mně\r", + "mnou\r", + "moc\r", + "mohl\r", + "mohou\r", + "moje\r", + "moji\r", + "možná\r", + "můj\r", + "musí\r", + "může\r", + "my\r", + "na\r", + "nad\r", + "nade\r", + "nám\r", + "námi\r", + "naproti\r", + "nás\r", + "náš\r", + "naše\r", + "naši\r", + "ne\r", + "ně\r", + "nebo\r", + "nebyl\r", + "nebyla\r", + "nebyli\r", + "nebyly\r", + "něco\r", + "nedělá\r", + "nedělají\r", + "nedělám\r", + "neděláme\r", + "neděláš\r", + "neděláte\r", + "nějak\r", + "nejsi\r", + "někde\r", + "někdo\r", + "nemají\r", + "nemáme\r", + "nemáte\r", + "neměl\r", + "němu\r", + "není\r", + "nestačí\r", + "nevadí\r", + "než\r", + "nic\r", + "nich\r", + "ním\r", + "nimi\r", + "nula\r", + "od\r", + "ode\r", + "on\r", + "ona\r", + "oni\r", + "ono\r", + "ony\r", + "osm\r", + "osmnáct\r", + "pak\r", + "patnáct\r", + "pět\r", + "po\r", + "pořád\r", + "potom\r", + "pozdě\r", + "před\r", + "přes\r", + "přese\r", + "pro\r", + "proč\r", + "prosím\r", + "prostě\r", + "proti\r", + "protože\r", + "rovně\r", + "se\r", + "sedm\r", + "sedmnáct\r", + "šest\r", + "šestnáct\r", + "skoro\r", + "smějí\r", + "smí\r", + "snad\r", + "spolu\r", + "sta\r", + "sté\r", + "sto\r", + "ta\r", + "tady\r", + "tak\r", + "takhle\r", + "taky\r", + "tam\r", + "tamhle\r", + "tamhleto\r", + "tamto\r", + "tě\r", + "tebe\r", + "tebou\r", + "ted'\r", + "tedy\r", + "ten\r", + "ti\r", + "tisíc\r", + "tisíce\r", + "to\r", + "tobě\r", + "tohle\r", + "toto\r", + "třeba\r", + "tři\r", + "třináct\r", + "trošku\r", + "tvá\r", + "tvé\r", + "tvoje\r", + "tvůj\r", + "ty\r", + "určitě\r", + "už\r", + "vám\r", + "vámi\r", + "vás\r", + "váš\r", + "vaše\r", + "vaši\r", + "ve\r", + "večer\r", + "vedle\r", + "vlastně\r", + "všechno\r", + "všichni\r", + "vůbec\r", + "vy\r", + "vždy\r", + "za\r", + "zač\r", + "zatímco\r", + "ze\r", + "že" +] diff --git a/data/stopwords/stopwords-cs.txt b/data/stopwords/stopwords-cs.txt deleted file mode 100755 index df359f3..0000000 --- a/data/stopwords/stopwords-cs.txt +++ /dev/null @@ -1,256 +0,0 @@ -ačkoli -ahoj -ale -anebo -ano -asi -aspoň -během -bez -beze -blízko -bohužel -brzo -bude -budeme -budeš -budete -budou -budu -byl -byla -byli -bylo -byly -bys -čau -chce -chceme -chceš -chcete -chci -chtějí -chtít -chut' -chuti -co -čtrnáct -čtyři -dál -dále -daleko -děkovat -děkujeme -děkuji -den -deset -devatenáct -devět -do -dobrý -docela -dva -dvacet -dvanáct -dvě -hodně -já -jak -jde -je -jeden -jedenáct -jedna -jedno -jednou -jedou -jeho -její -jejich -jemu -jen -jenom -ještě -jestli -jestliže -jí -jich -jím -jimi -jinak -jsem -jsi -jsme -jsou -jste -kam -kde -kdo -kdy -když -ke -kolik -kromě -která -které -kteří -který -kvůli -má -mají -málo -mám -máme -máš -máte -mé -mě -mezi -mí -mít -mně -mnou -moc -mohl -mohou -moje -moji -možná -můj -musí -může -my -na -nad -nade -nám -námi -naproti -nás -náš -naše -naši -ne -ně -nebo -nebyl -nebyla -nebyli -nebyly -něco -nedělá -nedělají -nedělám -neděláme -neděláš -neděláte -nějak -nejsi -někde -někdo -nemají -nemáme -nemáte -neměl -němu -není -nestačí -nevadí -než -nic -nich -ním -nimi -nula -od -ode -on -ona -oni -ono -ony -osm -osmnáct -pak -patnáct -pět -po -pořád -potom -pozdě -před -přes -přese -pro -proč -prosím -prostě -proti -protože -rovně -se -sedm -sedmnáct -šest -šestnáct -skoro -smějí -smí -snad -spolu -sta -sté -sto -ta -tady -tak -takhle -taky -tam -tamhle -tamhleto -tamto -tě -tebe -tebou -ted' -tedy -ten -ti -tisíc -tisíce -to -tobě -tohle -toto -třeba -tři -třináct -trošku -tvá -tvé -tvoje -tvůj -ty -určitě -už -vám -vámi -vás -váš -vaše -vaši -ve -večer -vedle -vlastně -všechno -všichni -vůbec -vy -vždy -za -zač -zatímco -ze -že \ No newline at end of file diff --git a/data/stopwords/stopwords-da.json b/data/stopwords/stopwords-da.json new file mode 100644 index 0000000..f777e82 --- /dev/null +++ b/data/stopwords/stopwords-da.json @@ -0,0 +1,104 @@ +[ + "af", + "alle", + "andet", + "andre", + "at", + "begge", + "da", + "de", + "den", + "denne", + "der", + "deres", + "det", + "dette", + "dig", + "din", + "dog", + "du", + "ej", + "eller", + "en", + "end", + "ene", + "eneste", + "enhver", + "et", + "fem", + "fire", + "flere", + "fleste", + "for", + "fordi", + "forrige", + "fra", + "få", + "før", + "god", + "han", + "hans", + "har", + "hendes", + "her", + "hun", + "hvad", + "hvem", + "hver", + "hvilken", + "hvis", + "hvor", + "hvordan", + "hvorfor", + "hvornår", + "i", + "ikke", + "ind", + "ingen", + "intet", + "jeg", + "jeres", + "kan", + "kom", + "kommer", + "lav", + "lidt", + "lille", + "man", + "mand", + "mange", + "med", + "meget", + "men", + "mens", + "mere", + "mig", + "ned", + "ni", + "nogen", + "noget", + "ny", + "nyt", + "nær", + "næste", + "næsten", + "og", + "op", + "otte", + "over", + "på", + "se", + "seks", + "ses", + "som", + "stor", + "store", + "syv", + "ti", + "til", + "to", + "tre", + "ud", + "var", + "" +] diff --git a/data/stopwords/stopwords-da.txt b/data/stopwords/stopwords-da.txt deleted file mode 100644 index e8522ef..0000000 --- a/data/stopwords/stopwords-da.txt +++ /dev/null @@ -1,101 +0,0 @@ -af -alle -andet -andre -at -begge -da -de -den -denne -der -deres -det -dette -dig -din -dog -du -ej -eller -en -end -ene -eneste -enhver -et -fem -fire -flere -fleste -for -fordi -forrige -fra -få -før -god -han -hans -har -hendes -her -hun -hvad -hvem -hver -hvilken -hvis -hvor -hvordan -hvorfor -hvornår -i -ikke -ind -ingen -intet -jeg -jeres -kan -kom -kommer -lav -lidt -lille -man -mand -mange -med -meget -men -mens -mere -mig -ned -ni -nogen -noget -ny -nyt -nær -næste -næsten -og -op -otte -over -på -se -seks -ses -som -stor -store -syv -ti -til -to -tre -ud -var diff --git a/data/stopwords/stopwords-de.json b/data/stopwords/stopwords-de.json new file mode 100644 index 0000000..fdab73d --- /dev/null +++ b/data/stopwords/stopwords-de.json @@ -0,0 +1,896 @@ +[ + "der", + "die", + "und", + "in", + "den", + "von", + "zu", + "mit", + "ist", + "das", + "des", + "im", + "für", + "auf", + "sich", + "dem", + "Die", + "nicht", + "ein", + "eine", + "als", + "auch", + "an", + "es", + "er", + "aus", + "bei", + "werden", + "sie", + "nach", + "Der", + "sind", + "war", + "wurde", + "wird", + "einer", + "Das", + "hat", + "am", + "wie", + "um", + "Sie", + "zum", + "oder", + "einen", + "über", + "dass", + "einem", + "noch", + "bis", + "nur", + "vor", + "zur", + "durch", + "so", + "haben", + "aber", + "ich", + "In", + "man", + "mehr", + "wir", + "daß", + "kann", + "sein", + "vom", + "Es", + "unter", + "Ich", + "hatte", + "gegen", + "Im", + "Er", + "wenn", + "dieser", + "seine", + "eines", + "können", + "diese", + "wieder", + "wurden", + "dann", + "was", + "schon", + "Jahr", + "zwei", + "seiner", + "Jahre", + "Jahren", + "ihre", + "gibt", + "zwischen", + "Ein", + "immer", + "waren", + "Zeit", + "Uhr", + "keine", + "Wir", + "sei", + "habe", + "sehr", + "hier", + "alle", + "Nach", + "ab", + "sowie", + "da", + "beim", + "heute", + "seit", + "diesem", + "uns", + "soll", + "Und", + "Deutschland", + "Mit", + "anderen", + "jedoch", + "ihr", + "damit", + "ersten", + "drei", + "Auch", + "doch", + "ihm", + "seinen", + "Stadt", + "etwa", + "sagte", + "ihn", + "Eine", + "sondern", + "bereits", + "müssen", + "ohne", + "Menschen", + "will", + "Prozent", + "ihrer", + "worden", + "Bei", + "selbst", + "jetzt", + "of", + "Als", + "seinem", + "neue", + "muss", + "allem", + "neuen", + "Ende", + "nun", + "Von", + "geht", + "ihren", + "SPD", + "So", + "Für", + "weil", + "wo", + "mich", + "mir", + "Aber", + "Am", + "Diese", + "ganz", + "dieses", + "etwas", + "andere", + "Geschichte", + "Frau", + "liegt", + "Wenn", + "ins", + "gut", + "einmal", + "konnte", + "Euro", + "du", + "denn", + "viele", + "Auf", + "machen", + "Herr", + "Leben", + "the", + "diesen", + "erst", + "lassen", + "Wie", + "dort", + "beiden", + "erste", + "The", + "Teil", + "deutschen", + "weiter", + "also", + "viel", + "sollte", + "dabei", + "Millionen", + "Was", + "später", + "hatten", + "während", + "Welt", + "ISBN", + "sagt", + "denen", + "wollen", + "steht", + "Da", + "kommt", + "kein", + "vier", + "nichts", + "de", + "allerdings", + "Seite", + "ob", + "dazu", + "gab", + "s", + "letzten", + "kam", + "USA", + "wegen", + "dies", + "zurück", + "großen", + "kommen", + "alles", + "rund", + "ja", + "sollen", + "deren", + "dafür", + "Doch", + "Kinder", + "wäre", + "Frage", + "weitere", + "würde", + "dessen", + "große", + "Januar", + "zwar", + "darauf", + "Arbeit", + "", + "Beispiel", + "September", + "zusammen", + "einige", + "Land", + "allen", + "fast", + "Frauen", + "März", + "Namen", + "Unternehmen", + "ihrem", + "davon", + "Mann", + "Mai", + "Platz", + "deutsche", + "werde", + "Oktober", + "muß", + "Literatur", + "Art", + "ihnen", + "Deutschen", + "fünf", + "gilt", + "sehen", + "könnte", + "Dezember", + "stehen", + "sogar", + "seien", + "Wer", + "Seit", + "August", + "bin", + "Beifall", + "Fall", + "Juni", + "eigenen", + "November", + "mal", + "Film", + "finden", + "sagen", + "Regierung", + "April", + "München", + "oft", + "Dies", + "lange", + "ebenfalls", + "bekannt", + "Präsident", + "wohl", + "CDU/CSU", + "Zu", + "gehört", + "Man", + "weniger", + "gerade", + "statt", + "aller", + "Juli", + "möchte", + "Weg", + "Entwicklung", + "zunächst", + "ging", + "Mark", + "Bild", + "möglich", + "gar", + "besonders", + "hätte", + "macht", + "Politik", + "geben", + "Tag", + "Ihnen", + "Februar", + "Hier", + "Gemeinde", + "wenig", + "gewesen", + "Europa", + "gehen", + "gemacht", + "welche", + "New", + "gegenüber", + "heißt", + "Familie", + "Union", + "tun", + "Jahrhundert", + "einfach", + "Frankfurt", + "deutlich", + "Dabei", + "neben", + "sollten", + "Kirche", + "keinen", + "Artikel", + "Ihre", + "Peter", + "Thema", + "besteht", + "vielen", + "nie", + "bzw.", + "Aus", + "Zeitung", + "wollte", + "Kommission", + "seines", + "Hamburg", + "hätten", + "Geld", + "meine", + "Dr", + "kaum", + "zweiten", + "Während", + "lässt", + "Anfang", + "Um", + "Ort", + "weiß", + "findet", + "Bereich", + "Haus", + "anderem", + "Mal", + "", + "deshalb", + "alten", + "erhalten", + "zehn", + "Zum", + "bisher", + "meisten", + "darüber", + "würden", + "hin", + "Form", + "An", + "bleibt", + "sieht", + "Gesellschaft", + "Berliner", + "Den", + "vergangenen", + "bezeichnet", + "Nr.", + "Ziel", + "je", + "weit", + "Grund", + "sechs", + "darf", + "Rolle", + "Deutsche", + "wissen", + "jeder", + "zeigt", + "Damit", + "Denn", + "mehrere", + "nächsten", + "Vor", + "Dann", + "schließlich", + "kleinen", + "Durch", + "Michael", + "km", + "Lage", + "Gruppe", + "Band", + "damals", + "Spiel", + "Sohn", + "Dr.", + "stark", + "Universität", + "Hilfe", + "besser", + "hinter", + "meist", + "Seine", + "St.", + "stellt", + "Tage", + "unsere", + "daher", + "Nur", + "wirklich", + "führt", + "Dieser", + "beispielsweise", + "kurz", + "Bericht", + "gleich", + "weiteren", + "Straße", + "bleiben", + "Wirtschaft", + "Siehe", + "Zukunft", + "eher", + "Bedeutung", + "Recht", + "insbesondere", + "Bevölkerung", + "schnell", + "nehmen", + "Verlag", + "CDU", + "Tod", + "Alle", + "solche", + "neu", + "Bundesregierung", + "pro", + "Frankreich", + "Jahres", + "konnten", + "Ihr", + "ließ", + "Du", + "kleine", + "Europäischen", + "Vater", + "genannt", + "lang", + "Titel", + "Rahmen", + "Wort", + "eigentlich", + "erhielt", + "einigen", + "Woche", + "FC", + "Musik", + "dagegen", + "Sein", + "allein", + "Einsatz", + "genau", + "begann", + "innerhalb", + "unserer", + "Partei", + "Polizei", + "Wasser", + "bringen", + "deutscher", + "natürlich", + "eigene", + "", + "Wochen", + "insgesamt", + "Außerdem", + "Bis", + "halten", + "politischen", + "musste", + "Parlament", + "Meter", + "Hand", + "Zahl", + "stellen", + "gesagt", + "führen", + "daran", + "Erfolg", + "befindet", + "Zur", + "verschiedenen", + "Probleme", + "Unter", + "Abgeordneten", + "Milliarden", + "nahm", + "stand", + "geworden", + "c", + "liegen", + "erstmals", + "Sprache", + "Fragen", + "nämlich", + "Ja", + "Kollegen", + "Männer", + "Nicht", + "Wolfgang", + "Problem", + "Mutter", + "Minuten", + "Weitere", + "Mitte", + "Mitglied", + "Jahrhunderts", + "Krieg", + "Hans", + "könnten", + "Thomas", + "Über", + "Personen", + "Friedrich", + "ca.", + "ebenso", + "machte", + "York", + "vielleicht", + "Stelle", + "derzeit", + "Ländern", + "Höhe", + "verwendet", + "gute", + "überhaupt", + "Länder", + "Angaben", + "führte", + "gegeben", + "Tel.", + "klar", + "Karl", + "europäischen", + "sicher", + "Saison", + "Programm", + "erreicht", + "GRÜNEN", + "beide", + "Sonntag", + "sowohl", + "Region", + "alte", + "Staaten", + "Paris", + "Beginn", + "Buch", + "zweite", + "ganze", + "hinaus", + "König", + "Morgen", + "handelt", + "fand", + "Schweiz", + "jeweils", + "Weise", + "DM", + "fest", + "per", + "blieb", + "Mitglieder", + "Richtung", + "Heute", + "Stunden", + "leicht", + "Leute", + "wobei", + "gehören", + "bietet", + "Wien", + "politische", + "Folge", + "Blick", + "aufgrund", + "Entscheidung", + "Dort", + "Neben", + "hält", + "Gebiet", + "gemeinsam", + "erklärt", + "direkt", + "könne", + "Daten", + "recht", + "schwer", + "Bayern", + "jeden", + "Name", + "Schule", + "GmbH", + "dürfen", + "laut", + "Seiten", + "Bürger", + "Eltern", + "dpa", + "Meinung", + "Werke", + "Jetzt", + "letzte", + "Spieler", + "bald", + "London", + "häufig", + "heutigen", + "Einwohner", + "acht", + "eben", + "Internet", + "Markt", + "dich", + "Nein", + "Situation", + "System", + "zuvor", + "Möglichkeit", + "Freitag", + "mein", + "Mannheim", + "Fenster", + "Kosten", + "inzwischen", + "kamen", + "John", + "sieben", + "bekommen", + "erreichen", + "unser", + "Verfügung", + "Köln", + "Dazu", + "besten", + "Zusammenhang", + "Reihe", + "Kritik", + "richtig", + "Liste", + "Herren", + "Augen", + "taz", + "zeigen", + "siehe", + "hohen", + "spielte", + "leben", + "völlig", + "Neue", + "ihres", + "spielt", + "Sicherheit", + "weiterhin", + "hoch", + "nachdem", + "gegründet", + "erneut", + "sah", + "z.", + "wer", + "Informationen", + "anders", + "spielen", + "Dieses", + "gleichen", + "Kultur", + "größten", + "eingesetzt", + "Unterstützung", + "Beim", + "erklärte", + "Allerdings", + "Firma", + "Amt", + "Kopf", + "trotz", + "Erst", + "gebracht", + "gestellt", + "läuft", + "schließen", + "Bilder", + "nimmt", + "Mitarbeiter", + "BÜNDNIS", + "Deshalb", + "verschiedene", + "zudem", + "Werk", + "Ergebnis", + "Heinrich", + "Bau", + "ehemaligen", + "Preis", + "Tochter", + "Stuttgart", + "Samstag", + "Bad", + "Verfahren", + "Kind", + "früher", + "Paul", + "darin", + "paar", + "Punkt", + "Weblinks", + "Nun", + "Maßnahmen", + "Österreich", + "Wilhelm", + "Herrn", + "z.B.", + "Noch", + "Staat", + "Zusammenarbeit", + "knapp", + "Nacht", + "einzelnen", + "trat", + "gestern", + "Team", + "Osten", + "scheint", + "Mannschaft", + "Tagen", + "internationalen", + "jede", + "mindestens", + "teilweise", + "einzige", + "Soldaten", + "setzt", + "gefunden", + "Kunst", + "lediglich", + "öffentlichen", + "bedeutet", + "Raum", + "gewann", + "Kampf", + "Martin", + "Ist", + "Begriff", + "Hause", + "entwickelt", + "Wahl", + "Schon", + "arbeiten", + "größte", + "Donnerstag", + "Ab", + "Viele", + "Quellen", + "Nachdem", + "dadurch", + "Italien", + "erster", + "gekommen", + "dir", + "Mittwoch", + "danach", + "stellte", + "her", + "zahlreiche", + "Landes", + "Gesetz", + "Monaten", + "PDS", + "Rat", + "Franz", + "Verein", + "sonst", + "Frankfurter", + "Meine", + "Klaus", + "Karriere", + "müsse", + "meiner", + "anderer", + "zuletzt", + "Monate", + "Alter", + "hohe", + "Interesse", + "Regie", + "Montag", + "genommen", + "lag", + "Sommer", + "spricht", + "Trainer", + "Liebe", + "jedem", + "/DIE", + "Westen", + "guten", + "Kilometer", + "Johann", + "gesehen", + "darunter", + "solchen", + "indem", + "Mittel", + "oben", + "Schweizer", + "wichtig", + "Hälfte", + "Regel", + "obwohl", + "Bürgermeister", + "Aufgabe", + "Spiele", + "folgenden", + "Dienstag", + "version", + "Sache", + "sprechen", + "Gemeinden", + "electronic", + "for", + "Norden", + "außerdem", + "Antrag", + "gleichzeitig", + "ganzen", + "Politiker", + "gehörte", + "großer", + "China", + "Nähe", + "bereit", + "setzte", + "Druck", + "tatsächlich", + "Gott", + "frei", + "Grünen", + "zumindest", + "Opfer", + "genug", + "versucht", + "bevor" +] diff --git a/data/stopwords/stopwords-de.txt b/data/stopwords/stopwords-de.txt deleted file mode 100644 index 37e6714..0000000 --- a/data/stopwords/stopwords-de.txt +++ /dev/null @@ -1,894 +0,0 @@ -der -die -und -in -den -von -zu -mit -ist -das -des -im -für -auf -sich -dem -Die -nicht -ein -eine -als -auch -an -es -er -aus -bei -werden -sie -nach -Der -sind -war -wurde -wird -einer -Das -hat -am -wie -um -Sie -zum -oder -einen -über -dass -einem -noch -bis -nur -vor -zur -durch -so -haben -aber -ich -In -man -mehr -wir -daß -kann -sein -vom -Es -unter -Ich -hatte -gegen -Im -Er -wenn -dieser -seine -eines -können -diese -wieder -wurden -dann -was -schon -Jahr -zwei -seiner -Jahre -Jahren -ihre -gibt -zwischen -Ein -immer -waren -Zeit -Uhr -keine -Wir -sei -habe -sehr -hier -alle -Nach -ab -sowie -da -beim -heute -seit -diesem -uns -soll -Und -Deutschland -Mit -anderen -jedoch -ihr -damit -ersten -drei -Auch -doch -ihm -seinen -Stadt -etwa -sagte -ihn -Eine -sondern -bereits -müssen -ohne -Menschen -will -Prozent -ihrer -worden -Bei -selbst -jetzt -of -Als -seinem -neue -muss -allem -neuen -Ende -nun -Von -geht -ihren -SPD -So -Für -weil -wo -mich -mir -Aber -Am -Diese -ganz -dieses -etwas -andere -Geschichte -Frau -liegt -Wenn -ins -gut -einmal -konnte -Euro -du -denn -viele -Auf -machen -Herr -Leben -the -diesen -erst -lassen -Wie -dort -beiden -erste -The -Teil -deutschen -weiter -also -viel -sollte -dabei -Millionen -Was -später -hatten -während -Welt -ISBN -sagt -denen -wollen -steht -Da -kommt -kein -vier -nichts -de -allerdings -Seite -ob -dazu -gab -s -letzten -kam -USA -wegen -dies -zurück -großen -kommen -alles -rund -ja -sollen -deren -dafür -Doch -Kinder -wäre -Frage -weitere -würde -dessen -große -Januar -zwar -darauf -Arbeit - -Beispiel -September -zusammen -einige -Land -allen -fast -Frauen -März -Namen -Unternehmen -ihrem -davon -Mann -Mai -Platz -deutsche -werde -Oktober -muß -Literatur -Art -ihnen -Deutschen -fünf -gilt -sehen -könnte -Dezember -stehen -sogar -seien -Wer -Seit -August -bin -Beifall -Fall -Juni -eigenen -November -mal -Film -finden -sagen -Regierung -April -München -oft -Dies -lange -ebenfalls -bekannt -Präsident -wohl -CDU/CSU -Zu -gehört -Man -weniger -gerade -statt -aller -Juli -möchte -Weg -Entwicklung -zunächst -ging -Mark -Bild -möglich -gar -besonders -hätte -macht -Politik -geben -Tag -Ihnen -Februar -Hier -Gemeinde -wenig -gewesen -Europa -gehen -gemacht -welche -New -gegenüber -heißt -Familie -Union -tun -Jahrhundert -einfach -Frankfurt -deutlich -Dabei -neben -sollten -Kirche -keinen -Artikel -Ihre -Peter -Thema -besteht -vielen -nie -bzw. -Aus -Zeitung -wollte -Kommission -seines -Hamburg -hätten -Geld -meine -Dr -kaum -zweiten -Während -lässt -Anfang -Um -Ort -weiß -findet -Bereich -Haus -anderem -Mal - -deshalb -alten -erhalten -zehn -Zum -bisher -meisten -darüber -würden -hin -Form -An -bleibt -sieht -Gesellschaft -Berliner -Den -vergangenen -bezeichnet -Nr. -Ziel -je -weit -Grund -sechs -darf -Rolle -Deutsche -wissen -jeder -zeigt -Damit -Denn -mehrere -nächsten -Vor -Dann -schließlich -kleinen -Durch -Michael -km -Lage -Gruppe -Band -damals -Spiel -Sohn -Dr. -stark -Universität -Hilfe -besser -hinter -meist -Seine -St. -stellt -Tage -unsere -daher -Nur -wirklich -führt -Dieser -beispielsweise -kurz -Bericht -gleich -weiteren -Straße -bleiben -Wirtschaft -Siehe -Zukunft -eher -Bedeutung -Recht -insbesondere -Bevölkerung -schnell -nehmen -Verlag -CDU -Tod -Alle -solche -neu -Bundesregierung -pro -Frankreich -Jahres -konnten -Ihr -ließ -Du -kleine -Europäischen -Vater -genannt -lang -Titel -Rahmen -Wort -eigentlich -erhielt -einigen -Woche -FC -Musik -dagegen -Sein -allein -Einsatz -genau -begann -innerhalb -unserer -Partei -Polizei -Wasser -bringen -deutscher -natürlich -eigene - -Wochen -insgesamt -Außerdem -Bis -halten -politischen -musste -Parlament -Meter -Hand -Zahl -stellen -gesagt -führen -daran -Erfolg -befindet -Zur -verschiedenen -Probleme -Unter -Abgeordneten -Milliarden -nahm -stand -geworden -c -liegen -erstmals -Sprache -Fragen -nämlich -Ja -Kollegen -Männer -Nicht -Wolfgang -Problem -Mutter -Minuten -Weitere -Mitte -Mitglied -Jahrhunderts -Krieg -Hans -könnten -Thomas -Über -Personen -Friedrich -ca. -ebenso -machte -York -vielleicht -Stelle -derzeit -Ländern -Höhe -verwendet -gute -überhaupt -Länder -Angaben -führte -gegeben -Tel. -klar -Karl -europäischen -sicher -Saison -Programm -erreicht -GRÜNEN -beide -Sonntag -sowohl -Region -alte -Staaten -Paris -Beginn -Buch -zweite -ganze -hinaus -König -Morgen -handelt -fand -Schweiz -jeweils -Weise -DM -fest -per -blieb -Mitglieder -Richtung -Heute -Stunden -leicht -Leute -wobei -gehören -bietet -Wien -politische -Folge -Blick -aufgrund -Entscheidung -Dort -Neben -hält -Gebiet -gemeinsam -erklärt -direkt -könne -Daten -recht -schwer -Bayern -jeden -Name -Schule -GmbH -dürfen -laut -Seiten -Bürger -Eltern -dpa -Meinung -Werke -Jetzt -letzte -Spieler -bald -London -häufig -heutigen -Einwohner -acht -eben -Internet -Markt -dich -Nein -Situation -System -zuvor -Möglichkeit -Freitag -mein -Mannheim -Fenster -Kosten -inzwischen -kamen -John -sieben -bekommen -erreichen -unser -Verfügung -Köln -Dazu -besten -Zusammenhang -Reihe -Kritik -richtig -Liste -Herren -Augen -taz -zeigen -siehe -hohen -spielte -leben -völlig -Neue -ihres -spielt -Sicherheit -weiterhin -hoch -nachdem -gegründet -erneut -sah -z. -wer -Informationen -anders -spielen -Dieses -gleichen -Kultur -größten -eingesetzt -Unterstützung -Beim -erklärte -Allerdings -Firma -Amt -Kopf -trotz -Erst -gebracht -gestellt -läuft -schließen -Bilder -nimmt -Mitarbeiter -BÜNDNIS -Deshalb -verschiedene -zudem -Werk -Ergebnis -Heinrich -Bau -ehemaligen -Preis -Tochter -Stuttgart -Samstag -Bad -Verfahren -Kind -früher -Paul -darin -paar -Punkt -Weblinks -Nun -Maßnahmen -Österreich -Wilhelm -Herrn -z.B. -Noch -Staat -Zusammenarbeit -knapp -Nacht -einzelnen -trat -gestern -Team -Osten -scheint -Mannschaft -Tagen -internationalen -jede -mindestens -teilweise -einzige -Soldaten -setzt -gefunden -Kunst -lediglich -öffentlichen -bedeutet -Raum -gewann -Kampf -Martin -Ist -Begriff -Hause -entwickelt -Wahl -Schon -arbeiten -größte -Donnerstag -Ab -Viele -Quellen -Nachdem -dadurch -Italien -erster -gekommen -dir -Mittwoch -danach -stellte -her -zahlreiche -Landes -Gesetz -Monaten -PDS -Rat -Franz -Verein -sonst -Frankfurter -Meine -Klaus -Karriere -müsse -meiner -anderer -zuletzt -Monate -Alter -hohe -Interesse -Regie -Montag -genommen -lag -Sommer -spricht -Trainer -Liebe -jedem -/DIE -Westen -guten -Kilometer -Johann -gesehen -darunter -solchen -indem -Mittel -oben -Schweizer -wichtig -Hälfte -Regel -obwohl -Bürgermeister -Aufgabe -Spiele -folgenden -Dienstag -version -Sache -sprechen -Gemeinden -electronic -for -Norden -außerdem -Antrag -gleichzeitig -ganzen -Politiker -gehörte -großer -China -Nähe -bereit -setzte -Druck -tatsächlich -Gott -frei -Grünen -zumindest -Opfer -genug -versucht -bevor \ No newline at end of file diff --git a/data/stopwords/stopwords-en.json b/data/stopwords/stopwords-en.json new file mode 100644 index 0000000..25e4434 --- /dev/null +++ b/data/stopwords/stopwords-en.json @@ -0,0 +1,548 @@ +[ + "a's", + "able", + "about", + "above", + "according", + "accordingly", + "across", + "actually", + "after", + "afterwards", + "again", + "against", + "ain't", + "all", + "allow", + "allows", + "almost", + "alone", + "along", + "already", + "also", + "although", + "always", + "am", + "among", + "amongst", + "an", + "and", + "another", + "any", + "anybody", + "anyhow", + "anyone", + "anything", + "anyway", + "anyways", + "anywhere", + "apart", + "appear", + "appreciate", + "appropriate", + "are", + "aren't", + "around", + "as", + "aside", + "ask", + "asking", + "associated", + "at", + "available", + "away", + "awfully", + "be", + "became", + "because", + "become", + "becomes", + "becoming", + "been", + "before", + "beforehand", + "behind", + "being", + "believe", + "below", + "beside", + "besides", + "best", + "better", + "between", + "beyond", + "both", + "brief", + "but", + "by", + "c", + "c'mon", + "c's", + "came", + "campaign", + "can", + "can't", + "cannot", + "cant", + "cause", + "causes", + "certain", + "certainly", + "changes", + "clearly", + "co", + "com", + "come", + "comes", + "concerning", + "consequently", + "consider", + "considering", + "contain", + "containing", + "contains", + "corresponding", + "could", + "couldn't", + "course", + "currently", + "definitely", + "described", + "despite", + "did", + "didn't", + "different", + "do", + "does", + "doesn't", + "doing", + "don't", + "done", + "down", + "downwards", + "during", + "each", + "edu", + "eight", + "either", + "else", + "elsewhere", + "enough", + "endorsed", + "entirely", + "especially", + "et", + "etc", + "even", + "ever", + "every", + "everybody", + "everyone", + "everything", + "everywhere", + "ex", + "exactly", + "example", + "except", + "far", + "few", + "fifth", + "first", + "financial", + "five", + "followed", + "following", + "follows", + "for", + "former", + "formerly", + "forth", + "four", + "from", + "further", + "furthermore", + "get", + "gets", + "getting", + "given", + "gives", + "go", + "goes", + "going", + "gone", + "got", + "gotten", + "greetings", + "had", + "hadn't", + "happens", + "hardly", + "has", + "hasn't", + "have", + "haven't", + "having", + "he", + "he's", + "hello", + "help", + "hence", + "her", + "here", + "here's", + "hereafter", + "hereby", + "herein", + "hereupon", + "hers", + "herself", + "hi", + "him", + "himself", + "his", + "hither", + "hopefully", + "how", + "howbeit", + "however", + "i'd", + "i'll", + "i'm", + "i've", + "if", + "ignored", + "immediate", + "in", + "inasmuch", + "inc", + "indeed", + "indicate", + "indicated", + "indicates", + "inner", + "insofar", + "instead", + "into", + "inward", + "is", + "isn't", + "it", + "it'd", + "it'll", + "it's", + "its", + "itself", + "just", + "keep", + "keeps", + "kept", + "know", + "knows", + "known", + "last", + "lately", + "later", + "latter", + "latterly", + "least", + "less", + "lest", + "let", + "let's", + "like", + "liked", + "likely", + "little", + "look", + "looking", + "looks", + "ltd", + "mainly", + "many", + "may", + "maybe", + "me", + "mean", + "meanwhile", + "merely", + "might", + "more", + "moreover", + "most", + "mostly", + "much", + "must", + "my", + "myself", + "name", + "namely", + "nd", + "near", + "nearly", + "necessary", + "need", + "needs", + "neither", + "never", + "nevertheless", + "new", + "next", + "nine", + "no", + "nobody", + "non", + "none", + "noone", + "nor", + "normally", + "not", + "nothing", + "novel", + "now", + "nowhere", + "obviously", + "of", + "off", + "often", + "oh", + "ok", + "okay", + "old", + "on", + "once", + "one", + "ones", + "only", + "onto", + "or", + "other", + "others", + "otherwise", + "ought", + "our", + "ours", + "ourselves", + "out", + "outside", + "over", + "overall", + "own", + "particular", + "particularly", + "per", + "perhaps", + "placed", + "please", + "plus", + "possible", + "presumably", + "probably", + "provides", + "quite", + "quote", + "quarterly", + "rather", + "really", + "reasonably", + "regarding", + "regardless", + "regards", + "relatively", + "respectively", + "right", + "said", + "same", + "saw", + "say", + "saying", + "says", + "second", + "secondly", + "see", + "seeing", + "seem", + "seemed", + "seeming", + "seems", + "seen", + "self", + "selves", + "sensible", + "sent", + "serious", + "seriously", + "seven", + "several", + "shall", + "she", + "should", + "shouldn't", + "since", + "six", + "so", + "some", + "somebody", + "somehow", + "someone", + "something", + "sometime", + "sometimes", + "somewhat", + "somewhere", + "soon", + "sorry", + "specified", + "specify", + "specifying", + "still", + "sub", + "such", + "sup", + "sure", + "t's", + "take", + "taken", + "tell", + "tends", + "than", + "thank", + "thanks", + "thanx", + "that", + "that's", + "thats", + "the", + "their", + "theirs", + "them", + "themselves", + "then", + "thence", + "there", + "there's", + "thereafter", + "thereby", + "therefore", + "therein", + "theres", + "thereupon", + "these", + "they", + "they'd", + "they'll", + "they're", + "they've", + "think", + "third", + "this", + "thorough", + "thoroughly", + "those", + "though", + "three", + "through", + "throughout", + "thru", + "thus", + "to", + "together", + "too", + "took", + "toward", + "towards", + "tried", + "tries", + "truly", + "try", + "trying", + "twice", + "two", + "under", + "unfortunately", + "unless", + "unlikely", + "until", + "unto", + "up", + "upon", + "us", + "use", + "used", + "useful", + "uses", + "using", + "usually", + "uucp", + "value", + "various", + "very", + "via", + "viz", + "vs", + "want", + "wants", + "was", + "wasn't", + "way", + "we", + "we'd", + "we'll", + "we're", + "we've", + "welcome", + "well", + "went", + "were", + "weren't", + "what", + "what's", + "whatever", + "when", + "whence", + "whenever", + "where", + "where's", + "whereafter", + "whereas", + "whereby", + "wherein", + "whereupon", + "wherever", + "whether", + "which", + "while", + "whither", + "who", + "who's", + "whoever", + "whole", + "whom", + "whose", + "why", + "will", + "willing", + "wish", + "with", + "within", + "without", + "won't", + "wonder", + "would", + "would", + "wouldn't", + "yes", + "yet", + "you", + "you'd", + "you'll", + "you're", + "you've", + "your", + "yours", + "yourself", + "yourselves", + "zero", + "official", + "sharply", + "criticized" +] diff --git a/data/stopwords/stopwords-en.txt b/data/stopwords/stopwords-en.txt deleted file mode 100644 index d3a3954..0000000 --- a/data/stopwords/stopwords-en.txt +++ /dev/null @@ -1,546 +0,0 @@ -a's -able -about -above -according -accordingly -across -actually -after -afterwards -again -against -ain't -all -allow -allows -almost -alone -along -already -also -although -always -am -among -amongst -an -and -another -any -anybody -anyhow -anyone -anything -anyway -anyways -anywhere -apart -appear -appreciate -appropriate -are -aren't -around -as -aside -ask -asking -associated -at -available -away -awfully -be -became -because -become -becomes -becoming -been -before -beforehand -behind -being -believe -below -beside -besides -best -better -between -beyond -both -brief -but -by -c -c'mon -c's -came -campaign -can -can't -cannot -cant -cause -causes -certain -certainly -changes -clearly -co -com -come -comes -concerning -consequently -consider -considering -contain -containing -contains -corresponding -could -couldn't -course -currently -definitely -described -despite -did -didn't -different -do -does -doesn't -doing -don't -done -down -downwards -during -each -edu -eight -either -else -elsewhere -enough -endorsed -entirely -especially -et -etc -even -ever -every -everybody -everyone -everything -everywhere -ex -exactly -example -except -far -few -fifth -first -financial -five -followed -following -follows -for -former -formerly -forth -four -from -further -furthermore -get -gets -getting -given -gives -go -goes -going -gone -got -gotten -greetings -had -hadn't -happens -hardly -has -hasn't -have -haven't -having -he -he's -hello -help -hence -her -here -here's -hereafter -hereby -herein -hereupon -hers -herself -hi -him -himself -his -hither -hopefully -how -howbeit -however -i'd -i'll -i'm -i've -if -ignored -immediate -in -inasmuch -inc -indeed -indicate -indicated -indicates -inner -insofar -instead -into -inward -is -isn't -it -it'd -it'll -it's -its -itself -just -keep -keeps -kept -know -knows -known -last -lately -later -latter -latterly -least -less -lest -let -let's -like -liked -likely -little -look -looking -looks -ltd -mainly -many -may -maybe -me -mean -meanwhile -merely -might -more -moreover -most -mostly -much -must -my -myself -name -namely -nd -near -nearly -necessary -need -needs -neither -never -nevertheless -new -next -nine -no -nobody -non -none -noone -nor -normally -not -nothing -novel -now -nowhere -obviously -of -off -often -oh -ok -okay -old -on -once -one -ones -only -onto -or -other -others -otherwise -ought -our -ours -ourselves -out -outside -over -overall -own -particular -particularly -per -perhaps -placed -please -plus -possible -presumably -probably -provides -quite -quote -quarterly -rather -really -reasonably -regarding -regardless -regards -relatively -respectively -right -said -same -saw -say -saying -says -second -secondly -see -seeing -seem -seemed -seeming -seems -seen -self -selves -sensible -sent -serious -seriously -seven -several -shall -she -should -shouldn't -since -six -so -some -somebody -somehow -someone -something -sometime -sometimes -somewhat -somewhere -soon -sorry -specified -specify -specifying -still -sub -such -sup -sure -t's -take -taken -tell -tends -than -thank -thanks -thanx -that -that's -thats -the -their -theirs -them -themselves -then -thence -there -there's -thereafter -thereby -therefore -therein -theres -thereupon -these -they -they'd -they'll -they're -they've -think -third -this -thorough -thoroughly -those -though -three -through -throughout -thru -thus -to -together -too -took -toward -towards -tried -tries -truly -try -trying -twice -two -under -unfortunately -unless -unlikely -until -unto -up -upon -us -use -used -useful -uses -using -usually -uucp -value -various -very -via -viz -vs -want -wants -was -wasn't -way -we -we'd -we'll -we're -we've -welcome -well -went -were -weren't -what -what's -whatever -when -whence -whenever -where -where's -whereafter -whereas -whereby -wherein -whereupon -wherever -whether -which -while -whither -who -who's -whoever -whole -whom -whose -why -will -willing -wish -with -within -without -won't -wonder -would -would -wouldn't -yes -yet -you -you'd -you'll -you're -you've -your -yours -yourself -yourselves -zero -official -sharply -criticized \ No newline at end of file diff --git a/data/stopwords/stopwords-es.json b/data/stopwords/stopwords-es.json new file mode 100644 index 0000000..44b6c82 --- /dev/null +++ b/data/stopwords/stopwords-es.json @@ -0,0 +1,311 @@ +[ + "de", + "la", + "que", + "el", + "en", + "y", + "a", + "los", + "del", + "se", + "las", + "por", + "un", + "para", + "con", + "no", + "una", + "su", + "al", + "lo", + "como", + "más", + "pero", + "sus", + "le", + "ya", + "o", + "este", + "sí", + "porque", + "esta", + "entre", + "cuando", + "muy", + "sin", + "sobre", + "también", + "me", + "hasta", + "hay", + "donde", + "quien", + "desde", + "todo", + "nos", + "durante", + "todos", + "uno", + "les", + "ni", + "contra", + "otros", + "ese", + "eso", + "ante", + "ellos", + "e", + "esto", + "mí", + "antes", + "algunos", + "qué", + "unos", + "yo", + "otro", + "otras", + "otra", + "él", + "tanto", + "esa", + "estos", + "mucho", + "quienes", + "nada", + "muchos", + "cual", + "poco", + "ella", + "estar", + "estas", + "algunas", + "algo", + "nosotros", + "mi", + "mis", + "tú", + "te", + "ti", + "tu", + "tus", + "ellas", + "nosotras", + "vosotros", + "vosotras", + "os", + "mío", + "mía", + "míos", + "mías", + "tuyo", + "tuya", + "tuyos", + "tuyas", + "suyo", + "suya", + "suyos", + "suyas", + "nuestro", + "nuestra", + "nuestros", + "nuestras", + "vuestro", + "vuestra", + "vuestros", + "vuestras", + "esos", + "esas", + "estoy", + "estás", + "está", + "estamos", + "estáis", + "están", + "esté", + "estés", + "estemos", + "estéis", + "estén", + "estaré", + "estarás", + "estará", + "estaremos", + "estaréis", + "estarán", + "estaría", + "estarías", + "estaríamos", + "estaríais", + "estarían", + "estaba", + "estabas", + "estábamos", + "estabais", + "estaban", + "estuve", + "estuviste", + "estuvo", + "estuvimos", + "estuvisteis", + "estuvieron", + "estuviera", + "estuvieras", + "estuviéramos", + "estuvierais", + "estuvieran", + "estuviese", + "estuvieses", + "estuviésemos", + "estuvieseis", + "estuviesen", + "estando", + "estado", + "estada", + "estados", + "estadas", + "estad", + "he", + "has", + "ha", + "hemos", + "habéis", + "han", + "haya", + "hayas", + "hayamos", + "hayáis", + "hayan", + "habré", + "habrás", + "habrá", + "habremos", + "habréis", + "habrán", + "habría", + "habrías", + "habríamos", + "habríais", + "habrían", + "había", + "habías", + "habíamos", + "habíais", + "habían", + "hube", + "hubiste", + "hubo", + "hubimos", + "hubisteis", + "hubieron", + "hubiera", + "hubieras", + "hubiéramos", + "hubierais", + "hubieran", + "hubiese", + "hubieses", + "hubiésemos", + "hubieseis", + "hubiesen", + "habiendo", + "habido", + "habida", + "habidos", + "habidas", + "soy", + "eres", + "es", + "somos", + "sois", + "son", + "sea", + "seas", + "seamos", + "seáis", + "sean", + "seré", + "serás", + "será", + "seremos", + "seréis", + "serán", + "sería", + "serías", + "seríamos", + "seríais", + "serían", + "era", + "eras", + "éramos", + "erais", + "eran", + "fui", + "fuiste", + "fue", + "fuimos", + "fuisteis", + "fueron", + "fuera", + "fueras", + "fuéramos", + "fuerais", + "fueran", + "fuese", + "fueses", + "fuésemos", + "fueseis", + "fuesen", + "siendo", + "sido", + "tengo", + "tienes", + "tiene", + "tenemos", + "tenéis", + "tienen", + "tenga", + "tengas", + "tengamos", + "tengáis", + "tengan", + "tendré", + "tendrás", + "tendrá", + "tendremos", + "tendréis", + "tendrán", + "tendría", + "tendrías", + "tendríamos", + "tendríais", + "tendrían", + "tenía", + "tenías", + "teníamos", + "teníais", + "tenían", + "tuve", + "tuviste", + "tuvo", + "tuvimos", + "tuvisteis", + "tuvieron", + "tuviera", + "tuvieras", + "tuviéramos", + "tuvierais", + "tuvieran", + "tuviese", + "tuvieses", + "tuviésemos", + "tuvieseis", + "tuviesen", + "teniendo", + "tenido", + "tenida", + "tenidos", + "tenidas", + "tened", + "" +] diff --git a/data/stopwords/stopwords-es.txt b/data/stopwords/stopwords-es.txt deleted file mode 100644 index c59d9b2..0000000 --- a/data/stopwords/stopwords-es.txt +++ /dev/null @@ -1,308 +0,0 @@ -de -la -que -el -en -y -a -los -del -se -las -por -un -para -con -no -una -su -al -lo -como -más -pero -sus -le -ya -o -este -sí -porque -esta -entre -cuando -muy -sin -sobre -también -me -hasta -hay -donde -quien -desde -todo -nos -durante -todos -uno -les -ni -contra -otros -ese -eso -ante -ellos -e -esto -mí -antes -algunos -qué -unos -yo -otro -otras -otra -él -tanto -esa -estos -mucho -quienes -nada -muchos -cual -poco -ella -estar -estas -algunas -algo -nosotros -mi -mis -tú -te -ti -tu -tus -ellas -nosotras -vosotros -vosotras -os -mío -mía -míos -mías -tuyo -tuya -tuyos -tuyas -suyo -suya -suyos -suyas -nuestro -nuestra -nuestros -nuestras -vuestro -vuestra -vuestros -vuestras -esos -esas -estoy -estás -está -estamos -estáis -están -esté -estés -estemos -estéis -estén -estaré -estarás -estará -estaremos -estaréis -estarán -estaría -estarías -estaríamos -estaríais -estarían -estaba -estabas -estábamos -estabais -estaban -estuve -estuviste -estuvo -estuvimos -estuvisteis -estuvieron -estuviera -estuvieras -estuviéramos -estuvierais -estuvieran -estuviese -estuvieses -estuviésemos -estuvieseis -estuviesen -estando -estado -estada -estados -estadas -estad -he -has -ha -hemos -habéis -han -haya -hayas -hayamos -hayáis -hayan -habré -habrás -habrá -habremos -habréis -habrán -habría -habrías -habríamos -habríais -habrían -había -habías -habíamos -habíais -habían -hube -hubiste -hubo -hubimos -hubisteis -hubieron -hubiera -hubieras -hubiéramos -hubierais -hubieran -hubiese -hubieses -hubiésemos -hubieseis -hubiesen -habiendo -habido -habida -habidos -habidas -soy -eres -es -somos -sois -son -sea -seas -seamos -seáis -sean -seré -serás -será -seremos -seréis -serán -sería -serías -seríamos -seríais -serían -era -eras -éramos -erais -eran -fui -fuiste -fue -fuimos -fuisteis -fueron -fuera -fueras -fuéramos -fuerais -fueran -fuese -fueses -fuésemos -fueseis -fuesen -siendo -sido -tengo -tienes -tiene -tenemos -tenéis -tienen -tenga -tengas -tengamos -tengáis -tengan -tendré -tendrás -tendrá -tendremos -tendréis -tendrán -tendría -tendrías -tendríamos -tendríais -tendrían -tenía -tenías -teníamos -teníais -tenían -tuve -tuviste -tuvo -tuvimos -tuvisteis -tuvieron -tuviera -tuvieras -tuviéramos -tuvierais -tuvieran -tuviese -tuvieses -tuviésemos -tuvieseis -tuviesen -teniendo -tenido -tenida -tenidos -tenidas -tened diff --git a/data/stopwords/stopwords-fi.json b/data/stopwords/stopwords-fi.json new file mode 100644 index 0000000..edf9e89 --- /dev/null +++ b/data/stopwords/stopwords-fi.json @@ -0,0 +1,71 @@ +[ + "alla", + "ansiosta", + "ehkä", + "ei", + "enemmän", + "ennen", + "etessa", + "f", + "haikki", + "he", + "hitaasti", + "hoikein", + "hyvin", + "hän", + "ilman", + "ja", + "jos", + "jälkeen", + "kanssa", + "kaukana", + "kenties", + "keskellä", + "kesken", + "koskaan", + "kuinkan", + "kukka", + "kylliksi", + "kyllä", + "liian", + "lla", + "lla", + "luona", + "lähellä", + "läpi", + "me", + "miksi", + "mikä", + "milloin", + "milloinkan", + "minä", + "missä", + "miten", + "nopeasti", + "nyt", + "oikea", + "oikealla", + "paljon", + "siellä", + "sinä", + "ssa", + "sta", + "suoraan", + "tai", + "takana", + "takia", + "tarpeeksi", + "te", + "tässä", + "ulkopuolella", + "vahemmän", + "vasen", + "vasenmalla", + "vastan", + "vielä", + "vieressä", + "vähän", + "yhdessä", + "ylös", + "" +] diff --git a/data/stopwords/stopwords-fi.txt b/data/stopwords/stopwords-fi.txt deleted file mode 100644 index 3b468b3..0000000 --- a/data/stopwords/stopwords-fi.txt +++ /dev/null @@ -1,68 +0,0 @@ -alla -ansiosta -ehkä -ei -enemmän -ennen -etessa -f -haikki -he -hitaasti -hoikein -hyvin -hän -ilman -ja -jos -jälkeen -kanssa -kaukana -kenties -keskellä -kesken -koskaan -kuinkan -kukka -kylliksi -kyllä -liian -lla -lla -luona -lähellä -läpi -me -miksi -mikä -milloin -milloinkan -minä -missä -miten -nopeasti -nyt -oikea -oikealla -paljon -siellä -sinä -ssa -sta -suoraan -tai -takana -takia -tarpeeksi -te -tässä -ulkopuolella -vahemmän -vasen -vasenmalla -vastan -vielä -vieressä -vähän -yhdessä -ylös diff --git a/data/stopwords/stopwords-fr.json b/data/stopwords/stopwords-fr.json new file mode 100644 index 0000000..d75a948 --- /dev/null +++ b/data/stopwords/stopwords-fr.json @@ -0,0 +1,201 @@ +[ + "a", + "an", + "and", + "are", + "as", + "at", + "be", + "but", + "by", + "for", + "if", + "in", + "into", + "is", + "it", + "no", + "not", + "of", + "on", + "or", + "s", + "such", + "t", + "that", + "the", + "their", + "then", + "there", + "these", + "they", + "this", + "to", + "was", + "will", + "with", + "au", + "aux", + "avec", + "ce", + "ces", + "dans", + "de", + "des", + "du", + "elle", + "en", + "et", + "eux", + "il", + "je", + "la", + "le", + "leur", + "lui", + "ma", + "mais", + "me", + "même", + "mes", + "moi", + "mon", + "ne", + "nos", + "notre", + "nous", + "on", + "ou", + "par", + "pas", + "pour", + "qu", + "que", + "qui", + "sa", + "se", + "ses", + "son", + "sur", + "ta", + "te", + "tes", + "toi", + "ton", + "tu", + "un", + "une", + "vos", + "votre", + "vous", + "c", + "d", + "j", + "l", + "à", + "m", + "n", + "s", + "t", + "y", + "été", + "étée", + "étées", + "étés", + "étant", + "suis", + "es", + "est", + "sommes", + "êtes", + "sont", + "serai", + "seras", + "sera", + "serons", + "serez", + "seront", + "serais", + "serait", + "serions", + "seriez", + "seraient", + "étais", + "était", + "étions", + "étiez", + "étaient", + "fus", + "fut", + "fûmes", + "fûtes", + "furent", + "sois", + "soit", + "soyons", + "soyez", + "soient", + "fusse", + "fusses", + "fût", + "fussions", + "fussiez", + "fussent", + "ayant", + "eu", + "eue", + "eues", + "eus", + "ai", + "as", + "avons", + "avez", + "ont", + "aurai", + "auras", + "aura", + "aurons", + "aurez", + "auront", + "aurais", + "aurait", + "aurions", + "auriez", + "auraient", + "avais", + "avait", + "avions", + "aviez", + "avaient", + "eut", + "eûmes", + "eûtes", + "eurent", + "aie", + "aies", + "ait", + "ayons", + "ayez", + "aient", + "eusse", + "eusses", + "eût", + "eussions", + "eussiez", + "eussent", + "ceci", + "celà", + "cet", + "cette", + "ici", + "ils", + "les", + "leurs", + "quel", + "quels", + "quelle", + "quelles", + "sans", + "soi", + "" +] diff --git a/data/stopwords/stopwords-fr.txt b/data/stopwords/stopwords-fr.txt deleted file mode 100644 index 30e06ef..0000000 --- a/data/stopwords/stopwords-fr.txt +++ /dev/null @@ -1,220 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -#Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -s -such -t -that -the -their -then -there -these -they -this -to -was -will -with -au -aux -avec -ce -ces -dans -de -des -du -elle -en -et -eux -il -je -la -le -leur -lui -ma -mais -me -même -mes -moi -mon -ne -nos -notre -nous -on -ou -par -pas -pour -qu -que -qui -sa -se -ses -son -sur -ta -te -tes -toi -ton -tu -un -une -vos -votre -vous -c -d -j -l -à -m -n -s -t -y -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était -étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses -fût -fussions -fussiez -fussent -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent -ceci -celà -cet -cette -ici -ils -les -leurs -quel -quels -quelle -quelles -sans -soi diff --git a/data/stopwords/stopwords-hu.json b/data/stopwords/stopwords-hu.json new file mode 100644 index 0000000..6366662 --- /dev/null +++ b/data/stopwords/stopwords-hu.json @@ -0,0 +1,406 @@ +[ + "a", + "á", + "ahogy", + "ahol", + "aki", + "akik", + "akkor", + "alatt", + "által", + "általában", + "amely", + "amelyek", + "amelyekben", + "amelyeket", + "amelyet", + "amelynek", + "ami", + "amit", + "amolyan", + "amp", + "amíg", + "amikor", + "át", + "abban", + "ahhoz", + "annak", + "arra", + "arról", + "az", + "azok", + "azon", + "azt", + "azzal", + "azért", + "aztán", + "azután", + "azonban", + "b", + "bár", + "be", + "belül", + "benne", + "c", + "cikk", + "cikkek", + "cikkeket", + "csak", + "d", + "de", + "e", + "é", + "eddig", + "egész", + "egy", + "egyes", + "egyetlen", + "egyéb", + "egyik", + "egyre", + "ekkor", + "el", + "elég", + "ellen", + "elő", + "először", + "előtt", + "első", + "én", + "éppen", + "ebben", + "ehhez", + "emilyen", + "ennek", + "erre", + "ez", + "ezt", + "ezek", + "ezen", + "ezzel", + "ezért", + "és", + "f", + "fel", + "felé", + "g", + "h", + "hanem", + "hiszen", + "hogy", + "hogyan", + "i", + "í", + "igen", + "így", + "illetve", + "ill.", + "ill", + "ilyen", + "ilyenkor", + "is", + "ison", + "ismét", + "itt", + "j", + "jó", + "jól", + "jobban", + "k", + "kell", + "kellett", + "keresztül", + "keressünk", + "ki", + "kívül", + "között", + "közül", + "l", + "legalább", + "lehet", + "lehetett", + "legyen", + "lenne", + "lenni", + "lesz", + "lett", + "m", + "maga", + "magát", + "majd", + "majd", + "már", + "más", + "másik", + "meg", + "még", + "mellett", + "mert", + "mely", + "melyek", + "mi", + "mit", + "míg", + "miért", + "milyen", + "mikor", + "minden", + "mindent", + "mindenki", + "mindig", + "mint", + "mintha", + "mivel", + "most", + "n", + "nagy", + "nagyobb", + "nagyon", + "ne", + "néha", + "nekem", + "neki", + "nem", + "néhány", + "nélkül", + "nincs", + "o", + "ó", + "olyan", + "ott", + "össze", + "ö", + "ő", + "ők", + "őket", + "p", + "pedig", + "persze", + "q", + "r", + "rá", + "s", + "saját", + "sem", + "semmi", + "sok", + "sokat", + "sokkal", + "sz", + "számára", + "szemben", + "szerint", + "szinte", + "t", + "talán", + "tehát", + "teljes", + "tovább", + "továbbá", + "több", + "u", + "ú", + "úgy", + "ugyanis", + "új", + "újabb", + "újra", + "után", + "utána", + "utolsó", + "ü", + "ű", + "v", + "vagy", + "vagyis", + "valaki", + "valamely", + "valami", + "valamint", + "való", + "vagyok", + "van", + "vannak", + "volt", + "voltam", + "voltak", + "voltunk", + "vissza", + "vele", + "viszont", + "volna", + "számolnak", + "szólnak", + "szól", + "w", + "x", + "y", + "z", + "zs", + "a", + "ahogy", + "ahol", + "aki", + "akkor", + "alatt", + "általában", + "által", + "amely", + "amíg", + "amikor", + "ami", + "amolyan", + "arra", + "át", + "az", + "azért", + "azonban", + "azon", + "aztán", + "azt", + "azután", + "azzal", + "bár", + "be", + "belül", + "benne", + "cikk", + "csak", + "de", + "eddig", + "egész", + "egy", + "egyéb", + "egyes", + "egyetlen", + "egyik", + "egyre", + "ekkor", + "el", + "elég", + "ellen", + "elő", + "először", + "előtt", + "első", + "emilyen", + "én", + "éppen", + "erre", + "és", + "e", + "ez", + "ezen", + "ezért", + "ezzel", + "fel", + "felé", + "hanem", + "hiszen", + "hogy", + "hogyan", + "igen", + "így", + "ill.", + "illetve", + "ill", + "ilyen", + "ilyenkor", + "ismét", + "ison", + "itt", + "jó", + "jobban", + "jól", + "kell", + "keres", + "keresztül", + "ki", + "kívül", + "között", + "közül", + "legalább", + "legyen", + "lehet", + "lenni", + "lett", + "maga", + "maga", + "majd", + "már", + "más", + "másik", + "még", + "meg", + "mellett", + "mely", + "mert", + "miért", + "míg", + "mikor", + "milyen", + "minden", + "mindenki", + "mindig", + "mi", + "mint", + "mintha", + "mivel", + "most", + "nagy", + "nagyobb", + "nagyon", + "ne", + "néha", + "néhány", + "neki", + "nélkül", + "nem", + "nincs", + "ők", + "olyan", + "ő", + "össze", + "ott", + "pedig", + "persze", + "rá", + "saját", + "s", + "sem", + "semmi", + "sokkal", + "sok", + "számára", + "számol", + "szemben", + "szerint", + "szinte", + "szól", + "talán", + "tehát", + "teljes", + "továbbá", + "tovább", + "úgy", + "ugyanis", + "új", + "újabb", + "újra", + "utána", + "után", + "utolsó", + "vagy", + "vagyis", + "valaki", + "valamely", + "valami", + "valamint", + "való", + "van", + "vissza", + "viszont", + "volt", + "", + "" +] diff --git a/data/stopwords/stopwords-hu.txt b/data/stopwords/stopwords-hu.txt deleted file mode 100644 index 694feb1..0000000 --- a/data/stopwords/stopwords-hu.txt +++ /dev/null @@ -1,403 +0,0 @@ -a -á -ahogy -ahol -aki -akik -akkor -alatt -által -általában -amely -amelyek -amelyekben -amelyeket -amelyet -amelynek -ami -amit -amolyan -amp -amíg -amikor -át -abban -ahhoz -annak -arra -arról -az -azok -azon -azt -azzal -azért -aztán -azután -azonban -b -bár -be -belül -benne -c -cikk -cikkek -cikkeket -csak -d -de -e -é -eddig -egész -egy -egyes -egyetlen -egyéb -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -én -éppen -ebben -ehhez -emilyen -ennek -erre -ez -ezt -ezek -ezen -ezzel -ezért -és -f -fel -felé -g -h -hanem -hiszen -hogy -hogyan -i -í -igen -így -illetve -ill. -ill -ilyen -ilyenkor -is -ison -ismét -itt -j -jó -jól -jobban -k -kell -kellett -keresztül -keressünk -ki -kívül -között -közül -l -legalább -lehet -lehetett -legyen -lenne -lenni -lesz -lett -m -maga -magát -majd -majd -már -más -másik -meg -még -mellett -mert -mely -melyek -mi -mit -míg -miért -milyen -mikor -minden -mindent -mindenki -mindig -mint -mintha -mivel -most -n -nagy -nagyobb -nagyon -ne -néha -nekem -neki -nem -néhány -nélkül -nincs -o -ó -olyan -ott -össze -ö -ő -ők -őket -p -pedig -persze -q -r -rá -s -saját -sem -semmi -sok -sokat -sokkal -sz -számára -szemben -szerint -szinte -t -talán -tehát -teljes -tovább -továbbá -több -u -ú -úgy -ugyanis -új -újabb -újra -után -utána -utolsó -ü -ű -v -vagy -vagyis -valaki -valamely -valami -valamint -való -vagyok -van -vannak -volt -voltam -voltak -voltunk -vissza -vele -viszont -volna -számolnak -szólnak -szól -w -x -y -z -zs -a -ahogy -ahol -aki -akkor -alatt -általában -által -amely -amíg -amikor -ami -amolyan -arra -át -az -azért -azonban -azon -aztán -azt -azután -azzal -bár -be -belül -benne -cikk -csak -de -eddig -egész -egy -egyéb -egyes -egyetlen -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -emilyen -én -éppen -erre -és -e -ez -ezen -ezért -ezzel -fel -felé -hanem -hiszen -hogy -hogyan -igen -így -ill. -illetve -ill -ilyen -ilyenkor -ismét -ison -itt -jó -jobban -jól -kell -keres -keresztül -ki -kívül -között -közül -legalább -legyen -lehet -lenni -lett -maga -maga -majd -már -más -másik -még -meg -mellett -mely -mert -miért -míg -mikor -milyen -minden -mindenki -mindig -mi -mint -mintha -mivel -most -nagy -nagyobb -nagyon -ne -néha -néhány -neki -nélkül -nem -nincs -ők -olyan -ő -össze -ott -pedig -persze -rá -saját -s -sem -semmi -sokkal -sok -számára -számol -szemben -szerint -szinte -szól -talán -tehát -teljes -továbbá -tovább -úgy -ugyanis -új -újabb -újra -utána -után -utolsó -vagy -vagyis -valaki -valamely -valami -valamint -való -van -vissza -viszont -volt - diff --git a/data/stopwords/stopwords-id.json b/data/stopwords/stopwords-id.json new file mode 100644 index 0000000..6c995ae --- /dev/null +++ b/data/stopwords/stopwords-id.json @@ -0,0 +1,1311 @@ +[ + "a", + "abad", + "acara", + "aceh", + "ada", + "adalah", + "adanya", + "adapun", + "agak", + "agaknya", + "agama", + "agar", + "agustus", + "air", + "akan", + "akankah", + "akhir", + "akhiri", + "akhirnya", + "akibat", + "aku", + "akulah", + "alam", + "album", + "amat", + "amatlah", + "amerika", + "anak", + "and", + "anda", + "andalah", + "anggota", + "antar", + "antara", + "antarabangsa", + "antaranya", + "apa", + "apaan", + "apabila", + "apakah", + "apalagi", + "apatah", + "api", + "april", + "artikel", + "artinya", + "as", + "asal", + "asalkan", + "asas", + "asia", + "asing", + "atas", + "atau", + "ataukah", + "ataupun", + "australia", + "awal", + "awalnya", + "awam", + "b", + "badan", + "bagai", + "bagaikan", + "bagaimana", + "bagaimanakah", + "bagaimanapun", + "bagainamakah", + "bagi", + "bagian", + "bahagian", + "bahan", + "baharu", + "bahasa", + "bahawa", + "bahkan", + "bahwa", + "bahwasannya", + "bahwasanya", + "baik", + "baiknya", + "bakal", + "bakalan", + "balik", + "bandar", + "bangsa", + "bank", + "banyak", + "bapak", + "barang", + "barangan", + "barat", + "baru", + "baru-baru", + "bawah", + "beberapa", + "begini", + "beginian", + "beginikah", + "beginilah", + "begitu", + "begitukah", + "begitulah", + "begitupun", + "bekas", + "bekerja", + "belakang", + "belakangan", + "belanda", + "beli", + "beliau", + "belum", + "belumlah", + "benar", + "benarkah", + "benarlah", + "bentuk", + "berada", + "berakhir", + "berakhirlah", + "berakhirnya", + "berapa", + "berapakah", + "berapalah", + "berapapun", + "berarti", + "berasal", + "berat", + "berawal", + "berbagai", + "berbanding", + "berbeda", + "berdasarkan", + "berdatangan", + "berharap", + "berhasil", + "beri", + "berikan", + "berikut", + "berikutan", + "berikutnya", + "berita", + "berjalan", + "berjaya", + "berjumlah", + "berkaitan", + "berkali", + "berkali-kali", + "berkata", + "berkehendak", + "berkeinginan", + "berkenaan", + "berlainan", + "berlaku", + "berlalu", + "berlangsung", + "berlebihan", + "bermacam", + "bermacam-macam", + "bermain", + "bermaksud", + "bermula", + "bernama", + "bernilai", + "bersama", + "bersama-sama", + "bersiap", + "bertanya", + "bertemu", + "berturut", + "bertutur", + "berubah", + "berujar", + "berupa", + "besar", + "besok", + "betul", + "betulkah", + "bhd", + "biasa", + "biasanya", + "bidang", + "bila", + "bilakah", + "bilion", + "bintang", + "bisa", + "bisakah", + "blog", + "bn", + "bola", + "boleh", + "bolehkah", + "bolehlah", + "buat", + "bukan", + "bukankah", + "bukanlah", + "bukannya", + "buku", + "bulan", + "bumi", + "bung", + "bursa", + "cadangan", + "cara", + "caranya", + "catch", + "china", + "click", + "code", + "copyright", + "cukup", + "cukupkah", + "cukuplah", + "cuma", + "daerah", + "dagangan", + "dahulu", + "dalam", + "dan", + "dana", + "dapat", + "dari", + "daripada", + "dasar", + "data", + "datang", + "datuk", + "dekat", + "demi", + "demikian", + "demikianlah", + "dengan", + "depan", + "derivatives", + "desa", + "desember", + "detik", + "dewan", + "di", + "dia", + "diadakan", + "diakhiri", + "diakhirinya", + "dialah", + "dianggap", + "diantara", + "diantaranya", + "diberi", + "diberikan", + "diberikannya", + "dibuat", + "dibuatnya", + "dibuka", + "dicatatkan", + "didapat", + "didatangkan", + "didirikan", + "diduga", + "digunakan", + "diibaratkan", + "diibaratkannya", + "diingat", + "diingatkan", + "diinginkan", + "dijangka", + "dijawab", + "dijelaskan", + "dijelaskannya", + "dikarenakan", + "dikatakan", + "dikatakannya", + "dikenal", + "dikerjakan", + "diketahui", + "diketahuinya", + "dikira", + "dilakukan", + "dilalui", + "dilihat", + "dimaksud", + "dimaksudkan", + "dimaksudkannya", + "dimaksudnya", + "dimana", + "diminta", + "dimintai", + "dimisalkan", + "dimulai", + "dimulailah", + "dimulainya", + "dimungkinkan", + "dini", + "diniagakan", + "dipastikan", + "diperbuat", + "diperbuatnya", + "dipergunakan", + "diperkirakan", + "diperlihatkan", + "diperlukan", + "diperlukannya", + "dipersoalkan", + "dipertanyakan", + "dipunyai", + "diri", + "dirilis", + "dirinya", + "dis", + "disampaikan", + "disebut", + "disebutkan", + "disebutkannya", + "disember", + "disini", + "disinilah", + "distrik", + "ditambahkan", + "ditandaskan", + "ditanya", + "ditanyai", + "ditanyakan", + "ditegaskan", + "ditemukan", + "ditujukan", + "ditunjuk", + "ditunjuki", + "ditunjukkan", + "ditunjukkannya", + "ditunjuknya", + "ditutup", + "dituturkan", + "dituturkannya", + "diucapkan", + "diucapkannya", + "diungkapkan", + "document.write", + "dolar", + "dong", + "dr", + "dua", + "dulu", + "dunia", + "effective", + "ekonomi", + "eksekutif", + "eksport", + "empat", + "enam", + "enggak", + "enggaknya", + "entah", + "entahlah", + "era", + "eropa", + "err", + "faedah", + "feb", + "film", + "gat", + "gedung", + "gelar", + "gettracker", + "global", + "grup", + "guna", + "gunakan", + "gunung", + "hadap", + "hadapan", + "hal", + "hampir", + "hanya", + "hanyalah", + "harga", + "hari", + "harian", + "harus", + "haruslah", + "harusnya", + "hasil", + "hendak", + "hendaklah", + "hendaknya", + "hidup", + "hingga", + "https", + "hubungan", + "hukum", + "hutan", + "i", + "ia", + "iaitu", + "ialah", + "ibarat", + "ibaratkan", + "ibaratnya", + "ibu", + "ii", + "iklan", + "ikut", + "ilmu", + "indeks", + "india", + "indonesia", + "industri", + "informasi", + "ingat", + "inggris", + "ingin", + "inginkah", + "inginkan", + "ini", + "inikah", + "inilah", + "internasional", + "islam", + "isnin", + "isu", + "italia", + "itu", + "itukah", + "itulah", + "jabatan", + "jadi", + "jadilah", + "jadinya", + "jakarta", + "jalan", + "jalur", + "jaman", + "jan", + "jangan", + "jangankan", + "janganlah", + "januari", + "jauh", + "jawa", + "jawab", + "jawaban", + "jawabnya", + "jawatan", + "jawatankuasa", + "jelas", + "jelaskan", + "jelaslah", + "jelasnya", + "jenis", + "jepang", + "jepun", + "jerman", + "jika", + "jikalau", + "jiwa", + "jual", + "jualan", + "juga", + "julai", + "jumaat", + "jumat", + "jumlah", + "jumlahnya", + "jun", + "juni", + "justru", + "juta", + "kabar", + "kabupaten", + "kadar", + "kala", + "kalangan", + "kalau", + "kalaulah", + "kalaupun", + "kali", + "kalian", + "kalimantan", + "kami", + "kamilah", + "kamis", + "kamu", + "kamulah", + "kan", + "kantor", + "kapal", + "kapan", + "kapankah", + "kapanpun", + "karena", + "karenanya", + "karya", + "kasus", + "kata", + "katakan", + "katakanlah", + "katanya", + "kaunter", + "kawasan", + "ke", + "keadaan", + "kebetulan", + "kebutuhan", + "kecamatan", + "kecil", + "kedua", + "kedua-dua", + "keduanya", + "kedudukan", + "kegiatan", + "kehidupan", + "keinginan", + "kejadian", + "kekal", + "kelamaan", + "kelihatan", + "kelihatannya", + "kelima", + "kelompok", + "keluar", + "keluarga", + "kelurahan", + "kembali", + "kementerian", + "kemudahan", + "kemudian", + "kemungkinan", + "kemungkinannya", + "kenaikan", + "kenapa", + "kenyataan", + "kepada", + "kepadanya", + "kepala", + "kepentingan", + "keputusan", + "kerajaan", + "kerana", + "kereta", + "kerja", + "kerjasama", + "kes", + "kesampaian", + "keselamatan", + "keseluruhan", + "keseluruhannya", + "kesempatan", + "kesihatan", + "keterangan", + "keterlaluan", + "ketiga", + "ketika", + "ketua", + "keuntungan", + "kewangan", + "khamis", + "khusus", + "khususnya", + "kini", + "kinilah", + "kira", + "kira-kira", + "kiranya", + "kita", + "kitalah", + "klci", + "klibor", + "klik", + "km", + "kok", + "komentar", + "kompas", + "komposit", + "kondisi", + "kontrak", + "korban", + "korea", + "kos", + "kota", + "kuala", + "kuasa", + "kukuh", + "kumpulan", + "kurang", + "kurangnya", + "lagi", + "lagian", + "lagu", + "lah", + "lain", + "lainnya", + "laku", + "lalu", + "lama", + "lamanya", + "langkah", + "langsung", + "lanjut", + "lanjutnya", + "laporan", + "laut", + "lebih", + "lembaga", + "lepas", + "lewat", + "lima", + "lingkungan", + "login", + "lokasi", + "lot", + "luar", + "luas", + "lumpur", + "mac", + "macam", + "mahkamah", + "mahu", + "majlis", + "maka", + "makanan", + "makanya", + "makin", + "maklumat", + "malah", + "malahan", + "malam", + "malaysia", + "mampu", + "mampukah", + "mana", + "manakala", + "manalagi", + "mantan", + "manusia", + "masa", + "masalah", + "masalahnya", + "masih", + "masihkah", + "masing", + "masing-masing", + "masuk", + "masyarakat", + "mata", + "mau", + "maupun", + "measure", + "media", + "mei", + "melainkan", + "melakukan", + "melalui", + "melawan", + "melihat", + "melihatnya", + "memandangkan", + "memang", + "memastikan", + "membantu", + "membawa", + "memberi", + "memberikan", + "membolehkan", + "membuat", + "memerlukan", + "memihak", + "memiliki", + "meminta", + "memintakan", + "memisalkan", + "memperbuat", + "mempergunakan", + "memperkirakan", + "memperlihatkan", + "mempersiapkan", + "mempersoalkan", + "mempertanyakan", + "mempunyai", + "memulai", + "memungkinkan", + "menaiki", + "menambah", + "menambahkan", + "menandaskan", + "menanti", + "menantikan", + "menanya", + "menanyai", + "menanyakan", + "menarik", + "menawarkan", + "mencapai", + "mencari", + "mencatatkan", + "mendapat", + "mendapatkan", + "mendatang", + "mendatangi", + "mendatangkan", + "menegaskan", + "menerima", + "menerusi", + "mengadakan", + "mengakhiri", + "mengaku", + "mengalami", + "mengambil", + "mengapa", + "mengatakan", + "mengatakannya", + "mengenai", + "mengerjakan", + "mengetahui", + "menggalakkan", + "menggunakan", + "menghadapi", + "menghendaki", + "mengibaratkan", + "mengibaratkannya", + "mengikut", + "mengingat", + "mengingatkan", + "menginginkan", + "mengira", + "mengucapkan", + "mengucapkannya", + "mengumumkan", + "mengungkapkan", + "mengurangkan", + "meninggal", + "meningkat", + "meningkatkan", + "menjadi", + "menjalani", + "menjawab", + "menjelang", + "menjelaskan", + "menokok", + "menteri", + "menuju", + "menunjuk", + "menunjuki", + "menunjukkan", + "menunjuknya", + "menurut", + "menuturkan", + "menyaksikan", + "menyampaikan", + "menyangkut", + "menyatakan", + "menyebabkan", + "menyebutkan", + "menyediakan", + "menyeluruh", + "menyiapkan", + "merasa", + "mereka", + "merekalah", + "merosot", + "merupakan", + "meski", + "meskipun", + "mesyuarat", + "metrotv", + "meyakini", + "meyakinkan", + "milik", + "militer", + "minat", + "minggu", + "minta", + "minyak", + "mirip", + "misal", + "misalkan", + "misalnya", + "mobil", + "modal", + "mohd", + "mudah", + "mula", + "mulai", + "mulailah", + "mulanya", + "muncul", + "mungkin", + "mungkinkah", + "musik", + "musim", + "nah", + "naik", + "nama", + "namun", + "nanti", + "nantinya", + "nasional", + "negara", + "negara-negara", + "negeri", + "new", + "niaga", + "nilai", + "nomor", + "noun", + "nov", + "november", + "numeral", + "numeralia", + "nya", + "nyaris", + "nyatanya", + "of", + "ogos", + "okt", + "oktober", + "olah", + "oleh", + "olehnya", + "operasi", + "orang", + "organisasi", + "pada", + "padahal", + "padanya", + "pagetracker", + "pagi", + "pak", + "paling", + "pameran", + "panjang", + "pantas", + "papan", + "para", + "paras", + "parlimen", + "partai", + "parti", + "particle", + "pasar", + "pasaran", + "password", + "pasti", + "pastilah", + "pasukan", + "paticle", + "pegawai", + "pejabat", + "pekan", + "pekerja", + "pelabur", + "pelaburan", + "pelancongan", + "pelanggan", + "pelbagai", + "peluang", + "pemain", + "pembangunan", + "pemberita", + "pembinaan", + "pemerintah", + "pemerintahan", + "pemimpin", + "pendapatan", + "pendidikan", + "penduduk", + "penerbangan", + "pengarah", + "pengeluaran", + "pengerusi", + "pengguna", + "penggunaan", + "pengurusan", + "peniaga", + "peningkatan", + "penting", + "pentingnya", + "per", + "perancis", + "perang", + "peratus", + "percuma", + "perdagangan", + "perdana", + "peringkat", + "perjanjian", + "perkara", + "perkhidmatan", + "perladangan", + "perlu", + "perlukah", + "perlunya", + "permintaan", + "pernah", + "perniagaan", + "persekutuan", + "persen", + "persidangan", + "persoalan", + "pertama", + "pertandingan", + "pertanyaan", + "pertanyakan", + "pertubuhan", + "pertumbuhan", + "perubahan", + "perusahaan", + "pesawat", + "peserta", + "petang", + "pihak", + "pihaknya", + "pilihan", + "pinjaman", + "polis", + "polisi", + "politik", + "pos", + "posisi", + "presiden", + "prestasi", + "produk", + "program", + "projek", + "pronomia", + "pronoun", + "proses", + "proton", + "provinsi", + "pt", + "pubdate", + "pukul", + "pula", + "pulau", + "pun", + "punya", + "pusat", + "rabu", + "radio", + "raja", + "rakan", + "rakyat", + "ramai", + "rantau", + "rasa", + "rasanya", + "rata", + "raya", + "rendah", + "republik", + "resmi", + "ribu", + "ringgit", + "root", + "ruang", + "rumah", + "rupa", + "rupanya", + "saat", + "saatnya", + "sabah", + "sabtu", + "sahaja", + "saham", + "saja", + "sajalah", + "sakit", + "salah", + "saling", + "sama", + "sama-sama", + "sambil", + "sampai", + "sampaikan", + "sana", + "sangat", + "sangatlah", + "sarawak", + "satu", + "sawit", + "saya", + "sayalah", + "sdn", + "se", + "sebab", + "sebabnya", + "sebagai", + "sebagaimana", + "sebagainya", + "sebagian", + "sebahagian", + "sebaik", + "sebaiknya", + "sebaliknya", + "sebanyak", + "sebarang", + "sebegini", + "sebegitu", + "sebelah", + "sebelum", + "sebelumnya", + "sebenarnya", + "seberapa", + "sebesar", + "sebetulnya", + "sebisanya", + "sebuah", + "sebut", + "sebutlah", + "sebutnya", + "secara", + "secukupnya", + "sedang", + "sedangkan", + "sedemikian", + "sedikit", + "sedikitnya", + "seenaknya", + "segala", + "segalanya", + "segera", + "segi", + "seharusnya", + "sehingga", + "seingat", + "sejak", + "sejarah", + "sejauh", + "sejenak", + "sejumlah", + "sekadar", + "sekadarnya", + "sekali", + "sekali-kali", + "sekalian", + "sekaligus", + "sekalipun", + "sekarang", + "sekaranglah", + "sekecil", + "seketika", + "sekiranya", + "sekitar", + "sekitarnya", + "sekolah", + "sektor", + "sekurang", + "sekurangnya", + "sekuriti", + "sela", + "selagi", + "selain", + "selaku", + "selalu", + "selama", + "selama-lamanya", + "selamanya", + "selanjutnya", + "selasa", + "selatan", + "selepas", + "seluruh", + "seluruhnya", + "semacam", + "semakin", + "semalam", + "semampu", + "semampunya", + "semasa", + "semasih", + "semata", + "semaunya", + "sementara", + "semisal", + "semisalnya", + "sempat", + "semua", + "semuanya", + "semula", + "sen", + "sendiri", + "sendirian", + "sendirinya", + "senin", + "seolah", + "seolah-olah", + "seorang", + "sepak", + "sepanjang", + "sepantasnya", + "sepantasnyalah", + "seperlunya", + "seperti", + "sepertinya", + "sepihak", + "sept", + "september", + "serangan", + "serantau", + "seri", + "serikat", + "sering", + "seringnya", + "serta", + "serupa", + "sesaat", + "sesama", + "sesampai", + "sesegera", + "sesekali", + "seseorang", + "sesi", + "sesuai", + "sesuatu", + "sesuatunya", + "sesudah", + "sesudahnya", + "setelah", + "setempat", + "setengah", + "seterusnya", + "setiap", + "setiausaha", + "setiba", + "setibanya", + "setidak", + "setidaknya", + "setinggi", + "seusai", + "sewaktu", + "siap", + "siapa", + "siapakah", + "siapapun", + "siaran", + "sidang", + "singapura", + "sini", + "sinilah", + "sistem", + "soal", + "soalnya", + "sokongan", + "sri", + "stasiun", + "suara", + "suatu", + "sudah", + "sudahkah", + "sudahlah", + "sukan", + "suku", + "sumber", + "sungai", + "supaya", + "surat", + "susut", + "syarikat", + "syed", + "tadi", + "tadinya", + "tahap", + "tahu", + "tahun", + "tak", + "tama", + "tambah", + "tambahnya", + "tampak", + "tampaknya", + "tampil", + "tan", + "tanah", + "tandas", + "tandasnya", + "tanggal", + "tanpa", + "tanya", + "tanyakan", + "tanyanya", + "tapi", + "tawaran", + "tegas", + "tegasnya", + "teknologi", + "telah", + "televisi", + "teman", + "tempat", + "tempatan", + "tempo", + "tempoh", + "tenaga", + "tengah", + "tentang", + "tentara", + "tentu", + "tentulah", + "tentunya", + "tepat", + "terakhir", + "terasa", + "terbaik", + "terbang", + "terbanyak", + "terbesar", + "terbuka", + "terdahulu", + "terdapat", + "terdiri", + "terhadap", + "terhadapnya", + "teringat", + "terjadi", + "terjadilah", + "terjadinya", + "terkait", + "terkenal", + "terkira", + "terlalu", + "terlebih", + "terletak", + "terlihat", + "termasuk", + "ternyata", + "tersampaikan", + "tersebut", + "tersebutlah", + "tertentu", + "tertuju", + "terus", + "terutama", + "testimoni", + "testimony", + "tetap", + "tetapi", + "the", + "tiada", + "tiap", + "tiba", + "tidak", + "tidakkah", + "tidaklah", + "tidaknya", + "tiga", + "tim", + "timbalan", + "timur", + "tindakan", + "tinggal", + "tinggi", + "tingkat", + "toh", + "tokoh", + "try", + "tun", + "tunai", + "tunjuk", + "turun", + "turut", + "tutur", + "tuturnya", + "tv", + "uang", + "ucap", + "ucapnya", + "udara", + "ujar", + "ujarnya", + "umum", + "umumnya", + "unescape", + "ungkap", + "ungkapnya", + "unit", + "universitas", + "untuk", + "untung", + "upaya", + "urus", + "usah", + "usaha", + "usai", + "user", + "utama", + "utara", + "var", + "versi", + "waduh", + "wah", + "wahai", + "wakil", + "waktu", + "waktunya", + "walau", + "walaupun", + "wang", + "wanita", + "warga", + "warta", + "wib", + "wilayah", + "wong", + "word", + "ya", + "yaitu", + "yakin", + "yakni", + "yang", + "zaman" +] diff --git a/data/stopwords/stopwords-id.txt b/data/stopwords/stopwords-id.txt deleted file mode 100644 index 418f43f..0000000 --- a/data/stopwords/stopwords-id.txt +++ /dev/null @@ -1,1309 +0,0 @@ -a -abad -acara -aceh -ada -adalah -adanya -adapun -agak -agaknya -agama -agar -agustus -air -akan -akankah -akhir -akhiri -akhirnya -akibat -aku -akulah -alam -album -amat -amatlah -amerika -anak -and -anda -andalah -anggota -antar -antara -antarabangsa -antaranya -apa -apaan -apabila -apakah -apalagi -apatah -api -april -artikel -artinya -as -asal -asalkan -asas -asia -asing -atas -atau -ataukah -ataupun -australia -awal -awalnya -awam -b -badan -bagai -bagaikan -bagaimana -bagaimanakah -bagaimanapun -bagainamakah -bagi -bagian -bahagian -bahan -baharu -bahasa -bahawa -bahkan -bahwa -bahwasannya -bahwasanya -baik -baiknya -bakal -bakalan -balik -bandar -bangsa -bank -banyak -bapak -barang -barangan -barat -baru -baru-baru -bawah -beberapa -begini -beginian -beginikah -beginilah -begitu -begitukah -begitulah -begitupun -bekas -bekerja -belakang -belakangan -belanda -beli -beliau -belum -belumlah -benar -benarkah -benarlah -bentuk -berada -berakhir -berakhirlah -berakhirnya -berapa -berapakah -berapalah -berapapun -berarti -berasal -berat -berawal -berbagai -berbanding -berbeda -berdasarkan -berdatangan -berharap -berhasil -beri -berikan -berikut -berikutan -berikutnya -berita -berjalan -berjaya -berjumlah -berkaitan -berkali -berkali-kali -berkata -berkehendak -berkeinginan -berkenaan -berlainan -berlaku -berlalu -berlangsung -berlebihan -bermacam -bermacam-macam -bermain -bermaksud -bermula -bernama -bernilai -bersama -bersama-sama -bersiap -bertanya -bertemu -berturut -bertutur -berubah -berujar -berupa -besar -besok -betul -betulkah -bhd -biasa -biasanya -bidang -bila -bilakah -bilion -bintang -bisa -bisakah -blog -bn -bola -boleh -bolehkah -bolehlah -buat -bukan -bukankah -bukanlah -bukannya -buku -bulan -bumi -bung -bursa -cadangan -cara -caranya -catch -china -click -code -copyright -cukup -cukupkah -cukuplah -cuma -daerah -dagangan -dahulu -dalam -dan -dana -dapat -dari -daripada -dasar -data -datang -datuk -dekat -demi -demikian -demikianlah -dengan -depan -derivatives -desa -desember -detik -dewan -di -dia -diadakan -diakhiri -diakhirinya -dialah -dianggap -diantara -diantaranya -diberi -diberikan -diberikannya -dibuat -dibuatnya -dibuka -dicatatkan -didapat -didatangkan -didirikan -diduga -digunakan -diibaratkan -diibaratkannya -diingat -diingatkan -diinginkan -dijangka -dijawab -dijelaskan -dijelaskannya -dikarenakan -dikatakan -dikatakannya -dikenal -dikerjakan -diketahui -diketahuinya -dikira -dilakukan -dilalui -dilihat -dimaksud -dimaksudkan -dimaksudkannya -dimaksudnya -dimana -diminta -dimintai -dimisalkan -dimulai -dimulailah -dimulainya -dimungkinkan -dini -diniagakan -dipastikan -diperbuat -diperbuatnya -dipergunakan -diperkirakan -diperlihatkan -diperlukan -diperlukannya -dipersoalkan -dipertanyakan -dipunyai -diri -dirilis -dirinya -dis -disampaikan -disebut -disebutkan -disebutkannya -disember -disini -disinilah -distrik -ditambahkan -ditandaskan -ditanya -ditanyai -ditanyakan -ditegaskan -ditemukan -ditujukan -ditunjuk -ditunjuki -ditunjukkan -ditunjukkannya -ditunjuknya -ditutup -dituturkan -dituturkannya -diucapkan -diucapkannya -diungkapkan -document.write -dolar -dong -dr -dua -dulu -dunia -effective -ekonomi -eksekutif -eksport -empat -enam -enggak -enggaknya -entah -entahlah -era -eropa -err -faedah -feb -film -gat -gedung -gelar -gettracker -global -grup -guna -gunakan -gunung -hadap -hadapan -hal -hampir -hanya -hanyalah -harga -hari -harian -harus -haruslah -harusnya -hasil -hendak -hendaklah -hendaknya -hidup -hingga -https -hubungan -hukum -hutan -i -ia -iaitu -ialah -ibarat -ibaratkan -ibaratnya -ibu -ii -iklan -ikut -ilmu -indeks -india -indonesia -industri -informasi -ingat -inggris -ingin -inginkah -inginkan -ini -inikah -inilah -internasional -islam -isnin -isu -italia -itu -itukah -itulah -jabatan -jadi -jadilah -jadinya -jakarta -jalan -jalur -jaman -jan -jangan -jangankan -janganlah -januari -jauh -jawa -jawab -jawaban -jawabnya -jawatan -jawatankuasa -jelas -jelaskan -jelaslah -jelasnya -jenis -jepang -jepun -jerman -jika -jikalau -jiwa -jual -jualan -juga -julai -jumaat -jumat -jumlah -jumlahnya -jun -juni -justru -juta -kabar -kabupaten -kadar -kala -kalangan -kalau -kalaulah -kalaupun -kali -kalian -kalimantan -kami -kamilah -kamis -kamu -kamulah -kan -kantor -kapal -kapan -kapankah -kapanpun -karena -karenanya -karya -kasus -kata -katakan -katakanlah -katanya -kaunter -kawasan -ke -keadaan -kebetulan -kebutuhan -kecamatan -kecil -kedua -kedua-dua -keduanya -kedudukan -kegiatan -kehidupan -keinginan -kejadian -kekal -kelamaan -kelihatan -kelihatannya -kelima -kelompok -keluar -keluarga -kelurahan -kembali -kementerian -kemudahan -kemudian -kemungkinan -kemungkinannya -kenaikan -kenapa -kenyataan -kepada -kepadanya -kepala -kepentingan -keputusan -kerajaan -kerana -kereta -kerja -kerjasama -kes -kesampaian -keselamatan -keseluruhan -keseluruhannya -kesempatan -kesihatan -keterangan -keterlaluan -ketiga -ketika -ketua -keuntungan -kewangan -khamis -khusus -khususnya -kini -kinilah -kira -kira-kira -kiranya -kita -kitalah -klci -klibor -klik -km -kok -komentar -kompas -komposit -kondisi -kontrak -korban -korea -kos -kota -kuala -kuasa -kukuh -kumpulan -kurang -kurangnya -lagi -lagian -lagu -lah -lain -lainnya -laku -lalu -lama -lamanya -langkah -langsung -lanjut -lanjutnya -laporan -laut -lebih -lembaga -lepas -lewat -lima -lingkungan -login -lokasi -lot -luar -luas -lumpur -mac -macam -mahkamah -mahu -majlis -maka -makanan -makanya -makin -maklumat -malah -malahan -malam -malaysia -mampu -mampukah -mana -manakala -manalagi -mantan -manusia -masa -masalah -masalahnya -masih -masihkah -masing -masing-masing -masuk -masyarakat -mata -mau -maupun -measure -media -mei -melainkan -melakukan -melalui -melawan -melihat -melihatnya -memandangkan -memang -memastikan -membantu -membawa -memberi -memberikan -membolehkan -membuat -memerlukan -memihak -memiliki -meminta -memintakan -memisalkan -memperbuat -mempergunakan -memperkirakan -memperlihatkan -mempersiapkan -mempersoalkan -mempertanyakan -mempunyai -memulai -memungkinkan -menaiki -menambah -menambahkan -menandaskan -menanti -menantikan -menanya -menanyai -menanyakan -menarik -menawarkan -mencapai -mencari -mencatatkan -mendapat -mendapatkan -mendatang -mendatangi -mendatangkan -menegaskan -menerima -menerusi -mengadakan -mengakhiri -mengaku -mengalami -mengambil -mengapa -mengatakan -mengatakannya -mengenai -mengerjakan -mengetahui -menggalakkan -menggunakan -menghadapi -menghendaki -mengibaratkan -mengibaratkannya -mengikut -mengingat -mengingatkan -menginginkan -mengira -mengucapkan -mengucapkannya -mengumumkan -mengungkapkan -mengurangkan -meninggal -meningkat -meningkatkan -menjadi -menjalani -menjawab -menjelang -menjelaskan -menokok -menteri -menuju -menunjuk -menunjuki -menunjukkan -menunjuknya -menurut -menuturkan -menyaksikan -menyampaikan -menyangkut -menyatakan -menyebabkan -menyebutkan -menyediakan -menyeluruh -menyiapkan -merasa -mereka -merekalah -merosot -merupakan -meski -meskipun -mesyuarat -metrotv -meyakini -meyakinkan -milik -militer -minat -minggu -minta -minyak -mirip -misal -misalkan -misalnya -mobil -modal -mohd -mudah -mula -mulai -mulailah -mulanya -muncul -mungkin -mungkinkah -musik -musim -nah -naik -nama -namun -nanti -nantinya -nasional -negara -negara-negara -negeri -new -niaga -nilai -nomor -noun -nov -november -numeral -numeralia -nya -nyaris -nyatanya -of -ogos -okt -oktober -olah -oleh -olehnya -operasi -orang -organisasi -pada -padahal -padanya -pagetracker -pagi -pak -paling -pameran -panjang -pantas -papan -para -paras -parlimen -partai -parti -particle -pasar -pasaran -password -pasti -pastilah -pasukan -paticle -pegawai -pejabat -pekan -pekerja -pelabur -pelaburan -pelancongan -pelanggan -pelbagai -peluang -pemain -pembangunan -pemberita -pembinaan -pemerintah -pemerintahan -pemimpin -pendapatan -pendidikan -penduduk -penerbangan -pengarah -pengeluaran -pengerusi -pengguna -penggunaan -pengurusan -peniaga -peningkatan -penting -pentingnya -per -perancis -perang -peratus -percuma -perdagangan -perdana -peringkat -perjanjian -perkara -perkhidmatan -perladangan -perlu -perlukah -perlunya -permintaan -pernah -perniagaan -persekutuan -persen -persidangan -persoalan -pertama -pertandingan -pertanyaan -pertanyakan -pertubuhan -pertumbuhan -perubahan -perusahaan -pesawat -peserta -petang -pihak -pihaknya -pilihan -pinjaman -polis -polisi -politik -pos -posisi -presiden -prestasi -produk -program -projek -pronomia -pronoun -proses -proton -provinsi -pt -pubdate -pukul -pula -pulau -pun -punya -pusat -rabu -radio -raja -rakan -rakyat -ramai -rantau -rasa -rasanya -rata -raya -rendah -republik -resmi -ribu -ringgit -root -ruang -rumah -rupa -rupanya -saat -saatnya -sabah -sabtu -sahaja -saham -saja -sajalah -sakit -salah -saling -sama -sama-sama -sambil -sampai -sampaikan -sana -sangat -sangatlah -sarawak -satu -sawit -saya -sayalah -sdn -se -sebab -sebabnya -sebagai -sebagaimana -sebagainya -sebagian -sebahagian -sebaik -sebaiknya -sebaliknya -sebanyak -sebarang -sebegini -sebegitu -sebelah -sebelum -sebelumnya -sebenarnya -seberapa -sebesar -sebetulnya -sebisanya -sebuah -sebut -sebutlah -sebutnya -secara -secukupnya -sedang -sedangkan -sedemikian -sedikit -sedikitnya -seenaknya -segala -segalanya -segera -segi -seharusnya -sehingga -seingat -sejak -sejarah -sejauh -sejenak -sejumlah -sekadar -sekadarnya -sekali -sekali-kali -sekalian -sekaligus -sekalipun -sekarang -sekaranglah -sekecil -seketika -sekiranya -sekitar -sekitarnya -sekolah -sektor -sekurang -sekurangnya -sekuriti -sela -selagi -selain -selaku -selalu -selama -selama-lamanya -selamanya -selanjutnya -selasa -selatan -selepas -seluruh -seluruhnya -semacam -semakin -semalam -semampu -semampunya -semasa -semasih -semata -semaunya -sementara -semisal -semisalnya -sempat -semua -semuanya -semula -sen -sendiri -sendirian -sendirinya -senin -seolah -seolah-olah -seorang -sepak -sepanjang -sepantasnya -sepantasnyalah -seperlunya -seperti -sepertinya -sepihak -sept -september -serangan -serantau -seri -serikat -sering -seringnya -serta -serupa -sesaat -sesama -sesampai -sesegera -sesekali -seseorang -sesi -sesuai -sesuatu -sesuatunya -sesudah -sesudahnya -setelah -setempat -setengah -seterusnya -setiap -setiausaha -setiba -setibanya -setidak -setidaknya -setinggi -seusai -sewaktu -siap -siapa -siapakah -siapapun -siaran -sidang -singapura -sini -sinilah -sistem -soal -soalnya -sokongan -sri -stasiun -suara -suatu -sudah -sudahkah -sudahlah -sukan -suku -sumber -sungai -supaya -surat -susut -syarikat -syed -tadi -tadinya -tahap -tahu -tahun -tak -tama -tambah -tambahnya -tampak -tampaknya -tampil -tan -tanah -tandas -tandasnya -tanggal -tanpa -tanya -tanyakan -tanyanya -tapi -tawaran -tegas -tegasnya -teknologi -telah -televisi -teman -tempat -tempatan -tempo -tempoh -tenaga -tengah -tentang -tentara -tentu -tentulah -tentunya -tepat -terakhir -terasa -terbaik -terbang -terbanyak -terbesar -terbuka -terdahulu -terdapat -terdiri -terhadap -terhadapnya -teringat -terjadi -terjadilah -terjadinya -terkait -terkenal -terkira -terlalu -terlebih -terletak -terlihat -termasuk -ternyata -tersampaikan -tersebut -tersebutlah -tertentu -tertuju -terus -terutama -testimoni -testimony -tetap -tetapi -the -tiada -tiap -tiba -tidak -tidakkah -tidaklah -tidaknya -tiga -tim -timbalan -timur -tindakan -tinggal -tinggi -tingkat -toh -tokoh -try -tun -tunai -tunjuk -turun -turut -tutur -tuturnya -tv -uang -ucap -ucapnya -udara -ujar -ujarnya -umum -umumnya -unescape -ungkap -ungkapnya -unit -universitas -untuk -untung -upaya -urus -usah -usaha -usai -user -utama -utara -var -versi -waduh -wah -wahai -wakil -waktu -waktunya -walau -walaupun -wang -wanita -warga -warta -wib -wilayah -wong -word -ya -yaitu -yakin -yakni -yang -zaman \ No newline at end of file diff --git a/data/stopwords/stopwords-it.json b/data/stopwords/stopwords-it.json new file mode 100644 index 0000000..ac00fc5 --- /dev/null +++ b/data/stopwords/stopwords-it.json @@ -0,0 +1,290 @@ +[ + "\ufeffad ", + "al ", + "allo ", + "ai ", + "agli ", + "all ", + "agl ", + "alla ", + "alle ", + "con ", + "col ", + "coi ", + "da ", + "dal ", + "dallo ", + "dai ", + "dagli ", + "dall ", + "dagl ", + "dalla ", + "dalle ", + "di ", + "del ", + "dello ", + "dei ", + "degli ", + "dell ", + "degl ", + "della ", + "delle ", + "in ", + "nel ", + "nello ", + "nei ", + "negli ", + "nell ", + "negl ", + "nella ", + "nelle ", + "su ", + "sul ", + "sullo ", + "sui ", + "sugli ", + "sull ", + "sugl ", + "sulla ", + "sulle ", + "per ", + "tra ", + "contro ", + "io ", + "tu ", + "lui ", + "lei ", + "noi ", + "voi ", + "loro ", + "mio ", + "mia ", + "miei ", + "mie ", + "tuo ", + "tua ", + "tuoi ", + "tue ", + "suo ", + "sua ", + "suoi ", + "sue ", + "nostro ", + "nostra ", + "nostri ", + "nostre ", + "vostro ", + "vostra ", + "vostri ", + "vostre ", + "mi ", + "ti ", + "ci ", + "vi ", + "lo ", + "la ", + "li ", + "le ", + "gli ", + "ne ", + "il ", + "un ", + "uno ", + "una ", + "ma ", + "ed ", + "se ", + "perchè ", + "perché", + "perche", + "anche ", + "come ", + "dov ", + "dove ", + "che ", + "chi ", + "cui ", + "non ", + "più ", + "piu", + "quale ", + "quanto ", + "quanti ", + "quanta ", + "quante ", + "quello ", + "quelli ", + "quella ", + "quelle ", + "questo ", + "questi ", + "questa ", + "queste ", + "si ", + "tutto ", + "tutti ", + "a ", + "c ", + "e ", + "i ", + "l ", + "o ", + "ho", + "hai", + "ha", + "abbiamo", + "avete", + "hanno", + "abbia", + "abbiate", + "abbiano", + "avrò", + "avro", + "avrai", + "avrà", + "avra", + "avremo", + "avrete", + "avranno", + "avrei", + "avresti", + "avrebbe", + "avremmo", + "avreste", + "avrebbero", + "avevo", + "avevi", + "aveva", + "avevamo", + "avevate", + "avevano", + "ebbi", + "avesti", + "ebbe", + "avemmo", + "aveste", + "ebbero", + "avessi", + "avesse", + "avessimo", + "avessero", + "avendo", + "avuto", + "avuta", + "avuti", + "avute", + "sono", + "sei", + "è", + "é", + "e", + "siamo", + "siete", + "sia", + "siate", + "siano", + "sarà", + "sarai", + "sarò", + "saro", + "saremo", + "sarete", + "saranno", + "sarei", + "saresti", + "sarebbe", + "saremmo", + "sareste", + "sarebbero", + "ero", + "eri", + "era", + "eravamo", + "eravate", + "erano", + "fui", + "fosti", + "fu", + "fummo", + "foste", + "furono", + "fossi", + "fosse", + "fossimo", + "fossero", + "essendo", + "faccio", + "fai", + "facciamo", + "fanno", + "faccia", + "facciate", + "facciano", + "farà", + "farai", + "farò", + "faremo", + "farete", + "faranno", + "farei", + "faresti", + "farebbe", + "faremmo", + "fareste", + "farebbero", + "facevo", + "facevi", + "faceva", + "facevamo", + "facevate", + "facevano", + "feci", + "facesti", + "fece", + "facemmo", + "faceste", + "fecero", + "facessi", + "facesse", + "facessimo", + "facessero", + "facendo", + "sto", + "stai", + "sta", + "stiamo", + "stanno", + "stia", + "stiate", + "stiano", + "starà", + "starai", + "starò", + "staremo", + "starete", + "staranno", + "starei", + "staresti", + "starebbe", + "staremmo", + "stareste", + "starebbero", + "stavo", + "stavi", + "stava", + "stavamo", + "stavate", + "stavano", + "stetti", + "stesti", + "stette", + "stemmo", + "steste", + "stettero", + "stessi", + "stesse", + "stessimo", + "stessero", + "stando", + "" +] diff --git a/data/stopwords/stopwords-it.txt b/data/stopwords/stopwords-it.txt deleted file mode 100644 index 98ffee1..0000000 --- a/data/stopwords/stopwords-it.txt +++ /dev/null @@ -1,287 +0,0 @@ -ad -al -allo -ai -agli -all -agl -alla -alle -con -col -coi -da -dal -dallo -dai -dagli -dall -dagl -dalla -dalle -di -del -dello -dei -degli -dell -degl -della -delle -in -nel -nello -nei -negli -nell -negl -nella -nelle -su -sul -sullo -sui -sugli -sull -sugl -sulla -sulle -per -tra -contro -io -tu -lui -lei -noi -voi -loro -mio -mia -miei -mie -tuo -tua -tuoi -tue -suo -sua -suoi -sue -nostro -nostra -nostri -nostre -vostro -vostra -vostri -vostre -mi -ti -ci -vi -lo -la -li -le -gli -ne -il -un -uno -una -ma -ed -se -perchè -perché -perche -anche -come -dov -dove -che -chi -cui -non -più -piu -quale -quanto -quanti -quanta -quante -quello -quelli -quella -quelle -questo -questi -questa -queste -si -tutto -tutti -a -c -e -i -l -o -ho -hai -ha -abbiamo -avete -hanno -abbia -abbiate -abbiano -avrò -avro -avrai -avrà -avra -avremo -avrete -avranno -avrei -avresti -avrebbe -avremmo -avreste -avrebbero -avevo -avevi -aveva -avevamo -avevate -avevano -ebbi -avesti -ebbe -avemmo -aveste -ebbero -avessi -avesse -avessimo -avessero -avendo -avuto -avuta -avuti -avute -sono -sei -è -é -e -siamo -siete -sia -siate -siano -sarà -sarai -sarò -saro -saremo -sarete -saranno -sarei -saresti -sarebbe -saremmo -sareste -sarebbero -ero -eri -era -eravamo -eravate -erano -fui -fosti -fu -fummo -foste -furono -fossi -fosse -fossimo -fossero -essendo -faccio -fai -facciamo -fanno -faccia -facciate -facciano -farà -farai -farò -faremo -farete -faranno -farei -faresti -farebbe -faremmo -fareste -farebbero -facevo -facevi -faceva -facevamo -facevate -facevano -feci -facesti -fece -facemmo -faceste -fecero -facessi -facesse -facessimo -facessero -facendo -sto -stai -sta -stiamo -stanno -stia -stiate -stiano -starà -starai -starò -staremo -starete -staranno -starei -staresti -starebbe -staremmo -stareste -starebbero -stavo -stavi -stava -stavamo -stavate -stavano -stetti -stesti -stette -stemmo -steste -stettero -stessi -stesse -stessimo -stessero -stando diff --git a/data/stopwords/stopwords-ko.json b/data/stopwords/stopwords-ko.json new file mode 100644 index 0000000..195bbca --- /dev/null +++ b/data/stopwords/stopwords-ko.json @@ -0,0 +1,72 @@ +[ + "을", + "의", + "에", + "이", + "를", + "으로", + "은", + "는", + "가", + "로", + "하고", + "과", + "에서", + "도", + "와", + "이다", + "고", + "부터", + "까지", + "께", + "에는", + "이라고", + "만", + "라고", + "보다", + "에도", + "다", + "토록", + "에게", + "나", + "대로", + "에서는", + "이나", + "이며", + "요", + "든", + "으로써", + "같이", + "로는", + "밖에", + "과의", + "며", + "로부터", + "처럼", + "아", + "라", + "여", + "으로는", + "이고", + "에서의", + "이라는", + "만에", + "으로부터", + "에서도", + "와의", + "엔", + "만을", + "부터는", + "만의", + "야", + "까지의", + "과는", + "치고", + "과를", + "으로의", + "까지는", + "보다는", + "만이", + "에만", + "로의" +] diff --git a/data/stopwords/stopwords-ko.txt b/data/stopwords/stopwords-ko.txt deleted file mode 100644 index a6746f7..0000000 --- a/data/stopwords/stopwords-ko.txt +++ /dev/null @@ -1,70 +0,0 @@ -을 -의 -에 -이 -를 -으로 -은 -는 -가 -로 -하고 -과 -에서 -도 -와 -이다 -고 -부터 -까지 -께 -에는 -이라고 -만 -라고 -보다 -에도 -다 -토록 -에게 -나 -대로 -에서는 -이나 -이며 -요 -든 -으로써 -같이 -로는 -밖에 -과의 -며 -로부터 -처럼 -아 -라 -여 -으로는 -이고 -에서의 -이라는 -만에 -으로부터 -에서도 -와의 -엔 -만을 -부터는 -만의 -야 -까지의 -과는 -치고 -과를 -으로의 -까지는 -보다는 -만이 -에만 -로의 \ No newline at end of file diff --git a/data/stopwords/stopwords-nb.json b/data/stopwords/stopwords-nb.json new file mode 100644 index 0000000..582cf68 --- /dev/null +++ b/data/stopwords/stopwords-nb.json @@ -0,0 +1,120 @@ +[ + "alle", + "andre", + "arbeid", + "av", + "begge", + "bort", + "bra", + "bruke", + "da", + "denne", + "der", + "deres", + "det", + "din", + "disse", + "du", + "eller", + "en", + "ene", + "eneste", + "enhver", + "enn", + "er", + "et", + "folk", + "for", + "fordi", + "forsÛke", + "fra", + "fÅ", + "fÛr", + "fÛrst", + "gjorde", + "gjÛre", + "god", + "gÅ", + "ha", + "hadde", + "han", + "hans", + "hennes", + "her", + "hva", + "hvem", + "hver", + "hvilken", + "hvis", + "hvor", + "hvordan", + "hvorfor", + "ikke", + "inn", + "innen", + "kan", + "kunne", + "lage", + "lang", + "lik", + "like", + "makt", + "mange", + "med", + "meg", + "meget", + "men", + "mens", + "mer", + "mest", + "min", + "mye", + "mÅ", + "mÅte", + "navn", + "nei", + "ny", + "nÅ", + "nÅr", + "og", + "ogsÅ", + "om", + "opp", + "oss", + "over", + "part", + "punkt", + "pÅ", + "rett", + "riktig", + "samme", + "sant", + "si", + "siden", + "sist", + "skulle", + "slik", + "slutt", + "som", + "start", + "stille", + "tid", + "til", + "tilbake", + "tilstand", + "under", + "ut", + "uten", + "var", + "ved", + "verdi", + "vi", + "vil", + "ville", + "vite", + "vÅr", + "vÖre", + "vÖrt", + "Å", + "" +] diff --git a/data/stopwords/stopwords-nb.txt b/data/stopwords/stopwords-nb.txt deleted file mode 100644 index bb9edb1..0000000 --- a/data/stopwords/stopwords-nb.txt +++ /dev/null @@ -1,117 +0,0 @@ -alle -andre -arbeid -av -begge -bort -bra -bruke -da -denne -der -deres -det -din -disse -du -eller -en -ene -eneste -enhver -enn -er -et -folk -for -fordi -forsÛke -fra -fÅ -fÛr -fÛrst -gjorde -gjÛre -god -gÅ -ha -hadde -han -hans -hennes -her -hva -hvem -hver -hvilken -hvis -hvor -hvordan -hvorfor -ikke -inn -innen -kan -kunne -lage -lang -lik -like -makt -mange -med -meg -meget -men -mens -mer -mest -min -mye -mÅ -mÅte -navn -nei -ny -nÅ -nÅr -og -ogsÅ -om -opp -oss -over -part -punkt -pÅ -rett -riktig -samme -sant -si -siden -sist -skulle -slik -slutt -som -start -stille -tid -til -tilbake -tilstand -under -ut -uten -var -ved -verdi -vi -vil -ville -vite -vÅr -vÖre -vÖrt -Å diff --git a/data/stopwords/stopwords-nl.json b/data/stopwords/stopwords-nl.json new file mode 100644 index 0000000..2f31591 --- /dev/null +++ b/data/stopwords/stopwords-nl.json @@ -0,0 +1,51 @@ +[ + "aan", + "af", + "al", + "als", + "bij", + "dan", + "dat", + "die", + "dit", + "een", + "en", + "er", + "had", + "heb", + "hem", + "het", + "hij", + "hoe", + "hun", + "ik", + "in", + "is", + "je", + "kan", + "me", + "men", + "met", + "mij", + "nog", + "nu", + "of", + "ons", + "ook", + "te", + "tot", + "uit", + "van", + "was", + "wat", + "we", + "wel", + "wij", + "zal", + "ze", + "zei", + "zij", + "zo", + "zou", + "" +] diff --git a/data/stopwords/stopwords-nl.txt b/data/stopwords/stopwords-nl.txt deleted file mode 100644 index 300c368..0000000 --- a/data/stopwords/stopwords-nl.txt +++ /dev/null @@ -1,48 +0,0 @@ -aan -af -al -als -bij -dan -dat -die -dit -een -en -er -had -heb -hem -het -hij -hoe -hun -ik -in -is -je -kan -me -men -met -mij -nog -nu -of -ons -ook -te -tot -uit -van -was -wat -we -wel -wij -zal -ze -zei -zij -zo -zou diff --git a/data/stopwords/stopwords-no.json b/data/stopwords/stopwords-no.json new file mode 100644 index 0000000..7bb6e51 --- /dev/null +++ b/data/stopwords/stopwords-no.json @@ -0,0 +1,122 @@ +[ + "at", + "av", + "de", + "den", + "der", + "det", + "du", + "en", + "er", + "et", + "for", + "fra", + "før", + "med", + "og", + "om", + "over", + "på", + "som", + "til", + "ved", + "år", + "alle", + "bare", + "ble", + "bort", + "bra", + "da", + "deg", + "dem", + "denne", + "dere", + "deres", + "det", + "dette", + "din", + "disse", + "dit", + "ditt", + "eller", + "ene", + "enn", + "er", + "et", + "ett", + "etter", + "for", + "fram", + "først", + "få", + "god", + "gå", + "ha", + "han", + "hans", + "har", + "her", + "hit", + "hun", + "hva", + "hvem", + "hver", + "ikke", + "inn", + "ja", + "jeg", + "kan", + "kom", + "kun", + "kunne", + "lage", + "lang", + "lik", + "like", + "man", + "mer", + "min", + "mot", + "mye", + "må", + "måte", + "ned", + "nei", + "noe", + "noen", + "ny", + "nå", + "når", + "også", + "opp", + "oss", + "seg", + "selv", + "si", + "siden", + "sin", + "sine", + "sist", + "skal", + "skulle", + "slik", + "som", + "så", + "sånn", + "tid", + "til", + "under", + "ut", + "uten", + "var", + "ved", + "vi", + "vil", + "vite", + "vår", + "å", + "dei", + "di", + "då", + "eg" +] diff --git a/data/stopwords/stopwords-no.txt b/data/stopwords/stopwords-no.txt deleted file mode 100644 index 4b14918..0000000 --- a/data/stopwords/stopwords-no.txt +++ /dev/null @@ -1,120 +0,0 @@ -at -av -de -den -der -det -du -en -er -et -for -fra -før -med -og -om -over -på -som -til -ved -år -alle -bare -ble -bort -bra -da -deg -dem -denne -dere -deres -det -dette -din -disse -dit -ditt -eller -ene -enn -er -et -ett -etter -for -fram -først -få -god -gå -ha -han -hans -har -her -hit -hun -hva -hvem -hver -ikke -inn -ja -jeg -kan -kom -kun -kunne -lage -lang -lik -like -man -mer -min -mot -mye -må -måte -ned -nei -noe -noen -ny -nå -når -også -opp -oss -seg -selv -si -siden -sin -sine -sist -skal -skulle -slik -som -så -sånn -tid -til -under -ut -uten -var -ved -vi -vil -vite -vår -å -dei -di -då -eg \ No newline at end of file diff --git a/data/stopwords/stopwords-pl.json b/data/stopwords/stopwords-pl.json new file mode 100644 index 0000000..cd7eba8 --- /dev/null +++ b/data/stopwords/stopwords-pl.json @@ -0,0 +1,279 @@ +[ + "a", + "aby", + "ach", + "acz", + "aczkolwiek", + "aj", + "albo", + "ale", + "ależ", + "ani", + "aż", + "bardziej", + "bardzo", + "bo", + "bowiem", + "by", + "byli", + "bynajmniej", + "być", + "był", + "była", + "było", + "były", + "będzie", + "będą", + "cali", + "cała", + "cały", + "ci", + "cię", + "ciebie", + "co", + "cokolwiek", + "coś", + "czasami", + "czasem", + "czemu", + "czy", + "czyli", + "daleko", + "dla", + "dlaczego", + "dlatego", + "do", + "dobrze", + "dokąd", + "dość", + "dużo", + "dwa", + "dwaj", + "dwie", + "dwoje", + "dziś", + "dzisiaj", + "gdy", + "gdyby", + "gdyż", + "gdzie", + "gdziekolwiek", + "gdzieś", + "i", + "ich", + "ile", + "im", + "inna", + "inne", + "inny", + "innych", + "iż", + "ja", + "ją", + "jak", + "jakaś", + "jakby", + "jaki", + "jakichś", + "jakie", + "jakiś", + "jakiż", + "jakkolwiek", + "jako", + "jakoś", + "je", + "jeden", + "jedna", + "jedno", + "jednak", + "jednakże", + "jego", + "jej", + "jemu", + "jest", + "jestem", + "jeszcze", + "jeśli", + "jeżeli", + "już", + "ją", + "każdy", + "kiedy", + "kilka", + "kimś", + "kto", + "ktokolwiek", + "ktoś", + "która", + "które", + "którego", + "której", + "który", + "których", + "którym", + "którzy", + "ku", + "lat", + "lecz", + "lub", + "ma", + "mają", + "mało", + "mam", + "mi", + "mimo", + "między", + "mną", + "mnie", + "mogą", + "moi", + "moim", + "moja", + "moje", + "może", + "możliwe", + "można", + "mój", + "mu", + "musi", + "my", + "na", + "nad", + "nam", + "nami", + "nas", + "nasi", + "nasz", + "nasza", + "nasze", + "naszego", + "naszych", + "natomiast", + "natychmiast", + "nawet", + "nią", + "nic", + "nich", + "nie", + "niech", + "niego", + "niej", + "niemu", + "nigdy", + "nim", + "nimi", + "niż", + "no", + "o", + "obok", + "od", + "około", + "on", + "ona", + "one", + "oni", + "ono", + "oraz", + "oto", + "owszem", + "pan", + "pana", + "pani", + "po", + "pod", + "podczas", + "pomimo", + "ponad", + "ponieważ", + "powinien", + "powinna", + "powinni", + "powinno", + "poza", + "prawie", + "przecież", + "przed", + "przede", + "przedtem", + "przez", + "przy", + "roku", + "również", + "sam", + "sama", + "są", + "się", + "skąd", + "sobie", + "sobą", + "sposób", + "swoje", + "ta", + "tak", + "taka", + "taki", + "takie", + "także", + "tam", + "te", + "tego", + "tej", + "temu", + "ten", + "teraz", + "też", + "to", + "tobą", + "tobie", + "toteż", + "trzeba", + "tu", + "tutaj", + "twoi", + "twoim", + "twoja", + "twoje", + "twym", + "twój", + "ty", + "tych", + "tylko", + "tym", + "u", + "w", + "wam", + "wami", + "was", + "wasz", + "wasza", + "wasze", + "we", + "według", + "wiele", + "wielu", + "więc", + "więcej", + "wszyscy", + "wszystkich", + "wszystkie", + "wszystkim", + "wszystko", + "wtedy", + "wy", + "właśnie", + "z", + "za", + "zapewne", + "zawsze", + "ze", + "zł", + "znowu", + "znów", + "został", + "żaden", + "żadna", + "żadne", + "żadnych", + "że", + "żeby" +] diff --git a/data/stopwords/stopwords-pl.txt b/data/stopwords/stopwords-pl.txt deleted file mode 100644 index 93dac82..0000000 --- a/data/stopwords/stopwords-pl.txt +++ /dev/null @@ -1,277 +0,0 @@ -a -aby -ach -acz -aczkolwiek -aj -albo -ale -ależ -ani -aż -bardziej -bardzo -bo -bowiem -by -byli -bynajmniej -być -był -była -było -były -będzie -będą -cali -cała -cały -ci -cię -ciebie -co -cokolwiek -coś -czasami -czasem -czemu -czy -czyli -daleko -dla -dlaczego -dlatego -do -dobrze -dokąd -dość -dużo -dwa -dwaj -dwie -dwoje -dziś -dzisiaj -gdy -gdyby -gdyż -gdzie -gdziekolwiek -gdzieś -i -ich -ile -im -inna -inne -inny -innych -iż -ja -ją -jak -jakaś -jakby -jaki -jakichś -jakie -jakiś -jakiż -jakkolwiek -jako -jakoś -je -jeden -jedna -jedno -jednak -jednakże -jego -jej -jemu -jest -jestem -jeszcze -jeśli -jeżeli -już -ją -każdy -kiedy -kilka -kimś -kto -ktokolwiek -ktoś -która -które -którego -której -który -których -którym -którzy -ku -lat -lecz -lub -ma -mają -mało -mam -mi -mimo -między -mną -mnie -mogą -moi -moim -moja -moje -może -możliwe -można -mój -mu -musi -my -na -nad -nam -nami -nas -nasi -nasz -nasza -nasze -naszego -naszych -natomiast -natychmiast -nawet -nią -nic -nich -nie -niech -niego -niej -niemu -nigdy -nim -nimi -niż -no -o -obok -od -około -on -ona -one -oni -ono -oraz -oto -owszem -pan -pana -pani -po -pod -podczas -pomimo -ponad -ponieważ -powinien -powinna -powinni -powinno -poza -prawie -przecież -przed -przede -przedtem -przez -przy -roku -również -sam -sama -są -się -skąd -sobie -sobą -sposób -swoje -ta -tak -taka -taki -takie -także -tam -te -tego -tej -temu -ten -teraz -też -to -tobą -tobie -toteż -trzeba -tu -tutaj -twoi -twoim -twoja -twoje -twym -twój -ty -tych -tylko -tym -u -w -wam -wami -was -wasz -wasza -wasze -we -według -wiele -wielu -więc -więcej -wszyscy -wszystkich -wszystkie -wszystkim -wszystko -wtedy -wy -właśnie -z -za -zapewne -zawsze -ze -zł -znowu -znów -został -żaden -żadna -żadne -żadnych -że -żeby \ No newline at end of file diff --git a/data/stopwords/stopwords-pt.json b/data/stopwords/stopwords-pt.json new file mode 100644 index 0000000..59b94c2 --- /dev/null +++ b/data/stopwords/stopwords-pt.json @@ -0,0 +1,611 @@ +[ + "a", + "à", + "abril", + "agosto", + "ainda", + "ano", + "anos", + "ao", + "aos", + "apenas", + "as", + "às", + "até", + "brasil", + "com", + "como", + "contra", + "da", + "das", + "de", + "depois", + "deve", + "dezembro", + "dia", + "disse", + "diz", + "do", + "dois", + "dos", + "e", + "é", + "ela", + "ele", + "em", + "entre", + "era", + "está", + "estado", + "estão", + "eu", + "foi", + "folha", + "foram", + "governo", + "grande", + "há", + "hoje", + "isso", + "já", + "local", + "maio", + "maior", + "mais", + "mas", + "mercado", + "mesmo", + "mil", + "milhões", + "muito", + "mundo", + "na", + "não", + "nas", + "no", + "nos", + "o", + "ontem", + "os", + "ou", + "país", + "para", + "paulo", + "pela", + "pelo", + "pessoas", + "pode", + "por", + "porque", + "presidente", + "quando", + "que", + "quem", + "r", + "rio", + "são", + "se", + "segundo", + "sem", + "ser", + "será", + "seu", + "seus", + "só", + "sobre", + "sua", + "também", + "tem", + "ter", + "todos", + "três", + "um", + "uma", + "us", + "vaia", + "à", + "acordo", + "afirmou", + "agora", + "ainda", + "além", + "alguns", + "ano", + "anos", + "antes", + "ao", + "aos", + "apenas", + "as", + "às", + "assim", + "até", + "banco", + "bem", + "brasil", + "brasileira", + "brasileiro", + "brasília", + "cada", + "carlos", + "casa", + "caso", + "cerca", + "cidade", + "com", + "como", + "congresso", + "contra", + "da", + "das", + "de", + "depois", + "desde", + "deve", + "dia", + "dias", + "dinheiro", + "disse", + "diz", + "do", + "dois", + "dos", + "duas", + "durante", + "e", + "é", + "economia", + "ela", + "ele", + "eles", + "em", + "empresa", + "empresas", + "entre", + "era", + "especial", + "essa", + "esse", + "esta", + "está", + "estado", + "estão", + "estava", + "este", + "eu", + "eua", + "exemplo", + "faz", + "fazer", + "federal", + "fernando", + "fevereiro", + "fhc", + "filme", + "final", + "foi", + "folha", + "foram", + "forma", + "governo", + "grande", + "grupo", + "há", + "henrique", + "história", + "hoje", + "inflação", + "isso", + "já", + "janeiro", + "josé", + "lei", + "local", + "maior", + "mais", + "março", + "mas", + "me", + "melhor", + "menos", + "mercado", + "mês", + "meses", + "mesmo", + "mil", + "milhões", + "ministro", + "muito", + "mundo", + "na", + "nacional", + "nada", + "não", + "nas", + "nem", + "no", + "nos", + "nova", + "novo", + "o", + "onde", + "ontem", + "os", + "ou", + "outra", + "outro", + "outros", + "outubro", + "país", + "para", + "parte", + "partir", + "passado", + "paulo", + "pela", + "pelo", + "pelos", + "pessoas", + "plano", + "pode", + "polícia", + "política", + "por", + "porque", + "preços", + "presidente", + "primeira", + "primeiro", + "programa", + "projeto", + "público", + "qual", + "qualquer", + "quando", + "quatro", + "que", + "quem", + "r", + "real", + "reportagem", + "rio", + "são", + "se", + "segundo", + "seja", + "sem", + "semana", + "sempre", + "sendo", + "ser", + "será", + "seria", + "seu", + "seus", + "sistema", + "só", + "sobre", + "sp", + "sua", + "suas", + "também", + "tem", + "têm", + "tempo", + "ter", + "todo", + "todos", + "trabalho", + "três", + "tudo", + "um", + "uma", + "us", + "vai", + "vez", + "vida", + "vocêa", + "à", + "acordo", + "afirma", + "afirmou", + "agora", + "ainda", + "além", + "alguns", + "ano", + "anos", + "antes", + "ao", + "aos", + "apenas", + "após", + "aqui", + "área", + "as", + "às", + "assim", + "até", + "aumento", + "banco", + "bem", + "bilhões", + "bom", + "brasil", + "brasileira", + "brasileiro", + "brasília", + "cada", + "câmara", + "campanha", + "candidato", + "carlos", + "casa", + "caso", + "central", + "centro", + "cerca", + "cidade", + "cinco", + "cinema", + "coisa", + "com", + "como", + "congresso", + "conta", + "contra", + "da", + "dar", + "das", + "de", + "depois", + "deputado", + "desde", + "deve", + "dia", + "dias", + "dinheiro", + "direito", + "diretor", + "disse", + "diz", + "do", + "dois", + "dos", + "duas", + "durante", + "e", + "é", + "economia", + "econômica", + "ela", + "ele", + "eles", + "em", + "empresa", + "empresas", + "enquanto", + "então", + "entre", + "equipe", + "era", + "especial", + "essa", + "esse", + "esta", + "está", + "estado", + "estados", + "estão", + "estava", + "este", + "eu", + "eua", + "exemplo", + "falta", + "fato", + "faz", + "fazer", + "federal", + "fernando", + "fez", + "fhc", + "ficou", + "filho", + "filme", + "fim", + "final", + "foi", + "folha", + "fora", + "foram", + "forma", + "governo", + "grande", + "grupo", + "há", + "havia", + "henrique", + "história", + "hoje", + "inflação", + "início", + "isso", + "já", + "janeiro", + "jogo", + "josé", + "junho", + "julho", + "juros", + "justiça", + "lado", + "lei", + "livro", + "local", + "lugar", + "maior", + "mais", + "mas", + "me", + "média", + "meio", + "melhor", + "menos", + "mercado", + "mês", + "meses", + "mesma", + "mesmo", + "meu", + "mil", + "milhões", + "minha", + "ministério", + "ministro", + "momento", + "muito", + "mulher", + "mundo", + "na", + "nacional", + "nada", + "não", + "nas", + "nem", + "neste", + "no", + "noite", + "nome", + "nos", + "nós", + "nova", + "novembro", + "novo", + "num", + "numa", + "número", + "o", + "onde", + "ontem", + "os", + "ou", + "outra", + "outras", + "outro", + "outros", + "país", + "países", + "para", + "parte", + "partido", + "partir", + "passado", + "paulo", + "pela", + "pelo", + "pelos", + "período", + "pesquisa", + "pessoas", + "plano", + "pode", + "podem", + "poder", + "polícia", + "política", + "pontos", + "por", + "porque", + "pouco", + "prazo", + "preço", + "preços", + "presidente", + "primeira", + "primeiro", + "problema", + "problemas", + "processo", + "produção", + "produtos", + "programa", + "projeto", + "próprio", + "pt", + "público", + "qual", + "qualquer", + "quando", + "quanto", + "quase", + "quatro", + "que", + "quem", + "quer", + "r", + "real", + "recursos", + "região", + "relação", + "reportagem", + "rio", + "são", + "saúde", + "se", + "segundo", + "seja", + "sem", + "semana", + "sempre", + "sendo", + "ser", + "será", + "serão", + "seria", + "setembro", + "setor", + "seu", + "seus", + "sido", + "silva", + "sistema", + "só", + "sobre", + "social", + "sociedade", + "sp", + "sua", + "suas", + "sucursal", + "sul", + "também", + "tão", + "tel", + "tem", + "têm", + "tempo", + "ter", + "teve", + "tinha", + "toda", + "todas", + "todo", + "todos", + "trabalho", + "três", + "tudo", + "último", + "um", + "uma", + "us", + "vai", + "valor", + "vão", + "vem", + "vez", + "vezes", + "vida", + "você", + "zona" +] diff --git a/data/stopwords/stopwords-pt.txt b/data/stopwords/stopwords-pt.txt deleted file mode 100644 index 4739f55..0000000 --- a/data/stopwords/stopwords-pt.txt +++ /dev/null @@ -1,609 +0,0 @@ -a -à -abril -agosto -ainda -ano -anos -ao -aos -apenas -as -às -até -brasil -com -como -contra -da -das -de -depois -deve -dezembro -dia -disse -diz -do -dois -dos -e -é -ela -ele -em -entre -era -está -estado -estão -eu -foi -folha -foram -governo -grande -há -hoje -isso -já -local -maio -maior -mais -mas -mercado -mesmo -mil -milhões -muito -mundo -na -não -nas -no -nos -o -ontem -os -ou -país -para -paulo -pela -pelo -pessoas -pode -por -porque -presidente -quando -que -quem -r -rio -são -se -segundo -sem -ser -será -seu -seus -só -sobre -sua -também -tem -ter -todos -três -um -uma -us -vaia -à -acordo -afirmou -agora -ainda -além -alguns -ano -anos -antes -ao -aos -apenas -as -às -assim -até -banco -bem -brasil -brasileira -brasileiro -brasília -cada -carlos -casa -caso -cerca -cidade -com -como -congresso -contra -da -das -de -depois -desde -deve -dia -dias -dinheiro -disse -diz -do -dois -dos -duas -durante -e -é -economia -ela -ele -eles -em -empresa -empresas -entre -era -especial -essa -esse -esta -está -estado -estão -estava -este -eu -eua -exemplo -faz -fazer -federal -fernando -fevereiro -fhc -filme -final -foi -folha -foram -forma -governo -grande -grupo -há -henrique -história -hoje -inflação -isso -já -janeiro -josé -lei -local -maior -mais -março -mas -me -melhor -menos -mercado -mês -meses -mesmo -mil -milhões -ministro -muito -mundo -na -nacional -nada -não -nas -nem -no -nos -nova -novo -o -onde -ontem -os -ou -outra -outro -outros -outubro -país -para -parte -partir -passado -paulo -pela -pelo -pelos -pessoas -plano -pode -polícia -política -por -porque -preços -presidente -primeira -primeiro -programa -projeto -público -qual -qualquer -quando -quatro -que -quem -r -real -reportagem -rio -são -se -segundo -seja -sem -semana -sempre -sendo -ser -será -seria -seu -seus -sistema -só -sobre -sp -sua -suas -também -tem -têm -tempo -ter -todo -todos -trabalho -três -tudo -um -uma -us -vai -vez -vida -vocêa -à -acordo -afirma -afirmou -agora -ainda -além -alguns -ano -anos -antes -ao -aos -apenas -após -aqui -área -as -às -assim -até -aumento -banco -bem -bilhões -bom -brasil -brasileira -brasileiro -brasília -cada -câmara -campanha -candidato -carlos -casa -caso -central -centro -cerca -cidade -cinco -cinema -coisa -com -como -congresso -conta -contra -da -dar -das -de -depois -deputado -desde -deve -dia -dias -dinheiro -direito -diretor -disse -diz -do -dois -dos -duas -durante -e -é -economia -econômica -ela -ele -eles -em -empresa -empresas -enquanto -então -entre -equipe -era -especial -essa -esse -esta -está -estado -estados -estão -estava -este -eu -eua -exemplo -falta -fato -faz -fazer -federal -fernando -fez -fhc -ficou -filho -filme -fim -final -foi -folha -fora -foram -forma -governo -grande -grupo -há -havia -henrique -história -hoje -inflação -início -isso -já -janeiro -jogo -josé -junho -julho -juros -justiça -lado -lei -livro -local -lugar -maior -mais -mas -me -média -meio -melhor -menos -mercado -mês -meses -mesma -mesmo -meu -mil -milhões -minha -ministério -ministro -momento -muito -mulher -mundo -na -nacional -nada -não -nas -nem -neste -no -noite -nome -nos -nós -nova -novembro -novo -num -numa -número -o -onde -ontem -os -ou -outra -outras -outro -outros -país -países -para -parte -partido -partir -passado -paulo -pela -pelo -pelos -período -pesquisa -pessoas -plano -pode -podem -poder -polícia -política -pontos -por -porque -pouco -prazo -preço -preços -presidente -primeira -primeiro -problema -problemas -processo -produção -produtos -programa -projeto -próprio -pt -público -qual -qualquer -quando -quanto -quase -quatro -que -quem -quer -r -real -recursos -região -relação -reportagem -rio -são -saúde -se -segundo -seja -sem -semana -sempre -sendo -ser -será -serão -seria -setembro -setor -seu -seus -sido -silva -sistema -só -sobre -social -sociedade -sp -sua -suas -sucursal -sul -também -tão -tel -tem -têm -tempo -ter -teve -tinha -toda -todas -todo -todos -trabalho -três -tudo -último -um -uma -us -vai -valor -vão -vem -vez -vezes -vida -você -zona \ No newline at end of file diff --git a/data/stopwords/stopwords-ru.json b/data/stopwords/stopwords-ru.json new file mode 100644 index 0000000..11ac8c2 --- /dev/null +++ b/data/stopwords/stopwords-ru.json @@ -0,0 +1,424 @@ +[ + "\ufeffа\r", + "е\r", + "и\r", + "ж\r", + "м\r", + "о\r", + "на\r", + "не\r", + "ни\r", + "об\r", + "но\r", + "он\r", + "мне\r", + "мои\r", + "мож\r", + "она\r", + "они\r", + "оно\r", + "мной\r", + "много\r", + "многочисленное\r", + "многочисленная\r", + "многочисленные\r", + "многочисленный\r", + "мною\r", + "мой\r", + "мог\r", + "могут\r", + "можно\r", + "может\r", + "можхо\r", + "мор\r", + "моя\r", + "моё\r", + "мочь\r", + "над\r", + "нее\r", + "оба\r", + "нам\r", + "нем\r", + "нами\r", + "ними\r", + "мимо\r", + "немного\r", + "одной\r", + "одного\r", + "менее\r", + "однажды\r", + "однако\r", + "меня\r", + "нему\r", + "меньше\r", + "ней\r", + "наверху\r", + "него\r", + "ниже\r", + "мало\r", + "надо\r", + "один\r", + "одиннадцать\r", + "одиннадцатый\r", + "назад\r", + "наиболее\r", + "недавно\r", + "миллионов\r", + "недалеко\r", + "между\r", + "низко\r", + "меля\r", + "нельзя\r", + "нибудь\r", + "непрерывно\r", + "наконец\r", + "никогда\r", + "никуда\r", + "нас\r", + "наш\r", + "нет\r", + "нею\r", + "неё\r", + "них\r", + "мира\r", + "наша\r", + "наше\r", + "наши\r", + "ничего\r", + "начала\r", + "нередко\r", + "несколько\r", + "обычно\r", + "опять\r", + "около\r", + "мы\r", + "ну\r", + "нх\r", + "от\r", + "отовсюду\r", + "особенно\r", + "нужно\r", + "очень\r", + "отсюда\r", + "в\r", + "во\r", + "вон\r", + "вниз\r", + "внизу\r", + "вокруг\r", + "вот\r", + "восемнадцать\r", + "восемнадцатый\r", + "восемь\r", + "восьмой\r", + "вверх\r", + "вам\r", + "вами\r", + "важное\r", + "важная\r", + "важные\r", + "важный\r", + "вдали\r", + "везде\r", + "ведь\r", + "вас\r", + "ваш\r", + "ваша\r", + "ваше\r", + "ваши\r", + "впрочем\r", + "весь\r", + "вдруг\r", + "вы\r", + "все\r", + "второй\r", + "всем\r", + "всеми\r", + "времени\r", + "время\r", + "всему\r", + "всего\r", + "всегда\r", + "всех\r", + "всею\r", + "всю\r", + "вся\r", + "всё\r", + "всюду\r", + "г\r", + "год\r", + "говорил\r", + "говорит\r", + "года\r", + "году\r", + "где\r", + "да\r", + "ее\r", + "за\r", + "из\r", + "ли\r", + "же\r", + "им\r", + "до\r", + "по\r", + "ими\r", + "под\r", + "иногда\r", + "довольно\r", + "именно\r", + "долго\r", + "позже\r", + "более\r", + "должно\r", + "пожалуйста\r", + "значит\r", + "иметь\r", + "больше\r", + "пока\r", + "ему\r", + "имя\r", + "пор\r", + "пора\r", + "потом\r", + "потому\r", + "после\r", + "почему\r", + "почти\r", + "посреди\r", + "ей\r", + "два\r", + "две\r", + "двенадцать\r", + "двенадцатый\r", + "двадцать\r", + "двадцатый\r", + "двух\r", + "его\r", + "дел\r", + "или\r", + "без\r", + "день\r", + "занят\r", + "занята\r", + "занято\r", + "заняты\r", + "действительно\r", + "давно\r", + "девятнадцать\r", + "девятнадцатый\r", + "девять\r", + "девятый\r", + "даже\r", + "алло\r", + "жизнь\r", + "далеко\r", + "близко\r", + "здесь\r", + "дальше\r", + "для\r", + "лет\r", + "зато\r", + "даром\r", + "первый\r", + "перед\r", + "затем\r", + "зачем\r", + "лишь\r", + "десять\r", + "десятый\r", + "ею\r", + "её\r", + "их\r", + "бы\r", + "еще\r", + "при\r", + "был\r", + "про\r", + "процентов\r", + "против\r", + "просто\r", + "бывает\r", + "бывь\r", + "если\r", + "люди\r", + "была\r", + "были\r", + "было\r", + "будем\r", + "будет\r", + "будете\r", + "будешь\r", + "прекрасно\r", + "буду\r", + "будь\r", + "будто\r", + "будут\r", + "ещё\r", + "пятнадцать\r", + "пятнадцатый\r", + "друго\r", + "другое\r", + "другой\r", + "другие\r", + "другая\r", + "других\r", + "есть\r", + "пять\r", + "быть\r", + "лучше\r", + "пятый\r", + "к\r", + "ком\r", + "конечно\r", + "кому\r", + "кого\r", + "когда\r", + "которой\r", + "которого\r", + "которая\r", + "которые\r", + "который\r", + "которых\r", + "кем\r", + "каждое\r", + "каждая\r", + "каждые\r", + "каждый\r", + "кажется\r", + "как\r", + "какой\r", + "какая\r", + "кто\r", + "кроме\r", + "куда\r", + "кругом\r", + "с\r", + "т\r", + "у\r", + "я\r", + "та\r", + "те\r", + "уж\r", + "со\r", + "то\r", + "том\r", + "снова\r", + "тому\r", + "совсем\r", + "того\r", + "тогда\r", + "тоже\r", + "собой\r", + "тобой\r", + "собою\r", + "тобою\r", + "сначала\r", + "только\r", + "уметь\r", + "тот\r", + "тою\r", + "хорошо\r", + "хотеть\r", + "хочешь\r", + "хоть\r", + "хотя\r", + "свое\r", + "свои\r", + "твой\r", + "своей\r", + "своего\r", + "своих\r", + "свою\r", + "твоя\r", + "твоё\r", + "раз\r", + "уже\r", + "сам\r", + "там\r", + "тем\r", + "чем\r", + "сама\r", + "сами\r", + "теми\r", + "само\r", + "рано\r", + "самом\r", + "самому\r", + "самой\r", + "самого\r", + "семнадцать\r", + "семнадцатый\r", + "самим\r", + "самими\r", + "самих\r", + "саму\r", + "семь\r", + "чему\r", + "раньше\r", + "сейчас\r", + "чего\r", + "сегодня\r", + "себе\r", + "тебе\r", + "сеаой\r", + "человек\r", + "разве\r", + "теперь\r", + "себя\r", + "тебя\r", + "седьмой\r", + "спасибо\r", + "слишком\r", + "так\r", + "такое\r", + "такой\r", + "такие\r", + "также\r", + "такая\r", + "сих\r", + "тех\r", + "чаще\r", + "четвертый\r", + "через\r", + "часто\r", + "шестой\r", + "шестнадцать\r", + "шестнадцатый\r", + "шесть\r", + "четыре\r", + "четырнадцать\r", + "четырнадцатый\r", + "сколько\r", + "сказал\r", + "сказала\r", + "сказать\r", + "ту\r", + "ты\r", + "три\r", + "эта\r", + "эти\r", + "что\r", + "это\r", + "чтоб\r", + "этом\r", + "этому\r", + "этой\r", + "этого\r", + "чтобы\r", + "этот\r", + "стал\r", + "туда\r", + "этим\r", + "этими\r", + "рядом\r", + "тринадцать\r", + "тринадцатый\r", + "этих\r", + "третий\r", + "тут\r", + "эту\r", + "суть\r", + "чуть\r", + "тысяч\r", + "" +] diff --git a/data/stopwords/stopwords-ru.txt b/data/stopwords/stopwords-ru.txt deleted file mode 100644 index 9498480..0000000 --- a/data/stopwords/stopwords-ru.txt +++ /dev/null @@ -1,421 +0,0 @@ -а -е -и -ж -м -о -на -не -ни -об -но -он -мне -мои -мож -она -они -оно -мной -много -многочисленное -многочисленная -многочисленные -многочисленный -мною -мой -мог -могут -можно -может -можхо -мор -моя -моё -мочь -над -нее -оба -нам -нем -нами -ними -мимо -немного -одной -одного -менее -однажды -однако -меня -нему -меньше -ней -наверху -него -ниже -мало -надо -один -одиннадцать -одиннадцатый -назад -наиболее -недавно -миллионов -недалеко -между -низко -меля -нельзя -нибудь -непрерывно -наконец -никогда -никуда -нас -наш -нет -нею -неё -них -мира -наша -наше -наши -ничего -начала -нередко -несколько -обычно -опять -около -мы -ну -нх -от -отовсюду -особенно -нужно -очень -отсюда -в -во -вон -вниз -внизу -вокруг -вот -восемнадцать -восемнадцатый -восемь -восьмой -вверх -вам -вами -важное -важная -важные -важный -вдали -везде -ведь -вас -ваш -ваша -ваше -ваши -впрочем -весь -вдруг -вы -все -второй -всем -всеми -времени -время -всему -всего -всегда -всех -всею -всю -вся -всё -всюду -г -год -говорил -говорит -года -году -где -да -ее -за -из -ли -же -им -до -по -ими -под -иногда -довольно -именно -долго -позже -более -должно -пожалуйста -значит -иметь -больше -пока -ему -имя -пор -пора -потом -потому -после -почему -почти -посреди -ей -два -две -двенадцать -двенадцатый -двадцать -двадцатый -двух -его -дел -или -без -день -занят -занята -занято -заняты -действительно -давно -девятнадцать -девятнадцатый -девять -девятый -даже -алло -жизнь -далеко -близко -здесь -дальше -для -лет -зато -даром -первый -перед -затем -зачем -лишь -десять -десятый -ею -её -их -бы -еще -при -был -про -процентов -против -просто -бывает -бывь -если -люди -была -были -было -будем -будет -будете -будешь -прекрасно -буду -будь -будто -будут -ещё -пятнадцать -пятнадцатый -друго -другое -другой -другие -другая -других -есть -пять -быть -лучше -пятый -к -ком -конечно -кому -кого -когда -которой -которого -которая -которые -который -которых -кем -каждое -каждая -каждые -каждый -кажется -как -какой -какая -кто -кроме -куда -кругом -с -т -у -я -та -те -уж -со -то -том -снова -тому -совсем -того -тогда -тоже -собой -тобой -собою -тобою -сначала -только -уметь -тот -тою -хорошо -хотеть -хочешь -хоть -хотя -свое -свои -твой -своей -своего -своих -свою -твоя -твоё -раз -уже -сам -там -тем -чем -сама -сами -теми -само -рано -самом -самому -самой -самого -семнадцать -семнадцатый -самим -самими -самих -саму -семь -чему -раньше -сейчас -чего -сегодня -себе -тебе -сеаой -человек -разве -теперь -себя -тебя -седьмой -спасибо -слишком -так -такое -такой -такие -также -такая -сих -тех -чаще -четвертый -через -часто -шестой -шестнадцать -шестнадцатый -шесть -четыре -четырнадцать -четырнадцатый -сколько -сказал -сказала -сказать -ту -ты -три -эта -эти -что -это -чтоб -этом -этому -этой -этого -чтобы -этот -стал -туда -этим -этими -рядом -тринадцать -тринадцатый -этих -третий -тут -эту -суть -чуть -тысяч diff --git a/data/stopwords/stopwords-sv.json b/data/stopwords/stopwords-sv.json new file mode 100644 index 0000000..c2bdd4a --- /dev/null +++ b/data/stopwords/stopwords-sv.json @@ -0,0 +1,546 @@ +[ + "kunna", + "om", + "ovan", + "enligt", + "i enlighet med detta", + "över", + "faktiskt", + "efter", + "efteråt", + "igen", + "mot", + "är inte", + "alla", + "tillåta", + "tillåter", + "nästan", + "ensam", + "längs", + "redan", + "också", + "även om", + "alltid", + "am", + "bland", + "bland", + "en", + "och", + "en annan", + "någon", + "någon", + "hur som helst", + "någon", + "något", + "ändå", + "ändå", + "var som helst", + "isär", + "visas", + "uppskatta", + "lämpligt", + "är", + "inte", + "runt", + "som", + "åt sidan", + "be", + "frågar", + "associerad", + "vid", + "tillgängliga", + "bort", + "väldigt", + "vara", + "blev", + "eftersom", + "bli", + "blir", + "blir", + "varit", + "innan", + "förhand", + "bakom", + "vara", + "tro", + "nedan", + "bredvid", + "förutom", + "bäst", + "bättre", + "mellan", + "bortom", + "både", + "kort", + "men", + "genom", + "c", + "c'mon", + "c: s", + "kom", + "kampanj", + "kan", + "kan inte", + "kan inte", + "cant", + "orsaka", + "orsaker", + "viss", + "säkerligen", + "förändringar", + "klart", + "co", + "com", + "komma", + "kommer", + "om", + "följaktligen", + "överväga", + "överväger", + "innehålla", + "innehållande", + "innehåller", + "motsvarande", + "kunde", + "kunde inte", + "kurs", + "närvarande", + "definitivt", + "beskrivits", + "trots", + "gjorde", + "inte", + "olika", + "göra", + "gör", + "inte", + "gör", + "inte", + "gjort", + "ned", + "nedåt", + "under", + "varje", + "edu", + "åtta", + "antingen", + "annars", + "någon annanstans", + "tillräckligt", + "godkändes", + "helt", + "speciellt", + "et", + "etc", + "även", + "någonsin", + "varje", + "alla", + "alla", + "allt", + "överallt", + "ex", + "exakt", + "exempel", + "utom", + "långt", + "få", + "femte", + "först", + "finansiella", + "fem", + "följt", + "efter", + "följer", + "för", + "fd", + "tidigare", + "framåt", + "fyra", + "från", + "ytterligare", + "dessutom", + "få", + "blir", + "få", + "given", + "ger", + "gå", + "går", + "gå", + "borta", + "fick", + "fått", + "hälsningar", + "hade", + "hade inte", + "händer", + "knappast", + "har", + "har inte", + "ha", + "har inte", + "med", + "han", + "han är", + "hallå", + "hjälpa", + "hence", + "henne", + "här", + "här finns", + "härefter", + "härmed", + "häri", + "härpå", + "hennes", + "själv", + "hej", + "honom", + "själv", + "hans", + "hit", + "förhoppningsvis", + "hur", + "howbeit", + "dock", + "jag skulle", + "jag ska", + "jag är", + "jag har", + "om", + "ignoreras", + "omedelbar", + "i", + "eftersom", + "inc", + "indeed", + "indikera", + "indikerade", + "indikerar", + "inre", + "mån", + "istället", + "in", + "inåt", + "är", + "är inte", + "den", + "det skulle", + "det ska", + "det är", + "dess", + "själv", + "bara", + "hålla", + "håller", + "hålls", + "vet", + "vet", + "känd", + "sista", + "nyligen", + "senare", + "senare", + "latterly", + "minst", + "mindre", + "lest", + "låt", + "låt oss", + "liknande", + "gillade", + "sannolikt", + "lite", + "ser", + "ser", + "ser", + "ltd", + "huvudsakligen", + "många", + "kan", + "kanske", + "mig", + "betyda", + "under tiden", + "endast", + "kanske", + "mer", + "dessutom", + "mest", + "mestadels", + "mycket", + "måste", + "min", + "själv", + "namn", + "nämligen", + "nd", + "nära", + "nästan", + "nödvändigt", + "behöver", + "behov", + "varken", + "aldrig", + "ändå", + "ny", + "nästa", + "nio", + "ingen", + "ingen", + "icke", + "ingen", + "ingen", + "eller", + "normalt", + "inte", + "ingenting", + "roman", + "nu", + "ingenstans", + "uppenbarligen", + "av", + "off", + "ofta", + "oh", + "ok", + "okay", + "gammal", + "på", + "en gång", + "ett", + "ettor", + "endast", + "på", + "eller", + "andra", + "andra", + "annars", + "borde", + "vår", + "vårt", + "oss", + "ut", + "utanför", + "över", + "övergripande", + "egen", + "särskilt", + "särskilt", + "per", + "kanske", + "placeras", + "vänligen", + "plus", + "möjligt", + "förmodligen", + "förmodligen", + "ger", + "ganska", + "citera", + "kvartalsvis", + "snarare", + "verkligen", + "rimligen", + "om", + "oavsett", + "gäller", + "relativt", + "respektive", + "höger", + "sa", + "samma", + "såg", + "säga", + "säger", + "säger", + "andra", + "det andra", + "se", + "ser", + "verkar", + "verkade", + "informationsproblem", + "verkar", + "sett", + "själv", + "själva", + "förnuftig", + "skickas", + "allvarlig", + "allvarligt", + "sju", + "flera", + "skall", + "hon", + "bör", + "bör inte", + "eftersom", + "sex", + "så", + "några", + "någon", + "på något sätt", + "någon", + "något", + "sometime", + "ibland", + "något", + "någonstans", + "snart", + "sorry", + "specificerade", + "ange", + "ange", + "fortfarande", + "sub", + "sådan", + "sup", + "säker", + "t s", + "ta", + "tas", + "berätta", + "tenderar", + "än", + "tacka", + "tack", + "thanx", + "att", + "det är", + "brinner", + "den", + "deras", + "deras", + "dem", + "själva", + "sedan", + "därifrån", + "där", + "det finns", + "därefter", + "därigenom", + "därför", + "däri", + "theres", + "därpå", + "dessa", + "de", + "de hade", + "de kommer", + "de är", + "de har", + "tror", + "tredje", + "detta", + "grundlig", + "grundligt", + "de", + "though", + "tre", + "genom", + "hela", + "thru", + "sålunda", + "till", + "tillsammans", + "alltför", + "tog", + "mot", + "mot", + "försökte", + "försöker", + "verkligt", + "försök", + "försöker", + "två gånger", + "två", + "enligt", + "tyvärr", + "såvida inte", + "osannolikt", + "tills", + "åt", + "upp", + "på", + "oss", + "använda", + "används", + "användbar", + "använder", + "användning", + "vanligtvis", + "uucp", + "värde", + "olika", + "mycket", + "via", + "viz", + "vs", + "vill", + "vill", + "var", + "var inte", + "sätt", + "vi", + "vi skulle", + "vi kommer", + "vi är", + "vi har", + "välkommen", + "väl", + "gick", + "var", + "var inte", + "vad", + "vad är", + "oavsett", + "när", + "varifrån", + "närhelst", + "där", + "var är", + "varefter", + "medan", + "varigenom", + "vari", + "varpå", + "varhelst", + "huruvida", + "som", + "medan", + "dit", + "som", + "vem är", + "vem", + "hela", + "vem", + "vars", + "varför", + "kommer", + "villig", + "önskar", + "med", + "inom", + "utan", + "kommer inte", + "undrar", + "skulle", + "skulle inte", + "ja", + "ännu", + "ni", + "du skulle", + "kommer du", + "du är", + "du har", + "din", + "själv", + "er", + "noll", + "tjänsteman", + "skarpt", + "kritiserade", + "" +] diff --git a/data/stopwords/stopwords-sv.txt b/data/stopwords/stopwords-sv.txt deleted file mode 100644 index 74c0a89..0000000 --- a/data/stopwords/stopwords-sv.txt +++ /dev/null @@ -1,547 +0,0 @@ -#----------------------------------------------------------------------- -# translated -#----------------------------------------------------------------------- - -kunna -om -ovan -enligt -i enlighet med detta -över -faktiskt -efter -efteråt -igen -mot -är inte -alla -tillåta -tillåter -nästan -ensam -längs -redan -också -även om -alltid -am -bland -bland -en -och -en annan -någon -någon -hur som helst -någon -något -ändå -ändå -var som helst -isär -visas -uppskatta -lämpligt -är -inte -runt -som -åt sidan -be -frågar -associerad -vid -tillgängliga -bort -väldigt -vara -blev -eftersom -bli -blir -blir -varit -innan -förhand -bakom -vara -tro -nedan -bredvid -förutom -bäst -bättre -mellan -bortom -både -kort -men -genom -c -c'mon -c: s -kom -kampanj -kan -kan inte -kan inte -cant -orsaka -orsaker -viss -säkerligen -förändringar -klart -co -com -komma -kommer -om -följaktligen -överväga -överväger -innehålla -innehållande -innehåller -motsvarande -kunde -kunde inte -kurs -närvarande -definitivt -beskrivits -trots -gjorde -inte -olika -göra -gör -inte -gör -inte -gjort -ned -nedåt -under -varje -edu -åtta -antingen -annars -någon annanstans -tillräckligt -godkändes -helt -speciellt -et -etc -även -någonsin -varje -alla -alla -allt -överallt -ex -exakt -exempel -utom -långt -få -femte -först -finansiella -fem -följt -efter -följer -för -fd -tidigare -framåt -fyra -från -ytterligare -dessutom -få -blir -få -given -ger -gå -går -gå -borta -fick -fått -hälsningar -hade -hade inte -händer -knappast -har -har inte -ha -har inte -med -han -han är -hallå -hjälpa -hence -henne -här -här finns -härefter -härmed -häri -härpå -hennes -själv -hej -honom -själv -hans -hit -förhoppningsvis -hur -howbeit -dock -jag skulle -jag ska -jag är -jag har -om -ignoreras -omedelbar -i -eftersom -inc -indeed -indikera -indikerade -indikerar -inre -mån -istället -in -inåt -är -är inte -den -det skulle -det ska -det är -dess -själv -bara -hålla -håller -hålls -vet -vet -känd -sista -nyligen -senare -senare -latterly -minst -mindre -lest -låt -låt oss -liknande -gillade -sannolikt -lite -ser -ser -ser -ltd -huvudsakligen -många -kan -kanske -mig -betyda -under tiden -endast -kanske -mer -dessutom -mest -mestadels -mycket -måste -min -själv -namn -nämligen -nd -nära -nästan -nödvändigt -behöver -behov -varken -aldrig -ändå -ny -nästa -nio -ingen -ingen -icke -ingen -ingen -eller -normalt -inte -ingenting -roman -nu -ingenstans -uppenbarligen -av -off -ofta -oh -ok -okay -gammal -på -en gång -ett -ettor -endast -på -eller -andra -andra -annars -borde -vår -vårt -oss -ut -utanför -över -övergripande -egen -särskilt -särskilt -per -kanske -placeras -vänligen -plus -möjligt -förmodligen -förmodligen -ger -ganska -citera -kvartalsvis -snarare -verkligen -rimligen -om -oavsett -gäller -relativt -respektive -höger -sa -samma -såg -säga -säger -säger -andra -det andra -se -ser -verkar -verkade -informationsproblem -verkar -sett -själv -själva -förnuftig -skickas -allvarlig -allvarligt -sju -flera -skall -hon -bör -bör inte -eftersom -sex -så -några -någon -på något sätt -någon -något -sometime -ibland -något -någonstans -snart -sorry -specificerade -ange -ange -fortfarande -sub -sådan -sup -säker -t s -ta -tas -berätta -tenderar -än -tacka -tack -thanx -att -det är -brinner -den -deras -deras -dem -själva -sedan -därifrån -där -det finns -därefter -därigenom -därför -däri -theres -därpå -dessa -de -de hade -de kommer -de är -de har -tror -tredje -detta -grundlig -grundligt -de -though -tre -genom -hela -thru -sålunda -till -tillsammans -alltför -tog -mot -mot -försökte -försöker -verkligt -försök -försöker -två gånger -två -enligt -tyvärr -såvida inte -osannolikt -tills -åt -upp -på -oss -använda -används -användbar -använder -användning -vanligtvis -uucp -värde -olika -mycket -via -viz -vs -vill -vill -var -var inte -sätt -vi -vi skulle -vi kommer -vi är -vi har -välkommen -väl -gick -var -var inte -vad -vad är -oavsett -när -varifrån -närhelst -där -var är -varefter -medan -varigenom -vari -varpå -varhelst -huruvida -som -medan -dit -som -vem är -vem -hela -vem -vars -varför -kommer -villig -önskar -med -inom -utan -kommer inte -undrar -skulle -skulle inte -ja -ännu -ni -du skulle -kommer du -du är -du har -din -själv -er -noll -tjänsteman -skarpt -kritiserade diff --git a/data/stopwords/stopwords-th.json b/data/stopwords/stopwords-th.json new file mode 100644 index 0000000..4609cd2 --- /dev/null +++ b/data/stopwords/stopwords-th.json @@ -0,0 +1,1050 @@ +[ + "\ufeffเก็บ\r", + "เกิด\r", + "เกิน\r", + "เกินๆ\r", + "เกี่ยวเนื่อง\r", + "เกี่ยวกัน\r", + "เกี่ยวกับ\r", + "เกี่ยวข้อง\r", + "เกี่ยวๆ\r", + "เกือบ\r", + "เกือบจะ\r", + "เกือบๆ\r", + "เขา\r", + "เข้า\r", + "เข้าใจ\r", + "เขียน\r", + "เคย\r", + "เคยๆ\r", + "เฉกเช่น\r", + "เฉพาะ\r", + "เฉย\r", + "เฉยๆ\r", + "เช่น\r", + "เช่นเคย\r", + "เช่นเดียวกัน\r", + "เช่นเดียวกับ\r", + "เช่นเมื่อ\r", + "เช่นใด\r", + "เช่นไร\r", + "เช่นก่อน\r", + "เช่นกัน\r", + "เช่นดัง\r", + "เช่นดังเก่า\r", + "เช่นดังก่อน\r", + "เช่นดังที่\r", + "เช่นดังว่า\r", + "เช่นที่\r", + "เช่นที่เคย\r", + "เช่นที่ว่า\r", + "เช่นนั้น\r", + "เช่นนั้นเอง\r", + "เช่นนี้\r", + "เชื่อ\r", + "เชื่อถือ\r", + "เชื่อมั่น\r", + "เชื่อว่า\r", + "เดิม\r", + "เดิมที\r", + "เดิมๆ\r", + "เดียว\r", + "เดี๋ยว\r", + "เดี๋ยวก่อน\r", + "เดียวกัน\r", + "เดียวกับ\r", + "เดี๋ยวนั้น\r", + "เดี๋ยวนี้\r", + "เต็มไปด้วย\r", + "เต็มไปหมด\r", + "เต็มๆ\r", + "เถอะ\r", + "เถิด\r", + "เท่า\r", + "เท่าใด\r", + "เท่าไร\r", + "เท่าไหร่\r", + "เท่ากัน\r", + "เท่ากับ\r", + "เท่าที่\r", + "เท่านั้น\r", + "เท่านี้\r", + "เธอ\r", + "เน้น\r", + "เนี่ย\r", + "เนี่ยเอง\r", + "เป็น\r", + "เป็นเพื่อ\r", + "เป็นแต่\r", + "เป็นด้วย\r", + "เป็นดัง\r", + "เป็นต้น\r", + "เป็นอัน\r", + "เป็นอันมาก\r", + "เป็นอาทิ\r", + "เปลี่ยน\r", + "เปลี่ยนแปลง\r", + "เผื่อ\r", + "เผื่อจะ\r", + "เผื่อที่\r", + "เผื่อว่า\r", + "เพราะ\r", + "เพราะฉะนั้น\r", + "เพราะว่า\r", + "เพิ่ง\r", + "เพิ่งจะ\r", + "เพิ่ม\r", + "เพิ่มเติม\r", + "เพี\r", + "เพียง\r", + "เพียงเพื่อ\r", + "เพียงแค่\r", + "เพียงแต่\r", + "เพียงใด\r", + "เพียงไหน\r", + "เพียงพอ\r", + "เพื่อ\r", + "เพื่อให้\r", + "เพื่อที่\r", + "เพื่อว่า\r", + "เมื่อ\r", + "เมื่อเช้า\r", + "เมื่อเย็น\r", + "เมื่อใด\r", + "เมื่อไร\r", + "เมื่อไหร่\r", + "เมื่อก่อน\r", + "เมื่อครั้ง\r", + "เมื่อครั้งก่อน\r", + "เมื่อคราว\r", + "เมื่อคราวก่อน\r", + "เมื่อคราวที่\r", + "เมื่อคืน\r", + "เมื่อนั้น\r", + "เมื่อนี้\r", + "เมื่อวันวาน\r", + "เมื่อวาน\r", + "เยอะ\r", + "เยอะแยะ\r", + "เร็ว\r", + "เร็วๆ\r", + "เรา\r", + "เราๆ\r", + "เริ่ม\r", + "เรียก\r", + "เรียบ\r", + "เรื่อย\r", + "เรื่อยๆ\r", + "เล็ก\r", + "เล็กน้อย\r", + "เล็กๆ\r", + "เลย\r", + "เล่าว่า\r", + "เสร็จ\r", + "เสร็จแล้ว\r", + "เสียแล้ว\r", + "เสียจน\r", + "เสียด้วย\r", + "เสียนี่\r", + "เหตุ\r", + "เหตุไร\r", + "เหตุนั้น\r", + "เหตุนี้\r", + "เหตุมด\r", + "เห็นแก่\r", + "เห็นควร\r", + "เห็นจะ\r", + "เห็นว่า\r", + "เหล่า\r", + "เหล่านั้น\r", + "เหล่านี้\r", + "เหลือ\r", + "เหลือเกิน\r", + "เอง\r", + "เอ็ง\r", + "เอา\r", + "แก\r", + "แก่\r", + "แก้ไข\r", + "แค่\r", + "แค่เพียง\r", + "แค่ไหน\r", + "แค่จะ\r", + "แค่นั้น\r", + "แค่นี้\r", + "แค่ว่า\r", + "แด่\r", + "แต่\r", + "แต่เดิม\r", + "แต่เพียง\r", + "แต่เมื่อ\r", + "แต่ไร\r", + "แต่ไหน\r", + "แต่ก็\r", + "แต่ก่อน\r", + "แต่จะ\r", + "แต่ต้อง\r", + "แต่ถ้า\r", + "แต่ทว่า\r", + "แต่ที่\r", + "แต่นั้น\r", + "แต่ละ\r", + "แต่ว่า\r", + "แต่อย่างใด\r", + "แท้\r", + "แท้จริง\r", + "แบบ\r", + "แม้\r", + "แม้แต่\r", + "แม้กระทั่ง\r", + "แม้นว่า\r", + "แม้ว่า\r", + "แยะ\r", + "แล้ว\r", + "แล้วเสร็จ\r", + "แล้วแต่\r", + "แล้วกัน\r", + "แสดง\r", + "แสดงว่า\r", + "แห่ง\r", + "แห่งโน้น\r", + "แห่งใด\r", + "แห่งไหน\r", + "แห่งนั้น\r", + "แห่งนี้\r", + "แหละ\r", + "โดย\r", + "โดยเฉพาะ\r", + "โดยเฉพาะอย่าง\r", + "โดยเมื่อ\r", + "โดยเร็ว\r", + "โดยแท้\r", + "โดยแท้จริง\r", + "โดยง่าย\r", + "โดยดี\r", + "โดยดุษฎี\r", + "โดยตลอด\r", + "โดยทั่ว\r", + "โดยทั่วไป\r", + "โดยทั่วกัน\r", + "โดยทั่วถึง\r", + "โดยที่\r", + "โดยนัย\r", + "โดยปกติ\r", + "โดยมัก\r", + "โดยมักจะ\r", + "โดยมาก\r", + "โดยรวม\r", + "โดยรวมๆ\r", + "โดยละม่อม\r", + "โดยลําดับ\r", + "โดยส่วนใหญ่\r", + "โดยส่วนมาก\r", + "โดยส่วนรวม\r", + "โต\r", + "โตๆ\r", + "ใกล้\r", + "ใกล้ๆ\r", + "ใคร\r", + "ใคร่\r", + "ใคร่จะ\r", + "ใช่\r", + "ใช้\r", + "ใช่ไหม\r", + "ใด\r", + "ใดๆ\r", + "ใต้\r", + "ใน\r", + "ในเมื่อ\r", + "ในช่วง\r", + "ในที่\r", + "ในระหว่าง\r", + "ให้\r", + "ให้แก่\r", + "ใหญ่\r", + "ใหญ่โต\r", + "ไกล\r", + "ไกลๆ\r", + "ไง\r", + "ไฉน\r", + "ได้\r", + "ได้แก่\r", + "ได้แต่\r", + "ได้ที่\r", + "ได้มา\r", + "ได้รับ\r", + "ไป\r", + "ไม่\r", + "ไม่เป็นไร\r", + "ไม่ใช่\r", + "ไม่ค่อย\r", + "ไม่ค่อยเป็น\r", + "ไม่ค่อยจะ\r", + "ไม่ว่า\r", + "ไร\r", + "ไหน\r", + "ไหนๆ\r", + "ก็\r", + "ก็แค่\r", + "ก็แล้วแต่\r", + "ก็ได้\r", + "ก็คือ\r", + "ก็จะ\r", + "ก็ดี\r", + "ก็ต่อเมื่อ\r", + "ก็ตาม\r", + "ก็ตามแต่\r", + "ก็ตามที\r", + "กระทั่ง\r", + "กระทํา\r", + "กระนั้น\r", + "กระผม\r", + "กลับ\r", + "กล่าว\r", + "กล่าวคือ\r", + "กลุ่ม\r", + "กลุ่มก้อน\r", + "กลุ่มๆ\r", + "กว่า\r", + "กว้าง\r", + "กว้างขวาง\r", + "กว้างๆ\r", + "ก่อน\r", + "ก่อนหน้า\r", + "ก่อนหน้านี้\r", + "ก่อนๆ\r", + "กัน\r", + "กันเถอะ\r", + "กันเอง\r", + "กันและกัน\r", + "กันไหม\r", + "กันดีไหม\r", + "กันดีกว่า\r", + "กันนะ\r", + "กับ\r", + "การ\r", + "กําลัง\r", + "กําลังจะ\r", + "กําหนด\r", + "กู\r", + "ขณะ\r", + "ขณะเดียวกัน\r", + "ขณะใด\r", + "ขณะใดๆ\r", + "ขณะที่\r", + "ขณะนั้น\r", + "ขณะนี้\r", + "ขณะหนึ่ง\r", + "ขวาง\r", + "ขวางๆ\r", + "ขอ\r", + "ของ\r", + "ขั้น\r", + "ข้า\r", + "ข้าง\r", + "ข้างเคียง\r", + "ข้างต้น\r", + "ข้างบน\r", + "ข้างล่าง\r", + "ข้างๆ\r", + "ขาด\r", + "ข้าพเจ้า\r", + "ข้าฯ\r", + "ขึ้น\r", + "คง\r", + "คงจะ\r", + "คงอยู่\r", + "ครบ\r", + "ครบครัน\r", + "ครบถ้วน\r", + "ครั้ง\r", + "ครั้งใด\r", + "ครั้งไหน\r", + "ครั้งกระนั้น\r", + "ครั้งก่อน\r", + "ครั้งครา\r", + "ครั้งคราว\r", + "ครั้งที่\r", + "ครั้งนั้น\r", + "ครั้งนี้\r", + "ครั้งละ\r", + "ครั้งหนึ่ง\r", + "ครั้งหลัง\r", + "ครั้งหลังสุด\r", + "ครั้งๆ\r", + "ครัน\r", + "ครับ\r", + "ครา\r", + "คราใด\r", + "คราไหน\r", + "คราที่\r", + "ครานั้น\r", + "ครานี้\r", + "คราว\r", + "คราวโน้น\r", + "คราวใด\r", + "คราวไหน\r", + "คราวก่อน\r", + "คราวที่\r", + "คราวนั้น\r", + "คราวนี้\r", + "คราวละ\r", + "คราวหน้า\r", + "คราวหนึ่ง\r", + "คราวหลัง\r", + "คราวๆ\r", + "คราหนึ่ง\r", + "คล้าย\r", + "คล้ายกัน\r", + "คล้ายกันกับ\r", + "คล้ายกับ\r", + "คล้ายกับว่า\r", + "คล้ายว่า\r", + "ควร\r", + "ความ\r", + "ค่อน\r", + "ค่อนข้าง\r", + "ค่อนข้างจะ\r", + "ค่อนมาทาง\r", + "ค่อย\r", + "ค่อยไปทาง\r", + "ค่อยๆ\r", + "คะ\r", + "ค่ะ\r", + "คํา\r", + "คิด\r", + "คิดว่า\r", + "คือ\r", + "คุณ\r", + "คุณๆ\r", + "ง่าย\r", + "ง่ายๆ\r", + "จง\r", + "จด\r", + "จนเมื่อ\r", + "จนแม้\r", + "จนแม้น\r", + "จนกระทั่ง\r", + "จนกว่า\r", + "จนขณะนี้\r", + "จนตลอด\r", + "จนถึง\r", + "จนทั่ว\r", + "จนบัดนี้\r", + "จรด\r", + "จรดกับ\r", + "จริง\r", + "จริงจัง\r", + "จริงๆ\r", + "จริงๆ\r", + "จวน\r", + "จวนเจียน\r", + "จวนจะ\r", + "จวบ\r", + "จวบกับ\r", + "จวบจน\r", + "จะ\r", + "จ้ะ\r", + "จ๊ะ\r", + "จะได้\r", + "จัง\r", + "จังๆ\r", + "จังๆ\r", + "จัด\r", + "จัดแจง\r", + "จัดให้\r", + "จัดการ\r", + "จัดงาน\r", + "จัดตั้ง\r", + "จัดทํา\r", + "จัดหา\r", + "จับ\r", + "จ้า\r", + "จ้า\r", + "จํา\r", + "จําเป็น\r", + "จาก\r", + "จากนั้น\r", + "จากนี้\r", + "จากนี้ไป\r", + "จําพวก\r", + "จึง\r", + "จึงเป็น\r", + "จึงจะ\r", + "จู่ๆ\r", + "ฉะนั้น\r", + "ฉะนี้\r", + "ฉัน\r", + "ช่วง\r", + "ช่วงแรก\r", + "ช่วงก่อน\r", + "ช่วงต่อไป\r", + "ช่วงถัดไป\r", + "ช่วงท้าย\r", + "ช่วงที่\r", + "ช่วงนั้น\r", + "ช่วงนี้\r", + "ช่วงระหว่าง\r", + "ช่วงหน้า\r", + "ช่วงหลัง\r", + "ช่วงๆ\r", + "ช่วย\r", + "ช้า\r", + "ช้านาน\r", + "ชาว\r", + "ช้าๆ\r", + "ซะ\r", + "ซะก่อน\r", + "ซะจน\r", + "ซะจนกระทั่ง\r", + "ซะจนถึง\r", + "ซึ่ง\r", + "ซึ่งได้แก่\r", + "ซึ่งก็\r", + "ซึ่งก็คือ\r", + "ซึ่งกัน\r", + "ซึ่งกันและกัน\r", + "ซึ่งๆ\r", + "ณ\r", + "ด้วย\r", + "ด้วยเช่นกัน\r", + "ด้วยเพราะ\r", + "ด้วยเหตุเพราะ\r", + "ด้วยเหตุที่\r", + "ด้วยเหตุนั้น\r", + "ด้วยเหตุนี้\r", + "ด้วยเหตุว่า\r", + "ด้วยเหมือนกัน\r", + "ด้วยกัน\r", + "ด้วยที่\r", + "ด้วยประการฉะนี้\r", + "ด้วยว่า\r", + "ดัง\r", + "ดั่ง\r", + "ดังเก่า\r", + "ดั่งเก่า\r", + "ดังเคย\r", + "ดั่งเคย\r", + "ดังเช่น\r", + "ดั่งเช่น\r", + "ดังเช่นที่\r", + "ดั่งเช่นที่\r", + "ดังเดิม\r", + "ดั่งเดิม\r", + "ดังเหมือน\r", + "ดั่งเหมือน\r", + "ดังแต่ก่อน\r", + "ดั่งแต่ก่อน\r", + "ดังแม้\r", + "ดั่งแม้\r", + "ดังกล่าว\r", + "ดังกับ\r", + "ดั่งกับ\r", + "ดังกับว่า\r", + "ดั่งกับว่า\r", + "ดังจะ\r", + "ดั่งจะ\r", + "ดังต่อไปนี้\r", + "ดังที่\r", + "ดั่งที่\r", + "ดังที่เคย\r", + "ดังที่กล่าว\r", + "ดังที่จะเป็น\r", + "ดังนั้น\r", + "ดังนี้\r", + "ดังนี้เช่น\r", + "ดังนี้เพราะ\r", + "ดังว่า\r", + "ดั่งว่า\r", + "ดําเนิน\r", + "ดําเนินไป\r", + "ดําเนินการ\r", + "ดําเนินงาน\r", + "ด้าน\r", + "ด้านๆ\r", + "ดิฉัน\r", + "ดี\r", + "ดีๆ\r", + "ดู\r", + "ดูเหมือน\r", + "ดูเหมือนว่า\r", + "ดูแล\r", + "ดูแล้ว\r", + "ดูจะ\r", + "ดูว่า\r", + "ดูๆ\r", + "ตน\r", + "ตนเอง\r", + "ตนฯ\r", + "ตรง\r", + "ตรงๆ\r", + "ตลอด\r", + "ตลอดเวลา\r", + "ตลอดไป\r", + "ตลอดกาล\r", + "ตลอดกาลนาน\r", + "ตลอดจน\r", + "ตลอดถึง\r", + "ตลอดทั้ง\r", + "ตลอดทั่ว\r", + "ตลอดทั่วถึง\r", + "ตลอดทั่วทั้ง\r", + "ตลอดปี\r", + "ตลอดมา\r", + "ตลอดระยะเวลา\r", + "ตลอดวัน\r", + "ตลอดศก\r", + "ต่อ\r", + "ต่อเมื่อ\r", + "ต่อให้\r", + "ต่อไป\r", + "ต่อไปนี้\r", + "ต่อกัน\r", + "ต่อกับ\r", + "ต้อง\r", + "ต้องการ\r", + "ต่อจาก\r", + "ตอน\r", + "ตอนแรก\r", + "ตอนใด\r", + "ตอนไหน\r", + "ตอนก่อน\r", + "ตอนต่อ\r", + "ตอนต่อไป\r", + "ตอนต่อมา\r", + "ตอนถัดไป\r", + "ตอนถัดมา\r", + "ตอนที่\r", + "ตอนที่แล้ว\r", + "ตอนนั้น\r", + "ตอนนี้\r", + "ตอนสุดท้าย\r", + "ตอนหน้า\r", + "ตอนหลัง\r", + "ตอนๆ\r", + "ต่อมา\r", + "ต่อว่า\r", + "ต่อๆ\r", + "ตะหาก\r", + "ตั้ง\r", + "ตั้งแต่\r", + "ตั้งแต่แรก\r", + "ตั้งแต่นั้น\r", + "ตั้งแต่นี้\r", + "ตั้งต้น\r", + "ตั้งที่\r", + "ตั้งอยู่\r", + "ตัว\r", + "ตัวเอง\r", + "ตัวโน้น\r", + "ตัวใด\r", + "ตัวไหน\r", + "ตัวที่\r", + "ตัวนั้น\r", + "ตัวนี้\r", + "ตัวละ\r", + "ตัวอย่างเช่น\r", + "ตัวๆ\r", + "ต่าง\r", + "ต่างก็\r", + "ต่างหาก\r", + "ต่างๆ\r", + "ตาม\r", + "ตามแต่\r", + "ตามด้วย\r", + "ตามที่\r", + "ตามๆ\r", + "ถ้า\r", + "ถ้าจะ\r", + "ถ้าหาก\r", + "ถึง\r", + "ถึงเมื่อ\r", + "ถึงเมื่อใด\r", + "ถึงเมื่อไร\r", + "ถึงแก่\r", + "ถึงแม้\r", + "ถึงแม้จะ\r", + "ถึงแม้ว่า\r", + "ถึงจะ\r", + "ถึงบัดนั้น\r", + "ถึงบัดนี้\r", + "ถึงอย่างไร\r", + "ถือ\r", + "ถือว่า\r", + "ถูก\r", + "ถูกต้อง\r", + "ถูกๆ\r", + "ทรง\r", + "ทว่า\r", + "ทั้ง\r", + "ทั้งเป็น\r", + "ทั้งคน\r", + "ทั้งตัว\r", + "ทั้งที่\r", + "ทั้งนั้น\r", + "ทั้งนั้นเพราะ\r", + "ทั้งนั้นด้วย\r", + "ทั้งนี้\r", + "ทั้งปวง\r", + "ทั้งมวล\r", + "ทั้งสิ้น\r", + "ทั้งหมด\r", + "ทั้งหลาย\r", + "ทั้งๆ\r", + "ทั้งๆ\r", + "ทัน\r", + "ทันใดนั้น\r", + "ทันที\r", + "ทันทีทันใด\r", + "ทั่ว\r", + "ทําให้\r", + "ทําไม\r", + "ทําไร\r", + "ทําๆ\r", + "ที\r", + "ที่\r", + "ที่\r", + "ทีเดียว\r", + "ทีเถอะ\r", + "ที่แท้\r", + "ที่แท้จริง\r", + "ที่แล้ว\r", + "ที่แห่งนั้น\r", + "ทีใด\r", + "ที่ใด\r", + "ที่ได้\r", + "ทีไร\r", + "ที่ไหน\r", + "ที่จริง\r", + "ที่ซึ่ง\r", + "ที่นั้น\r", + "ที่นี้\r", + "ทีละ\r", + "ที่ละ\r", + "ที่ว่า\r", + "ที่สุด\r", + "ทีๆ\r", + "ที่ๆ\r", + "ทุก\r", + "ทุกเมื่อ\r", + "ทุกแห่ง\r", + "ทุกคน\r", + "ทุกครั้ง\r", + "ทุกครา\r", + "ทุกคราว\r", + "ทุกชิ้น\r", + "ทุกตัว\r", + "ทุกทาง\r", + "ทุกที\r", + "ทุกที่\r", + "ทุกวัน\r", + "ทุกวันนี้\r", + "ทุกสิ่ง\r", + "ทุกหน\r", + "ทุกอย่าง\r", + "ทุกอัน\r", + "ทุกๆ\r", + "นอก\r", + "นอกเหนือ\r", + "นอกจาก\r", + "นอกจากที่\r", + "นอกจากนั้น\r", + "นอกจากนี้\r", + "นอกจากว่า\r", + "นอกนั้น\r", + "น้อย\r", + "น้อยกว่า\r", + "น้อยๆ\r", + "นะ\r", + "น่ะ\r", + "นัก\r", + "นั่น\r", + "นั้นไว\r", + "นับแต่นี้\r", + "นับจากนั้น\r", + "นับจากนี้\r", + "น่า\r", + "นํา\r", + "นาง\r", + "นางสาว\r", + "น่าจะ\r", + "นาน\r", + "นานๆ\r", + "นําพา\r", + "นํามา\r", + "นาย\r", + "นิด\r", + "นิดหน่อย\r", + "นิดๆ\r", + "นี่\r", + "นี้\r", + "นี่เอง\r", + "นี้เอง\r", + "นี่แน่ะ\r", + "นี้แหล่\r", + "นี่แหละ\r", + "นี่ไง\r", + "นี่นา\r", + "นู่น\r", + "นู้น\r", + "บน\r", + "บอก\r", + "บอกแล้ว\r", + "บอกว่า\r", + "บ่อย\r", + "บ่อยกว่า\r", + "บ่อยครั้ง\r", + "บ่อยๆ\r", + "บัดเดี๋ยวนี้\r", + "บัดดล\r", + "บัดนั้น\r", + "บัดนี้\r", + "บาง\r", + "บ้าง\r", + "บางแห่ง\r", + "บางกว่า\r", + "บางขณะ\r", + "บางครั้ง\r", + "บางครา\r", + "บางคราว\r", + "บางที\r", + "บางที่\r", + "บางๆ\r", + "ปฏิบัติ\r", + "ประกอบ\r", + "ประการ\r", + "ประการใด\r", + "ประการฉะนี้\r", + "ประการหนึ่ง\r", + "ประมาณ\r", + "ประสบ\r", + "ปรับ\r", + "ปรากฏ\r", + "ปรากฏว่า\r", + "ปัจจุบัน\r", + "ปิด\r", + "ผ่าน\r", + "ผ่านๆ\r", + "ผิด\r", + "ผิดๆ\r", + "ผู้\r", + "ผู้ใด\r", + "ฝ่าย\r", + "ฝ่ายใด\r", + "พบ\r", + "พบว่า\r", + "พยายาม\r", + "พร้อม\r", + "พร้อมเพียง\r", + "พร้อมกัน\r", + "พร้อมกับ\r", + "พร้อมด้วย\r", + "พร้อมทั้ง\r", + "พร้อมที่\r", + "พวก\r", + "พวกเขา\r", + "พวกเธอ\r", + "พวกแก\r", + "พวกโน้น\r", + "พวกกัน\r", + "พวกกู\r", + "พวกคุณ\r", + "พวกฉัน\r", + "พวกท่าน\r", + "พวกที่\r", + "พวกนั้น\r", + "พวกนี้\r", + "พวกนู้น\r", + "พวกมัน\r", + "พวกมึง\r", + "พอ\r", + "พอเพียง\r", + "พอเหมาะ\r", + "พอแล้ว\r", + "พอกัน\r", + "พอควร\r", + "พอจะ\r", + "พอดี\r", + "พอตัว\r", + "พอที\r", + "พอที่\r", + "พอสม\r", + "พอสมควร\r", + "พอๆ\r", + "พา\r", + "พึง\r", + "พึ่ง\r", + "พื้นๆ\r", + "พูด\r", + "ภาค\r", + "ภาย\r", + "ภายใต้\r", + "ภายนอก\r", + "ภายหลัง\r", + "มอง\r", + "มองว่า\r", + "มัก\r", + "มักจะ\r", + "มัน\r", + "มั๊ย\r", + "มา\r", + "มาก\r", + "มากกว่า\r", + "มากมาย\r", + "มิ\r", + "มิใช่\r", + "มิได้\r", + "มิฉะนั้น\r", + "มี\r", + "มีแต่\r", + "มึง\r", + "มุ่ง\r", + "มุ่งเน้น\r", + "มุ่งหมาย\r", + "ยก\r", + "ยกให้\r", + "ยงเพราะ\r", + "ยอม\r", + "ย่อม\r", + "ยอมรับ\r", + "ย่อย\r", + "ยัง\r", + "ยังแต่\r", + "ยังโง้น\r", + "ยังไง\r", + "ยังคง\r", + "ยังงั้น\r", + "ยังงี้\r", + "ยังจะ\r", + "ยาก\r", + "ยาว\r", + "ยาวนาน\r", + "ยิ่ง\r", + "ยิ่งเมื่อ\r", + "ยิ่งแล้ว\r", + "ยิ่งใหญ่\r", + "ยิ่งกว่า\r", + "ยิ่งขึ้น\r", + "ยิ่งขึ้นไป\r", + "ยิ่งจน\r", + "ยิ่งจะ\r", + "ยิ่งนัก\r", + "รวด\r", + "รวดเร็ว\r", + "รวม\r", + "ร่วม\r", + "รวมกัน\r", + "ร่วมกัน\r", + "รวมด้วย\r", + "ร่วมด้วย\r", + "รวมถึง\r", + "รวมทั้ง\r", + "ระยะ\r", + "ระหว่าง\r", + "รับ\r", + "รึ\r", + "รือ\r", + "รือว่า\r", + "ล้วน\r", + "ล้วนแต่\r", + "ล้วนจน\r", + "ละ\r", + "ล่าสุด\r", + "วันใด\r", + "วันไหน\r", + "วันนั้น\r", + "วันนี้\r", + "สบาย\r", + "สมัย\r", + "สมัยโน้น\r", + "สมัยก่อน\r", + "สมัยนั้น\r", + "สมัยนี้\r", + "ส่วน\r", + "ส่วนเกิน\r", + "ส่วนใด\r", + "ส่วนใหญ่\r", + "ส่วนด้อย\r", + "ส่วนดี\r", + "ส่วนที่\r", + "ส่วนน้อย\r", + "ส่วนนั้น\r", + "ส่วนมาก\r", + "สั้น\r", + "สั้นๆ\r", + "สําคัญ\r", + "สามารถ\r", + "สิ่ง\r", + "สิ่งใด\r", + "สิ่งไหน\r", + "สิ่งนั้น\r", + "สิ่งนี้\r", + "สิ้น\r", + "สุด\r", + "หน\r", + "หนอ\r", + "หนอย\r", + "หน่อย\r", + "หมด\r", + "หมดกัน\r", + "หมดสิ้น\r", + "หรือเปล่า\r", + "หรือไง\r", + "หรือไม่\r", + "หรือไร\r", + "หรือยัง\r", + "หลังจาก\r", + "หาใช่\r", + "หาก\r", + "หากแม้\r", + "หากแม้น\r", + "หากแม้นว่า\r", + "หากว่า\r", + "หาความ\r", + "หารือ\r", + "อดีต\r", + "อนึ่ง\r", + "อยาก\r", + "อย่าง\r", + "อย่างเช่น\r", + "อย่างเดียว\r", + "อย่างโน้น\r", + "อย่างใด\r", + "อย่างไร\r", + "อย่างไรเสีย\r", + "อย่างไรก็\r", + "อย่างไรก็ได้\r", + "อย่างไหน\r", + "อย่างดี\r", + "อย่างที่\r", + "อย่างน้อย\r", + "อย่างนั้น\r", + "อย่างนี้\r", + "อย่างมาก\r", + "อย่างยิ่ง\r", + "อย่างละ\r", + "อย่างหนึ่ง\r", + "อย่างๆ\r", + "อัน\r", + "อันเนื่องมาจาก\r", + "อันใด\r", + "อันได้แก่\r", + "อันไหน\r", + "อันจะ\r", + "อันที่\r", + "อันที่จริง\r", + "อันที่จะ\r", + "อันละ\r", + "อันๆ\r", + "อาจ\r", + "อาจเป็น\r", + "อาจเป็นด้วย\r", + "อาจจะ\r", + "อีก\r", + "อื่น\r", + "อื่นๆ\r", + "ฯ\r", + "ฯพณฯ\r", + "ฯล\r", + "" +] diff --git a/data/stopwords/stopwords-th.txt b/data/stopwords/stopwords-th.txt deleted file mode 100644 index 01baee0..0000000 --- a/data/stopwords/stopwords-th.txt +++ /dev/null @@ -1,1047 +0,0 @@ -เก็บ -เกิด -เกิน -เกินๆ -เกี่ยวเนื่อง -เกี่ยวกัน -เกี่ยวกับ -เกี่ยวข้อง -เกี่ยวๆ -เกือบ -เกือบจะ -เกือบๆ -เขา -เข้า -เข้าใจ -เขียน -เคย -เคยๆ -เฉกเช่น -เฉพาะ -เฉย -เฉยๆ -เช่น -เช่นเคย -เช่นเดียวกัน -เช่นเดียวกับ -เช่นเมื่อ -เช่นใด -เช่นไร -เช่นก่อน -เช่นกัน -เช่นดัง -เช่นดังเก่า -เช่นดังก่อน -เช่นดังที่ -เช่นดังว่า -เช่นที่ -เช่นที่เคย -เช่นที่ว่า -เช่นนั้น -เช่นนั้นเอง -เช่นนี้ -เชื่อ -เชื่อถือ -เชื่อมั่น -เชื่อว่า -เดิม -เดิมที -เดิมๆ -เดียว -เดี๋ยว -เดี๋ยวก่อน -เดียวกัน -เดียวกับ -เดี๋ยวนั้น -เดี๋ยวนี้ -เต็มไปด้วย -เต็มไปหมด -เต็มๆ -เถอะ -เถิด -เท่า -เท่าใด -เท่าไร -เท่าไหร่ -เท่ากัน -เท่ากับ -เท่าที่ -เท่านั้น -เท่านี้ -เธอ -เน้น -เนี่ย -เนี่ยเอง -เป็น -เป็นเพื่อ -เป็นแต่ -เป็นด้วย -เป็นดัง -เป็นต้น -เป็นอัน -เป็นอันมาก -เป็นอาทิ -เปลี่ยน -เปลี่ยนแปลง -เผื่อ -เผื่อจะ -เผื่อที่ -เผื่อว่า -เพราะ -เพราะฉะนั้น -เพราะว่า -เพิ่ง -เพิ่งจะ -เพิ่ม -เพิ่มเติม -เพี -เพียง -เพียงเพื่อ -เพียงแค่ -เพียงแต่ -เพียงใด -เพียงไหน -เพียงพอ -เพื่อ -เพื่อให้ -เพื่อที่ -เพื่อว่า -เมื่อ -เมื่อเช้า -เมื่อเย็น -เมื่อใด -เมื่อไร -เมื่อไหร่ -เมื่อก่อน -เมื่อครั้ง -เมื่อครั้งก่อน -เมื่อคราว -เมื่อคราวก่อน -เมื่อคราวที่ -เมื่อคืน -เมื่อนั้น -เมื่อนี้ -เมื่อวันวาน -เมื่อวาน -เยอะ -เยอะแยะ -เร็ว -เร็วๆ -เรา -เราๆ -เริ่ม -เรียก -เรียบ -เรื่อย -เรื่อยๆ -เล็ก -เล็กน้อย -เล็กๆ -เลย -เล่าว่า -เสร็จ -เสร็จแล้ว -เสียแล้ว -เสียจน -เสียด้วย -เสียนี่ -เหตุ -เหตุไร -เหตุนั้น -เหตุนี้ -เหตุมด -เห็นแก่ -เห็นควร -เห็นจะ -เห็นว่า -เหล่า -เหล่านั้น -เหล่านี้ -เหลือ -เหลือเกิน -เอง -เอ็ง -เอา -แก -แก่ -แก้ไข -แค่ -แค่เพียง -แค่ไหน -แค่จะ -แค่นั้น -แค่นี้ -แค่ว่า -แด่ -แต่ -แต่เดิม -แต่เพียง -แต่เมื่อ -แต่ไร -แต่ไหน -แต่ก็ -แต่ก่อน -แต่จะ -แต่ต้อง -แต่ถ้า -แต่ทว่า -แต่ที่ -แต่นั้น -แต่ละ -แต่ว่า -แต่อย่างใด -แท้ -แท้จริง -แบบ -แม้ -แม้แต่ -แม้กระทั่ง -แม้นว่า -แม้ว่า -แยะ -แล้ว -แล้วเสร็จ -แล้วแต่ -แล้วกัน -แสดง -แสดงว่า -แห่ง -แห่งโน้น -แห่งใด -แห่งไหน -แห่งนั้น -แห่งนี้ -แหละ -โดย -โดยเฉพาะ -โดยเฉพาะอย่าง -โดยเมื่อ -โดยเร็ว -โดยแท้ -โดยแท้จริง -โดยง่าย -โดยดี -โดยดุษฎี -โดยตลอด -โดยทั่ว -โดยทั่วไป -โดยทั่วกัน -โดยทั่วถึง -โดยที่ -โดยนัย -โดยปกติ -โดยมัก -โดยมักจะ -โดยมาก -โดยรวม -โดยรวมๆ -โดยละม่อม -โดยลําดับ -โดยส่วนใหญ่ -โดยส่วนมาก -โดยส่วนรวม -โต -โตๆ -ใกล้ -ใกล้ๆ -ใคร -ใคร่ -ใคร่จะ -ใช่ -ใช้ -ใช่ไหม -ใด -ใดๆ -ใต้ -ใน -ในเมื่อ -ในช่วง -ในที่ -ในระหว่าง -ให้ -ให้แก่ -ใหญ่ -ใหญ่โต -ไกล -ไกลๆ -ไง -ไฉน -ได้ -ได้แก่ -ได้แต่ -ได้ที่ -ได้มา -ได้รับ -ไป -ไม่ -ไม่เป็นไร -ไม่ใช่ -ไม่ค่อย -ไม่ค่อยเป็น -ไม่ค่อยจะ -ไม่ว่า -ไร -ไหน -ไหนๆ -ก็ -ก็แค่ -ก็แล้วแต่ -ก็ได้ -ก็คือ -ก็จะ -ก็ดี -ก็ต่อเมื่อ -ก็ตาม -ก็ตามแต่ -ก็ตามที -กระทั่ง -กระทํา -กระนั้น -กระผม -กลับ -กล่าว -กล่าวคือ -กลุ่ม -กลุ่มก้อน -กลุ่มๆ -กว่า -กว้าง -กว้างขวาง -กว้างๆ -ก่อน -ก่อนหน้า -ก่อนหน้านี้ -ก่อนๆ -กัน -กันเถอะ -กันเอง -กันและกัน -กันไหม -กันดีไหม -กันดีกว่า -กันนะ -กับ -การ -กําลัง -กําลังจะ -กําหนด -กู -ขณะ -ขณะเดียวกัน -ขณะใด -ขณะใดๆ -ขณะที่ -ขณะนั้น -ขณะนี้ -ขณะหนึ่ง -ขวาง -ขวางๆ -ขอ -ของ -ขั้น -ข้า -ข้าง -ข้างเคียง -ข้างต้น -ข้างบน -ข้างล่าง -ข้างๆ -ขาด -ข้าพเจ้า -ข้าฯ -ขึ้น -คง -คงจะ -คงอยู่ -ครบ -ครบครัน -ครบถ้วน -ครั้ง -ครั้งใด -ครั้งไหน -ครั้งกระนั้น -ครั้งก่อน -ครั้งครา -ครั้งคราว -ครั้งที่ -ครั้งนั้น -ครั้งนี้ -ครั้งละ -ครั้งหนึ่ง -ครั้งหลัง -ครั้งหลังสุด -ครั้งๆ -ครัน -ครับ -ครา -คราใด -คราไหน -คราที่ -ครานั้น -ครานี้ -คราว -คราวโน้น -คราวใด -คราวไหน -คราวก่อน -คราวที่ -คราวนั้น -คราวนี้ -คราวละ -คราวหน้า -คราวหนึ่ง -คราวหลัง -คราวๆ -คราหนึ่ง -คล้าย -คล้ายกัน -คล้ายกันกับ -คล้ายกับ -คล้ายกับว่า -คล้ายว่า -ควร -ความ -ค่อน -ค่อนข้าง -ค่อนข้างจะ -ค่อนมาทาง -ค่อย -ค่อยไปทาง -ค่อยๆ -คะ -ค่ะ -คํา -คิด -คิดว่า -คือ -คุณ -คุณๆ -ง่าย -ง่ายๆ -จง -จด -จนเมื่อ -จนแม้ -จนแม้น -จนกระทั่ง -จนกว่า -จนขณะนี้ -จนตลอด -จนถึง -จนทั่ว -จนบัดนี้ -จรด -จรดกับ -จริง -จริงจัง -จริงๆ -จริงๆ -จวน -จวนเจียน -จวนจะ -จวบ -จวบกับ -จวบจน -จะ -จ้ะ -จ๊ะ -จะได้ -จัง -จังๆ -จังๆ -จัด -จัดแจง -จัดให้ -จัดการ -จัดงาน -จัดตั้ง -จัดทํา -จัดหา -จับ -จ้า -จ้า -จํา -จําเป็น -จาก -จากนั้น -จากนี้ -จากนี้ไป -จําพวก -จึง -จึงเป็น -จึงจะ -จู่ๆ -ฉะนั้น -ฉะนี้ -ฉัน -ช่วง -ช่วงแรก -ช่วงก่อน -ช่วงต่อไป -ช่วงถัดไป -ช่วงท้าย -ช่วงที่ -ช่วงนั้น -ช่วงนี้ -ช่วงระหว่าง -ช่วงหน้า -ช่วงหลัง -ช่วงๆ -ช่วย -ช้า -ช้านาน -ชาว -ช้าๆ -ซะ -ซะก่อน -ซะจน -ซะจนกระทั่ง -ซะจนถึง -ซึ่ง -ซึ่งได้แก่ -ซึ่งก็ -ซึ่งก็คือ -ซึ่งกัน -ซึ่งกันและกัน -ซึ่งๆ -ณ -ด้วย -ด้วยเช่นกัน -ด้วยเพราะ -ด้วยเหตุเพราะ -ด้วยเหตุที่ -ด้วยเหตุนั้น -ด้วยเหตุนี้ -ด้วยเหตุว่า -ด้วยเหมือนกัน -ด้วยกัน -ด้วยที่ -ด้วยประการฉะนี้ -ด้วยว่า -ดัง -ดั่ง -ดังเก่า -ดั่งเก่า -ดังเคย -ดั่งเคย -ดังเช่น -ดั่งเช่น -ดังเช่นที่ -ดั่งเช่นที่ -ดังเดิม -ดั่งเดิม -ดังเหมือน -ดั่งเหมือน -ดังแต่ก่อน -ดั่งแต่ก่อน -ดังแม้ -ดั่งแม้ -ดังกล่าว -ดังกับ -ดั่งกับ -ดังกับว่า -ดั่งกับว่า -ดังจะ -ดั่งจะ -ดังต่อไปนี้ -ดังที่ -ดั่งที่ -ดังที่เคย -ดังที่กล่าว -ดังที่จะเป็น -ดังนั้น -ดังนี้ -ดังนี้เช่น -ดังนี้เพราะ -ดังว่า -ดั่งว่า -ดําเนิน -ดําเนินไป -ดําเนินการ -ดําเนินงาน -ด้าน -ด้านๆ -ดิฉัน -ดี -ดีๆ -ดู -ดูเหมือน -ดูเหมือนว่า -ดูแล -ดูแล้ว -ดูจะ -ดูว่า -ดูๆ -ตน -ตนเอง -ตนฯ -ตรง -ตรงๆ -ตลอด -ตลอดเวลา -ตลอดไป -ตลอดกาล -ตลอดกาลนาน -ตลอดจน -ตลอดถึง -ตลอดทั้ง -ตลอดทั่ว -ตลอดทั่วถึง -ตลอดทั่วทั้ง -ตลอดปี -ตลอดมา -ตลอดระยะเวลา -ตลอดวัน -ตลอดศก -ต่อ -ต่อเมื่อ -ต่อให้ -ต่อไป -ต่อไปนี้ -ต่อกัน -ต่อกับ -ต้อง -ต้องการ -ต่อจาก -ตอน -ตอนแรก -ตอนใด -ตอนไหน -ตอนก่อน -ตอนต่อ -ตอนต่อไป -ตอนต่อมา -ตอนถัดไป -ตอนถัดมา -ตอนที่ -ตอนที่แล้ว -ตอนนั้น -ตอนนี้ -ตอนสุดท้าย -ตอนหน้า -ตอนหลัง -ตอนๆ -ต่อมา -ต่อว่า -ต่อๆ -ตะหาก -ตั้ง -ตั้งแต่ -ตั้งแต่แรก -ตั้งแต่นั้น -ตั้งแต่นี้ -ตั้งต้น -ตั้งที่ -ตั้งอยู่ -ตัว -ตัวเอง -ตัวโน้น -ตัวใด -ตัวไหน -ตัวที่ -ตัวนั้น -ตัวนี้ -ตัวละ -ตัวอย่างเช่น -ตัวๆ -ต่าง -ต่างก็ -ต่างหาก -ต่างๆ -ตาม -ตามแต่ -ตามด้วย -ตามที่ -ตามๆ -ถ้า -ถ้าจะ -ถ้าหาก -ถึง -ถึงเมื่อ -ถึงเมื่อใด -ถึงเมื่อไร -ถึงแก่ -ถึงแม้ -ถึงแม้จะ -ถึงแม้ว่า -ถึงจะ -ถึงบัดนั้น -ถึงบัดนี้ -ถึงอย่างไร -ถือ -ถือว่า -ถูก -ถูกต้อง -ถูกๆ -ทรง -ทว่า -ทั้ง -ทั้งเป็น -ทั้งคน -ทั้งตัว -ทั้งที่ -ทั้งนั้น -ทั้งนั้นเพราะ -ทั้งนั้นด้วย -ทั้งนี้ -ทั้งปวง -ทั้งมวล -ทั้งสิ้น -ทั้งหมด -ทั้งหลาย -ทั้งๆ -ทั้งๆ -ทัน -ทันใดนั้น -ทันที -ทันทีทันใด -ทั่ว -ทําให้ -ทําไม -ทําไร -ทําๆ -ที -ที่ -ที่ -ทีเดียว -ทีเถอะ -ที่แท้ -ที่แท้จริง -ที่แล้ว -ที่แห่งนั้น -ทีใด -ที่ใด -ที่ได้ -ทีไร -ที่ไหน -ที่จริง -ที่ซึ่ง -ที่นั้น -ที่นี้ -ทีละ -ที่ละ -ที่ว่า -ที่สุด -ทีๆ -ที่ๆ -ทุก -ทุกเมื่อ -ทุกแห่ง -ทุกคน -ทุกครั้ง -ทุกครา -ทุกคราว -ทุกชิ้น -ทุกตัว -ทุกทาง -ทุกที -ทุกที่ -ทุกวัน -ทุกวันนี้ -ทุกสิ่ง -ทุกหน -ทุกอย่าง -ทุกอัน -ทุกๆ -นอก -นอกเหนือ -นอกจาก -นอกจากที่ -นอกจากนั้น -นอกจากนี้ -นอกจากว่า -นอกนั้น -น้อย -น้อยกว่า -น้อยๆ -นะ -น่ะ -นัก -นั่น -นั้นไว -นับแต่นี้ -นับจากนั้น -นับจากนี้ -น่า -นํา -นาง -นางสาว -น่าจะ -นาน -นานๆ -นําพา -นํามา -นาย -นิด -นิดหน่อย -นิดๆ -นี่ -นี้ -นี่เอง -นี้เอง -นี่แน่ะ -นี้แหล่ -นี่แหละ -นี่ไง -นี่นา -นู่น -นู้น -บน -บอก -บอกแล้ว -บอกว่า -บ่อย -บ่อยกว่า -บ่อยครั้ง -บ่อยๆ -บัดเดี๋ยวนี้ -บัดดล -บัดนั้น -บัดนี้ -บาง -บ้าง -บางแห่ง -บางกว่า -บางขณะ -บางครั้ง -บางครา -บางคราว -บางที -บางที่ -บางๆ -ปฏิบัติ -ประกอบ -ประการ -ประการใด -ประการฉะนี้ -ประการหนึ่ง -ประมาณ -ประสบ -ปรับ -ปรากฏ -ปรากฏว่า -ปัจจุบัน -ปิด -ผ่าน -ผ่านๆ -ผิด -ผิดๆ -ผู้ -ผู้ใด -ฝ่าย -ฝ่ายใด -พบ -พบว่า -พยายาม -พร้อม -พร้อมเพียง -พร้อมกัน -พร้อมกับ -พร้อมด้วย -พร้อมทั้ง -พร้อมที่ -พวก -พวกเขา -พวกเธอ -พวกแก -พวกโน้น -พวกกัน -พวกกู -พวกคุณ -พวกฉัน -พวกท่าน -พวกที่ -พวกนั้น -พวกนี้ -พวกนู้น -พวกมัน -พวกมึง -พอ -พอเพียง -พอเหมาะ -พอแล้ว -พอกัน -พอควร -พอจะ -พอดี -พอตัว -พอที -พอที่ -พอสม -พอสมควร -พอๆ -พา -พึง -พึ่ง -พื้นๆ -พูด -ภาค -ภาย -ภายใต้ -ภายนอก -ภายหลัง -มอง -มองว่า -มัก -มักจะ -มัน -มั๊ย -มา -มาก -มากกว่า -มากมาย -มิ -มิใช่ -มิได้ -มิฉะนั้น -มี -มีแต่ -มึง -มุ่ง -มุ่งเน้น -มุ่งหมาย -ยก -ยกให้ -ยงเพราะ -ยอม -ย่อม -ยอมรับ -ย่อย -ยัง -ยังแต่ -ยังโง้น -ยังไง -ยังคง -ยังงั้น -ยังงี้ -ยังจะ -ยาก -ยาว -ยาวนาน -ยิ่ง -ยิ่งเมื่อ -ยิ่งแล้ว -ยิ่งใหญ่ -ยิ่งกว่า -ยิ่งขึ้น -ยิ่งขึ้นไป -ยิ่งจน -ยิ่งจะ -ยิ่งนัก -รวด -รวดเร็ว -รวม -ร่วม -รวมกัน -ร่วมกัน -รวมด้วย -ร่วมด้วย -รวมถึง -รวมทั้ง -ระยะ -ระหว่าง -รับ -รึ -รือ -รือว่า -ล้วน -ล้วนแต่ -ล้วนจน -ละ -ล่าสุด -วันใด -วันไหน -วันนั้น -วันนี้ -สบาย -สมัย -สมัยโน้น -สมัยก่อน -สมัยนั้น -สมัยนี้ -ส่วน -ส่วนเกิน -ส่วนใด -ส่วนใหญ่ -ส่วนด้อย -ส่วนดี -ส่วนที่ -ส่วนน้อย -ส่วนนั้น -ส่วนมาก -สั้น -สั้นๆ -สําคัญ -สามารถ -สิ่ง -สิ่งใด -สิ่งไหน -สิ่งนั้น -สิ่งนี้ -สิ้น -สุด -หน -หนอ -หนอย -หน่อย -หมด -หมดกัน -หมดสิ้น -หรือเปล่า -หรือไง -หรือไม่ -หรือไร -หรือยัง -หลังจาก -หาใช่ -หาก -หากแม้ -หากแม้น -หากแม้นว่า -หากว่า -หาความ -หารือ -อดีต -อนึ่ง -อยาก -อย่าง -อย่างเช่น -อย่างเดียว -อย่างโน้น -อย่างใด -อย่างไร -อย่างไรเสีย -อย่างไรก็ -อย่างไรก็ได้ -อย่างไหน -อย่างดี -อย่างที่ -อย่างน้อย -อย่างนั้น -อย่างนี้ -อย่างมาก -อย่างยิ่ง -อย่างละ -อย่างหนึ่ง -อย่างๆ -อัน -อันเนื่องมาจาก -อันใด -อันได้แก่ -อันไหน -อันจะ -อันที่ -อันที่จริง -อันที่จะ -อันละ -อันๆ -อาจ -อาจเป็น -อาจเป็นด้วย -อาจจะ -อีก -อื่น -อื่นๆ -ฯ -ฯพณฯ -ฯล diff --git a/data/stopwords/stopwords-tr.json b/data/stopwords/stopwords-tr.json new file mode 100644 index 0000000..3ca46cf --- /dev/null +++ b/data/stopwords/stopwords-tr.json @@ -0,0 +1,225 @@ +[ + "a", + "acaba", + "altı", + "ama", + "ancak", + "artık", + "asla", + "aslında", + "az", + "b", + "bana", + "bazen", + "bazı", + "bazıları", + "bazısı", + "belki", + "ben", + "beni", + "benim", + "beş", + "bile", + "bir", + "birçoğu", + "birçok", + "birçokları", + "biri", + "birisi", + "birkaç", + "birkaçı", + "birşey", + "birşeyi", + "biz", + "bize", + "bizi", + "bizim", + "böyle", + "böylece", + "bu", + "buna", + "bunda", + "bundan", + "bunu", + "bunun", + "burada", + "bütün", + "c", + "ç", + "çoğu", + "çoğuna", + "çoğunu", + "çok", + "çünkü", + "d", + "da", + "daha", + "de", + "değil", + "demek", + "diğer", + "diğeri", + "diğerleri", + "diye", + "dokuz", + "dolayı", + "dört", + "e", + "elbette", + "en", + "f", + "fakat", + "falan", + "felan", + "filan", + "g", + "gene", + "gibi", + "ğ", + "h", + "hâlâ", + "hangi", + "hangisi", + "hani", + "hatta", + "hem", + "henüz", + "hep", + "hepsi", + "hepsine", + "hepsini", + "her", + "her biri", + "herkes", + "herkese", + "herkesi", + "hiç", + "hiç kimse", + "hiçbiri", + "hiçbirine", + "hiçbirini", + "ı", + "i", + "için", + "içinde", + "iki", + "ile", + "ise", + "işte", + "j", + "k", + "kaç", + "kadar", + "kendi", + "kendine", + "kendini", + "ki", + "kim", + "kime", + "kimi", + "kimin", + "kimisi", + "l", + "m", + "madem", + "mı", + "mı", + "mi", + "mu", + "mu", + "mü", + "mü", + "n", + "nasıl", + "ne", + "ne kadar", + "ne zaman", + "neden", + "nedir", + "nerde", + "nerede", + "nereden", + "nereye", + "nesi", + "neyse", + "niçin", + "niye", + "o", + "on", + "ona", + "ondan", + "onlar", + "onlara", + "onlardan", + "onların", + "onların", + "onu", + "onun", + "orada", + "oysa", + "oysaki", + "ö", + "öbürü", + "ön", + "önce", + "ötürü", + "öyle", + "p", + "r", + "rağmen", + "s", + "sana", + "sekiz", + "sen", + "senden", + "seni", + "senin", + "siz", + "sizden", + "size", + "sizi", + "sizin", + "son", + "sonra", + "ş", + "şayet", + "şey", + "şeyden", + "şeye", + "şeyi", + "şeyler", + "şimdi", + "şöyle", + "şu", + "şuna", + "şunda", + "şundan", + "şunlar", + "şunu", + "şunun", + "t", + "tabi", + "tamam", + "tüm", + "tümü", + "u", + "ü", + "üç", + "üzere", + "v", + "var", + "ve", + "veya", + "veyahut", + "y", + "ya", + "ya da", + "yani", + "yedi", + "yerine", + "yine", + "yoksa", + "z", + "zaten", + "zira" +] diff --git a/data/stopwords/stopwords-tr.txt b/data/stopwords/stopwords-tr.txt deleted file mode 100644 index 6245dd1..0000000 --- a/data/stopwords/stopwords-tr.txt +++ /dev/null @@ -1,223 +0,0 @@ -a -acaba -altı -ama -ancak -artık -asla -aslında -az -b -bana -bazen -bazı -bazıları -bazısı -belki -ben -beni -benim -beş -bile -bir -birçoğu -birçok -birçokları -biri -birisi -birkaç -birkaçı -birşey -birşeyi -biz -bize -bizi -bizim -böyle -böylece -bu -buna -bunda -bundan -bunu -bunun -burada -bütün -c -ç -çoğu -çoğuna -çoğunu -çok -çünkü -d -da -daha -de -değil -demek -diğer -diğeri -diğerleri -diye -dokuz -dolayı -dört -e -elbette -en -f -fakat -falan -felan -filan -g -gene -gibi -ğ -h -hâlâ -hangi -hangisi -hani -hatta -hem -henüz -hep -hepsi -hepsine -hepsini -her -her biri -herkes -herkese -herkesi -hiç -hiç kimse -hiçbiri -hiçbirine -hiçbirini -ı -i -için -içinde -iki -ile -ise -işte -j -k -kaç -kadar -kendi -kendine -kendini -ki -kim -kime -kimi -kimin -kimisi -l -m -madem -mı -mı -mi -mu -mu -mü -mü -n -nasıl -ne -ne kadar -ne zaman -neden -nedir -nerde -nerede -nereden -nereye -nesi -neyse -niçin -niye -o -on -ona -ondan -onlar -onlara -onlardan -onların -onların -onu -onun -orada -oysa -oysaki -ö -öbürü -ön -önce -ötürü -öyle -p -r -rağmen -s -sana -sekiz -sen -senden -seni -senin -siz -sizden -size -sizi -sizin -son -sonra -ş -şayet -şey -şeyden -şeye -şeyi -şeyler -şimdi -şöyle -şu -şuna -şunda -şundan -şunlar -şunu -şunun -t -tabi -tamam -tüm -tümü -u -ü -üç -üzere -v -var -ve -veya -veyahut -y -ya -ya da -yani -yedi -yerine -yine -yoksa -z -zaten -zira \ No newline at end of file diff --git a/data/stopwords/stopwords-zh.json b/data/stopwords/stopwords-zh.json new file mode 100644 index 0000000..394a9b7 --- /dev/null +++ b/data/stopwords/stopwords-zh.json @@ -0,0 +1,127 @@ +[ + "的\r", + "一\r", + "不\r", + "在\r", + "人\r", + "有\r", + "是\r", + "为\r", + "以\r", + "于\r", + "上\r", + "他\r", + "而\r", + "后\r", + "之\r", + "来\r", + "及\r", + "了\r", + "因\r", + "下\r", + "可\r", + "到\r", + "由\r", + "这\r", + "与\r", + "也\r", + "此\r", + "但\r", + "并\r", + "个\r", + "其\r", + "已\r", + "无\r", + "小\r", + "我\r", + "们\r", + "起\r", + "最\r", + "再\r", + "今\r", + "去\r", + "好\r", + "只\r", + "又\r", + "或\r", + "很\r", + "亦\r", + "某\r", + "把\r", + "那\r", + "你\r", + "乃\r", + "它\r", + "吧\r", + "被\r", + "比\r", + "别\r", + "趁\r", + "当\r", + "从\r", + "到\r", + "得\r", + "打\r", + "凡\r", + "儿\r", + "尔\r", + "该\r", + "各\r", + "给\r", + "跟\r", + "和\r", + "何\r", + "还\r", + "即\r", + "几\r", + "既\r", + "看\r", + "据\r", + "距\r", + "靠\r", + "啦\r", + "了\r", + "另\r", + "么\r", + "每\r", + "们\r", + "嘛\r", + "拿\r", + "哪\r", + "那\r", + "您\r", + "凭\r", + "且\r", + "却\r", + "让\r", + "仍\r", + "啥\r", + "如\r", + "若\r", + "使\r", + "谁\r", + "虽\r", + "随\r", + "同\r", + "所\r", + "她\r", + "哇\r", + "嗡\r", + "往\r", + "哪\r", + "些\r", + "向\r", + "沿\r", + "哟\r", + "用\r", + "于\r", + "咱\r", + "则\r", + "怎\r", + "曾\r", + "至\r", + "致\r", + "着\r", + "诸\r", + "自" +] diff --git a/data/stopwords/stopwords-zh.txt b/data/stopwords/stopwords-zh.txt deleted file mode 100644 index 955ff2b..0000000 --- a/data/stopwords/stopwords-zh.txt +++ /dev/null @@ -1,125 +0,0 @@ -的 -一 -不 -在 -人 -有 -是 -为 -以 -于 -上 -他 -而 -后 -之 -来 -及 -了 -因 -下 -可 -到 -由 -这 -与 -也 -此 -但 -并 -个 -其 -已 -无 -小 -我 -们 -起 -最 -再 -今 -去 -好 -只 -又 -或 -很 -亦 -某 -把 -那 -你 -乃 -它 -吧 -被 -比 -别 -趁 -当 -从 -到 -得 -打 -凡 -儿 -尔 -该 -各 -给 -跟 -和 -何 -还 -即 -几 -既 -看 -据 -距 -靠 -啦 -了 -另 -么 -每 -们 -嘛 -拿 -哪 -那 -您 -凭 -且 -却 -让 -仍 -啥 -如 -若 -使 -谁 -虽 -随 -同 -所 -她 -哇 -嗡 -往 -哪 -些 -向 -沿 -哟 -用 -于 -咱 -则 -怎 -曾 -至 -致 -着 -诸 -自 \ No newline at end of file From c684399df204e1f2087419180b3b0b5d49d46263 Mon Sep 17 00:00:00 2001 From: johdirr Date: Mon, 19 Dec 2016 16:27:51 +0100 Subject: [PATCH 2/6] introduce stopwords loader --- data/stopwords/index.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 data/stopwords/index.js diff --git a/data/stopwords/index.js b/data/stopwords/index.js new file mode 100644 index 0000000..b9b5a37 --- /dev/null +++ b/data/stopwords/index.js @@ -0,0 +1,17 @@ +function stopwordsJSON(lang) { + return './stopwords-' + lang + '.json' +}; + + +module.exports = function(language) { + var stopwords; + + try { + stopwords = require(stopwordsJSON(language)) + } catch (e) { + console.error("WARNING: No stopwords found for '" + language + "' - defaulting to English!") + stopwords = require(stopwordsJSON('en')) + } finally { + return stopwords + } +} From f78e89c28455879f98f5c4d2c0eefb120235b0dd Mon Sep 17 00:00:00 2001 From: johdirr Date: Mon, 19 Dec 2016 16:28:09 +0100 Subject: [PATCH 3/6] use stopwords loader instead of fs --- src/stopwords.coffee | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/stopwords.coffee b/src/stopwords.coffee index ecebd0c..696fa00 100644 --- a/src/stopwords.coffee +++ b/src/stopwords.coffee @@ -1,26 +1,17 @@ path = require('path') fs = require('fs') _ = require('lodash') +stepwords = require('../data/stopwords') cache = {} -getFilePath = (language) -> - path.join(__dirname, "..", "data", "stopwords", "stopwords-#{language}.txt") - # Given a language, loads a list of stop words for that language # and then returns which of those words exist in the given content module.exports = stopwords = (content, language = 'en') -> - filePath = getFilePath(language) - - if !fs.existsSync(filePath) - console.error("WARNING: No stopwords file found for '#{language}' - defaulting to English!") - filePath = getFilePath('en') - if cache.hasOwnProperty(language) stopWords = cache[language] else - stopWords = fs.readFileSync(filePath).toString().split('\n') - .filter((s) -> s.length > 0) + stopWords = stepwords(language) cache[language] = stopWords strippedInput = removePunctuation(content) From daa7f78fc360dfa2113bca1a6fa5a08687b9040d Mon Sep 17 00:00:00 2001 From: johdirr Date: Mon, 19 Dec 2016 16:35:48 +0100 Subject: [PATCH 4/6] build --- lib/stopwords.js | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/lib/stopwords.js b/lib/stopwords.js index 2a1888d..5ae7381 100644 --- a/lib/stopwords.js +++ b/lib/stopwords.js @@ -1,28 +1,19 @@ // Generated by CoffeeScript 2.0.0-beta7 void function () { - var _, cache, candiateWords, fs, getFilePath, path, removePunctuation, stopwords; + var _, cache, candiateWords, fs, path, removePunctuation, stepwords, stopwords; path = require('path'); fs = require('fs'); _ = require('lodash'); + stepwords = require('../data/stopwords'); cache = {}; - getFilePath = function (language) { - return path.join(__dirname, '..', 'data', 'stopwords', 'stopwords-' + language + '.txt'); - }; module.exports = stopwords = function (content, language) { - var count, filePath, overlappingStopwords, stopWords, strippedInput, words; + var count, overlappingStopwords, stopWords, strippedInput, words; if (null == language) language = 'en'; - filePath = getFilePath(language); - if (!fs.existsSync(filePath)) { - console.error("WARNING: No stopwords file found for '" + language + "' - defaulting to English!"); - filePath = getFilePath('en'); - } if (cache.hasOwnProperty(language)) { stopWords = cache[language]; } else { - stopWords = fs.readFileSync(filePath).toString().split('\n').filter(function (s) { - return s.length > 0; - }); + stopWords = stepwords(language); cache[language] = stopWords; } strippedInput = removePunctuation(content); From 043c40952add5d1404a5197ff42a444abdae4c79 Mon Sep 17 00:00:00 2001 From: johdirr Date: Wed, 21 Dec 2016 22:29:49 +0100 Subject: [PATCH 5/6] remove unnecessary fs require --- lib/stopwords.js | 3 +-- src/stopwords.coffee | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/stopwords.js b/lib/stopwords.js index 5ae7381..4ef10d9 100644 --- a/lib/stopwords.js +++ b/lib/stopwords.js @@ -1,8 +1,7 @@ // Generated by CoffeeScript 2.0.0-beta7 void function () { - var _, cache, candiateWords, fs, path, removePunctuation, stepwords, stopwords; + var _, cache, candiateWords, path, removePunctuation, stepwords, stopwords; path = require('path'); - fs = require('fs'); _ = require('lodash'); stepwords = require('../data/stopwords'); cache = {}; diff --git a/src/stopwords.coffee b/src/stopwords.coffee index 696fa00..fbc076d 100644 --- a/src/stopwords.coffee +++ b/src/stopwords.coffee @@ -1,5 +1,4 @@ path = require('path') -fs = require('fs') _ = require('lodash') stepwords = require('../data/stopwords') From 10c9ac95ef13f425b9e84ff2f8eb0017f74e3e6d Mon Sep 17 00:00:00 2001 From: johdirr Date: Fri, 6 Jan 2017 10:40:58 +0100 Subject: [PATCH 6/6] browserify support --- data/stopwords/index.js | 43 ++++++++++++++++++++++++++++++----------- lib/stopwords.js | 6 +++--- src/stopwords.coffee | 4 ++-- 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/data/stopwords/index.js b/data/stopwords/index.js index b9b5a37..2dacedb 100644 --- a/data/stopwords/index.js +++ b/data/stopwords/index.js @@ -1,17 +1,38 @@ -function stopwordsJSON(lang) { - return './stopwords-' + lang + '.json' -}; - +var stopwords = { + ar: require('./stopwords-ar.json'), + bg: require('./stopwords-bg.json'), + cs: require('./stopwords-cs.json'), + da: require('./stopwords-da.json'), + de: require('./stopwords-de.json'), + en: require('./stopwords-en.json'), + es: require('./stopwords-es.json'), + fi: require('./stopwords-fi.json'), + fr: require('./stopwords-fr.json'), + hu: require('./stopwords-hu.json'), + id: require('./stopwords-id.json'), + it: require('./stopwords-it.json'), + ko: require('./stopwords-ko.json'), + nb: require('./stopwords-nb.json'), + nl: require('./stopwords-nl.json'), + no: require('./stopwords-no.json'), + pl: require('./stopwords-pl.json'), + pt: require('./stopwords-pt.json'), + ru: require('./stopwords-ru.json'), + sv: require('./stopwords-sv.json'), + th: require('./stopwords-th.json'), + tr: require('./stopwords-tr.json'), + zh: require('./stopwords-zh.json') +} module.exports = function(language) { - var stopwords; + var sw; - try { - stopwords = require(stopwordsJSON(language)) - } catch (e) { + if (stopwords.hasOwnProperty(language)) { + sw = stopwords[language] + } else { console.error("WARNING: No stopwords found for '" + language + "' - defaulting to English!") - stopwords = require(stopwordsJSON('en')) - } finally { - return stopwords + sw = stopwords.en } + + return sw } diff --git a/lib/stopwords.js b/lib/stopwords.js index 4ef10d9..12d5d63 100644 --- a/lib/stopwords.js +++ b/lib/stopwords.js @@ -1,9 +1,9 @@ // Generated by CoffeeScript 2.0.0-beta7 void function () { - var _, cache, candiateWords, path, removePunctuation, stepwords, stopwords; + var _, cache, candiateWords, path, removePunctuation, stopwords, sw; path = require('path'); _ = require('lodash'); - stepwords = require('../data/stopwords'); + sw = require('../data/stopwords'); cache = {}; module.exports = stopwords = function (content, language) { var count, overlappingStopwords, stopWords, strippedInput, words; @@ -12,7 +12,7 @@ void function () { if (cache.hasOwnProperty(language)) { stopWords = cache[language]; } else { - stopWords = stepwords(language); + stopWords = sw(language); cache[language] = stopWords; } strippedInput = removePunctuation(content); diff --git a/src/stopwords.coffee b/src/stopwords.coffee index fbc076d..03faa7a 100644 --- a/src/stopwords.coffee +++ b/src/stopwords.coffee @@ -1,6 +1,6 @@ path = require('path') _ = require('lodash') -stepwords = require('../data/stopwords') +sw = require('../data/stopwords') cache = {} @@ -10,7 +10,7 @@ module.exports = stopwords = (content, language = 'en') -> if cache.hasOwnProperty(language) stopWords = cache[language] else - stopWords = stepwords(language) + stopWords = sw(language) cache[language] = stopWords strippedInput = removePunctuation(content)