From e06f08c9275d91d6ec314a8995fd3a75975dc58d Mon Sep 17 00:00:00 2001 From: knod Date: Thu, 22 Dec 2016 08:03:31 -0500 Subject: [PATCH 1/2] txt to json for all language files --- data/stopwords/stopwords-ar.json | 1 + data/stopwords/stopwords-ar.txt | 162 ---- data/stopwords/stopwords-bg.json | 1 + data/stopwords/stopwords-bg.txt | 259 ------ data/stopwords/stopwords-cs.json | 1 + data/stopwords/stopwords-cs.txt | 256 ------ data/stopwords/stopwords-da.json | 1 + data/stopwords/stopwords-da.txt | 101 --- data/stopwords/stopwords-de.json | 1 + data/stopwords/stopwords-de.txt | 894 -------------------- data/stopwords/stopwords-en.json | 1 + data/stopwords/stopwords-en.txt | 546 ------------- data/stopwords/stopwords-es.json | 1 + data/stopwords/stopwords-es.txt | 308 ------- data/stopwords/stopwords-fi.json | 1 + data/stopwords/stopwords-fi.txt | 68 -- data/stopwords/stopwords-fr.json | 1 + data/stopwords/stopwords-fr.txt | 220 ----- data/stopwords/stopwords-hu.json | 1 + data/stopwords/stopwords-hu.txt | 403 --------- data/stopwords/stopwords-id.json | 1 + data/stopwords/stopwords-id.txt | 1309 ------------------------------ data/stopwords/stopwords-it.json | 1 + data/stopwords/stopwords-it.txt | 287 ------- data/stopwords/stopwords-ko.json | 1 + data/stopwords/stopwords-ko.txt | 70 -- data/stopwords/stopwords-nb.json | 1 + data/stopwords/stopwords-nb.txt | 117 --- data/stopwords/stopwords-nl.json | 1 + data/stopwords/stopwords-nl.txt | 48 -- data/stopwords/stopwords-no.json | 1 + data/stopwords/stopwords-no.txt | 120 --- data/stopwords/stopwords-pl.json | 1 + data/stopwords/stopwords-pl.txt | 277 ------- data/stopwords/stopwords-pt.json | 1 + data/stopwords/stopwords-pt.txt | 609 -------------- data/stopwords/stopwords-ru.json | 1 + data/stopwords/stopwords-ru.txt | 421 ---------- data/stopwords/stopwords-sv.json | 1 + data/stopwords/stopwords-sv.txt | 547 ------------- data/stopwords/stopwords-th.json | 1 + data/stopwords/stopwords-th.txt | 1047 ------------------------ data/stopwords/stopwords-tr.json | 1 + data/stopwords/stopwords-tr.txt | 223 ----- data/stopwords/stopwords-zh.json | 1 + data/stopwords/stopwords-zh.txt | 125 --- 46 files changed, 23 insertions(+), 8417 deletions(-) create mode 100644 data/stopwords/stopwords-ar.json delete mode 100644 data/stopwords/stopwords-ar.txt create mode 100644 data/stopwords/stopwords-bg.json delete mode 100644 data/stopwords/stopwords-bg.txt create mode 100755 data/stopwords/stopwords-cs.json delete mode 100755 data/stopwords/stopwords-cs.txt create mode 100644 data/stopwords/stopwords-da.json delete mode 100644 data/stopwords/stopwords-da.txt create mode 100644 data/stopwords/stopwords-de.json delete mode 100644 data/stopwords/stopwords-de.txt create mode 100644 data/stopwords/stopwords-en.json delete mode 100644 data/stopwords/stopwords-en.txt create mode 100644 data/stopwords/stopwords-es.json delete mode 100644 data/stopwords/stopwords-es.txt create mode 100644 data/stopwords/stopwords-fi.json delete mode 100644 data/stopwords/stopwords-fi.txt create mode 100644 data/stopwords/stopwords-fr.json delete mode 100644 data/stopwords/stopwords-fr.txt create mode 100644 data/stopwords/stopwords-hu.json delete mode 100644 data/stopwords/stopwords-hu.txt create mode 100644 data/stopwords/stopwords-id.json delete mode 100644 data/stopwords/stopwords-id.txt create mode 100644 data/stopwords/stopwords-it.json delete mode 100644 data/stopwords/stopwords-it.txt create mode 100644 data/stopwords/stopwords-ko.json delete mode 100644 data/stopwords/stopwords-ko.txt create mode 100644 data/stopwords/stopwords-nb.json delete mode 100644 data/stopwords/stopwords-nb.txt create mode 100644 data/stopwords/stopwords-nl.json delete mode 100644 data/stopwords/stopwords-nl.txt create mode 100644 data/stopwords/stopwords-no.json delete mode 100644 data/stopwords/stopwords-no.txt create mode 100644 data/stopwords/stopwords-pl.json delete mode 100644 data/stopwords/stopwords-pl.txt create mode 100644 data/stopwords/stopwords-pt.json delete mode 100644 data/stopwords/stopwords-pt.txt create mode 100644 data/stopwords/stopwords-ru.json delete mode 100644 data/stopwords/stopwords-ru.txt create mode 100644 data/stopwords/stopwords-sv.json delete mode 100644 data/stopwords/stopwords-sv.txt create mode 100644 data/stopwords/stopwords-th.json delete mode 100644 data/stopwords/stopwords-th.txt create mode 100644 data/stopwords/stopwords-tr.json delete mode 100644 data/stopwords/stopwords-tr.txt create mode 100644 data/stopwords/stopwords-zh.json delete mode 100644 data/stopwords/stopwords-zh.txt diff --git a/data/stopwords/stopwords-ar.json b/data/stopwords/stopwords-ar.json new file mode 100644 index 0000000..1952984 --- /dev/null +++ b/data/stopwords/stopwords-ar.json @@ -0,0 +1 @@ +[ "فى", "في", "كل", "لم", "لن", "له", "من", "هو", "هي", "قوة", "كما", "لها", "منذ", "وقد", "ولا", "نفسه", "لقاء", "مقابل", "هناك", "وقال", "وكان", "نهاية", "وقالت", "وكانت", "للامم", "فيه", "كلم", "لكن", "وفي", "وقف", "ولم", "ومن", "وهو", "وهي", "يوم", "فيها", "منها", "مليار", "لوكالة", "يكون", "يمكن", "مليون", "حيث", "اكد", "الا", "اما", "امس", "السابق", "التى", "التي", "اكثر", "ايار", "ايضا", "ثلاثة", "الذاتي", "الاخيرة", "الثاني", "الثانية", "الذى", "الذي", "الان", "امام", "ايام", "خلال", "حوالى", "الذين", "الاول", "الاولى", "بين", "ذلك", "دون", "حول", "حين", "الف", "الى", "انه", "اول", "ضمن", "انها", "جميع", "الماضي", "الوقت", "المقبل", "اليوم", "ـ", "ف", "و", "و6", "قد", "لا", "ما", "مع", "مساء", "هذا", "واحد", "واضاف", "واضافت", "فان", "قبل", "قال", "كان", "لدى", "نحو", "هذه", "وان", "واكد", "كانت", "واوضح", "مايو", "ب", "ا", "أ", "،", "عشر", "عدد", "عدة", "عشرة", "عدم", "عام", "عاما", "عن", "عند", "عندما", "على", "عليه", "عليها", "زيارة", "سنة", "سنوات", "تم", "ضد", "بعد", "بعض", "اعادة", "اعلنت", "بسبب", "حتى", "اذا", "احد", "اثر", "برس", "باسم", "غدا", "شخصا", "صباح", "اطار", "اربعة", "اخرى", "بان", "اجل", "غير", "بشكل", "حاليا", "بن", "به", "ثم", "اف", "ان", "او", "اي", "بها", "صفر" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-ar.txt b/data/stopwords/stopwords-ar.txt deleted file mode 100644 index 64e0e71..0000000 --- a/data/stopwords/stopwords-ar.txt +++ /dev/null @@ -1,162 +0,0 @@ -فى -في -كل -لم -لن -له -من -هو -هي -قوة -كما -لها -منذ -وقد -ولا -نفسه -لقاء -مقابل -هناك -وقال -وكان -نهاية -وقالت -وكانت -للامم -فيه -كلم -لكن -وفي -وقف -ولم -ومن -وهو -وهي -يوم -فيها -منها -مليار -لوكالة -يكون -يمكن -مليون -حيث -اكد -الا -اما -امس -السابق -التى -التي -اكثر -ايار -ايضا -ثلاثة -الذاتي -الاخيرة -الثاني -الثانية -الذى -الذي -الان -امام -ايام -خلال -حوالى -الذين -الاول -الاولى -بين -ذلك -دون -حول -حين -الف -الى -انه -اول -ضمن -انها -جميع -الماضي -الوقت -المقبل -اليوم -ـ -ف -و -و6 -قد -لا -ما -مع -مساء -هذا -واحد -واضاف -واضافت -فان -قبل -قال -كان -لدى -نحو -هذه -وان -واكد -كانت -واوضح -مايو -ب -ا -أ -، -عشر -عدد -عدة -عشرة -عدم -عام -عاما -عن -عند -عندما -على -عليه -عليها -زيارة -سنة -سنوات -تم -ضد -بعد -بعض -اعادة -اعلنت -بسبب -حتى -اذا -احد -اثر -برس -باسم -غدا -شخصا -صباح -اطار -اربعة -اخرى -بان -اجل -غير -بشكل -حاليا -بن -به -ثم -اف -ان -او -اي -بها -صفر \ No newline at end of file diff --git a/data/stopwords/stopwords-bg.json b/data/stopwords/stopwords-bg.json new file mode 100644 index 0000000..dba5f34 --- /dev/null +++ b/data/stopwords/stopwords-bg.json @@ -0,0 +1 @@ +[ "а", "автентичен", "аз", "ако", "ала", "бе", "без", "беше", "би", "бивш", "бивша", "бившо", "бил", "била", "били", "било", "благодаря", "близо", "бъдат", "бъде", "бяха", "в", "вас", "ваш", "ваша", "вероятно", "вече", "взема", "ви", "вие", "винаги", "внимава", "време", "все", "всеки", "всички", "всичко", "всяка", "във", "въпреки", "върху", "г", "ги", "главен", "главна", "главно", "глас", "го", "година", "години", "годишен", "д", "да", "дали", "два", "двама", "двамата", "две", "двете", "ден", "днес", "дни", "до", "добра", "добре", "добро", "добър", "докато", "докога", "дори", "досега", "доста", "друг", "друга", "други", "е", "евтин", "едва", "един", "една", "еднаква", "еднакви", "еднакъв", "едно", "екип", "ето", "живот", "за", "забавям", "зад", "заедно", "заради", "засега", "заспал", "затова", "защо", "защото", "и", "из", "или", "им", "има", "имат", "иска", "й", "каза", "как", "каква", "какво", "както", "какъв", "като", "кога", "когато", "което", "които", "кой", "който", "колко", "която", "къде", "където", "към", "лесен", "лесно", "ли", "лош", "м", "май", "малко", "ме", "между", "мек", "мен", "месец", "ми", "много", "мнозина", "мога", "могат", "може", "мокър", "моля", "момента", "му", "н", "на", "над", "назад", "най", "направи", "напред", "например", "нас", "не", "него", "нещо", "нея", "ни", "ние", "никой", "нито", "нищо", "но", "нов", "нова", "нови", "новина", "някои", "някой", "няколко", "няма", "обаче", "около", "освен", "особено", "от", "отгоре", "отново", "още", "пак", "по", "повече", "повечето", "под", "поне", "поради", "после", "почти", "прави", "пред", "преди", "през", "при", "пък", "първата", "първи", "първо", "пъти", "равен", "равна", "с", "са", "сам", "само", "се", "сега", "си", "син", "скоро", "след", "следващ", "сме", "смях", "според", "сред", "срещу", "сте", "съм", "със", "също", "т", "тази", "така", "такива", "такъв", "там", "твой", "те", "тези", "ти", "т.н.", "то", "това", "тогава", "този", "той", "толкова", "точно", "три", "трябва", "тук", "тъй", "тя", "тях", "у", "утре", "харесва", "хиляди", "ч", "часа", "че", "често", "чрез", "ще", "щом", "юмрук", "я", "як" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-bg.txt b/data/stopwords/stopwords-bg.txt deleted file mode 100644 index 9700c31..0000000 --- a/data/stopwords/stopwords-bg.txt +++ /dev/null @@ -1,259 +0,0 @@ -а -автентичен -аз -ако -ала -бе -без -беше -би -бивш -бивша -бившо -бил -била -били -било -благодаря -близо -бъдат -бъде -бяха -в -вас -ваш -ваша -вероятно -вече -взема -ви -вие -винаги -внимава -време -все -всеки -всички -всичко -всяка -във -въпреки -върху -г -ги -главен -главна -главно -глас -го -година -години -годишен -д -да -дали -два -двама -двамата -две -двете -ден -днес -дни -до -добра -добре -добро -добър -докато -докога -дори -досега -доста -друг -друга -други -е -евтин -едва -един -една -еднаква -еднакви -еднакъв -едно -екип -ето -живот -за -забавям -зад -заедно -заради -засега -заспал -затова -защо -защото -и -из -или -им -има -имат -иска -й -каза -как -каква -какво -както -какъв -като -кога -когато -което -които -кой -който -колко -която -къде -където -към -лесен -лесно -ли -лош -м -май -малко -ме -между -мек -мен -месец -ми -много -мнозина -мога -могат -може -мокър -моля -момента -му -н -на -над -назад -най -направи -напред -например -нас -не -него -нещо -нея -ни -ние -никой -нито -нищо -но -нов -нова -нови -новина -някои -някой -няколко -няма -обаче -около -освен -особено -от -отгоре -отново -още -пак -по -повече -повечето -под -поне -поради -после -почти -прави -пред -преди -през -при -пък -първата -първи -първо -пъти -равен -равна -с -са -сам -само -се -сега -си -син -скоро -след -следващ -сме -смях -според -сред -срещу -сте -съм -със -също -т -тази -така -такива -такъв -там -твой -те -тези -ти -т.н. -то -това -тогава -този -той -толкова -точно -три -трябва -тук -тъй -тя -тях -у -утре -харесва -хиляди -ч -часа -че -често -чрез -ще -щом -юмрук -я -як \ No newline at end of file diff --git a/data/stopwords/stopwords-cs.json b/data/stopwords/stopwords-cs.json new file mode 100755 index 0000000..00356da --- /dev/null +++ b/data/stopwords/stopwords-cs.json @@ -0,0 +1 @@ +[ "ačkoli", "ahoj", "ale", "anebo", "ano", "asi", "aspoň", "během", "bez", "beze", "blízko", "bohužel", "brzo", "bude", "budeme", "budeš", "budete", "budou", "budu", "byl", "byla", "byli", "bylo", "byly", "bys", "čau", "chce", "chceme", "chceš", "chcete", "chci", "chtějí", "chtít", "chut'", "chuti", "co", "čtrnáct", "čtyři", "dál", "dále", "daleko", "děkovat", "děkujeme", "děkuji", "den", "deset", "devatenáct", "devět", "do", "dobrý", "docela", "dva", "dvacet", "dvanáct", "dvě", "hodně", "já", "jak", "jde", "je", "jeden", "jedenáct", "jedna", "jedno", "jednou", "jedou", "jeho", "její", "jejich", "jemu", "jen", "jenom", "ještě", "jestli", "jestliže", "jí", "jich", "jím", "jimi", "jinak", "jsem", "jsi", "jsme", "jsou", "jste", "kam", "kde", "kdo", "kdy", "když", "ke", "kolik", "kromě", "která", "které", "kteří", "který", "kvůli", "má", "mají", "málo", "mám", "máme", "máš", "máte", "mé", "mě", "mezi", "mí", "mít", "mně", "mnou", "moc", "mohl", "mohou", "moje", "moji", "možná", "můj", "musí", "může", "my", "na", "nad", "nade", "nám", "námi", "naproti", "nás", "náš", "naše", "naši", "ne", "ně", "nebo", "nebyl", "nebyla", "nebyli", "nebyly", "něco", "nedělá", "nedělají", "nedělám", "neděláme", "neděláš", "neděláte", "nějak", "nejsi", "někde", "někdo", "nemají", "nemáme", "nemáte", "neměl", "němu", "není", "nestačí", "nevadí", "než", "nic", "nich", "ním", "nimi", "nula", "od", "ode", "on", "ona", "oni", "ono", "ony", "osm", "osmnáct", "pak", "patnáct", "pět", "po", "pořád", "potom", "pozdě", "před", "přes", "přese", "pro", "proč", "prosím", "prostě", "proti", "protože", "rovně", "se", "sedm", "sedmnáct", "šest", "šestnáct", "skoro", "smějí", "smí", "snad", "spolu", "sta", "sté", "sto", "ta", "tady", "tak", "takhle", "taky", "tam", "tamhle", "tamhleto", "tamto", "tě", "tebe", "tebou", "ted'", "tedy", "ten", "ti", "tisíc", "tisíce", "to", "tobě", "tohle", "toto", "třeba", "tři", "třináct", "trošku", "tvá", "tvé", "tvoje", "tvůj", "ty", "určitě", "už", "vám", "vámi", "vás", "váš", "vaše", "vaši", "ve", "večer", "vedle", "vlastně", "všechno", "všichni", "vůbec", "vy", "vždy", "za", "zač", "zatímco", "ze", "že" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-cs.txt b/data/stopwords/stopwords-cs.txt deleted file mode 100755 index df359f3..0000000 --- a/data/stopwords/stopwords-cs.txt +++ /dev/null @@ -1,256 +0,0 @@ -ačkoli -ahoj -ale -anebo -ano -asi -aspoň -během -bez -beze -blízko -bohužel -brzo -bude -budeme -budeš -budete -budou -budu -byl -byla -byli -bylo -byly -bys -čau -chce -chceme -chceš -chcete -chci -chtějí -chtít -chut' -chuti -co -čtrnáct -čtyři -dál -dále -daleko -děkovat -děkujeme -děkuji -den -deset -devatenáct -devět -do -dobrý -docela -dva -dvacet -dvanáct -dvě -hodně -já -jak -jde -je -jeden -jedenáct -jedna -jedno -jednou -jedou -jeho -její -jejich -jemu -jen -jenom -ještě -jestli -jestliže -jí -jich -jím -jimi -jinak -jsem -jsi -jsme -jsou -jste -kam -kde -kdo -kdy -když -ke -kolik -kromě -která -které -kteří -který -kvůli -má -mají -málo -mám -máme -máš -máte -mé -mě -mezi -mí -mít -mně -mnou -moc -mohl -mohou -moje -moji -možná -můj -musí -může -my -na -nad -nade -nám -námi -naproti -nás -náš -naše -naši -ne -ně -nebo -nebyl -nebyla -nebyli -nebyly -něco -nedělá -nedělají -nedělám -neděláme -neděláš -neděláte -nějak -nejsi -někde -někdo -nemají -nemáme -nemáte -neměl -němu -není -nestačí -nevadí -než -nic -nich -ním -nimi -nula -od -ode -on -ona -oni -ono -ony -osm -osmnáct -pak -patnáct -pět -po -pořád -potom -pozdě -před -přes -přese -pro -proč -prosím -prostě -proti -protože -rovně -se -sedm -sedmnáct -šest -šestnáct -skoro -smějí -smí -snad -spolu -sta -sté -sto -ta -tady -tak -takhle -taky -tam -tamhle -tamhleto -tamto -tě -tebe -tebou -ted' -tedy -ten -ti -tisíc -tisíce -to -tobě -tohle -toto -třeba -tři -třináct -trošku -tvá -tvé -tvoje -tvůj -ty -určitě -už -vám -vámi -vás -váš -vaše -vaši -ve -večer -vedle -vlastně -všechno -všichni -vůbec -vy -vždy -za -zač -zatímco -ze -že \ No newline at end of file diff --git a/data/stopwords/stopwords-da.json b/data/stopwords/stopwords-da.json new file mode 100644 index 0000000..0fe5d43 --- /dev/null +++ b/data/stopwords/stopwords-da.json @@ -0,0 +1 @@ +[ "af", "alle", "andet", "andre", "at", "begge", "da", "de", "den", "denne", "der", "deres", "det", "dette", "dig", "din", "dog", "du", "ej", "eller", "en", "end", "ene", "eneste", "enhver", "et", "fem", "fire", "flere", "fleste", "for", "fordi", "forrige", "fra", "få", "før", "god", "han", "hans", "har", "hendes", "her", "hun", "hvad", "hvem", "hver", "hvilken", "hvis", "hvor", "hvordan", "hvorfor", "hvornår", "i", "ikke", "ind", "ingen", "intet", "jeg", "jeres", "kan", "kom", "kommer", "lav", "lidt", "lille", "man", "mand", "mange", "med", "meget", "men", "mens", "mere", "mig", "ned", "ni", "nogen", "noget", "ny", "nyt", "nær", "næste", "næsten", "og", "op", "otte", "over", "på", "se", "seks", "ses", "som", "stor", "store", "syv", "ti", "til", "to", "tre", "ud", "var" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-da.txt b/data/stopwords/stopwords-da.txt deleted file mode 100644 index e8522ef..0000000 --- a/data/stopwords/stopwords-da.txt +++ /dev/null @@ -1,101 +0,0 @@ -af -alle -andet -andre -at -begge -da -de -den -denne -der -deres -det -dette -dig -din -dog -du -ej -eller -en -end -ene -eneste -enhver -et -fem -fire -flere -fleste -for -fordi -forrige -fra -få -før -god -han -hans -har -hendes -her -hun -hvad -hvem -hver -hvilken -hvis -hvor -hvordan -hvorfor -hvornår -i -ikke -ind -ingen -intet -jeg -jeres -kan -kom -kommer -lav -lidt -lille -man -mand -mange -med -meget -men -mens -mere -mig -ned -ni -nogen -noget -ny -nyt -nær -næste -næsten -og -op -otte -over -på -se -seks -ses -som -stor -store -syv -ti -til -to -tre -ud -var diff --git a/data/stopwords/stopwords-de.json b/data/stopwords/stopwords-de.json new file mode 100644 index 0000000..78646ba --- /dev/null +++ b/data/stopwords/stopwords-de.json @@ -0,0 +1 @@ +[ "der", "die", "und", "in", "den", "von", "zu", "mit", "ist", "das", "des", "im", "für", "auf", "sich", "dem", "Die", "nicht", "ein", "eine", "als", "auch", "an", "es", "er", "aus", "bei", "werden", "sie", "nach", "Der", "sind", "war", "wurde", "wird", "einer", "Das", "hat", "am", "wie", "um", "Sie", "zum", "oder", "einen", "über", "dass", "einem", "noch", "bis", "nur", "vor", "zur", "durch", "so", "haben", "aber", "ich", "In", "man", "mehr", "wir", "daß", "kann", "sein", "vom", "Es", "unter", "Ich", "hatte", "gegen", "Im", "Er", "wenn", "dieser", "seine", "eines", "können", "diese", "wieder", "wurden", "dann", "was", "schon", "Jahr", "zwei", "seiner", "Jahre", "Jahren", "ihre", "gibt", "zwischen", "Ein", "immer", "waren", "Zeit", "Uhr", "keine", "Wir", "sei", "habe", "sehr", "hier", "alle", "Nach", "ab", "sowie", "da", "beim", "heute", "seit", "diesem", "uns", "soll", "Und", "Deutschland", "Mit", "anderen", "jedoch", "ihr", "damit", "ersten", "drei", "Auch", "doch", "ihm", "seinen", "Stadt", "etwa", "sagte", "ihn", "Eine", "sondern", "bereits", "müssen", "ohne", "Menschen", "will", "Prozent", "ihrer", "worden", "Bei", "selbst", "jetzt", "of", "Als", "seinem", "neue", "muss", "allem", "neuen", "Ende", "nun", "Von", "geht", "ihren", "SPD", "So", "Für", "weil", "wo", "mich", "mir", "Aber", "Am", "Diese", "ganz", "dieses", "etwas", "andere", "Geschichte", "Frau", "liegt", "Wenn", "ins", "gut", "einmal", "konnte", "Euro", "du", "denn", "viele", "Auf", "machen", "Herr", "Leben", "the", "diesen", "erst", "lassen", "Wie", "dort", "beiden", "erste", "The", "Teil", "deutschen", "weiter", "also", "viel", "sollte", "dabei", "Millionen", "Was", "später", "hatten", "während", "Welt", "ISBN", "sagt", "denen", "wollen", "steht", "Da", "kommt", "kein", "vier", "nichts", "de", "allerdings", "Seite", "ob", "dazu", "gab", "s", "letzten", "kam", "USA", "wegen", "dies", "zurück", "großen", "kommen", "alles", "rund", "ja", "sollen", "deren", "dafür", "Doch", "Kinder", "wäre", "Frage", "weitere", "würde", "dessen", "große", "Januar", "zwar", "darauf", "Arbeit", "Beispiel", "September", "zusammen", "einige", "Land", "allen", "fast", "Frauen", "März", "Namen", "Unternehmen", "ihrem", "davon", "Mann", "Mai", "Platz", "deutsche", "werde", "Oktober", "muß", "Literatur", "Art", "ihnen", "Deutschen", "fünf", "gilt", "sehen", "könnte", "Dezember", "stehen", "sogar", "seien", "Wer", "Seit", "August", "bin", "Beifall", "Fall", "Juni", "eigenen", "November", "mal", "Film", "finden", "sagen", "Regierung", "April", "München", "oft", "Dies", "lange", "ebenfalls", "bekannt", "Präsident", "wohl", "CDU/CSU", "Zu", "gehört", "Man", "weniger", "gerade", "statt", "aller", "Juli", "möchte", "Weg", "Entwicklung", "zunächst", "ging", "Mark", "Bild", "möglich", "gar", "besonders", "hätte", "macht", "Politik", "geben", "Tag", "Ihnen", "Februar", "Hier", "Gemeinde", "wenig", "gewesen", "Europa", "gehen", "gemacht", "welche", "New", "gegenüber", "heißt", "Familie", "Union", "tun", "Jahrhundert", "einfach", "Frankfurt", "deutlich", "Dabei", "neben", "sollten", "Kirche", "keinen", "Artikel", "Ihre", "Peter", "Thema", "besteht", "vielen", "nie", "bzw.", "Aus", "Zeitung", "wollte", "Kommission", "seines", "Hamburg", "hätten", "Geld", "meine", "Dr", "kaum", "zweiten", "Während", "lässt", "Anfang", "Um", "Ort", "weiß", "findet", "Bereich", "Haus", "anderem", "Mal", "deshalb", "alten", "erhalten", "zehn", "Zum", "bisher", "meisten", "darüber", "würden", "hin", "Form", "An", "bleibt", "sieht", "Gesellschaft", "Berliner", "Den", "vergangenen", "bezeichnet", "Nr.", "Ziel", "je", "weit", "Grund", "sechs", "darf", "Rolle", "Deutsche", "wissen", "jeder", "zeigt", "Damit", "Denn", "mehrere", "nächsten", "Vor", "Dann", "schließlich", "kleinen", "Durch", "Michael", "km", "Lage", "Gruppe", "Band", "damals", "Spiel", "Sohn", "Dr.", "stark", "Universität", "Hilfe", "besser", "hinter", "meist", "Seine", "St.", "stellt", "Tage", "unsere", "daher", "Nur", "wirklich", "führt", "Dieser", "beispielsweise", "kurz", "Bericht", "gleich", "weiteren", "Straße", "bleiben", "Wirtschaft", "Siehe", "Zukunft", "eher", "Bedeutung", "Recht", "insbesondere", "Bevölkerung", "schnell", "nehmen", "Verlag", "CDU", "Tod", "Alle", "solche", "neu", "Bundesregierung", "pro", "Frankreich", "Jahres", "konnten", "Ihr", "ließ", "Du", "kleine", "Europäischen", "Vater", "genannt", "lang", "Titel", "Rahmen", "Wort", "eigentlich", "erhielt", "einigen", "Woche", "FC", "Musik", "dagegen", "Sein", "allein", "Einsatz", "genau", "begann", "innerhalb", "unserer", "Partei", "Polizei", "Wasser", "bringen", "deutscher", "natürlich", "eigene", "Wochen", "insgesamt", "Außerdem", "Bis", "halten", "politischen", "musste", "Parlament", "Meter", "Hand", "Zahl", "stellen", "gesagt", "führen", "daran", "Erfolg", "befindet", "Zur", "verschiedenen", "Probleme", "Unter", "Abgeordneten", "Milliarden", "nahm", "stand", "geworden", "c", "liegen", "erstmals", "Sprache", "Fragen", "nämlich", "Ja", "Kollegen", "Männer", "Nicht", "Wolfgang", "Problem", "Mutter", "Minuten", "Weitere", "Mitte", "Mitglied", "Jahrhunderts", "Krieg", "Hans", "könnten", "Thomas", "Über", "Personen", "Friedrich", "ca.", "ebenso", "machte", "York", "vielleicht", "Stelle", "derzeit", "Ländern", "Höhe", "verwendet", "gute", "überhaupt", "Länder", "Angaben", "führte", "gegeben", "Tel.", "klar", "Karl", "europäischen", "sicher", "Saison", "Programm", "erreicht", "GRÜNEN", "beide", "Sonntag", "sowohl", "Region", "alte", "Staaten", "Paris", "Beginn", "Buch", "zweite", "ganze", "hinaus", "König", "Morgen", "handelt", "fand", "Schweiz", "jeweils", "Weise", "DM", "fest", "per", "blieb", "Mitglieder", "Richtung", "Heute", "Stunden", "leicht", "Leute", "wobei", "gehören", "bietet", "Wien", "politische", "Folge", "Blick", "aufgrund", "Entscheidung", "Dort", "Neben", "hält", "Gebiet", "gemeinsam", "erklärt", "direkt", "könne", "Daten", "recht", "schwer", "Bayern", "jeden", "Name", "Schule", "GmbH", "dürfen", "laut", "Seiten", "Bürger", "Eltern", "dpa", "Meinung", "Werke", "Jetzt", "letzte", "Spieler", "bald", "London", "häufig", "heutigen", "Einwohner", "acht", "eben", "Internet", "Markt", "dich", "Nein", "Situation", "System", "zuvor", "Möglichkeit", "Freitag", "mein", "Mannheim", "Fenster", "Kosten", "inzwischen", "kamen", "John", "sieben", "bekommen", "erreichen", "unser", "Verfügung", "Köln", "Dazu", "besten", "Zusammenhang", "Reihe", "Kritik", "richtig", "Liste", "Herren", "Augen", "taz", "zeigen", "siehe", "hohen", "spielte", "leben", "völlig", "Neue", "ihres", "spielt", "Sicherheit", "weiterhin", "hoch", "nachdem", "gegründet", "erneut", "sah", "z.", "wer", "Informationen", "anders", "spielen", "Dieses", "gleichen", "Kultur", "größten", "eingesetzt", "Unterstützung", "Beim", "erklärte", "Allerdings", "Firma", "Amt", "Kopf", "trotz", "Erst", "gebracht", "gestellt", "läuft", "schließen", "Bilder", "nimmt", "Mitarbeiter", "BÜNDNIS", "Deshalb", "verschiedene", "zudem", "Werk", "Ergebnis", "Heinrich", "Bau", "ehemaligen", "Preis", "Tochter", "Stuttgart", "Samstag", "Bad", "Verfahren", "Kind", "früher", "Paul", "darin", "paar", "Punkt", "Weblinks", "Nun", "Maßnahmen", "Österreich", "Wilhelm", "Herrn", "z.B.", "Noch", "Staat", "Zusammenarbeit", "knapp", "Nacht", "einzelnen", "trat", "gestern", "Team", "Osten", "scheint", "Mannschaft", "Tagen", "internationalen", "jede", "mindestens", "teilweise", "einzige", "Soldaten", "setzt", "gefunden", "Kunst", "lediglich", "öffentlichen", "bedeutet", "Raum", "gewann", "Kampf", "Martin", "Ist", "Begriff", "Hause", "entwickelt", "Wahl", "Schon", "arbeiten", "größte", "Donnerstag", "Ab", "Viele", "Quellen", "Nachdem", "dadurch", "Italien", "erster", "gekommen", "dir", "Mittwoch", "danach", "stellte", "her", "zahlreiche", "Landes", "Gesetz", "Monaten", "PDS", "Rat", "Franz", "Verein", "sonst", "Frankfurter", "Meine", "Klaus", "Karriere", "müsse", "meiner", "anderer", "zuletzt", "Monate", "Alter", "hohe", "Interesse", "Regie", "Montag", "genommen", "lag", "Sommer", "spricht", "Trainer", "Liebe", "jedem", "/DIE", "Westen", "guten", "Kilometer", "Johann", "gesehen", "darunter", "solchen", "indem", "Mittel", "oben", "Schweizer", "wichtig", "Hälfte", "Regel", "obwohl", "Bürgermeister", "Aufgabe", "Spiele", "folgenden", "Dienstag", "version", "Sache", "sprechen", "Gemeinden", "electronic", "for", "Norden", "außerdem", "Antrag", "gleichzeitig", "ganzen", "Politiker", "gehörte", "großer", "China", "Nähe", "bereit", "setzte", "Druck", "tatsächlich", "Gott", "frei", "Grünen", "zumindest", "Opfer", "genug", "versucht", "bevor" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-de.txt b/data/stopwords/stopwords-de.txt deleted file mode 100644 index 37e6714..0000000 --- a/data/stopwords/stopwords-de.txt +++ /dev/null @@ -1,894 +0,0 @@ -der -die -und -in -den -von -zu -mit -ist -das -des -im -für -auf -sich -dem -Die -nicht -ein -eine -als -auch -an -es -er -aus -bei -werden -sie -nach -Der -sind -war -wurde -wird -einer -Das -hat -am -wie -um -Sie -zum -oder -einen -über -dass -einem -noch -bis -nur -vor -zur -durch -so -haben -aber -ich -In -man -mehr -wir -daß -kann -sein -vom -Es -unter -Ich -hatte -gegen -Im -Er -wenn -dieser -seine -eines -können -diese -wieder -wurden -dann -was -schon -Jahr -zwei -seiner -Jahre -Jahren -ihre -gibt -zwischen -Ein -immer -waren -Zeit -Uhr -keine -Wir -sei -habe -sehr -hier -alle -Nach -ab -sowie -da -beim -heute -seit -diesem -uns -soll -Und -Deutschland -Mit -anderen -jedoch -ihr -damit -ersten -drei -Auch -doch -ihm -seinen -Stadt -etwa -sagte -ihn -Eine -sondern -bereits -müssen -ohne -Menschen -will -Prozent -ihrer -worden -Bei -selbst -jetzt -of -Als -seinem -neue -muss -allem -neuen -Ende -nun -Von -geht -ihren -SPD -So -Für -weil -wo -mich -mir -Aber -Am -Diese -ganz -dieses -etwas -andere -Geschichte -Frau -liegt -Wenn -ins -gut -einmal -konnte -Euro -du -denn -viele -Auf -machen -Herr -Leben -the -diesen -erst -lassen -Wie -dort -beiden -erste -The -Teil -deutschen -weiter -also -viel -sollte -dabei -Millionen -Was -später -hatten -während -Welt -ISBN -sagt -denen -wollen -steht -Da -kommt -kein -vier -nichts -de -allerdings -Seite -ob -dazu -gab -s -letzten -kam -USA -wegen -dies -zurück -großen -kommen -alles -rund -ja -sollen -deren -dafür -Doch -Kinder -wäre -Frage -weitere -würde -dessen -große -Januar -zwar -darauf -Arbeit - -Beispiel -September -zusammen -einige -Land -allen -fast -Frauen -März -Namen -Unternehmen -ihrem -davon -Mann -Mai -Platz -deutsche -werde -Oktober -muß -Literatur -Art -ihnen -Deutschen -fünf -gilt -sehen -könnte -Dezember -stehen -sogar -seien -Wer -Seit -August -bin -Beifall -Fall -Juni -eigenen -November -mal -Film -finden -sagen -Regierung -April -München -oft -Dies -lange -ebenfalls -bekannt -Präsident -wohl -CDU/CSU -Zu -gehört -Man -weniger -gerade -statt -aller -Juli -möchte -Weg -Entwicklung -zunächst -ging -Mark -Bild -möglich -gar -besonders -hätte -macht -Politik -geben -Tag -Ihnen -Februar -Hier -Gemeinde -wenig -gewesen -Europa -gehen -gemacht -welche -New -gegenüber -heißt -Familie -Union -tun -Jahrhundert -einfach -Frankfurt -deutlich -Dabei -neben -sollten -Kirche -keinen -Artikel -Ihre -Peter -Thema -besteht -vielen -nie -bzw. -Aus -Zeitung -wollte -Kommission -seines -Hamburg -hätten -Geld -meine -Dr -kaum -zweiten -Während -lässt -Anfang -Um -Ort -weiß -findet -Bereich -Haus -anderem -Mal - -deshalb -alten -erhalten -zehn -Zum -bisher -meisten -darüber -würden -hin -Form -An -bleibt -sieht -Gesellschaft -Berliner -Den -vergangenen -bezeichnet -Nr. -Ziel -je -weit -Grund -sechs -darf -Rolle -Deutsche -wissen -jeder -zeigt -Damit -Denn -mehrere -nächsten -Vor -Dann -schließlich -kleinen -Durch -Michael -km -Lage -Gruppe -Band -damals -Spiel -Sohn -Dr. -stark -Universität -Hilfe -besser -hinter -meist -Seine -St. -stellt -Tage -unsere -daher -Nur -wirklich -führt -Dieser -beispielsweise -kurz -Bericht -gleich -weiteren -Straße -bleiben -Wirtschaft -Siehe -Zukunft -eher -Bedeutung -Recht -insbesondere -Bevölkerung -schnell -nehmen -Verlag -CDU -Tod -Alle -solche -neu -Bundesregierung -pro -Frankreich -Jahres -konnten -Ihr -ließ -Du -kleine -Europäischen -Vater -genannt -lang -Titel -Rahmen -Wort -eigentlich -erhielt -einigen -Woche -FC -Musik -dagegen -Sein -allein -Einsatz -genau -begann -innerhalb -unserer -Partei -Polizei -Wasser -bringen -deutscher -natürlich -eigene - -Wochen -insgesamt -Außerdem -Bis -halten -politischen -musste -Parlament -Meter -Hand -Zahl -stellen -gesagt -führen -daran -Erfolg -befindet -Zur -verschiedenen -Probleme -Unter -Abgeordneten -Milliarden -nahm -stand -geworden -c -liegen -erstmals -Sprache -Fragen -nämlich -Ja -Kollegen -Männer -Nicht -Wolfgang -Problem -Mutter -Minuten -Weitere -Mitte -Mitglied -Jahrhunderts -Krieg -Hans -könnten -Thomas -Über -Personen -Friedrich -ca. -ebenso -machte -York -vielleicht -Stelle -derzeit -Ländern -Höhe -verwendet -gute -überhaupt -Länder -Angaben -führte -gegeben -Tel. -klar -Karl -europäischen -sicher -Saison -Programm -erreicht -GRÜNEN -beide -Sonntag -sowohl -Region -alte -Staaten -Paris -Beginn -Buch -zweite -ganze -hinaus -König -Morgen -handelt -fand -Schweiz -jeweils -Weise -DM -fest -per -blieb -Mitglieder -Richtung -Heute -Stunden -leicht -Leute -wobei -gehören -bietet -Wien -politische -Folge -Blick -aufgrund -Entscheidung -Dort -Neben -hält -Gebiet -gemeinsam -erklärt -direkt -könne -Daten -recht -schwer -Bayern -jeden -Name -Schule -GmbH -dürfen -laut -Seiten -Bürger -Eltern -dpa -Meinung -Werke -Jetzt -letzte -Spieler -bald -London -häufig -heutigen -Einwohner -acht -eben -Internet -Markt -dich -Nein -Situation -System -zuvor -Möglichkeit -Freitag -mein -Mannheim -Fenster -Kosten -inzwischen -kamen -John -sieben -bekommen -erreichen -unser -Verfügung -Köln -Dazu -besten -Zusammenhang -Reihe -Kritik -richtig -Liste -Herren -Augen -taz -zeigen -siehe -hohen -spielte -leben -völlig -Neue -ihres -spielt -Sicherheit -weiterhin -hoch -nachdem -gegründet -erneut -sah -z. -wer -Informationen -anders -spielen -Dieses -gleichen -Kultur -größten -eingesetzt -Unterstützung -Beim -erklärte -Allerdings -Firma -Amt -Kopf -trotz -Erst -gebracht -gestellt -läuft -schließen -Bilder -nimmt -Mitarbeiter -BÜNDNIS -Deshalb -verschiedene -zudem -Werk -Ergebnis -Heinrich -Bau -ehemaligen -Preis -Tochter -Stuttgart -Samstag -Bad -Verfahren -Kind -früher -Paul -darin -paar -Punkt -Weblinks -Nun -Maßnahmen -Österreich -Wilhelm -Herrn -z.B. -Noch -Staat -Zusammenarbeit -knapp -Nacht -einzelnen -trat -gestern -Team -Osten -scheint -Mannschaft -Tagen -internationalen -jede -mindestens -teilweise -einzige -Soldaten -setzt -gefunden -Kunst -lediglich -öffentlichen -bedeutet -Raum -gewann -Kampf -Martin -Ist -Begriff -Hause -entwickelt -Wahl -Schon -arbeiten -größte -Donnerstag -Ab -Viele -Quellen -Nachdem -dadurch -Italien -erster -gekommen -dir -Mittwoch -danach -stellte -her -zahlreiche -Landes -Gesetz -Monaten -PDS -Rat -Franz -Verein -sonst -Frankfurter -Meine -Klaus -Karriere -müsse -meiner -anderer -zuletzt -Monate -Alter -hohe -Interesse -Regie -Montag -genommen -lag -Sommer -spricht -Trainer -Liebe -jedem -/DIE -Westen -guten -Kilometer -Johann -gesehen -darunter -solchen -indem -Mittel -oben -Schweizer -wichtig -Hälfte -Regel -obwohl -Bürgermeister -Aufgabe -Spiele -folgenden -Dienstag -version -Sache -sprechen -Gemeinden -electronic -for -Norden -außerdem -Antrag -gleichzeitig -ganzen -Politiker -gehörte -großer -China -Nähe -bereit -setzte -Druck -tatsächlich -Gott -frei -Grünen -zumindest -Opfer -genug -versucht -bevor \ No newline at end of file diff --git a/data/stopwords/stopwords-en.json b/data/stopwords/stopwords-en.json new file mode 100644 index 0000000..c0a3af3 --- /dev/null +++ b/data/stopwords/stopwords-en.json @@ -0,0 +1 @@ +[ "a's", "able", "about", "above", "according", "accordingly", "across", "actually", "after", "afterwards", "again", "against", "ain't", "all", "allow", "allows", "almost", "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "an", "and", "another", "any", "anybody", "anyhow", "anyone", "anything", "anyway", "anyways", "anywhere", "apart", "appear", "appreciate", "appropriate", "are", "aren't", "around", "as", "aside", "ask", "asking", "associated", "at", "available", "away", "awfully", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "both", "brief", "but", "by", "c", "c'mon", "c's", "came", "campaign", "can", "can't", "cannot", "cant", "cause", "causes", "certain", "certainly", "changes", "clearly", "co", "com", "come", "comes", "concerning", "consequently", "consider", "considering", "contain", "containing", "contains", "corresponding", "could", "couldn't", "course", "currently", "definitely", "described", "despite", "did", "didn't", "different", "do", "does", "doesn't", "doing", "don't", "done", "down", "downwards", "during", "each", "edu", "eight", "either", "else", "elsewhere", "enough", "endorsed", "entirely", "especially", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "ex", "exactly", "example", "except", "far", "few", "fifth", "first", "financial", "five", "followed", "following", "follows", "for", "former", "formerly", "forth", "four", "from", "further", "furthermore", "get", "gets", "getting", "given", "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings", "had", "hadn't", "happens", "hardly", "has", "hasn't", "have", "haven't", "having", "he", "he's", "hello", "help", "hence", "her", "here", "here's", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "hi", "him", "himself", "his", "hither", "hopefully", "how", "howbeit", "however", "i'd", "i'll", "i'm", "i've", "if", "ignored", "immediate", "in", "inasmuch", "inc", "indeed", "indicate", "indicated", "indicates", "inner", "insofar", "instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll", "it's", "its", "itself", "just", "keep", "keeps", "kept", "know", "knows", "known", "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "let's", "like", "liked", "likely", "little", "look", "looking", "looks", "ltd", "mainly", "many", "may", "maybe", "me", "mean", "meanwhile", "merely", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "name", "namely", "nd", "near", "nearly", "necessary", "need", "needs", "neither", "never", "nevertheless", "new", "next", "nine", "no", "nobody", "non", "none", "noone", "nor", "normally", "not", "nothing", "novel", "now", "nowhere", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "overall", "own", "particular", "particularly", "per", "perhaps", "placed", "please", "plus", "possible", "presumably", "probably", "provides", "quite", "quote", "quarterly", "rather", "really", "reasonably", "regarding", "regardless", "regards", "relatively", "respectively", "right", "said", "same", "saw", "say", "saying", "says", "second", "secondly", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self", "selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "she", "should", "shouldn't", "since", "six", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry", "specified", "specify", "specifying", "still", "sub", "such", "sup", "sure", "t's", "take", "taken", "tell", "tends", "than", "thank", "thanks", "thanx", "that", "that's", "thats", "the", "their", "theirs", "them", "themselves", "then", "thence", "there", "there's", "thereafter", "thereby", "therefore", "therein", "theres", "thereupon", "these", "they", "they'd", "they'll", "they're", "they've", "think", "third", "this", "thorough", "thoroughly", "those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "took", "toward", "towards", "tried", "tries", "truly", "try", "trying", "twice", "two", "under", "unfortunately", "unless", "unlikely", "until", "unto", "up", "upon", "us", "use", "used", "useful", "uses", "using", "usually", "uucp", "value", "various", "very", "via", "viz", "vs", "want", "wants", "was", "wasn't", "way", "we", "we'd", "we'll", "we're", "we've", "welcome", "well", "went", "were", "weren't", "what", "what's", "whatever", "when", "whence", "whenever", "where", "where's", "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who", "who's", "whoever", "whole", "whom", "whose", "why", "will", "willing", "wish", "with", "within", "without", "won't", "wonder", "would", "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll", "you're", "you've", "your", "yours", "yourself", "yourselves", "zero", "official", "sharply", "criticized" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-en.txt b/data/stopwords/stopwords-en.txt deleted file mode 100644 index d3a3954..0000000 --- a/data/stopwords/stopwords-en.txt +++ /dev/null @@ -1,546 +0,0 @@ -a's -able -about -above -according -accordingly -across -actually -after -afterwards -again -against -ain't -all -allow -allows -almost -alone -along -already -also -although -always -am -among -amongst -an -and -another -any -anybody -anyhow -anyone -anything -anyway -anyways -anywhere -apart -appear -appreciate -appropriate -are -aren't -around -as -aside -ask -asking -associated -at -available -away -awfully -be -became -because -become -becomes -becoming -been -before -beforehand -behind -being -believe -below -beside -besides -best -better -between -beyond -both -brief -but -by -c -c'mon -c's -came -campaign -can -can't -cannot -cant -cause -causes -certain -certainly -changes -clearly -co -com -come -comes -concerning -consequently -consider -considering -contain -containing -contains -corresponding -could -couldn't -course -currently -definitely -described -despite -did -didn't -different -do -does -doesn't -doing -don't -done -down -downwards -during -each -edu -eight -either -else -elsewhere -enough -endorsed -entirely -especially -et -etc -even -ever -every -everybody -everyone -everything -everywhere -ex -exactly -example -except -far -few -fifth -first -financial -five -followed -following -follows -for -former -formerly -forth -four -from -further -furthermore -get -gets -getting -given -gives -go -goes -going -gone -got -gotten -greetings -had -hadn't -happens -hardly -has -hasn't -have -haven't -having -he -he's -hello -help -hence -her -here -here's -hereafter -hereby -herein -hereupon -hers -herself -hi -him -himself -his -hither -hopefully -how -howbeit -however -i'd -i'll -i'm -i've -if -ignored -immediate -in -inasmuch -inc -indeed -indicate -indicated -indicates -inner -insofar -instead -into -inward -is -isn't -it -it'd -it'll -it's -its -itself -just -keep -keeps -kept -know -knows -known -last -lately -later -latter -latterly -least -less -lest -let -let's -like -liked -likely -little -look -looking -looks -ltd -mainly -many -may -maybe -me -mean -meanwhile -merely -might -more -moreover -most -mostly -much -must -my -myself -name -namely -nd -near -nearly -necessary -need -needs -neither -never -nevertheless -new -next -nine -no -nobody -non -none -noone -nor -normally -not -nothing -novel -now -nowhere -obviously -of -off -often -oh -ok -okay -old -on -once -one -ones -only -onto -or -other -others -otherwise -ought -our -ours -ourselves -out -outside -over -overall -own -particular -particularly -per -perhaps -placed -please -plus -possible -presumably -probably -provides -quite -quote -quarterly -rather -really -reasonably -regarding -regardless -regards -relatively -respectively -right -said -same -saw -say -saying -says -second -secondly -see -seeing -seem -seemed -seeming -seems -seen -self -selves -sensible -sent -serious -seriously -seven -several -shall -she -should -shouldn't -since -six -so -some -somebody -somehow -someone -something -sometime -sometimes -somewhat -somewhere -soon -sorry -specified -specify -specifying -still -sub -such -sup -sure -t's -take -taken -tell -tends -than -thank -thanks -thanx -that -that's -thats -the -their -theirs -them -themselves -then -thence -there -there's -thereafter -thereby -therefore -therein -theres -thereupon -these -they -they'd -they'll -they're -they've -think -third -this -thorough -thoroughly -those -though -three -through -throughout -thru -thus -to -together -too -took -toward -towards -tried -tries -truly -try -trying -twice -two -under -unfortunately -unless -unlikely -until -unto -up -upon -us -use -used -useful -uses -using -usually -uucp -value -various -very -via -viz -vs -want -wants -was -wasn't -way -we -we'd -we'll -we're -we've -welcome -well -went -were -weren't -what -what's -whatever -when -whence -whenever -where -where's -whereafter -whereas -whereby -wherein -whereupon -wherever -whether -which -while -whither -who -who's -whoever -whole -whom -whose -why -will -willing -wish -with -within -without -won't -wonder -would -would -wouldn't -yes -yet -you -you'd -you'll -you're -you've -your -yours -yourself -yourselves -zero -official -sharply -criticized \ No newline at end of file diff --git a/data/stopwords/stopwords-es.json b/data/stopwords/stopwords-es.json new file mode 100644 index 0000000..b74fdf8 --- /dev/null +++ b/data/stopwords/stopwords-es.json @@ -0,0 +1 @@ +[ "de", "la", "que", "el", "en", "y", "a", "los", "del", "se", "las", "por", "un", "para", "con", "no", "una", "su", "al", "lo", "como", "más", "pero", "sus", "le", "ya", "o", "este", "sí", "porque", "esta", "entre", "cuando", "muy", "sin", "sobre", "también", "me", "hasta", "hay", "donde", "quien", "desde", "todo", "nos", "durante", "todos", "uno", "les", "ni", "contra", "otros", "ese", "eso", "ante", "ellos", "e", "esto", "mí", "antes", "algunos", "qué", "unos", "yo", "otro", "otras", "otra", "él", "tanto", "esa", "estos", "mucho", "quienes", "nada", "muchos", "cual", "poco", "ella", "estar", "estas", "algunas", "algo", "nosotros", "mi", "mis", "tú", "te", "ti", "tu", "tus", "ellas", "nosotras", "vosotros", "vosotras", "os", "mío", "mía", "míos", "mías", "tuyo", "tuya", "tuyos", "tuyas", "suyo", "suya", "suyos", "suyas", "nuestro", "nuestra", "nuestros", "nuestras", "vuestro", "vuestra", "vuestros", "vuestras", "esos", "esas", "estoy", "estás", "está", "estamos", "estáis", "están", "esté", "estés", "estemos", "estéis", "estén", "estaré", "estarás", "estará", "estaremos", "estaréis", "estarán", "estaría", "estarías", "estaríamos", "estaríais", "estarían", "estaba", "estabas", "estábamos", "estabais", "estaban", "estuve", "estuviste", "estuvo", "estuvimos", "estuvisteis", "estuvieron", "estuviera", "estuvieras", "estuviéramos", "estuvierais", "estuvieran", "estuviese", "estuvieses", "estuviésemos", "estuvieseis", "estuviesen", "estando", "estado", "estada", "estados", "estadas", "estad", "he", "has", "ha", "hemos", "habéis", "han", "haya", "hayas", "hayamos", "hayáis", "hayan", "habré", "habrás", "habrá", "habremos", "habréis", "habrán", "habría", "habrías", "habríamos", "habríais", "habrían", "había", "habías", "habíamos", "habíais", "habían", "hube", "hubiste", "hubo", "hubimos", "hubisteis", "hubieron", "hubiera", "hubieras", "hubiéramos", "hubierais", "hubieran", "hubiese", "hubieses", "hubiésemos", "hubieseis", "hubiesen", "habiendo", "habido", "habida", "habidos", "habidas", "soy", "eres", "es", "somos", "sois", "son", "sea", "seas", "seamos", "seáis", "sean", "seré", "serás", "será", "seremos", "seréis", "serán", "sería", "serías", "seríamos", "seríais", "serían", "era", "eras", "éramos", "erais", "eran", "fui", "fuiste", "fue", "fuimos", "fuisteis", "fueron", "fuera", "fueras", "fuéramos", "fuerais", "fueran", "fuese", "fueses", "fuésemos", "fueseis", "fuesen", "siendo", "sido", "tengo", "tienes", "tiene", "tenemos", "tenéis", "tienen", "tenga", "tengas", "tengamos", "tengáis", "tengan", "tendré", "tendrás", "tendrá", "tendremos", "tendréis", "tendrán", "tendría", "tendrías", "tendríamos", "tendríais", "tendrían", "tenía", "tenías", "teníamos", "teníais", "tenían", "tuve", "tuviste", "tuvo", "tuvimos", "tuvisteis", "tuvieron", "tuviera", "tuvieras", "tuviéramos", "tuvierais", "tuvieran", "tuviese", "tuvieses", "tuviésemos", "tuvieseis", "tuviesen", "teniendo", "tenido", "tenida", "tenidos", "tenidas", "tened" ] diff --git a/data/stopwords/stopwords-es.txt b/data/stopwords/stopwords-es.txt deleted file mode 100644 index c59d9b2..0000000 --- a/data/stopwords/stopwords-es.txt +++ /dev/null @@ -1,308 +0,0 @@ -de -la -que -el -en -y -a -los -del -se -las -por -un -para -con -no -una -su -al -lo -como -más -pero -sus -le -ya -o -este -sí -porque -esta -entre -cuando -muy -sin -sobre -también -me -hasta -hay -donde -quien -desde -todo -nos -durante -todos -uno -les -ni -contra -otros -ese -eso -ante -ellos -e -esto -mí -antes -algunos -qué -unos -yo -otro -otras -otra -él -tanto -esa -estos -mucho -quienes -nada -muchos -cual -poco -ella -estar -estas -algunas -algo -nosotros -mi -mis -tú -te -ti -tu -tus -ellas -nosotras -vosotros -vosotras -os -mío -mía -míos -mías -tuyo -tuya -tuyos -tuyas -suyo -suya -suyos -suyas -nuestro -nuestra -nuestros -nuestras -vuestro -vuestra -vuestros -vuestras -esos -esas -estoy -estás -está -estamos -estáis -están -esté -estés -estemos -estéis -estén -estaré -estarás -estará -estaremos -estaréis -estarán -estaría -estarías -estaríamos -estaríais -estarían -estaba -estabas -estábamos -estabais -estaban -estuve -estuviste -estuvo -estuvimos -estuvisteis -estuvieron -estuviera -estuvieras -estuviéramos -estuvierais -estuvieran -estuviese -estuvieses -estuviésemos -estuvieseis -estuviesen -estando -estado -estada -estados -estadas -estad -he -has -ha -hemos -habéis -han -haya -hayas -hayamos -hayáis -hayan -habré -habrás -habrá -habremos -habréis -habrán -habría -habrías -habríamos -habríais -habrían -había -habías -habíamos -habíais -habían -hube -hubiste -hubo -hubimos -hubisteis -hubieron -hubiera -hubieras -hubiéramos -hubierais -hubieran -hubiese -hubieses -hubiésemos -hubieseis -hubiesen -habiendo -habido -habida -habidos -habidas -soy -eres -es -somos -sois -son -sea -seas -seamos -seáis -sean -seré -serás -será -seremos -seréis -serán -sería -serías -seríamos -seríais -serían -era -eras -éramos -erais -eran -fui -fuiste -fue -fuimos -fuisteis -fueron -fuera -fueras -fuéramos -fuerais -fueran -fuese -fueses -fuésemos -fueseis -fuesen -siendo -sido -tengo -tienes -tiene -tenemos -tenéis -tienen -tenga -tengas -tengamos -tengáis -tengan -tendré -tendrás -tendrá -tendremos -tendréis -tendrán -tendría -tendrías -tendríamos -tendríais -tendrían -tenía -tenías -teníamos -teníais -tenían -tuve -tuviste -tuvo -tuvimos -tuvisteis -tuvieron -tuviera -tuvieras -tuviéramos -tuvierais -tuvieran -tuviese -tuvieses -tuviésemos -tuvieseis -tuviesen -teniendo -tenido -tenida -tenidos -tenidas -tened diff --git a/data/stopwords/stopwords-fi.json b/data/stopwords/stopwords-fi.json new file mode 100644 index 0000000..de6a803 --- /dev/null +++ b/data/stopwords/stopwords-fi.json @@ -0,0 +1 @@ +[ "alla", "ansiosta", "ehkä", "ei", "enemmän", "ennen", "etessa", "f", "haikki", "he", "hitaasti", "hoikein", "hyvin", "hän", "ilman", "ja", "jos", "jälkeen", "kanssa", "kaukana", "kenties", "keskellä", "kesken", "koskaan", "kuinkan", "kukka", "kylliksi", "kyllä", "liian", "lla", "lla", "luona", "lähellä", "läpi", "me", "miksi", "mikä", "milloin", "milloinkan", "minä", "missä", "miten", "nopeasti", "nyt", "oikea", "oikealla", "paljon", "siellä", "sinä", "ssa", "sta", "suoraan", "tai", "takana", "takia", "tarpeeksi", "te", "tässä", "ulkopuolella", "vahemmän", "vasen", "vasenmalla", "vastan", "vielä", "vieressä", "vähän", "yhdessä", "ylös" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-fi.txt b/data/stopwords/stopwords-fi.txt deleted file mode 100644 index 3b468b3..0000000 --- a/data/stopwords/stopwords-fi.txt +++ /dev/null @@ -1,68 +0,0 @@ -alla -ansiosta -ehkä -ei -enemmän -ennen -etessa -f -haikki -he -hitaasti -hoikein -hyvin -hän -ilman -ja -jos -jälkeen -kanssa -kaukana -kenties -keskellä -kesken -koskaan -kuinkan -kukka -kylliksi -kyllä -liian -lla -lla -luona -lähellä -läpi -me -miksi -mikä -milloin -milloinkan -minä -missä -miten -nopeasti -nyt -oikea -oikealla -paljon -siellä -sinä -ssa -sta -suoraan -tai -takana -takia -tarpeeksi -te -tässä -ulkopuolella -vahemmän -vasen -vasenmalla -vastan -vielä -vieressä -vähän -yhdessä -ylös diff --git a/data/stopwords/stopwords-fr.json b/data/stopwords/stopwords-fr.json new file mode 100644 index 0000000..794ca13 --- /dev/null +++ b/data/stopwords/stopwords-fr.json @@ -0,0 +1 @@ +[ "au", "aux", "avec", "ce", "ces", "dans", "de", "des", "du", "elle", "en", "et", "eux", "il", "je", "la", "le", "leur", "lui", "ma", "mais", "me", "même", "mes", "moi", "mon", "ne", "nos", "notre", "nous", "on", "ou", "par", "pas", "pour", "qu", "que", "qui", "sa", "se", "ses", "son", "sur", "ta", "te", "tes", "toi", "ton", "tu", "un", "une", "vos", "votre", "vous", "c", "d", "j", "l", "à", "m", "n", "s", "t", "y", "été", "étée", "étées", "étés", "étant", "suis", "es", "est", "sommes", "êtes", "sont", "serai", "seras", "sera", "serons", "serez", "seront", "serais", "serait", "serions", "seriez", "seraient", "étais", "était", "étions", "étiez", "étaient", "fus", "fut", "fûmes", "fûtes", "furent", "sois", "soit", "soyons", "soyez", "soient", "fusse", "fusses", "fût", "fussions", "fussiez", "fussent", "ayant", "eu", "eue", "eues", "eus", "ai", "as", "avons", "avez", "ont", "aurai", "auras", "aura", "aurons", "aurez", "auront", "aurais", "aurait", "aurions", "auriez", "auraient", "avais", "avait", "avions", "aviez", "avaient", "eut", "eûmes", "eûtes", "eurent", "aie", "aies", "ait", "ayons", "ayez", "aient", "eusse", "eusses", "eût", "eussions", "eussiez", "eussent", "ceci", "celà", "cet", "cette", "ici", "ils", "les", "leurs", "quel", "quels", "quelle", "quelles", "sans", "soi" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-fr.txt b/data/stopwords/stopwords-fr.txt deleted file mode 100644 index 30e06ef..0000000 --- a/data/stopwords/stopwords-fr.txt +++ /dev/null @@ -1,220 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -#----------------------------------------------------------------------- -# a couple of test stopwords to test that the words are really being -# configured from this file: -stopworda -stopwordb - -#Standard english stop words taken from Lucene's StopAnalyzer -a -an -and -are -as -at -be -but -by -for -if -in -into -is -it -no -not -of -on -or -s -such -t -that -the -their -then -there -these -they -this -to -was -will -with -au -aux -avec -ce -ces -dans -de -des -du -elle -en -et -eux -il -je -la -le -leur -lui -ma -mais -me -même -mes -moi -mon -ne -nos -notre -nous -on -ou -par -pas -pour -qu -que -qui -sa -se -ses -son -sur -ta -te -tes -toi -ton -tu -un -une -vos -votre -vous -c -d -j -l -à -m -n -s -t -y -été -étée -étées -étés -étant -suis -es -est -sommes -êtes -sont -serai -seras -sera -serons -serez -seront -serais -serait -serions -seriez -seraient -étais -était -étions -étiez -étaient -fus -fut -fûmes -fûtes -furent -sois -soit -soyons -soyez -soient -fusse -fusses -fût -fussions -fussiez -fussent -ayant -eu -eue -eues -eus -ai -as -avons -avez -ont -aurai -auras -aura -aurons -aurez -auront -aurais -aurait -aurions -auriez -auraient -avais -avait -avions -aviez -avaient -eut -eûmes -eûtes -eurent -aie -aies -ait -ayons -ayez -aient -eusse -eusses -eût -eussions -eussiez -eussent -ceci -celà -cet -cette -ici -ils -les -leurs -quel -quels -quelle -quelles -sans -soi diff --git a/data/stopwords/stopwords-hu.json b/data/stopwords/stopwords-hu.json new file mode 100644 index 0000000..c779dee --- /dev/null +++ b/data/stopwords/stopwords-hu.json @@ -0,0 +1 @@ +[ "a", "á", "ahogy", "ahol", "aki", "akik", "akkor", "alatt", "által", "általában", "amely", "amelyek", "amelyekben", "amelyeket", "amelyet", "amelynek", "ami", "amit", "amolyan", "amp", "amíg", "amikor", "át", "abban", "ahhoz", "annak", "arra", "arról", "az", "azok", "azon", "azt", "azzal", "azért", "aztán", "azután", "azonban", "b", "bár", "be", "belül", "benne", "c", "cikk", "cikkek", "cikkeket", "csak", "d", "de", "e", "é", "eddig", "egész", "egy", "egyes", "egyetlen", "egyéb", "egyik", "egyre", "ekkor", "el", "elég", "ellen", "elő", "először", "előtt", "első", "én", "éppen", "ebben", "ehhez", "emilyen", "ennek", "erre", "ez", "ezt", "ezek", "ezen", "ezzel", "ezért", "és", "f", "fel", "felé", "g", "h", "hanem", "hiszen", "hogy", "hogyan", "i", "í", "igen", "így", "illetve", "ill.", "ill", "ilyen", "ilyenkor", "is", "ison", "ismét", "itt", "j", "jó", "jól", "jobban", "k", "kell", "kellett", "keresztül", "keressünk", "ki", "kívül", "között", "közül", "l", "legalább", "lehet", "lehetett", "legyen", "lenne", "lenni", "lesz", "lett", "m", "maga", "magát", "majd", "majd", "már", "más", "másik", "meg", "még", "mellett", "mert", "mely", "melyek", "mi", "mit", "míg", "miért", "milyen", "mikor", "minden", "mindent", "mindenki", "mindig", "mint", "mintha", "mivel", "most", "n", "nagy", "nagyobb", "nagyon", "ne", "néha", "nekem", "neki", "nem", "néhány", "nélkül", "nincs", "o", "ó", "olyan", "ott", "össze", "ö", "ő", "ők", "őket", "p", "pedig", "persze", "q", "r", "rá", "s", "saját", "sem", "semmi", "sok", "sokat", "sokkal", "sz", "számára", "szemben", "szerint", "szinte", "t", "talán", "tehát", "teljes", "tovább", "továbbá", "több", "u", "ú", "úgy", "ugyanis", "új", "újabb", "újra", "után", "utána", "utolsó", "ü", "ű", "v", "vagy", "vagyis", "valaki", "valamely", "valami", "valamint", "való", "vagyok", "van", "vannak", "volt", "voltam", "voltak", "voltunk", "vissza", "vele", "viszont", "volna", "számolnak", "szólnak", "szól", "w", "x", "y", "z", "zs", "a", "ahogy", "ahol", "aki", "akkor", "alatt", "általában", "által", "amely", "amíg", "amikor", "ami", "amolyan", "arra", "át", "az", "azért", "azonban", "azon", "aztán", "azt", "azután", "azzal", "bár", "be", "belül", "benne", "cikk", "csak", "de", "eddig", "egész", "egy", "egyéb", "egyes", "egyetlen", "egyik", "egyre", "ekkor", "el", "elég", "ellen", "elő", "először", "előtt", "első", "emilyen", "én", "éppen", "erre", "és", "e", "ez", "ezen", "ezért", "ezzel", "fel", "felé", "hanem", "hiszen", "hogy", "hogyan", "igen", "így", "ill.", "illetve", "ill", "ilyen", "ilyenkor", "ismét", "ison", "itt", "jó", "jobban", "jól", "kell", "keres", "keresztül", "ki", "kívül", "között", "közül", "legalább", "legyen", "lehet", "lenni", "lett", "maga", "maga", "majd", "már", "más", "másik", "még", "meg", "mellett", "mely", "mert", "miért", "míg", "mikor", "milyen", "minden", "mindenki", "mindig", "mi", "mint", "mintha", "mivel", "most", "nagy", "nagyobb", "nagyon", "ne", "néha", "néhány", "neki", "nélkül", "nem", "nincs", "ők", "olyan", "ő", "össze", "ott", "pedig", "persze", "rá", "saját", "s", "sem", "semmi", "sokkal", "sok", "számára", "számol", "szemben", "szerint", "szinte", "szól", "talán", "tehát", "teljes", "továbbá", "tovább", "úgy", "ugyanis", "új", "újabb", "újra", "utána", "után", "utolsó", "vagy", "vagyis", "valaki", "valamely", "valami", "valamint", "való", "van", "vissza", "viszont", "volt" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-hu.txt b/data/stopwords/stopwords-hu.txt deleted file mode 100644 index 694feb1..0000000 --- a/data/stopwords/stopwords-hu.txt +++ /dev/null @@ -1,403 +0,0 @@ -a -á -ahogy -ahol -aki -akik -akkor -alatt -által -általában -amely -amelyek -amelyekben -amelyeket -amelyet -amelynek -ami -amit -amolyan -amp -amíg -amikor -át -abban -ahhoz -annak -arra -arról -az -azok -azon -azt -azzal -azért -aztán -azután -azonban -b -bár -be -belül -benne -c -cikk -cikkek -cikkeket -csak -d -de -e -é -eddig -egész -egy -egyes -egyetlen -egyéb -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -én -éppen -ebben -ehhez -emilyen -ennek -erre -ez -ezt -ezek -ezen -ezzel -ezért -és -f -fel -felé -g -h -hanem -hiszen -hogy -hogyan -i -í -igen -így -illetve -ill. -ill -ilyen -ilyenkor -is -ison -ismét -itt -j -jó -jól -jobban -k -kell -kellett -keresztül -keressünk -ki -kívül -között -közül -l -legalább -lehet -lehetett -legyen -lenne -lenni -lesz -lett -m -maga -magát -majd -majd -már -más -másik -meg -még -mellett -mert -mely -melyek -mi -mit -míg -miért -milyen -mikor -minden -mindent -mindenki -mindig -mint -mintha -mivel -most -n -nagy -nagyobb -nagyon -ne -néha -nekem -neki -nem -néhány -nélkül -nincs -o -ó -olyan -ott -össze -ö -ő -ők -őket -p -pedig -persze -q -r -rá -s -saját -sem -semmi -sok -sokat -sokkal -sz -számára -szemben -szerint -szinte -t -talán -tehát -teljes -tovább -továbbá -több -u -ú -úgy -ugyanis -új -újabb -újra -után -utána -utolsó -ü -ű -v -vagy -vagyis -valaki -valamely -valami -valamint -való -vagyok -van -vannak -volt -voltam -voltak -voltunk -vissza -vele -viszont -volna -számolnak -szólnak -szól -w -x -y -z -zs -a -ahogy -ahol -aki -akkor -alatt -általában -által -amely -amíg -amikor -ami -amolyan -arra -át -az -azért -azonban -azon -aztán -azt -azután -azzal -bár -be -belül -benne -cikk -csak -de -eddig -egész -egy -egyéb -egyes -egyetlen -egyik -egyre -ekkor -el -elég -ellen -elő -először -előtt -első -emilyen -én -éppen -erre -és -e -ez -ezen -ezért -ezzel -fel -felé -hanem -hiszen -hogy -hogyan -igen -így -ill. -illetve -ill -ilyen -ilyenkor -ismét -ison -itt -jó -jobban -jól -kell -keres -keresztül -ki -kívül -között -közül -legalább -legyen -lehet -lenni -lett -maga -maga -majd -már -más -másik -még -meg -mellett -mely -mert -miért -míg -mikor -milyen -minden -mindenki -mindig -mi -mint -mintha -mivel -most -nagy -nagyobb -nagyon -ne -néha -néhány -neki -nélkül -nem -nincs -ők -olyan -ő -össze -ott -pedig -persze -rá -saját -s -sem -semmi -sokkal -sok -számára -számol -szemben -szerint -szinte -szól -talán -tehát -teljes -továbbá -tovább -úgy -ugyanis -új -újabb -újra -utána -után -utolsó -vagy -vagyis -valaki -valamely -valami -valamint -való -van -vissza -viszont -volt - diff --git a/data/stopwords/stopwords-id.json b/data/stopwords/stopwords-id.json new file mode 100644 index 0000000..10af093 --- /dev/null +++ b/data/stopwords/stopwords-id.json @@ -0,0 +1 @@ +[ "a", "abad", "acara", "aceh", "ada", "adalah", "adanya", "adapun", "agak", "agaknya", "agama", "agar", "agustus", "air", "akan", "akankah", "akhir", "akhiri", "akhirnya", "akibat", "aku", "akulah", "alam", "album", "amat", "amatlah", "amerika", "anak", "and", "anda", "andalah", "anggota", "antar", "antara", "antarabangsa", "antaranya", "apa", "apaan", "apabila", "apakah", "apalagi", "apatah", "api", "april", "artikel", "artinya", "as", "asal", "asalkan", "asas", "asia", "asing", "atas", "atau", "ataukah", "ataupun", "australia", "awal", "awalnya", "awam", "b", "badan", "bagai", "bagaikan", "bagaimana", "bagaimanakah", "bagaimanapun", "bagainamakah", "bagi", "bagian", "bahagian", "bahan", "baharu", "bahasa", "bahawa", "bahkan", "bahwa", "bahwasannya", "bahwasanya", "baik", "baiknya", "bakal", "bakalan", "balik", "bandar", "bangsa", "bank", "banyak", "bapak", "barang", "barangan", "barat", "baru", "baru-baru", "bawah", "beberapa", "begini", "beginian", "beginikah", "beginilah", "begitu", "begitukah", "begitulah", "begitupun", "bekas", "bekerja", "belakang", "belakangan", "belanda", "beli", "beliau", "belum", "belumlah", "benar", "benarkah", "benarlah", "bentuk", "berada", "berakhir", "berakhirlah", "berakhirnya", "berapa", "berapakah", "berapalah", "berapapun", "berarti", "berasal", "berat", "berawal", "berbagai", "berbanding", "berbeda", "berdasarkan", "berdatangan", "berharap", "berhasil", "beri", "berikan", "berikut", "berikutan", "berikutnya", "berita", "berjalan", "berjaya", "berjumlah", "berkaitan", "berkali", "berkali-kali", "berkata", "berkehendak", "berkeinginan", "berkenaan", "berlainan", "berlaku", "berlalu", "berlangsung", "berlebihan", "bermacam", "bermacam-macam", "bermain", "bermaksud", "bermula", "bernama", "bernilai", "bersama", "bersama-sama", "bersiap", "bertanya", "bertemu", "berturut", "bertutur", "berubah", "berujar", "berupa", "besar", "besok", "betul", "betulkah", "bhd", "biasa", "biasanya", "bidang", "bila", "bilakah", "bilion", "bintang", "bisa", "bisakah", "blog", "bn", "bola", "boleh", "bolehkah", "bolehlah", "buat", "bukan", "bukankah", "bukanlah", "bukannya", "buku", "bulan", "bumi", "bung", "bursa", "cadangan", "cara", "caranya", "catch", "china", "click", "code", "copyright", "cukup", "cukupkah", "cukuplah", "cuma", "daerah", "dagangan", "dahulu", "dalam", "dan", "dana", "dapat", "dari", "daripada", "dasar", "data", "datang", "datuk", "dekat", "demi", "demikian", "demikianlah", "dengan", "depan", "derivatives", "desa", "desember", "detik", "dewan", "di", "dia", "diadakan", "diakhiri", "diakhirinya", "dialah", "dianggap", "diantara", "diantaranya", "diberi", "diberikan", "diberikannya", "dibuat", "dibuatnya", "dibuka", "dicatatkan", "didapat", "didatangkan", "didirikan", "diduga", "digunakan", "diibaratkan", "diibaratkannya", "diingat", "diingatkan", "diinginkan", "dijangka", "dijawab", "dijelaskan", "dijelaskannya", "dikarenakan", "dikatakan", "dikatakannya", "dikenal", "dikerjakan", "diketahui", "diketahuinya", "dikira", "dilakukan", "dilalui", "dilihat", "dimaksud", "dimaksudkan", "dimaksudkannya", "dimaksudnya", "dimana", "diminta", "dimintai", "dimisalkan", "dimulai", "dimulailah", "dimulainya", "dimungkinkan", "dini", "diniagakan", "dipastikan", "diperbuat", "diperbuatnya", "dipergunakan", "diperkirakan", "diperlihatkan", "diperlukan", "diperlukannya", "dipersoalkan", "dipertanyakan", "dipunyai", "diri", "dirilis", "dirinya", "dis", "disampaikan", "disebut", "disebutkan", "disebutkannya", "disember", "disini", "disinilah", "distrik", "ditambahkan", "ditandaskan", "ditanya", "ditanyai", "ditanyakan", "ditegaskan", "ditemukan", "ditujukan", "ditunjuk", "ditunjuki", "ditunjukkan", "ditunjukkannya", "ditunjuknya", "ditutup", "dituturkan", "dituturkannya", "diucapkan", "diucapkannya", "diungkapkan", "document.write", "dolar", "dong", "dr", "dua", "dulu", "dunia", "effective", "ekonomi", "eksekutif", "eksport", "empat", "enam", "enggak", "enggaknya", "entah", "entahlah", "era", "eropa", "err", "faedah", "feb", "film", "gat", "gedung", "gelar", "gettracker", "global", "grup", "guna", "gunakan", "gunung", "hadap", "hadapan", "hal", "hampir", "hanya", "hanyalah", "harga", "hari", "harian", "harus", "haruslah", "harusnya", "hasil", "hendak", "hendaklah", "hendaknya", "hidup", "hingga", "https", "hubungan", "hukum", "hutan", "i", "ia", "iaitu", "ialah", "ibarat", "ibaratkan", "ibaratnya", "ibu", "ii", "iklan", "ikut", "ilmu", "indeks", "india", "indonesia", "industri", "informasi", "ingat", "inggris", "ingin", "inginkah", "inginkan", "ini", "inikah", "inilah", "internasional", "islam", "isnin", "isu", "italia", "itu", "itukah", "itulah", "jabatan", "jadi", "jadilah", "jadinya", "jakarta", "jalan", "jalur", "jaman", "jan", "jangan", "jangankan", "janganlah", "januari", "jauh", "jawa", "jawab", "jawaban", "jawabnya", "jawatan", "jawatankuasa", "jelas", "jelaskan", "jelaslah", "jelasnya", "jenis", "jepang", "jepun", "jerman", "jika", "jikalau", "jiwa", "jual", "jualan", "juga", "julai", "jumaat", "jumat", "jumlah", "jumlahnya", "jun", "juni", "justru", "juta", "kabar", "kabupaten", "kadar", "kala", "kalangan", "kalau", "kalaulah", "kalaupun", "kali", "kalian", "kalimantan", "kami", "kamilah", "kamis", "kamu", "kamulah", "kan", "kantor", "kapal", "kapan", "kapankah", "kapanpun", "karena", "karenanya", "karya", "kasus", "kata", "katakan", "katakanlah", "katanya", "kaunter", "kawasan", "ke", "keadaan", "kebetulan", "kebutuhan", "kecamatan", "kecil", "kedua", "kedua-dua", "keduanya", "kedudukan", "kegiatan", "kehidupan", "keinginan", "kejadian", "kekal", "kelamaan", "kelihatan", "kelihatannya", "kelima", "kelompok", "keluar", "keluarga", "kelurahan", "kembali", "kementerian", "kemudahan", "kemudian", "kemungkinan", "kemungkinannya", "kenaikan", "kenapa", "kenyataan", "kepada", "kepadanya", "kepala", "kepentingan", "keputusan", "kerajaan", "kerana", "kereta", "kerja", "kerjasama", "kes", "kesampaian", "keselamatan", "keseluruhan", "keseluruhannya", "kesempatan", "kesihatan", "keterangan", "keterlaluan", "ketiga", "ketika", "ketua", "keuntungan", "kewangan", "khamis", "khusus", "khususnya", "kini", "kinilah", "kira", "kira-kira", "kiranya", "kita", "kitalah", "klci", "klibor", "klik", "km", "kok", "komentar", "kompas", "komposit", "kondisi", "kontrak", "korban", "korea", "kos", "kota", "kuala", "kuasa", "kukuh", "kumpulan", "kurang", "kurangnya", "lagi", "lagian", "lagu", "lah", "lain", "lainnya", "laku", "lalu", "lama", "lamanya", "langkah", "langsung", "lanjut", "lanjutnya", "laporan", "laut", "lebih", "lembaga", "lepas", "lewat", "lima", "lingkungan", "login", "lokasi", "lot", "luar", "luas", "lumpur", "mac", "macam", "mahkamah", "mahu", "majlis", "maka", "makanan", "makanya", "makin", "maklumat", "malah", "malahan", "malam", "malaysia", "mampu", "mampukah", "mana", "manakala", "manalagi", "mantan", "manusia", "masa", "masalah", "masalahnya", "masih", "masihkah", "masing", "masing-masing", "masuk", "masyarakat", "mata", "mau", "maupun", "measure", "media", "mei", "melainkan", "melakukan", "melalui", "melawan", "melihat", "melihatnya", "memandangkan", "memang", "memastikan", "membantu", "membawa", "memberi", "memberikan", "membolehkan", "membuat", "memerlukan", "memihak", "memiliki", "meminta", "memintakan", "memisalkan", "memperbuat", "mempergunakan", "memperkirakan", "memperlihatkan", "mempersiapkan", "mempersoalkan", "mempertanyakan", "mempunyai", "memulai", "memungkinkan", "menaiki", "menambah", "menambahkan", "menandaskan", "menanti", "menantikan", "menanya", "menanyai", "menanyakan", "menarik", "menawarkan", "mencapai", "mencari", "mencatatkan", "mendapat", "mendapatkan", "mendatang", "mendatangi", "mendatangkan", "menegaskan", "menerima", "menerusi", "mengadakan", "mengakhiri", "mengaku", "mengalami", "mengambil", "mengapa", "mengatakan", "mengatakannya", "mengenai", "mengerjakan", "mengetahui", "menggalakkan", "menggunakan", "menghadapi", "menghendaki", "mengibaratkan", "mengibaratkannya", "mengikut", "mengingat", "mengingatkan", "menginginkan", "mengira", "mengucapkan", "mengucapkannya", "mengumumkan", "mengungkapkan", "mengurangkan", "meninggal", "meningkat", "meningkatkan", "menjadi", "menjalani", "menjawab", "menjelang", "menjelaskan", "menokok", "menteri", "menuju", "menunjuk", "menunjuki", "menunjukkan", "menunjuknya", "menurut", "menuturkan", "menyaksikan", "menyampaikan", "menyangkut", "menyatakan", "menyebabkan", "menyebutkan", "menyediakan", "menyeluruh", "menyiapkan", "merasa", "mereka", "merekalah", "merosot", "merupakan", "meski", "meskipun", "mesyuarat", "metrotv", "meyakini", "meyakinkan", "milik", "militer", "minat", "minggu", "minta", "minyak", "mirip", "misal", "misalkan", "misalnya", "mobil", "modal", "mohd", "mudah", "mula", "mulai", "mulailah", "mulanya", "muncul", "mungkin", "mungkinkah", "musik", "musim", "nah", "naik", "nama", "namun", "nanti", "nantinya", "nasional", "negara", "negara-negara", "negeri", "new", "niaga", "nilai", "nomor", "noun", "nov", "november", "numeral", "numeralia", "nya", "nyaris", "nyatanya", "of", "ogos", "okt", "oktober", "olah", "oleh", "olehnya", "operasi", "orang", "organisasi", "pada", "padahal", "padanya", "pagetracker", "pagi", "pak", "paling", "pameran", "panjang", "pantas", "papan", "para", "paras", "parlimen", "partai", "parti", "particle", "pasar", "pasaran", "password", "pasti", "pastilah", "pasukan", "paticle", "pegawai", "pejabat", "pekan", "pekerja", "pelabur", "pelaburan", "pelancongan", "pelanggan", "pelbagai", "peluang", "pemain", "pembangunan", "pemberita", "pembinaan", "pemerintah", "pemerintahan", "pemimpin", "pendapatan", "pendidikan", "penduduk", "penerbangan", "pengarah", "pengeluaran", "pengerusi", "pengguna", "penggunaan", "pengurusan", "peniaga", "peningkatan", "penting", "pentingnya", "per", "perancis", "perang", "peratus", "percuma", "perdagangan", "perdana", "peringkat", "perjanjian", "perkara", "perkhidmatan", "perladangan", "perlu", "perlukah", "perlunya", "permintaan", "pernah", "perniagaan", "persekutuan", "persen", "persidangan", "persoalan", "pertama", "pertandingan", "pertanyaan", "pertanyakan", "pertubuhan", "pertumbuhan", "perubahan", "perusahaan", "pesawat", "peserta", "petang", "pihak", "pihaknya", "pilihan", "pinjaman", "polis", "polisi", "politik", "pos", "posisi", "presiden", "prestasi", "produk", "program", "projek", "pronomia", "pronoun", "proses", "proton", "provinsi", "pt", "pubdate", "pukul", "pula", "pulau", "pun", "punya", "pusat", "rabu", "radio", "raja", "rakan", "rakyat", "ramai", "rantau", "rasa", "rasanya", "rata", "raya", "rendah", "republik", "resmi", "ribu", "ringgit", "root", "ruang", "rumah", "rupa", "rupanya", "saat", "saatnya", "sabah", "sabtu", "sahaja", "saham", "saja", "sajalah", "sakit", "salah", "saling", "sama", "sama-sama", "sambil", "sampai", "sampaikan", "sana", "sangat", "sangatlah", "sarawak", "satu", "sawit", "saya", "sayalah", "sdn", "se", "sebab", "sebabnya", "sebagai", "sebagaimana", "sebagainya", "sebagian", "sebahagian", "sebaik", "sebaiknya", "sebaliknya", "sebanyak", "sebarang", "sebegini", "sebegitu", "sebelah", "sebelum", "sebelumnya", "sebenarnya", "seberapa", "sebesar", "sebetulnya", "sebisanya", "sebuah", "sebut", "sebutlah", "sebutnya", "secara", "secukupnya", "sedang", "sedangkan", "sedemikian", "sedikit", "sedikitnya", "seenaknya", "segala", "segalanya", "segera", "segi", "seharusnya", "sehingga", "seingat", "sejak", "sejarah", "sejauh", "sejenak", "sejumlah", "sekadar", "sekadarnya", "sekali", "sekali-kali", "sekalian", "sekaligus", "sekalipun", "sekarang", "sekaranglah", "sekecil", "seketika", "sekiranya", "sekitar", "sekitarnya", "sekolah", "sektor", "sekurang", "sekurangnya", "sekuriti", "sela", "selagi", "selain", "selaku", "selalu", "selama", "selama-lamanya", "selamanya", "selanjutnya", "selasa", "selatan", "selepas", "seluruh", "seluruhnya", "semacam", "semakin", "semalam", "semampu", "semampunya", "semasa", "semasih", "semata", "semaunya", "sementara", "semisal", "semisalnya", "sempat", "semua", "semuanya", "semula", "sen", "sendiri", "sendirian", "sendirinya", "senin", "seolah", "seolah-olah", "seorang", "sepak", "sepanjang", "sepantasnya", "sepantasnyalah", "seperlunya", "seperti", "sepertinya", "sepihak", "sept", "september", "serangan", "serantau", "seri", "serikat", "sering", "seringnya", "serta", "serupa", "sesaat", "sesama", "sesampai", "sesegera", "sesekali", "seseorang", "sesi", "sesuai", "sesuatu", "sesuatunya", "sesudah", "sesudahnya", "setelah", "setempat", "setengah", "seterusnya", "setiap", "setiausaha", "setiba", "setibanya", "setidak", "setidaknya", "setinggi", "seusai", "sewaktu", "siap", "siapa", "siapakah", "siapapun", "siaran", "sidang", "singapura", "sini", "sinilah", "sistem", "soal", "soalnya", "sokongan", "sri", "stasiun", "suara", "suatu", "sudah", "sudahkah", "sudahlah", "sukan", "suku", "sumber", "sungai", "supaya", "surat", "susut", "syarikat", "syed", "tadi", "tadinya", "tahap", "tahu", "tahun", "tak", "tama", "tambah", "tambahnya", "tampak", "tampaknya", "tampil", "tan", "tanah", "tandas", "tandasnya", "tanggal", "tanpa", "tanya", "tanyakan", "tanyanya", "tapi", "tawaran", "tegas", "tegasnya", "teknologi", "telah", "televisi", "teman", "tempat", "tempatan", "tempo", "tempoh", "tenaga", "tengah", "tentang", "tentara", "tentu", "tentulah", "tentunya", "tepat", "terakhir", "terasa", "terbaik", "terbang", "terbanyak", "terbesar", "terbuka", "terdahulu", "terdapat", "terdiri", "terhadap", "terhadapnya", "teringat", "terjadi", "terjadilah", "terjadinya", "terkait", "terkenal", "terkira", "terlalu", "terlebih", "terletak", "terlihat", "termasuk", "ternyata", "tersampaikan", "tersebut", "tersebutlah", "tertentu", "tertuju", "terus", "terutama", "testimoni", "testimony", "tetap", "tetapi", "the", "tiada", "tiap", "tiba", "tidak", "tidakkah", "tidaklah", "tidaknya", "tiga", "tim", "timbalan", "timur", "tindakan", "tinggal", "tinggi", "tingkat", "toh", "tokoh", "try", "tun", "tunai", "tunjuk", "turun", "turut", "tutur", "tuturnya", "tv", "uang", "ucap", "ucapnya", "udara", "ujar", "ujarnya", "umum", "umumnya", "unescape", "ungkap", "ungkapnya", "unit", "universitas", "untuk", "untung", "upaya", "urus", "usah", "usaha", "usai", "user", "utama", "utara", "var", "versi", "waduh", "wah", "wahai", "wakil", "waktu", "waktunya", "walau", "walaupun", "wang", "wanita", "warga", "warta", "wib", "wilayah", "wong", "word", "ya", "yaitu", "yakin", "yakni", "yang", "zaman" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-id.txt b/data/stopwords/stopwords-id.txt deleted file mode 100644 index 418f43f..0000000 --- a/data/stopwords/stopwords-id.txt +++ /dev/null @@ -1,1309 +0,0 @@ -a -abad -acara -aceh -ada -adalah -adanya -adapun -agak -agaknya -agama -agar -agustus -air -akan -akankah -akhir -akhiri -akhirnya -akibat -aku -akulah -alam -album -amat -amatlah -amerika -anak -and -anda -andalah -anggota -antar -antara -antarabangsa -antaranya -apa -apaan -apabila -apakah -apalagi -apatah -api -april -artikel -artinya -as -asal -asalkan -asas -asia -asing -atas -atau -ataukah -ataupun -australia -awal -awalnya -awam -b -badan -bagai -bagaikan -bagaimana -bagaimanakah -bagaimanapun -bagainamakah -bagi -bagian -bahagian -bahan -baharu -bahasa -bahawa -bahkan -bahwa -bahwasannya -bahwasanya -baik -baiknya -bakal -bakalan -balik -bandar -bangsa -bank -banyak -bapak -barang -barangan -barat -baru -baru-baru -bawah -beberapa -begini -beginian -beginikah -beginilah -begitu -begitukah -begitulah -begitupun -bekas -bekerja -belakang -belakangan -belanda -beli -beliau -belum -belumlah -benar -benarkah -benarlah -bentuk -berada -berakhir -berakhirlah -berakhirnya -berapa -berapakah -berapalah -berapapun -berarti -berasal -berat -berawal -berbagai -berbanding -berbeda -berdasarkan -berdatangan -berharap -berhasil -beri -berikan -berikut -berikutan -berikutnya -berita -berjalan -berjaya -berjumlah -berkaitan -berkali -berkali-kali -berkata -berkehendak -berkeinginan -berkenaan -berlainan -berlaku -berlalu -berlangsung -berlebihan -bermacam -bermacam-macam -bermain -bermaksud -bermula -bernama -bernilai -bersama -bersama-sama -bersiap -bertanya -bertemu -berturut -bertutur -berubah -berujar -berupa -besar -besok -betul -betulkah -bhd -biasa -biasanya -bidang -bila -bilakah -bilion -bintang -bisa -bisakah -blog -bn -bola -boleh -bolehkah -bolehlah -buat -bukan -bukankah -bukanlah -bukannya -buku -bulan -bumi -bung -bursa -cadangan -cara -caranya -catch -china -click -code -copyright -cukup -cukupkah -cukuplah -cuma -daerah -dagangan -dahulu -dalam -dan -dana -dapat -dari -daripada -dasar -data -datang -datuk -dekat -demi -demikian -demikianlah -dengan -depan -derivatives -desa -desember -detik -dewan -di -dia -diadakan -diakhiri -diakhirinya -dialah -dianggap -diantara -diantaranya -diberi -diberikan -diberikannya -dibuat -dibuatnya -dibuka -dicatatkan -didapat -didatangkan -didirikan -diduga -digunakan -diibaratkan -diibaratkannya -diingat -diingatkan -diinginkan -dijangka -dijawab -dijelaskan -dijelaskannya -dikarenakan -dikatakan -dikatakannya -dikenal -dikerjakan -diketahui -diketahuinya -dikira -dilakukan -dilalui -dilihat -dimaksud -dimaksudkan -dimaksudkannya -dimaksudnya -dimana -diminta -dimintai -dimisalkan -dimulai -dimulailah -dimulainya -dimungkinkan -dini -diniagakan -dipastikan -diperbuat -diperbuatnya -dipergunakan -diperkirakan -diperlihatkan -diperlukan -diperlukannya -dipersoalkan -dipertanyakan -dipunyai -diri -dirilis -dirinya -dis -disampaikan -disebut -disebutkan -disebutkannya -disember -disini -disinilah -distrik -ditambahkan -ditandaskan -ditanya -ditanyai -ditanyakan -ditegaskan -ditemukan -ditujukan -ditunjuk -ditunjuki -ditunjukkan -ditunjukkannya -ditunjuknya -ditutup -dituturkan -dituturkannya -diucapkan -diucapkannya -diungkapkan -document.write -dolar -dong -dr -dua -dulu -dunia -effective -ekonomi -eksekutif -eksport -empat -enam -enggak -enggaknya -entah -entahlah -era -eropa -err -faedah -feb -film -gat -gedung -gelar -gettracker -global -grup -guna -gunakan -gunung -hadap -hadapan -hal -hampir -hanya -hanyalah -harga -hari -harian -harus -haruslah -harusnya -hasil -hendak -hendaklah -hendaknya -hidup -hingga -https -hubungan -hukum -hutan -i -ia -iaitu -ialah -ibarat -ibaratkan -ibaratnya -ibu -ii -iklan -ikut -ilmu -indeks -india -indonesia -industri -informasi -ingat -inggris -ingin -inginkah -inginkan -ini -inikah -inilah -internasional -islam -isnin -isu -italia -itu -itukah -itulah -jabatan -jadi -jadilah -jadinya -jakarta -jalan -jalur -jaman -jan -jangan -jangankan -janganlah -januari -jauh -jawa -jawab -jawaban -jawabnya -jawatan -jawatankuasa -jelas -jelaskan -jelaslah -jelasnya -jenis -jepang -jepun -jerman -jika -jikalau -jiwa -jual -jualan -juga -julai -jumaat -jumat -jumlah -jumlahnya -jun -juni -justru -juta -kabar -kabupaten -kadar -kala -kalangan -kalau -kalaulah -kalaupun -kali -kalian -kalimantan -kami -kamilah -kamis -kamu -kamulah -kan -kantor -kapal -kapan -kapankah -kapanpun -karena -karenanya -karya -kasus -kata -katakan -katakanlah -katanya -kaunter -kawasan -ke -keadaan -kebetulan -kebutuhan -kecamatan -kecil -kedua -kedua-dua -keduanya -kedudukan -kegiatan -kehidupan -keinginan -kejadian -kekal -kelamaan -kelihatan -kelihatannya -kelima -kelompok -keluar -keluarga -kelurahan -kembali -kementerian -kemudahan -kemudian -kemungkinan -kemungkinannya -kenaikan -kenapa -kenyataan -kepada -kepadanya -kepala -kepentingan -keputusan -kerajaan -kerana -kereta -kerja -kerjasama -kes -kesampaian -keselamatan -keseluruhan -keseluruhannya -kesempatan -kesihatan -keterangan -keterlaluan -ketiga -ketika -ketua -keuntungan -kewangan -khamis -khusus -khususnya -kini -kinilah -kira -kira-kira -kiranya -kita -kitalah -klci -klibor -klik -km -kok -komentar -kompas -komposit -kondisi -kontrak -korban -korea -kos -kota -kuala -kuasa -kukuh -kumpulan -kurang -kurangnya -lagi -lagian -lagu -lah -lain -lainnya -laku -lalu -lama -lamanya -langkah -langsung -lanjut -lanjutnya -laporan -laut -lebih -lembaga -lepas -lewat -lima -lingkungan -login -lokasi -lot -luar -luas -lumpur -mac -macam -mahkamah -mahu -majlis -maka -makanan -makanya -makin -maklumat -malah -malahan -malam -malaysia -mampu -mampukah -mana -manakala -manalagi -mantan -manusia -masa -masalah -masalahnya -masih -masihkah -masing -masing-masing -masuk -masyarakat -mata -mau -maupun -measure -media -mei -melainkan -melakukan -melalui -melawan -melihat -melihatnya -memandangkan -memang -memastikan -membantu -membawa -memberi -memberikan -membolehkan -membuat -memerlukan -memihak -memiliki -meminta -memintakan -memisalkan -memperbuat -mempergunakan -memperkirakan -memperlihatkan -mempersiapkan -mempersoalkan -mempertanyakan -mempunyai -memulai -memungkinkan -menaiki -menambah -menambahkan -menandaskan -menanti -menantikan -menanya -menanyai -menanyakan -menarik -menawarkan -mencapai -mencari -mencatatkan -mendapat -mendapatkan -mendatang -mendatangi -mendatangkan -menegaskan -menerima -menerusi -mengadakan -mengakhiri -mengaku -mengalami -mengambil -mengapa -mengatakan -mengatakannya -mengenai -mengerjakan -mengetahui -menggalakkan -menggunakan -menghadapi -menghendaki -mengibaratkan -mengibaratkannya -mengikut -mengingat -mengingatkan -menginginkan -mengira -mengucapkan -mengucapkannya -mengumumkan -mengungkapkan -mengurangkan -meninggal -meningkat -meningkatkan -menjadi -menjalani -menjawab -menjelang -menjelaskan -menokok -menteri -menuju -menunjuk -menunjuki -menunjukkan -menunjuknya -menurut -menuturkan -menyaksikan -menyampaikan -menyangkut -menyatakan -menyebabkan -menyebutkan -menyediakan -menyeluruh -menyiapkan -merasa -mereka -merekalah -merosot -merupakan -meski -meskipun -mesyuarat -metrotv -meyakini -meyakinkan -milik -militer -minat -minggu -minta -minyak -mirip -misal -misalkan -misalnya -mobil -modal -mohd -mudah -mula -mulai -mulailah -mulanya -muncul -mungkin -mungkinkah -musik -musim -nah -naik -nama -namun -nanti -nantinya -nasional -negara -negara-negara -negeri -new -niaga -nilai -nomor -noun -nov -november -numeral -numeralia -nya -nyaris -nyatanya -of -ogos -okt -oktober -olah -oleh -olehnya -operasi -orang -organisasi -pada -padahal -padanya -pagetracker -pagi -pak -paling -pameran -panjang -pantas -papan -para -paras -parlimen -partai -parti -particle -pasar -pasaran -password -pasti -pastilah -pasukan -paticle -pegawai -pejabat -pekan -pekerja -pelabur -pelaburan -pelancongan -pelanggan -pelbagai -peluang -pemain -pembangunan -pemberita -pembinaan -pemerintah -pemerintahan -pemimpin -pendapatan -pendidikan -penduduk -penerbangan -pengarah -pengeluaran -pengerusi -pengguna -penggunaan -pengurusan -peniaga -peningkatan -penting -pentingnya -per -perancis -perang -peratus -percuma -perdagangan -perdana -peringkat -perjanjian -perkara -perkhidmatan -perladangan -perlu -perlukah -perlunya -permintaan -pernah -perniagaan -persekutuan -persen -persidangan -persoalan -pertama -pertandingan -pertanyaan -pertanyakan -pertubuhan -pertumbuhan -perubahan -perusahaan -pesawat -peserta -petang -pihak -pihaknya -pilihan -pinjaman -polis -polisi -politik -pos -posisi -presiden -prestasi -produk -program -projek -pronomia -pronoun -proses -proton -provinsi -pt -pubdate -pukul -pula -pulau -pun -punya -pusat -rabu -radio -raja -rakan -rakyat -ramai -rantau -rasa -rasanya -rata -raya -rendah -republik -resmi -ribu -ringgit -root -ruang -rumah -rupa -rupanya -saat -saatnya -sabah -sabtu -sahaja -saham -saja -sajalah -sakit -salah -saling -sama -sama-sama -sambil -sampai -sampaikan -sana -sangat -sangatlah -sarawak -satu -sawit -saya -sayalah -sdn -se -sebab -sebabnya -sebagai -sebagaimana -sebagainya -sebagian -sebahagian -sebaik -sebaiknya -sebaliknya -sebanyak -sebarang -sebegini -sebegitu -sebelah -sebelum -sebelumnya -sebenarnya -seberapa -sebesar -sebetulnya -sebisanya -sebuah -sebut -sebutlah -sebutnya -secara -secukupnya -sedang -sedangkan -sedemikian -sedikit -sedikitnya -seenaknya -segala -segalanya -segera -segi -seharusnya -sehingga -seingat -sejak -sejarah -sejauh -sejenak -sejumlah -sekadar -sekadarnya -sekali -sekali-kali -sekalian -sekaligus -sekalipun -sekarang -sekaranglah -sekecil -seketika -sekiranya -sekitar -sekitarnya -sekolah -sektor -sekurang -sekurangnya -sekuriti -sela -selagi -selain -selaku -selalu -selama -selama-lamanya -selamanya -selanjutnya -selasa -selatan -selepas -seluruh -seluruhnya -semacam -semakin -semalam -semampu -semampunya -semasa -semasih -semata -semaunya -sementara -semisal -semisalnya -sempat -semua -semuanya -semula -sen -sendiri -sendirian -sendirinya -senin -seolah -seolah-olah -seorang -sepak -sepanjang -sepantasnya -sepantasnyalah -seperlunya -seperti -sepertinya -sepihak -sept -september -serangan -serantau -seri -serikat -sering -seringnya -serta -serupa -sesaat -sesama -sesampai -sesegera -sesekali -seseorang -sesi -sesuai -sesuatu -sesuatunya -sesudah -sesudahnya -setelah -setempat -setengah -seterusnya -setiap -setiausaha -setiba -setibanya -setidak -setidaknya -setinggi -seusai -sewaktu -siap -siapa -siapakah -siapapun -siaran -sidang -singapura -sini -sinilah -sistem -soal -soalnya -sokongan -sri -stasiun -suara -suatu -sudah -sudahkah -sudahlah -sukan -suku -sumber -sungai -supaya -surat -susut -syarikat -syed -tadi -tadinya -tahap -tahu -tahun -tak -tama -tambah -tambahnya -tampak -tampaknya -tampil -tan -tanah -tandas -tandasnya -tanggal -tanpa -tanya -tanyakan -tanyanya -tapi -tawaran -tegas -tegasnya -teknologi -telah -televisi -teman -tempat -tempatan -tempo -tempoh -tenaga -tengah -tentang -tentara -tentu -tentulah -tentunya -tepat -terakhir -terasa -terbaik -terbang -terbanyak -terbesar -terbuka -terdahulu -terdapat -terdiri -terhadap -terhadapnya -teringat -terjadi -terjadilah -terjadinya -terkait -terkenal -terkira -terlalu -terlebih -terletak -terlihat -termasuk -ternyata -tersampaikan -tersebut -tersebutlah -tertentu -tertuju -terus -terutama -testimoni -testimony -tetap -tetapi -the -tiada -tiap -tiba -tidak -tidakkah -tidaklah -tidaknya -tiga -tim -timbalan -timur -tindakan -tinggal -tinggi -tingkat -toh -tokoh -try -tun -tunai -tunjuk -turun -turut -tutur -tuturnya -tv -uang -ucap -ucapnya -udara -ujar -ujarnya -umum -umumnya -unescape -ungkap -ungkapnya -unit -universitas -untuk -untung -upaya -urus -usah -usaha -usai -user -utama -utara -var -versi -waduh -wah -wahai -wakil -waktu -waktunya -walau -walaupun -wang -wanita -warga -warta -wib -wilayah -wong -word -ya -yaitu -yakin -yakni -yang -zaman \ No newline at end of file diff --git a/data/stopwords/stopwords-it.json b/data/stopwords/stopwords-it.json new file mode 100644 index 0000000..056d1f4 --- /dev/null +++ b/data/stopwords/stopwords-it.json @@ -0,0 +1 @@ +[ "ad", "al", "allo", "ai", "agli", "all", "agl", "alla", "alle", "con", "col", "coi", "da", "dal", "dallo", "dai", "dagli", "dall", "dagl", "dalla", "dalle", "di", "del", "dello", "dei", "degli", "dell", "degl", "della", "delle", "in", "nel", "nello", "nei", "negli", "nell", "negl", "nella", "nelle", "su", "sul", "sullo", "sui", "sugli", "sull", "sugl", "sulla", "sulle", "per", "tra", "contro", "io", "tu", "lui", "lei", "noi", "voi", "loro", "mio", "mia", "miei", "mie", "tuo", "tua", "tuoi", "tue", "suo", "sua", "suoi", "sue", "nostro", "nostra", "nostri", "nostre", "vostro", "vostra", "vostri", "vostre", "mi", "ti", "ci", "vi", "lo", "la", "li", "le", "gli", "ne", "il", "un", "uno", "una", "ma", "ed", "se", "perchè", "perché", "perche", "anche", "come", "dov", "dove", "che", "chi", "cui", "non", "più", "piu", "quale", "quanto", "quanti", "quanta", "quante", "quello", "quelli", "quella", "quelle", "questo", "questi", "questa", "queste", "si", "tutto", "tutti", "a", "c", "e", "i", "l", "o", "ho", "hai", "ha", "abbiamo", "avete", "hanno", "abbia", "abbiate", "abbiano", "avrò", "avro", "avrai", "avrà", "avra", "avremo", "avrete", "avranno", "avrei", "avresti", "avrebbe", "avremmo", "avreste", "avrebbero", "avevo", "avevi", "aveva", "avevamo", "avevate", "avevano", "ebbi", "avesti", "ebbe", "avemmo", "aveste", "ebbero", "avessi", "avesse", "avessimo", "avessero", "avendo", "avuto", "avuta", "avuti", "avute", "sono", "sei", "è", "é", "e", "siamo", "siete", "sia", "siate", "siano", "sarà", "sarai", "sarò", "saro", "saremo", "sarete", "saranno", "sarei", "saresti", "sarebbe", "saremmo", "sareste", "sarebbero", "ero", "eri", "era", "eravamo", "eravate", "erano", "fui", "fosti", "fu", "fummo", "foste", "furono", "fossi", "fosse", "fossimo", "fossero", "essendo", "faccio", "fai", "facciamo", "fanno", "faccia", "facciate", "facciano", "farà", "farai", "farò", "faremo", "farete", "faranno", "farei", "faresti", "farebbe", "faremmo", "fareste", "farebbero", "facevo", "facevi", "faceva", "facevamo", "facevate", "facevano", "feci", "facesti", "fece", "facemmo", "faceste", "fecero", "facessi", "facesse", "facessimo", "facessero", "facendo", "sto", "stai", "sta", "stiamo", "stanno", "stia", "stiate", "stiano", "starà", "starai", "starò", "staremo", "starete", "staranno", "starei", "staresti", "starebbe", "staremmo", "stareste", "starebbero", "stavo", "stavi", "stava", "stavamo", "stavate", "stavano", "stetti", "stesti", "stette", "stemmo", "steste", "stettero", "stessi", "stesse", "stessimo", "stessero", "stando" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-it.txt b/data/stopwords/stopwords-it.txt deleted file mode 100644 index 98ffee1..0000000 --- a/data/stopwords/stopwords-it.txt +++ /dev/null @@ -1,287 +0,0 @@ -ad -al -allo -ai -agli -all -agl -alla -alle -con -col -coi -da -dal -dallo -dai -dagli -dall -dagl -dalla -dalle -di -del -dello -dei -degli -dell -degl -della -delle -in -nel -nello -nei -negli -nell -negl -nella -nelle -su -sul -sullo -sui -sugli -sull -sugl -sulla -sulle -per -tra -contro -io -tu -lui -lei -noi -voi -loro -mio -mia -miei -mie -tuo -tua -tuoi -tue -suo -sua -suoi -sue -nostro -nostra -nostri -nostre -vostro -vostra -vostri -vostre -mi -ti -ci -vi -lo -la -li -le -gli -ne -il -un -uno -una -ma -ed -se -perchè -perché -perche -anche -come -dov -dove -che -chi -cui -non -più -piu -quale -quanto -quanti -quanta -quante -quello -quelli -quella -quelle -questo -questi -questa -queste -si -tutto -tutti -a -c -e -i -l -o -ho -hai -ha -abbiamo -avete -hanno -abbia -abbiate -abbiano -avrò -avro -avrai -avrà -avra -avremo -avrete -avranno -avrei -avresti -avrebbe -avremmo -avreste -avrebbero -avevo -avevi -aveva -avevamo -avevate -avevano -ebbi -avesti -ebbe -avemmo -aveste -ebbero -avessi -avesse -avessimo -avessero -avendo -avuto -avuta -avuti -avute -sono -sei -è -é -e -siamo -siete -sia -siate -siano -sarà -sarai -sarò -saro -saremo -sarete -saranno -sarei -saresti -sarebbe -saremmo -sareste -sarebbero -ero -eri -era -eravamo -eravate -erano -fui -fosti -fu -fummo -foste -furono -fossi -fosse -fossimo -fossero -essendo -faccio -fai -facciamo -fanno -faccia -facciate -facciano -farà -farai -farò -faremo -farete -faranno -farei -faresti -farebbe -faremmo -fareste -farebbero -facevo -facevi -faceva -facevamo -facevate -facevano -feci -facesti -fece -facemmo -faceste -fecero -facessi -facesse -facessimo -facessero -facendo -sto -stai -sta -stiamo -stanno -stia -stiate -stiano -starà -starai -starò -staremo -starete -staranno -starei -staresti -starebbe -staremmo -stareste -starebbero -stavo -stavi -stava -stavamo -stavate -stavano -stetti -stesti -stette -stemmo -steste -stettero -stessi -stesse -stessimo -stessero -stando diff --git a/data/stopwords/stopwords-ko.json b/data/stopwords/stopwords-ko.json new file mode 100644 index 0000000..ac5385b --- /dev/null +++ b/data/stopwords/stopwords-ko.json @@ -0,0 +1 @@ +[ "을", "의", "에", "이", "를", "으로", "은", "는", "가", "로", "하고", "과", "에서", "도", "와", "이다", "고", "부터", "까지", "께", "에는", "이라고", "만", "라고", "보다", "에도", "다", "토록", "에게", "나", "대로", "에서는", "이나", "이며", "요", "든", "으로써", "같이", "로는", "밖에", "과의", "며", "로부터", "처럼", "아", "라", "여", "으로는", "이고", "에서의", "이라는", "만에", "으로부터", "에서도", "와의", "엔", "만을", "부터는", "만의", "야", "까지의", "과는", "치고", "과를", "으로의", "까지는", "보다는", "만이", "에만", "로의" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-ko.txt b/data/stopwords/stopwords-ko.txt deleted file mode 100644 index a6746f7..0000000 --- a/data/stopwords/stopwords-ko.txt +++ /dev/null @@ -1,70 +0,0 @@ -을 -의 -에 -이 -를 -으로 -은 -는 -가 -로 -하고 -과 -에서 -도 -와 -이다 -고 -부터 -까지 -께 -에는 -이라고 -만 -라고 -보다 -에도 -다 -토록 -에게 -나 -대로 -에서는 -이나 -이며 -요 -든 -으로써 -같이 -로는 -밖에 -과의 -며 -로부터 -처럼 -아 -라 -여 -으로는 -이고 -에서의 -이라는 -만에 -으로부터 -에서도 -와의 -엔 -만을 -부터는 -만의 -야 -까지의 -과는 -치고 -과를 -으로의 -까지는 -보다는 -만이 -에만 -로의 \ No newline at end of file diff --git a/data/stopwords/stopwords-nb.json b/data/stopwords/stopwords-nb.json new file mode 100644 index 0000000..691c871 --- /dev/null +++ b/data/stopwords/stopwords-nb.json @@ -0,0 +1 @@ +[ "alle", "andre", "arbeid", "av", "begge", "bort", "bra", "bruke", "da", "denne", "der", "deres", "det", "din", "disse", "du", "eller", "en", "ene", "eneste", "enhver", "enn", "er", "et", "folk", "for", "fordi", "forsÛke", "fra", "fÅ", "fÛr", "fÛrst", "gjorde", "gjÛre", "god", "gÅ", "ha", "hadde", "han", "hans", "hennes", "her", "hva", "hvem", "hver", "hvilken", "hvis", "hvor", "hvordan", "hvorfor", "ikke", "inn", "innen", "kan", "kunne", "lage", "lang", "lik", "like", "makt", "mange", "med", "meg", "meget", "men", "mens", "mer", "mest", "min", "mye", "mÅ", "mÅte", "navn", "nei", "ny", "nÅ", "nÅr", "og", "ogsÅ", "om", "opp", "oss", "over", "part", "punkt", "pÅ", "rett", "riktig", "samme", "sant", "si", "siden", "sist", "skulle", "slik", "slutt", "som", "start", "stille", "tid", "til", "tilbake", "tilstand", "under", "ut", "uten", "var", "ved", "verdi", "vi", "vil", "ville", "vite", "vÅr", "vÖre", "vÖrt", "Å" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-nb.txt b/data/stopwords/stopwords-nb.txt deleted file mode 100644 index bb9edb1..0000000 --- a/data/stopwords/stopwords-nb.txt +++ /dev/null @@ -1,117 +0,0 @@ -alle -andre -arbeid -av -begge -bort -bra -bruke -da -denne -der -deres -det -din -disse -du -eller -en -ene -eneste -enhver -enn -er -et -folk -for -fordi -forsÛke -fra -fÅ -fÛr -fÛrst -gjorde -gjÛre -god -gÅ -ha -hadde -han -hans -hennes -her -hva -hvem -hver -hvilken -hvis -hvor -hvordan -hvorfor -ikke -inn -innen -kan -kunne -lage -lang -lik -like -makt -mange -med -meg -meget -men -mens -mer -mest -min -mye -mÅ -mÅte -navn -nei -ny -nÅ -nÅr -og -ogsÅ -om -opp -oss -over -part -punkt -pÅ -rett -riktig -samme -sant -si -siden -sist -skulle -slik -slutt -som -start -stille -tid -til -tilbake -tilstand -under -ut -uten -var -ved -verdi -vi -vil -ville -vite -vÅr -vÖre -vÖrt -Å diff --git a/data/stopwords/stopwords-nl.json b/data/stopwords/stopwords-nl.json new file mode 100644 index 0000000..27345f2 --- /dev/null +++ b/data/stopwords/stopwords-nl.json @@ -0,0 +1 @@ +[ "aan", "af", "al", "als", "bij", "dan", "dat", "die", "dit", "een", "en", "er", "had", "heb", "hem", "het", "hij", "hoe", "hun", "ik", "in", "is", "je", "kan", "me", "men", "met", "mij", "nog", "nu", "of", "ons", "ook", "te", "tot", "uit", "van", "was", "wat", "we", "wel", "wij", "zal", "ze", "zei", "zij", "zo", "zou" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-nl.txt b/data/stopwords/stopwords-nl.txt deleted file mode 100644 index 300c368..0000000 --- a/data/stopwords/stopwords-nl.txt +++ /dev/null @@ -1,48 +0,0 @@ -aan -af -al -als -bij -dan -dat -die -dit -een -en -er -had -heb -hem -het -hij -hoe -hun -ik -in -is -je -kan -me -men -met -mij -nog -nu -of -ons -ook -te -tot -uit -van -was -wat -we -wel -wij -zal -ze -zei -zij -zo -zou diff --git a/data/stopwords/stopwords-no.json b/data/stopwords/stopwords-no.json new file mode 100644 index 0000000..0ab491a --- /dev/null +++ b/data/stopwords/stopwords-no.json @@ -0,0 +1 @@ +[ "at", "av", "de", "den", "der", "det", "du", "en", "er", "et", "for", "fra", "før", "med", "og", "om", "over", "på", "som", "til", "ved", "år", "alle", "bare", "ble", "bort", "bra", "da", "deg", "dem", "denne", "dere", "deres", "det", "dette", "din", "disse", "dit", "ditt", "eller", "ene", "enn", "er", "et", "ett", "etter", "for", "fram", "først", "få", "god", "gå", "ha", "han", "hans", "har", "her", "hit", "hun", "hva", "hvem", "hver", "ikke", "inn", "ja", "jeg", "kan", "kom", "kun", "kunne", "lage", "lang", "lik", "like", "man", "mer", "min", "mot", "mye", "må", "måte", "ned", "nei", "noe", "noen", "ny", "nå", "når", "også", "opp", "oss", "seg", "selv", "si", "siden", "sin", "sine", "sist", "skal", "skulle", "slik", "som", "så", "sånn", "tid", "til", "under", "ut", "uten", "var", "ved", "vi", "vil", "vite", "vår", "å", "dei", "di", "då", "eg" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-no.txt b/data/stopwords/stopwords-no.txt deleted file mode 100644 index 4b14918..0000000 --- a/data/stopwords/stopwords-no.txt +++ /dev/null @@ -1,120 +0,0 @@ -at -av -de -den -der -det -du -en -er -et -for -fra -før -med -og -om -over -på -som -til -ved -år -alle -bare -ble -bort -bra -da -deg -dem -denne -dere -deres -det -dette -din -disse -dit -ditt -eller -ene -enn -er -et -ett -etter -for -fram -først -få -god -gå -ha -han -hans -har -her -hit -hun -hva -hvem -hver -ikke -inn -ja -jeg -kan -kom -kun -kunne -lage -lang -lik -like -man -mer -min -mot -mye -må -måte -ned -nei -noe -noen -ny -nå -når -også -opp -oss -seg -selv -si -siden -sin -sine -sist -skal -skulle -slik -som -så -sånn -tid -til -under -ut -uten -var -ved -vi -vil -vite -vår -å -dei -di -då -eg \ No newline at end of file diff --git a/data/stopwords/stopwords-pl.json b/data/stopwords/stopwords-pl.json new file mode 100644 index 0000000..9beec6f --- /dev/null +++ b/data/stopwords/stopwords-pl.json @@ -0,0 +1 @@ +[ "a", "aby", "ach", "acz", "aczkolwiek", "aj", "albo", "ale", "ależ", "ani", "aż", "bardziej", "bardzo", "bo", "bowiem", "by", "byli", "bynajmniej", "być", "był", "była", "było", "były", "będzie", "będą", "cali", "cała", "cały", "ci", "cię", "ciebie", "co", "cokolwiek", "coś", "czasami", "czasem", "czemu", "czy", "czyli", "daleko", "dla", "dlaczego", "dlatego", "do", "dobrze", "dokąd", "dość", "dużo", "dwa", "dwaj", "dwie", "dwoje", "dziś", "dzisiaj", "gdy", "gdyby", "gdyż", "gdzie", "gdziekolwiek", "gdzieś", "i", "ich", "ile", "im", "inna", "inne", "inny", "innych", "iż", "ja", "ją", "jak", "jakaś", "jakby", "jaki", "jakichś", "jakie", "jakiś", "jakiż", "jakkolwiek", "jako", "jakoś", "je", "jeden", "jedna", "jedno", "jednak", "jednakże", "jego", "jej", "jemu", "jest", "jestem", "jeszcze", "jeśli", "jeżeli", "już", "ją", "każdy", "kiedy", "kilka", "kimś", "kto", "ktokolwiek", "ktoś", "która", "które", "którego", "której", "który", "których", "którym", "którzy", "ku", "lat", "lecz", "lub", "ma", "mają", "mało", "mam", "mi", "mimo", "między", "mną", "mnie", "mogą", "moi", "moim", "moja", "moje", "może", "możliwe", "można", "mój", "mu", "musi", "my", "na", "nad", "nam", "nami", "nas", "nasi", "nasz", "nasza", "nasze", "naszego", "naszych", "natomiast", "natychmiast", "nawet", "nią", "nic", "nich", "nie", "niech", "niego", "niej", "niemu", "nigdy", "nim", "nimi", "niż", "no", "o", "obok", "od", "około", "on", "ona", "one", "oni", "ono", "oraz", "oto", "owszem", "pan", "pana", "pani", "po", "pod", "podczas", "pomimo", "ponad", "ponieważ", "powinien", "powinna", "powinni", "powinno", "poza", "prawie", "przecież", "przed", "przede", "przedtem", "przez", "przy", "roku", "również", "sam", "sama", "są", "się", "skąd", "sobie", "sobą", "sposób", "swoje", "ta", "tak", "taka", "taki", "takie", "także", "tam", "te", "tego", "tej", "temu", "ten", "teraz", "też", "to", "tobą", "tobie", "toteż", "trzeba", "tu", "tutaj", "twoi", "twoim", "twoja", "twoje", "twym", "twój", "ty", "tych", "tylko", "tym", "u", "w", "wam", "wami", "was", "wasz", "wasza", "wasze", "we", "według", "wiele", "wielu", "więc", "więcej", "wszyscy", "wszystkich", "wszystkie", "wszystkim", "wszystko", "wtedy", "wy", "właśnie", "z", "za", "zapewne", "zawsze", "ze", "zł", "znowu", "znów", "został", "żaden", "żadna", "żadne", "żadnych", "że", "żeby" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-pl.txt b/data/stopwords/stopwords-pl.txt deleted file mode 100644 index 93dac82..0000000 --- a/data/stopwords/stopwords-pl.txt +++ /dev/null @@ -1,277 +0,0 @@ -a -aby -ach -acz -aczkolwiek -aj -albo -ale -ależ -ani -aż -bardziej -bardzo -bo -bowiem -by -byli -bynajmniej -być -był -była -było -były -będzie -będą -cali -cała -cały -ci -cię -ciebie -co -cokolwiek -coś -czasami -czasem -czemu -czy -czyli -daleko -dla -dlaczego -dlatego -do -dobrze -dokąd -dość -dużo -dwa -dwaj -dwie -dwoje -dziś -dzisiaj -gdy -gdyby -gdyż -gdzie -gdziekolwiek -gdzieś -i -ich -ile -im -inna -inne -inny -innych -iż -ja -ją -jak -jakaś -jakby -jaki -jakichś -jakie -jakiś -jakiż -jakkolwiek -jako -jakoś -je -jeden -jedna -jedno -jednak -jednakże -jego -jej -jemu -jest -jestem -jeszcze -jeśli -jeżeli -już -ją -każdy -kiedy -kilka -kimś -kto -ktokolwiek -ktoś -która -które -którego -której -który -których -którym -którzy -ku -lat -lecz -lub -ma -mają -mało -mam -mi -mimo -między -mną -mnie -mogą -moi -moim -moja -moje -może -możliwe -można -mój -mu -musi -my -na -nad -nam -nami -nas -nasi -nasz -nasza -nasze -naszego -naszych -natomiast -natychmiast -nawet -nią -nic -nich -nie -niech -niego -niej -niemu -nigdy -nim -nimi -niż -no -o -obok -od -około -on -ona -one -oni -ono -oraz -oto -owszem -pan -pana -pani -po -pod -podczas -pomimo -ponad -ponieważ -powinien -powinna -powinni -powinno -poza -prawie -przecież -przed -przede -przedtem -przez -przy -roku -również -sam -sama -są -się -skąd -sobie -sobą -sposób -swoje -ta -tak -taka -taki -takie -także -tam -te -tego -tej -temu -ten -teraz -też -to -tobą -tobie -toteż -trzeba -tu -tutaj -twoi -twoim -twoja -twoje -twym -twój -ty -tych -tylko -tym -u -w -wam -wami -was -wasz -wasza -wasze -we -według -wiele -wielu -więc -więcej -wszyscy -wszystkich -wszystkie -wszystkim -wszystko -wtedy -wy -właśnie -z -za -zapewne -zawsze -ze -zł -znowu -znów -został -żaden -żadna -żadne -żadnych -że -żeby \ No newline at end of file diff --git a/data/stopwords/stopwords-pt.json b/data/stopwords/stopwords-pt.json new file mode 100644 index 0000000..ad83486 --- /dev/null +++ b/data/stopwords/stopwords-pt.json @@ -0,0 +1 @@ +[ "a", "à", "abril", "agosto", "ainda", "ano", "anos", "ao", "aos", "apenas", "as", "às", "até", "brasil", "com", "como", "contra", "da", "das", "de", "depois", "deve", "dezembro", "dia", "disse", "diz", "do", "dois", "dos", "e", "é", "ela", "ele", "em", "entre", "era", "está", "estado", "estão", "eu", "foi", "folha", "foram", "governo", "grande", "há", "hoje", "isso", "já", "local", "maio", "maior", "mais", "mas", "mercado", "mesmo", "mil", "milhões", "muito", "mundo", "na", "não", "nas", "no", "nos", "o", "ontem", "os", "ou", "país", "para", "paulo", "pela", "pelo", "pessoas", "pode", "por", "porque", "presidente", "quando", "que", "quem", "r", "rio", "são", "se", "segundo", "sem", "ser", "será", "seu", "seus", "só", "sobre", "sua", "também", "tem", "ter", "todos", "três", "um", "uma", "us", "vaia", "à", "acordo", "afirmou", "agora", "ainda", "além", "alguns", "ano", "anos", "antes", "ao", "aos", "apenas", "as", "às", "assim", "até", "banco", "bem", "brasil", "brasileira", "brasileiro", "brasília", "cada", "carlos", "casa", "caso", "cerca", "cidade", "com", "como", "congresso", "contra", "da", "das", "de", "depois", "desde", "deve", "dia", "dias", "dinheiro", "disse", "diz", "do", "dois", "dos", "duas", "durante", "e", "é", "economia", "ela", "ele", "eles", "em", "empresa", "empresas", "entre", "era", "especial", "essa", "esse", "esta", "está", "estado", "estão", "estava", "este", "eu", "eua", "exemplo", "faz", "fazer", "federal", "fernando", "fevereiro", "fhc", "filme", "final", "foi", "folha", "foram", "forma", "governo", "grande", "grupo", "há", "henrique", "história", "hoje", "inflação", "isso", "já", "janeiro", "josé", "lei", "local", "maior", "mais", "março", "mas", "me", "melhor", "menos", "mercado", "mês", "meses", "mesmo", "mil", "milhões", "ministro", "muito", "mundo", "na", "nacional", "nada", "não", "nas", "nem", "no", "nos", "nova", "novo", "o", "onde", "ontem", "os", "ou", "outra", "outro", "outros", "outubro", "país", "para", "parte", "partir", "passado", "paulo", "pela", "pelo", "pelos", "pessoas", "plano", "pode", "polícia", "política", "por", "porque", "preços", "presidente", "primeira", "primeiro", "programa", "projeto", "público", "qual", "qualquer", "quando", "quatro", "que", "quem", "r", "real", "reportagem", "rio", "são", "se", "segundo", "seja", "sem", "semana", "sempre", "sendo", "ser", "será", "seria", "seu", "seus", "sistema", "só", "sobre", "sp", "sua", "suas", "também", "tem", "têm", "tempo", "ter", "todo", "todos", "trabalho", "três", "tudo", "um", "uma", "us", "vai", "vez", "vida", "vocêa", "à", "acordo", "afirma", "afirmou", "agora", "ainda", "além", "alguns", "ano", "anos", "antes", "ao", "aos", "apenas", "após", "aqui", "área", "as", "às", "assim", "até", "aumento", "banco", "bem", "bilhões", "bom", "brasil", "brasileira", "brasileiro", "brasília", "cada", "câmara", "campanha", "candidato", "carlos", "casa", "caso", "central", "centro", "cerca", "cidade", "cinco", "cinema", "coisa", "com", "como", "congresso", "conta", "contra", "da", "dar", "das", "de", "depois", "deputado", "desde", "deve", "dia", "dias", "dinheiro", "direito", "diretor", "disse", "diz", "do", "dois", "dos", "duas", "durante", "e", "é", "economia", "econômica", "ela", "ele", "eles", "em", "empresa", "empresas", "enquanto", "então", "entre", "equipe", "era", "especial", "essa", "esse", "esta", "está", "estado", "estados", "estão", "estava", "este", "eu", "eua", "exemplo", "falta", "fato", "faz", "fazer", "federal", "fernando", "fez", "fhc", "ficou", "filho", "filme", "fim", "final", "foi", "folha", "fora", "foram", "forma", "governo", "grande", "grupo", "há", "havia", "henrique", "história", "hoje", "inflação", "início", "isso", "já", "janeiro", "jogo", "josé", "junho", "julho", "juros", "justiça", "lado", "lei", "livro", "local", "lugar", "maior", "mais", "mas", "me", "média", "meio", "melhor", "menos", "mercado", "mês", "meses", "mesma", "mesmo", "meu", "mil", "milhões", "minha", "ministério", "ministro", "momento", "muito", "mulher", "mundo", "na", "nacional", "nada", "não", "nas", "nem", "neste", "no", "noite", "nome", "nos", "nós", "nova", "novembro", "novo", "num", "numa", "número", "o", "onde", "ontem", "os", "ou", "outra", "outras", "outro", "outros", "país", "países", "para", "parte", "partido", "partir", "passado", "paulo", "pela", "pelo", "pelos", "período", "pesquisa", "pessoas", "plano", "pode", "podem", "poder", "polícia", "política", "pontos", "por", "porque", "pouco", "prazo", "preço", "preços", "presidente", "primeira", "primeiro", "problema", "problemas", "processo", "produção", "produtos", "programa", "projeto", "próprio", "pt", "público", "qual", "qualquer", "quando", "quanto", "quase", "quatro", "que", "quem", "quer", "r", "real", "recursos", "região", "relação", "reportagem", "rio", "são", "saúde", "se", "segundo", "seja", "sem", "semana", "sempre", "sendo", "ser", "será", "serão", "seria", "setembro", "setor", "seu", "seus", "sido", "silva", "sistema", "só", "sobre", "social", "sociedade", "sp", "sua", "suas", "sucursal", "sul", "também", "tão", "tel", "tem", "têm", "tempo", "ter", "teve", "tinha", "toda", "todas", "todo", "todos", "trabalho", "três", "tudo", "último", "um", "uma", "us", "vai", "valor", "vão", "vem", "vez", "vezes", "vida", "você", "zona" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-pt.txt b/data/stopwords/stopwords-pt.txt deleted file mode 100644 index 4739f55..0000000 --- a/data/stopwords/stopwords-pt.txt +++ /dev/null @@ -1,609 +0,0 @@ -a -à -abril -agosto -ainda -ano -anos -ao -aos -apenas -as -às -até -brasil -com -como -contra -da -das -de -depois -deve -dezembro -dia -disse -diz -do -dois -dos -e -é -ela -ele -em -entre -era -está -estado -estão -eu -foi -folha -foram -governo -grande -há -hoje -isso -já -local -maio -maior -mais -mas -mercado -mesmo -mil -milhões -muito -mundo -na -não -nas -no -nos -o -ontem -os -ou -país -para -paulo -pela -pelo -pessoas -pode -por -porque -presidente -quando -que -quem -r -rio -são -se -segundo -sem -ser -será -seu -seus -só -sobre -sua -também -tem -ter -todos -três -um -uma -us -vaia -à -acordo -afirmou -agora -ainda -além -alguns -ano -anos -antes -ao -aos -apenas -as -às -assim -até -banco -bem -brasil -brasileira -brasileiro -brasília -cada -carlos -casa -caso -cerca -cidade -com -como -congresso -contra -da -das -de -depois -desde -deve -dia -dias -dinheiro -disse -diz -do -dois -dos -duas -durante -e -é -economia -ela -ele -eles -em -empresa -empresas -entre -era -especial -essa -esse -esta -está -estado -estão -estava -este -eu -eua -exemplo -faz -fazer -federal -fernando -fevereiro -fhc -filme -final -foi -folha -foram -forma -governo -grande -grupo -há -henrique -história -hoje -inflação -isso -já -janeiro -josé -lei -local -maior -mais -março -mas -me -melhor -menos -mercado -mês -meses -mesmo -mil -milhões -ministro -muito -mundo -na -nacional -nada -não -nas -nem -no -nos -nova -novo -o -onde -ontem -os -ou -outra -outro -outros -outubro -país -para -parte -partir -passado -paulo -pela -pelo -pelos -pessoas -plano -pode -polícia -política -por -porque -preços -presidente -primeira -primeiro -programa -projeto -público -qual -qualquer -quando -quatro -que -quem -r -real -reportagem -rio -são -se -segundo -seja -sem -semana -sempre -sendo -ser -será -seria -seu -seus -sistema -só -sobre -sp -sua -suas -também -tem -têm -tempo -ter -todo -todos -trabalho -três -tudo -um -uma -us -vai -vez -vida -vocêa -à -acordo -afirma -afirmou -agora -ainda -além -alguns -ano -anos -antes -ao -aos -apenas -após -aqui -área -as -às -assim -até -aumento -banco -bem -bilhões -bom -brasil -brasileira -brasileiro -brasília -cada -câmara -campanha -candidato -carlos -casa -caso -central -centro -cerca -cidade -cinco -cinema -coisa -com -como -congresso -conta -contra -da -dar -das -de -depois -deputado -desde -deve -dia -dias -dinheiro -direito -diretor -disse -diz -do -dois -dos -duas -durante -e -é -economia -econômica -ela -ele -eles -em -empresa -empresas -enquanto -então -entre -equipe -era -especial -essa -esse -esta -está -estado -estados -estão -estava -este -eu -eua -exemplo -falta -fato -faz -fazer -federal -fernando -fez -fhc -ficou -filho -filme -fim -final -foi -folha -fora -foram -forma -governo -grande -grupo -há -havia -henrique -história -hoje -inflação -início -isso -já -janeiro -jogo -josé -junho -julho -juros -justiça -lado -lei -livro -local -lugar -maior -mais -mas -me -média -meio -melhor -menos -mercado -mês -meses -mesma -mesmo -meu -mil -milhões -minha -ministério -ministro -momento -muito -mulher -mundo -na -nacional -nada -não -nas -nem -neste -no -noite -nome -nos -nós -nova -novembro -novo -num -numa -número -o -onde -ontem -os -ou -outra -outras -outro -outros -país -países -para -parte -partido -partir -passado -paulo -pela -pelo -pelos -período -pesquisa -pessoas -plano -pode -podem -poder -polícia -política -pontos -por -porque -pouco -prazo -preço -preços -presidente -primeira -primeiro -problema -problemas -processo -produção -produtos -programa -projeto -próprio -pt -público -qual -qualquer -quando -quanto -quase -quatro -que -quem -quer -r -real -recursos -região -relação -reportagem -rio -são -saúde -se -segundo -seja -sem -semana -sempre -sendo -ser -será -serão -seria -setembro -setor -seu -seus -sido -silva -sistema -só -sobre -social -sociedade -sp -sua -suas -sucursal -sul -também -tão -tel -tem -têm -tempo -ter -teve -tinha -toda -todas -todo -todos -trabalho -três -tudo -último -um -uma -us -vai -valor -vão -vem -vez -vezes -vida -você -zona \ No newline at end of file diff --git a/data/stopwords/stopwords-ru.json b/data/stopwords/stopwords-ru.json new file mode 100644 index 0000000..10af28e --- /dev/null +++ b/data/stopwords/stopwords-ru.json @@ -0,0 +1 @@ +[ "а", "е", "и", "ж", "м", "о", "на", "не", "ни", "об", "но", "он", "мне", "мои", "мож", "она", "они", "оно", "мной", "много", "многочисленное", "многочисленная", "многочисленные", "многочисленный", "мною", "мой", "мог", "могут", "можно", "может", "можхо", "мор", "моя", "моё", "мочь", "над", "нее", "оба", "нам", "нем", "нами", "ними", "мимо", "немного", "одной", "одного", "менее", "однажды", "однако", "меня", "нему", "меньше", "ней", "наверху", "него", "ниже", "мало", "надо", "один", "одиннадцать", "одиннадцатый", "назад", "наиболее", "недавно", "миллионов", "недалеко", "между", "низко", "меля", "нельзя", "нибудь", "непрерывно", "наконец", "никогда", "никуда", "нас", "наш", "нет", "нею", "неё", "них", "мира", "наша", "наше", "наши", "ничего", "начала", "нередко", "несколько", "обычно", "опять", "около", "мы", "ну", "нх", "от", "отовсюду", "особенно", "нужно", "очень", "отсюда", "в", "во", "вон", "вниз", "внизу", "вокруг", "вот", "восемнадцать", "восемнадцатый", "восемь", "восьмой", "вверх", "вам", "вами", "важное", "важная", "важные", "важный", "вдали", "везде", "ведь", "вас", "ваш", "ваша", "ваше", "ваши", "впрочем", "весь", "вдруг", "вы", "все", "второй", "всем", "всеми", "времени", "время", "всему", "всего", "всегда", "всех", "всею", "всю", "вся", "всё", "всюду", "г", "год", "говорил", "говорит", "года", "году", "где", "да", "ее", "за", "из", "ли", "же", "им", "до", "по", "ими", "под", "иногда", "довольно", "именно", "долго", "позже", "более", "должно", "пожалуйста", "значит", "иметь", "больше", "пока", "ему", "имя", "пор", "пора", "потом", "потому", "после", "почему", "почти", "посреди", "ей", "два", "две", "двенадцать", "двенадцатый", "двадцать", "двадцатый", "двух", "его", "дел", "или", "без", "день", "занят", "занята", "занято", "заняты", "действительно", "давно", "девятнадцать", "девятнадцатый", "девять", "девятый", "даже", "алло", "жизнь", "далеко", "близко", "здесь", "дальше", "для", "лет", "зато", "даром", "первый", "перед", "затем", "зачем", "лишь", "десять", "десятый", "ею", "её", "их", "бы", "еще", "при", "был", "про", "процентов", "против", "просто", "бывает", "бывь", "если", "люди", "была", "были", "было", "будем", "будет", "будете", "будешь", "прекрасно", "буду", "будь", "будто", "будут", "ещё", "пятнадцать", "пятнадцатый", "друго", "другое", "другой", "другие", "другая", "других", "есть", "пять", "быть", "лучше", "пятый", "к", "ком", "конечно", "кому", "кого", "когда", "которой", "которого", "которая", "которые", "который", "которых", "кем", "каждое", "каждая", "каждые", "каждый", "кажется", "как", "какой", "какая", "кто", "кроме", "куда", "кругом", "с", "т", "у", "я", "та", "те", "уж", "со", "то", "том", "снова", "тому", "совсем", "того", "тогда", "тоже", "собой", "тобой", "собою", "тобою", "сначала", "только", "уметь", "тот", "тою", "хорошо", "хотеть", "хочешь", "хоть", "хотя", "свое", "свои", "твой", "своей", "своего", "своих", "свою", "твоя", "твоё", "раз", "уже", "сам", "там", "тем", "чем", "сама", "сами", "теми", "само", "рано", "самом", "самому", "самой", "самого", "семнадцать", "семнадцатый", "самим", "самими", "самих", "саму", "семь", "чему", "раньше", "сейчас", "чего", "сегодня", "себе", "тебе", "сеаой", "человек", "разве", "теперь", "себя", "тебя", "седьмой", "спасибо", "слишком", "так", "такое", "такой", "такие", "также", "такая", "сих", "тех", "чаще", "четвертый", "через", "часто", "шестой", "шестнадцать", "шестнадцатый", "шесть", "четыре", "четырнадцать", "четырнадцатый", "сколько", "сказал", "сказала", "сказать", "ту", "ты", "три", "эта", "эти", "что", "это", "чтоб", "этом", "этому", "этой", "этого", "чтобы", "этот", "стал", "туда", "этим", "этими", "рядом", "тринадцать", "тринадцатый", "этих", "третий", "тут", "эту", "суть", "чуть", "тысяч" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-ru.txt b/data/stopwords/stopwords-ru.txt deleted file mode 100644 index 9498480..0000000 --- a/data/stopwords/stopwords-ru.txt +++ /dev/null @@ -1,421 +0,0 @@ -а -е -и -ж -м -о -на -не -ни -об -но -он -мне -мои -мож -она -они -оно -мной -много -многочисленное -многочисленная -многочисленные -многочисленный -мною -мой -мог -могут -можно -может -можхо -мор -моя -моё -мочь -над -нее -оба -нам -нем -нами -ними -мимо -немного -одной -одного -менее -однажды -однако -меня -нему -меньше -ней -наверху -него -ниже -мало -надо -один -одиннадцать -одиннадцатый -назад -наиболее -недавно -миллионов -недалеко -между -низко -меля -нельзя -нибудь -непрерывно -наконец -никогда -никуда -нас -наш -нет -нею -неё -них -мира -наша -наше -наши -ничего -начала -нередко -несколько -обычно -опять -около -мы -ну -нх -от -отовсюду -особенно -нужно -очень -отсюда -в -во -вон -вниз -внизу -вокруг -вот -восемнадцать -восемнадцатый -восемь -восьмой -вверх -вам -вами -важное -важная -важные -важный -вдали -везде -ведь -вас -ваш -ваша -ваше -ваши -впрочем -весь -вдруг -вы -все -второй -всем -всеми -времени -время -всему -всего -всегда -всех -всею -всю -вся -всё -всюду -г -год -говорил -говорит -года -году -где -да -ее -за -из -ли -же -им -до -по -ими -под -иногда -довольно -именно -долго -позже -более -должно -пожалуйста -значит -иметь -больше -пока -ему -имя -пор -пора -потом -потому -после -почему -почти -посреди -ей -два -две -двенадцать -двенадцатый -двадцать -двадцатый -двух -его -дел -или -без -день -занят -занята -занято -заняты -действительно -давно -девятнадцать -девятнадцатый -девять -девятый -даже -алло -жизнь -далеко -близко -здесь -дальше -для -лет -зато -даром -первый -перед -затем -зачем -лишь -десять -десятый -ею -её -их -бы -еще -при -был -про -процентов -против -просто -бывает -бывь -если -люди -была -были -было -будем -будет -будете -будешь -прекрасно -буду -будь -будто -будут -ещё -пятнадцать -пятнадцатый -друго -другое -другой -другие -другая -других -есть -пять -быть -лучше -пятый -к -ком -конечно -кому -кого -когда -которой -которого -которая -которые -который -которых -кем -каждое -каждая -каждые -каждый -кажется -как -какой -какая -кто -кроме -куда -кругом -с -т -у -я -та -те -уж -со -то -том -снова -тому -совсем -того -тогда -тоже -собой -тобой -собою -тобою -сначала -только -уметь -тот -тою -хорошо -хотеть -хочешь -хоть -хотя -свое -свои -твой -своей -своего -своих -свою -твоя -твоё -раз -уже -сам -там -тем -чем -сама -сами -теми -само -рано -самом -самому -самой -самого -семнадцать -семнадцатый -самим -самими -самих -саму -семь -чему -раньше -сейчас -чего -сегодня -себе -тебе -сеаой -человек -разве -теперь -себя -тебя -седьмой -спасибо -слишком -так -такое -такой -такие -также -такая -сих -тех -чаще -четвертый -через -часто -шестой -шестнадцать -шестнадцатый -шесть -четыре -четырнадцать -четырнадцатый -сколько -сказал -сказала -сказать -ту -ты -три -эта -эти -что -это -чтоб -этом -этому -этой -этого -чтобы -этот -стал -туда -этим -этими -рядом -тринадцать -тринадцатый -этих -третий -тут -эту -суть -чуть -тысяч diff --git a/data/stopwords/stopwords-sv.json b/data/stopwords/stopwords-sv.json new file mode 100644 index 0000000..0155175 --- /dev/null +++ b/data/stopwords/stopwords-sv.json @@ -0,0 +1 @@ +[ "kunna", "om", "ovan", "enligt", "i enlighet med detta", "över", "faktiskt", "efter", "efteråt", "igen", "mot", "är inte", "alla", "tillåta", "tillåter", "nästan", "ensam", "längs", "redan", "också", "även om", "alltid", "am", "bland", "bland", "en", "och", "en annan", "någon", "någon", "hur som helst", "någon", "något", "ändå", "ändå", "var som helst", "isär", "visas", "uppskatta", "lämpligt", "är", "inte", "runt", "som", "åt sidan", "be", "frågar", "associerad", "vid", "tillgängliga", "bort", "väldigt", "vara", "blev", "eftersom", "bli", "blir", "blir", "varit", "innan", "förhand", "bakom", "vara", "tro", "nedan", "bredvid", "förutom", "bäst", "bättre", "mellan", "bortom", "både", "kort", "men", "genom", "c", "c'mon", "c: s", "kom", "kampanj", "kan", "kan inte", "kan inte", "cant", "orsaka", "orsaker", "viss", "säkerligen", "förändringar", "klart", "co", "com", "komma", "kommer", "om", "följaktligen", "överväga", "överväger", "innehålla", "innehållande", "innehåller", "motsvarande", "kunde", "kunde inte", "kurs", "närvarande", "definitivt", "beskrivits", "trots", "gjorde", "inte", "olika", "göra", "gör", "inte", "gör", "inte", "gjort", "ned", "nedåt", "under", "varje", "edu", "åtta", "antingen", "annars", "någon annanstans", "tillräckligt", "godkändes", "helt", "speciellt", "et", "etc", "även", "någonsin", "varje", "alla", "alla", "allt", "överallt", "ex", "exakt", "exempel", "utom", "långt", "få", "femte", "först", "finansiella", "fem", "följt", "efter", "följer", "för", "fd", "tidigare", "framåt", "fyra", "från", "ytterligare", "dessutom", "få", "blir", "få", "given", "ger", "gå", "går", "gå", "borta", "fick", "fått", "hälsningar", "hade", "hade inte", "händer", "knappast", "har", "har inte", "ha", "har inte", "med", "han", "han är", "hallå", "hjälpa", "hence", "henne", "här", "här finns", "härefter", "härmed", "häri", "härpå", "hennes", "själv", "hej", "honom", "själv", "hans", "hit", "förhoppningsvis", "hur", "howbeit", "dock", "jag skulle", "jag ska", "jag är", "jag har", "om", "ignoreras", "omedelbar", "i", "eftersom", "inc", "indeed", "indikera", "indikerade", "indikerar", "inre", "mån", "istället", "in", "inåt", "är", "är inte", "den", "det skulle", "det ska", "det är", "dess", "själv", "bara", "hålla", "håller", "hålls", "vet", "vet", "känd", "sista", "nyligen", "senare", "senare", "latterly", "minst", "mindre", "lest", "låt", "låt oss", "liknande", "gillade", "sannolikt", "lite", "ser", "ser", "ser", "ltd", "huvudsakligen", "många", "kan", "kanske", "mig", "betyda", "under tiden", "endast", "kanske", "mer", "dessutom", "mest", "mestadels", "mycket", "måste", "min", "själv", "namn", "nämligen", "nd", "nära", "nästan", "nödvändigt", "behöver", "behov", "varken", "aldrig", "ändå", "ny", "nästa", "nio", "ingen", "ingen", "icke", "ingen", "ingen", "eller", "normalt", "inte", "ingenting", "roman", "nu", "ingenstans", "uppenbarligen", "av", "off", "ofta", "oh", "ok", "okay", "gammal", "på", "en gång", "ett", "ettor", "endast", "på", "eller", "andra", "andra", "annars", "borde", "vår", "vårt", "oss", "ut", "utanför", "över", "övergripande", "egen", "särskilt", "särskilt", "per", "kanske", "placeras", "vänligen", "plus", "möjligt", "förmodligen", "förmodligen", "ger", "ganska", "citera", "kvartalsvis", "snarare", "verkligen", "rimligen", "om", "oavsett", "gäller", "relativt", "respektive", "höger", "sa", "samma", "såg", "säga", "säger", "säger", "andra", "det andra", "se", "ser", "verkar", "verkade", "informationsproblem", "verkar", "sett", "själv", "själva", "förnuftig", "skickas", "allvarlig", "allvarligt", "sju", "flera", "skall", "hon", "bör", "bör inte", "eftersom", "sex", "så", "några", "någon", "på något sätt", "någon", "något", "sometime", "ibland", "något", "någonstans", "snart", "sorry", "specificerade", "ange", "ange", "fortfarande", "sub", "sådan", "sup", "säker", "t s", "ta", "tas", "berätta", "tenderar", "än", "tacka", "tack", "thanx", "att", "det är", "brinner", "den", "deras", "deras", "dem", "själva", "sedan", "därifrån", "där", "det finns", "därefter", "därigenom", "därför", "däri", "theres", "därpå", "dessa", "de", "de hade", "de kommer", "de är", "de har", "tror", "tredje", "detta", "grundlig", "grundligt", "de", "though", "tre", "genom", "hela", "thru", "sålunda", "till", "tillsammans", "alltför", "tog", "mot", "mot", "försökte", "försöker", "verkligt", "försök", "försöker", "två gånger", "två", "enligt", "tyvärr", "såvida inte", "osannolikt", "tills", "åt", "upp", "på", "oss", "använda", "används", "användbar", "använder", "användning", "vanligtvis", "uucp", "värde", "olika", "mycket", "via", "viz", "vs", "vill", "vill", "var", "var inte", "sätt", "vi", "vi skulle", "vi kommer", "vi är", "vi har", "välkommen", "väl", "gick", "var", "var inte", "vad", "vad är", "oavsett", "när", "varifrån", "närhelst", "där", "var är", "varefter", "medan", "varigenom", "vari", "varpå", "varhelst", "huruvida", "som", "medan", "dit", "som", "vem är", "vem", "hela", "vem", "vars", "varför", "kommer", "villig", "önskar", "med", "inom", "utan", "kommer inte", "undrar", "skulle", "skulle inte", "ja", "ännu", "ni", "du skulle", "kommer du", "du är", "du har", "din", "själv", "er", "noll", "tjänsteman", "skarpt", "kritiserade" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-sv.txt b/data/stopwords/stopwords-sv.txt deleted file mode 100644 index 74c0a89..0000000 --- a/data/stopwords/stopwords-sv.txt +++ /dev/null @@ -1,547 +0,0 @@ -#----------------------------------------------------------------------- -# translated -#----------------------------------------------------------------------- - -kunna -om -ovan -enligt -i enlighet med detta -över -faktiskt -efter -efteråt -igen -mot -är inte -alla -tillåta -tillåter -nästan -ensam -längs -redan -också -även om -alltid -am -bland -bland -en -och -en annan -någon -någon -hur som helst -någon -något -ändå -ändå -var som helst -isär -visas -uppskatta -lämpligt -är -inte -runt -som -åt sidan -be -frågar -associerad -vid -tillgängliga -bort -väldigt -vara -blev -eftersom -bli -blir -blir -varit -innan -förhand -bakom -vara -tro -nedan -bredvid -förutom -bäst -bättre -mellan -bortom -både -kort -men -genom -c -c'mon -c: s -kom -kampanj -kan -kan inte -kan inte -cant -orsaka -orsaker -viss -säkerligen -förändringar -klart -co -com -komma -kommer -om -följaktligen -överväga -överväger -innehålla -innehållande -innehåller -motsvarande -kunde -kunde inte -kurs -närvarande -definitivt -beskrivits -trots -gjorde -inte -olika -göra -gör -inte -gör -inte -gjort -ned -nedåt -under -varje -edu -åtta -antingen -annars -någon annanstans -tillräckligt -godkändes -helt -speciellt -et -etc -även -någonsin -varje -alla -alla -allt -överallt -ex -exakt -exempel -utom -långt -få -femte -först -finansiella -fem -följt -efter -följer -för -fd -tidigare -framåt -fyra -från -ytterligare -dessutom -få -blir -få -given -ger -gå -går -gå -borta -fick -fått -hälsningar -hade -hade inte -händer -knappast -har -har inte -ha -har inte -med -han -han är -hallå -hjälpa -hence -henne -här -här finns -härefter -härmed -häri -härpå -hennes -själv -hej -honom -själv -hans -hit -förhoppningsvis -hur -howbeit -dock -jag skulle -jag ska -jag är -jag har -om -ignoreras -omedelbar -i -eftersom -inc -indeed -indikera -indikerade -indikerar -inre -mån -istället -in -inåt -är -är inte -den -det skulle -det ska -det är -dess -själv -bara -hålla -håller -hålls -vet -vet -känd -sista -nyligen -senare -senare -latterly -minst -mindre -lest -låt -låt oss -liknande -gillade -sannolikt -lite -ser -ser -ser -ltd -huvudsakligen -många -kan -kanske -mig -betyda -under tiden -endast -kanske -mer -dessutom -mest -mestadels -mycket -måste -min -själv -namn -nämligen -nd -nära -nästan -nödvändigt -behöver -behov -varken -aldrig -ändå -ny -nästa -nio -ingen -ingen -icke -ingen -ingen -eller -normalt -inte -ingenting -roman -nu -ingenstans -uppenbarligen -av -off -ofta -oh -ok -okay -gammal -på -en gång -ett -ettor -endast -på -eller -andra -andra -annars -borde -vår -vårt -oss -ut -utanför -över -övergripande -egen -särskilt -särskilt -per -kanske -placeras -vänligen -plus -möjligt -förmodligen -förmodligen -ger -ganska -citera -kvartalsvis -snarare -verkligen -rimligen -om -oavsett -gäller -relativt -respektive -höger -sa -samma -såg -säga -säger -säger -andra -det andra -se -ser -verkar -verkade -informationsproblem -verkar -sett -själv -själva -förnuftig -skickas -allvarlig -allvarligt -sju -flera -skall -hon -bör -bör inte -eftersom -sex -så -några -någon -på något sätt -någon -något -sometime -ibland -något -någonstans -snart -sorry -specificerade -ange -ange -fortfarande -sub -sådan -sup -säker -t s -ta -tas -berätta -tenderar -än -tacka -tack -thanx -att -det är -brinner -den -deras -deras -dem -själva -sedan -därifrån -där -det finns -därefter -därigenom -därför -däri -theres -därpå -dessa -de -de hade -de kommer -de är -de har -tror -tredje -detta -grundlig -grundligt -de -though -tre -genom -hela -thru -sålunda -till -tillsammans -alltför -tog -mot -mot -försökte -försöker -verkligt -försök -försöker -två gånger -två -enligt -tyvärr -såvida inte -osannolikt -tills -åt -upp -på -oss -använda -används -användbar -använder -användning -vanligtvis -uucp -värde -olika -mycket -via -viz -vs -vill -vill -var -var inte -sätt -vi -vi skulle -vi kommer -vi är -vi har -välkommen -väl -gick -var -var inte -vad -vad är -oavsett -när -varifrån -närhelst -där -var är -varefter -medan -varigenom -vari -varpå -varhelst -huruvida -som -medan -dit -som -vem är -vem -hela -vem -vars -varför -kommer -villig -önskar -med -inom -utan -kommer inte -undrar -skulle -skulle inte -ja -ännu -ni -du skulle -kommer du -du är -du har -din -själv -er -noll -tjänsteman -skarpt -kritiserade diff --git a/data/stopwords/stopwords-th.json b/data/stopwords/stopwords-th.json new file mode 100644 index 0000000..c3e35b8 --- /dev/null +++ b/data/stopwords/stopwords-th.json @@ -0,0 +1 @@ +[ "เก็บ", "เกิด", "เกิน", "เกินๆ", "เกี่ยวเนื่อง", "เกี่ยวกัน", "เกี่ยวกับ", "เกี่ยวข้อง", "เกี่ยวๆ", "เกือบ", "เกือบจะ", "เกือบๆ", "เขา", "เข้า", "เข้าใจ", "เขียน", "เคย", "เคยๆ", "เฉกเช่น", "เฉพาะ", "เฉย", "เฉยๆ", "เช่น", "เช่นเคย", "เช่นเดียวกัน", "เช่นเดียวกับ", "เช่นเมื่อ", "เช่นใด", "เช่นไร", "เช่นก่อน", "เช่นกัน", "เช่นดัง", "เช่นดังเก่า", "เช่นดังก่อน", "เช่นดังที่", "เช่นดังว่า", "เช่นที่", "เช่นที่เคย", "เช่นที่ว่า", "เช่นนั้น", "เช่นนั้นเอง", "เช่นนี้", "เชื่อ", "เชื่อถือ", "เชื่อมั่น", "เชื่อว่า", "เดิม", "เดิมที", "เดิมๆ", "เดียว", "เดี๋ยว", "เดี๋ยวก่อน", "เดียวกัน", "เดียวกับ", "เดี๋ยวนั้น", "เดี๋ยวนี้", "เต็มไปด้วย", "เต็มไปหมด", "เต็มๆ", "เถอะ", "เถิด", "เท่า", "เท่าใด", "เท่าไร", "เท่าไหร่", "เท่ากัน", "เท่ากับ", "เท่าที่", "เท่านั้น", "เท่านี้", "เธอ", "เน้น", "เนี่ย", "เนี่ยเอง", "เป็น", "เป็นเพื่อ", "เป็นแต่", "เป็นด้วย", "เป็นดัง", "เป็นต้น", "เป็นอัน", "เป็นอันมาก", "เป็นอาทิ", "เปลี่ยน", "เปลี่ยนแปลง", "เผื่อ", "เผื่อจะ", "เผื่อที่", "เผื่อว่า", "เพราะ", "เพราะฉะนั้น", "เพราะว่า", "เพิ่ง", "เพิ่งจะ", "เพิ่ม", "เพิ่มเติม", "เพี", "เพียง", "เพียงเพื่อ", "เพียงแค่", "เพียงแต่", "เพียงใด", "เพียงไหน", "เพียงพอ", "เพื่อ", "เพื่อให้", "เพื่อที่", "เพื่อว่า", "เมื่อ", "เมื่อเช้า", "เมื่อเย็น", "เมื่อใด", "เมื่อไร", "เมื่อไหร่", "เมื่อก่อน", "เมื่อครั้ง", "เมื่อครั้งก่อน", "เมื่อคราว", "เมื่อคราวก่อน", "เมื่อคราวที่", "เมื่อคืน", "เมื่อนั้น", "เมื่อนี้", "เมื่อวันวาน", "เมื่อวาน", "เยอะ", "เยอะแยะ", "เร็ว", "เร็วๆ", "เรา", "เราๆ", "เริ่ม", "เรียก", "เรียบ", "เรื่อย", "เรื่อยๆ", "เล็ก", "เล็กน้อย", "เล็กๆ", "เลย", "เล่าว่า", "เสร็จ", "เสร็จแล้ว", "เสียแล้ว", "เสียจน", "เสียด้วย", "เสียนี่", "เหตุ", "เหตุไร", "เหตุนั้น", "เหตุนี้", "เหตุมด", "เห็นแก่", "เห็นควร", "เห็นจะ", "เห็นว่า", "เหล่า", "เหล่านั้น", "เหล่านี้", "เหลือ", "เหลือเกิน", "เอง", "เอ็ง", "เอา", "แก", "แก่", "แก้ไข", "แค่", "แค่เพียง", "แค่ไหน", "แค่จะ", "แค่นั้น", "แค่นี้", "แค่ว่า", "แด่", "แต่", "แต่เดิม", "แต่เพียง", "แต่เมื่อ", "แต่ไร", "แต่ไหน", "แต่ก็", "แต่ก่อน", "แต่จะ", "แต่ต้อง", "แต่ถ้า", "แต่ทว่า", "แต่ที่", "แต่นั้น", "แต่ละ", "แต่ว่า", "แต่อย่างใด", "แท้", "แท้จริง", "แบบ", "แม้", "แม้แต่", "แม้กระทั่ง", "แม้นว่า", "แม้ว่า", "แยะ", "แล้ว", "แล้วเสร็จ", "แล้วแต่", "แล้วกัน", "แสดง", "แสดงว่า", "แห่ง", "แห่งโน้น", "แห่งใด", "แห่งไหน", "แห่งนั้น", "แห่งนี้", "แหละ", "โดย", "โดยเฉพาะ", "โดยเฉพาะอย่าง", "โดยเมื่อ", "โดยเร็ว", "โดยแท้", "โดยแท้จริง", "โดยง่าย", "โดยดี", "โดยดุษฎี", "โดยตลอด", "โดยทั่ว", "โดยทั่วไป", "โดยทั่วกัน", "โดยทั่วถึง", "โดยที่", "โดยนัย", "โดยปกติ", "โดยมัก", "โดยมักจะ", "โดยมาก", "โดยรวม", "โดยรวมๆ", "โดยละม่อม", "โดยลําดับ", "โดยส่วนใหญ่", "โดยส่วนมาก", "โดยส่วนรวม", "โต", "โตๆ", "ใกล้", "ใกล้ๆ", "ใคร", "ใคร่", "ใคร่จะ", "ใช่", "ใช้", "ใช่ไหม", "ใด", "ใดๆ", "ใต้", "ใน", "ในเมื่อ", "ในช่วง", "ในที่", "ในระหว่าง", "ให้", "ให้แก่", "ใหญ่", "ใหญ่โต", "ไกล", "ไกลๆ", "ไง", "ไฉน", "ได้", "ได้แก่", "ได้แต่", "ได้ที่", "ได้มา", "ได้รับ", "ไป", "ไม่", "ไม่เป็นไร", "ไม่ใช่", "ไม่ค่อย", "ไม่ค่อยเป็น", "ไม่ค่อยจะ", "ไม่ว่า", "ไร", "ไหน", "ไหนๆ", "ก็", "ก็แค่", "ก็แล้วแต่", "ก็ได้", "ก็คือ", "ก็จะ", "ก็ดี", "ก็ต่อเมื่อ", "ก็ตาม", "ก็ตามแต่", "ก็ตามที", "กระทั่ง", "กระทํา", "กระนั้น", "กระผม", "กลับ", "กล่าว", "กล่าวคือ", "กลุ่ม", "กลุ่มก้อน", "กลุ่มๆ", "กว่า", "กว้าง", "กว้างขวาง", "กว้างๆ", "ก่อน", "ก่อนหน้า", "ก่อนหน้านี้", "ก่อนๆ", "กัน", "กันเถอะ", "กันเอง", "กันและกัน", "กันไหม", "กันดีไหม", "กันดีกว่า", "กันนะ", "กับ", "การ", "กําลัง", "กําลังจะ", "กําหนด", "กู", "ขณะ", "ขณะเดียวกัน", "ขณะใด", "ขณะใดๆ", "ขณะที่", "ขณะนั้น", "ขณะนี้", "ขณะหนึ่ง", "ขวาง", "ขวางๆ", "ขอ", "ของ", "ขั้น", "ข้า", "ข้าง", "ข้างเคียง", "ข้างต้น", "ข้างบน", "ข้างล่าง", "ข้างๆ", "ขาด", "ข้าพเจ้า", "ข้าฯ", "ขึ้น", "คง", "คงจะ", "คงอยู่", "ครบ", "ครบครัน", "ครบถ้วน", "ครั้ง", "ครั้งใด", "ครั้งไหน", "ครั้งกระนั้น", "ครั้งก่อน", "ครั้งครา", "ครั้งคราว", "ครั้งที่", "ครั้งนั้น", "ครั้งนี้", "ครั้งละ", "ครั้งหนึ่ง", "ครั้งหลัง", "ครั้งหลังสุด", "ครั้งๆ", "ครัน", "ครับ", "ครา", "คราใด", "คราไหน", "คราที่", "ครานั้น", "ครานี้", "คราว", "คราวโน้น", "คราวใด", "คราวไหน", "คราวก่อน", "คราวที่", "คราวนั้น", "คราวนี้", "คราวละ", "คราวหน้า", "คราวหนึ่ง", "คราวหลัง", "คราวๆ", "คราหนึ่ง", "คล้าย", "คล้ายกัน", "คล้ายกันกับ", "คล้ายกับ", "คล้ายกับว่า", "คล้ายว่า", "ควร", "ความ", "ค่อน", "ค่อนข้าง", "ค่อนข้างจะ", "ค่อนมาทาง", "ค่อย", "ค่อยไปทาง", "ค่อยๆ", "คะ", "ค่ะ", "คํา", "คิด", "คิดว่า", "คือ", "คุณ", "คุณๆ", "ง่าย", "ง่ายๆ", "จง", "จด", "จนเมื่อ", "จนแม้", "จนแม้น", "จนกระทั่ง", "จนกว่า", "จนขณะนี้", "จนตลอด", "จนถึง", "จนทั่ว", "จนบัดนี้", "จรด", "จรดกับ", "จริง", "จริงจัง", "จริงๆ", "จริงๆ", "จวน", "จวนเจียน", "จวนจะ", "จวบ", "จวบกับ", "จวบจน", "จะ", "จ้ะ", "จ๊ะ", "จะได้", "จัง", "จังๆ", "จังๆ", "จัด", "จัดแจง", "จัดให้", "จัดการ", "จัดงาน", "จัดตั้ง", "จัดทํา", "จัดหา", "จับ", "จ้า", "จ้า", "จํา", "จําเป็น", "จาก", "จากนั้น", "จากนี้", "จากนี้ไป", "จําพวก", "จึง", "จึงเป็น", "จึงจะ", "จู่ๆ", "ฉะนั้น", "ฉะนี้", "ฉัน", "ช่วง", "ช่วงแรก", "ช่วงก่อน", "ช่วงต่อไป", "ช่วงถัดไป", "ช่วงท้าย", "ช่วงที่", "ช่วงนั้น", "ช่วงนี้", "ช่วงระหว่าง", "ช่วงหน้า", "ช่วงหลัง", "ช่วงๆ", "ช่วย", "ช้า", "ช้านาน", "ชาว", "ช้าๆ", "ซะ", "ซะก่อน", "ซะจน", "ซะจนกระทั่ง", "ซะจนถึง", "ซึ่ง", "ซึ่งได้แก่", "ซึ่งก็", "ซึ่งก็คือ", "ซึ่งกัน", "ซึ่งกันและกัน", "ซึ่งๆ", "ณ", "ด้วย", "ด้วยเช่นกัน", "ด้วยเพราะ", "ด้วยเหตุเพราะ", "ด้วยเหตุที่", "ด้วยเหตุนั้น", "ด้วยเหตุนี้", "ด้วยเหตุว่า", "ด้วยเหมือนกัน", "ด้วยกัน", "ด้วยที่", "ด้วยประการฉะนี้", "ด้วยว่า", "ดัง", "ดั่ง", "ดังเก่า", "ดั่งเก่า", "ดังเคย", "ดั่งเคย", "ดังเช่น", "ดั่งเช่น", "ดังเช่นที่", "ดั่งเช่นที่", "ดังเดิม", "ดั่งเดิม", "ดังเหมือน", "ดั่งเหมือน", "ดังแต่ก่อน", "ดั่งแต่ก่อน", "ดังแม้", "ดั่งแม้", "ดังกล่าว", "ดังกับ", "ดั่งกับ", "ดังกับว่า", "ดั่งกับว่า", "ดังจะ", "ดั่งจะ", "ดังต่อไปนี้", "ดังที่", "ดั่งที่", "ดังที่เคย", "ดังที่กล่าว", "ดังที่จะเป็น", "ดังนั้น", "ดังนี้", "ดังนี้เช่น", "ดังนี้เพราะ", "ดังว่า", "ดั่งว่า", "ดําเนิน", "ดําเนินไป", "ดําเนินการ", "ดําเนินงาน", "ด้าน", "ด้านๆ", "ดิฉัน", "ดี", "ดีๆ", "ดู", "ดูเหมือน", "ดูเหมือนว่า", "ดูแล", "ดูแล้ว", "ดูจะ", "ดูว่า", "ดูๆ", "ตน", "ตนเอง", "ตนฯ", "ตรง", "ตรงๆ", "ตลอด", "ตลอดเวลา", "ตลอดไป", "ตลอดกาล", "ตลอดกาลนาน", "ตลอดจน", "ตลอดถึง", "ตลอดทั้ง", "ตลอดทั่ว", "ตลอดทั่วถึง", "ตลอดทั่วทั้ง", "ตลอดปี", "ตลอดมา", "ตลอดระยะเวลา", "ตลอดวัน", "ตลอดศก", "ต่อ", "ต่อเมื่อ", "ต่อให้", "ต่อไป", "ต่อไปนี้", "ต่อกัน", "ต่อกับ", "ต้อง", "ต้องการ", "ต่อจาก", "ตอน", "ตอนแรก", "ตอนใด", "ตอนไหน", "ตอนก่อน", "ตอนต่อ", "ตอนต่อไป", "ตอนต่อมา", "ตอนถัดไป", "ตอนถัดมา", "ตอนที่", "ตอนที่แล้ว", "ตอนนั้น", "ตอนนี้", "ตอนสุดท้าย", "ตอนหน้า", "ตอนหลัง", "ตอนๆ", "ต่อมา", "ต่อว่า", "ต่อๆ", "ตะหาก", "ตั้ง", "ตั้งแต่", "ตั้งแต่แรก", "ตั้งแต่นั้น", "ตั้งแต่นี้", "ตั้งต้น", "ตั้งที่", "ตั้งอยู่", "ตัว", "ตัวเอง", "ตัวโน้น", "ตัวใด", "ตัวไหน", "ตัวที่", "ตัวนั้น", "ตัวนี้", "ตัวละ", "ตัวอย่างเช่น", "ตัวๆ", "ต่าง", "ต่างก็", "ต่างหาก", "ต่างๆ", "ตาม", "ตามแต่", "ตามด้วย", "ตามที่", "ตามๆ", "ถ้า", "ถ้าจะ", "ถ้าหาก", "ถึง", "ถึงเมื่อ", "ถึงเมื่อใด", "ถึงเมื่อไร", "ถึงแก่", "ถึงแม้", "ถึงแม้จะ", "ถึงแม้ว่า", "ถึงจะ", "ถึงบัดนั้น", "ถึงบัดนี้", "ถึงอย่างไร", "ถือ", "ถือว่า", "ถูก", "ถูกต้อง", "ถูกๆ", "ทรง", "ทว่า", "ทั้ง", "ทั้งเป็น", "ทั้งคน", "ทั้งตัว", "ทั้งที่", "ทั้งนั้น", "ทั้งนั้นเพราะ", "ทั้งนั้นด้วย", "ทั้งนี้", "ทั้งปวง", "ทั้งมวล", "ทั้งสิ้น", "ทั้งหมด", "ทั้งหลาย", "ทั้งๆ", "ทั้งๆ", "ทัน", "ทันใดนั้น", "ทันที", "ทันทีทันใด", "ทั่ว", "ทําให้", "ทําไม", "ทําไร", "ทําๆ", "ที", "ที่", "ที่", "ทีเดียว", "ทีเถอะ", "ที่แท้", "ที่แท้จริง", "ที่แล้ว", "ที่แห่งนั้น", "ทีใด", "ที่ใด", "ที่ได้", "ทีไร", "ที่ไหน", "ที่จริง", "ที่ซึ่ง", "ที่นั้น", "ที่นี้", "ทีละ", "ที่ละ", "ที่ว่า", "ที่สุด", "ทีๆ", "ที่ๆ", "ทุก", "ทุกเมื่อ", "ทุกแห่ง", "ทุกคน", "ทุกครั้ง", "ทุกครา", "ทุกคราว", "ทุกชิ้น", "ทุกตัว", "ทุกทาง", "ทุกที", "ทุกที่", "ทุกวัน", "ทุกวันนี้", "ทุกสิ่ง", "ทุกหน", "ทุกอย่าง", "ทุกอัน", "ทุกๆ", "นอก", "นอกเหนือ", "นอกจาก", "นอกจากที่", "นอกจากนั้น", "นอกจากนี้", "นอกจากว่า", "นอกนั้น", "น้อย", "น้อยกว่า", "น้อยๆ", "นะ", "น่ะ", "นัก", "นั่น", "นั้นไว", "นับแต่นี้", "นับจากนั้น", "นับจากนี้", "น่า", "นํา", "นาง", "นางสาว", "น่าจะ", "นาน", "นานๆ", "นําพา", "นํามา", "นาย", "นิด", "นิดหน่อย", "นิดๆ", "นี่", "นี้", "นี่เอง", "นี้เอง", "นี่แน่ะ", "นี้แหล่", "นี่แหละ", "นี่ไง", "นี่นา", "นู่น", "นู้น", "บน", "บอก", "บอกแล้ว", "บอกว่า", "บ่อย", "บ่อยกว่า", "บ่อยครั้ง", "บ่อยๆ", "บัดเดี๋ยวนี้", "บัดดล", "บัดนั้น", "บัดนี้", "บาง", "บ้าง", "บางแห่ง", "บางกว่า", "บางขณะ", "บางครั้ง", "บางครา", "บางคราว", "บางที", "บางที่", "บางๆ", "ปฏิบัติ", "ประกอบ", "ประการ", "ประการใด", "ประการฉะนี้", "ประการหนึ่ง", "ประมาณ", "ประสบ", "ปรับ", "ปรากฏ", "ปรากฏว่า", "ปัจจุบัน", "ปิด", "ผ่าน", "ผ่านๆ", "ผิด", "ผิดๆ", "ผู้", "ผู้ใด", "ฝ่าย", "ฝ่ายใด", "พบ", "พบว่า", "พยายาม", "พร้อม", "พร้อมเพียง", "พร้อมกัน", "พร้อมกับ", "พร้อมด้วย", "พร้อมทั้ง", "พร้อมที่", "พวก", "พวกเขา", "พวกเธอ", "พวกแก", "พวกโน้น", "พวกกัน", "พวกกู", "พวกคุณ", "พวกฉัน", "พวกท่าน", "พวกที่", "พวกนั้น", "พวกนี้", "พวกนู้น", "พวกมัน", "พวกมึง", "พอ", "พอเพียง", "พอเหมาะ", "พอแล้ว", "พอกัน", "พอควร", "พอจะ", "พอดี", "พอตัว", "พอที", "พอที่", "พอสม", "พอสมควร", "พอๆ", "พา", "พึง", "พึ่ง", "พื้นๆ", "พูด", "ภาค", "ภาย", "ภายใต้", "ภายนอก", "ภายหลัง", "มอง", "มองว่า", "มัก", "มักจะ", "มัน", "มั๊ย", "มา", "มาก", "มากกว่า", "มากมาย", "มิ", "มิใช่", "มิได้", "มิฉะนั้น", "มี", "มีแต่", "มึง", "มุ่ง", "มุ่งเน้น", "มุ่งหมาย", "ยก", "ยกให้", "ยงเพราะ", "ยอม", "ย่อม", "ยอมรับ", "ย่อย", "ยัง", "ยังแต่", "ยังโง้น", "ยังไง", "ยังคง", "ยังงั้น", "ยังงี้", "ยังจะ", "ยาก", "ยาว", "ยาวนาน", "ยิ่ง", "ยิ่งเมื่อ", "ยิ่งแล้ว", "ยิ่งใหญ่", "ยิ่งกว่า", "ยิ่งขึ้น", "ยิ่งขึ้นไป", "ยิ่งจน", "ยิ่งจะ", "ยิ่งนัก", "รวด", "รวดเร็ว", "รวม", "ร่วม", "รวมกัน", "ร่วมกัน", "รวมด้วย", "ร่วมด้วย", "รวมถึง", "รวมทั้ง", "ระยะ", "ระหว่าง", "รับ", "รึ", "รือ", "รือว่า", "ล้วน", "ล้วนแต่", "ล้วนจน", "ละ", "ล่าสุด", "วันใด", "วันไหน", "วันนั้น", "วันนี้", "สบาย", "สมัย", "สมัยโน้น", "สมัยก่อน", "สมัยนั้น", "สมัยนี้", "ส่วน", "ส่วนเกิน", "ส่วนใด", "ส่วนใหญ่", "ส่วนด้อย", "ส่วนดี", "ส่วนที่", "ส่วนน้อย", "ส่วนนั้น", "ส่วนมาก", "สั้น", "สั้นๆ", "สําคัญ", "สามารถ", "สิ่ง", "สิ่งใด", "สิ่งไหน", "สิ่งนั้น", "สิ่งนี้", "สิ้น", "สุด", "หน", "หนอ", "หนอย", "หน่อย", "หมด", "หมดกัน", "หมดสิ้น", "หรือเปล่า", "หรือไง", "หรือไม่", "หรือไร", "หรือยัง", "หลังจาก", "หาใช่", "หาก", "หากแม้", "หากแม้น", "หากแม้นว่า", "หากว่า", "หาความ", "หารือ", "อดีต", "อนึ่ง", "อยาก", "อย่าง", "อย่างเช่น", "อย่างเดียว", "อย่างโน้น", "อย่างใด", "อย่างไร", "อย่างไรเสีย", "อย่างไรก็", "อย่างไรก็ได้", "อย่างไหน", "อย่างดี", "อย่างที่", "อย่างน้อย", "อย่างนั้น", "อย่างนี้", "อย่างมาก", "อย่างยิ่ง", "อย่างละ", "อย่างหนึ่ง", "อย่างๆ", "อัน", "อันเนื่องมาจาก", "อันใด", "อันได้แก่", "อันไหน", "อันจะ", "อันที่", "อันที่จริง", "อันที่จะ", "อันละ", "อันๆ", "อาจ", "อาจเป็น", "อาจเป็นด้วย", "อาจจะ", "อีก", "อื่น", "อื่นๆ", "ฯ", "ฯพณฯ", "ฯล" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-th.txt b/data/stopwords/stopwords-th.txt deleted file mode 100644 index 01baee0..0000000 --- a/data/stopwords/stopwords-th.txt +++ /dev/null @@ -1,1047 +0,0 @@ -เก็บ -เกิด -เกิน -เกินๆ -เกี่ยวเนื่อง -เกี่ยวกัน -เกี่ยวกับ -เกี่ยวข้อง -เกี่ยวๆ -เกือบ -เกือบจะ -เกือบๆ -เขา -เข้า -เข้าใจ -เขียน -เคย -เคยๆ -เฉกเช่น -เฉพาะ -เฉย -เฉยๆ -เช่น -เช่นเคย -เช่นเดียวกัน -เช่นเดียวกับ -เช่นเมื่อ -เช่นใด -เช่นไร -เช่นก่อน -เช่นกัน -เช่นดัง -เช่นดังเก่า -เช่นดังก่อน -เช่นดังที่ -เช่นดังว่า -เช่นที่ -เช่นที่เคย -เช่นที่ว่า -เช่นนั้น -เช่นนั้นเอง -เช่นนี้ -เชื่อ -เชื่อถือ -เชื่อมั่น -เชื่อว่า -เดิม -เดิมที -เดิมๆ -เดียว -เดี๋ยว -เดี๋ยวก่อน -เดียวกัน -เดียวกับ -เดี๋ยวนั้น -เดี๋ยวนี้ -เต็มไปด้วย -เต็มไปหมด -เต็มๆ -เถอะ -เถิด -เท่า -เท่าใด -เท่าไร -เท่าไหร่ -เท่ากัน -เท่ากับ -เท่าที่ -เท่านั้น -เท่านี้ -เธอ -เน้น -เนี่ย -เนี่ยเอง -เป็น -เป็นเพื่อ -เป็นแต่ -เป็นด้วย -เป็นดัง -เป็นต้น -เป็นอัน -เป็นอันมาก -เป็นอาทิ -เปลี่ยน -เปลี่ยนแปลง -เผื่อ -เผื่อจะ -เผื่อที่ -เผื่อว่า -เพราะ -เพราะฉะนั้น -เพราะว่า -เพิ่ง -เพิ่งจะ -เพิ่ม -เพิ่มเติม -เพี -เพียง -เพียงเพื่อ -เพียงแค่ -เพียงแต่ -เพียงใด -เพียงไหน -เพียงพอ -เพื่อ -เพื่อให้ -เพื่อที่ -เพื่อว่า -เมื่อ -เมื่อเช้า -เมื่อเย็น -เมื่อใด -เมื่อไร -เมื่อไหร่ -เมื่อก่อน -เมื่อครั้ง -เมื่อครั้งก่อน -เมื่อคราว -เมื่อคราวก่อน -เมื่อคราวที่ -เมื่อคืน -เมื่อนั้น -เมื่อนี้ -เมื่อวันวาน -เมื่อวาน -เยอะ -เยอะแยะ -เร็ว -เร็วๆ -เรา -เราๆ -เริ่ม -เรียก -เรียบ -เรื่อย -เรื่อยๆ -เล็ก -เล็กน้อย -เล็กๆ -เลย -เล่าว่า -เสร็จ -เสร็จแล้ว -เสียแล้ว -เสียจน -เสียด้วย -เสียนี่ -เหตุ -เหตุไร -เหตุนั้น -เหตุนี้ -เหตุมด -เห็นแก่ -เห็นควร -เห็นจะ -เห็นว่า -เหล่า -เหล่านั้น -เหล่านี้ -เหลือ -เหลือเกิน -เอง -เอ็ง -เอา -แก -แก่ -แก้ไข -แค่ -แค่เพียง -แค่ไหน -แค่จะ -แค่นั้น -แค่นี้ -แค่ว่า -แด่ -แต่ -แต่เดิม -แต่เพียง -แต่เมื่อ -แต่ไร -แต่ไหน -แต่ก็ -แต่ก่อน -แต่จะ -แต่ต้อง -แต่ถ้า -แต่ทว่า -แต่ที่ -แต่นั้น -แต่ละ -แต่ว่า -แต่อย่างใด -แท้ -แท้จริง -แบบ -แม้ -แม้แต่ -แม้กระทั่ง -แม้นว่า -แม้ว่า -แยะ -แล้ว -แล้วเสร็จ -แล้วแต่ -แล้วกัน -แสดง -แสดงว่า -แห่ง -แห่งโน้น -แห่งใด -แห่งไหน -แห่งนั้น -แห่งนี้ -แหละ -โดย -โดยเฉพาะ -โดยเฉพาะอย่าง -โดยเมื่อ -โดยเร็ว -โดยแท้ -โดยแท้จริง -โดยง่าย -โดยดี -โดยดุษฎี -โดยตลอด -โดยทั่ว -โดยทั่วไป -โดยทั่วกัน -โดยทั่วถึง -โดยที่ -โดยนัย -โดยปกติ -โดยมัก -โดยมักจะ -โดยมาก -โดยรวม -โดยรวมๆ -โดยละม่อม -โดยลําดับ -โดยส่วนใหญ่ -โดยส่วนมาก -โดยส่วนรวม -โต -โตๆ -ใกล้ -ใกล้ๆ -ใคร -ใคร่ -ใคร่จะ -ใช่ -ใช้ -ใช่ไหม -ใด -ใดๆ -ใต้ -ใน -ในเมื่อ -ในช่วง -ในที่ -ในระหว่าง -ให้ -ให้แก่ -ใหญ่ -ใหญ่โต -ไกล -ไกลๆ -ไง -ไฉน -ได้ -ได้แก่ -ได้แต่ -ได้ที่ -ได้มา -ได้รับ -ไป -ไม่ -ไม่เป็นไร -ไม่ใช่ -ไม่ค่อย -ไม่ค่อยเป็น -ไม่ค่อยจะ -ไม่ว่า -ไร -ไหน -ไหนๆ -ก็ -ก็แค่ -ก็แล้วแต่ -ก็ได้ -ก็คือ -ก็จะ -ก็ดี -ก็ต่อเมื่อ -ก็ตาม -ก็ตามแต่ -ก็ตามที -กระทั่ง -กระทํา -กระนั้น -กระผม -กลับ -กล่าว -กล่าวคือ -กลุ่ม -กลุ่มก้อน -กลุ่มๆ -กว่า -กว้าง -กว้างขวาง -กว้างๆ -ก่อน -ก่อนหน้า -ก่อนหน้านี้ -ก่อนๆ -กัน -กันเถอะ -กันเอง -กันและกัน -กันไหม -กันดีไหม -กันดีกว่า -กันนะ -กับ -การ -กําลัง -กําลังจะ -กําหนด -กู -ขณะ -ขณะเดียวกัน -ขณะใด -ขณะใดๆ -ขณะที่ -ขณะนั้น -ขณะนี้ -ขณะหนึ่ง -ขวาง -ขวางๆ -ขอ -ของ -ขั้น -ข้า -ข้าง -ข้างเคียง -ข้างต้น -ข้างบน -ข้างล่าง -ข้างๆ -ขาด -ข้าพเจ้า -ข้าฯ -ขึ้น -คง -คงจะ -คงอยู่ -ครบ -ครบครัน -ครบถ้วน -ครั้ง -ครั้งใด -ครั้งไหน -ครั้งกระนั้น -ครั้งก่อน -ครั้งครา -ครั้งคราว -ครั้งที่ -ครั้งนั้น -ครั้งนี้ -ครั้งละ -ครั้งหนึ่ง -ครั้งหลัง -ครั้งหลังสุด -ครั้งๆ -ครัน -ครับ -ครา -คราใด -คราไหน -คราที่ -ครานั้น -ครานี้ -คราว -คราวโน้น -คราวใด -คราวไหน -คราวก่อน -คราวที่ -คราวนั้น -คราวนี้ -คราวละ -คราวหน้า -คราวหนึ่ง -คราวหลัง -คราวๆ -คราหนึ่ง -คล้าย -คล้ายกัน -คล้ายกันกับ -คล้ายกับ -คล้ายกับว่า -คล้ายว่า -ควร -ความ -ค่อน -ค่อนข้าง -ค่อนข้างจะ -ค่อนมาทาง -ค่อย -ค่อยไปทาง -ค่อยๆ -คะ -ค่ะ -คํา -คิด -คิดว่า -คือ -คุณ -คุณๆ -ง่าย -ง่ายๆ -จง -จด -จนเมื่อ -จนแม้ -จนแม้น -จนกระทั่ง -จนกว่า -จนขณะนี้ -จนตลอด -จนถึง -จนทั่ว -จนบัดนี้ -จรด -จรดกับ -จริง -จริงจัง -จริงๆ -จริงๆ -จวน -จวนเจียน -จวนจะ -จวบ -จวบกับ -จวบจน -จะ -จ้ะ -จ๊ะ -จะได้ -จัง -จังๆ -จังๆ -จัด -จัดแจง -จัดให้ -จัดการ -จัดงาน -จัดตั้ง -จัดทํา -จัดหา -จับ -จ้า -จ้า -จํา -จําเป็น -จาก -จากนั้น -จากนี้ -จากนี้ไป -จําพวก -จึง -จึงเป็น -จึงจะ -จู่ๆ -ฉะนั้น -ฉะนี้ -ฉัน -ช่วง -ช่วงแรก -ช่วงก่อน -ช่วงต่อไป -ช่วงถัดไป -ช่วงท้าย -ช่วงที่ -ช่วงนั้น -ช่วงนี้ -ช่วงระหว่าง -ช่วงหน้า -ช่วงหลัง -ช่วงๆ -ช่วย -ช้า -ช้านาน -ชาว -ช้าๆ -ซะ -ซะก่อน -ซะจน -ซะจนกระทั่ง -ซะจนถึง -ซึ่ง -ซึ่งได้แก่ -ซึ่งก็ -ซึ่งก็คือ -ซึ่งกัน -ซึ่งกันและกัน -ซึ่งๆ -ณ -ด้วย -ด้วยเช่นกัน -ด้วยเพราะ -ด้วยเหตุเพราะ -ด้วยเหตุที่ -ด้วยเหตุนั้น -ด้วยเหตุนี้ -ด้วยเหตุว่า -ด้วยเหมือนกัน -ด้วยกัน -ด้วยที่ -ด้วยประการฉะนี้ -ด้วยว่า -ดัง -ดั่ง -ดังเก่า -ดั่งเก่า -ดังเคย -ดั่งเคย -ดังเช่น -ดั่งเช่น -ดังเช่นที่ -ดั่งเช่นที่ -ดังเดิม -ดั่งเดิม -ดังเหมือน -ดั่งเหมือน -ดังแต่ก่อน -ดั่งแต่ก่อน -ดังแม้ -ดั่งแม้ -ดังกล่าว -ดังกับ -ดั่งกับ -ดังกับว่า -ดั่งกับว่า -ดังจะ -ดั่งจะ -ดังต่อไปนี้ -ดังที่ -ดั่งที่ -ดังที่เคย -ดังที่กล่าว -ดังที่จะเป็น -ดังนั้น -ดังนี้ -ดังนี้เช่น -ดังนี้เพราะ -ดังว่า -ดั่งว่า -ดําเนิน -ดําเนินไป -ดําเนินการ -ดําเนินงาน -ด้าน -ด้านๆ -ดิฉัน -ดี -ดีๆ -ดู -ดูเหมือน -ดูเหมือนว่า -ดูแล -ดูแล้ว -ดูจะ -ดูว่า -ดูๆ -ตน -ตนเอง -ตนฯ -ตรง -ตรงๆ -ตลอด -ตลอดเวลา -ตลอดไป -ตลอดกาล -ตลอดกาลนาน -ตลอดจน -ตลอดถึง -ตลอดทั้ง -ตลอดทั่ว -ตลอดทั่วถึง -ตลอดทั่วทั้ง -ตลอดปี -ตลอดมา -ตลอดระยะเวลา -ตลอดวัน -ตลอดศก -ต่อ -ต่อเมื่อ -ต่อให้ -ต่อไป -ต่อไปนี้ -ต่อกัน -ต่อกับ -ต้อง -ต้องการ -ต่อจาก -ตอน -ตอนแรก -ตอนใด -ตอนไหน -ตอนก่อน -ตอนต่อ -ตอนต่อไป -ตอนต่อมา -ตอนถัดไป -ตอนถัดมา -ตอนที่ -ตอนที่แล้ว -ตอนนั้น -ตอนนี้ -ตอนสุดท้าย -ตอนหน้า -ตอนหลัง -ตอนๆ -ต่อมา -ต่อว่า -ต่อๆ -ตะหาก -ตั้ง -ตั้งแต่ -ตั้งแต่แรก -ตั้งแต่นั้น -ตั้งแต่นี้ -ตั้งต้น -ตั้งที่ -ตั้งอยู่ -ตัว -ตัวเอง -ตัวโน้น -ตัวใด -ตัวไหน -ตัวที่ -ตัวนั้น -ตัวนี้ -ตัวละ -ตัวอย่างเช่น -ตัวๆ -ต่าง -ต่างก็ -ต่างหาก -ต่างๆ -ตาม -ตามแต่ -ตามด้วย -ตามที่ -ตามๆ -ถ้า -ถ้าจะ -ถ้าหาก -ถึง -ถึงเมื่อ -ถึงเมื่อใด -ถึงเมื่อไร -ถึงแก่ -ถึงแม้ -ถึงแม้จะ -ถึงแม้ว่า -ถึงจะ -ถึงบัดนั้น -ถึงบัดนี้ -ถึงอย่างไร -ถือ -ถือว่า -ถูก -ถูกต้อง -ถูกๆ -ทรง -ทว่า -ทั้ง -ทั้งเป็น -ทั้งคน -ทั้งตัว -ทั้งที่ -ทั้งนั้น -ทั้งนั้นเพราะ -ทั้งนั้นด้วย -ทั้งนี้ -ทั้งปวง -ทั้งมวล -ทั้งสิ้น -ทั้งหมด -ทั้งหลาย -ทั้งๆ -ทั้งๆ -ทัน -ทันใดนั้น -ทันที -ทันทีทันใด -ทั่ว -ทําให้ -ทําไม -ทําไร -ทําๆ -ที -ที่ -ที่ -ทีเดียว -ทีเถอะ -ที่แท้ -ที่แท้จริง -ที่แล้ว -ที่แห่งนั้น -ทีใด -ที่ใด -ที่ได้ -ทีไร -ที่ไหน -ที่จริง -ที่ซึ่ง -ที่นั้น -ที่นี้ -ทีละ -ที่ละ -ที่ว่า -ที่สุด -ทีๆ -ที่ๆ -ทุก -ทุกเมื่อ -ทุกแห่ง -ทุกคน -ทุกครั้ง -ทุกครา -ทุกคราว -ทุกชิ้น -ทุกตัว -ทุกทาง -ทุกที -ทุกที่ -ทุกวัน -ทุกวันนี้ -ทุกสิ่ง -ทุกหน -ทุกอย่าง -ทุกอัน -ทุกๆ -นอก -นอกเหนือ -นอกจาก -นอกจากที่ -นอกจากนั้น -นอกจากนี้ -นอกจากว่า -นอกนั้น -น้อย -น้อยกว่า -น้อยๆ -นะ -น่ะ -นัก -นั่น -นั้นไว -นับแต่นี้ -นับจากนั้น -นับจากนี้ -น่า -นํา -นาง -นางสาว -น่าจะ -นาน -นานๆ -นําพา -นํามา -นาย -นิด -นิดหน่อย -นิดๆ -นี่ -นี้ -นี่เอง -นี้เอง -นี่แน่ะ -นี้แหล่ -นี่แหละ -นี่ไง -นี่นา -นู่น -นู้น -บน -บอก -บอกแล้ว -บอกว่า -บ่อย -บ่อยกว่า -บ่อยครั้ง -บ่อยๆ -บัดเดี๋ยวนี้ -บัดดล -บัดนั้น -บัดนี้ -บาง -บ้าง -บางแห่ง -บางกว่า -บางขณะ -บางครั้ง -บางครา -บางคราว -บางที -บางที่ -บางๆ -ปฏิบัติ -ประกอบ -ประการ -ประการใด -ประการฉะนี้ -ประการหนึ่ง -ประมาณ -ประสบ -ปรับ -ปรากฏ -ปรากฏว่า -ปัจจุบัน -ปิด -ผ่าน -ผ่านๆ -ผิด -ผิดๆ -ผู้ -ผู้ใด -ฝ่าย -ฝ่ายใด -พบ -พบว่า -พยายาม -พร้อม -พร้อมเพียง -พร้อมกัน -พร้อมกับ -พร้อมด้วย -พร้อมทั้ง -พร้อมที่ -พวก -พวกเขา -พวกเธอ -พวกแก -พวกโน้น -พวกกัน -พวกกู -พวกคุณ -พวกฉัน -พวกท่าน -พวกที่ -พวกนั้น -พวกนี้ -พวกนู้น -พวกมัน -พวกมึง -พอ -พอเพียง -พอเหมาะ -พอแล้ว -พอกัน -พอควร -พอจะ -พอดี -พอตัว -พอที -พอที่ -พอสม -พอสมควร -พอๆ -พา -พึง -พึ่ง -พื้นๆ -พูด -ภาค -ภาย -ภายใต้ -ภายนอก -ภายหลัง -มอง -มองว่า -มัก -มักจะ -มัน -มั๊ย -มา -มาก -มากกว่า -มากมาย -มิ -มิใช่ -มิได้ -มิฉะนั้น -มี -มีแต่ -มึง -มุ่ง -มุ่งเน้น -มุ่งหมาย -ยก -ยกให้ -ยงเพราะ -ยอม -ย่อม -ยอมรับ -ย่อย -ยัง -ยังแต่ -ยังโง้น -ยังไง -ยังคง -ยังงั้น -ยังงี้ -ยังจะ -ยาก -ยาว -ยาวนาน -ยิ่ง -ยิ่งเมื่อ -ยิ่งแล้ว -ยิ่งใหญ่ -ยิ่งกว่า -ยิ่งขึ้น -ยิ่งขึ้นไป -ยิ่งจน -ยิ่งจะ -ยิ่งนัก -รวด -รวดเร็ว -รวม -ร่วม -รวมกัน -ร่วมกัน -รวมด้วย -ร่วมด้วย -รวมถึง -รวมทั้ง -ระยะ -ระหว่าง -รับ -รึ -รือ -รือว่า -ล้วน -ล้วนแต่ -ล้วนจน -ละ -ล่าสุด -วันใด -วันไหน -วันนั้น -วันนี้ -สบาย -สมัย -สมัยโน้น -สมัยก่อน -สมัยนั้น -สมัยนี้ -ส่วน -ส่วนเกิน -ส่วนใด -ส่วนใหญ่ -ส่วนด้อย -ส่วนดี -ส่วนที่ -ส่วนน้อย -ส่วนนั้น -ส่วนมาก -สั้น -สั้นๆ -สําคัญ -สามารถ -สิ่ง -สิ่งใด -สิ่งไหน -สิ่งนั้น -สิ่งนี้ -สิ้น -สุด -หน -หนอ -หนอย -หน่อย -หมด -หมดกัน -หมดสิ้น -หรือเปล่า -หรือไง -หรือไม่ -หรือไร -หรือยัง -หลังจาก -หาใช่ -หาก -หากแม้ -หากแม้น -หากแม้นว่า -หากว่า -หาความ -หารือ -อดีต -อนึ่ง -อยาก -อย่าง -อย่างเช่น -อย่างเดียว -อย่างโน้น -อย่างใด -อย่างไร -อย่างไรเสีย -อย่างไรก็ -อย่างไรก็ได้ -อย่างไหน -อย่างดี -อย่างที่ -อย่างน้อย -อย่างนั้น -อย่างนี้ -อย่างมาก -อย่างยิ่ง -อย่างละ -อย่างหนึ่ง -อย่างๆ -อัน -อันเนื่องมาจาก -อันใด -อันได้แก่ -อันไหน -อันจะ -อันที่ -อันที่จริง -อันที่จะ -อันละ -อันๆ -อาจ -อาจเป็น -อาจเป็นด้วย -อาจจะ -อีก -อื่น -อื่นๆ -ฯ -ฯพณฯ -ฯล diff --git a/data/stopwords/stopwords-tr.json b/data/stopwords/stopwords-tr.json new file mode 100644 index 0000000..aefa09c --- /dev/null +++ b/data/stopwords/stopwords-tr.json @@ -0,0 +1 @@ +[ "a", "acaba", "altı", "ama", "ancak", "artık", "asla", "aslında", "az", "b", "bana", "bazen", "bazı", "bazıları", "bazısı", "belki", "ben", "beni", "benim", "beş", "bile", "bir", "birçoğu", "birçok", "birçokları", "biri", "birisi", "birkaç", "birkaçı", "birşey", "birşeyi", "biz", "bize", "bizi", "bizim", "böyle", "böylece", "bu", "buna", "bunda", "bundan", "bunu", "bunun", "burada", "bütün", "c", "ç", "çoğu", "çoğuna", "çoğunu", "çok", "çünkü", "d", "da", "daha", "de", "değil", "demek", "diğer", "diğeri", "diğerleri", "diye", "dokuz", "dolayı", "dört", "e", "elbette", "en", "f", "fakat", "falan", "felan", "filan", "g", "gene", "gibi", "ğ", "h", "hâlâ", "hangi", "hangisi", "hani", "hatta", "hem", "henüz", "hep", "hepsi", "hepsine", "hepsini", "her", "her biri", "herkes", "herkese", "herkesi", "hiç", "hiç kimse", "hiçbiri", "hiçbirine", "hiçbirini", "ı", "i", "için", "içinde", "iki", "ile", "ise", "işte", "j", "k", "kaç", "kadar", "kendi", "kendine", "kendini", "ki", "kim", "kime", "kimi", "kimin", "kimisi", "l", "m", "madem", "mı", "mı", "mi", "mu", "mu", "mü", "mü", "n", "nasıl", "ne", "ne kadar", "ne zaman", "neden", "nedir", "nerde", "nerede", "nereden", "nereye", "nesi", "neyse", "niçin", "niye", "o", "on", "ona", "ondan", "onlar", "onlara", "onlardan", "onların", "onların", "onu", "onun", "orada", "oysa", "oysaki", "ö", "öbürü", "ön", "önce", "ötürü", "öyle", "p", "r", "rağmen", "s", "sana", "sekiz", "sen", "senden", "seni", "senin", "siz", "sizden", "size", "sizi", "sizin", "son", "sonra", "ş", "şayet", "şey", "şeyden", "şeye", "şeyi", "şeyler", "şimdi", "şöyle", "şu", "şuna", "şunda", "şundan", "şunlar", "şunu", "şunun", "t", "tabi", "tamam", "tüm", "tümü", "u", "ü", "üç", "üzere", "v", "var", "ve", "veya", "veyahut", "y", "ya", "ya da", "yani", "yedi", "yerine", "yine", "yoksa", "z", "zaten", "zira" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-tr.txt b/data/stopwords/stopwords-tr.txt deleted file mode 100644 index 6245dd1..0000000 --- a/data/stopwords/stopwords-tr.txt +++ /dev/null @@ -1,223 +0,0 @@ -a -acaba -altı -ama -ancak -artık -asla -aslında -az -b -bana -bazen -bazı -bazıları -bazısı -belki -ben -beni -benim -beş -bile -bir -birçoğu -birçok -birçokları -biri -birisi -birkaç -birkaçı -birşey -birşeyi -biz -bize -bizi -bizim -böyle -böylece -bu -buna -bunda -bundan -bunu -bunun -burada -bütün -c -ç -çoğu -çoğuna -çoğunu -çok -çünkü -d -da -daha -de -değil -demek -diğer -diğeri -diğerleri -diye -dokuz -dolayı -dört -e -elbette -en -f -fakat -falan -felan -filan -g -gene -gibi -ğ -h -hâlâ -hangi -hangisi -hani -hatta -hem -henüz -hep -hepsi -hepsine -hepsini -her -her biri -herkes -herkese -herkesi -hiç -hiç kimse -hiçbiri -hiçbirine -hiçbirini -ı -i -için -içinde -iki -ile -ise -işte -j -k -kaç -kadar -kendi -kendine -kendini -ki -kim -kime -kimi -kimin -kimisi -l -m -madem -mı -mı -mi -mu -mu -mü -mü -n -nasıl -ne -ne kadar -ne zaman -neden -nedir -nerde -nerede -nereden -nereye -nesi -neyse -niçin -niye -o -on -ona -ondan -onlar -onlara -onlardan -onların -onların -onu -onun -orada -oysa -oysaki -ö -öbürü -ön -önce -ötürü -öyle -p -r -rağmen -s -sana -sekiz -sen -senden -seni -senin -siz -sizden -size -sizi -sizin -son -sonra -ş -şayet -şey -şeyden -şeye -şeyi -şeyler -şimdi -şöyle -şu -şuna -şunda -şundan -şunlar -şunu -şunun -t -tabi -tamam -tüm -tümü -u -ü -üç -üzere -v -var -ve -veya -veyahut -y -ya -ya da -yani -yedi -yerine -yine -yoksa -z -zaten -zira \ No newline at end of file diff --git a/data/stopwords/stopwords-zh.json b/data/stopwords/stopwords-zh.json new file mode 100644 index 0000000..fcb5db6 --- /dev/null +++ b/data/stopwords/stopwords-zh.json @@ -0,0 +1 @@ +[ "的", "一", "不", "在", "人", "有", "是", "为", "以", "于", "上", "他", "而", "后", "之", "来", "及", "了", "因", "下", "可", "到", "由", "这", "与", "也", "此", "但", "并", "个", "其", "已", "无", "小", "我", "们", "起", "最", "再", "今", "去", "好", "只", "又", "或", "很", "亦", "某", "把", "那", "你", "乃", "它", "吧", "被", "比", "别", "趁", "当", "从", "到", "得", "打", "凡", "儿", "尔", "该", "各", "给", "跟", "和", "何", "还", "即", "几", "既", "看", "据", "距", "靠", "啦", "了", "另", "么", "每", "们", "嘛", "拿", "哪", "那", "您", "凭", "且", "却", "让", "仍", "啥", "如", "若", "使", "谁", "虽", "随", "同", "所", "她", "哇", "嗡", "往", "哪", "些", "向", "沿", "哟", "用", "于", "咱", "则", "怎", "曾", "至", "致", "着", "诸", "自" ] \ No newline at end of file diff --git a/data/stopwords/stopwords-zh.txt b/data/stopwords/stopwords-zh.txt deleted file mode 100644 index 955ff2b..0000000 --- a/data/stopwords/stopwords-zh.txt +++ /dev/null @@ -1,125 +0,0 @@ -的 -一 -不 -在 -人 -有 -是 -为 -以 -于 -上 -他 -而 -后 -之 -来 -及 -了 -因 -下 -可 -到 -由 -这 -与 -也 -此 -但 -并 -个 -其 -已 -无 -小 -我 -们 -起 -最 -再 -今 -去 -好 -只 -又 -或 -很 -亦 -某 -把 -那 -你 -乃 -它 -吧 -被 -比 -别 -趁 -当 -从 -到 -得 -打 -凡 -儿 -尔 -该 -各 -给 -跟 -和 -何 -还 -即 -几 -既 -看 -据 -距 -靠 -啦 -了 -另 -么 -每 -们 -嘛 -拿 -哪 -那 -您 -凭 -且 -却 -让 -仍 -啥 -如 -若 -使 -谁 -虽 -随 -同 -所 -她 -哇 -嗡 -往 -哪 -些 -向 -沿 -哟 -用 -于 -咱 -则 -怎 -曾 -至 -致 -着 -诸 -自 \ No newline at end of file From 32ccd152b438e789be9b426fefdbacec5af8877d Mon Sep 17 00:00:00 2001 From: knod Date: Thu, 22 Dec 2016 08:07:21 -0500 Subject: [PATCH 2/2] json used, tests passing --- lib/stopwords.js | 19 ++++++------------- lib/stopwordsdata.js | 25 +++++++++++++++++++++++++ src/stopwords.coffee | 15 +++++---------- src/stopwordsdata.coffee | 23 +++++++++++++++++++++++ 4 files changed, 59 insertions(+), 23 deletions(-) create mode 100644 lib/stopwordsdata.js create mode 100644 src/stopwordsdata.coffee diff --git a/lib/stopwords.js b/lib/stopwords.js index 2a1888d..5b34ece 100644 --- a/lib/stopwords.js +++ b/lib/stopwords.js @@ -1,28 +1,21 @@ // Generated by CoffeeScript 2.0.0-beta7 void function () { - var _, cache, candiateWords, fs, getFilePath, path, removePunctuation, stopwords; - path = require('path'); - fs = require('fs'); + var _, cache, candiateWords, removePunctuation, stopwords, stopwordsData; _ = require('lodash'); + stopwordsData = require('./stopwordsdata'); cache = {}; - getFilePath = function (language) { - return path.join(__dirname, '..', 'data', 'stopwords', 'stopwords-' + language + '.txt'); - }; module.exports = stopwords = function (content, language) { - var count, filePath, overlappingStopwords, stopWords, strippedInput, words; + var count, overlappingStopwords, stopWords, strippedInput, words; if (null == language) language = 'en'; - filePath = getFilePath(language); - if (!fs.existsSync(filePath)) { + stopWords = stopwordsData[language]; + if (!stopWords) { console.error("WARNING: No stopwords file found for '" + language + "' - defaulting to English!"); - filePath = getFilePath('en'); + stopWords = stopwordsData.en; } if (cache.hasOwnProperty(language)) { stopWords = cache[language]; } else { - stopWords = fs.readFileSync(filePath).toString().split('\n').filter(function (s) { - return s.length > 0; - }); cache[language] = stopWords; } strippedInput = removePunctuation(content); diff --git a/lib/stopwordsdata.js b/lib/stopwordsdata.js new file mode 100644 index 0000000..ca5fd59 --- /dev/null +++ b/lib/stopwordsdata.js @@ -0,0 +1,25 @@ +// Generated by CoffeeScript 2.0.0-beta7 +module.exports = { + ar: require('../data/stopwords/stopwords-ar'), + bg: require('../data/stopwords/stopwords-bg'), + cs: require('../data/stopwords/stopwords-cs'), + da: require('../data/stopwords/stopwords-da'), + de: require('../data/stopwords/stopwords-de'), + en: require('../data/stopwords/stopwords-en'), + es: require('../data/stopwords/stopwords-es'), + fi: require('../data/stopwords/stopwords-fi'), + fr: require('../data/stopwords/stopwords-fr'), + hu: require('../data/stopwords/stopwords-hu'), + id: require('../data/stopwords/stopwords-id'), + it: require('../data/stopwords/stopwords-it'), + ko: require('../data/stopwords/stopwords-ko'), + nb: require('../data/stopwords/stopwords-nb'), + no: require('../data/stopwords/stopwords-no'), + pl: require('../data/stopwords/stopwords-pl'), + pt: require('../data/stopwords/stopwords-pt'), + ru: require('../data/stopwords/stopwords-ru'), + sv: require('../data/stopwords/stopwords-sv'), + th: require('../data/stopwords/stopwords-th'), + tr: require('../data/stopwords/stopwords-tr'), + zh: require('../data/stopwords/stopwords-zh') +}; diff --git a/src/stopwords.coffee b/src/stopwords.coffee index ecebd0c..8c0aa8b 100644 --- a/src/stopwords.coffee +++ b/src/stopwords.coffee @@ -1,26 +1,21 @@ -path = require('path') -fs = require('fs') _ = require('lodash') +stopwordsData = require('./stopwordsdata') cache = {} -getFilePath = (language) -> - path.join(__dirname, "..", "data", "stopwords", "stopwords-#{language}.txt") - # Given a language, loads a list of stop words for that language # and then returns which of those words exist in the given content module.exports = stopwords = (content, language = 'en') -> - filePath = getFilePath(language) - if !fs.existsSync(filePath) + stopWords = stopwordsData[ language ] + + if !stopWords console.error("WARNING: No stopwords file found for '#{language}' - defaulting to English!") - filePath = getFilePath('en') + stopWords = stopwordsData[ 'en' ] if cache.hasOwnProperty(language) stopWords = cache[language] else - stopWords = fs.readFileSync(filePath).toString().split('\n') - .filter((s) -> s.length > 0) cache[language] = stopWords strippedInput = removePunctuation(content) diff --git a/src/stopwordsdata.coffee b/src/stopwordsdata.coffee new file mode 100644 index 0000000..c6e779d --- /dev/null +++ b/src/stopwordsdata.coffee @@ -0,0 +1,23 @@ +module.exports = + ar: require('../data/stopwords/stopwords-ar') + bg: require('../data/stopwords/stopwords-bg') + cs: require('../data/stopwords/stopwords-cs') + da: require('../data/stopwords/stopwords-da') + de: require('../data/stopwords/stopwords-de') + en: require('../data/stopwords/stopwords-en') + es: require('../data/stopwords/stopwords-es') + fi: require('../data/stopwords/stopwords-fi') + fr: require('../data/stopwords/stopwords-fr') + hu: require('../data/stopwords/stopwords-hu') + id: require('../data/stopwords/stopwords-id') + it: require('../data/stopwords/stopwords-it') + ko: require('../data/stopwords/stopwords-ko') + nb: require('../data/stopwords/stopwords-nb') + no: require('../data/stopwords/stopwords-no') + pl: require('../data/stopwords/stopwords-pl') + pt: require('../data/stopwords/stopwords-pt') + ru: require('../data/stopwords/stopwords-ru') + sv: require('../data/stopwords/stopwords-sv') + th: require('../data/stopwords/stopwords-th') + tr: require('../data/stopwords/stopwords-tr') + zh: require('../data/stopwords/stopwords-zh') \ No newline at end of file