Skip to content

Commit

Permalink
Fix sklearn.CountVectorizer warnings (#211)
Browse files Browse the repository at this point in the history
  • Loading branch information
ygorg authored Dec 9, 2022
1 parent a24dddd commit 8f1d05d
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions pke/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,14 +381,15 @@ def compute_lda_model(documents,
# get the stoplist from pke.lang because CountVectorizer's built-in stop
# word list only covers English at the moment
if stoplist is None:
stoplist = stopwords.get(language)
# CountVectorizer expects stop_words to be a list, but stopwords.get
# returns a set, so convert it
stoplist = list(stopwords.get(language))
tf_vectorizer = CountVectorizer(
stop_words=stoplist)
tf = tf_vectorizer.fit_transform(texts)

# extract vocabulary
vocabulary = tf_vectorizer.get_feature_names()
# get_feature_names raises a deprecation warning; use get_feature_names_out
vocabulary = tf_vectorizer.get_feature_names_out()

# create LDA model and train
lda_model = LatentDirichletAllocation(n_components=n_topics,
Expand Down

0 comments on commit 8f1d05d

Please sign in to comment.