diff --git a/lib/tasks/entry.rake b/lib/tasks/entry.rake index f91f2e60..7bf973ff 100644 --- a/lib/tasks/entry.rake +++ b/lib/tasks/entry.rake @@ -142,18 +142,11 @@ namespace :entry do tokenizer = WordsCounted::Tokeniser.new(entries_bodies).tokenise(exclude: Entry::WORDS_NOT_TO_COUNT) total_words = tokenizer.count - # counter = WordsCounted.count(entries_bodies) - # most_frequent = counter.token_frequency.first(400).select { |w| !Entry::COMMON_WORDS.include?(w[0]) }.first(40).map { |w| "#{w[0]}: #{number_with_delimiter(w[1])}" } - avg_words = total_words / all_entries.count total_chars = entries_bodies.length avg_chars = total_chars / all_entries.count avg_tweets_per_post = ((avg_chars).to_f / 280).ceil - grouped_words = total_words.group_by(&:itself).transform_values(&:count).sort_by { |_k, v| v }.reverse.to_h - grouped_words = grouped_words.select { |word, count| !Entry::WORDS_NOT_TO_COUNT.include?(word) } - grouped_words.first(25) - p "Users created: #{number_with_delimiter(User.where("created_at >= '#{year}-01-01'::DATE AND created_at <= '#{year}-12-31'::DATE").count)}" p "Entries created in #{year}: #{number_with_delimiter(Entry.where("created_at >= '#{year}-01-01'::DATE AND created_at <= '#{year}-12-31'::DATE").count)}" p "Entries for #{year}: #{number_with_delimiter(all_entries.count)}" @@ -161,8 +154,11 @@ namespace :entry do p "Avg words per post: #{number_with_delimiter(avg_words)}" p "Total characters: #{number_with_delimiter(total_chars)}" p "Avg characters per post: #{number_with_delimiter(avg_chars)} (#{avg_tweets_per_post} tweets)" - # p "Most Frequent Words:" - # puts most_frequent + + counter = WordsCounted.count(entries_bodies) + most_frequent = counter.token_frequency.first(400).select { |w| !Entry::COMMON_WORDS.include?(w[0]) }.first(40).map { |w| "#{w[0]}: #{number_with_delimiter(w[1])}" } + p "Most Frequent Words:" + puts most_frequent p "*"*100 end