Skip to content

Commit

Permalink
Fix words counter
Browse files Browse the repository at this point in the history
  • Loading branch information
parterburn committed Dec 14, 2024
1 parent 5607a48 commit 2879cf4
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ gem 'randomized_field', '~> 1.0' # builds user_keys
gem 'rest-client' # RESTClient
gem 'rubyzip', '~> 2'
gem 'summernote-rails', '~> 0.8.20.0', git: "https://github.com/parterburn/summernote-rails"
gem 'words_counted' # Year in Review
gem 'words_counted', '~> 1.0', '>= 1.0.3' # Year in Review
gem 'zip-zip'
gem "chartkick", "~> 5"
gem "loofah", ">= 2.5"
Expand Down
4 changes: 2 additions & 2 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ GEM
websocket-driver (0.7.6)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.5)
words_counted (1.0.2)
words_counted (1.0.3)
xpath (3.2.0)
nokogiri (~> 1.8)
zeitwerk (2.6.12)
Expand Down Expand Up @@ -570,7 +570,7 @@ DEPENDENCIES
uglifier
webdrivers (~> 5.0)
webmock
words_counted
words_counted (~> 1.0, >= 1.0.3)
zip-zip

RUBY VERSION
Expand Down
2 changes: 1 addition & 1 deletion app/controllers/entries_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def review
if @total_count.positive?
@body_text = @entries.map { |e| ActionView::Base.full_sanitizer.sanitize(e.body) }.join(" ")
tokeniser = WordsCounted::Tokeniser.new(@body_text)
@words_counter = tokeniser.tokenise
@words_counter = tokeniser.tokenise(exclude: Entry::WORDS_NOT_TO_COUNT)
if @total_count > 20
all_user_entry_count = Entry.where("date >= '#{@year}-01-01'::DATE AND date <= '#{@year}-12-31'::DATE").group(:user_id).reorder("count_all").count.values
@pctile = (((all_user_entry_count.find_index(@total_count) + 1).to_f / all_user_entry_count.count) * 100).round
Expand Down
3 changes: 2 additions & 1 deletion app/models/entry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ class Entry < ActiveRecord::Base
include Entry::AiAssistant
mount_uploader :image, ImageUploader

WORDS_NOT_TO_COUNT = ['so', 'went', 'while', 's', 'amp', '-', 'p', 'br', 'div', 'img', 'span', 'the', 'of', 'and', 'a', 'to', 'in', 'is', 'that', 'it', 'was', 'for', 'on', 'are', 'as', 'with', 'at', 'be', 'this', 'have', 'from', 'or', 'had', 'by', 'but', 'not', 'what', 'all', 'were', 'when', 'can', 'said', 'there', 'use', 'an', 'each', 'which', 'do', 'how', 'if']
# Tokens that are leftover markup/entity fragments rather than real words
# (tag names, "amp", stray punctuation). Frozen so the shared list cannot be
# mutated at runtime.
WORDS_NOT_TO_COUNT = %w[s amp - p br div img span hr < >].freeze

# Markup fragments plus very common English words; used to filter
# "most used words" listings. Entries already contributed by
# WORDS_NOT_TO_COUNT are not repeated here.
COMMON_WORDS = (WORDS_NOT_TO_COUNT + %w[
  you so went while the of and a to in is that it was for on are as with at be
  this have from or had by but not what all were when can said there use an
  each which do how if
]).freeze

belongs_to :user
belongs_to :inspiration, optional: true
Expand Down
2 changes: 1 addition & 1 deletion app/views/entries/review.html.haml
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@
%span= "We're all still journaling in first-person...we collectively used <i>\"i\"</i> 203,180 times in our entries!".html_safe
.s-review-details
- grouped_words = @words_counter.group_by(&:itself).transform_values(&:count).sort_by { |_k, v| v }.reverse.to_h
- grouped_words = grouped_words.select { |word, count| !Entry::WORDS_NOT_TO_COUNT.include?(word) }
- grouped_words = grouped_words.select { |word, count| !Entry::COMMON_WORDS.include?(word) }
- grouped_words.first(25).each do |word, count|
%h3
= word
Expand Down
13 changes: 10 additions & 3 deletions lib/tasks/entry.rake
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,21 @@ namespace :entry do
all_entries = Entry.where("date >= '#{year}-01-01'::DATE AND date <= '#{year}-12-31'::DATE")
entries_bodies = all_entries.map { |e| ActionView::Base.full_sanitizer.sanitize(e.body) }.join(" ")

tokeniser = WordsCounted::Tokeniser.new(entries_bodies)
total_words = tokeniser.tokenise.length.to_f
tokenizer = WordsCounted::Tokeniser.new(entries_bodies).tokenise(exclude: Entry::WORDS_NOT_TO_COUNT)
total_words = tokenizer.count

counter = WordsCounted.count(entries_bodies)
most_frequent = counter.token_frequency.first(400).select { |w| !Entry::COMMON_WORDS.include?(w[0]) }.first(40).map { |w| "#{w[0]}: #{number_with_delimiter(w[1])}" }

avg_words = total_words / all_entries.count
total_chars = entries_bodies.length
avg_chars = total_chars / all_entries.count
avg_tweets_per_post = ((avg_chars).to_f / 280).ceil
most_frequent = words_counter.token_frequency.first(40).map { |w| "#{w[0]}: #{number_with_delimiter(w[1])}" }

# Tally how often each token occurs, most frequent first. The receiver must be
# the token array (`tokenizer`): `total_words` is only its Integer count and
# does not respond to `group_by`.
grouped_words = tokenizer.group_by(&:itself).transform_values(&:count).sort_by { |_word, count| count }.reverse.to_h
grouped_words = grouped_words.reject { |word, _count| Entry::WORDS_NOT_TO_COUNT.include?(word) }
grouped_words.first(25)

p "Users created: #{number_with_delimiter(User.where("created_at >= '#{year}-01-01'::DATE AND created_at <= '#{year}-12-31'::DATE").count)}"
p "Entries created in #{year}: #{number_with_delimiter(Entry.where("created_at >= '#{year}-01-01'::DATE AND created_at <= '#{year}-12-31'::DATE").count)}"
p "Entries for #{year}: #{number_with_delimiter(all_entries.count)}"
Expand Down

0 comments on commit 2879cf4

Please sign in to comment.