From b86269bbe2483b49f528498c860e9a5b198f849c Mon Sep 17 00:00:00 2001 From: Paul Arterburn Date: Fri, 13 Dec 2024 20:07:29 -0700 Subject: [PATCH] stats tweaks --- app/models/entry.rb | 2 +- lib/tasks/entry.rake | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/app/models/entry.rb b/app/models/entry.rb index 2055b151..6b2de39c 100644 --- a/app/models/entry.rb +++ b/app/models/entry.rb @@ -7,7 +7,7 @@ class Entry < ActiveRecord::Base mount_uploader :image, ImageUploader WORDS_NOT_TO_COUNT = ['s', 'amp', '-', 'p', 'br', 'div', 'img', 'span', 'hr', '<', '>'] - COMMON_WORDS = WORDS_NOT_TO_COUNT + ['your', 'we', 'i', "it's", 'dabblemegpt', 'like', 'these', 'you', 'so', 'went', 'while', 's', 'amp', '-', 'p', 'br', 'div', 'img', 'span', 'the', 'of', 'and', 'a', 'to', 'in', 'is', 'that', 'it', 'was', 'for', 'on', 'are', 'as', 'with', 'at', 'be', 'this', 'have', 'from', 'or', 'had', 'by', 'but', 'not', 'what', 'all', 'were', 'when', 'can', 'said', 'there', 'use', 'an', 'each', 'which', 'do', 'how', 'if'] + COMMON_WORDS = WORDS_NOT_TO_COUNT + ['has', 'did', "you're", 'your', 'we', 'i', "it's", 'dabblemegpt', 'like', 'these', 'you', 'so', 'went', 'while', 's', 'amp', '-', 'p', 'br', 'div', 'img', 'span', 'the', 'of', 'and', 'a', 'to', 'in', 'is', 'that', 'it', 'was', 'for', 'on', 'are', 'as', 'with', 'at', 'be', 'this', 'have', 'from', 'or', 'had', 'by', 'but', 'not', 'what', 'all', 'were', 'when', 'can', 'said', 'there', 'use', 'an', 'each', 'which', 'do', 'how', 'if'] belongs_to :user belongs_to :inspiration, optional: true diff --git a/lib/tasks/entry.rake b/lib/tasks/entry.rake index 2a50b859..669bb1f8 100644 --- a/lib/tasks/entry.rake +++ b/lib/tasks/entry.rake @@ -162,18 +162,18 @@ namespace :entry do # p "Total characters: #{number_with_delimiter(total_chars)}" # p "Avg characters per post: #{number_with_delimiter(avg_chars)} (#{avg_tweets_per_post} tweets)" - counter = WordsCounted.count(entries_bodies) - most_frequent = counter.token_frequency.first(400).select { |w| !Entry::COMMON_WORDS.include?(w[0]) }.first(40).map { |w| "#{w[0]}: #{number_with_delimiter(w[1])}" } - p "Most Frequent Words:" - puts most_frequent - p "*"*100 + # counter = WordsCounted.count(entries_bodies) + # most_frequent = counter.token_frequency.first(400).select { |w| !Entry::COMMON_WORDS.include?(w[0]) }.first(40).map { |w| "#{w[0]}: #{number_with_delimiter(w[1])}" } + # p "Most Frequent Words:" + # puts most_frequent + # p "*"*100 end # heroku run bundle exec rake "entry:stats_by_user[2022]" --app dabble-me --size=standard-2x task :stats_by_user, [:year] => :environment do |_, year:| data = [] csv_data = CSV.generate(col_sep: "\t") do |csv| - csv << ["USER_ID", "EMAIL", "#{year}_ENTRY", "#{year}_WORD", "#{year}_TWEET"] + csv << ["USER_ID", "EMAIL", "#{year}_ENTRY", "#{year}_WORD"] User.all.each do |user| user_entries = Entry.where("date >= '#{year}-01-01'::DATE AND date <= '#{year}-12-31'::DATE AND user_id = ?", user.id)