Skip to content

Commit

Permalink
feat: impl mass vote scrape
Browse files Browse the repository at this point in the history
  • Loading branch information
JacksonVirgo committed Aug 3, 2024
1 parent 14dda4e commit 9f01b0a
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 67 deletions.
3 changes: 3 additions & 0 deletions migrations/0001_initial.sql
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,6 @@ DROP CONSTRAINT IF EXISTS votes_player_id_fkey;

ALTER TABLE votes
DROP COLUMN IF EXISTS player_id;

ALTER TABLE votes
ADD CONSTRAINT unique_thread_post UNIQUE (thread_id, post_number);
61 changes: 38 additions & 23 deletions src/routes/api/dashboard/vote_data.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::{ components::buttons::{gen_button, ButtonType, FormSubmitButton}, models::votes::{create_vote, get_votes, NewVote}, utils::{app_state::AppState, url::ForumURL}};
use std::thread::current;

use crate::{ components::buttons::{gen_button, ButtonType, FormSubmitButton}, models::votes::{create_vote, get_votes, NewVote}, scraping::scraper::Vote, utils::{app_state::AppState, url::ForumURL}};
use actix_web::{get, post, web::{self, Data}, HttpResponse, Responder};
use maud::{html, Markup};

Expand Down Expand Up @@ -87,34 +89,47 @@ async fn vote_data(state: Data<AppState>, path: web::Path<String>) -> impl Respo

#[post("/votes/{thread_id}")]
async fn scrape_votes(state: Data<AppState>, path: web::Path<String>) -> impl Responder {
// TODO: Make this a polling process, rather than one endpoint that takes a long time

let thread_id = path.into_inner();
let url = ForumURL::new(thread_id.clone());
let initial_page = match url.scrape().await {
Some(page_data) => page_data,
None => {
println!("Failed to get page data");
return HttpResponse::Found().insert_header(("HX-Redirect", format!("/dashboard/{}?d=2", thread_id))).finish()
}
};
let mut url = ForumURL::new(thread_id.clone());

for vote in initial_page.votes {
let vote_copy = vote.clone();
let pg = create_vote(&state, NewVote {
thread_id: thread_id.clone(),
author: vote.author,
target: vote.target,
target_correction: None,
post_number: vote.post_number
}).await;

match pg {
Some(_) => println!("Created vote: {:?}", vote_copy),
let mut is_last_page = false;
let mut current_page = 0;
let mut last_page = 1;
while current_page < last_page {
match url.ppp(200).start(current_page * 200).scrape().await {
Some(page) => {
for vote in page.votes {
let vote_copy: Vote = vote.clone();
let pg = create_vote(&state, NewVote {
thread_id: thread_id.clone(),
author: vote.author,
target: vote.target,
target_correction: None,
post_number: vote.post_number
}).await;

match pg {
Some(_) => (),
None => {
println!("Failed to create vote: {:?}", vote_copy);
}
}
}
last_page = page.last_page;
},
None => {
println!("Failed to create vote: {:?}", vote_copy);
println!("Failed to get page data for page {:?}", current_page);
is_last_page = true;
}
}
};

println!("Scraped page {}/{}", current_page, last_page);
current_page += 1;
}

HttpResponse::Found()
.insert_header(("HX-Redirect", format!("/dashboard/{}?d=2", thread_id))).finish()
.insert_header(("HX-Redirect", format!("/dashboard/{}?d=3", thread_id))).finish()
}
105 changes: 61 additions & 44 deletions src/scraping/scraper.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use reqwest::Client;
use select::document::Document;
use select::predicate::{Attr, Class, Name, Or, Predicate};
use select::predicate::{Class, Name};

use crate::scraping::parser::get_search_params;

Expand Down Expand Up @@ -118,54 +118,71 @@ pub fn scrape_votes(document: &Document) -> Vec<Vote> {
let mut response: Vec<Vote> = Vec::new();

document.find(Class("post")).for_each(|node| {
let votes: Vec<String> = node
.find(Or(Class("bbvote"), Name("div").and(Attr("style", ()))))
.map(|node| node.text())
.filter(|text| text.to_lowercase().starts_with("vote:"))
.collect();
if votes.len() > 0 {
let author: Option<String> =
match node.find(Class("username")).collect::<Vec<_>>().first() {
Some(node) => Some(node.text()),
_ => {
match node
.find(Class("username-coloured"))
.collect::<Vec<_>>()
.first()
{
Some(node) => Some(node.text()),
_ => None,
}
}
};
let content = match node.find(Class("content")).next() {
Some(node) => node,
None => return,
};

let post_number = match node
.find(Class("post-number-bolded"))
.collect::<Vec<_>>()
.first()
{
Some(node) => {
let remove_first_char = node.text().chars().skip(1).collect::<String>();
match remove_first_char.parse::<i32>() {
Ok(num) => Some(num),
_ => None,
}
}
_ => None,
let mut last_vote: Option<String> = None;
content.children().for_each(|node| {
let is_bbvote = match node.attr("class") {
Some(class) => class.contains("bbvote"),
None => false,
};

match (author, post_number) {
(Some(author), Some(post_number)) => {
for vote in votes {
response.push(Vote {
author: author.clone(),
target: vote,
post_number,
})
}
let is_bolded = match node.attr("style") {
Some(style) => style.contains("font-weight:bold"),
None => false,
};

if !(is_bbvote || is_bolded) {
return;
}

let text = node.text();
if text.to_lowercase().starts_with("vote:") {
let text = text.chars().skip(5).collect::<String>().trim().to_string();
last_vote = Some(text);
}
});

let author: Option<String> = match node.find(Class("username")).collect::<Vec<_>>().first()
{
Some(node) => Some(node.text()),
_ => {
match node
.find(Class("username-coloured"))
.collect::<Vec<_>>()
.first()
{
Some(node) => Some(node.text()),
_ => None,
}
_ => (),
}
};

let post_number = match node
.find(Class("post-number-bolded"))
.collect::<Vec<_>>()
.first()
{
Some(node) => {
let remove_first_char = node.text().chars().skip(1).collect::<String>();
match remove_first_char.parse::<i32>() {
Ok(num) => Some(num),
_ => None,
}
}
_ => None,
};

match (last_vote, author, post_number) {
(Some(vote), Some(author), Some(post_number)) => response.push(Vote {
author: author.clone(),
target: vote,
post_number,
}),
_ => (),
}
});

Expand Down
5 changes: 5 additions & 0 deletions src/utils/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ impl ForumURL {
self
}

/// Stores `start` in this `ForumURL`'s `start` field and returns the same
/// `&mut ForumURL`, so the call can be chained with other methods.
///
/// NOTE(review): presumably the post offset used for pagination (the vote
/// scraping route passes `current_page * 200` alongside `ppp(200)`) — confirm
/// against how `url()` consumes the field.
pub fn start(&mut self, start: i32) -> &mut ForumURL {
self.start = start;
self
}

pub fn url(&self, url_type: URLType) -> String {
match url_type {
URLType::Thread => format!(
Expand Down

0 comments on commit 9f01b0a

Please sign in to comment.