From 1b2fbfc8a7d21e7ed9a012b198c3187714a2ab59 Mon Sep 17 00:00:00 2001 From: Jack Nash Date: Sun, 21 Jul 2024 09:13:11 +1000 Subject: [PATCH] feat: add basic scraping interface --- Cargo.lock | 1 + Cargo.toml | 1 + src/components/buttons.rs | 12 +++++++ src/components/forms/input.rs | 21 +++++++++++++ src/components/forms/mod.rs | 1 + src/components/mod.rs | 2 ++ src/main.rs | 8 ++--- src/routes/api/mod.rs | 5 +++ src/routes/api/scrape_activity_page.rs | 43 ++++++++++++++++++++++++++ src/routes/mod.rs | 10 ++++-- src/routes/{main.rs => pages/home.rs} | 22 +------------ src/routes/pages/mod.rs | 9 ++++++ src/routes/pages/scraper.rs | 42 +++++++++++++++++++++++++ src/routes/{ => pages}/test.rs | 0 src/scraping/mod.rs | 2 ++ src/scraping/parser.rs | 31 +++++++++++++++++++ src/scraping/scraper.rs | 0 17 files changed, 182 insertions(+), 28 deletions(-) create mode 100644 src/components/forms/input.rs create mode 100644 src/components/forms/mod.rs create mode 100644 src/routes/api/mod.rs create mode 100644 src/routes/api/scrape_activity_page.rs rename src/routes/{main.rs => pages/home.rs} (64%) create mode 100644 src/routes/pages/mod.rs create mode 100644 src/routes/pages/scraper.rs rename src/routes/{ => pages}/test.rs (100%) create mode 100644 src/scraping/mod.rs create mode 100644 src/scraping/parser.rs create mode 100644 src/scraping/scraper.rs diff --git a/Cargo.lock b/Cargo.lock index 8a33c19..da901e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1415,6 +1415,7 @@ dependencies = [ "serde", "sqlx", "tokio", + "url", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 2f77c56..73ceb03 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ serde = { version = "1.0.204", features = ["derive"] } sqlx = { version = "0.7.4", features = ["postgres", "macros", "runtime-async-std-native-tls", "time", "chrono"] } tokio = "1.38.1" chrono = { version = "0.4.23", features = ["serde"] } +url = "2.5.2" [dev-dependencies] rustfmt = "0.10.0" diff --git a/src/components/buttons.rs b/src/components/buttons.rs index a6d62d0..d43bb65 100644 --- a/src/components/buttons.rs +++ b/src/components/buttons.rs @@ -5,8 +5,13 @@ pub struct ExternalCTAButton { pub link: String, } +pub struct FormSubmitButton { + pub text: String, +} + pub enum ButtonType { ExternalCTA(ExternalCTAButton), + FormSubmit(FormSubmitButton), } pub fn gen_button(btn: ButtonType) -> Markup { @@ -18,5 +23,12 @@ pub fn gen_button(btn: ButtonType) -> Markup { } } } + ButtonType::FormSubmit(btn) => { + html! { + button."text-lg bg-white border-1 border-zinc-400 rounded py-2 px-4 mt-4 select-none w-fit hover:cursor-pointer hover:bg-zinc-300" type="submit" { + (btn.text) + } + } + } } } diff --git a/src/components/forms/input.rs b/src/components/forms/input.rs new file mode 100644 index 0000000..c05cdcb --- /dev/null +++ b/src/components/forms/input.rs @@ -0,0 +1,21 @@ +use maud::{html, Markup}; + +pub struct TextInput { + pub placeholder: String, + pub name: String, + pub is_required: Option, +} + +pub enum InputType { + TextInput(TextInput), +} + +pub fn gen_input(raw_input: InputType) -> Markup { + match raw_input { + InputType::TextInput(input) => { + html! { + input."w-full px-4 py-2 border border-gray-300 rounded text-white bg-zinc-700" type="text" name=(input.name) id=(input.name) placeholder=(input.placeholder) required=(input.is_required.unwrap_or(false)) {} + } + } + } +} diff --git a/src/components/forms/mod.rs b/src/components/forms/mod.rs new file mode 100644 index 0000000..7839bc5 --- /dev/null +++ b/src/components/forms/mod.rs @@ -0,0 +1 @@ +pub mod input; diff --git a/src/components/mod.rs b/src/components/mod.rs index 7e4fcb9..492fcb7 100644 --- a/src/components/mod.rs +++ b/src/components/mod.rs @@ -1,2 +1,4 @@ pub mod buttons; pub mod header; + +pub mod forms; diff --git a/src/main.rs b/src/main.rs index 067a908..823f095 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,10 @@ -use actix_web::{get, web, web::Data, App, HttpResponse, HttpServer, Responder}; +use actix_web::{get, web::Data, App, HttpResponse, HttpServer, Responder}; use dotenv::dotenv; use mime; use sqlx::{postgres::PgPoolOptions, Pool, Postgres}; mod components; mod routes; +mod scraping; pub struct AppState { db: Pool, @@ -39,10 +40,7 @@ async fn main() -> std::io::Result<()> { App::new() .app_data(Data::new(AppState { db: pool.clone() })) .service(serve_css) - .service(routes::main::main) - .service(routes::test::test) - .service(routes::test::test_id) - .default_service(web::route().to(routes::not_found::not_found)) + .configure(routes::init) }) .bind(&address)? .run() diff --git a/src/routes/api/mod.rs b/src/routes/api/mod.rs new file mode 100644 index 0000000..ffaccc2 --- /dev/null +++ b/src/routes/api/mod.rs @@ -0,0 +1,5 @@ +pub mod scrape_activity_page; + +pub fn init(cfg: &mut actix_web::web::ServiceConfig) { + cfg.service(scrape_activity_page::scrape_activity_page); +} diff --git a/src/routes/api/scrape_activity_page.rs b/src/routes/api/scrape_activity_page.rs new file mode 100644 index 0000000..62feaf0 --- /dev/null +++ b/src/routes/api/scrape_activity_page.rs @@ -0,0 +1,43 @@ +use actix_web::{post, web, HttpResponse, Responder}; +use maud::html; + +use crate::scraping::parser::parse_url; + +#[derive(serde::Deserialize)] +pub struct FormData { + url: String, +} + +#[post("/scrape-activity-page")] +async fn scrape_activity_page(form: web::Form) -> impl Responder { + let url = &form.url; + if let Some(new_url) = parse_url(url) { + let markup = match new_url { + crate::scraping::parser::URLType::Thread(thread) => { + html! { + div { + "Thread: " (thread.thread_id) + } + } + } + crate::scraping::parser::URLType::Post(post) => { + html! { + div { + "Post: " (post.post_id) + } + } + } + }; + + let html = markup.into_string(); + return HttpResponse::Ok().body(html); + } else { + let markup = html! { + div { + "Invalid URL: " (url) + } + }; + let html = markup.into_string(); + return HttpResponse::Ok().body(html); + } +} diff --git a/src/routes/mod.rs b/src/routes/mod.rs index 25cdb18..4a5e7eb 100644 --- a/src/routes/mod.rs +++ b/src/routes/mod.rs @@ -1,3 +1,9 @@ -pub mod main; +use actix_web::web; +pub mod api; pub mod not_found; -pub mod test; +pub mod pages; +pub fn init(cfg: &mut actix_web::web::ServiceConfig) { + cfg.configure(pages::init); + cfg.service(web::scope("/api").configure(api::init)); + cfg.default_service(web::route().to(not_found::not_found)); +} diff --git a/src/routes/main.rs b/src/routes/pages/home.rs similarity index 64% rename from src/routes/main.rs rename to src/routes/pages/home.rs index 52200cd..f6d20eb 100644 --- a/src/routes/main.rs +++ b/src/routes/pages/home.rs @@ -13,7 +13,7 @@ async fn main() -> impl Responder { let cta = gen_button(ButtonType::ExternalCTA(ExternalCTAButton { text: "Get Started".to_string(), - link: "/test".to_string(), + link: "/scraper".to_string(), })); let markup = html! { @@ -33,23 +33,3 @@ async fn main() -> impl Responder { HttpResponse::Ok().body(html) } - -#[get("/test")] -async fn test() -> impl Responder { - let header = generate_header(Header { - title: "MafiaScum Scraper", - }); - - let markup = html! { - (header) - body."bg-zinc-900 w-screen h-screen flex flex-col items-center justify-center" { - div."text-center w-1/2 flex flex-col items-center justify-center" { - h1."text-3xl text-white font-bold pb-2" { "Test Successful" } - } - } - }; - - let html = markup.into_string(); - - HttpResponse::Ok().body(html) -} diff --git a/src/routes/pages/mod.rs b/src/routes/pages/mod.rs new file mode 100644 index 0000000..82aca1b --- /dev/null +++ b/src/routes/pages/mod.rs @@ -0,0 +1,9 @@ +pub mod home; +pub mod scraper; +pub mod test; + +pub fn init(cfg: &mut actix_web::web::ServiceConfig) { + cfg.service(home::main); + cfg.service(test::test); + cfg.service(scraper::scraper); +} diff --git a/src/routes/pages/scraper.rs b/src/routes/pages/scraper.rs new file mode 100644 index 0000000..fc1c364 --- /dev/null +++ b/src/routes/pages/scraper.rs @@ -0,0 +1,42 @@ +use crate::components::{ + buttons::{gen_button, ButtonType, FormSubmitButton}, + forms::input::{gen_input, InputType, TextInput}, + header::{generate_header, Header}, +}; +use actix_web::{get, HttpResponse, Responder}; +use maud::html; + +#[get("/scraper")] +async fn scraper() -> impl Responder { + let header = generate_header(Header { + title: "MafiaScum Scraper", + }); + + let markup = html! { + (header) + body."bg-zinc-900 w-screen h-screen flex flex-col items-center justify-center" { + h1 ."text-3xl text-white font-bold pb-2" { "MafiaScum Scraper" } + div."text-xl text-white pb-2" { + "Enter a URL to scrape from mafiascum.net" + } + form."text-center w-1/2 flex flex-col items-center justify-center" hx-post="/api/scrape-activity-page" hx-target="#response" { + (gen_input(InputType::TextInput(TextInput { + name: "url".to_string(), + placeholder: "https://mafiascum.net".to_string(), + is_required: Some(true), + }))) + (gen_button(ButtonType::FormSubmit(FormSubmitButton { + text: "Submit".to_string(), + }))) + }; + + div."text-white" id="response" { + "Response Here" + } + } + }; + + let html = markup.into_string(); + + HttpResponse::Ok().body(html) +} diff --git a/src/routes/test.rs b/src/routes/pages/test.rs similarity index 100% rename from src/routes/test.rs rename to src/routes/pages/test.rs diff --git a/src/scraping/mod.rs b/src/scraping/mod.rs new file mode 100644 index 0000000..4b789f3 --- /dev/null +++ b/src/scraping/mod.rs @@ -0,0 +1,2 @@ +pub mod parser; +pub mod scraper; diff --git a/src/scraping/parser.rs b/src/scraping/parser.rs new file mode 100644 index 0000000..f6098a1 --- /dev/null +++ b/src/scraping/parser.rs @@ -0,0 +1,31 @@ +use url::Url; + +pub struct ThreadURL { + pub thread_id: String, +} + +pub struct PostURL { + pub post_id: String, +} + +pub enum URLType { + Thread(ThreadURL), + Post(PostURL), +} + +pub fn parse_url(url_str: &str) -> Option { + if let Ok(parsed_url) = Url::parse(url_str) { + if let Some((_, id)) = parsed_url.query_pairs().find(|(key, _)| key == "t") { + return Some(URLType::Thread(ThreadURL { + thread_id: id.to_string(), + })); + } + + if let Some((_, id)) = parsed_url.query_pairs().find(|(key, _)| key == "p") { + return Some(URLType::Post(PostURL { + post_id: id.to_string(), + })); + } + } + None +} diff --git a/src/scraping/scraper.rs b/src/scraping/scraper.rs new file mode 100644 index 0000000..e69de29