Skip to content

Commit

Permalink
feat: add basic scraping interface
Browse files Browse the repository at this point in the history
  • Loading branch information
JacksonVirgo committed Jul 20, 2024
1 parent a9a77ab commit 1b2fbfc
Show file tree
Hide file tree
Showing 17 changed files with 182 additions and 28 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ serde = { version = "1.0.204", features = ["derive"] }
sqlx = { version = "0.7.4", features = ["postgres", "macros", "runtime-async-std-native-tls", "time", "chrono"] }
tokio = "1.38.1"
chrono = { version = "0.4.23", features = ["serde"] }
url = "2.5.2"

[dev-dependencies]
rustfmt = "0.10.0"
Expand Down
12 changes: 12 additions & 0 deletions src/components/buttons.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ pub struct ExternalCTAButton {
pub link: String,
}

/// Configuration for a button that submits its enclosing form.
///
/// Rendered by `gen_button` as a `<button type="submit">` element.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FormSubmitButton {
    /// Label shown inside the button.
    pub text: String,
}

/// The kinds of button that `gen_button` knows how to render.
pub enum ButtonType {
/// A button described by an `ExternalCTAButton`.
ExternalCTA(ExternalCTAButton),
/// A form submit button, rendered as `<button type="submit">`.
FormSubmit(FormSubmitButton),
}

pub fn gen_button(btn: ButtonType) -> Markup {
Expand All @@ -18,5 +23,12 @@ pub fn gen_button(btn: ButtonType) -> Markup {
}
}
}
ButtonType::FormSubmit(btn) => {
html! {
button."text-lg bg-white border-1 border-zinc-400 rounded py-2 px-4 mt-4 select-none w-fit hover:cursor-pointer hover:bg-zinc-300" type="submit" {
(btn.text)
}
}
}
}
}
21 changes: 21 additions & 0 deletions src/components/forms/input.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use maud::{html, Markup};

/// Configuration for a single-line text field.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TextInput {
    /// Hint text shown while the field is empty.
    pub placeholder: String,
    /// Used for both the `name` and the `id` attribute of the rendered element.
    pub name: String,
    /// Whether the field must be filled in; `None` is treated as "not required".
    pub is_required: Option<bool>,
}

/// The kinds of form control that `gen_input` knows how to render.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InputType {
    /// A `<input type="text">` field.
    TextInput(TextInput),
}

/// Renders the form control described by `raw_input` as maud markup.
///
/// For [`InputType::TextInput`] this produces an `<input type="text">` whose
/// `name` and `id` are both taken from `TextInput::name`. The `required`
/// attribute is emitted only when `is_required` is `Some(true)`.
pub fn gen_input(raw_input: InputType) -> Markup {
    match raw_input {
        InputType::TextInput(input) => {
            // `required` is a boolean HTML attribute: its mere presence makes the
            // field mandatory, so `required="false"` would still enforce it. Use
            // maud's `[..]` toggle so the attribute is omitted when not required.
            let is_required = input.is_required.unwrap_or(false);
            html! {
                // `<input>` is a void element, so it is terminated with `;`
                // rather than given an (invalid) closing tag via `{}`.
                input."w-full px-4 py-2 border border-gray-300 rounded text-white bg-zinc-700"
                    type="text"
                    name=(input.name)
                    id=(input.name)
                    placeholder=(input.placeholder)
                    required[is_required];
            }
        }
    }
}
1 change: 1 addition & 0 deletions src/components/forms/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod input;
2 changes: 2 additions & 0 deletions src/components/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
pub mod buttons;
pub mod header;

pub mod forms;
8 changes: 3 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use actix_web::{get, web, web::Data, App, HttpResponse, HttpServer, Responder};
use actix_web::{get, web::Data, App, HttpResponse, HttpServer, Responder};
use dotenv::dotenv;
use mime;
use sqlx::{postgres::PgPoolOptions, Pool, Postgres};
mod components;
mod routes;
mod scraping;

pub struct AppState {
db: Pool<Postgres>,
Expand Down Expand Up @@ -39,10 +40,7 @@ async fn main() -> std::io::Result<()> {
App::new()
.app_data(Data::new(AppState { db: pool.clone() }))
.service(serve_css)
.service(routes::main::main)
.service(routes::test::test)
.service(routes::test::test_id)
.default_service(web::route().to(routes::not_found::not_found))
.configure(routes::init)
})
.bind(&address)?
.run()
Expand Down
5 changes: 5 additions & 0 deletions src/routes/api/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pub mod scrape_activity_page;

/// Registers every API handler of this module on `cfg`.
pub fn init(cfg: &mut actix_web::web::ServiceConfig) {
    let scrape_handler = scrape_activity_page::scrape_activity_page;
    cfg.service(scrape_handler);
}
43 changes: 43 additions & 0 deletions src/routes/api/scrape_activity_page.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use actix_web::{post, web, HttpResponse, Responder};
use maud::html;

use crate::scraping::parser::parse_url;

/// Form body accepted by the `/scrape-activity-page` handler.
#[derive(serde::Deserialize)]
pub struct FormData {
/// The URL submitted by the user; handed to `parse_url` as-is.
url: String,
}

/// Classifies the submitted URL and answers with a small HTML fragment.
///
/// The fragment names the thread id or post id extracted by `parse_url`, or
/// echoes the input back as "Invalid URL" when nothing could be extracted.
/// The response status is `200 OK` in every case.
#[post("/scrape-activity-page")]
async fn scrape_activity_page(form: web::Form<FormData>) -> impl Responder {
    let url = &form.url;

    let fragment = match parse_url(url) {
        Some(crate::scraping::parser::URLType::Thread(thread)) => html! {
            div {
                "Thread: " (thread.thread_id)
            }
        },
        Some(crate::scraping::parser::URLType::Post(post)) => html! {
            div {
                "Post: " (post.post_id)
            }
        },
        None => html! {
            div {
                "Invalid URL: " (url)
            }
        },
    };

    HttpResponse::Ok().body(fragment.into_string())
}
10 changes: 8 additions & 2 deletions src/routes/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
pub mod main;
use actix_web::web;
pub mod api;
pub mod not_found;
pub mod test;
pub mod pages;
/// Wires up all routes: the page handlers, the API handlers under the
/// `/api` scope, and the not-found handler as the default service.
pub fn init(cfg: &mut actix_web::web::ServiceConfig) {
    cfg.configure(pages::init)
        .service(web::scope("/api").configure(api::init))
        .default_service(web::route().to(not_found::not_found));
}
22 changes: 1 addition & 21 deletions src/routes/main.rs → src/routes/pages/home.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ async fn main() -> impl Responder {

let cta = gen_button(ButtonType::ExternalCTA(ExternalCTAButton {
text: "Get Started".to_string(),
link: "/test".to_string(),
link: "/scraper".to_string(),
}));

let markup = html! {
Expand All @@ -33,23 +33,3 @@ async fn main() -> impl Responder {

HttpResponse::Ok().body(html)
}

/// Serves a static page at `/test` showing a "Test Successful" heading.
#[get("/test")]
async fn test() -> impl Responder {
    let page_head = generate_header(Header {
        title: "MafiaScum Scraper",
    });

    let page = html! {
        (page_head)
        body."bg-zinc-900 w-screen h-screen flex flex-col items-center justify-center" {
            div."text-center w-1/2 flex flex-col items-center justify-center" {
                h1."text-3xl text-white font-bold pb-2" { "Test Successful" }
            }
        }
    };

    HttpResponse::Ok().body(page.into_string())
}
9 changes: 9 additions & 0 deletions src/routes/pages/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
pub mod home;
pub mod scraper;
pub mod test;

/// Registers the page handlers (home, test and scraper) on `cfg`.
pub fn init(cfg: &mut actix_web::web::ServiceConfig) {
    cfg.service(home::main)
        .service(test::test)
        .service(scraper::scraper);
}
42 changes: 42 additions & 0 deletions src/routes/pages/scraper.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
use crate::components::{
buttons::{gen_button, ButtonType, FormSubmitButton},
forms::input::{gen_input, InputType, TextInput},
header::{generate_header, Header},
};
use actix_web::{get, HttpResponse, Responder};
use maud::html;

#[get("/scraper")]
async fn scraper() -> impl Responder {
let header = generate_header(Header {
title: "MafiaScum Scraper",
});

let markup = html! {
(header)
body."bg-zinc-900 w-screen h-screen flex flex-col items-center justify-center" {
h1 ."text-3xl text-white font-bold pb-2" { "MafiaScum Scraper" }
div."text-xl text-white pb-2" {
"Enter a URL to scrape from mafiascum.net"
}
form."text-center w-1/2 flex flex-col items-center justify-center" hx-post="/api/scrape-activity-page" hx-target="#response" {
(gen_input(InputType::TextInput(TextInput {
name: "url".to_string(),
placeholder: "https://mafiascum.net".to_string(),
is_required: Some(true),
})))
(gen_button(ButtonType::FormSubmit(FormSubmitButton {
text: "Submit".to_string(),
})))
};

div."text-white" id="response" {
"Response Here"
}
}
};

let html = markup.into_string();

HttpResponse::Ok().body(html)
}
File renamed without changes.
2 changes: 2 additions & 0 deletions src/scraping/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pub mod parser;
pub mod scraper;
31 changes: 31 additions & 0 deletions src/scraping/parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use url::Url;

/// A URL that refers to a thread.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ThreadURL {
    /// Raw value of the URL's `t` query parameter.
    pub thread_id: String,
}

/// A URL that refers to a single post.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PostURL {
    /// Raw value of the URL's `p` query parameter.
    pub post_id: String,
}

/// The result of classifying a URL with `parse_url`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum URLType {
    /// The URL carried a `t` query parameter.
    Thread(ThreadURL),
    /// The URL carried a `p` query parameter (and no `t`).
    Post(PostURL),
}

/// Classifies `url_str` as a thread or post link based on its query string.
///
/// Returns `Some(URLType::Thread)` when the URL has a `t` query parameter,
/// otherwise `Some(URLType::Post)` when it has a `p` parameter. `t` wins when
/// both are present, regardless of their order. Returns `None` when the
/// string is not a parseable URL or carries neither parameter.
pub fn parse_url(url_str: &str) -> Option<URLType> {
    let parsed = Url::parse(url_str).ok()?;

    // First value of the query parameter called `wanted`, if any.
    let first_value = |wanted: &str| {
        parsed
            .query_pairs()
            .find(|(key, _)| key == wanted)
            .map(|(_, value)| value.into_owned())
    };

    if let Some(thread_id) = first_value("t") {
        return Some(URLType::Thread(ThreadURL { thread_id }));
    }

    first_value("p").map(|post_id| URLType::Post(PostURL { post_id }))
}
Empty file added src/scraping/scraper.rs
Empty file.

0 comments on commit 1b2fbfc

Please sign in to comment.