Skip to content

Commit

Permalink
robots.txt: when emitting original root link treat it "as redirect" s…
Browse files Browse the repository at this point in the history
…o that it retains its root link status
  • Loading branch information
let4be committed Jun 29, 2021
1 parent 93ecba9 commit 92ba94f
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions main/src/task_filters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ enum RobotsTxtState {
#[derive(Default)]
pub struct RobotsTxt {
state: RobotsTxtState,
root_link: Option<Arc<rt::Link>>,
root_link: Option<rt::Link>,
matcher: Option<robotstxt::matcher::CachingRobotsMatcher<robotstxt::matcher::LongestMatchRobotsMatchStrategy>>,
}

Expand Down Expand Up @@ -240,8 +240,10 @@ impl<JS: rt::JobStateValues, TS: rt::TaskStateValues> Filter<JS, TS> for RobotsT
}
}

let original_root_link = self.root_link.take().unwrap();
ctx.push_shared_links(vec![original_root_link].into_iter());
let mut original_root_link = self.root_link.take().unwrap();
// treat this link "as a redirect" from robots.txt, this way it retains root task status
original_root_link.redirect = 1;
ctx.push_links(vec![original_root_link].into_iter());
}

fn accept(&mut self, ctx: &mut rt::JobCtx<JS, TS>, _: usize, task: &mut rt::Task) -> Result {
Expand All @@ -261,7 +263,7 @@ impl<JS: rt::JobStateValues, TS: rt::TaskStateValues> Filter<JS, TS> for RobotsT
let mut link = Arc::new(link);

mem::swap(&mut link, &mut task.link);
self.root_link = Some(link);
self.root_link = Some((*link).clone());

Ok(Action::Accept)
}
Expand Down

0 comments on commit 92ba94f

Please sign in to comment.