From 6830121af67d50786bed1958e8656fdab7b6deed Mon Sep 17 00:00:00 2001 From: lordlabuckdas <55460753+lordlabuckdas@users.noreply.github.com> Date: Fri, 16 Jul 2021 14:36:03 +0530 Subject: [PATCH 1/4] add empty 404 if not present --- snare/cloner.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/snare/cloner.py b/snare/cloner.py index d4d1cf64..f80854d0 100644 --- a/snare/cloner.py +++ b/snare/cloner.py @@ -68,11 +68,14 @@ def get_headers(response): "x-cache", ] + content_type = None headers = [] for key, value in response.headers.items(): - if key.lower() not in ignored_headers_lowercase: + if key.lower() == "content-type": + content_type = value + elif key.lower() not in ignored_headers_lowercase: headers.append({key: value}) - return headers + return [headers, content_type] async def process_link(self, url, level, check_host=False): try: @@ -177,6 +180,7 @@ async def get_body(self, driver): self.logger.debug("Cloned file: %s", file_name) self.meta[file_name]["hash"] = hash_name self.meta[file_name]["headers"] = headers + self.meta[file_name]["content_type"] = content_type if content_type == "text/html": soup = await self.replace_links(data, level) @@ -218,7 +222,7 @@ async def fetch_data(self, session, current_url, level, try_count): redirect_url = None try: response = await session.get(current_url, headers={"Accept": "text/html"}, timeout=10.0) - headers = self.get_headers(response) + headers, _ = self.get_headers(response) content_type = response.content_type response_url = yarl.URL(response.url) if response_url.with_scheme("http") != current_url.with_scheme("http"): @@ -233,14 +237,6 @@ async def fetch_data(self, session, current_url, level, try_count): class HeadlessCloner(BaseCloner): - @staticmethod - def get_content_type(headers): - for header in headers: - for key, val in header.items(): - if key.lower() == "content-type": - return val.split(";")[0] - return None - async def fetch_data(self, browser, current_url, level, try_count): data = None headers = [] @@ -250,8 +246,7 @@ async def fetch_data(self, browser, current_url, level, try_count): try: page = await browser.newPage() response = await page.goto(str(current_url)) - headers = self.get_headers(response) - content_type = self.get_content_type(headers) + headers, content_type = self.get_headers(response) response_url = yarl.URL(response.url) if response_url.with_scheme("http") != current_url.with_scheme("http"): redirect_url = response_url @@ -299,6 +294,13 @@ async def run(self): async def close(self): if not self.runner: raise Exception("Error initializing cloner!") + error_file_name, error_file_hash = self.runner._make_filename(self.runner.error_page) + # create empty file for 404 page if not present and add meta info + if not self.runner.meta.get(error_file_name): + with open(os.path.join(self.runner.target_path, error_file_hash), "wb") as _: + pass + self.runner.meta[error_file_name]["hash"] = error_file_hash + self.runner.meta[error_file_name]["content_type"] = "text/html" with open(os.path.join(self.runner.target_path, "meta.json"), "w") as mj: json.dump(self.runner.meta, mj) if self.driver: From a04ce72cd3e662c9d42e4d7aefdd3e4ca9457be9 Mon Sep 17 00:00:00 2001 From: lordlabuckdas <55460753+lordlabuckdas@users.noreply.github.com> Date: Fri, 16 Jul 2021 16:47:37 +0530 Subject: [PATCH 2/4] fix redirects and enable error page redirects as well --- snare/server.py | 2 -- snare/tanner_handler.py | 6 ++++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/snare/server.py b/snare/server.py index ca052ad7..bd3e95b4 100644 --- a/snare/server.py +++ b/snare/server.py @@ -37,8 +37,6 @@ async def submit_slurp(self, data): async def handle_request(self, request): self.logger.info("Request path: {0}".format(request.path_qs)) - if self.meta[request.path_qs].get("redirect"): - raise web.HTTPFound(self.meta[request.path_qs]["redirect"]) data = self.tanner_handler.create_data(request, 200) if request.method == "POST": post_data = await request.post() diff --git a/snare/tanner_handler.py b/snare/tanner_handler.py index 52f9e44c..79be8cbd 100644 --- a/snare/tanner_handler.py +++ b/snare/tanner_handler.py @@ -3,7 +3,9 @@ import multidict import json import logging + import aiohttp +from aiohttp import web from urllib.parse import unquote from bs4 import BeautifulSoup @@ -103,6 +105,8 @@ async def parse_tanner_response(self, requested_name, detection): requested_name = self.run_args.index_page requested_name = unquote(requested_name) try: + if self.meta.get(requested_name) and self.meta[requested_name].get("redirect"): + raise web.HTTPFound(self.meta[requested_name]["redirect"]) file_name = self.meta[requested_name]["hash"] for header in self.meta[requested_name].get("headers", []): for key, value in header.items(): @@ -117,6 +121,8 @@ async def parse_tanner_response(self, requested_name, detection): break if not file_name: + if self.meta.get("/status_404") and self.meta["/status_404"].get("redirect"): + raise web.HTTPFound(self.meta["/status_404"]["redirect"]) status_code = 404 else: path = os.path.join(self.dir, file_name) From 22e5b091e2e274dac7d55adcd68959b27b960a4c Mon Sep 17 00:00:00 2001 From: lordlabuckdas <55460753+lordlabuckdas@users.noreply.github.com> Date: Thu, 22 Jul 2021 22:09:35 +0530 Subject: [PATCH 3/4] revert empty 404 cloner changes --- snare/cloner.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/snare/cloner.py b/snare/cloner.py index f80854d0..56d2a415 100644 --- a/snare/cloner.py +++ b/snare/cloner.py @@ -294,13 +294,6 @@ async def run(self): async def close(self): if not self.runner: raise Exception("Error initializing cloner!") - error_file_name, error_file_hash = self.runner._make_filename(self.runner.error_page) - # create empty file for 404 page if not present and add meta info - if not self.runner.meta.get(error_file_name): - with open(os.path.join(self.runner.target_path, error_file_hash), "wb") as _: - pass - self.runner.meta[error_file_name]["hash"] = error_file_hash - self.runner.meta[error_file_name]["content_type"] = "text/html" with open(os.path.join(self.runner.target_path, "meta.json"), "w") as mj: json.dump(self.runner.meta, mj) if self.driver: From 7a2055754e8236cdaed5245a802c142f0b977bb5 Mon Sep 17 00:00:00 2001 From: lordlabuckdas <55460753+lordlabuckdas@users.noreply.github.com> Date: Thu, 22 Jul 2021 22:10:13 +0530 Subject: [PATCH 4/4] setup middleware if error page is present and manually raise 404 w/ headers --- snare/server.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/snare/server.py b/snare/server.py index bd3e95b4..e277ccd1 100644 --- a/snare/server.py +++ b/snare/server.py @@ -69,18 +69,22 @@ async def handle_request(self, request): if previous_sess_uuid is None or not previous_sess_uuid.strip() or previous_sess_uuid != cur_sess_id: headers.add("Set-Cookie", "sess_uuid=" + cur_sess_id) + if status_code == 404: + raise web.HTTPNotFound(headers=headers) + return web.Response(body=content, status=status_code, headers=headers) async def start(self): app = web.Application() app.add_routes([web.route("*", "/{tail:.*}", self.handle_request)]) aiohttp_jinja2.setup(app, loader=jinja2.FileSystemLoader(self.dir)) - middleware = SnareMiddleware( - error_404=self.meta["/status_404"].get("hash"), - headers=self.meta["/status_404"].get("headers", []), - server_header=self.run_args.server_header, - ) - middleware.setup_middlewares(app) + if self.meta.get("/status_404"): + middleware = SnareMiddleware( + error_404=self.meta["/status_404"].get("hash"), + headers=self.meta["/status_404"].get("headers", []), + server_header=self.run_args.server_header, + ) + middleware.setup_middlewares(app) self.runner = web.AppRunner(app) await self.runner.setup()