From d30c8ca1814b27c493ff595d72ff9c5ced662221 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Tue, 30 Nov 2021 18:12:26 -0500 Subject: [PATCH] Adds a User-agent header to curl module requests The header will be of the form: peru/{version} Python-urllib/{version} Fixes #218 --- peru/resources/plugins/curl/curl_plugin.py | 20 ++++++++++++++++++-- tests/test_curl_plugin.py | 12 ++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/peru/resources/plugins/curl/curl_plugin.py b/peru/resources/plugins/curl/curl_plugin.py index 59c45796..71fedc27 100755 --- a/peru/resources/plugins/curl/curl_plugin.py +++ b/peru/resources/plugins/curl/curl_plugin.py @@ -9,10 +9,26 @@ import tarfile from urllib.error import HTTPError, URLError from urllib.parse import urlsplit +from urllib.request import Request +import peru.main import urllib.request import zipfile +def peru_version(): + return peru.main.get_version() + + +def build_request(url): + request = Request(url) + components = [ + f"peru/{peru_version()}", + f"Python-urllib/{urllib.request.__version__}" + ] + request.add_header("User-agent", " ".join(components)) + return request + + def get_request_filename(request): '''Figure out the filename for an HTTP download.''' # Check to see if a filename is specified in the HTTP headers. @@ -80,7 +96,7 @@ def plugin_sync(url, sha1): # Download directly to the destination dir. 
download_dir = dest - with urllib.request.urlopen(url) as request: + with urllib.request.urlopen(build_request(url)) as request: filename = os.environ['PERU_MODULE_FILENAME'] if not filename: filename = get_request_filename(request) @@ -151,7 +167,7 @@ def __init__(self, message): def plugin_reup(url, sha1): reup_output = os.environ['PERU_REUP_OUTPUT'] - with urllib.request.urlopen(url) as request: + with urllib.request.urlopen(build_request(url)) as request: digest = download_file(request, None) with open(reup_output, 'w') as output_file: print('sha1:', digest, file=output_file) diff --git a/tests/test_curl_plugin.py b/tests/test_curl_plugin.py index fb3add45..df4bc245 100644 --- a/tests/test_curl_plugin.py +++ b/tests/test_curl_plugin.py @@ -2,6 +2,7 @@ import importlib.machinery import io from os.path import abspath, join, dirname +import urllib.request import peru import shared @@ -100,3 +101,14 @@ def test_evil_archives(self): tar_archive = shared.test_resources / (case + '.tar') with self.assertRaises(curl_plugin.EvilArchiveError): curl_plugin.extract_tar(str(tar_archive), dest) + + def test_request_has_user_agent_header(self): + actual = curl_plugin.build_request("http://example.test") + # Header format: "peru/{version} Python-urllib/{version}". + self.assertTrue(actual.has_header("User-agent")) + ua_header = actual.get_header("User-agent") + peru_component, urllib_component = ua_header.split(' ') + _, peru_version = peru_component.split('/') + _, urllib_version = urllib_component.split('/') + self.assertEqual(peru.main.get_version(), peru_version) + self.assertEqual(urllib.request.__version__, urllib_version)