From 3fbc406c41010678e279723e84e59f89772a2460 Mon Sep 17 00:00:00 2001 From: Lian Hu Date: Tue, 5 Dec 2023 00:30:27 +0100 Subject: [PATCH] Validate the path from header 'x-pghoard-target-path'[BF-2344] --- pghoard/webserver.py | 21 ++++++++++++-- test/basebackup/test_basebackup.py | 6 ++-- test/test_webserver.py | 46 ++++++++++++++++++++++++------ 3 files changed, 59 insertions(+), 14 deletions(-) diff --git a/pghoard/webserver.py b/pghoard/webserver.py index f92cee28..e86ca9b0 100644 --- a/pghoard/webserver.py +++ b/pghoard/webserver.py @@ -427,16 +427,33 @@ def _try_save_and_verify_restored_file(self, filetype, filename, prefetch_target os.unlink(prefetch_target_path) return e + @staticmethod + def _validate_target_path(config, target_path): + # The `restore_command` (postgres_command.py or pghoard_postgres_command_go.go) called by PostgresSQL has + # prepended the PostgresSQL 'data' directory with `%p` parameter from PostgresSQL server, hence here + # `target_path` is expected to be an absolute path. + # For example, the target_path `/var/lib/pgsql/11/data/pg_wal/RECOVERYXLOG` comes from pghoard restore_command + # prepended %p parameter `pg_wal/RECOVERYXLOG` with `/var/lib/pgsql/11/data` + + # Use pathlib to resolve the symlinks and '.' in case of untrusted user input, e.g., + # /var/lib/pgsql/11/data/../../../../../etc/passwd, could be actually /etc/passwd + data_dir = Path(config["pg_data_directory"]).resolve() + xlog_file = Path(target_path).resolve() + if data_dir not in xlog_file.parents: + raise HttpResponse(f"Invalid xlog file path {target_path}, it should be in data directory", status=400) + def get_wal_or_timeline_file(self, site, filename, filetype): target_path = self.headers.get("x-pghoard-target-path") if not target_path: raise HttpResponse("x-pghoard-target-path header missing from download", status=400) + site_config = self.server.config["backup_sites"][site] + xlog_dir = get_pg_wal_directory(site_config) + + self._validate_target_path(site_config, target_path) self._process_completed_download_operations() # See if we have already prefetched the file - site_config = self.server.config["backup_sites"][site] - xlog_dir = get_pg_wal_directory(site_config) prefetch_target_path = os.path.join(xlog_dir, "{}.pghoard.prefetch".format(filename)) if os.path.exists(prefetch_target_path): ex = self._try_save_and_verify_restored_file(filetype, filename, prefetch_target_path, target_path) diff --git a/test/basebackup/test_basebackup.py b/test/basebackup/test_basebackup.py index c613f137..c417de3b 100644 --- a/test/basebackup/test_basebackup.py +++ b/test/basebackup/test_basebackup.py @@ -493,9 +493,9 @@ def test_basebackups_tablespaces(self, capsys, db, pghoard, tmpdir, pg_version: switch_wal(conn) self._test_create_basebackup(capsys, db, pghoard, "local-tar") switch_wal(conn) - - backup_out = tmpdir.join("test-restore").strpath - backup_ts_out = tmpdir.join("test-restore-tstest").strpath + data_dir = pghoard.config["backup_sites"][pghoard.test_site]["pg_data_directory"] + backup_out = os.path.join(data_dir, "test-restore") + backup_ts_out = os.path.join(data_dir, "test-restore-tstest") # Tablespaces are extracted to their previous absolute paths by default, but the path must be empty # and it isn't as it's still used by the running PG diff --git a/test/test_webserver.py b/test/test_webserver.py index 4a9ff725..92e9d208 100644 --- a/test/test_webserver.py +++ b/test/test_webserver.py @@ -389,7 +389,15 @@ def test_get_invalid(self, pghoard, tmpdir): headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid"))} conn.request("GET", nonexistent_wal, headers=headers) status = conn.getresponse().status + assert status == 400 + + wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site]) + restore_target = os.path.join(wal_dir, "test_get_invalid") + headers = {"x-pghoard-target-path": restore_target} + conn.request("GET", nonexistent_wal, headers=headers) + status = conn.getresponse().status assert status == 404 + # no x-pghoard-target-path for head headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid"))} conn.request("HEAD", nonexistent_wal, headers=headers) @@ -431,12 +439,14 @@ def test_get_invalid(self, pghoard, tmpdir): ) # write to non-existent directory - headers = {"x-pghoard-target-path": str(tmpdir.join("NA", "test_get_invalid"))} + wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site]) + restore_target = os.path.join(wal_dir, "NA", "test_get_invalid") + headers = {"x-pghoard-target-path": restore_target} conn.request("GET", valid_wal, headers=headers) status = conn.getresponse().status assert status == 409 - def test_get_invalid_retry(self, pghoard_no_mp, tmpdir): + def test_get_invalid_retry(self, pghoard_no_mp): # inject a failure by making a static function fail failures = [0, ""] pghoard = pghoard_no_mp @@ -465,7 +475,9 @@ def failing_func(*args): pghoard.webserver.server.prefetch_404.clear() failures[0] = 2 + prefetch_n failures[1] = "test_two_fails_success" - headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid_2"))} + wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site]) + restore_target = os.path.join(wal_dir, "test_get_invalid_2") + headers = {"x-pghoard-target-path": restore_target} conn.request("GET", valid_wal, headers=headers) status = conn.getresponse().status assert status == 201 @@ -475,7 +487,8 @@ def failing_func(*args): pghoard.webserver.server.prefetch_404.clear() failures[0] = 4 + prefetch_n failures[1] = "test_three_fails_error" - headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid_3"))} + restore_target = os.path.join(wal_dir, "test_get_invalid_3") + headers = {"x-pghoard-target-path": restore_target} conn.request("GET", valid_wal, headers=headers) status = conn.getresponse().status assert status == 500 @@ -485,7 +498,7 @@ def failing_func(*args): for ta in pghoard.transfer_agents: ta.site_transfers = {} - def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir): + def test_retry_fetches_remote(self, pghoard_no_mp): """First fetch request for file returns data from local disk, second from cloud object storage""" pghoard = pghoard_no_mp valid_wal_seg = "0000DDDD0000000D000000FC" @@ -518,8 +531,9 @@ def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir): f.write(wal_header_for_file(other_segment)) conn = HTTPConnection(host="127.0.0.1", port=pghoard.config["http_port"]) + wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site]) + local_name = os.path.join(wal_dir, "test_get_local") - local_name = str(tmpdir.join("test_get_local")) headers = {"x-pghoard-target-path": local_name} conn.request("GET", valid_wal, headers=headers) status = conn.getresponse().status @@ -529,7 +543,7 @@ def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir): assert recent_files["xlog"]["name"] == valid_wal_seg assert 0 < (time.time() - recent_files["xlog"]["time"]) < 1 - storage_name = str(tmpdir.join("test_get_storage")) + storage_name = os.path.join(wal_dir, "test_get_storage") headers = {"x-pghoard-target-path": storage_name} conn.request("GET", valid_wal, headers=headers) status = conn.getresponse().status @@ -543,13 +557,13 @@ def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir): # Fetch another 10 WAL segments that are also available on disk, # after which the other one is fetched from disk again for other_path in other_paths: - headers = {"x-pghoard-target-path": str(tmpdir.join("RECOVERYXLOG"))} + headers = {"x-pghoard-target-path": os.path.join(wal_dir, "RECOVERYXLOG")} conn.request("GET", other_path, headers=headers) status = conn.getresponse().status assert status == 201 # Now get the original one again - storage_name = str(tmpdir.join("test_get_storage2")) + storage_name = os.path.join(wal_dir, "test_get_storage2") headers = {"x-pghoard-target-path": storage_name} conn.request("GET", valid_wal, headers=headers) status = conn.getresponse().status @@ -737,3 +751,17 @@ def test_parse_request_invalid_path(self, pghoard): resp = conn.getresponse() assert resp.status == 400 assert resp.read() == b"Invalid 'archive' request, only single file retrieval is supported for now" + + def test_uncontrolled_target_path(self, pghoard): + wal_seg = "0000000100000001000000AB" + wal_file = "/{}/archive/{}".format(pghoard.test_site, wal_seg) + conn = HTTPConnection(host="127.0.0.1", port=pghoard.config["http_port"]) + headers = {"x-pghoard-target-path": "/etc/passwd"} + conn.request("GET", wal_file, headers=headers) + status = conn.getresponse().status + assert status == 400 + + headers = {"x-pghoard-target-path": "/root/.ssh/id_rsa"} + conn.request("GET", wal_file, headers=headers) + status = conn.getresponse().status + assert status == 400