Skip to content

Commit

Permalink
Validate the path from header 'x-pghoard-target-path'[BF-2344]
Browse files Browse the repository at this point in the history
  • Loading branch information
0xlianhu committed Dec 12, 2023
1 parent 8a2d730 commit 3fbc406
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 14 deletions.
21 changes: 19 additions & 2 deletions pghoard/webserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,16 +427,33 @@ def _try_save_and_verify_restored_file(self, filetype, filename, prefetch_target
os.unlink(prefetch_target_path)
return e

@staticmethod
def _validate_target_path(config, target_path):
# The `restore_command` (postgres_command.py or pghoard_postgres_command_go.go) called by PostgresSQL has
# prepended the PostgresSQL 'data' directory with `%p` parameter from PostgresSQL server, hence here
# `target_path` is expected to be an absolute path.
# For example, the target_path `/var/lib/pgsql/11/data/pg_wal/RECOVERYXLOG` comes from pghoard restore_command
# prepended %p parameter `pg_wal/RECOVERYXLOG` with `/var/lib/pgsql/11/data`

# Use pathlib to resolve the symlinks and '.' in case of untrusted user input, e.g.,
# /var/lib/pgsql/11/data/../../../../../etc/passwd, could be actually /etc/passwd
data_dir = Path(config["pg_data_directory"]).resolve()

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
xlog_file = Path(target_path).resolve()

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.
if data_dir not in xlog_file.parents:
raise HttpResponse(f"Invalid xlog file path {target_path}, it should be in data directory", status=400)

def get_wal_or_timeline_file(self, site, filename, filetype):
target_path = self.headers.get("x-pghoard-target-path")
if not target_path:
raise HttpResponse("x-pghoard-target-path header missing from download", status=400)

site_config = self.server.config["backup_sites"][site]
xlog_dir = get_pg_wal_directory(site_config)

self._validate_target_path(site_config, target_path)
self._process_completed_download_operations()

# See if we have already prefetched the file
site_config = self.server.config["backup_sites"][site]
xlog_dir = get_pg_wal_directory(site_config)
prefetch_target_path = os.path.join(xlog_dir, "{}.pghoard.prefetch".format(filename))
if os.path.exists(prefetch_target_path):
ex = self._try_save_and_verify_restored_file(filetype, filename, prefetch_target_path, target_path)
Expand Down
6 changes: 3 additions & 3 deletions test/basebackup/test_basebackup.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,9 +493,9 @@ def test_basebackups_tablespaces(self, capsys, db, pghoard, tmpdir, pg_version:
switch_wal(conn)
self._test_create_basebackup(capsys, db, pghoard, "local-tar")
switch_wal(conn)

backup_out = tmpdir.join("test-restore").strpath
backup_ts_out = tmpdir.join("test-restore-tstest").strpath
data_dir = pghoard.config["backup_sites"][pghoard.test_site]["pg_data_directory"]
backup_out = os.path.join(data_dir, "test-restore")
backup_ts_out = os.path.join(data_dir, "test-restore-tstest")

# Tablespaces are extracted to their previous absolute paths by default, but the path must be empty
# and it isn't as it's still used by the running PG
Expand Down
46 changes: 37 additions & 9 deletions test/test_webserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,15 @@ def test_get_invalid(self, pghoard, tmpdir):
headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid"))}
conn.request("GET", nonexistent_wal, headers=headers)
status = conn.getresponse().status
assert status == 400

wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site])
restore_target = os.path.join(wal_dir, "test_get_invalid")
headers = {"x-pghoard-target-path": restore_target}
conn.request("GET", nonexistent_wal, headers=headers)
status = conn.getresponse().status
assert status == 404

# no x-pghoard-target-path for head
headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid"))}
conn.request("HEAD", nonexistent_wal, headers=headers)
Expand Down Expand Up @@ -431,12 +439,14 @@ def test_get_invalid(self, pghoard, tmpdir):
)

# write to non-existent directory
headers = {"x-pghoard-target-path": str(tmpdir.join("NA", "test_get_invalid"))}
wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site])
restore_target = os.path.join(wal_dir, "NA", "test_get_invalid")
headers = {"x-pghoard-target-path": restore_target}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
assert status == 409

def test_get_invalid_retry(self, pghoard_no_mp, tmpdir):
def test_get_invalid_retry(self, pghoard_no_mp):
# inject a failure by making a static function fail
failures = [0, ""]
pghoard = pghoard_no_mp
Expand Down Expand Up @@ -465,7 +475,9 @@ def failing_func(*args):
pghoard.webserver.server.prefetch_404.clear()
failures[0] = 2 + prefetch_n
failures[1] = "test_two_fails_success"
headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid_2"))}
wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site])
restore_target = os.path.join(wal_dir, "test_get_invalid_2")
headers = {"x-pghoard-target-path": restore_target}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
assert status == 201
Expand All @@ -475,7 +487,8 @@ def failing_func(*args):
pghoard.webserver.server.prefetch_404.clear()
failures[0] = 4 + prefetch_n
failures[1] = "test_three_fails_error"
headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid_3"))}
restore_target = os.path.join(wal_dir, "test_get_invalid_3")
headers = {"x-pghoard-target-path": restore_target}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
assert status == 500
Expand All @@ -485,7 +498,7 @@ def failing_func(*args):
for ta in pghoard.transfer_agents:
ta.site_transfers = {}

def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir):
def test_retry_fetches_remote(self, pghoard_no_mp):
"""First fetch request for file returns data from local disk, second from cloud object storage"""
pghoard = pghoard_no_mp
valid_wal_seg = "0000DDDD0000000D000000FC"
Expand Down Expand Up @@ -518,8 +531,9 @@ def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir):
f.write(wal_header_for_file(other_segment))

conn = HTTPConnection(host="127.0.0.1", port=pghoard.config["http_port"])
wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site])
local_name = os.path.join(wal_dir, "test_get_local")

local_name = str(tmpdir.join("test_get_local"))
headers = {"x-pghoard-target-path": local_name}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
Expand All @@ -529,7 +543,7 @@ def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir):
assert recent_files["xlog"]["name"] == valid_wal_seg
assert 0 < (time.time() - recent_files["xlog"]["time"]) < 1

storage_name = str(tmpdir.join("test_get_storage"))
storage_name = os.path.join(wal_dir, "test_get_storage")
headers = {"x-pghoard-target-path": storage_name}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
Expand All @@ -543,13 +557,13 @@ def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir):
# Fetch another 10 WAL segments that are also available on disk,
# after which the other one is fetched from disk again
for other_path in other_paths:
headers = {"x-pghoard-target-path": str(tmpdir.join("RECOVERYXLOG"))}
headers = {"x-pghoard-target-path": os.path.join(wal_dir, "RECOVERYXLOG")}
conn.request("GET", other_path, headers=headers)
status = conn.getresponse().status
assert status == 201

# Now get the original one again
storage_name = str(tmpdir.join("test_get_storage2"))
storage_name = os.path.join(wal_dir, "test_get_storage2")
headers = {"x-pghoard-target-path": storage_name}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
Expand Down Expand Up @@ -737,3 +751,17 @@ def test_parse_request_invalid_path(self, pghoard):
resp = conn.getresponse()
assert resp.status == 400
assert resp.read() == b"Invalid 'archive' request, only single file retrieval is supported for now"

def test_uncontrolled_target_path(self, pghoard):
wal_seg = "0000000100000001000000AB"
wal_file = "/{}/archive/{}".format(pghoard.test_site, wal_seg)
conn = HTTPConnection(host="127.0.0.1", port=pghoard.config["http_port"])
headers = {"x-pghoard-target-path": "/etc/passwd"}
conn.request("GET", wal_file, headers=headers)
status = conn.getresponse().status
assert status == 400

headers = {"x-pghoard-target-path": "/root/.ssh/id_rsa"}
conn.request("GET", wal_file, headers=headers)
status = conn.getresponse().status
assert status == 400

0 comments on commit 3fbc406

Please sign in to comment.