Validate the input path from header 'x-pghoard-target-path' #608

Merged 1 commit on Dec 15, 2023
23 changes: 21 additions & 2 deletions pghoard/webserver.py
@@ -427,16 +427,35 @@
os.unlink(prefetch_target_path)
return e

@staticmethod
def _validate_target_path(config, target_path):
# The `restore_command` (postgres_command.py or pghoard_postgres_command_go.go) called by PostgreSQL
# prepends the PostgreSQL data directory to the `%p` parameter supplied by the PostgreSQL server, so
# `target_path` is expected to be an absolute path here.
# For example, the target_path `/var/lib/pgsql/11/data/pg_wal/RECOVERYXLOG` comes from the pghoard
# restore_command prepending `/var/lib/pgsql/11/data` to the `%p` parameter `pg_wal/RECOVERYXLOG`.

# Use pathlib to resolve symlinks and `..` components in case of untrusted user input; e.g.
# /var/lib/pgsql/11/data/../../../../../etc/passwd actually resolves to /etc/passwd.
if not os.path.isabs(target_path):
raise HttpResponse(f"Invalid xlog file path {target_path}, an absolute path expected", status=400)
data_dir = Path(config["pg_data_directory"]).resolve()
xlog_file = Path(target_path).resolve()
if data_dir not in xlog_file.parents:
raise HttpResponse(f"Invalid xlog file path {target_path}, it should be in data directory", status=400)

def get_wal_or_timeline_file(self, site, filename, filetype):
target_path = self.headers.get("x-pghoard-target-path")
if not target_path:
raise HttpResponse("x-pghoard-target-path header missing from download", status=400)

site_config = self.server.config["backup_sites"][site]
xlog_dir = get_pg_wal_directory(site_config)

self._validate_target_path(site_config, target_path)
self._process_completed_download_operations()

# See if we have already prefetched the file
site_config = self.server.config["backup_sites"][site]
xlog_dir = get_pg_wal_directory(site_config)
prefetch_target_path = os.path.join(xlog_dir, "{}.pghoard.prefetch".format(filename))
if os.path.exists(prefetch_target_path):
ex = self._try_save_and_verify_restored_file(filetype, filename, prefetch_target_path, target_path)
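
For reference, a minimal sketch of the containment check that `_validate_target_path` performs, under the same assumptions as the comment above: `Path.resolve()` collapses `..` components and follows symlinks, and the resolved target must have the resolved data directory among its parents. The helper name and example paths are illustrative, not part of the patch, and the example assumes the directories involved are not themselves symlinks.

```python
from pathlib import Path

def target_is_inside_data_dir(pg_data_directory: str, target_path: str) -> bool:
    # Resolve both paths so symlinks and ".." components cannot escape the data directory.
    data_dir = Path(pg_data_directory).resolve()
    xlog_file = Path(target_path).resolve()
    return data_dir in xlog_file.parents

# A normal restore target produced by restore_command is accepted ...
assert target_is_inside_data_dir("/var/lib/pgsql/11/data",
                                 "/var/lib/pgsql/11/data/pg_wal/RECOVERYXLOG")
# ... while a traversal attempt resolves to /etc/passwd and is rejected.
assert not target_is_inside_data_dir("/var/lib/pgsql/11/data",
                                     "/var/lib/pgsql/11/data/../../../../../etc/passwd")
```

Checking `data_dir in xlog_file.parents` rather than a plain string prefix also avoids false positives such as `/var/lib/pgsql/11/data-old`.
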
6 changes: 3 additions & 3 deletions test/basebackup/test_basebackup.py
@@ -493,9 +493,9 @@ def test_basebackups_tablespaces(self, capsys, db, pghoard, tmpdir, pg_version:
switch_wal(conn)
self._test_create_basebackup(capsys, db, pghoard, "local-tar")
switch_wal(conn)

backup_out = tmpdir.join("test-restore").strpath
backup_ts_out = tmpdir.join("test-restore-tstest").strpath
data_dir = pghoard.config["backup_sites"][pghoard.test_site]["pg_data_directory"]
backup_out = os.path.join(data_dir, "test-restore")
backup_ts_out = os.path.join(data_dir, "test-restore-tstest")

# Tablespaces are extracted to their previous absolute paths by default, but the path must be empty,
# and here it is not, since it is still in use by the running PG
51 changes: 42 additions & 9 deletions test/test_webserver.py
@@ -389,7 +389,15 @@ def test_get_invalid(self, pghoard, tmpdir):
headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid"))}
conn.request("GET", nonexistent_wal, headers=headers)
status = conn.getresponse().status
assert status == 400

wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site])
restore_target = os.path.join(wal_dir, "test_get_invalid")
headers = {"x-pghoard-target-path": restore_target}
conn.request("GET", nonexistent_wal, headers=headers)
status = conn.getresponse().status
assert status == 404

# no x-pghoard-target-path for head
headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid"))}
conn.request("HEAD", nonexistent_wal, headers=headers)
@@ -431,12 +439,14 @@ def test_get_invalid(self, pghoard, tmpdir):
)

# write to non-existent directory
headers = {"x-pghoard-target-path": str(tmpdir.join("NA", "test_get_invalid"))}
wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site])
restore_target = os.path.join(wal_dir, "NA", "test_get_invalid")
headers = {"x-pghoard-target-path": restore_target}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
assert status == 409

def test_get_invalid_retry(self, pghoard_no_mp, tmpdir):
def test_get_invalid_retry(self, pghoard_no_mp):
# inject a failure by making a static function fail
failures = [0, ""]
pghoard = pghoard_no_mp
@@ -465,7 +475,9 @@ def failing_func(*args):
pghoard.webserver.server.prefetch_404.clear()
failures[0] = 2 + prefetch_n
failures[1] = "test_two_fails_success"
headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid_2"))}
wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site])
restore_target = os.path.join(wal_dir, "test_get_invalid_2")
headers = {"x-pghoard-target-path": restore_target}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
assert status == 201
@@ -475,7 +487,8 @@ def failing_func(*args):
pghoard.webserver.server.prefetch_404.clear()
failures[0] = 4 + prefetch_n
failures[1] = "test_three_fails_error"
headers = {"x-pghoard-target-path": str(tmpdir.join("test_get_invalid_3"))}
restore_target = os.path.join(wal_dir, "test_get_invalid_3")
headers = {"x-pghoard-target-path": restore_target}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
assert status == 500
@@ -485,7 +498,7 @@ def failing_func(*args):
for ta in pghoard.transfer_agents:
ta.site_transfers = {}

def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir):
def test_retry_fetches_remote(self, pghoard_no_mp):
"""First fetch request for file returns data from local disk, second from cloud object storage"""
pghoard = pghoard_no_mp
valid_wal_seg = "0000DDDD0000000D000000FC"
@@ -518,8 +531,9 @@ def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir):
f.write(wal_header_for_file(other_segment))

conn = HTTPConnection(host="127.0.0.1", port=pghoard.config["http_port"])
wal_dir = get_pg_wal_directory(pghoard.config["backup_sites"][pghoard.test_site])
local_name = os.path.join(wal_dir, "test_get_local")

local_name = str(tmpdir.join("test_get_local"))
headers = {"x-pghoard-target-path": local_name}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
@@ -529,7 +543,7 @@ def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir):
assert recent_files["xlog"]["name"] == valid_wal_seg
assert 0 < (time.time() - recent_files["xlog"]["time"]) < 1

storage_name = str(tmpdir.join("test_get_storage"))
storage_name = os.path.join(wal_dir, "test_get_storage")
headers = {"x-pghoard-target-path": storage_name}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
@@ -543,13 +557,13 @@ def test_retry_fetches_remote(self, pghoard_no_mp, tmpdir):
# Fetch another 10 WAL segments that are also available on disk,
# after which the other one is fetched from disk again
for other_path in other_paths:
headers = {"x-pghoard-target-path": str(tmpdir.join("RECOVERYXLOG"))}
headers = {"x-pghoard-target-path": os.path.join(wal_dir, "RECOVERYXLOG")}
conn.request("GET", other_path, headers=headers)
status = conn.getresponse().status
assert status == 201

# Now get the original one again
storage_name = str(tmpdir.join("test_get_storage2"))
storage_name = os.path.join(wal_dir, "test_get_storage2")
headers = {"x-pghoard-target-path": storage_name}
conn.request("GET", valid_wal, headers=headers)
status = conn.getresponse().status
@@ -737,3 +751,22 @@ def test_parse_request_invalid_path(self, pghoard):
resp = conn.getresponse()
assert resp.status == 400
assert resp.read() == b"Invalid 'archive' request, only single file retrieval is supported for now"

def test_uncontrolled_target_path(self, pghoard):
wal_seg = "0000000100000001000000AB"
wal_file = "/{}/archive/{}".format(pghoard.test_site, wal_seg)
conn = HTTPConnection(host="127.0.0.1", port=pghoard.config["http_port"])
headers = {"x-pghoard-target-path": "/etc/passwd"}
conn.request("GET", wal_file, headers=headers)
status = conn.getresponse().status
assert status == 400

headers = {"x-pghoard-target-path": "/root/.ssh/id_rsa"}
conn.request("GET", wal_file, headers=headers)
status = conn.getresponse().status
assert status == 400

headers = {"x-pghoard-target-path": "../../../../etc/passwd"}
conn.request("GET", wal_file, headers=headers)
status = conn.getresponse().status
assert status == 400
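
For orientation, a hedged sketch of the client side these tests emulate: as described in the webserver.py comment, the restore command helper (postgres_command.py / pghoard_postgres_command_go.go) turns PostgreSQL's relative `%p` into an absolute path under the data directory and passes it in the `x-pghoard-target-path` header, so only paths inside the data directory should normally reach the webserver. The function name, default port, and site below are illustrative, not the project's actual API.

```python
import os
from http.client import HTTPConnection

def restore_wal(data_directory: str, relative_target: str, wal_name: str,
                site: str = "example-site", http_port: int = 16000) -> int:
    # PostgreSQL passes %p relative to the data directory, e.g. "pg_wal/RECOVERYXLOG";
    # prepend the data directory so the webserver receives an absolute target path.
    target_path = os.path.join(data_directory, relative_target)
    conn = HTTPConnection(host="127.0.0.1", port=http_port)
    conn.request("GET", "/{}/archive/{}".format(site, wal_name),
                 headers={"x-pghoard-target-path": target_path})
    # 201 when the segment was restored; 400 when the target path fails validation.
    return conn.getresponse().status
```
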