Skip to content

Commit

Permalink
🐛 Only allow to search *owned* files in storage search endpoint (ITIS…
Browse files Browse the repository at this point in the history
  • Loading branch information
bisgaard-itis authored May 3, 2024
1 parent 30a295d commit 5fe0a1c
Show file tree
Hide file tree
Showing 16 changed files with 66 additions and 96 deletions.
53 changes: 25 additions & 28 deletions api/specs/storage/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -652,14 +652,14 @@ paths:
responses:
'204':
description: Successful Response
/v0/simcore-s3/files/metadata:search:
/v0/simcore-s3/files/metadata:search_owned:
post:
tags:
- simcore-s3
summary: search for files starting with
description: search for files starting with `startswith` and/or matching a sha256_checksum
in the file_meta_data table
operationId: search_files
operationId: search_owned_files
parameters:
- required: true
schema:
Expand All @@ -683,16 +683,6 @@ paths:
title: Sha256 Checksum
name: sha256_checksum
in: query
- required: false
schema:
type: string
enum:
- read
- write
title: Access Right
default: read
name: access_right
in: query
responses:
'200':
description: Successful Response
Expand Down Expand Up @@ -1292,36 +1282,40 @@ components:
title: PresignedLink
S3Settings:
properties:
S3_SECURE:
type: boolean
title: S3 Secure
default: false
S3_ENDPOINT:
type: string
title: S3 Endpoint
S3_ACCESS_KEY:
type: string
maxLength: 50
minLength: 1
title: S3 Access Key
S3_SECRET_KEY:
type: string
title: S3 Secret Key
S3_ACCESS_TOKEN:
type: string
title: S3 Access Token
S3_BUCKET_NAME:
type: string
maxLength: 50
minLength: 1
title: S3 Bucket Name
S3_ENDPOINT:
type: string
maxLength: 65536
minLength: 1
format: uri
title: S3 Endpoint
description: do not define if using standard AWS
S3_REGION:
type: string
maxLength: 50
minLength: 1
title: S3 Region
default: us-east-1
S3_SECRET_KEY:
type: string
maxLength: 50
minLength: 1
title: S3 Secret Key
additionalProperties: false
type: object
required:
- S3_ENDPOINT
- S3_ACCESS_KEY
- S3_SECRET_KEY
- S3_BUCKET_NAME
- S3_REGION
- S3_SECRET_KEY
title: S3Settings
description: "- Customized configuration for all settings\n- If a field is a\
\ BaseCustomSettings subclass, it allows creating a default from env vars\
Expand Down Expand Up @@ -1381,6 +1375,9 @@ components:
title: TaskGet
TaskProgress:
properties:
task_id:
type: string
title: Task Id
message:
type: string
title: Message
Expand Down
4 changes: 2 additions & 2 deletions api/specs/storage/scripts/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,14 +343,14 @@ async def delete_folders_of_project(
f"/{API_VTAG}/simcore-s3/files/metadata:search",
response_model=Envelope[FileMetaDataGet],
tags=TAGS_SIMCORE_S3,
summary="search for files starting with",
summary="search for owned files",
operation_id="search_files",
)
async def search_files(
user_id: UserID,
kind: Literal["owned"],
startswith: str = "",
sha256_checksum: SHA256Str | None = None,
access_right: Literal["read", "write"] = "read",
):
"""search for files starting with `startswith` and/or matching a sha256_checksum in the file_meta_data table"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,7 @@
UploadLinks,
)
from ...services.service_exception_handling import DEFAULT_BACKEND_SERVICE_STATUS_CODES
from ...services.storage import (
AccessRight,
StorageApi,
StorageFileMetaData,
to_file_api_model,
)
from ...services.storage import StorageApi, StorageFileMetaData, to_file_api_model
from ..dependencies.authentication import get_current_user_id
from ..dependencies.services import get_api_client
from ._common import API_SERVER_DEV_FEATURES_ENABLED
Expand Down Expand Up @@ -73,16 +68,16 @@ async def _get_file(
file_id: UUID,
storage_client: StorageApi,
user_id: int,
access_right: AccessRight,
):
"""Gets metadata for a given file resource"""

try:
stored_files: list[StorageFileMetaData] = await storage_client.search_files(
stored_files: list[
StorageFileMetaData
] = await storage_client.search_owned_files(
user_id=user_id,
file_id=file_id,
sha256_checksum=None,
access_right=access_right,
)
if not stored_files:
msg = "Not found in storage"
Expand Down Expand Up @@ -289,7 +284,6 @@ async def get_file(
file_id=file_id,
storage_client=storage_client,
user_id=user_id,
access_right="read",
)


Expand All @@ -307,11 +301,10 @@ async def search_files_page(
file_id: UUID | None = None,
):
"""Search files"""
stored_files: list[StorageFileMetaData] = await storage_client.search_files(
stored_files: list[StorageFileMetaData] = await storage_client.search_owned_files(
user_id=user_id,
file_id=file_id,
sha256_checksum=sha256_checksum,
access_right="read",
)
if page_params.offset > len(stored_files):
_logger.debug("File with sha256_checksum=%d not found.", sha256_checksum)
Expand Down Expand Up @@ -342,7 +335,6 @@ async def delete_file(
file_id=file_id,
storage_client=storage_client,
user_id=user_id,
access_right="write",
)
await storage_client.delete_file(
user_id=user_id, quoted_storage_file_id=file.quoted_storage_file_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,10 @@ async def get_job_outputs(
if isinstance(value, BaseFileLink):
file_id: UUID = File.create_id(*value.path.split("/"))

found = await storage_client.search_files(
found = await storage_client.search_owned_files(
user_id=user_id,
file_id=file_id,
sha256_checksum=None,
access_right="read",
)
if found:
assert len(found) == 1 # nosec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ async def list_files(self, user_id: int) -> list[StorageFileMetaData]:
response = await self.client.post(
"/simcore-s3/files/metadata:search",
params={
"kind": "owned",
"user_id": str(user_id),
"startswith": "api/",
},
Expand All @@ -74,24 +75,23 @@ async def list_files(self, user_id: int) -> list[StorageFileMetaData]:
return files

@_exception_mapper({})
async def search_files(
async def search_owned_files(
self,
*,
user_id: int,
file_id: UUID | None,
sha256_checksum: SHA256Str | None,
access_right: AccessRight,
) -> list[StorageFileMetaData]:
# NOTE: can NOT use /locations/0/files/metadata with uuid_filter=api/ because
# logic in storage 'wrongly' assumes that all data is associated to a project and
# here there is no project, so it would always returns an empty
params: dict = {
"kind": "owned",
"user_id": f"{user_id}",
"startswith": None if file_id is None else f"api/{file_id}",
"sha256_checksum": None
if sha256_checksum is None
else f"{sha256_checksum}",
"access_right": access_right,
}

response = await self.client.post(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,10 @@ async def create_job_outputs_from_project_outputs(
path = value["path"]
file_id: UUID = File.create_id(*path.split("/"))

if found := await storage_client.search_files(
if found := await storage_client.search_owned_files(
user_id=user_id,
file_id=file_id,
sha256_checksum=None,
access_right="read",
):
assert len(found) == 1 # nosec
results[name] = to_file_api_model(found[0])
Expand Down
4 changes: 2 additions & 2 deletions services/api-server/tests/mocks/delete_file.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
[
{
"name": "POST /simcore-s3/files/metadata:search",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?user_id=1&startswith=api/cc3dd190-8c87-3686-b581-d9f7809d312e')>",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?kind=owned&user_id=1&startswith=api/cc3dd190-8c87-3686-b581-d9f7809d312e')>",
"method": "POST",
"host": "storage",
"path": {
"path": "/v0/simcore-s3/files/metadata:search",
"path_parameters": []
},
"query": "user_id=1&startswith=api/cc3dd190-8c87-3686-b581-d9f7809d312e",
"query": "kind=owned&user_id=1&startswith=api/cc3dd190-8c87-3686-b581-d9f7809d312e",
"request_payload": null,
"response_body": {
"data": [
Expand Down
4 changes: 2 additions & 2 deletions services/api-server/tests/mocks/get_solver_outputs.json
Original file line number Diff line number Diff line change
Expand Up @@ -219,14 +219,14 @@
},
{
"name": "POST /simcore-s3/files/metadata:search",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?user_id=1&startswith=api/4ea24645-fd8c-339b-9621-ae045d45d94d&access_right=read')>",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?kind=owned&user_id=1&startswith=api/4ea24645-fd8c-339b-9621-ae045d45d94d')>",
"method": "POST",
"host": "storage",
"path": {
"path": "/v0/simcore-s3/files/metadata:search",
"path_parameters": []
},
"query": "user_id=1&startswith=api/4ea24645-fd8c-339b-9621-ae045d45d94d&access_right=read",
"query": "kind=owned&user_id=1&startswith=api/4ea24645-fd8c-339b-9621-ae045d45d94d",
"request_payload": null,
"response_body": {
"data": [
Expand Down
8 changes: 4 additions & 4 deletions services/api-server/tests/mocks/run_study_workflow.json
Original file line number Diff line number Diff line change
Expand Up @@ -3238,15 +3238,15 @@
}
},
{
"name": "POST /simcore-s3/files/metadata:search",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?user_id=1&startswith=api%2F5b0cd3cd-5ceb-3d74-9961-246840c1e1d4&access_right=read')>",
"name": "POST /simcore-s3/files/metadata:search_owned",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search_owned?user_id=1&startswith=api%2F5b0cd3cd-5ceb-3d74-9961-246840c1e1d4')>",
"method": "POST",
"host": "storage",
"path": {
"path": "/v0/simcore-s3/files/metadata:search",
"path": "/v0/simcore-s3/files/metadata:search_owned",
"path_parameters": []
},
"query": "user_id=1&startswith=api%2F5b0cd3cd-5ceb-3d74-9961-246840c1e1d4&access_right=read",
"query": "user_id=1&startswith=api%2F5b0cd3cd-5ceb-3d74-9961-246840c1e1d4",
"response_body": {
"data": []
}
Expand Down
4 changes: 2 additions & 2 deletions services/api-server/tests/mocks/search_file_checksum.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
[
{
"name": "POST /simcore-s3/files/metadata:search",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?user_id=2&sha256_checksum=92c7a39ce451ee57edd6da0b9c734ca9e6423a20410f73ce55e0d07cfd603b9d')>",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?kind=owned&user_id=2&sha256_checksum=92c7a39ce451ee57edd6da0b9c734ca9e6423a20410f73ce55e0d07cfd603b9d')>",
"method": "POST",
"host": "storage",
"path": {
"path": "/v0/simcore-s3/files/metadata:search",
"path_parameters": []
},
"query": "user_id=2&sha256_checksum=92c7a39ce451ee57edd6da0b9c734ca9e6423a20410f73ce55e0d07cfd603b9d",
"query": "kind=owned&user_id=2&sha256_checksum=92c7a39ce451ee57edd6da0b9c734ca9e6423a20410f73ce55e0d07cfd603b9d",
"request_payload": null,
"response_body": {
"data": [
Expand Down
4 changes: 0 additions & 4 deletions services/api-server/tests/unit/test_api_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,6 @@ def search_side_effect(
path_params: dict[str, Any],
capture: HttpApiCallCaptureModel,
) -> dict[str, Any]:
request_query: dict[str, str] = dict(
pair.split("=") for pair in request.url.query.decode("utf8").split("&")
)
assert request_query.get("access_right") == "write"
assert isinstance(capture.response_body, dict)
response: dict[str, Any] = capture.response_body
return response
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,20 +130,11 @@ async def search_files(request: web.Request) -> web.Response:
get_dsm_provider(request.app).get(SimcoreS3DataManager.get_location_id()),
)
data: list[FileMetaData]
if query_params.access_right == "read":
data = await dsm.search_read_access_files(
query_params.user_id,
file_id_prefix=query_params.startswith,
sha256_checksum=query_params.sha256_checksum,
)
elif query_params.access_right == "write":
data = await dsm.search_owned_files(
query_params.user_id,
file_id_prefix=query_params.startswith,
sha256_checksum=query_params.sha256_checksum,
)
else:
raise ValueError(f"The query param {query_params.access_right=} is unexpected")
data = await dsm.search_owned_files(
query_params.user_id,
file_id_prefix=query_params.startswith,
sha256_checksum=query_params.sha256_checksum,
)
log.debug(
"Found %d files starting with '%s'",
len(data),
Expand Down
2 changes: 1 addition & 1 deletion services/storage/src/simcore_service_storage/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ class DeleteFolderQueryParams(StorageQueryParamsBase):
class SearchFilesQueryParams(StorageQueryParamsBase):
startswith: str = ""
sha256_checksum: SHA256Str | None = None
access_right: Literal["read", "write"] = "read"
kind: Literal["owned"]


class LocationPathParams(BaseModel):
Expand Down
12 changes: 0 additions & 12 deletions services/storage/src/simcore_service_storage/simcore_s3_dsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,18 +710,6 @@ async def _get_size_and_num_files(

return parse_obj_as(ByteSize, total_size), total_num_s3_objects

async def search_read_access_files(
self, user_id: UserID, file_id_prefix: str, sha256_checksum: SHA256Str | None
):
async with self.engine.acquire() as conn:
can_read_projects_ids = await get_readable_project_ids(conn, user_id)
return await self._search_files(
user_id=user_id,
project_ids=can_read_projects_ids,
file_id_prefix=file_id_prefix,
sha256_checksum=sha256_checksum,
)

async def search_owned_files(
self, user_id: UserID, file_id_prefix: str, sha256_checksum: SHA256Str | None
):
Expand Down
2 changes: 1 addition & 1 deletion services/storage/tests/unit/test_dsm_soft_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ async def test_create_soft_link(
# assert output_file.last_modified < link_file.fmd.last_modified

# can find
files_list = await simcore_s3_dsm.search_read_access_files(
files_list = await simcore_s3_dsm.search_owned_files(
user_id, f"api/{api_file_id}/{file_name}", None
)
assert len(files_list) == 1
Expand Down
Loading

0 comments on commit 5fe0a1c

Please sign in to comment.