Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Only allow to search *owned* files in storage search endpoint #5772

Merged
53 changes: 25 additions & 28 deletions api/specs/storage/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -652,14 +652,14 @@ paths:
responses:
'204':
description: Successful Response
/v0/simcore-s3/files/metadata:search:
/v0/simcore-s3/files/metadata:search_owned:
post:
tags:
- simcore-s3
summary: search for files starting with
description: search for files starting with `startswith` and/or matching a sha256_checksum
in the file_meta_data table
operationId: search_files
operationId: search_owned_files
parameters:
- required: true
schema:
Expand All @@ -683,16 +683,6 @@ paths:
title: Sha256 Checksum
name: sha256_checksum
in: query
- required: false
schema:
type: string
enum:
- read
- write
title: Access Right
default: read
name: access_right
in: query
responses:
'200':
description: Successful Response
Expand Down Expand Up @@ -1292,36 +1282,40 @@ components:
title: PresignedLink
S3Settings:
properties:
S3_SECURE:
type: boolean
title: S3 Secure
default: false
S3_ENDPOINT:
type: string
title: S3 Endpoint
S3_ACCESS_KEY:
type: string
maxLength: 50
minLength: 1
title: S3 Access Key
S3_SECRET_KEY:
type: string
title: S3 Secret Key
S3_ACCESS_TOKEN:
type: string
title: S3 Access Token
S3_BUCKET_NAME:
type: string
maxLength: 50
minLength: 1
title: S3 Bucket Name
S3_ENDPOINT:
type: string
maxLength: 65536
minLength: 1
format: uri
title: S3 Endpoint
description: do not define if using standard AWS
S3_REGION:
type: string
maxLength: 50
minLength: 1
title: S3 Region
default: us-east-1
S3_SECRET_KEY:
type: string
maxLength: 50
minLength: 1
title: S3 Secret Key
additionalProperties: false
type: object
required:
- S3_ENDPOINT
- S3_ACCESS_KEY
- S3_SECRET_KEY
- S3_BUCKET_NAME
- S3_REGION
- S3_SECRET_KEY
title: S3Settings
description: "- Customized configuration for all settings\n- If a field is a\
\ BaseCustomSettings subclass, it allows creating a default from env vars\
Expand Down Expand Up @@ -1381,6 +1375,9 @@ components:
title: TaskGet
TaskProgress:
properties:
task_id:
type: string
title: Task Id
message:
type: string
title: Message
Expand Down
4 changes: 2 additions & 2 deletions api/specs/storage/scripts/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,14 +343,14 @@ async def delete_folders_of_project(
f"/{API_VTAG}/simcore-s3/files/metadata:search",
response_model=Envelope[FileMetaDataGet],
tags=TAGS_SIMCORE_S3,
summary="search for files starting with",
summary="search for owned files",
bisgaard-itis marked this conversation as resolved.
Show resolved Hide resolved
operation_id="search_files",
)
async def search_files(
user_id: UserID,
kind: Literal["owned"],
startswith: str = "",
sha256_checksum: SHA256Str | None = None,
access_right: Literal["read", "write"] = "read",
):
"""search for files starting with `startswith` and/or matching a sha256_checksum in the file_meta_data table"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,7 @@
UploadLinks,
)
from ...services.service_exception_handling import DEFAULT_BACKEND_SERVICE_STATUS_CODES
from ...services.storage import (
AccessRight,
StorageApi,
StorageFileMetaData,
to_file_api_model,
)
from ...services.storage import StorageApi, StorageFileMetaData, to_file_api_model
from ..dependencies.authentication import get_current_user_id
from ..dependencies.services import get_api_client
from ._common import API_SERVER_DEV_FEATURES_ENABLED
Expand Down Expand Up @@ -73,16 +68,16 @@ async def _get_file(
file_id: UUID,
storage_client: StorageApi,
user_id: int,
access_right: AccessRight,
):
"""Gets metadata for a given file resource"""

try:
stored_files: list[StorageFileMetaData] = await storage_client.search_files(
stored_files: list[
StorageFileMetaData
] = await storage_client.search_owned_files(
user_id=user_id,
file_id=file_id,
sha256_checksum=None,
access_right=access_right,
)
if not stored_files:
msg = "Not found in storage"
Expand Down Expand Up @@ -289,7 +284,6 @@ async def get_file(
file_id=file_id,
storage_client=storage_client,
user_id=user_id,
access_right="read",
)


Expand All @@ -307,11 +301,10 @@ async def search_files_page(
file_id: UUID | None = None,
):
"""Search files"""
stored_files: list[StorageFileMetaData] = await storage_client.search_files(
stored_files: list[StorageFileMetaData] = await storage_client.search_owned_files(
user_id=user_id,
file_id=file_id,
sha256_checksum=sha256_checksum,
access_right="read",
)
if page_params.offset > len(stored_files):
_logger.debug("File with sha256_checksum=%d not found.", sha256_checksum)
Expand Down Expand Up @@ -342,7 +335,6 @@ async def delete_file(
file_id=file_id,
storage_client=storage_client,
user_id=user_id,
access_right="write",
)
await storage_client.delete_file(
user_id=user_id, quoted_storage_file_id=file.quoted_storage_file_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,10 @@ async def get_job_outputs(
if isinstance(value, BaseFileLink):
file_id: UUID = File.create_id(*value.path.split("/"))

found = await storage_client.search_files(
found = await storage_client.search_owned_files(
user_id=user_id,
file_id=file_id,
sha256_checksum=None,
access_right="read",
)
if found:
assert len(found) == 1 # nosec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ async def list_files(self, user_id: int) -> list[StorageFileMetaData]:
response = await self.client.post(
"/simcore-s3/files/metadata:search",
params={
"kind": "owned",
"user_id": str(user_id),
"startswith": "api/",
},
Expand All @@ -74,24 +75,23 @@ async def list_files(self, user_id: int) -> list[StorageFileMetaData]:
return files

@_exception_mapper({})
async def search_files(
async def search_owned_files(
self,
*,
user_id: int,
file_id: UUID | None,
sha256_checksum: SHA256Str | None,
access_right: AccessRight,
) -> list[StorageFileMetaData]:
# NOTE: can NOT use /locations/0/files/metadata with uuid_filter=api/ because
# logic in storage 'wrongly' assumes that all data is associated to a project and
# here there is no project, so it would always returns an empty
params: dict = {
"kind": "owned",
"user_id": f"{user_id}",
"startswith": None if file_id is None else f"api/{file_id}",
"sha256_checksum": None
if sha256_checksum is None
else f"{sha256_checksum}",
"access_right": access_right,
}

response = await self.client.post(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,10 @@ async def create_job_outputs_from_project_outputs(
path = value["path"]
file_id: UUID = File.create_id(*path.split("/"))

if found := await storage_client.search_files(
if found := await storage_client.search_owned_files(
user_id=user_id,
file_id=file_id,
sha256_checksum=None,
access_right="read",
):
assert len(found) == 1 # nosec
results[name] = to_file_api_model(found[0])
Expand Down
4 changes: 2 additions & 2 deletions services/api-server/tests/mocks/delete_file.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
[
{
"name": "POST /simcore-s3/files/metadata:search",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?user_id=1&startswith=api/cc3dd190-8c87-3686-b581-d9f7809d312e')>",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?kind=owned&user_id=1&startswith=api/cc3dd190-8c87-3686-b581-d9f7809d312e')>",
"method": "POST",
"host": "storage",
"path": {
"path": "/v0/simcore-s3/files/metadata:search",
"path_parameters": []
},
"query": "user_id=1&startswith=api/cc3dd190-8c87-3686-b581-d9f7809d312e",
"query": "kind=owned&user_id=1&startswith=api/cc3dd190-8c87-3686-b581-d9f7809d312e",
"request_payload": null,
"response_body": {
"data": [
Expand Down
4 changes: 2 additions & 2 deletions services/api-server/tests/mocks/get_solver_outputs.json
Original file line number Diff line number Diff line change
Expand Up @@ -219,14 +219,14 @@
},
{
"name": "POST /simcore-s3/files/metadata:search",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?user_id=1&startswith=api/4ea24645-fd8c-339b-9621-ae045d45d94d&access_right=read')>",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?kind=owned&user_id=1&startswith=api/4ea24645-fd8c-339b-9621-ae045d45d94d')>",
"method": "POST",
"host": "storage",
"path": {
"path": "/v0/simcore-s3/files/metadata:search",
"path_parameters": []
},
"query": "user_id=1&startswith=api/4ea24645-fd8c-339b-9621-ae045d45d94d&access_right=read",
"query": "kind=owned&user_id=1&startswith=api/4ea24645-fd8c-339b-9621-ae045d45d94d",
"request_payload": null,
"response_body": {
"data": [
Expand Down
8 changes: 4 additions & 4 deletions services/api-server/tests/mocks/run_study_workflow.json
Original file line number Diff line number Diff line change
Expand Up @@ -3238,15 +3238,15 @@
}
},
{
"name": "POST /simcore-s3/files/metadata:search",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?user_id=1&startswith=api%2F5b0cd3cd-5ceb-3d74-9961-246840c1e1d4&access_right=read')>",
"name": "POST /simcore-s3/files/metadata:search_owned",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search_owned?user_id=1&startswith=api%2F5b0cd3cd-5ceb-3d74-9961-246840c1e1d4')>",
"method": "POST",
"host": "storage",
"path": {
"path": "/v0/simcore-s3/files/metadata:search",
"path": "/v0/simcore-s3/files/metadata:search_owned",
"path_parameters": []
},
"query": "user_id=1&startswith=api%2F5b0cd3cd-5ceb-3d74-9961-246840c1e1d4&access_right=read",
"query": "user_id=1&startswith=api%2F5b0cd3cd-5ceb-3d74-9961-246840c1e1d4",
"response_body": {
"data": []
}
Expand Down
4 changes: 2 additions & 2 deletions services/api-server/tests/mocks/search_file_checksum.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
[
{
"name": "POST /simcore-s3/files/metadata:search",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?user_id=2&sha256_checksum=92c7a39ce451ee57edd6da0b9c734ca9e6423a20410f73ce55e0d07cfd603b9d')>",
"description": "<Request('POST', 'http://storage:8080/v0/simcore-s3/files/metadata:search?kind=owned&user_id=2&sha256_checksum=92c7a39ce451ee57edd6da0b9c734ca9e6423a20410f73ce55e0d07cfd603b9d')>",
"method": "POST",
"host": "storage",
"path": {
"path": "/v0/simcore-s3/files/metadata:search",
"path_parameters": []
},
"query": "user_id=2&sha256_checksum=92c7a39ce451ee57edd6da0b9c734ca9e6423a20410f73ce55e0d07cfd603b9d",
"query": "kind=owned&user_id=2&sha256_checksum=92c7a39ce451ee57edd6da0b9c734ca9e6423a20410f73ce55e0d07cfd603b9d",
"request_payload": null,
"response_body": {
"data": [
Expand Down
4 changes: 0 additions & 4 deletions services/api-server/tests/unit/test_api_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,6 @@ def search_side_effect(
path_params: dict[str, Any],
capture: HttpApiCallCaptureModel,
) -> dict[str, Any]:
request_query: dict[str, str] = dict(
pair.split("=") for pair in request.url.query.decode("utf8").split("&")
)
assert request_query.get("access_right") == "write"
assert isinstance(capture.response_body, dict)
response: dict[str, Any] = capture.response_body
return response
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,20 +130,11 @@ async def search_files(request: web.Request) -> web.Response:
get_dsm_provider(request.app).get(SimcoreS3DataManager.get_location_id()),
)
data: list[FileMetaData]
if query_params.access_right == "read":
data = await dsm.search_read_access_files(
query_params.user_id,
file_id_prefix=query_params.startswith,
sha256_checksum=query_params.sha256_checksum,
)
elif query_params.access_right == "write":
data = await dsm.search_owned_files(
query_params.user_id,
file_id_prefix=query_params.startswith,
sha256_checksum=query_params.sha256_checksum,
)
else:
raise ValueError(f"The query param {query_params.access_right=} is unexpected")
data = await dsm.search_owned_files(
bisgaard-itis marked this conversation as resolved.
Show resolved Hide resolved
query_params.user_id,
file_id_prefix=query_params.startswith,
sha256_checksum=query_params.sha256_checksum,
)
log.debug(
"Found %d files starting with '%s'",
len(data),
Expand Down
2 changes: 1 addition & 1 deletion services/storage/src/simcore_service_storage/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ class DeleteFolderQueryParams(StorageQueryParamsBase):
class SearchFilesQueryParams(StorageQueryParamsBase):
startswith: str = ""
sha256_checksum: SHA256Str | None = None
access_right: Literal["read", "write"] = "read"
kind: Literal["owned"]


class LocationPathParams(BaseModel):
Expand Down
12 changes: 0 additions & 12 deletions services/storage/src/simcore_service_storage/simcore_s3_dsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,18 +710,6 @@ async def _get_size_and_num_files(

return parse_obj_as(ByteSize, total_size), total_num_s3_objects

async def search_read_access_files(
self, user_id: UserID, file_id_prefix: str, sha256_checksum: SHA256Str | None
):
async with self.engine.acquire() as conn:
can_read_projects_ids = await get_readable_project_ids(conn, user_id)
return await self._search_files(
user_id=user_id,
project_ids=can_read_projects_ids,
file_id_prefix=file_id_prefix,
sha256_checksum=sha256_checksum,
)

async def search_owned_files(
self, user_id: UserID, file_id_prefix: str, sha256_checksum: SHA256Str | None
):
Expand Down
2 changes: 1 addition & 1 deletion services/storage/tests/unit/test_dsm_soft_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ async def test_create_soft_link(
# assert output_file.last_modified < link_file.fmd.last_modified

# can find
files_list = await simcore_s3_dsm.search_read_access_files(
files_list = await simcore_s3_dsm.search_owned_files(
bisgaard-itis marked this conversation as resolved.
Show resolved Hide resolved
user_id, f"api/{api_file_id}/{file_name}", None
)
assert len(files_list) == 1
Expand Down
Loading
Loading