Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Only allow to search *owned* files in storage search endpoint #5772

Merged
53 changes: 25 additions & 28 deletions api/specs/storage/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -652,14 +652,14 @@ paths:
responses:
'204':
description: Successful Response
/v0/simcore-s3/files/metadata:search:
/v0/simcore-s3/files/metadata:search_owned:
post:
tags:
- simcore-s3
summary: search for files starting with
description: search for files starting with `startswith` and/or matching a sha256_checksum
in the file_meta_data table
operationId: search_files
operationId: search_owned_files
parameters:
- required: true
schema:
Expand All @@ -683,16 +683,6 @@ paths:
title: Sha256 Checksum
name: sha256_checksum
in: query
- required: false
schema:
type: string
enum:
- read
- write
title: Access Right
default: read
name: access_right
in: query
responses:
'200':
description: Successful Response
Expand Down Expand Up @@ -1292,36 +1282,40 @@ components:
title: PresignedLink
S3Settings:
properties:
S3_SECURE:
type: boolean
title: S3 Secure
default: false
S3_ENDPOINT:
type: string
title: S3 Endpoint
S3_ACCESS_KEY:
type: string
maxLength: 50
minLength: 1
title: S3 Access Key
S3_SECRET_KEY:
type: string
title: S3 Secret Key
S3_ACCESS_TOKEN:
type: string
title: S3 Access Token
S3_BUCKET_NAME:
type: string
maxLength: 50
minLength: 1
title: S3 Bucket Name
S3_ENDPOINT:
type: string
maxLength: 65536
minLength: 1
format: uri
title: S3 Endpoint
description: do not define if using standard AWS
S3_REGION:
type: string
maxLength: 50
minLength: 1
title: S3 Region
default: us-east-1
S3_SECRET_KEY:
type: string
maxLength: 50
minLength: 1
title: S3 Secret Key
additionalProperties: false
type: object
required:
- S3_ENDPOINT
- S3_ACCESS_KEY
- S3_SECRET_KEY
- S3_BUCKET_NAME
- S3_REGION
- S3_SECRET_KEY
title: S3Settings
description: "- Customized configuration for all settings\n- If a field is a\
\ BaseCustomSettings subclass, it allows creating a default from env vars\
Expand Down Expand Up @@ -1381,6 +1375,9 @@ components:
title: TaskGet
TaskProgress:
properties:
task_id:
type: string
title: Task Id
message:
type: string
title: Message
Expand Down
9 changes: 4 additions & 5 deletions api/specs/storage/scripts/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


from enum import Enum
from typing import Any, Literal
from typing import Any

from fastapi import FastAPI, Query, status
from models_library.api_schemas_storage import (
Expand Down Expand Up @@ -340,17 +340,16 @@ async def delete_folders_of_project(


@app.post(
f"/{API_VTAG}/simcore-s3/files/metadata:search",
f"/{API_VTAG}/simcore-s3/files/metadata:search_owned",
bisgaard-itis marked this conversation as resolved.
Show resolved Hide resolved
response_model=Envelope[FileMetaDataGet],
tags=TAGS_SIMCORE_S3,
summary="search for files starting with",
operation_id="search_files",
operation_id="search_owned_files",
)
async def search_files(
async def search_owned_files(
user_id: UserID,
startswith: str = "",
sha256_checksum: SHA256Str | None = None,
access_right: Literal["read", "write"] = "read",
):
"""search for files starting with `startswith` and/or matching a sha256_checksum in the file_meta_data table"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,7 @@
UploadLinks,
)
from ...services.service_exception_handling import DEFAULT_BACKEND_SERVICE_STATUS_CODES
from ...services.storage import (
AccessRight,
StorageApi,
StorageFileMetaData,
to_file_api_model,
)
from ...services.storage import StorageApi, StorageFileMetaData, to_file_api_model
from ..dependencies.authentication import get_current_user_id
from ..dependencies.services import get_api_client
from ._common import API_SERVER_DEV_FEATURES_ENABLED
Expand Down Expand Up @@ -73,16 +68,16 @@ async def _get_file(
file_id: UUID,
storage_client: StorageApi,
user_id: int,
access_right: AccessRight,
):
"""Gets metadata for a given file resource"""

try:
stored_files: list[StorageFileMetaData] = await storage_client.search_files(
stored_files: list[
StorageFileMetaData
] = await storage_client.search_owned_files(
user_id=user_id,
file_id=file_id,
sha256_checksum=None,
access_right=access_right,
)
if not stored_files:
msg = "Not found in storage"
Expand Down Expand Up @@ -289,7 +284,6 @@ async def get_file(
file_id=file_id,
storage_client=storage_client,
user_id=user_id,
access_right="read",
)


Expand All @@ -307,11 +301,10 @@ async def search_files_page(
file_id: UUID | None = None,
):
"""Search files"""
stored_files: list[StorageFileMetaData] = await storage_client.search_files(
stored_files: list[StorageFileMetaData] = await storage_client.search_owned_files(
user_id=user_id,
file_id=file_id,
sha256_checksum=sha256_checksum,
access_right="read",
)
if page_params.offset > len(stored_files):
_logger.debug("File with sha256_checksum=%d not found.", sha256_checksum)
Expand Down Expand Up @@ -342,7 +335,6 @@ async def delete_file(
file_id=file_id,
storage_client=storage_client,
user_id=user_id,
access_right="write",
)
await storage_client.delete_file(
user_id=user_id, quoted_storage_file_id=file.quoted_storage_file_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,10 @@ async def get_job_outputs(
if isinstance(value, BaseFileLink):
file_id: UUID = File.create_id(*value.path.split("/"))

found = await storage_client.search_files(
found = await storage_client.search_owned_files(
user_id=user_id,
file_id=file_id,
sha256_checksum=None,
access_right="read",
)
if found:
assert len(found) == 1 # nosec
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,12 @@ async def list_files(self, user_id: int) -> list[StorageFileMetaData]:
return files

@_exception_mapper({})
async def search_files(
async def search_owned_files(
self,
*,
user_id: int,
file_id: UUID | None,
sha256_checksum: SHA256Str | None,
access_right: AccessRight,
) -> list[StorageFileMetaData]:
# NOTE: can NOT use /locations/0/files/metadata with uuid_filter=api/ because
# logic in storage 'wrongly' assumes that all data is associated to a project and
Expand All @@ -91,11 +90,10 @@ async def search_files(
"sha256_checksum": None
if sha256_checksum is None
else f"{sha256_checksum}",
"access_right": access_right,
}

response = await self.client.post(
"/simcore-s3/files/metadata:search",
"/simcore-s3/files/metadata:search_owned",
params={k: v for k, v in params.items() if v is not None},
)
response.raise_for_status()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,10 @@ async def delete_folders_of_project(request: web.Request) -> NoReturn:
raise web.HTTPNoContent(content_type=MIMETYPE_APPLICATION_JSON)


@routes.post(f"/{API_VTAG}/simcore-s3/files/metadata:search", name="search_files")
async def search_files(request: web.Request) -> web.Response:
@routes.post(
f"/{API_VTAG}/simcore-s3/files/metadata:search_owned", name="search_owned_files"
)
async def search_owned_files(request: web.Request) -> web.Response:
query_params = parse_request_query_parameters_as(SearchFilesQueryParams, request)
log.debug(
"received call to search_files with %s",
Expand All @@ -130,20 +132,11 @@ async def search_files(request: web.Request) -> web.Response:
get_dsm_provider(request.app).get(SimcoreS3DataManager.get_location_id()),
)
data: list[FileMetaData]
if query_params.access_right == "read":
data = await dsm.search_read_access_files(
query_params.user_id,
file_id_prefix=query_params.startswith,
sha256_checksum=query_params.sha256_checksum,
)
elif query_params.access_right == "write":
data = await dsm.search_owned_files(
query_params.user_id,
file_id_prefix=query_params.startswith,
sha256_checksum=query_params.sha256_checksum,
)
else:
raise ValueError(f"The query param {query_params.access_right=} is unexpected")
data = await dsm.search_owned_files(
bisgaard-itis marked this conversation as resolved.
Show resolved Hide resolved
query_params.user_id,
file_id_prefix=query_params.startswith,
sha256_checksum=query_params.sha256_checksum,
)
log.debug(
"Found %d files starting with '%s'",
len(data),
Expand Down
3 changes: 1 addition & 2 deletions services/storage/src/simcore_service_storage/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
import urllib.parse
from dataclasses import dataclass
from typing import Final, Literal
from typing import Final
from uuid import UUID

from models_library.api_schemas_storage import (
Expand Down Expand Up @@ -202,7 +202,6 @@ class DeleteFolderQueryParams(StorageQueryParamsBase):
class SearchFilesQueryParams(StorageQueryParamsBase):
startswith: str = ""
sha256_checksum: SHA256Str | None = None
access_right: Literal["read", "write"] = "read"


class LocationPathParams(BaseModel):
Expand Down
12 changes: 0 additions & 12 deletions services/storage/src/simcore_service_storage/simcore_s3_dsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,18 +710,6 @@ async def _get_size_and_num_files(

return parse_obj_as(ByteSize, total_size), total_num_s3_objects

async def search_read_access_files(
self, user_id: UserID, file_id_prefix: str, sha256_checksum: SHA256Str | None
):
async with self.engine.acquire() as conn:
can_read_projects_ids = await get_readable_project_ids(conn, user_id)
return await self._search_files(
user_id=user_id,
project_ids=can_read_projects_ids,
file_id_prefix=file_id_prefix,
sha256_checksum=sha256_checksum,
)

async def search_owned_files(
self, user_id: UserID, file_id_prefix: str, sha256_checksum: SHA256Str | None
):
Expand Down
2 changes: 1 addition & 1 deletion services/storage/tests/unit/test_dsm_soft_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ async def test_create_soft_link(
# assert output_file.last_modified < link_file.fmd.last_modified

# can find
files_list = await simcore_s3_dsm.search_read_access_files(
files_list = await simcore_s3_dsm.search_owned_files(
bisgaard-itis marked this conversation as resolved.
Show resolved Hide resolved
user_id, f"api/{api_file_id}/{file_name}", None
)
assert len(files_list) == 1
Expand Down
Loading