From 5fe0a1c331ec151fa5f40e2c5c7d0822c6e4fd07 Mon Sep 17 00:00:00 2001 From: Mads Bisgaard <126242332+bisgaard-itis@users.noreply.github.com> Date: Fri, 3 May 2024 23:27:15 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Only=20allow=20to=20search=20*ow?= =?UTF-8?q?ned*=20files=20in=20storage=20search=20endpoint=20(#5772)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/specs/storage/openapi.yaml | 53 +++++++++---------- api/specs/storage/scripts/storage.py | 4 +- .../api/routes/files.py | 18 ++----- .../api/routes/solvers_jobs_getters.py | 3 +- .../services/storage.py | 6 +-- .../services/study_job_models_converters.py | 3 +- .../api-server/tests/mocks/delete_file.json | 4 +- .../tests/mocks/get_solver_outputs.json | 4 +- .../tests/mocks/run_study_workflow.json | 8 +-- .../tests/mocks/search_file_checksum.json | 4 +- .../api-server/tests/unit/test_api_files.py | 4 -- .../handlers_simcore_s3.py | 19 ++----- .../src/simcore_service_storage/models.py | 2 +- .../simcore_service_storage/simcore_s3_dsm.py | 12 ----- .../storage/tests/unit/test_dsm_soft_links.py | 2 +- .../tests/unit/test_handlers_simcore_s3.py | 16 ++++-- 16 files changed, 66 insertions(+), 96 deletions(-) diff --git a/api/specs/storage/openapi.yaml b/api/specs/storage/openapi.yaml index f53af5c8f31..9507bc7a964 100644 --- a/api/specs/storage/openapi.yaml +++ b/api/specs/storage/openapi.yaml @@ -652,14 +652,14 @@ paths: responses: '204': description: Successful Response - /v0/simcore-s3/files/metadata:search: + /v0/simcore-s3/files/metadata:search_owned: post: tags: - simcore-s3 summary: search for files starting with description: search for files starting with `startswith` and/or matching a sha256_checksum in the file_meta_data table - operationId: search_files + operationId: search_owned_files parameters: - required: true schema: @@ -683,16 +683,6 @@ paths: title: Sha256 Checksum name: sha256_checksum in: query - - required: false - schema: - type: string - enum: - - read - - write - title: Access Right - default: read - name: access_right - in: query responses: '200': description: Successful Response @@ -1292,36 +1282,40 @@ components: title: PresignedLink S3Settings: properties: - S3_SECURE: - type: boolean - title: S3 Secure - default: false - S3_ENDPOINT: - type: string - title: S3 Endpoint S3_ACCESS_KEY: type: string + maxLength: 50 + minLength: 1 title: S3 Access Key - S3_SECRET_KEY: - type: string - title: S3 Secret Key - S3_ACCESS_TOKEN: - type: string - title: S3 Access Token S3_BUCKET_NAME: type: string + maxLength: 50 + minLength: 1 title: S3 Bucket Name + S3_ENDPOINT: + type: string + maxLength: 65536 + minLength: 1 + format: uri + title: S3 Endpoint + description: do not define if using standard AWS S3_REGION: type: string + maxLength: 50 + minLength: 1 title: S3 Region - default: us-east-1 + S3_SECRET_KEY: + type: string + maxLength: 50 + minLength: 1 + title: S3 Secret Key additionalProperties: false type: object required: - - S3_ENDPOINT - S3_ACCESS_KEY - - S3_SECRET_KEY - S3_BUCKET_NAME + - S3_REGION + - S3_SECRET_KEY title: S3Settings description: "- Customized configuration for all settings\n- If a field is a\ \ BaseCustomSettings subclass, it allows creating a default from env vars\ @@ -1381,6 +1375,9 @@ components: title: TaskGet TaskProgress: properties: + task_id: + type: string + title: Task Id message: type: string title: Message diff --git a/api/specs/storage/scripts/storage.py b/api/specs/storage/scripts/storage.py index 4392aca49f2..c71b2217845 100644 --- a/api/specs/storage/scripts/storage.py +++ b/api/specs/storage/scripts/storage.py @@ -343,14 +343,14 @@ async def delete_folders_of_project( f"/{API_VTAG}/simcore-s3/files/metadata:search", response_model=Envelope[FileMetaDataGet], tags=TAGS_SIMCORE_S3, - summary="search for files starting with", + summary="search for owned files", operation_id="search_files", ) async def search_files( user_id: UserID, + kind: Literal["owned"], startswith: str = "", sha256_checksum: SHA256Str | None = None, - access_right: Literal["read", "write"] = "read", ): """search for files starting with `startswith` and/or matching a sha256_checksum in the file_meta_data table""" diff --git a/services/api-server/src/simcore_service_api_server/api/routes/files.py b/services/api-server/src/simcore_service_api_server/api/routes/files.py index 20ed3da96cf..ec414207513 100644 --- a/services/api-server/src/simcore_service_api_server/api/routes/files.py +++ b/services/api-server/src/simcore_service_api_server/api/routes/files.py @@ -40,12 +40,7 @@ UploadLinks, ) from ...services.service_exception_handling import DEFAULT_BACKEND_SERVICE_STATUS_CODES -from ...services.storage import ( - AccessRight, - StorageApi, - StorageFileMetaData, - to_file_api_model, -) +from ...services.storage import StorageApi, StorageFileMetaData, to_file_api_model from ..dependencies.authentication import get_current_user_id from ..dependencies.services import get_api_client from ._common import API_SERVER_DEV_FEATURES_ENABLED @@ -73,16 +68,16 @@ async def _get_file( file_id: UUID, storage_client: StorageApi, user_id: int, - access_right: AccessRight, ): """Gets metadata for a given file resource""" try: - stored_files: list[StorageFileMetaData] = await storage_client.search_files( + stored_files: list[ + StorageFileMetaData + ] = await storage_client.search_owned_files( user_id=user_id, file_id=file_id, sha256_checksum=None, - access_right=access_right, ) if not stored_files: msg = "Not found in storage" @@ -289,7 +284,6 @@ async def get_file( file_id=file_id, storage_client=storage_client, user_id=user_id, - access_right="read", ) @@ -307,11 +301,10 @@ async def search_files_page( file_id: UUID | None = None, ): """Search files""" - stored_files: list[StorageFileMetaData] = await storage_client.search_files( + stored_files: list[StorageFileMetaData] = await storage_client.search_owned_files( user_id=user_id, file_id=file_id, sha256_checksum=sha256_checksum, - access_right="read", ) if page_params.offset > len(stored_files): _logger.debug("File with sha256_checksum=%d not found.", sha256_checksum) @@ -342,7 +335,6 @@ async def delete_file( file_id=file_id, storage_client=storage_client, user_id=user_id, - access_right="write", ) await storage_client.delete_file( user_id=user_id, quoted_storage_file_id=file.quoted_storage_file_id diff --git a/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs_getters.py b/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs_getters.py index 05876ab20fd..d2608511ab6 100644 --- a/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs_getters.py +++ b/services/api-server/src/simcore_service_api_server/api/routes/solvers_jobs_getters.py @@ -251,11 +251,10 @@ async def get_job_outputs( if isinstance(value, BaseFileLink): file_id: UUID = File.create_id(*value.path.split("/")) - found = await storage_client.search_files( + found = await storage_client.search_owned_files( user_id=user_id, file_id=file_id, sha256_checksum=None, - access_right="read", ) if found: assert len(found) == 1 # nosec diff --git a/services/api-server/src/simcore_service_api_server/services/storage.py b/services/api-server/src/simcore_service_api_server/services/storage.py index 3bf59737d58..6663b3f5139 100644 --- a/services/api-server/src/simcore_service_api_server/services/storage.py +++ b/services/api-server/src/simcore_service_api_server/services/storage.py @@ -61,6 +61,7 @@ async def list_files(self, user_id: int) -> list[StorageFileMetaData]: response = await self.client.post( "/simcore-s3/files/metadata:search", params={ + "kind": "owned", "user_id": str(user_id), "startswith": "api/", }, @@ -74,24 +75,23 @@ async def list_files(self, user_id: int) -> list[StorageFileMetaData]: return files @_exception_mapper({}) - async def search_files( + async def search_owned_files( self, *, user_id: int, file_id: UUID | None, sha256_checksum: SHA256Str | None, - access_right: AccessRight, ) -> list[StorageFileMetaData]: # NOTE: can NOT use /locations/0/files/metadata with uuid_filter=api/ because # logic in storage 'wrongly' assumes that all data is associated to a project and # here there is no project, so it would always returns an empty params: dict = { + "kind": "owned", "user_id": f"{user_id}", "startswith": None if file_id is None else f"api/{file_id}", "sha256_checksum": None if sha256_checksum is None else f"{sha256_checksum}", - "access_right": access_right, } response = await self.client.post( diff --git a/services/api-server/src/simcore_service_api_server/services/study_job_models_converters.py b/services/api-server/src/simcore_service_api_server/services/study_job_models_converters.py index 52f75655c35..27f3cd42c82 100644 --- a/services/api-server/src/simcore_service_api_server/services/study_job_models_converters.py +++ b/services/api-server/src/simcore_service_api_server/services/study_job_models_converters.py @@ -111,11 +111,10 @@ async def create_job_outputs_from_project_outputs( path = value["path"] file_id: UUID = File.create_id(*path.split("/")) - if found := await storage_client.search_files( + if found := await storage_client.search_owned_files( user_id=user_id, file_id=file_id, sha256_checksum=None, - access_right="read", ): assert len(found) == 1 # nosec results[name] = to_file_api_model(found[0]) diff --git a/services/api-server/tests/mocks/delete_file.json b/services/api-server/tests/mocks/delete_file.json index 8df441678a3..32d19eb22b6 100644 --- a/services/api-server/tests/mocks/delete_file.json +++ b/services/api-server/tests/mocks/delete_file.json @@ -1,14 +1,14 @@ [ { "name": "POST /simcore-s3/files/metadata:search", - "description": "", + "description": "", "method": "POST", "host": "storage", "path": { "path": "/v0/simcore-s3/files/metadata:search", "path_parameters": [] }, - "query": "user_id=1&startswith=api/cc3dd190-8c87-3686-b581-d9f7809d312e", + "query": "kind=owned&user_id=1&startswith=api/cc3dd190-8c87-3686-b581-d9f7809d312e", "request_payload": null, "response_body": { "data": [ diff --git a/services/api-server/tests/mocks/get_solver_outputs.json b/services/api-server/tests/mocks/get_solver_outputs.json index db36901a96d..e5d7f9ba7d7 100644 --- a/services/api-server/tests/mocks/get_solver_outputs.json +++ b/services/api-server/tests/mocks/get_solver_outputs.json @@ -219,14 +219,14 @@ }, { "name": "POST /simcore-s3/files/metadata:search", - "description": "", + "description": "", "method": "POST", "host": "storage", "path": { "path": "/v0/simcore-s3/files/metadata:search", "path_parameters": [] }, - "query": "user_id=1&startswith=api/4ea24645-fd8c-339b-9621-ae045d45d94d&access_right=read", + "query": "kind=owned&user_id=1&startswith=api/4ea24645-fd8c-339b-9621-ae045d45d94d", "request_payload": null, "response_body": { "data": [ diff --git a/services/api-server/tests/mocks/run_study_workflow.json b/services/api-server/tests/mocks/run_study_workflow.json index 8a16b45ff2c..08eefa6f6e7 100644 --- a/services/api-server/tests/mocks/run_study_workflow.json +++ b/services/api-server/tests/mocks/run_study_workflow.json @@ -3238,15 +3238,15 @@ } }, { - "name": "POST /simcore-s3/files/metadata:search", - "description": "", + "name": "POST /simcore-s3/files/metadata:search_owned", + "description": "", "method": "POST", "host": "storage", "path": { - "path": "/v0/simcore-s3/files/metadata:search", + "path": "/v0/simcore-s3/files/metadata:search_owned", "path_parameters": [] }, - "query": "user_id=1&startswith=api%2F5b0cd3cd-5ceb-3d74-9961-246840c1e1d4&access_right=read", + "query": "user_id=1&startswith=api%2F5b0cd3cd-5ceb-3d74-9961-246840c1e1d4", "response_body": { "data": [] } diff --git a/services/api-server/tests/mocks/search_file_checksum.json b/services/api-server/tests/mocks/search_file_checksum.json index f0e2165a4ed..2b61f6cdf1a 100644 --- a/services/api-server/tests/mocks/search_file_checksum.json +++ b/services/api-server/tests/mocks/search_file_checksum.json @@ -1,14 +1,14 @@ [ { "name": "POST /simcore-s3/files/metadata:search", - "description": "", + "description": "", "method": "POST", "host": "storage", "path": { "path": "/v0/simcore-s3/files/metadata:search", "path_parameters": [] }, - "query": "user_id=2&sha256_checksum=92c7a39ce451ee57edd6da0b9c734ca9e6423a20410f73ce55e0d07cfd603b9d", + "query": "kind=owned&user_id=2&sha256_checksum=92c7a39ce451ee57edd6da0b9c734ca9e6423a20410f73ce55e0d07cfd603b9d", "request_payload": null, "response_body": { "data": [ diff --git a/services/api-server/tests/unit/test_api_files.py b/services/api-server/tests/unit/test_api_files.py index a67a0d57384..28a28f28d20 100644 --- a/services/api-server/tests/unit/test_api_files.py +++ b/services/api-server/tests/unit/test_api_files.py @@ -196,10 +196,6 @@ def search_side_effect( path_params: dict[str, Any], capture: HttpApiCallCaptureModel, ) -> dict[str, Any]: - request_query: dict[str, str] = dict( - pair.split("=") for pair in request.url.query.decode("utf8").split("&") - ) - assert request_query.get("access_right") == "write" assert isinstance(capture.response_body, dict) response: dict[str, Any] = capture.response_body return response diff --git a/services/storage/src/simcore_service_storage/handlers_simcore_s3.py b/services/storage/src/simcore_service_storage/handlers_simcore_s3.py index 9533c69c97a..38caf0fe9b8 100644 --- a/services/storage/src/simcore_service_storage/handlers_simcore_s3.py +++ b/services/storage/src/simcore_service_storage/handlers_simcore_s3.py @@ -130,20 +130,11 @@ async def search_files(request: web.Request) -> web.Response: get_dsm_provider(request.app).get(SimcoreS3DataManager.get_location_id()), ) data: list[FileMetaData] - if query_params.access_right == "read": - data = await dsm.search_read_access_files( - query_params.user_id, - file_id_prefix=query_params.startswith, - sha256_checksum=query_params.sha256_checksum, - ) - elif query_params.access_right == "write": - data = await dsm.search_owned_files( - query_params.user_id, - file_id_prefix=query_params.startswith, - sha256_checksum=query_params.sha256_checksum, - ) - else: - raise ValueError(f"The query param {query_params.access_right=} is unexpected") + data = await dsm.search_owned_files( + query_params.user_id, + file_id_prefix=query_params.startswith, + sha256_checksum=query_params.sha256_checksum, + ) log.debug( "Found %d files starting with '%s'", len(data), diff --git a/services/storage/src/simcore_service_storage/models.py b/services/storage/src/simcore_service_storage/models.py index 8444863d3e8..7da93886760 100644 --- a/services/storage/src/simcore_service_storage/models.py +++ b/services/storage/src/simcore_service_storage/models.py @@ -202,7 +202,7 @@ class DeleteFolderQueryParams(StorageQueryParamsBase): class SearchFilesQueryParams(StorageQueryParamsBase): startswith: str = "" sha256_checksum: SHA256Str | None = None - access_right: Literal["read", "write"] = "read" + kind: Literal["owned"] class LocationPathParams(BaseModel): diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 4124e8f837c..8d4634af710 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -710,18 +710,6 @@ async def _get_size_and_num_files( return parse_obj_as(ByteSize, total_size), total_num_s3_objects - async def search_read_access_files( - self, user_id: UserID, file_id_prefix: str, sha256_checksum: SHA256Str | None - ): - async with self.engine.acquire() as conn: - can_read_projects_ids = await get_readable_project_ids(conn, user_id) - return await self._search_files( - user_id=user_id, - project_ids=can_read_projects_ids, - file_id_prefix=file_id_prefix, - sha256_checksum=sha256_checksum, - ) - async def search_owned_files( self, user_id: UserID, file_id_prefix: str, sha256_checksum: SHA256Str | None ): diff --git a/services/storage/tests/unit/test_dsm_soft_links.py b/services/storage/tests/unit/test_dsm_soft_links.py index aab63b5cecc..8e5db68774a 100644 --- a/services/storage/tests/unit/test_dsm_soft_links.py +++ b/services/storage/tests/unit/test_dsm_soft_links.py @@ -124,7 +124,7 @@ async def test_create_soft_link( # assert output_file.last_modified < link_file.fmd.last_modified # can find - files_list = await simcore_s3_dsm.search_read_access_files( + files_list = await simcore_s3_dsm.search_owned_files( user_id, f"api/{api_file_id}/{file_name}", None ) assert len(files_list) == 1 diff --git a/services/storage/tests/unit/test_handlers_simcore_s3.py b/services/storage/tests/unit/test_handlers_simcore_s3.py index d7a258a4efd..1f62b0d990d 100644 --- a/services/storage/tests/unit/test_handlers_simcore_s3.py +++ b/services/storage/tests/unit/test_handlers_simcore_s3.py @@ -566,7 +566,7 @@ async def test_create_and_delete_folders_from_project_burst( @pytest.mark.parametrize("search_startswith", [True, False]) @pytest.mark.parametrize("search_sha256_checksum", [True, False]) -@pytest.mark.parametrize("access_right", ["read", "write"]) +@pytest.mark.parametrize("kind", ["owned", "read", None]) async def test_search_files( client: TestClient, user_id: UserID, @@ -574,19 +574,27 @@ async def test_search_files( faker: Faker, search_startswith: bool, search_sha256_checksum: bool, - access_right: Literal["read", "write"], + kind: Literal["owned"], ): assert client.app _file_name: str = faker.file_name() _sha256_checksum: SHA256Str = parse_obj_as(SHA256Str, faker.sha256()) _query_params: dict[str, Any] = { "user_id": user_id, - "access_right": access_right, + "kind": kind, "startswith": "", } - url = client.app.router["search_files"].url_for().with_query(**_query_params) + url = ( + client.app.router["search_files"] + .url_for() + .with_query(**{k: v for k, v in _query_params.items() if v is not None}) + ) response = await client.post(f"{url}") + + if kind != "owned": + _ = await assert_status(response, status.HTTP_422_UNPROCESSABLE_ENTITY) + return data, error = await assert_status(response, status.HTTP_200_OK) assert not error list_fmds = parse_obj_as(list[FileMetaDataGet], data)