Skip to content

Commit

Permalink
Refactor existing endpoints and create endpoints
Browse files Browse the repository at this point in the history
  • Loading branch information
ChristopherSpelt committed Nov 5, 2024
1 parent 260286d commit 0882bca
Show file tree
Hide file tree
Showing 20 changed files with 784 additions and 544 deletions.
Binary file removed .DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,5 @@ index.json

# Logging files
*.log

.DS_Store
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ repos:
rev: v0.7.2
hooks:
- id: ruff
args: [--config, pyproject.toml]
- id: ruff-format
args: [--config, pyproject.toml]
- repo: local
hooks:
- id: validate-schema
Expand Down
843 changes: 434 additions & 409 deletions poetry.lock

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,16 @@ readme = "README.md"
description = ""
authors = ["ai-validatie-team <[email protected]>"]
repository = "https://github.com/MinBZK/task-registry"
keywords = ["AI", "Validation", "Instrument", "Task", "Registry"]
keywords = ["AI", "Validation", "Instrument", "Requirement", "Measure", "Task", "Registry"]
license = "EUPL-1.2"
classifiers = [
"Development Status :: 3 - Alpha",
"Framework :: FastAPI",
"Topic :: Software Development :: Libraries :: Python Modules",
"Programming Language :: Python :: 3",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Typing :: Typed"
]
packages = [
{ include = "task_registry" }
]
Expand Down Expand Up @@ -48,7 +56,7 @@ types-pyyaml = "^6.0.12.20240724"
# Ruff settings: https://docs.astral.sh/ruff/configuration/
[tool.ruff]
line-length = 120
target-version = "py311"
target-version = "py312"
src = ["task_registry", "tests", "script"]
include = ['script/validate']

Expand Down
5 changes: 3 additions & 2 deletions task_registry/api/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from fastapi import APIRouter
from task_registry.api.routes import health, instruments, urns
from task_registry.api.routes import health, instruments, measures, requirements

api_router = APIRouter()
api_router.include_router(health.router, prefix="/health", tags=["health"])
api_router.include_router(instruments.router, prefix="/instruments", tags=["instruments"])
api_router.include_router(urns.router, prefix="/urns", tags=["urns"])
api_router.include_router(measures.router, prefix="/measures", tags=["measures"])
api_router.include_router(requirements.router, prefix="/requirements", tags=["requirements"])
35 changes: 29 additions & 6 deletions task_registry/api/routes/instruments.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import logging

from fastapi import APIRouter
from fastapi import APIRouter, HTTPException
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from task_registry.lifespan import CACHED_DATA
from task_registry.data import Tasks
from task_registry.lifespan import CACHED_REGISTRY

router = APIRouter()

Expand All @@ -11,9 +13,30 @@

@router.get(
"/",
summary="Overview of all the Instruments in the Task Registry",
description="This endpoint returns a JSON with all the Instruments in the Task Registry.",
summary="Overview of all the instruments in the task registry.",
description="This endpoint returns a JSON with all the instruments in the task registry.",
responses={200: {"description": "JSON with all the instruments."}},
)
async def get_root() -> JSONResponse:
return JSONResponse(content=CACHED_DATA["index"])
async def get_instruments() -> JSONResponse:
encoded_content = jsonable_encoder(CACHED_REGISTRY.get_tasks_index(Tasks.INSTRUMENTS))
return JSONResponse(content=encoded_content)


# The optional 'version' parameter is accepted but not used yet; versioning of
# instruments is to be handled in a separate ticket.
@router.get(
    "/urn/{urn}",
    summary="Get the contents of the specific instrument which has given URN.",
    description="This endpoint returns a JSON with the contents of a specific instrument identified by URN"
    " and version.",
    responses={
        200: {"description": "JSON with the specific contents of the instrument."},
        400: {"description": "The URN does not exist or is not valid."},
    },
)
async def get_instrument(urn: str, version: str = "latest") -> JSONResponse:
    """Return the instrument registered under ``urn`` as a JSON response.

    Args:
        urn: URN of the instrument to look up.
        version: Accepted for forward compatibility; currently ignored.

    Raises:
        HTTPException: With status 400 when the registry lookup raises ``KeyError``.
    """
    try:
        instrument = CACHED_REGISTRY.get_task(urn, Tasks.INSTRUMENTS)
    except KeyError as err:
        raise HTTPException(status_code=400, detail=f"invalid urn: {urn}") from err
    return JSONResponse(content=instrument)
42 changes: 42 additions & 0 deletions task_registry/api/routes/measures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import logging

from fastapi import APIRouter, HTTPException
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from task_registry.data import Tasks
from task_registry.lifespan import CACHED_REGISTRY

router = APIRouter()

logger = logging.getLogger(__name__)


@router.get(
    "/",
    summary="Overview of all the measures in the task registry",
    description="This endpoint returns a JSON with all the measures in the task registry.",
    responses={200: {"description": "JSON with all the measures."}},
)
async def get_measures() -> JSONResponse:
    """Return the registry's index of measures, JSON-encoded, as a response."""
    measures_index = CACHED_REGISTRY.get_tasks_index(Tasks.MEASURES)
    return JSONResponse(content=jsonable_encoder(measures_index))


# The optional 'version' parameter is accepted but not used yet; versioning of
# measures is to be handled in a separate ticket.
@router.get(
    "/urn/{urn}",
    summary="Get the contents of the specific measure by URN",
    description="This endpoint returns a JSON with the contents of a specific measure identified by URN"
    " and version.",
    responses={
        200: {"description": "JSON with the specific contents of the measure."},
        400: {"description": "The URN does not exist or is not valid."},
    },
)
async def get_measure(urn: str, version: str = "latest") -> JSONResponse:
    """Return the measure registered under ``urn`` as a JSON response.

    Args:
        urn: URN of the measure to look up.
        version: Accepted for forward compatibility; currently ignored.

    Raises:
        HTTPException: With status 400 when the registry lookup raises ``KeyError``.
    """
    try:
        measure = CACHED_REGISTRY.get_task(urn, Tasks.MEASURES)
    except KeyError as err:
        raise HTTPException(status_code=400, detail=f"invalid urn: {urn}") from err
    else:
        return JSONResponse(content=measure)
42 changes: 42 additions & 0 deletions task_registry/api/routes/requirements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import logging

from fastapi import APIRouter, HTTPException
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from task_registry.data import Tasks
from task_registry.lifespan import CACHED_REGISTRY

router = APIRouter()

logger = logging.getLogger(__name__)


@router.get(
    "/",
    summary="Overview of all the requirements in the task registry.",
    description="This endpoint returns a JSON with all the requirements in the task registry.",
    responses={200: {"description": "JSON with all the requirements."}},
)
async def get_requirements() -> JSONResponse:
    """Return the registry's index of requirements, JSON-encoded, as a response."""
    requirements_index = CACHED_REGISTRY.get_tasks_index(Tasks.REQUIREMENTS)
    return JSONResponse(content=jsonable_encoder(requirements_index))


# The optional 'version' parameter is accepted but not used yet; versioning of
# requirements is to be handled in a separate ticket.
@router.get(
    "/urn/{urn}",
    summary="Get the contents of the specific requirement which has given URN.",
    description="This endpoint returns a JSON with the contents of a specific requirement identified by URN"
    " and version.",
    responses={
        200: {"description": "JSON with the specific contents of the requirement."},
        400: {"description": "The URN does not exist or is not valid."},
    },
)
async def get_requirement(urn: str, version: str = "latest") -> JSONResponse:
    """Return the requirement registered under ``urn`` as a JSON response.

    The OpenAPI texts above describe a requirement; they previously said
    "instrument", which mislabelled this endpoint in the generated API docs.

    Args:
        urn: URN of the requirement to look up.
        version: Accepted for forward compatibility; currently ignored.

    Raises:
        HTTPException: With status 400 when the registry lookup raises ``KeyError``.
    """
    try:
        content = CACHED_REGISTRY.get_task(urn, Tasks.REQUIREMENTS)
    except KeyError as err:
        raise HTTPException(status_code=400, detail=f"invalid urn: {urn}") from err
    return JSONResponse(content=content)
31 changes: 0 additions & 31 deletions task_registry/api/routes/urns.py

This file was deleted.

9 changes: 7 additions & 2 deletions task_registry/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,14 @@
# Self type is not available in Python 3.10 so create our own with TypeVar
SelfSettings = TypeVar("SelfSettings", bound="Settings")

PROJECT_NAME: str = "TR"
PROJECT_DESCRIPTION: str = "Task Registry"
PROJECT_NAME: str = "Task Registry"
PROJECT_SUMMARY: str = """
API service for the task registry. This API can be used to retrieve
measures, requirements and instruments in this registry.
"""
VERSION: str = "0.1.0" # replace in CI/CD pipeline
LICENSE_NAME: str = "EUPL-1.2 license"
LICENSE_URL: str = "https://eupl.eu/1.2/en/"


class Settings(BaseSettings):
Expand Down
124 changes: 80 additions & 44 deletions task_registry/data.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,103 @@
import logging
import os
from enum import StrEnum
from pathlib import Path
from typing import Any

import yaml
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)


def get_file_size(file_path: str) -> int:
return os.path.getsize(file_path) # pragma: no cover
class Tasks(StrEnum):
INSTRUMENTS = "instruments"
REQUIREMENTS = "requirements"
MEASURES = "measures"


def create_urn_mappper(entries: list[dict[str, Any]]) -> dict[str, Any]:
urn_mapper: dict[str, Any] = {}
for instrument in entries:
path = Path(instrument["path"])
try:
with open(str(path)) as f:
urn_mapper[instrument["urn"]] = yaml.safe_load(f)
except FileNotFoundError:
logger.exception(f"Instrument file with path {path} not found.") # pragma: no cover
class Link(BaseModel):
    """Hypermedia link container holding a single ``self`` URL."""

    # URL of the resource itself.
    self: str

except yaml.YAMLError:
logger.exception(f"Instrument file with path {path} could not be parsed.") # pragma: no cover

return urn_mapper
class FileInfo(BaseModel):
    """Index entry describing a single task file.

    The link object is declared as ``links`` with the alias ``_links``.
    Pydantic treats attribute names with a leading underscore as private
    attributes instead of fields, so a field literally named ``_links`` is
    ignored on construction and left out of the serialized output. With the
    alias, callers keep passing ``_links=...`` and alias-based serialization
    (the default of FastAPI's ``jsonable_encoder``) emits the ``_links`` key.
    """

    type: str
    size: int
    name: str
    path: str
    urn: str
    download_url: str
    links: Link = Field(alias="_links")


class Index(BaseModel):
    """Directory-level index listing the task files of one task kind.

    The link object is declared as ``links`` with the alias ``_links``.
    Pydantic treats attribute names with a leading underscore as private
    attributes instead of fields, so a field literally named ``_links`` is
    ignored on construction and left out of the serialized output. With the
    alias, callers keep passing ``_links=...`` and alias-based serialization
    (the default of FastAPI's ``jsonable_encoder``) emits the ``_links`` key.
    """

    type: str
    size: int
    name: str
    path: str
    download_url: str
    links: Link = Field(alias="_links")
    entries: list[FileInfo]


class CachedRegistry:
    """In-memory cache of task indexes and parsed task files, filled per task kind."""

    def __init__(self) -> None:
        # Generated index per task kind.
        self.index_cache: dict[Tasks, Index] = {}
        # Parsed YAML content keyed by (urn, task kind).
        self.tasks_cache: dict[tuple[str, Tasks], Any] = {}

    def add_tasks(self, tasks: Tasks) -> None:
        """Generate the index for ``tasks`` and cache the content of every entry.

        A file that is missing or cannot be parsed as YAML is logged and
        skipped, so the remaining entries are still loaded.
        """
        index = generate_index(tasks)
        self.index_cache[tasks] = index

        for task in index.entries:
            path = Path(task.path)
            try:
                # Decode explicitly as UTF-8 instead of relying on the platform's default encoding.
                with path.open(encoding="utf-8") as f:
                    self.tasks_cache[(task.urn, tasks)] = yaml.safe_load(f)
            except FileNotFoundError:
                logger.exception(f"Task file with path {path} not found.")  # pragma: no cover
            except yaml.YAMLError:
                logger.exception(f"Task file with path {path} could not be parsed.")  # pragma: no cover

    def get_tasks_index(self, tasks: Tasks) -> Index:
        """Return the cached index for ``tasks``.

        Raises:
            KeyError: If no index was cached for ``tasks``.
        """
        return self.index_cache[tasks]

    def get_task(self, urn: str, tasks: Tasks) -> dict[str, Any]:
        """Return the cached content stored under ``urn`` for the given task kind.

        Raises:
            KeyError: If nothing is cached for the ``(urn, tasks)`` pair.
        """
        return self.tasks_cache[(urn, tasks)]


def generate_index(
tasks: Tasks,
base_url: str = "https://task-registry.apps.digilab.network",
directory: str = "instruments",
) -> dict[str, Any]:
index: dict[str, Any] = {
"type": "dir",
"size": 0,
"name": directory,
"path": directory,
"download_url": f"{base_url}/instruments",
"_links": {
"self": f"{base_url}/instruments",
},
"entries": [],
}

for root, _, files in os.walk(directory):
) -> Index:
tasks_url = f"{base_url}/{tasks}"
entries: list[FileInfo] = []

for root, _, files in os.walk(tasks):
for file in files:
if file.endswith(".yaml"):
file_path = os.path.join(root, file)
relative_path = file_path.replace("\\", "/")
with open(file_path) as f:
instrument = yaml.safe_load(f)
file_info = {
"type": "file",
"size": get_file_size(file_path),
"name": file,
"path": relative_path,
"urn": instrument["urn"],
"download_url": f"{base_url}/urns/?urn={instrument['urn']}",
"_links": {
"self": f"{base_url}/urns/?urn={instrument['urn']}",
},
}
index["entries"].append(file_info)

return index
task = yaml.safe_load(f)
task_url = f"{tasks_url}/urn/{task['urn']}"
file_info = FileInfo(
type="file",
size=os.path.getsize(file_path),
name=file,
path=relative_path,
urn=task["urn"],
download_url=task_url,
_links=Link(self=task_url),
)
entries.append(file_info)

return Index(
type="dir",
size=0,
name=tasks.value,
path=tasks.value,
download_url=tasks_url,
_links=Link(self=tasks_url),
entries=entries,
)
Loading

0 comments on commit 0882bca

Please sign in to comment.