Skip to content

Commit

Permalink
Add api functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
BibMartin authored and BibMartin committed Mar 14, 2022
1 parent 05b7ada commit b8b533c
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 88 deletions.
78 changes: 78 additions & 0 deletions datasetter/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import forge
from typing import Optional
import pandas as pd
import json


def as_json(doc):
    """Convert `doc` into a structure made of plain JSON types.

    The document is wrapped in a one-element pandas Series, serialized by
    pandas (dates rendered in ISO format), parsed back with the standard
    `json` module, and then unwrapped again.

    Parameters
    ----------
    doc : object
        The value to convert.

    Returns
    -------
    The first (and only) record decoded from the pandas JSON output.
    """
    # Wrapping in a Series lets pandas' JSON encoder do the serialization.
    wrapped = pd.Series([doc])
    encoded = wrapped.to_json(orient='records', date_format='iso')
    records = json.loads(encoded)
    return records[0]


def _route_prefix(uri):
    """Return the normalized route prefix for `uri`.

    Leading and trailing slashes are stripped. A non-empty result gets a
    single leading '/', while an empty or slash-only `uri` yields '' so that
    routes built as ``prefix + "/..."`` never start with '//'.
    """
    trimmed = uri.strip('/')
    return '/' + trimmed if trimmed else ''


def _active_filters(params):
    """Return the entries of `params` whose value is not None."""
    return {key: val for key, val in params.items() if val is not None}


def add_dataset(dataset, fast_api, uri):
    """Create FastAPI endpoints to serve the dataset.

    Four GET endpoints are registered under the normalized `uri`:
    ``/`` (metadata), ``/count``, ``/count-by/{facet}`` and ``/sample``.
    Every facet of the dataset is exposed as an optional string parameter
    on the last three; parameters left to None are not used as filters.

    Parameters
    ----------
    dataset : datasetter.dataset.Dataset
        The dataset object to be served.
    fast_api : fastapi.applications.FastAPI
        A FastAPI application object. A new endpoint will be added in this application.
    uri : str
        The relative uri where the endpoint will be added. Surrounding
        slashes are ignored; '' or '/' mounts the endpoints at the root.

    Returns
    -------
    Nothing : The endpoint is created as a side effect (inplace) in the `fast_api` application.
    """
    prefix = _route_prefix(uri)

    # Signature fragments given to `forge.sign`, so that each endpoint
    # exposes explicit named parameters instead of an opaque **kwargs.
    facet_param = forge.kwarg('facet', type=str)
    rows_param = forge.kwarg('rows', default=10, type=Optional[int])
    skip_param = forge.kwarg('skip', default=0, type=Optional[int])
    filter_params = [forge.kwarg(name, default=None, type=Optional[str])
                     for name in dataset.facets]

    @fast_api.get(prefix + "/")
    def get_metadata():
        return as_json(dataset.metadata)

    @fast_api.get(prefix + "/count")
    @forge.sign(*filter_params)
    def count(**kwargs):
        filters = _active_filters(kwargs)
        total = dataset.count(**filters)
        return as_json({
            "count": int(total),
            "filters": filters,
        })

    @fast_api.get(prefix + "/count-by/{facet}")
    @forge.sign(facet_param, rows_param, skip_param, *filter_params)
    def count_by(facet, rows=10, skip=0, **kwargs):
        filters = _active_filters(kwargs)
        result = dataset.count_by(facet, rows=rows, skip=skip, **filters)
        return as_json({
            "facet": facet,
            # "count": len(result),  # TODO : add "nunique" feature in count_by schema
            "rows": len(result),
            "skip": skip,
            "filters": filters,
            "data": {str(key): int(val) for key, val in result.items()},
        })

    @fast_api.get(prefix + "/sample")
    @forge.sign(rows_param, skip_param, *filter_params)
    def sample(rows=10, skip=0, **kwargs):
        filters = _active_filters(kwargs)
        # NOTE(review): `result` presumably is a pandas DataFrame (it must
        # support `len()` and `to_dict(orient='records')`) -- confirm.
        result = dataset.sample(rows=rows, skip=skip, **filters)
        total = dataset.count(**filters)
        return as_json({
            # "facet": facet,
            # Cast to int like the "/count" endpoint does.
            "count": int(total),
            "rows": len(result),
            "skip": skip,
            "filters": filters,
            "data": result.to_dict(orient='records'),
        })
78 changes: 0 additions & 78 deletions datasetter/dataset.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,4 @@
# Dataset : base class for datasetter

from typing import Optional
import pandas as pd
import json


def as_json(doc):
    """Return `doc` re-expressed with plain JSON types only.

    `doc` is placed alone in a pandas Series, dumped to a JSON string by
    pandas using ISO-formatted dates, and decoded with `json.loads`; the
    single decoded record is returned.
    """
    payload = pd.Series([doc]).to_json(orient='records', date_format='iso')
    (record,) = json.loads(payload)
    return record


class FacetUnavailableError(Exception):
    """Exception raised to signal that a facet is unavailable."""
Expand Down Expand Up @@ -78,68 +65,3 @@ def sample(self, rows=10, skip=0, **filters):
A sample from the data.
"""
raise NotImplementedError()

def fastapi_serve(self, fast_api, uri):
    """Create FastAPI endpoints to serve the dataset.

    Four GET endpoints are registered under the normalized `uri`:
    ``/`` (metadata), ``/count``, ``/count-by/{facet}`` and ``/sample``.
    Every facet in `self.facets` is exposed as an optional string parameter
    on the last three; parameters left to None are not used as filters.

    Parameters
    ----------
    fast_api : fastapi.applications.FastAPI
        A FastAPI application object. A new endpoint will be added in this application.
    uri : str
        The relative uri where the endpoint will be added. Surrounding
        slashes are ignored; '' or '/' mounts the endpoints at the root.

    Returns
    -------
    Nothing : The endpoint is created as a side effect (inplace) in the `fast_api` application.
    """
    import forge

    # An empty or slash-only uri must give an empty prefix, otherwise the
    # routes below would start with '//'.
    trimmed = uri.strip('/')
    prefix = '/' + trimmed if trimmed else ''

    def active_filters(params):
        """Return the entries of `params` whose value is not None."""
        return {key: val for key, val in params.items() if val is not None}

    # Signature fragments given to `forge.sign`, so that each endpoint
    # exposes explicit named parameters instead of an opaque **kwargs.
    facet_param = forge.kwarg('facet', type=str)
    rows_param = forge.kwarg('rows', default=10, type=Optional[int])
    skip_param = forge.kwarg('skip', default=0, type=Optional[int])
    filter_params = [forge.kwarg(name, default=None, type=Optional[str])
                     for name in self.facets]

    @fast_api.get(prefix + "/")
    def get_metadata():
        return as_json(self.metadata)

    @fast_api.get(prefix + "/count")
    @forge.sign(*filter_params)
    def count(**kwargs):
        filters = active_filters(kwargs)
        total = self.count(**filters)
        return as_json({
            "count": int(total),
            "filters": filters,
        })

    @fast_api.get(prefix + "/count-by/{facet}")
    @forge.sign(facet_param, rows_param, skip_param, *filter_params)
    def count_by(facet, rows=10, skip=0, **kwargs):
        filters = active_filters(kwargs)
        result = self.count_by(facet, rows=rows, skip=skip, **filters)
        return as_json({
            "facet": facet,
            # "count": len(result),  # TODO : add "nunique" feature in count_by schema
            "rows": len(result),
            "skip": skip,
            "filters": filters,
            "data": {str(key): int(val) for key, val in result.items()},
        })

    @fast_api.get(prefix + "/sample")
    @forge.sign(rows_param, skip_param, *filter_params)
    def sample(rows=10, skip=0, **kwargs):
        filters = active_filters(kwargs)
        # NOTE(review): `result` presumably is a pandas DataFrame (it must
        # support `len()` and `to_dict(orient='records')`) -- confirm.
        result = self.sample(rows=rows, skip=skip, **filters)
        total = self.count(**filters)
        return as_json({
            # "facet": facet,
            # Cast to int like the "/count" endpoint does.
            "count": int(total),
            "rows": len(result),
            "skip": skip,
            "filters": filters,
            "data": result.to_dict(orient='records'),
        })
13 changes: 13 additions & 0 deletions test/test_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# import pytest
import numpy as np
import pandas as pd

from datasetter.api import as_json


def test_as_json():
    """Check that `as_json` maps null-like values to None and dates to strings."""
    # Null-like inputs all come back as None.
    assert as_json(None) is None
    assert as_json(pd.NaT) is None
    # `np.nan` is the canonical spelling; the `np.NaN` alias is gone in NumPy 2.0.
    assert as_json(np.nan) is None
    # An empty document stays an empty document.
    assert as_json({}) == {}
    # Timestamps are rendered as strings. `Timestamp.now(tz="UTC")` replaces
    # the deprecated `Timestamp.utcnow()`.
    assert isinstance(as_json(pd.Timestamp.now(tz="UTC")), str)
11 changes: 1 addition & 10 deletions test/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,7 @@
import pytest
import numpy as np
import pandas as pd

from datasetter.dataset import Dataset, as_json


def test_as_json():
    """Check `as_json` on null-like values, an empty dict and a timestamp."""
    assert as_json(None) is None
    assert as_json(pd.NaT) is None
    # Use `np.nan`: the `np.NaN` alias was removed in NumPy 2.0.
    assert as_json(np.nan) is None
    assert as_json({}) == {}
    # `Timestamp.now(tz="UTC")` is the non-deprecated equivalent of
    # `Timestamp.utcnow()`; the result must be serialized as a string.
    assert isinstance(as_json(pd.Timestamp.now(tz="UTC")), str)
from datasetter.dataset import Dataset


def test_dataframe():
Expand Down

0 comments on commit b8b533c

Please sign in to comment.