Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add api functionality #4

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,34 @@ Name: greek, dtype: int64
'description': 'A column with numbers.'}],
'facets': ['letter', 'greek']}
```


## Create an API with `datasetter` and `fastapi`

Datasetter comes with a wrapper to easily create an API based on your datasets.
Building on the code above, just add:

```python
>>> from fastapi import FastAPI
>>> from datasetter.api import add_dataset

>>> app = FastAPI()
>>> add_dataset(app, 'random-letters', dataset)
```

Then you have a `FastAPI` application that you can run with `uvicorn`:

```bash
$ uvicorn main:app --host 0.0.0.0 --port 8000
```

or

```python
>>> import uvicorn
>>> uvicorn.run(
...     app,
...     host="0.0.0.0",
...     port=8000,
... )
```
84 changes: 84 additions & 0 deletions datasetter/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import forge
from fastapi import HTTPException
from typing import Optional
import pandas as pd
import json
from datasetter.dataset import FacetUnavailableError


def as_json(doc):
    """Convert *doc* into a JSON-serializable structure.

    The document is round-tripped through a one-element pandas Series so
    that pandas handles serialization of awkward values: NaN/NaT become
    None and timestamps become ISO-8601 strings.
    """
    serialized = pd.Series([doc]).to_json(orient='records', date_format='iso')
    return json.loads(serialized)[0]


def add_dataset(fast_api, uri, dataset):
    """Create FastAPI endpoints to serve the dataset.

    Four GET endpoints are registered on the application:

    - ``{uri}/`` : dataset metadata.
    - ``{uri}/count`` : number of rows matching the facet filters.
    - ``{uri}/count-by/{facet}`` : row counts grouped by one facet
      (404 if the facet is unavailable).
    - ``{uri}/sample`` : a page of rows matching the facet filters.

    Each facet of the dataset becomes an optional query parameter on the
    counting/sampling endpoints (built dynamically with ``forge``, so the
    parameters show up explicitly in the OpenAPI schema).

    Parameters
    ----------
    fast_api : fastapi.applications.FastAPI
        A FastAPI application object. A new endpoint will be added in this application.
    uri : str
        The relative uri where the endpoint will be added.
    dataset : datasetter.dataset.Dataset
        The dataset object to be served.

    Returns
    -------
    Nothing : The endpoint is created as a side effect (inplace) in the `fast_api` application.
    """
    # Normalize to exactly one leading slash and no trailing slash.
    uri = '/' + uri.strip('/')
    # Reusable forge parameter descriptors for the endpoint signatures.
    facet = forge.kwarg('facet', type=str)
    rows = forge.kwarg('rows', default=10, type=Optional[int])
    skip = forge.kwarg('skip', default=0, type=Optional[int])
    # One optional string query parameter per dataset facet.  The loop
    # variable shadows `facet` only inside the comprehension scope.
    kwargs = [forge.kwarg(facet, default=None, type=Optional[str])
              for facet in dataset.facets]

    @fast_api.get(uri + "/")
    def get_metadata():
        # Serve the dataset's own metadata document.
        return as_json(dataset.metadata())

    @fast_api.get(uri + "/count")
    @forge.sign(*kwargs)
    def count(**kwargs):
        # Drop facets the client did not supply; only real filters remain.
        filters = {key: val for key, val in kwargs.items() if val is not None}
        count = dataset.count(**filters)
        return as_json({
            "count": int(count),
            "filters": filters,
        })

    @fast_api.get(uri + "/count-by/{facet}")
    @forge.sign(facet, rows, skip, *kwargs)
    def count_by(facet, rows=10, skip=0, **kwargs):
        filters = {key: val for key, val in kwargs.items() if val is not None}
        try:
            result = dataset.count_by(facet, rows=rows, skip=skip, **filters)
        except FacetUnavailableError:
            # Unknown facet -> HTTP 404 with an explicit reason.
            raise HTTPException(status_code=404,
                                detail="FacetUnavailableError: no facet {}".format(facet))
        return as_json({
            "facet": facet,
            # "count": len(result),  # TODO : add "nunique" feature in count_by schema
            "rows": len(result),
            "skip": skip,
            "filters": filters,
            # `result` maps facet value -> count; coerce to plain JSON types.
            "data": [{"value": str(key), "count": int(val)} for key, val in result.items()],
        })

    @fast_api.get(uri + "/sample")
    @forge.sign(rows, skip, *kwargs)
    def sample(rows=10, skip=0, **kwargs):
        filters = {key: val for key, val in kwargs.items() if val is not None}
        result = dataset.sample(rows=rows, skip=skip, **filters)
        # Total number of matching rows (independent of paging).
        count = dataset.count(**filters)
        return as_json({
            # "facet": facet,
            "count": count,
            "rows": len(result),
            "skip": skip,
            "filters": filters,
            "data": result.to_dict(orient='records'),
        })
12 changes: 0 additions & 12 deletions datasetter/dataset.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,4 @@
# Dataset : base class for datasetter

import pandas as pd
import json


def as_json(doc):
return json.loads(
pd.Series([doc])
.to_json(orient='records', date_format='iso')
)[0]


class FacetUnavailableError(Exception):
    """Raised when a requested facet is not available on a dataset."""
Expand Down
16 changes: 16 additions & 0 deletions datasetter/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
def doc_equal(doc1, doc2):
    """Test if two JSON-like documents are equal.

    Documents are compared recursively: dicts must have identical key sets
    and pairwise-equal values, lists must have the same length and
    pairwise-equal items, and any other values are compared with ``==``.

    Parameters
    ----------
    doc1, doc2 : dict, list or scalar
        JSON-like documents (nested dicts/lists of scalars).

    Returns
    -------
    bool : True if the documents are equal, False otherwise.

    Notes
    -----
    The previous implementation used ``assert`` for control flow, which is
    stripped under ``python -O`` (making the function always return True),
    and only checked that doc1's keys were a subset of doc2's, so extra
    keys in doc2 were silently ignored.  Both issues are fixed here.
    """
    if isinstance(doc1, dict):
        # Both dicts, same key sets, and every value recursively equal.
        return (isinstance(doc2, dict)
                and doc1.keys() == doc2.keys()
                and all(doc_equal(doc1[key], doc2[key]) for key in doc1))
    if isinstance(doc1, list):
        # Both lists, same length, and items pairwise equal.
        return (isinstance(doc2, list)
                and len(doc1) == len(doc2)
                and all(doc_equal(x, y) for x, y in zip(doc1, doc2)))
    # Scalars (numbers, strings, booleans, None).
    return bool(doc1 == doc2)
Empty file added test/__init__.py
Empty file.
160 changes: 160 additions & 0 deletions test/test_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# import pytest
import numpy as np
import pandas as pd
from fastapi import FastAPI
from fastapi.testclient import TestClient

from datasetter.utils import doc_equal
from datasetter.api import as_json, add_dataset
from datasetter.pandas_dataset import PandasDataset


# Fixture: 9 rows with two facet columns ('letter', 'greek') and one
# numeric, non-facet column ('number').
df = pd.DataFrame([
    ['A', 'alpha', 1],
    ['A', 'beta', 13],
    ['A', 'gamma', 8],
    ['B', 'alpha', 1],
    ['B', 'beta', 31],
    ['C', 'gamma', 9],
    ['C', 'alpha', 2],
    ['D', 'beta', 21],
    ['D', 'gamma', 0],
], columns=['letter', 'greek', 'number'])

# Dataset metadata; 'facets' declares which columns can be filtered/grouped.
metadata = {
    "description": "A simple dataset to make tests.",
    "facets": ['letter', 'greek'],
    "name": "Random letters",
    "columns": [
        {"name": "letter", "type": "string", "description": "A column with letters."},
        {"name": "greek", "type": "string", "description": "A column with greek letters."},
        {"name": "number", "type": "integer", "description": "A column with numbers."},
    ]}

dataset = PandasDataset(df, **metadata)

# Register the dataset endpoints under /pandas-dataset on a fresh app.
app = FastAPI()
add_dataset(app, 'pandas-dataset', dataset)
# Launch app from parent directory with:
# > uvicorn test.test_api:app --host 0.0.0.0 --port 8000 --reload

# In-process test client; no server needs to run for the tests below.
client = TestClient(app)


def test_as_json():
    """as_json must map missing values to None and timestamps to ISO strings."""
    assert as_json(None) is None
    assert as_json(pd.NaT) is None
    # np.nan, not np.NaN: the NaN alias was removed in NumPy 2.0.
    assert as_json(np.nan) is None
    assert as_json({}) == {}
    assert isinstance(as_json(pd.Timestamp.utcnow()), str)


def test_metadata():
    """The dataset root endpoint must return the metadata document."""
    response = client.get('/pandas-dataset/')
    assert response.status_code == 200
    assert doc_equal(response.json(), metadata)


def test_count():
    """The /count endpoint must honor zero, one and two facet filters."""
    cases = [
        ({}, 9),
        ({'letter': 'A'}, 3),
        ({'letter': 'A', 'greek': 'alpha'}, 1),
    ]
    for filters, expected in cases:
        response = client.get('/pandas-dataset/count', params=filters)
        assert response.status_code == 200
        assert doc_equal(response.json(),
                         {'count': expected, 'filters': filters})


def test_count_by():
    """The /count-by endpoint groups counts by facet and 404s on bad facets."""
    response = client.get('/pandas-dataset/count-by/letter')
    assert response.status_code == 200
    expected = {
        'facet': 'letter',
        'rows': 4,
        'skip': 0,
        'filters': {},
        'data': [{'value': 'A', 'count': 3},
                 {'value': 'B', 'count': 2},
                 {'value': 'C', 'count': 2},
                 {'value': 'D', 'count': 2}],
    }
    assert doc_equal(response.json(), expected)

    response = client.get('/pandas-dataset/count-by/greek')
    assert response.status_code == 200
    expected = {
        'facet': 'greek',
        'rows': 3,
        'skip': 0,
        'filters': {},
        'data': [{'value': 'alpha', 'count': 3},
                 {'value': 'beta', 'count': 3},
                 {'value': 'gamma', 'count': 3}],
    }
    assert doc_equal(response.json(), expected)

    # Unknown names and non-facet columns must both yield a 404.
    for bad_facet in ('foo', 'number'):
        response = client.get('/pandas-dataset/count-by/' + bad_facet)
        assert response.status_code == 404
        assert doc_equal(
            response.json(),
            {'detail': 'FacetUnavailableError: no facet ' + bad_facet})


def test_sample():
    # No filters, default paging: all 9 fixture rows fit in the default page
    # of 10, so the sample is the whole dataframe.
    # NOTE(review): 'count' is the total of matching rows; the min(10, ...)
    # expression only equals it because the dataset has fewer than 10 rows —
    # consider using dataset.count() directly.
    r = client.get('/pandas-dataset/sample')
    assert r.status_code == 200
    assert doc_equal(
        r.json(),
        {'count': min(10, dataset.count()),
         'rows': min(10, dataset.count()),
         'skip': 0,
         'filters': {},
         'data': df.to_dict(orient='records')})

    # rows=3 limits the page size but not the reported total count.
    r = client.get('/pandas-dataset/sample?rows=3')
    assert r.status_code == 200
    assert doc_equal(
        r.json(),
        {'count': min(10, dataset.count()),
         'rows': min(3, dataset.count()),
         'skip': 0,
         'filters': {},
         'data': df.head(3).to_dict(orient='records')})

    # Facet filter only: 3 rows match letter=A, all within the default page.
    # NOTE(review): min(3, ...) happens to equal the actual row count here
    # only because count(letter='A') == 3 — fragile if the fixture changes.
    r = client.get('/pandas-dataset/sample?letter=A')
    assert r.status_code == 200
    assert doc_equal(
        r.json(),
        {'count': min(10, dataset.count(letter="A")),
         'rows': min(3, dataset.count(letter="A")),
         'skip': 0,
         'filters': {'letter': 'A'},
         'data': dataset.sample(letter='A').to_dict(orient='records')})

    # Filter plus paging: 2 of the 3 matching rows are returned.
    r = client.get('/pandas-dataset/sample?letter=A&rows=2')
    assert r.status_code == 200
    assert doc_equal(
        r.json(),
        {'count': dataset.count(letter="A"),
         'rows': min(2, dataset.count(letter="A")),
         'skip': 0,
         'filters': {'letter': 'A'},
         'data': (dataset.sample(letter='A').head(2)
                  .to_dict(orient='records'))})
12 changes: 1 addition & 11 deletions test/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
import pytest
import numpy as np
import pandas as pd

from datasetter.dataset import Dataset, as_json


def test_as_json():
assert as_json(None) is None
assert as_json(pd.NaT) is None
assert as_json(np.NaN) is None
assert as_json({}) == {}
assert isinstance(as_json(pd.Timestamp.utcnow()), str)
from datasetter.dataset import Dataset


def test_dataframe():
Expand Down
1 change: 0 additions & 1 deletion test/test_pandas_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,3 @@ def test_sample():

with pytest.raises(FacetUnavailableError):
dataset.sample(number=12)
assert True
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[flake8]
max-line-length = 100
exclude = .git,__pycache__,datasetter.egg-info,.eggs,.tox,.ipynb_checkpoints


[tox]
envlist = py38,py39
Expand Down