Skip to content

Commit

Permalink
Added Transformation Model and its routes (#39)
Browse files Browse the repository at this point in the history
* Added Transformation Model and its routes
  • Loading branch information
punith300i authored Nov 17, 2023
1 parent 2f1505b commit 1ad1345
Show file tree
Hide file tree
Showing 10 changed files with 357 additions and 118 deletions.
6 changes: 4 additions & 2 deletions sand/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from sand.commands.load import load_dataset
from sand.container import use_container
from sand.helpers.dependency_injection import use_auto_inject
from sand.models import Project, SemanticModel, Table, TableRow
from sand.models import Project, SemanticModel, Table, TableRow, Transformation
from sand.models import db as dbconn
from sand.models import init_db

Expand All @@ -21,7 +21,9 @@
def init(db):
"""Init database"""
init_db(db)
dbconn.create_tables([Project, Table, TableRow, SemanticModel], safe=True)
dbconn.create_tables(
[Project, Table, TableRow, SemanticModel, Transformation], safe=True
)
if Project.select().where(fn.Lower(Project.name) == "default").count() == 0:
Project(name="Default", description="The default project").save()

Expand Down
4 changes: 2 additions & 2 deletions sand/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from sand.controllers.search import search_bp
from sand.controllers.settings import setting_bp
from sand.controllers.table import table_bp, table_row_bp
from sand.controllers.transform import transform_bp
from sand.controllers.transformation import transformation_bp
from sand.helpers.namespace import NamespaceService
from sand.models import EntityAR, SemanticModel
from sand.models.ontology import OntClassAR, OntPropertyAR
Expand All @@ -33,7 +33,7 @@ def get_flask_app(
table_row_bp,
setting_bp,
search_bp,
transform_bp,
transformation_bp,
generate_api(
SemanticModel,
deserializers={"data": sand_deser.deserialize_graph},
Expand Down
156 changes: 90 additions & 66 deletions sand/controllers/transform.py → sand/controllers/transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,26 @@

from sand.models.table import Link, Table, TableRow
from gena.deserializer import get_dataclass_deserializer
from gena import generate_api
from sand.models import Transformation

transform_bp = Blueprint("transform", "transform")
transformation_bp = generate_api(Transformation)


@dataclass
class Context:
    """Row-level context handed to a transformation function with the cell value.

    An instance is built per table row as
    ``Context(index=table_row.index, row=table_row.row)`` so user code can
    look at the other cells of the row it is transforming.
    """

    # Index of the table row currently being transformed.
    index: int
    # Every cell of that row, in column order.
    row: List[Union[str, float]]


@dataclass
class TransformRequestPayload:
"""Request Payload dataclass to validate the request obtained from the API call"""

type: Literal["map", "filter", "split", "concatenate"]
table_id: int
mode: str
datapath: Union[str, List[str]]
code: str
Expand All @@ -47,10 +52,10 @@ class Tdata(TypedDict):
def filter_traceback_errors() -> str:
"""Filters traceback errors, removes sensitive information
Args:
Args:
Returns:
Error String without the sensitive information.
Returns:
Error String without the sensitive information.
"""
(exc, value, tb) = sys.exc_info()
tb = tb.tb_next
Expand All @@ -61,17 +66,20 @@ def filter_traceback_errors() -> str:
ItemIndex = int


def transform_map(transform_func: Callable[[Any, Context], Any], data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int) -> List[Tdata]:
def transform_map(
transform_func: Callable[[Any, Context], Any],
data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int,
) -> List[Tdata]:
"""Implements map transform, performs map operation over each cell, for a given column
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Returns:
list of Tdata objects, data transformed after applying map transform
Returns:
list of Tdata objects, data transformed after applying map transform
"""
transformed_data = []
for path, value, context in data:
Expand All @@ -90,20 +98,23 @@ def transform_map(transform_func: Callable[[Any, Context], Any], data: Iterable[
return transformed_data


def transform_filter(transform_func: Callable[[Any, Context], Any], data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int) -> List[Tdata]:
def transform_filter(
transform_func: Callable[[Any, Context], Any],
data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int,
) -> List[Tdata]:
"""Implements filter transform, performs filter operation over each cell, for a given column
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Returns:
list of Tdata objects, data transformed after applying filter transform
Returns:
list of Tdata objects, data transformed after applying filter transform
Raises:
BadRequest: An error occurred when the transform_func on execution, does not return a boolean
Raises:
BadRequest: An error occurred when the transform_func on execution, does not return a boolean
"""
transformed_data = []
for path, value, context in data:
Expand All @@ -124,20 +135,23 @@ def transform_filter(transform_func: Callable[[Any, Context], Any], data: Iterab
return transformed_data


def transform_split(transform_func: Callable[[Any, Context], Any], data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int) -> List[Tdata]:
def transform_split(
transform_func: Callable[[Any, Context], Any],
data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int,
) -> List[Tdata]:
"""Implements split transform, performs split operation over each cell, for a given column
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Returns:
list of Tdata objects, data transformed after applying split transform
Returns:
list of Tdata objects, data transformed after applying split transform
Raises:
BadRequest: An error occurred when transform_func on execution, does not return a list
Raises:
BadRequest: An error occurred when transform_func on execution, does not return a list
"""
transformed_data = []
for path, value, context in data:
Expand All @@ -158,17 +172,20 @@ def transform_split(transform_func: Callable[[Any, Context], Any], data: Iterabl
return transformed_data


def transform_concatenate(transform_func: Callable[[Any, Context], Any],
data: Iterable[Tuple[ItemIndex, Item, Context]], tolerance: int) -> List:
def transform_concatenate(
transform_func: Callable[[Any, Context], Any],
data: Iterable[Tuple[ItemIndex, Item, Context]],
tolerance: int,
) -> List:
"""Implements concatenate transform, performs concatenate operation over each cell, for a given column
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Args:
transform_func: User defined python function defined by the user
data: iterable with Column data and context object
tolerance: contains the API request data
Returns:
list of Tdata objects, data transformed after applying concatenate transform
Returns:
list of Tdata objects, data transformed after applying concatenate transform
"""
transformed_data = []
for path, value, context in data:
Expand All @@ -190,30 +207,30 @@ def transform_concatenate(transform_func: Callable[[Any, Context], Any],
def custom_getitem_guard(obj: Any, index: Any) -> Any:
    """Permissive ``_getitem_`` guard used when running restricted user code.

    The function is installed under the ``_getitem_`` key of the restricted
    globals, so it is what evaluates ``obj[index]`` on behalf of the user
    supplied transformation code. It performs no additional checks and simply
    delegates to the object's own ``__getitem__``.

    Args:
        obj: any object implementing ``__getitem__`` (list, tuple, dict, str, ...)
        index: subscript to look up; a position, a mapping key or a slice

    Returns:
        The element stored in ``obj`` under ``index``.

    Raises:
        IndexError, KeyError, TypeError: whatever ``obj[index]`` itself raises.
    """
    return obj[index]


def compile_function(code: str) -> Callable:
"""Executes code in string in a restricted mode using restrictedpython
Args:
code: object that has __getitem__ implementation in python
Args:
code: object that has __getitem__ implementation in python
Returns:
Callable function that wraps the code as a function body
Returns:
Callable function that wraps the code as a function body
Raises:
BadRequest: An error occurred when the code has compilation error
Raises:
BadRequest: An error occurred when the code has compilation error
"""
loc = {}
safe_globals.update({'_getitem_': custom_getitem_guard})
safe_globals.update({"_getitem_": custom_getitem_guard})
compiled_result = compile_restricted_function("value,context", code, "<function>")

if compiled_result.errors:
Expand All @@ -224,23 +241,26 @@ def compile_function(code: str) -> Callable:
return loc["<function>"]


@transform_bp.route(
f"/{transform_bp.name}/<table_id>/transformations", methods=["POST"]
)
def transform(table_id: int):
table = Table.get_by_id(table_id)
table_rows: List[TableRow] = list(
TableRow.select().where(TableRow.table == table).order_by(TableRow.index)
)

@transformation_bp.route(f"/{transformation_bp.name}/test", methods=["POST"])
def transform():
if isinstance(request.json["datapath"], str):
request.json["datapath"] = [request.json["datapath"]]

request_data = transform_request_deserializer(request.json)
table = Table.get_by_id(request_data.table_id)
table_rows: List[TableRow] = list(
TableRow.select().where(TableRow.table == table).order_by(TableRow.index)
)
transform_func = compile_function(request_data.code)
col_index_list = [table.columns.index(column) for column in request_data.datapath]
data = ((table_row.index, [table_row.row[col_index] for col_index in col_index_list],
Context(index=table_row.index, row=table_row.row)) for table_row in table_rows[:request_data.rows])
data = (
(
table_row.index,
[table_row.row[col_index] for col_index in col_index_list],
Context(index=table_row.index, row=table_row.row),
)
for table_row in table_rows[: request_data.rows]
)

transformed_data = None

Expand All @@ -256,7 +276,9 @@ def transform(table_id: int):
raise BadRequest(
"For transform type map the outputpath should be a single column"
)
transformed_data = transform_filter(transform_func, data, request_data.tolerance)
transformed_data = transform_filter(
transform_func, data, request_data.tolerance
)

elif request_data.type == "split":
if request_data.outputpath is None:
Expand All @@ -266,6 +288,8 @@ def transform(table_id: int):
transformed_data = transform_split(transform_func, data, request_data.tolerance)

elif request_data.type == "concatenate":
transformed_data = transform_concatenate(transform_func, data, request_data.tolerance)
transformed_data = transform_concatenate(
transform_func, data, request_data.tolerance
)

return jsonify(transformed_data)
3 changes: 2 additions & 1 deletion sand/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
from sand.models.project import Project
from sand.models.semantic_model import SemanticModel
from sand.models.table import Table, TableRow, Link, ContextPage
from sand.models.transformation import Transformation

all_tables = [Project, SemanticModel, Table, TableRow]
all_tables = [Project, SemanticModel, Table, TableRow, Transformation]
45 changes: 45 additions & 0 deletions sand/models/transformation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from __future__ import annotations
from typing import Literal, Union, List
from peewee import (
CharField,
ForeignKeyField,
TextField,
IntegerField,
BooleanField,
)
from playhouse.sqlite_ext import JSONField

from sand.models.base import BaseModel
from sand.models.table import Table


class Transformation(BaseModel):
    """Persisted definition of a user-written transformation on a table.

    Each record stores the code of one transformation together with the
    settings needed to apply it. Records belong to a ``Table`` and are removed
    with it (``on_delete="CASCADE"``).
    """

    # Human readable label of the transformation.
    name = CharField()
    # Owning table; reachable from the table side as ``table.transformations``.
    table = ForeignKeyField(Table, backref="transformations", on_delete="CASCADE")
    mode = CharField()
    # Input column(s) the transformation reads: a single name or a list of names.
    datapath: Union[List[str], str] = JSONField()  # type: ignore
    # Column(s) the transformation writes its result to.
    outputpath: List[str] = JSONField()  # type: ignore
    # NOTE(review): presumably one of "map", "filter", "split", "concatenate"
    # as accepted by the transformation controller -- confirm.
    type = CharField()
    # Source code of the transformation function body.
    code = TextField()
    # Policy applied when the transformation fails.
    on_error: Literal[
        "set_to_blank", "store_error", "keep_original", "abort"
    ] = CharField()
    is_draft = BooleanField()
    order = IntegerField()
    # Optional link to another transformation of this model; reset to NULL
    # when the referenced record is deleted.
    insert_after = ForeignKeyField("self", null=True, on_delete="SET NULL")

    def to_dict(self):
        """Serialize the record into a plain dictionary.

        Foreign keys are emitted as raw ids (``table_id`` /
        ``insert_after_id``) so the result contains no model instances and no
        related row has to be loaded.

        Returns:
            dict with the keys ``id``, ``name``, ``table``, ``type``, ``mode``,
            ``datapath``, ``outputpath``, ``code``, ``on_error``, ``is_draft``,
            ``order`` and ``insert_after``.
        """
        return {
            "id": self.id,
            "name": self.name,
            "table": self.table_id,
            "type": self.type,
            "mode": self.mode,
            "datapath": self.datapath,
            "outputpath": self.outputpath,
            # Previously this emitted ``self.on_error``, so the stored code
            # was never returned to the caller.
            "code": self.code,
            "on_error": self.on_error,
            "is_draft": self.is_draft,
            "order": self.order,
            # Raw foreign-key value (None when unset), mirroring ``table``.
            "insert_after": self.insert_after_id,
        }
Loading

0 comments on commit 1ad1345

Please sign in to comment.