Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(search): correctly handle nested Postgres JSON querying #154

Merged
merged 3 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bento_lib/package.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = bento_lib
version = 10.1.0
version = 10.1.1a1
authors = David Lougheed, Paul Pillot
author_emails = [email protected], [email protected]
46 changes: 33 additions & 13 deletions bento_lib/search/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

QUERY_ROOT = q.Literal("$root")
SQL_ROOT = sql.Identifier("_root")
SQL_NOTHING = sql.SQL("")


# TODO: Python 3.7: Data Class
Expand All @@ -41,6 +42,7 @@ def __init__(
relations: OptionalComposablePair,
aliases: OptionalComposablePair,
current_alias_str: Optional[str],
current_alias_sql_schema: Optional[sql.SQL],
key_link: Optional[Tuple[str, str]],
field_alias: Optional[str],
search_properties: dict,
Expand All @@ -49,6 +51,7 @@ def __init__(
self.relations: OptionalComposablePair = relations
self.aliases: OptionalComposablePair = aliases
self.current_alias_str: Optional[str] = current_alias_str
self.current_alias_sql_schema: Optional[sql.SQL] = current_alias_sql_schema
self.key_link: Optional[Tuple[str, str]] = key_link
self.field_alias: Optional[str] = field_alias
self.search_properties: dict = search_properties
Expand Down Expand Up @@ -80,23 +83,28 @@ def json_schema_to_postgres_type(schema: JSONSchema) -> str:
return "TEXT" # TODO


def json_schema_to_postgres_schema(name: str, schema: JSONSchema) -> Tuple[Optional[sql.Composable], Optional[str]]:
def json_schema_to_postgres_schema(
name: str,
schema: JSONSchema,
) -> Tuple[Optional[sql.Composable], Optional[str], Optional[sql.Composable]]:
"""
Maps a JSON object schema to a Postgres schema for on-the-fly mapping.
:param name: the name to give the fake table.
:param schema: JSON schema to map.
"""

if schema["type"] != "object":
return None, None
return None, None, None

return (
sql.SQL("{}({})").format(
sql.Identifier(name),
sql.SQL(", ").join(sql.SQL("{} {}").format(sql.Identifier(p), sql.SQL(json_schema_to_postgres_type(s)))
for p, s in schema["properties"].items())),
"{}({})".format(name, ", ".join("{} {}".format(p, json_schema_to_postgres_type(s))
for p, s in schema["properties"].items()))
sql.Identifier(name),
name,
sql.SQL("({})").format(
sql.SQL(", ").join(
sql.SQL("{} {}").format(sql.Identifier(p), sql.SQL(json_schema_to_postgres_type(s)))
for p, s in schema["properties"].items()
)
),
)


Expand Down Expand Up @@ -145,6 +153,7 @@ def collect_resolve_join_tables(
new_aliased_resolve_path = re.sub(r"[$\[\]]+", "", f"{aliased_resolve_path_str}_{schema_field}")
current_alias = None
current_alias_str = None
current_alias_sql_schema = None

if current_relation is None and schema["type"] in ("array", "object"):
if db_field is None:
Expand All @@ -165,7 +174,8 @@ def collect_resolve_join_tables(
else: # object
# will be used to call either json_to_record(...) or jsonb_to_record(...):
relation_sql_template = "{structure_type}_to_record({field})"
current_alias, current_alias_str = json_schema_to_postgres_schema(new_aliased_resolve_path, schema)
current_alias, current_alias_str, current_alias_sql_schema = json_schema_to_postgres_schema(
new_aliased_resolve_path, schema)

current_relation = sql.SQL(relation_sql_template).format(
structure_type=sql.SQL(structure_type), # json or jsonb here
Expand Down Expand Up @@ -224,6 +234,7 @@ def collect_resolve_join_tables(
relations=relations,
aliases=aliases,
current_alias_str=current_alias_str,
current_alias_sql_schema=current_alias_sql_schema,
key_link=key_link,
field_alias=db_field,
search_properties=search_properties,
Expand Down Expand Up @@ -306,14 +317,19 @@ def join_fragment(ast: q.AST, schema: JSONSchema) -> sql.Composable:
# (e.g., nested objects stored in their own relations).
# If there is just 1 entry in terms, no join will occur, and it'll just be set to its alias.
sql.SQL(" LEFT JOIN ").join((
sql.SQL("{r1} AS {a1}").format(r1=terms[0].relations.current, a1=terms[0].aliases.current),
sql.SQL("{r1} AS {a1}{s1}").format(
r1=terms[0].relations.current,
a1=terms[0].aliases.current,
s1=terms[0].current_alias_sql_schema or SQL_NOTHING,
),
*(
sql.SQL("{r1} AS {a1} ON {a0}.{f0} = {a1}.{f1}").format(
sql.SQL("{r1} AS {a1}{s1} ON {a0}.{f0} = {a1}.{f1}").format(
r1=term.relations.current,
a0=term.aliases.parent,
a1=term.aliases.current,
f0=sql.Identifier(term.key_link[0]),
f1=sql.Identifier(term.key_link[1])
f1=sql.Identifier(term.key_link[1]),
s1=term.current_alias_sql_schema or SQL_NOTHING,
)
for term in terms[1:]
if term.key_link is not None
Expand All @@ -322,7 +338,11 @@ def join_fragment(ast: q.AST, schema: JSONSchema) -> sql.Composable:

# Then, include any additional (non-terms[0]) non-joined relations.
*(
sql.SQL("{r1} AS {a1}").format(r1=term.relations.current, a1=term.aliases.current)
sql.SQL("{r1} AS {a1}{s1}").format(
r1=term.relations.current,
a1=term.aliases.current,
s1=term.current_alias_sql_schema or SQL_NOTHING,
)
for term in terms[1:]
if term.key_link is None and term.relations.current is not None
),
Expand Down
45 changes: 40 additions & 5 deletions tests/test_search.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import psycopg2.sql
from bento_lib.search import build_search_response, data_structure, operations, postgres, queries
from datetime import datetime
from pytest import mark, raises
Expand Down Expand Up @@ -142,6 +143,23 @@
"operations": [operations.SEARCH_OP_EQ],
"queryable": "all"
}
},
"test2": {
"type": "object",
"properties": {
"test": {
"type": "string",
"search": {
"operations": [operations.SEARCH_OP_EQ],
"queryable": "all"
}
}
},
"search": {
"database": {
"type": "json"
}
}
}
},
"search": {
Expand Down Expand Up @@ -643,6 +661,11 @@
{"query": ["#ilike", ["#resolve", "biosamples", "[item]", "procedure", "code", "label"], "[%duM\\%\\_my]"], # 40
"ds": (False, False, 2, 0),
"ps": (False, ("[%duM\\%\\_my]",))},

# Testing nested Postgres JSON schema-creation
{"query": ["#eq", ["#resolve", "biosamples", "[item]", "test_postgres_array", "[item]", "test2", "test"], "a"],
"ds": (False, True, 2, 2), # Accessing 2 biosamples, each with 1 test_postgres_array item
"ps": (False, ("a",))},
]

TEST_LARGE_QUERY_1 = [
Expand Down Expand Up @@ -717,7 +740,7 @@
{
"procedure": {"code": {"id": "TEST", "label": "TEST LABEL"}},
"tumor_grade": [{"id": "TG1", "label": "TG1 LABEL"}, {"id": "TG2", "label": "TG2 LABEL"}],
"test_postgres_array": [{"test": "test_value"}],
"test_postgres_array": [{"test": "test_value", "test2": {"test": "a"}}],
"test_json_array": [{"test": "test_value"}],
},
{
Expand All @@ -727,7 +750,7 @@
{"id": "TG4", "label": "TG4 LABEL"},
{"id": "TG5", "label": "TG5 LABEL"},
],
"test_postgres_array": [{"test": "test_value"}],
"test_postgres_array": [{"test": "test_value", "test2": {"test": "a"}}],
"test_json_array": [{"test": "test_value"}],
}
],
Expand Down Expand Up @@ -888,15 +911,27 @@ def test_queries_and_ast():

def test_postgres_schemas():
null_schema = postgres.json_schema_to_postgres_schema("test", {"type": "integer"})
assert null_schema[0] is None and null_schema[1] is None
assert null_schema[0] is None and null_schema[1] is None and null_schema[2] is None

for s, p in zip(JSON_SCHEMA_TYPES, POSTGRES_TYPES):
assert postgres.json_schema_to_postgres_schema("test", {
res = postgres.json_schema_to_postgres_schema("test", {
"type": "object",
"properties": {
"test2": {"type": s}
}
})[1] == f"test(test2 {p})"
})
assert res[1] == "test"
assert res[2] == psycopg2.sql.Composed([
psycopg2.sql.SQL("("),
psycopg2.sql.Composed([
psycopg2.sql.Composed([
psycopg2.sql.Identifier("test2"),
psycopg2.sql.SQL(" "),
psycopg2.sql.SQL(p)
])
]),
psycopg2.sql.SQL(")"),
])


def test_postgres_collect_resolve_join_tables():
Expand Down