-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add printSchema support for duckdb and postgres (#29)
- Loading branch information
Showing
14 changed files
with
240 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import typing as t | ||
|
||
from sqlglot import exp | ||
|
||
from sqlframe.base.catalog import Column | ||
from sqlframe.base.dataframe import ( | ||
GROUP_DATA, | ||
NA, | ||
SESSION, | ||
STAT, | ||
WRITER, | ||
_BaseDataFrame, | ||
) | ||
|
||
|
||
class PrintSchemaFromTempObjectsMixin(
    _BaseDataFrame, t.Generic[SESSION, WRITER, NA, STAT, GROUP_DATA]
):
    """Mixin that implements ``printSchema`` via a temporary view.

    The frame's expression is wrapped in a temporary view so the session
    catalog can be asked for each column's name, data type and nullability.
    """

    def _get_columns_from_temp_object(self) -> t.List[Column]:
        """Create a temporary view over ``self.expression`` and list its columns.

        Returns:
            The result of ``session.catalog.listColumns`` for the new view.
        """
        # The view is named after the identifier supplied by the session.
        table = exp.to_table(self.session._random_id)
        self.session._execute(
            exp.Create(
                this=table,
                kind="VIEW",
                replace=True,
                properties=exp.Properties(expressions=[exp.TemporaryProperty()]),
                expression=self.expression,
            )
        )
        # NOTE(review): the view is never dropped here; presumably it is
        # cleaned up when the session/connection ends -- confirm.
        return self.session.catalog.listColumns(
            table.sql(dialect=self.session.input_dialect), include_temp=True
        )

    def printSchema(self, level: t.Optional[int] = None) -> None:
        """Print the frame's schema as a tree, starting with a ``root`` line.

        Args:
            level: Maximum nesting depth to print. ``None``, ``0`` and negative
                values all mean "no limit". Previously a negative value
                suppressed every column because ``0 >= level`` was already true.
        """
        # Normalise once so the recursive helper only deals with "no limit"
        # (None) or a strictly positive depth.
        max_depth: t.Optional[int] = level if level is not None and level > 0 else None

        def print_schema(
            column_name: str, column_type: exp.DataType, nullable: bool, current_level: int
        ) -> None:
            if max_depth is not None and current_level >= max_depth:
                return
            if current_level > 0:
                # One prefix per nesting level, emitted before the entry itself.
                print(" | " * current_level, end="")
            print(
                f" |-- {column_name}: {column_type.sql(self.session.output_dialect).lower()} (nullable = {str(nullable).lower()})"
            )

            # A data type has exactly one kind, so at most one branch runs.
            # Nested members are always reported as nullable.
            kind = column_type.this
            if kind == exp.DataType.Type.STRUCT:
                for column_def in column_type.expressions:
                    print_schema(column_def.name, column_def.args["kind"], True, current_level + 1)
            elif kind == exp.DataType.Type.ARRAY:
                for data_type in column_type.expressions:
                    print_schema("element", data_type, True, current_level + 1)
            elif kind == exp.DataType.Type.MAP:
                print_schema("key", column_type.expressions[0], True, current_level + 1)
                print_schema("value", column_type.expressions[1], True, current_level + 1)

        columns = self._get_columns_from_temp_object()
        print("root")
        for column in columns:
            print_schema(
                column.name,
                exp.DataType.build(column.dataType, dialect=self.session.output_dialect),
                column.nullable,
                0,
            )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import datetime | ||
|
||
from sqlframe.base.types import Row | ||
from sqlframe.duckdb import DuckDBDataFrame, DuckDBSession | ||
|
||
pytest_plugins = ["tests.integration.fixtures"] | ||
|
||
|
||
def test_print_schema_basic(duckdb_employee: DuckDBDataFrame, capsys):
    """printSchema on the employee frame prints ``root`` plus one line per column."""
    # NOTE(review): whitespace in the expected text is significant; confirm it
    # matches the exact prefix that printSchema emits for each column line.
    expected = """
root
|-- employee_id: int (nullable = true)
|-- fname: text (nullable = true)
|-- lname: text (nullable = true)
|-- age: int (nullable = true)
|-- store_id: int (nullable = true)""".strip()

    duckdb_employee.printSchema()
    printed = capsys.readouterr().out

    # Only the outer whitespace is trimmed on both sides of the comparison.
    assert printed.strip() == expected
|
||
|
||
def test_print_schema_nested(duckdb_session: DuckDBSession, capsys):
    """printSchema expands map, array and struct columns into nested entries."""
    # One row whose values cover scalar, map, array-of-struct, array, struct,
    # date, naive timestamp, tz-aware timestamp and boolean columns.
    row = (
        1,
        2.0,
        "foo",
        {"a": 1},
        [Row(a=1, b=2)],
        [1, 2, 3],
        Row(a=1),
        datetime.date(2022, 1, 1),
        datetime.datetime(2022, 1, 1, 0, 0, 0),
        datetime.datetime(2022, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
        True,
    )
    column_names = [
        "bigint_col",
        "double_col",
        "string_col",
        "map<string,bigint>_col",
        "array<struct<a:bigint,b:bigint>>",
        "array<bigint>_col",
        "struct<a:bigint>_col",
        "date_col",
        "timestamp_col",
        "timestamptz_col",
        "boolean_col",
    ]
    # NOTE(review): whitespace in the expected text is significant; confirm it
    # matches the exact per-level prefix that printSchema emits.
    expected = """
root
|-- bigint_col: bigint (nullable = true)
|-- double_col: double (nullable = true)
|-- string_col: text (nullable = true)
|-- map<string,bigint>_col: map(text, bigint) (nullable = true)
| |-- key: text (nullable = true)
| |-- value: bigint (nullable = true)
|-- array<struct<a:bigint,b:bigint>>: struct(a bigint, b bigint)[] (nullable = true)
| |-- element: struct(a bigint, b bigint) (nullable = true)
| | |-- a: bigint (nullable = true)
| | |-- b: bigint (nullable = true)
|-- array<bigint>_col: bigint[] (nullable = true)
| |-- element: bigint (nullable = true)
|-- struct<a:bigint>_col: struct(a bigint) (nullable = true)
| |-- a: bigint (nullable = true)
|-- date_col: date (nullable = true)
|-- timestamp_col: timestamp (nullable = true)
|-- timestamptz_col: timestamptz (nullable = true)
|-- boolean_col: boolean (nullable = true)""".strip()

    frame = duckdb_session.createDataFrame([row], column_names)
    frame.printSchema()
    printed = capsys.readouterr().out

    assert printed.strip() == expected
64 changes: 64 additions & 0 deletions
64
tests/integration/engines/postgres/test_postgres_dataframe.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import datetime

from sqlframe.base.types import Row
from sqlframe.duckdb import DuckDBDataFrame, DuckDBSession
from sqlframe.postgres import PostgresDataFrame, PostgresSession
|
||
pytest_plugins = ["tests.integration.fixtures"] | ||
|
||
|
||
def test_print_schema_basic(postgres_employee: PostgresDataFrame, capsys):
    """printSchema on the Postgres employee frame prints ``root`` plus one line per column.

    The fixture was previously annotated with the DuckDB frame type, a
    copy-paste slip; it is now annotated with the Postgres frame type.
    """
    postgres_employee.printSchema()
    captured = capsys.readouterr()
    # NOTE(review): whitespace in the expected text is significant; confirm it
    # matches the exact prefix that printSchema emits for each column line.
    assert (
        captured.out.strip()
        == """
root
|-- employee_id: int (nullable = true)
|-- fname: text (nullable = true)
|-- lname: text (nullable = true)
|-- age: int (nullable = true)
|-- store_id: int (nullable = true)""".strip()
    )
|
||
|
||
def test_print_schema_nested(postgres_session: PostgresSession, capsys):
    """printSchema on Postgres prints scalar, array and temporal columns.

    The fixture was previously annotated with the DuckDB session type, a
    copy-paste slip; it is now annotated with the Postgres session type.
    """
    df = postgres_session.createDataFrame(
        [
            (
                1,
                2.0,
                "foo",
                [1, 2, 3],
                datetime.date(2022, 1, 1),
                datetime.datetime(2022, 1, 1, 0, 0, 0),
                datetime.datetime(2022, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
                True,
            )
        ],
        [
            "bigint_col",
            "double_col",
            "string_col",
            "array<bigint>_col",
            "date_col",
            "timestamp_col",
            "timestamptz_col",
            "boolean_col",
        ],
    )
    df.printSchema()
    captured = capsys.readouterr()
    # The array column is expected as a bare "array" with no element type and
    # no nested "element" entry, unlike the DuckDB expectation.
    # NOTE(review): whitespace in the expected text is significant; confirm it
    # matches the exact prefix that printSchema emits for each column line.
    assert (
        captured.out.strip()
        == """
root
|-- bigint_col: bigint (nullable = true)
|-- double_col: double precision (nullable = true)
|-- string_col: text (nullable = true)
|-- array<bigint>_col: array (nullable = true)
|-- date_col: date (nullable = true)
|-- timestamp_col: timestamp (nullable = true)
|-- timestamptz_col: timestamptz (nullable = true)
|-- boolean_col: boolean (nullable = true)""".strip()
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters