Skip to content

Commit

Permalink
feat: properly handle filename duckdb csv (#23)
Browse files: browse the repository at this point in the history
  • Loading branch information
eakmanrq authored May 23, 2024
1 parent 9051a6a commit 1441d0a
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 7 deletions.
5 changes: 4 additions & 1 deletion sqlframe/duckdb/readwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,10 @@ def load(
"""
if schema:
column_mapping = ensure_column_mapping(schema)
select_columns = [x.expression for x in self._to_casted_columns(column_mapping)]
select_column_mapping = column_mapping.copy()
if options.get("filename"):
select_column_mapping["filename"] = "VARCHAR"
select_columns = [x.expression for x in self._to_casted_columns(select_column_mapping)]
if format == "csv":
duckdb_columns = ", ".join(
[f"'{column}': '{dtype}'" for column, dtype in column_mapping.items()]
Expand Down
55 changes: 49 additions & 6 deletions tests/integration/engines/duck/test_duckdb_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,56 @@ def test_employee_extra_line_csv(duckdb_session: DuckDBSession):
auto_detect=False,
)
assert df.collect() == [
Row(**{"employee_id": 1, "fname": "Jack", "lname": "Shephard", "age": 37, "store_id": 1}),
Row(**{"employee_id": 2, "fname": "John", "lname": "Locke", "age": 65, "store_id": 1}),
Row(**{"employee_id": 3, "fname": "Kate", "lname": "Austen", "age": 37, "store_id": 2}),
Row(
**{"employee_id": 4, "fname": "Claire", "lname": "Littleton", "age": 27, "store_id": 2}
**{
"employee_id": 1,
"fname": "Jack",
"lname": "Shephard",
"age": 37,
"store_id": 1,
"filename": "tests/fixtures/employee_extra_line.csv",
}
),
Row(
**{
"employee_id": 2,
"fname": "John",
"lname": "Locke",
"age": 65,
"store_id": 1,
"filename": "tests/fixtures/employee_extra_line.csv",
}
),
Row(
**{
"employee_id": 3,
"fname": "Kate",
"lname": "Austen",
"age": 37,
"store_id": 2,
"filename": "tests/fixtures/employee_extra_line.csv",
}
),
Row(
**{
"employee_id": 4,
"fname": "Claire",
"lname": "Littleton",
"age": 27,
"store_id": 2,
"filename": "tests/fixtures/employee_extra_line.csv",
}
),
Row(
**{
"employee_id": 5,
"fname": "Hugo",
"lname": "Reyes",
"age": 29,
"store_id": 100,
"filename": "tests/fixtures/employee_extra_line.csv",
}
),
Row(**{"employee_id": 5, "fname": "Hugo", "lname": "Reyes", "age": 29, "store_id": 100}),
]


Expand All @@ -34,7 +77,7 @@ def test_employee_extra_line_csv_multiple(duckdb_session: DuckDBSession):
schema="employee_id INT, fname STRING, lname STRING, age INT, store_id INT",
skip=1,
header=1,
filename=1,
filename=0,
null_padding=True,
ignore_errors=1,
auto_detect=False,
Expand Down

0 comments on commit 1441d0a

Please sign in to comment.