-
Notifications
You must be signed in to change notification settings - Fork 203
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add custom parent-child relationships example (#1678)
- Loading branch information
1 parent
8828a2c
commit df0e132
Showing
3 changed files
with
147 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
69 changes: 69 additions & 0 deletions
69
docs/examples/parent_child_relationship/parent_child_relationship.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
""" | ||
--- | ||
title: Load parent table records into child table | ||
description: Learn how to integrate custom parent keys into child records | ||
keywords: [parent child relationship, parent key] | ||
--- | ||
This example demonstrates handling data with parent-child relationships using the `dlt` library. | ||
You learn how to integrate specific fields (e.g., primary, foreign keys) from a parent record into each child record. | ||
In this example, we'll explore how to: | ||
- Add `parent_id` into each child record using `add_parent_id` function | ||
- Use the [`add_map` function](https://dlthub.com/docs/api_reference/extract/resource#add_map) to apply this | ||
custom logic to every record in the dataset | ||
:::note important | ||
Please note that dlt metadata, including `_dlt_id` and `_dlt_load_id`, will still be loaded into the tables. | ||
::: | ||
""" | ||
|
||
from typing import List, Dict, Any, Generator | ||
import dlt | ||
|
||
# Define a dlt resource with write disposition to 'merge' | ||
@dlt.resource(name="parent_with_children", write_disposition={"disposition": "merge"}) | ||
def data_source() -> Generator[List[Dict[str, Any]], None, None]: | ||
# Example data | ||
data = [ | ||
{ | ||
"parent_id": 1, | ||
"parent_name": "Alice", | ||
"children": [ | ||
{"child_id": 1, "child_name": "Child 1"}, | ||
{"child_id": 2, "child_name": "Child 2"}, | ||
], | ||
}, | ||
{ | ||
"parent_id": 2, | ||
"parent_name": "Bob", | ||
"children": [{"child_id": 3, "child_name": "Child 3"}], | ||
}, | ||
] | ||
|
||
yield data | ||
|
||
# Function to add parent_id to each child record within a parent record | ||
def add_parent_id(record: Dict[str, Any]) -> Dict[str, Any]: | ||
parent_id_key = "parent_id" | ||
for child in record["children"]: | ||
child[parent_id_key] = record[parent_id_key] | ||
return record | ||
|
||
if __name__ == "__main__": | ||
# Create and configure the dlt pipeline | ||
pipeline = dlt.pipeline( | ||
pipeline_name="generic_pipeline", | ||
destination="duckdb", | ||
dataset_name="dataset", | ||
) | ||
|
||
# Run the pipeline | ||
load_info = pipeline.run( | ||
data_source() | ||
.add_map(add_parent_id), | ||
primary_key="parent_id" | ||
) | ||
# Output the load information after pipeline execution | ||
print(load_info) |
78 changes: 78 additions & 0 deletions
78
docs/examples/parent_child_relationship/test_parent_child_relationship.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
|
||
import pytest | ||
|
||
from tests.utils import skipifgithubfork | ||
|
||
|
||
""" | ||
--- | ||
title: Load parent table records into child table | ||
description: Learn how to integrate custom parent keys into child records | ||
keywords: [parent child relationship, parent key] | ||
--- | ||
This example demonstrates handling data with parent-child relationships using | ||
the `dlt` library. You learn how to integrate specific fields (e.g., primary, | ||
foreign keys) from a parent record into each child record. | ||
In this example, we'll explore how to: | ||
- Add `parent_id` into each child record using `add_parent_id` function | ||
- Use the [`add_map` function](https://dlthub.com/docs/api_reference/extract/resource#add_map) to apply this | ||
custom logic to every record in the dataset | ||
:::note important | ||
Please note that dlt metadata, including `_dlt_id` and `_dlt_load_id`, will still be loaded into the tables. | ||
::: | ||
""" | ||
|
||
from typing import List, Dict, Any, Generator | ||
import dlt | ||
|
||
# Define a dlt resource with write disposition to 'merge' | ||
@dlt.resource(name="parent_with_children", write_disposition={"disposition": "merge"}) | ||
def data_source() -> Generator[List[Dict[str, Any]], None, None]: | ||
# Example data | ||
data = [ | ||
{ | ||
"parent_id": 1, | ||
"parent_name": "Alice", | ||
"children": [ | ||
{"child_id": 1, "child_name": "Child 1"}, | ||
{"child_id": 2, "child_name": "Child 2"}, | ||
], | ||
}, | ||
{ | ||
"parent_id": 2, | ||
"parent_name": "Bob", | ||
"children": [{"child_id": 3, "child_name": "Child 3"}], | ||
}, | ||
] | ||
|
||
yield data | ||
|
||
# Function to add parent_id to each child record within a parent record | ||
def add_parent_id(record: Dict[str, Any]) -> Dict[str, Any]: | ||
parent_id_key = "parent_id" | ||
for child in record["children"]: | ||
child[parent_id_key] = record[parent_id_key] | ||
return record | ||
|
||
@skipifgithubfork | ||
@pytest.mark.forked | ||
def test_parent_child_relationship(): | ||
# Create and configure the dlt pipeline | ||
pipeline = dlt.pipeline( | ||
pipeline_name="generic_pipeline", | ||
destination="duckdb", | ||
dataset_name="dataset", | ||
) | ||
|
||
# Run the pipeline | ||
load_info = pipeline.run( | ||
data_source() | ||
.add_map(add_parent_id), | ||
primary_key="parent_id" | ||
) | ||
# Output the load information after pipeline execution | ||
print(load_info) |