From 9f3be716bef75c618517ecc54e45007528df234c Mon Sep 17 00:00:00 2001
From: Dave
Date: Mon, 25 Mar 2024 15:56:20 +0100
Subject: [PATCH] post merge file delete

---
 docs/examples/connector_x_arrow/load_arrow.py | 41 -------------------
 1 file changed, 41 deletions(-)
 delete mode 100644 docs/examples/connector_x_arrow/load_arrow.py

diff --git a/docs/examples/connector_x_arrow/load_arrow.py b/docs/examples/connector_x_arrow/load_arrow.py
deleted file mode 100644
index b3c654cef9..0000000000
--- a/docs/examples/connector_x_arrow/load_arrow.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import connectorx as cx
-
-import dlt
-from dlt.sources.credentials import ConnectionStringCredentials
-
-
-def read_sql_x(
-    conn_str: ConnectionStringCredentials = dlt.secrets.value,
-    query: str = dlt.config.value,
-):
-    yield cx.read_sql(
-        conn_str.to_native_representation(),
-        query,
-        return_type="arrow2",
-        protocol="binary",
-    )
-
-
-def genome_resource():
-    # create genome resource with merge on `upid` primary key
-    genome = dlt.resource(
-        name="genome",
-        write_disposition="merge",
-        primary_key="upid",
-        standalone=True,
-    )(read_sql_x)(
-        "mysql://rfamro@mysql-rfam-public.ebi.ac.uk:4497/Rfam",  # type: ignore[arg-type]
-        "SELECT * FROM genome ORDER BY created LIMIT 1000",
-    )
-    # add incremental on created at
-    genome.apply_hints(incremental=dlt.sources.incremental("created"))
-    return genome
-
-
-if __name__ == "__main__":
-    pipeline = dlt.pipeline(destination="duckdb")
-    genome = genome_resource()
-
-    print(pipeline.run(genome))
-    print(pipeline.last_trace.last_normalize_info)
-    # NOTE: run pipeline again to see that no more records got loaded thanks to incremental loading