forked from gage-russell/pandas-lineage
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmarquez_example.py
30 lines (21 loc) · 925 Bytes
/
marquez_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
"""
```
cd examples/marquez-example/
source ./start_marquez.sh
python marquez_example.py
```
***formatted with Black***
"""
from uuid import uuid4
import pandas
from pandas_lineage import JobRun, read_csv, read_parquet
# run 1
# A single job run: read two datasets, derive two outputs, write them back.
# Every read/write below is handed the same `job_run` plus a `dataset_name`.
job_run = JobRun(
    run_id=uuid4().hex,  # fresh random hex id for each execution of the script
    namespace="marquez-examples",
    name="marquez-example-1",
)
start = job_run.emit_start()

# --- inputs ---
csv_input_1 = read_csv(
    "./mock_csv.csv",
    dataset_name="csv_input_dataset_1",
    job_run=job_run,
)
parquet_input_2 = read_parquet(
    "./mock_parquet.snappy.parquet",
    dataset_name="parquet_input_dataset_2",
    job_run=job_run,
)

# --- transformations ---
# Drop columns (axis=1) whose values are all missing.
csv_output_1 = csv_input_1.dropna(how="all", axis=1)
# Stack the cleaned CSV frame on top of the parquet frame.
# NOTE(review): the `to_parquet` call below passes lineage kwargs, so the
# result of `pandas.concat` presumably keeps the pandas_lineage frame type —
# confirm against the library.
parquet_output_2 = pandas.concat([csv_output_1, parquet_input_2])

# --- outputs ---
csv_output_1.to_csv(
    "./test.csv",
    dataset_name="csv_output_dataset_1",
    job_run=job_run,
)
parquet_output_2.to_parquet(
    "./test.snappy.parquet",
    dataset_name="parquet_output_dataset_2",
    job_run=job_run,
)

complete = job_run.emit_complete()