data.py
from google.cloud import bigquery
import pandas as pd
# UPLOAD DATA TO BQ
PROJECT = "my-project"
DATASET = "taxifare_lecture"
TABLE = "lecture_data"
# Should probably be set in a config file
credentials_path = 'path/to/credentials.json'
project_id = 'your-project-id'
# Fully qualified table id (project.dataset.table)
table = f"{PROJECT}.{DATASET}.{TABLE}"
# Create the client from a service account key file; alternatively, rely on
# default application credentials with: client = bigquery.Client(project=project_id)
client = bigquery.Client.from_service_account_json(credentials_path, project=project_id)
def upload_dataframe(df_to_upload: pd.DataFrame, destination_table: str = table, write_mode: str = "WRITE_TRUNCATE"):
    '''Takes a dataframe and uploads it to BigQuery'''
    # write_mode is a BigQuery write disposition:
    # "WRITE_TRUNCATE" replaces the table contents, "WRITE_APPEND" adds rows to it
    job_config = bigquery.LoadJobConfig(write_disposition=write_mode)
    job = client.load_table_from_dataframe(df_to_upload, destination_table, job_config=job_config)
    job.result()  # wait for the load job to complete
# Example query against the table defined above (backticks are required
# because the project id contains a hyphen)
query = f"""
    SELECT *
    FROM `{PROJECT}.{DATASET}.{TABLE}`
"""
def query_bq(query: str) -> pd.DataFrame:
    '''Takes a query and returns the result as a dataframe'''
    query_job = client.query(query)  # start the query job
    result = query_job.result()      # wait for it to complete
    return result.to_dataframe()
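
# Minimal usage sketch: assumes the credentials path and project id above are
# valid and the "taxifare_lecture" dataset already exists. The sample dataframe
# and its column names are purely illustrative, not part of the lecture data.
if __name__ == "__main__":
    sample_df = pd.DataFrame({
        "fare_amount": [7.5, 12.0],   # hypothetical columns, for illustration only
        "passenger_count": [1, 2],
    })
    upload_dataframe(sample_df, write_mode="WRITE_TRUNCATE")  # replace the table contents
    print(query_bq(query).head())                             # read the rows back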