Skip to content

Commit

Permalink
feat(data-warehouse): Added support for using a different dataset for…
Browse files Browse the repository at this point in the history
… temp tables in bigquery (#26506)
  • Loading branch information
Gilbert09 authored Nov 29, 2024
1 parent b1810ea commit 88ba586
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const sourceFieldToElement = (field: SourceFieldConfig, sourceConfig: SourceConf
<LemonField key={field.name} name={[field.name, 'enabled']} label={field.label}>
{({ value, onChange }) => (
<>
{!!field.caption && <p>{field.caption}</p>}
<LemonSwitch
checked={value === undefined || value === null ? lastValue?.['enabled'] : value}
onChange={onChange}
Expand Down
17 changes: 17 additions & 0 deletions frontend/src/scenes/data-warehouse/new/sourceWizardLogic.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,23 @@ export const SOURCE_DETAILS: Record<ExternalDataSourceType, SourceConfig> = {
required: true,
placeholder: '',
},
// Optional BigQuery switch group: a toggle (off by default) that groups one required text field
// holding the ID of the dataset to use for temporary tables.
// NOTE(review): the API handler reads the `temporary-dataset` payload key and its
// `temporary_dataset_id` entry by name — keep these names in sync with the backend.
{
type: 'switch-group',
name: 'temporary-dataset',
label: 'Use a different dataset for the temporary tables?',
// Explanatory text for the user; describes why temporary tables are needed.
caption:
"We have to create and delete temporary tables when querying your data, this is a requirement of querying large BigQuery tables. We can use a different dataset if you'd like to limit the permissions available to the service account provided.",
default: false,
fields: [
{
type: 'text',
name: 'temporary_dataset_id',
label: 'Dataset ID for temporary tables',
required: true,
placeholder: '',
},
],
},
],
caption: '',
},
Expand Down
1 change: 1 addition & 0 deletions frontend/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4455,6 +4455,7 @@ export interface SourceFieldSwitchGroupConfig {
label: string
default: string | number | boolean
fields: SourceFieldConfig[]
caption?: string
}

export interface SourceFieldFileUploadConfig {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,14 @@ def import_data_activity_sync(inputs: ImportDataActivityInputs):
client_email = model.pipeline.job_inputs.get("client_email")
token_uri = model.pipeline.job_inputs.get("token_uri")

destination_table = f"{project_id}.{dataset_id}.__posthog_import_{inputs.run_id}_{str(datetime.now().timestamp()).replace('.', '')}"
# Optional dataset ID stored on the job inputs for the import's destination table.
# `.get` is called without a default, so this is presumably None when the key is absent.
temporary_dataset_id = model.pipeline.job_inputs.get("temporary_dataset_id")
# Use the temporary dataset only when the stored flag is truthy AND an ID is stored.
# NOTE(review): an empty-string ID still passes the `is not None` check and would yield a
# table path with an empty dataset segment — confirm the ID is validated upstream.
using_temporary_dataset = (
model.pipeline.job_inputs.get("using_temporary_dataset", False) and temporary_dataset_id is not None
)

# Fall back to the source's own dataset when no temporary dataset is in use.
destination_table_dataset_id = temporary_dataset_id if using_temporary_dataset else dataset_id
# Table path: <project>.<dataset>.__posthog_import_<run id>_<current timestamp with the "." removed>.
destination_table = f"{project_id}.{destination_table_dataset_id}.__posthog_import_{inputs.run_id}_{str(datetime.now().timestamp()).replace('.', '')}"

try:
source = bigquery_source(
dataset_id=dataset_id,
Expand Down
6 changes: 6 additions & 0 deletions posthog/warehouse/api/external_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,10 @@ def _handle_bigquery_source(
client_email = key_file.get("client_email")
token_uri = key_file.get("token_uri")

# Read the optional "temporary-dataset" group from the request payload; defaults to an empty
# dict when the key is missing.
# NOTE(review): if the payload carries an explicit null for "temporary-dataset", the default is
# not applied and the `.get` calls below would fail — confirm the client never sends null here.
temporary_dataset = request.data.get("temporary-dataset", {})
# "enabled" holds the switch state; treated as off when not provided.
using_temporary_dataset = temporary_dataset.get("enabled", False)
# Dataset ID entered for the temporary tables; None when not provided.
temporary_dataset_id = temporary_dataset.get("temporary_dataset_id", None)

new_source_model = ExternalDataSource.objects.create(
source_id=str(uuid.uuid4()),
connection_id=str(uuid.uuid4()),
Expand All @@ -697,6 +701,8 @@ def _handle_bigquery_source(
"private_key_id": private_key_id,
"client_email": client_email,
"token_uri": token_uri,
"using_temporary_dataset": using_temporary_dataset,
"temporary_dataset_id": temporary_dataset_id,
},
prefix=prefix,
)
Expand Down

0 comments on commit 88ba586

Please sign in to comment.