Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support CSV ingestion through the UI #9280

Merged
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import React from 'react';
import { Alert } from 'antd';

const CSV_FORMAT_LINK = 'https://datahubproject.io/docs/generated/ingestion/sources/csv';

/**
 * Banner explaining how to supply a CSV file for ingestion.
 *
 * Renders an antd `Alert` telling the user to provide the URL of a web-hosted CSV file and
 * linking to the CSV source documentation (`CSV_FORMAT_LINK`) for the expected file format.
 */
export const CSVInfo = () => {
    // The documentation link opens in a new tab; rel="noopener noreferrer" prevents the
    // opened page from reaching back into this window via window.opener.
    const link = (
        <a href={CSV_FORMAT_LINK} target="_blank" rel="noopener noreferrer">
            link
        </a>
    );

    return (
        <Alert
            style={{ marginBottom: '10px' }}
            type="warning"
            banner
            message={
                <>
                    Add the URL of your CSV file to be ingested. This will work for any web-hosted CSV file. For example,
                    you can create a file in Google Sheets following the format at this {link} and then construct the
                    CSV URL by publishing your Google Sheet in the CSV format.
                </>
            }
        />
    );
};
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ import { ANTD_GRAY } from '../../../entity/shared/constants';
import { YamlEditor } from './YamlEditor';
import RecipeForm from './RecipeForm/RecipeForm';
import { SourceBuilderState, SourceConfig } from './types';
import { LOOKER, LOOK_ML } from './constants';
import { CSV, LOOKER, LOOK_ML } from './constants';
import { LookerWarning } from './LookerWarning';
import { CSVInfo } from './CSVInfo';

export const ControlsContainer = styled.div`
display: flex;
Expand Down Expand Up @@ -81,6 +82,8 @@ function RecipeBuilder(props: Props) {
return (
<div>
{(type === LOOKER || type === LOOK_ML) && <LookerWarning type={type} />}
{type === CSV && <CSVInfo />}

<HeaderContainer>
<Title style={{ marginBottom: 0 }} level={5}>
{sourceConfigs?.displayName} Recipe
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ import {
PROJECT_NAME,
} from './lookml';
import { PRESTO, PRESTO_HOST_PORT, PRESTO_DATABASE, PRESTO_USERNAME, PRESTO_PASSWORD } from './presto';
import { BIGQUERY_BETA, DBT_CLOUD, MYSQL, POWER_BI, UNITY_CATALOG, VERTICA } from '../constants';
import { BIGQUERY_BETA, CSV, DBT_CLOUD, MYSQL, POWER_BI, UNITY_CATALOG, VERTICA } from '../constants';
import { BIGQUERY_BETA_PROJECT_ID, DATASET_ALLOW, DATASET_DENY, PROJECT_ALLOW, PROJECT_DENY } from './bigqueryBeta';
import { MYSQL_HOST_PORT, MYSQL_PASSWORD, MYSQL_USERNAME } from './mysql';
import { MSSQL, MSSQL_DATABASE, MSSQL_HOST_PORT, MSSQL_PASSWORD, MSSQL_USERNAME } from './mssql';
Expand Down Expand Up @@ -140,6 +140,7 @@ import {
INCLUDE_VIEW_LINEAGE,
INCLUDE_PROJECTIONS_LINEAGE,
} from './vertica';
import { CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_FILE_URL, CSV_WRITE_SEMANTICS } from './csv';

export enum RecipeSections {
Connection = 0,
Expand Down Expand Up @@ -453,6 +454,11 @@ export const RECIPE_FIELDS: RecipeFields = {
],
filterSectionTooltip: 'Include or exclude specific Schemas, Tables, Views and Projections from ingestion.',
},
[CSV]: {
fields: [CSV_FILE_URL],
filterFields: [],
advancedFields: [CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_WRITE_SEMANTICS],
},
};

export const CONNECTORS_WITH_FORM = new Set(Object.keys(RECIPE_FIELDS));
Expand Down
60 changes: 60 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { RecipeField, FieldType } from './common';

/**
 * Pattern for an acceptable file URL: an optional `http://` / `https://` scheme, a dotted
 * host-like prefix, and at least one further URL character.
 *
 * - The host part is written as `[\w.-]+\.[\w.-]` (no nested quantifier) so that non-matching
 *   input cannot trigger exponential backtracking.
 * - `%` is part of the allowed tail characters so percent-encoded URLs (e.g. `my%20file.csv`)
 *   are accepted.
 * - Compiled once at module load rather than on every validation.
 */
const URL_PATTERN = /^(?:https?:\/\/)?[\w.-]+\.[\w.-][\w\-._~:/?#[\]@!$&'()*+,;=%]+$/;

/**
 * Builds a form rule object whose `validator` checks that a value looks like a URL.
 *
 * @param fieldName - Human-readable field label interpolated into the error message.
 * @returns An object with a `validator(rule, value)` method that resolves when `value` is
 *   empty (emptiness is left to the field's own `required` handling) or matches
 *   `URL_PATTERN`, and otherwise rejects with `Error("A valid <fieldName> is required.")`.
 */
const validateURL = (fieldName: string) => {
    return {
        validator(_: unknown, value?: string | null): Promise<void> {
            // Empty values pass here on purpose; only non-empty input is pattern-checked.
            if (!value || URL_PATTERN.test(value)) {
                return Promise.resolve();
            }
            return Promise.reject(new Error(`A valid ${fieldName} is required.`));
        },
    };
};

/**
 * Recipe form field for the location of the CSV file to ingest.
 *
 * A text field whose value is stored at `source.config.filename` in the recipe. It is
 * marked required and carries one rule, produced by `validateURL('File URL')`.
 */
export const CSV_FILE_URL: RecipeField = {
name: 'filename',
label: 'File URL',
tooltip: 'File URL of the CSV file to ingest.',
type: FieldType.TEXT,
fieldPath: 'source.config.filename',
placeholder: 'File URL',
required: true,
// Rules are supplied as factories; this one yields the URL-format validator.
rules: [() => validateURL('File URL')],
};

/**
 * Recipe form field for the delimiter used when parsing array-valued columns
 * (tags, terms and owners, per the tooltip).
 *
 * A text field stored at `source.config.array_delimiter`; it is not marked required and
 * has no validation rules.
 */
export const CSV_ARRAY_DELIMITER: RecipeField = {
name: 'array_delimiter',
label: 'Array delimiter',
tooltip: 'Delimiter to use when parsing array fields (tags, terms and owners)',
type: FieldType.TEXT,
fieldPath: 'source.config.array_delimiter',
placeholder: 'Array delimiter',
rules: null,
};

/**
 * Recipe form field for the column delimiter used when parsing the CSV file.
 *
 * A text field stored at `source.config.delimiter`; it is not marked required and has no
 * validation rules.
 */
export const CSV_DELIMITER: RecipeField = {
name: 'delimiter',
label: 'Delimiter',
tooltip: 'Delimiter to use when parsing CSV',
type: FieldType.TEXT,
fieldPath: 'source.config.delimiter',
placeholder: 'Delimiter',
rules: null,
};

/**
 * Recipe form field selecting the write semantics for the CSV source.
 *
 * A select field stored at `source.config.write_semantics` offering exactly two options,
 * `PATCH` and `OVERRIDE`; it is not marked required and has no validation rules.
 */
export const CSV_WRITE_SEMANTICS: RecipeField = {
name: 'write_semantics',
label: 'Write Semantics',
tooltip:
'Whether the new tags, terms and owners to be added will override the existing ones added only by this source or not. Value for this config can be "PATCH" or "OVERRIDE"',
type: FieldType.SELECT,
// Option values are written to the recipe as-is, so they match their labels.
options: [
{ label: 'PATCH', value: 'PATCH' },
{ label: 'OVERRIDE', value: 'OVERRIDE' },
],
fieldPath: 'source.config.write_semantics',
placeholder: 'Write Semantics',
rules: null,
};
4 changes: 4 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import verticaLogo from '../../../../images/verticalogo.png';
import mlflowLogo from '../../../../images/mlflowlogo.png';
import dynamodbLogo from '../../../../images/dynamodblogo.png';
import fivetranLogo from '../../../../images/fivetranlogo.png';
import csvLogo from '../../../../images/csv-logo.png';

export const ATHENA = 'athena';
export const ATHENA_URN = `urn:li:dataPlatform:${ATHENA}`;
Expand Down Expand Up @@ -108,6 +109,8 @@ export const VERTICA = 'vertica';
export const VERTICA_URN = `urn:li:dataPlatform:${VERTICA}`;
export const FIVETRAN = 'fivetran';
export const FIVETRAN_URN = `urn:li:dataPlatform:${FIVETRAN}`;
export const CSV = 'csv-enricher';
export const CSV_URN = `urn:li:dataPlatform:${CSV}`;

export const PLATFORM_URN_TO_LOGO = {
[ATHENA_URN]: athenaLogo,
Expand Down Expand Up @@ -142,6 +145,7 @@ export const PLATFORM_URN_TO_LOGO = {
[UNITY_CATALOG_URN]: databricksLogo,
[VERTICA_URN]: verticaLogo,
[FIVETRAN_URN]: fivetranLogo,
[CSV_URN]: csvLogo,
};

export const SOURCE_TO_PLATFORM_URN = {
Expand Down
7 changes: 7 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/sources.json
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,13 @@
"docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/fivetran/",
"recipe": "source:\n type: fivetran\n config:\n # Fivetran log connector destination server configurations\n fivetran_log_config:\n destination_platform: snowflake\n destination_config:\n # Coordinates\n account_id: snowflake_account_id\n warehouse: warehouse_name\n database: snowflake_db\n log_schema: fivetran_log_schema\n\n # Credentials\n username: ${SNOWFLAKE_USER}\n password: ${SNOWFLAKE_PASS}\n role: snowflake_role\n\n # Optional - filter for certain connector names instead of ingesting everything.\n # connector_patterns:\n # allow:\n # - connector_name\n\n # Optional -- This mapping is optional and only required to configure platform-instance for source\n # A mapping of Fivetran connector id to data platform instance\n # sources_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV\n\n # Optional -- This mapping is optional and only required to configure platform-instance for destination.\n # A mapping of Fivetran destination id to data platform instance\n # destination_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV"
},
{
"urn": "urn:li:dataPlatform:csv-enricher",
"name": "csv-enricher",
"displayName": "CSV",
"docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/csv",
"recipe": "source: \n type: csv-enricher \n config: \n # URL of your csv file to ingest \n filename: \n array_delimiter: '|' \n delimiter: ',' \n write_semantics: PATCH"
},
{
"urn": "urn:li:dataPlatform:custom",
"name": "custom",
Expand Down
22 changes: 22 additions & 0 deletions datahub-web-react/src/app/ingest/source/conf/csv/csv.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { SourceConfig } from '../types';
import csvLogo from '../../../../../images/csv-logo.png';

/**
 * Starter YAML recipe shown for the `csv-enricher` source.
 *
 * The delimiter values are quoted on purpose: an unquoted `|` is YAML's literal block-scalar
 * indicator (so `array_delimiter: |` would not yield the pipe character), and an unquoted `,`
 * is a flow indicator that cannot begin a plain scalar.
 */
const placeholderRecipe = `\
source:
    type: csv-enricher
    config:
        filename: # URL of your csv file to ingest, e.g. https://docs.google.com/spreadsheets/d/DOCID/export?format=csv
        array_delimiter: '|'
        delimiter: ','
        write_semantics: PATCH
`;

/**
 * Source template configuration for the `csv-enricher` ingestion source.
 *
 * Bundles the display name ("CSV"), the placeholder recipe, the documentation URL and the
 * logo image for this source type.
 */
const csvConfig: SourceConfig = {
type: 'csv-enricher',
placeholderRecipe,
displayName: 'CSV',
docsUrl: 'https://datahubproject.io/docs/generated/ingestion/sources/csv',
logoUrl: csvLogo,
};

export default csvConfig;
2 changes: 2 additions & 0 deletions datahub-web-react/src/app/ingest/source/conf/sources.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { SourceConfig } from './types';
import hiveConfig from './hive/hive';
import oracleConfig from './oracle/oracle';
import tableauConfig from './tableau/tableau';
import csvConfig from './csv/csv';

const baseUrl = window.location.origin;

Expand Down Expand Up @@ -46,6 +47,7 @@ export const SOURCE_TEMPLATE_CONFIGS: Array<SourceConfig> = [
glueConfig,
oracleConfig,
hiveConfig,
csvConfig,
{
type: 'custom',
placeholderRecipe: DEFAULT_PLACEHOLDER_RECIPE,
Expand Down
Binary file added datahub-web-react/src/images/csv-logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 10 additions & 0 deletions metadata-service/war/src/main/resources/boot/data_platforms.json
Original file line number Diff line number Diff line change
Expand Up @@ -574,5 +574,15 @@
"type": "OTHERS",
"logoUrl": "/assets/platforms/fivetranlogo.png"
}
},
{
"urn": "urn:li:dataPlatform:csv",
"aspect": {
"datasetNameDelimiter": ".",
"name": "csv",
"displayName": "CSV",
"type": "OTHERS",
"logoUrl": "/assets/platforms/csv-logo.png"
}
}
]
Loading