From 9906d019b52f7d215d88967854585bd146ed83f6 Mon Sep 17 00:00:00 2001 From: purnimagarg1 Date: Tue, 21 Nov 2023 18:48:26 +0530 Subject: [PATCH 1/7] Support CSV Ingestion through the UI --- .../source/builder/RecipeForm/constants.ts | 8 +++- .../ingest/source/builder/RecipeForm/csv.ts | 43 ++++++++++++++++++ .../app/ingest/source/builder/constants.ts | 4 ++ .../app/ingest/source/builder/sources.json | 7 +++ .../src/app/ingest/source/conf/csv/csv.ts | 22 +++++++++ .../src/app/ingest/source/conf/sources.tsx | 2 + datahub-web-react/src/images/csv-logo.png | Bin 0 -> 12029 bytes .../main/resources/boot/data_platforms.json | 10 ++++ 8 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts create mode 100644 datahub-web-react/src/app/ingest/source/conf/csv/csv.ts create mode 100644 datahub-web-react/src/images/csv-logo.png diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts index 351876fe6b16a..e463cc5f32ec0 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts @@ -83,7 +83,7 @@ import { PROJECT_NAME, } from './lookml'; import { PRESTO, PRESTO_HOST_PORT, PRESTO_DATABASE, PRESTO_USERNAME, PRESTO_PASSWORD } from './presto'; -import { BIGQUERY_BETA, DBT_CLOUD, MYSQL, POWER_BI, UNITY_CATALOG, VERTICA } from '../constants'; +import { BIGQUERY_BETA, CSV, DBT_CLOUD, MYSQL, POWER_BI, UNITY_CATALOG, VERTICA } from '../constants'; import { BIGQUERY_BETA_PROJECT_ID, DATASET_ALLOW, DATASET_DENY, PROJECT_ALLOW, PROJECT_DENY } from './bigqueryBeta'; import { MYSQL_HOST_PORT, MYSQL_PASSWORD, MYSQL_USERNAME } from './mysql'; import { MSSQL, MSSQL_DATABASE, MSSQL_HOST_PORT, MSSQL_PASSWORD, MSSQL_USERNAME } from './mssql'; @@ -140,6 +140,7 @@ import { INCLUDE_VIEW_LINEAGE, INCLUDE_PROJECTIONS_LINEAGE, } from './vertica'; +import { CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_FILENAME, CSV_WRITE_SEMANTICS } from './csv'; export enum RecipeSections { Connection = 0, @@ -453,6 +454,11 @@ export const RECIPE_FIELDS: RecipeFields = { ], filterSectionTooltip: 'Include or exclude specific Schemas, Tables, Views and Projections from ingestion.', }, + [CSV]: { + fields: [CSV_FILENAME, CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_WRITE_SEMANTICS], + filterFields: [], + advancedFields: [], + }, }; export const CONNECTORS_WITH_FORM = new Set(Object.keys(RECIPE_FIELDS)); diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts new file mode 100644 index 0000000000000..980caa06982eb --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts @@ -0,0 +1,43 @@ +import { RecipeField, FieldType } from './common'; + +export const CSV_FILENAME: RecipeField = { + name: 'filename', + label: 'File name', + tooltip: 'File path or URL of CSV file to ingest.', + type: FieldType.TEXT, + fieldPath: 'source.config.filename', + placeholder: 'File name', + required: true, + rules: null, +}; + +export const CSV_ARRAY_DELIMITER: RecipeField = { + name: 'array_delimiter', + label: 'Array delimiter', + tooltip: 'Delimiter to use when parsing array fields (tags, terms and owners)', + type: FieldType.TEXT, + fieldPath: 'source.config.array_delimiter', + placeholder: 'Array delimiter', + rules: null, +}; + +export const CSV_DELIMITER: RecipeField = { + name: 'delimiter', + label: 'Delimiter', + tooltip: 'Delimiter to use when parsing CSV', + type: FieldType.TEXT, + fieldPath: 'source.config.delimiter', + placeholder: 'Delimiter', + rules: null, +}; + +export const CSV_WRITE_SEMANTICS: RecipeField = { + name: 'write_semantics', + label: 'Write Semantics', + tooltip: + 'Whether the new tags, terms and owners to be added will override the existing ones added only by this source or not. Value for this config can be "PATCH" or "OVERRIDE"', + type: FieldType.TEXT, + fieldPath: 'source.config.write_semantics', + placeholder: 'Write Semantics', + rules: null, +}; diff --git a/datahub-web-react/src/app/ingest/source/builder/constants.ts b/datahub-web-react/src/app/ingest/source/builder/constants.ts index fdb094d721304..f2a92a2e09009 100644 --- a/datahub-web-react/src/app/ingest/source/builder/constants.ts +++ b/datahub-web-react/src/app/ingest/source/builder/constants.ts @@ -30,6 +30,7 @@ import verticaLogo from '../../../../images/verticalogo.png'; import mlflowLogo from '../../../../images/mlflowlogo.png'; import dynamodbLogo from '../../../../images/dynamodblogo.png'; import fivetranLogo from '../../../../images/fivetranlogo.png'; +import csvLogo from '../../../../images/csv-logo.png'; export const ATHENA = 'athena'; export const ATHENA_URN = `urn:li:dataPlatform:${ATHENA}`; @@ -108,6 +109,8 @@ export const VERTICA = 'vertica'; export const VERTICA_URN = `urn:li:dataPlatform:${VERTICA}`; export const FIVETRAN = 'fivetran'; export const FIVETRAN_URN = `urn:li:dataPlatform:${FIVETRAN}`; +export const CSV = 'csv'; +export const CSV_URN = `urn:li:dataPlatform:${CSV}`; export const PLATFORM_URN_TO_LOGO = { [ATHENA_URN]: athenaLogo, @@ -142,6 +145,7 @@ export const PLATFORM_URN_TO_LOGO = { [UNITY_CATALOG_URN]: databricksLogo, [VERTICA_URN]: verticaLogo, [FIVETRAN_URN]: fivetranLogo, + [CSV_URN]: csvLogo, }; export const SOURCE_TO_PLATFORM_URN = { diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index 9619abebbd54e..f04e998d9434d 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -223,6 +223,13 @@ "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/fivetran/", "recipe": "source:\n type: fivetran\n config:\n # Fivetran log connector destination server configurations\n fivetran_log_config:\n destination_platform: snowflake\n destination_config:\n # Coordinates\n account_id: snowflake_account_id\n warehouse: warehouse_name\n database: snowflake_db\n log_schema: fivetran_log_schema\n\n # Credentials\n username: ${SNOWFLAKE_USER}\n password: ${SNOWFLAKE_PASS}\n role: snowflake_role\n\n # Optional - filter for certain connector names instead of ingesting everything.\n # connector_patterns:\n # allow:\n # - connector_name\n\n # Optional -- This mapping is optional and only required to configure platform-instance for source\n # A mapping of Fivetran connector id to data platform instance\n # sources_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV\n\n # Optional -- This mapping is optional and only required to configure platform-instance for destination.\n # A mapping of Fivetran destination id to data platform instance\n # destination_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV" }, + { + "urn": "urn:li:dataPlatform:csv", + "name": "csv", + "displayName": "CSV", + "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/csv'", + "recipe": "source: \n type: csv-enricher \n config: \n # relative path to your csv file to ingest \n filename: \n array_delimiter: '|' \n delimiter: ',' \n write_semantics: PATCH" + }, { "urn": "urn:li:dataPlatform:custom", "name": "custom", diff --git a/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts b/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts new file mode 100644 index 0000000000000..16efae3a472ad --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts @@ -0,0 +1,22 @@ +import { SourceConfig } from '../types'; +import csvLogo from '../../../../../images/csv-logo.png'; + +const placeholderRecipe = `\ +source: + type: csv-enricher + config: + filename: # relative path to your csv file to ingest, e.g. ./path/to/your/file.csv + array_delimiter: | + delimiter: , + write_semantics: PATCH +`; + +const csvConfig: SourceConfig = { + type: 'csv', + placeholderRecipe, + displayName: 'CSV', + docsUrl: 'https://datahubproject.io/docs/generated/ingestion/sources/csv', + logoUrl: csvLogo, +}; + +export default csvConfig; diff --git a/datahub-web-react/src/app/ingest/source/conf/sources.tsx b/datahub-web-react/src/app/ingest/source/conf/sources.tsx index a3cdb0a8f5843..4dbeeb5c975e9 100644 --- a/datahub-web-react/src/app/ingest/source/conf/sources.tsx +++ b/datahub-web-react/src/app/ingest/source/conf/sources.tsx @@ -16,6 +16,7 @@ import { SourceConfig } from './types'; import hiveConfig from './hive/hive'; import oracleConfig from './oracle/oracle'; import tableauConfig from './tableau/tableau'; +import csvConfig from './csv/csv'; const baseUrl = window.location.origin; @@ -46,6 +47,7 @@ export const SOURCE_TEMPLATE_CONFIGS: Array = [ glueConfig, oracleConfig, hiveConfig, + csvConfig, { type: 'custom', placeholderRecipe: DEFAULT_PLACEHOLDER_RECIPE, diff --git a/datahub-web-react/src/images/csv-logo.png b/datahub-web-react/src/images/csv-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..b5fdc189cf58ec8c62a14ff58fcc271c36bb1a4f GIT binary patch literal 12029 zcmdUVbyQUGx9=H-loXVdlt!eKln@XYT0kiYNf}@O0f!DnLPcPZZfPkMkeCq=L_$L8 z8XBcL2bg(>-@WhMx9(f-ci%sE-TT&pb>PH4-?R5;$9L~fjG?|J^(EFz002sb%gB05okEzmN`(5(n@ki_bkXA7c+EALQewjsOygyy=2) z^|pWP>3GxQsdL8W9aaF~0$}QQP5d%9uz{&r4}y;fM-$ai5%F5jjoP8vbhU!>d2kZ$ zxdIB5gF&W*6a(F*=mSGqQ9!w)T3-D% z!tlyZg@i90NbAY|8WOs$qW*MD%DXCfYy9Zc*Q%kOtGanDkXq^#zwTQVC>qZTL;WZI zto%})roLjtEAH3VikXZrNd9${#!hTE?S3T|l50Lv6~tXQuok`JHcO%6JS_?{dYuY{ zXp@gJ)MpN7G$kX|?%4ub8{L$DB%S|8g;DODio&Ak!)xDgo;ck+JKn@(xHwzK8-1`j ztAi9TVqfw0O4``YZC0Cq@x8O9G4ViXi*#ZlGm8+x6beg!YupgHdAf&qpCt#4?~d%= zE7`eCWsbUKbsJurXjXlajA3)pw9Yh&nX@~?YY*tY6HL07a+?{b8C`5Y-s7r8)4+G$V=@d~x2bIc zUrOz6su)Q{RD*|#Z$K^?dj@FK^wt?)2&i>o)||Dgw!kl?y1&@4f}(AtRTxC&)a@r4 z0@L#fo~FJKYhqefLBhX>P%|=0G{l)r>;2ZnMG`50WnWb@zlv%%%tI(IPfIL(XvA*#Rj&h=BJLjM;h`)6zJ2Az)acKxcl-3AYlv#Ouu_v^a|m{ERfFr5cfNweZh^6d0u5M# z@Ene5;q$NK7Ao`2YD}|h!6Q8%PP1R2 zWk1&*j6Fui$LO0aU`V*P;Qf_|3YhqN2nML3EBH#v3w*fy%W#znRrZ742I6QH^s5$! z3wXilwK;#HDgJSC#I5NiU1jG5^4n<#>vk;tk$EE*_|;U}mY$6SgZDd?#l3e5GYLpG zI6En@QSG)SyR<736GZMornO;5KTo!NLoDCf!C<<##Mh5Cm!lF-y2CGRqGU89aU+xD z4ohl|w=;^hjR`fyo7B_JySMx;86Nozy1hTLV8kBy)nWaO;%mug%vFW{abgftaf&Amg zIO!0Pt(!xv7x?WYn&6K|aCR`my~3*X=b zz`!L+XPtl)K^2&DeNO6_`HI1+shn0pNDhuM(mGl=yWAIx|Z*09iu$$Vu7l;zgxEi;qW%m0{=$_Dgg!u&NkS;@ z0A4Z??8H1k!b=sFc~$Z_GL8(JGSL;9S!cQ*dC!!&r^wd1=_!bL_PF}OJ&Ctxk30~S z>^j=5K<>js?^zw_&oYl5*PnWRs1;zCAIv-C;Sr16wwsmcYD7>g zbNyfwdP(4VD)J6tAN0-r(eVz}xUvSZlE0jKc z!#4N(PTid-LrdM*C1OS>dyED;k?v+;{(_F~lud7eUFxY{>RBN9Ps!4hAEf*OWP*Bl z$wWAN6uVw55>nM$V0~GAvp94=8Z)WY4u0mg2i?g1P3Ao3#lE7rrjX%5@*SgSCW%v} zVZ;l+t4nr8{HLl#Y-gJ~(TlQVdUz;NX7c@BYn-NxgV9>bjH_lhI^NMNsp}BX@3yh{9Lm; zW?MDz&>byn*nNJ>fae`8nxwy|=0`-tQGgf~eiF}UWmr;61HFqG0I+A4Y!LQ~f9 z1$6W_P}u`AIC;C3YpZenOG^*obB_D5_^Uc!oU6$z&B09-0w;4$QdoZ_brbjYWzV>m zezLn_fwKdxcyhtvzWbj65q^}(+q?q=Qc#8zC_JRJlYQOvTe&~zu5tCg2PT;!o6@cS z@qWhRU561j9Gu49+4*4X;N4F#q9&=YL=<9=J`GvaLA1SiVc5I}Vno?V1z()_pkV_P zQ(CD(kn;>gCpw8GaAUbj_RnTDS4e$KKAwpoQ-9`m^OO11mHjtU`boy&8AUs*-pA^E zy$q~zgyFxX9yf+BJu_U@l{)7XINz^|k^lr_nXCR}=coJ%?Csx0@JZ=>F~AjR!m#sG;l+bUzNKlRHN| zkz^6WX`{pCpf)dYE(Ho$#jh(lO@n$8jX?5Vd(6aV-F9zuD#P>5*}i+jYxuzfUEdum z@?aNMBJEGN5zvSUPgEmAtyFjiUulK;PawD4N@Huekcz6uVHml#Hn|^1ryO78hlw7} zCnOdqYc01|xmK!hSPH7m$17oH97yBY4?1 z`x&z5ZJpZLqoUjPnDnMkw{nOqdsjbIM2IJK+<G|Xw0Q0TEx zw~OFGayOBTDV~o;-R*0}H?6a#-viSd{qbpJv(m4hEh_0pl@|ILL@(7d2)eo$(q&!H z0ka`0QgnnwFcm8MBY)EebRb^HzVL>&c1W%{euzZVmMIHl=f|8XQA`cankt7ohuFhV zN~;Pt`_6#;(($fV#ZiIMM^D#m*v-JKaLaWgkCBkO5;#ap^-dO_{FlzPpEtrjT$8~r z#$t@R7R^0%Pw<>6n5`K*)CBPBY~@|Irn;uoZaS^L^`olz$qF|V%9nj|kQg&}gkehoqkPgCmKd+VHX2fLFNeLl=%T${X)l3!R0A@|*~HsF{g!zuVSe;pg9|5vmpw zY6s7lR3RrM`*S9=*p6D3K5V8ulkhBgO$aCE5nzB%GM7_5-58K#d3NHNc!U@EwJb2F zc|S{52F4l8=0TcXdwEcCU2eCLH%altAPU^l_yncyNf%;oZ+!61={8&<&>s_T@26d* z>=#J>B4y^U+=eyI%Fan2&Yy`Wwf(R|QlAE_&TxXe;dtY+q;OBos&6{`ITcjkb%D&hEzf3mXS5FXh?l}q8#4N z@=z4wk!Nv!!!<>$^W!F7*YVy_Uk|M=YdHqREY@A~N9kvS{cldb57gg1bmv~%q^(D^ zjQ!38;#WVl%~liSZ$ZO~;L+I$ZqJ$~Jq{|pqvzyjOVqDtc4OYInb+;#k6Bf&&$qO9 z=8o82BW5pT8modeh$*jqT;bPrStqfyNH}CXepAu?*D5v6o_fjD} ziY{=?i1!cSZINsIfo(tC!++h49eX$o8#;w=YG==WY+lMzvt5 zK8U@y_!{%AyO`o}CNmRVs^%L_y0(PC4%tlHX6)Wy4Gp;OwJ&?r=&9- zE$~P*0n9q3(@L^6%t{mzZ2+BeBJXzl0Q~vZ3OrN!2K@WYO-1=r?CO_Hg}6Yo5h8RF zQc8TgnT6T!xcamW%Ra=PqZw-)KFIv4SY<;(<$OLVYewJ!;AeRLt0=Zb72RfhEH!lWKbHEq#LPD-cjB z^=Ud)shya0#ieRV9Iw1_$6imy=x0PbL6eNKB*|Joy4a{9o2Xz?eEBYSCdrxit`~<1 zVgE#E)|d*36qndhRh4C*XQ9(~S}kVz^6GX0IdGts-2^yz5_{FhoiL2{Vfs880#2kT z&a6QUk5B>fHOzz89%6X0Rh9wpJpu(L_gmHRf!(uU#SY&&AG)#3f$(p4{$jU zn8(+Bc)hn)h)DMk4j$wu&4Gj=kHIO+kh|yVEq?qFDbN*t;>h?;3)k z@o7!YBlH{RoNcST;4*`#45J>dc7_=2<4S`BoUD;wUhSYTmSa`kbyEg2%PGs8=L)zV zeeWNI+(_WC!?4ziY#nmsy?bI1)+CC&dv^8)uDY#t(!4whuEFC7amVzEK^b02{4>Sv-(Vt@)Ei~@Y42E z)q0|OfsRV~wDLpV(mNcQ;X5l#C61wATEHM0p)SDlVS@$m5ggyblhMt4!B+ba#o|M^ z72>?unM;1l;P?V$th%4CO3+!;28pFu8wK+;dAgBBtQDWEk_jT6l+Ou+_o}`p5^m{{ z0_@!eD3jQ{1~NhqqeO`3)>}mNXoKZuvB%(2(2;-t^J924VSJIsx;rw-z`#gOe=mBr ziYO0}Sw#iPUK>$Vg zD5M!N&>3#=UeD{9fi+b3JlRb5;}KzEi8A^|Kc?~$SU$XXEiwF9G7e@^M;+m)YvMc_ z5cYxv{2m3|eQ*y1SxdhKPX|1I-#$`JT{Xc&?--u?{P}uq`u8von~O`bB(OZoLm~YD zqWaw4a}qYoUT%{{kfShPWobxk`n?yuL#=YCprbUkQ@*BMEj9hl0~)cm`i6+&lvkL!o{3zab&D>~H(+{Y4; zJF9N2Q9V@Z#y!~hRQR;v3^7YN&h%DO{JvE}uvM;4XECJGzIVi}?%rEsp5U|*lgA)> z$>+A!Um9143ck{xwNu>)WiEdsZYCG}Q0&S&OB!$TOY!Bgn(&VM0=h#3#9`?Wu5f3fE$;7}k|rngFRTdt<(49J z@GU0|fz1umw5M9$okp@P?qMy`0UDo!8^P|0IFXCKya_k>6`m5j6Q5-Yg7D~NqJ%_l zJZDL8;jvXGrN(ZDoPSpz?ss#Lrf2v)YX;HFDQ*g*`-73?sey-9<^&4K&rmrk}P; z=Wb^H(%{@~@VnCI83wK&3uZG1sGc<%s-b9F?A{ENAw!B2Dnqz`coBN(6KmH^QM6P; zVT5kXW=#?Y{&*b3_eED>T93*2CZ}4oPV^bW_CgTOK9kvl3{TC>7ePxuZ$r0`!RnYf6rl9Fo-G4VK~HB=N}qtFWLN(dOB+!-5JIFVG78UZnLIY5>!3&UiKqf6X7iG z&e@yzeJjpeX}g5r)Ym6$-mKYv%z85vJ9)%ZehxARLN2%fQZ|1-2JD%R{W?&K9Y}>OA^u3I{lqhQWXt( zyVqv@L8Tu`0wtFPB&V{@FD-)Yt`=v>Dut^Fr$WyAr4`6^=>W|c&8=mj77{WRsY%W6 zKofGHcd@$UDYwtrgZC3It&bhqyX{%?dLx6y2b!U({gAIe!(lD*fTg zh$Fs~Iv2SwJgf5hTXyz|-@&~k{~jWhH)ZvYyawH}u@zz$~5;x@0Fu)s2 zf|br{Chr5&xO0*185o??g7-TuZ)woM_+A(^xI0KYo&9(ox9%$K06}t`AFu!X;Zqj{ zkNHB{H?Sk~HIwdbFPgd*Y+AD#mlaZgEx<)F)R0nvpCUL`O)K#Edsbb#)mS&vyws_X zlYWv)*&*Zgalyqn{aTfi%`G1Xo4s!Xncu)JC~CMra*&|i4dnia@Vo)eqrom@s1L(- zJ8vzmTV~gUCH;s^$gk##_C!feukjJDS%$Px#TsnTUy^aLF-jP1(@L&@d>Sd`c78j?!_ld@NNqK}#Yi{S5m(JYDEpLy+#gK6W>(<5J? zL$+8W+=h*Ef=pep24f1eHl#PY!7q~mw}xDqIg2hp*k0Wk9sDme5w@y4rFSD>RQE=L@&_F4&LF$)`*bO;WH8s8D+xZz-Ff532E$)bH|(vd`1bT-U2@W1l-Ig2Ltn z`2_~`bPnC8Ro|o#NsKdll1Y@0DOhD|ykWYAd)>Nc2JU8IKM2z>0@02)46Zyt2R1RC ztPdjF9m!W{ufO_q-~_3B)DWNg^JYQ7~A6VJL` z62<%Q+%z(=(p*sT`(w$GDtW0Dy7T^PtYvrg31xDi)JDTVq7QGawLO$yO}<57fMe*pFpMam8j%kXXlOEx^NbHKq>IG2M6CoL-Ad3jv) z<2ADB-mjWDuzsz^EHa%!e(QG%7Jr{BOa`{luZxzXD%77blpKqd^!WTZ=m8{W9C9WGj3+-|uY4bFT04HJ@`Kyvv>&lduAU z2s$f9QS1Cm2KP5Dr|2l7(n>yxnHJCNm~6MMUoA0sOV@8VC%w3KGuY8Rcu9$9Zx@lT z7o71DZJ^zliU=j`52ni7S3zkjfihp(m#2v2aef^s{uwtA!0V{%8K(9n^g z`)h;8Iow-n5h0WPOjh0R#PGn}3DraWJdYax)8m}xrKlF#@NbbegFUd_7S{|}3oEK~ zl_rR-+LHee=D5s16F#ti4n5stf~)7gVQ^^X zlz->MK+g&IrTY$a1#lY{TM^)4IvmqX!XfqIk`8k@MzZXgQ|;4}sxD`zUC-9c%4cc?&f%i#Hhhw*|swIu8X`#Oj<@wSnq=%~7 zo-1nxr0d&37bMob&QIb+PX%GWVLsWll-M4j+rqmonD1hD;X z<^S!`A912Pidzpkit>jrl)Iis_vun@yWd%nVFuo*3WRfmxx8#i-y#DPAftxKbXw14 zY4z9Dee^(?v5xv5&u?CH2|vZhU2&PF2p{f3%zi9-!*|XI4KrHd`i_(`VJWLh&S^)O zJ#q?OdO=5sal-dS{OVxWj1`|!Y<97iZUZM)J>w_i#IAy$cdtm7%NEM1-76*}v4IjS z`=$m=%|{OgFl=ebU^57W{Q>cGssYrCZ+O)-J;X*n$yX#>rxtg0wgzJI$1?u3hBxT>dd{9WEvvAHxEEcs4w^|DW2eu;j8RVGs=-n3~#AvoZg=h~&Y z(;@g)Vf7w7K;v)`O9QMmhQ!f_Ui$QU1dolTQ__oVw(l)s+e7>bllJ)<#=4bGGSd$| zxe}c3lI_G|aS&v;;R=I`D7c4Q+jcuEbjyU;N*_ybTi>m)kH^OQMPB9R0Nh7mQb%>?Jl>tj%xk#_WQ%XNx7+ zgLPv8oFt$vZuH@yQR3=UIpF5w2o-y4D)DnED_-pVT z)e6x&lDyCqs>7nbWP+tPSG>( z^Y@$rG&VEuw~PMmvv*z*ir6(4)N2Z3w!Vb9A~-VAE=&V$x#0vnkwJ%Ba!*4Ft>`$p zA_8M-y;NOjB%aZy8aQxo`^|d!dExYqh`?zyU7LtiF>Sxr#98z4LGe{mT93#ae@JC*X^FC+Jm1_%$_1BsNkbV#24!K!L+ zumjP`eIR#bpnP~_G+vq(D0KHna>3FI?dp}b7>(c{_nhx2c&XJl3hY_N@QM{UVN_&SjBIdJ_yW|{tb)%;UR z>VLnRlR#1%nYh`B@3g2EY9a-a>nqv~GAE5r|6iD~@qLy5py$p8y6}*O`GjPGtfV1Q zL_a$GnIoXZ_gNI=qPUE_ew2cw+~@}4wFN6k;2;GFRz1f1>6zoS-UX#s_+oUVbYbRO zf$cxdFX+MOC#Ixf?-(~jD7V`|=1_t8vW>Yc{t%p=I)mpyVv_S&XLV^mF-XEh(NvH= z>TAVT*^ZCwT(Fcu^Ix)mGQ1a4s}l~gk|0@fNM2Zl9nrBw!Wy@=K4DB|7g0h^1^7o3 yllGgZg0CH-$0=36P`O}C{bv%iVW@pAf-+YEBx&bEe-cPt2VnQ~)hktPpZ^a=hZQ0K literal 0 HcmV?d00001 diff --git a/metadata-service/war/src/main/resources/boot/data_platforms.json b/metadata-service/war/src/main/resources/boot/data_platforms.json index 3c70eda8561b8..0574f3fda4017 100644 --- a/metadata-service/war/src/main/resources/boot/data_platforms.json +++ b/metadata-service/war/src/main/resources/boot/data_platforms.json @@ -574,5 +574,15 @@ "type": "OTHERS", "logoUrl": "/assets/platforms/fivetranlogo.png" } + }, + { + "urn": "urn:li:dataPlatform:csv", + "aspect": { + "datasetNameDelimiter": ".", + "name": "csv", + "displayName": "CSV", + "type": "OTHERS", + "logoUrl": "/assets/platforms/csv-logo.png" + } } ] From ed937a14bd8878660f8bf4bb32b8cbf8da3f032d Mon Sep 17 00:00:00 2001 From: purnimagarg1 Date: Tue, 21 Nov 2023 19:03:40 +0530 Subject: [PATCH 2/7] Update csv config file --- .../src/app/ingest/source/conf/csv/csv.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts b/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts index 16efae3a472ad..fb34ff9b1748b 100644 --- a/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts +++ b/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts @@ -5,14 +5,14 @@ const placeholderRecipe = `\ source: type: csv-enricher config: - filename: # relative path to your csv file to ingest, e.g. ./path/to/your/file.csv - array_delimiter: | - delimiter: , - write_semantics: PATCH + filename: # relative path to your csv file to ingest, e.g. ./path/to/your/file.csv + array_delimiter: | + delimiter: , + write_semantics: PATCH `; const csvConfig: SourceConfig = { - type: 'csv', + type: 'csv-enricher', placeholderRecipe, displayName: 'CSV', docsUrl: 'https://datahubproject.io/docs/generated/ingestion/sources/csv', From a9bd4fd0c194c41b0ee1a5beba6b974bb764ecce Mon Sep 17 00:00:00 2001 From: purnimagarg1 Date: Wed, 22 Nov 2023 16:52:10 +0530 Subject: [PATCH 3/7] Validate the URL field and move the other fields to Advanced fields in the form --- .../source/builder/RecipeForm/constants.ts | 6 ++-- .../ingest/source/builder/RecipeForm/csv.ts | 30 +++++++++++++++---- .../app/ingest/source/builder/constants.ts | 2 +- .../app/ingest/source/builder/sources.json | 6 ++-- .../src/app/ingest/source/conf/csv/csv.ts | 2 +- 5 files changed, 32 insertions(+), 14 deletions(-) diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts index e463cc5f32ec0..844bf50926764 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/constants.ts @@ -140,7 +140,7 @@ import { INCLUDE_VIEW_LINEAGE, INCLUDE_PROJECTIONS_LINEAGE, } from './vertica'; -import { CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_FILENAME, CSV_WRITE_SEMANTICS } from './csv'; +import { CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_FILE_URL, CSV_WRITE_SEMANTICS } from './csv'; export enum RecipeSections { Connection = 0, @@ -455,9 +455,9 @@ export const RECIPE_FIELDS: RecipeFields = { filterSectionTooltip: 'Include or exclude specific Schemas, Tables, Views and Projections from ingestion.', }, [CSV]: { - fields: [CSV_FILENAME, CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_WRITE_SEMANTICS], + fields: [CSV_FILE_URL], filterFields: [], - advancedFields: [], + advancedFields: [CSV_ARRAY_DELIMITER, CSV_DELIMITER, CSV_WRITE_SEMANTICS], }, }; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts index 980caa06982eb..44f8d7c7d0149 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts @@ -1,14 +1,28 @@ import { RecipeField, FieldType } from './common'; -export const CSV_FILENAME: RecipeField = { +const validateURL = (fieldName) => { + return { + validator(_, value) { + const URLPattern = new RegExp(/^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w.-]+)+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$/); + const isURLValid = URLPattern.test(value); + console.log(value, isURLValid); + if (!value || isURLValid) { + return Promise.resolve(); + } + return Promise.reject(new Error(`A valid ${fieldName} is required.`)); + }, + }; +}; + +export const CSV_FILE_URL: RecipeField = { name: 'filename', - label: 'File name', - tooltip: 'File path or URL of CSV file to ingest.', + label: 'File URL', + tooltip: 'File URL of the CSV file to ingest.', type: FieldType.TEXT, fieldPath: 'source.config.filename', - placeholder: 'File name', + placeholder: 'File URL', required: true, - rules: null, + rules: [() => validateURL('File URL')], }; export const CSV_ARRAY_DELIMITER: RecipeField = { @@ -36,7 +50,11 @@ export const CSV_WRITE_SEMANTICS: RecipeField = { label: 'Write Semantics', tooltip: 'Whether the new tags, terms and owners to be added will override the existing ones added only by this source or not. Value for this config can be "PATCH" or "OVERRIDE"', - type: FieldType.TEXT, + type: FieldType.SELECT, + options: [ + { label: 'PATCH', value: 'PATCH' }, + { label: 'OVERRIDE', value: 'OVERRIDE' }, + ], fieldPath: 'source.config.write_semantics', placeholder: 'Write Semantics', rules: null, diff --git a/datahub-web-react/src/app/ingest/source/builder/constants.ts b/datahub-web-react/src/app/ingest/source/builder/constants.ts index f2a92a2e09009..08538729de40b 100644 --- a/datahub-web-react/src/app/ingest/source/builder/constants.ts +++ b/datahub-web-react/src/app/ingest/source/builder/constants.ts @@ -109,7 +109,7 @@ export const VERTICA = 'vertica'; export const VERTICA_URN = `urn:li:dataPlatform:${VERTICA}`; export const FIVETRAN = 'fivetran'; export const FIVETRAN_URN = `urn:li:dataPlatform:${FIVETRAN}`; -export const CSV = 'csv'; +export const CSV = 'csv-enricher'; export const CSV_URN = `urn:li:dataPlatform:${CSV}`; export const PLATFORM_URN_TO_LOGO = { diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index f04e998d9434d..2dc2598c1a0ab 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -224,11 +224,11 @@ "recipe": "source:\n type: fivetran\n config:\n # Fivetran log connector destination server configurations\n fivetran_log_config:\n destination_platform: snowflake\n destination_config:\n # Coordinates\n account_id: snowflake_account_id\n warehouse: warehouse_name\n database: snowflake_db\n log_schema: fivetran_log_schema\n\n # Credentials\n username: ${SNOWFLAKE_USER}\n password: ${SNOWFLAKE_PASS}\n role: snowflake_role\n\n # Optional - filter for certain connector names instead of ingesting everything.\n # connector_patterns:\n # allow:\n # - connector_name\n\n # Optional -- This mapping is optional and only required to configure platform-instance for source\n # A mapping of Fivetran connector id to data platform instance\n # sources_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV\n\n # Optional -- This mapping is optional and only required to configure platform-instance for destination.\n # A mapping of Fivetran destination id to data platform instance\n # destination_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV" }, { - "urn": "urn:li:dataPlatform:csv", - "name": "csv", + "urn": "urn:li:dataPlatform:csv-enricher", + "name": "csv-enricher", "displayName": "CSV", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/csv'", - "recipe": "source: \n type: csv-enricher \n config: \n # relative path to your csv file to ingest \n filename: \n array_delimiter: '|' \n delimiter: ',' \n write_semantics: PATCH" + "recipe": "source: \n type: csv-enricher \n config: \n # URL of your csv file to ingest \n filename: \n array_delimiter: '|' \n delimiter: ',' \n write_semantics: PATCH" }, { "urn": "urn:li:dataPlatform:custom", diff --git a/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts b/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts index fb34ff9b1748b..e1dc22c086fb4 100644 --- a/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts +++ b/datahub-web-react/src/app/ingest/source/conf/csv/csv.ts @@ -5,7 +5,7 @@ const placeholderRecipe = `\ source: type: csv-enricher config: - filename: # relative path to your csv file to ingest, e.g. ./path/to/your/file.csv + filename: # URL of your csv file to ingest, e.g. https://docs.google.com/spreadsheets/d/DOCID/export?format=csv array_delimiter: | delimiter: , write_semantics: PATCH From 04f3ce370ba64b57a310405d3bad7c5c74cb302c Mon Sep 17 00:00:00 2001 From: purnimagarg1 Date: Wed, 22 Nov 2023 18:52:02 +0530 Subject: [PATCH 4/7] Show informatory text at the top of CSV Ingestion form --- .../src/app/ingest/source/builder/CSVInfo.tsx | 27 +++++++++++++++++++ .../ingest/source/builder/RecipeBuilder.tsx | 5 +++- 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx diff --git a/datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx b/datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx new file mode 100644 index 0000000000000..7e8a363b30c2c --- /dev/null +++ b/datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx @@ -0,0 +1,27 @@ +import React from 'react'; +import { Alert } from 'antd'; + +const CSV_FORMAT_LINK = 'https://datahubproject.io/docs/generated/ingestion/sources/csv'; + +export const CSVInfo = () => { + const link = ( + + link + + ); + + return ( + + Add the URL of your CSV file to be ingested. You can create a file in google sheets following the + format at this {link} and then construct the CSV URL by publishing your google sheet in the CSV + format. + + } + /> + ); +}; diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx index bee9b04cee100..db1f0fdd4dfa6 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx @@ -7,8 +7,9 @@ import { ANTD_GRAY } from '../../../entity/shared/constants'; import { YamlEditor } from './YamlEditor'; import RecipeForm from './RecipeForm/RecipeForm'; import { SourceBuilderState, SourceConfig } from './types'; -import { LOOKER, LOOK_ML } from './constants'; +import { CSV, LOOKER, LOOK_ML } from './constants'; import { LookerWarning } from './LookerWarning'; +import { CSVInfo } from './CSVInfo'; export const ControlsContainer = styled.div` display: flex; @@ -81,6 +82,8 @@ function RecipeBuilder(props: Props) { return (
{(type === LOOKER || type === LOOK_ML) && } + {type === CSV && } + {sourceConfigs?.displayName} Recipe From 05d355ad0b82a8b1fd60b8692d564f6d70cc6a88 Mon Sep 17 00:00:00 2001 From: purnimagarg1 <purnima.garg@apptware.com> Date: Wed, 22 Nov 2023 18:55:24 +0530 Subject: [PATCH 5/7] Remove unused code --- .../src/app/ingest/source/builder/RecipeForm/csv.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts index 44f8d7c7d0149..fba4f3b9d0164 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/csv.ts @@ -5,7 +5,6 @@ const validateURL = (fieldName) => { validator(_, value) { const URLPattern = new RegExp(/^(?:http(s)?:\/\/)?[\w.-]+(?:\.[\w.-]+)+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$/); const isURLValid = URLPattern.test(value); - console.log(value, isURLValid); if (!value || isURLValid) { return Promise.resolve(); } From 2a6351c7eca737e7ca471d75b733dce269f8d9c1 Mon Sep 17 00:00:00 2001 From: Gabe Lyons <itsgabelyons@gmail.com> Date: Wed, 22 Nov 2023 08:48:29 -0800 Subject: [PATCH 6/7] Update CSVInfo.tsx --- datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx b/datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx index 7e8a363b30c2c..2dc84ea43603f 100644 --- a/datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx @@ -17,7 +17,8 @@ export const CSVInfo = () => { banner message={ <> - Add the URL of your CSV file to be ingested. You can create a file in google sheets following the + Add the URL of your CSV file to be ingested. This will work for any web-hosted CSV file. For example, + You can create a file in google sheets following the format at this {link} and then construct the CSV URL by publishing your google sheet in the CSV format. </> From 04b8769da2f182ecea381005208136e25ab7abf1 Mon Sep 17 00:00:00 2001 From: purnimagarg1 <purnima.garg@apptware.com> Date: Fri, 24 Nov 2023 20:12:15 +0530 Subject: [PATCH 7/7] Update CSVInfor.tsx --- .../src/app/ingest/source/builder/CSVInfo.tsx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx b/datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx index 2dc84ea43603f..87d632bb228b5 100644 --- a/datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/CSVInfo.tsx @@ -17,10 +17,9 @@ export const CSVInfo = () => { banner message={ <> - Add the URL of your CSV file to be ingested. This will work for any web-hosted CSV file. For example, - You can create a file in google sheets following the - format at this {link} and then construct the CSV URL by publishing your google sheet in the CSV - format. + Add the URL of your CSV file to be ingested. This will work for any web-hosted CSV file. For + example, You can create a file in google sheets following the format at this {link} and then + construct the CSV URL by publishing your google sheet in the CSV format. </> } />