From a2740468473c5794546c8865c15bd950fb7911d3 Mon Sep 17 00:00:00 2001 From: Jonathan Rosenberg <96974219+Jonathan-Rosenberg@users.noreply.github.com> Date: Wed, 6 Mar 2024 14:44:25 +0200 Subject: [PATCH] change unity docs to fit the metadata pass --- docs/howto/hooks/lua.md | 8 +++++--- docs/integrations/unity-catalog.md | 4 ++-- pkg/actions/lua/lakefs/catalogexport/unity_exporter.lua | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/howto/hooks/lua.md b/docs/howto/hooks/lua.md index e30183746b9..0f3368ea285 100644 --- a/docs/howto/hooks/lua.md +++ b/docs/howto/hooks/lua.md @@ -284,7 +284,7 @@ local client = databricks.client("https://my-host.cloud.databricks.com", "my-ser local schema_name = client.create_schema("main", "mycatalog", true) ``` -### `databricks/client.register_external_table(table_name, physical_path, warehouse_id, catalog_name, schema_name)` +### `databricks/client.register_external_table(table_name, physical_path, warehouse_id, catalog_name, schema_name, metadata)` Registers an external table under the provided warehouse ID, catalog name, and schema name. In order for this method call to succeed, an external location should be configured in the catalog, with the @@ -299,6 +299,8 @@ Parameters: `Connection Details`, or by running `databricks warehouses get`, choosing your SQL warehouse and fetching its ID). - `catalog_name(string)`: The name of the catalog under which a schema will be created (or fetched from). - `schema_name(string)`: The name of the schema under which the table will be created. +- `metadata(table)`: A table of metadata to be added to the table's registration. The metadata table should be of the form: + `{key1 = "value1", key2 = "value2", ...}`. Example: @@ -730,7 +732,7 @@ Parameters: A package used to register exported Delta Lake tables to Databricks' Unity catalog. 
-### `lakefs/catalogexport/unity_exporter.register_tables(action, table_descriptors_path, delta_table_paths, databricks_client, warehouse_id)` +### `lakefs/catalogexport/unity_exporter.register_tables(action, table_descriptors_path, delta_table_details, databricks_client, warehouse_id)` The function used to register exported Delta Lake tables in Databricks' Unity Catalog. The registration will use the following paths to register the table: @@ -741,7 +743,7 @@ Parameters: - `action(table)`: The global action table - `table_descriptors_path(string)`: The path under which the table descriptors of the provided `table_paths` reside. -- `delta_table_paths(table)`: Table names to physical paths mapping (e.g. `{ table1 = "s3://mybucket/mytable1", table2 = "s3://mybucket/mytable2" }`) +- `delta_table_details(table)`: Table names to physical paths mapping and table metadata (e.g. `{table1 = {path = "s3://mybucket/mytable1", metadata = {id = "table_1_id", name = "table1", ...}}, table2 = {path = "s3://mybucket/mytable2", metadata = {id = "table_2_id", name = "table2", ...}}}`). - `databricks_client(table)`: A Databricks client that implements `create_or_get_schema: function(id, catalog_name)` and `register_external_table: function(table_name, physical_path, warehouse_id, catalog_name, schema_name)` - `warehouse_id(string)`: Databricks warehouse ID. 
diff --git a/docs/integrations/unity-catalog.md b/docs/integrations/unity-catalog.md index 6652e3a1b61..b5eb5d910a3 100644 --- a/docs/integrations/unity-catalog.md +++ b/docs/integrations/unity-catalog.md @@ -132,11 +132,11 @@ local sc = aws.s3_client(args.aws.access_key_id, args.aws.secret_access_key, arg -- Export Delta Lake tables export: local delta_client = formats.delta_client(args.lakefs.access_key_id, args.lakefs.secret_access_key, args.aws.region) -local delta_table_locations = delta_export.export_delta_log(action, args.table_defs, sc.put_object, delta_client, "_lakefs_tables") +local delta_table_details = delta_export.export_delta_log(action, args.table_defs, sc.put_object, delta_client, "_lakefs_tables") -- Register the exported table in Unity Catalog: local databricks_client = databricks.client(args.databricks_host, args.databricks_token) -local registration_statuses = unity_export.register_tables(action, "_lakefs_tables", delta_table_locations, databricks_client, args.warehouse_id) +local registration_statuses = unity_export.register_tables(action, "_lakefs_tables", delta_table_details, databricks_client, args.warehouse_id) for t, status in pairs(registration_statuses) do print("Unity catalog registration for table \"" .. t .. "\" completed with commit schema status : " .. status .. "\n") diff --git a/pkg/actions/lua/lakefs/catalogexport/unity_exporter.lua b/pkg/actions/lua/lakefs/catalogexport/unity_exporter.lua index 9af12d0804a..6beb6b20353 100644 --- a/pkg/actions/lua/lakefs/catalogexport/unity_exporter.lua +++ b/pkg/actions/lua/lakefs/catalogexport/unity_exporter.lua @@ -18,7 +18,7 @@ local extractor = require("lakefs/catalogexport/table_extractor") Returns a "<table name>: status" map for registration of provided tables. 
]] -local function register_tables(action, table_descriptors_path, delta_table_paths, databricks_client, warehouse_id) +local function register_tables(action, table_descriptors_path, delta_table_details, databricks_client, warehouse_id) local repo = action.repository_id local commit_id = action.commit_id if not commit_id then @@ -26,7 +26,7 @@ local function register_tables(action, table_descriptors_path, delta_table_paths end local branch_id = action.branch_id local response = {} - for table_name_yaml, table_details in pairs(delta_table_paths) do + for table_name_yaml, table_details in pairs(delta_table_details) do local tny = table_name_yaml if not strings.has_suffix(tny, ".yaml") then tny = tny .. ".yaml"