From f7aa5375ff845baf632b91ae5f7d60ea1d97f884 Mon Sep 17 00:00:00 2001 From: zalmane Date: Thu, 16 May 2024 11:21:11 +0300 Subject: [PATCH] add placeholder for `relational.lookup` block (#364) * add placeholder for relational.lookup block * update json schemas * update autogenerated docs --------- Co-authored-by: github-actions --- .../blocks/relational/lookup/__init__.py | 0 .../relational/lookup/block.schema.json | 71 + docs/reference/blocks/relational_lookup.md | 67 + schemas/job.schema.json | 1573 +++++++++-------- 4 files changed, 968 insertions(+), 743 deletions(-) create mode 100644 core/src/datayoga_core/blocks/relational/lookup/__init__.py create mode 100644 core/src/datayoga_core/blocks/relational/lookup/block.schema.json create mode 100644 docs/reference/blocks/relational_lookup.md diff --git a/core/src/datayoga_core/blocks/relational/lookup/__init__.py b/core/src/datayoga_core/blocks/relational/lookup/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/core/src/datayoga_core/blocks/relational/lookup/block.schema.json b/core/src/datayoga_core/blocks/relational/lookup/block.schema.json new file mode 100644 index 00000000..6fcc67bd --- /dev/null +++ b/core/src/datayoga_core/blocks/relational/lookup/block.schema.json @@ -0,0 +1,71 @@ +{ + "title": "relational.write", + "description": "Lookup in a relational table", + "type": "object", + "additionalProperties": false, + "examples": [ + { + "id": "load_snowflake", + "type": "relational.write", + "properties": { + "connection": "eu_datalake", + "table": "employees", + "schema": "dbo", + "load_strategy": "APPEND" + } + } + ], + "properties": { + "condition": { + "type": "string", + "title": "The lookup condition", + "description": "Use any valid SQL syntax. Use the alias `lookup` for the lookup table and `incoming` for the base table", + "examples": ["lookup.account_number = incoming.i_acct_no"] + }, + "query": { + "type": "string", + "title": "Query string to use as an override to the built in query", + "description": "Use any valid SQL syntax. Use the alias `lookup` for the lookup table and `incoming` for the base table", + "examples": [ + "select country_code,country_name from countries where is_active=1" + ] + }, + "schema": { + "type": "string", + "title": "The table schema of the lookup table", + "description": "If not specified, no specific schema will be used when connecting to the database.", + "examples": ["dbo"] + }, + "table": { + "type": "string", + "title": "The lookup table name", + "description": "Lookup table name", + "examples": ["countries"] + }, + "order_by": { + "type": "array", + "title": "List of keys to use for ordering. Applicable for multiple matches", + "items": { + "type": ["string"], + "title": "name of column" + }, + "examples": [["country_name"]] + }, + "fields": { + "type": "array", + "title": "Columns to add to the output from the lookup table", + "items": { + "type": ["string", "object"], + "title": "name of column" + }, + "examples": [["fname", { "lname": "last_name" }, "address", "gender"]] + }, + "multiple_match_policy": { + "type": "string", + "enum": ["first", "last", "all"], + "description": "How to handle multiple matches in the lookup table", + "default": "" + } + }, + "required": [] +} diff --git a/docs/reference/blocks/relational_lookup.md b/docs/reference/blocks/relational_lookup.md new file mode 100644 index 00000000..be34fd85 --- /dev/null +++ b/docs/reference/blocks/relational_lookup.md @@ -0,0 +1,67 @@ +--- +parent: Blocks +grand_parent: Reference +--- + +# relational\.write + +Lookup in a relational table + + +**Properties** + +|Name|Type|Description|Required| +|----|----|-----------|--------| +|**condition**
(The lookup condition)|`string`|Use any valid SQL syntax. Use the alias `lookup` for the lookup table and `incoming` for the base table
|no| +|**query**
(Query string to use as an override to the built in query)|`string`|Use any valid SQL syntax. Use the alias `lookup` for the lookup table and `incoming` for the base table
|no| +|**schema**
(The table schema of the lookup table)|`string`|If not specified, no specific schema will be used when connecting to the database.
|no| +|**table**
(The lookup table name)|`string`|Lookup table name
|no| +|[**order\_by**](#order_by)
(List of keys to use for ordering\. Applicable for multiple matches)|`array`||no| +|[**fields**](#fields)
(Columns to add to the output from the lookup table)|`array`||no| +|**multiple\_match\_policy**|`string`|How to handle multiple matches in the lookup table
Default: `""`
Enum: `"first"`, `"last"`, `"all"`
|no| + +**Additional Properties:** not allowed +**Example** + +```yaml +id: load_snowflake +type: relational.write +properties: + connection: eu_datalake + table: employees + schema: dbo + load_strategy: APPEND + +``` + + +## order\_by\[\]: List of keys to use for ordering\. Applicable for multiple matches + +**Items: name of column** + +**Item Type:** `string` +**Example** + +```yaml +- country_name + +``` + + +## fields\[\]: Columns to add to the output from the lookup table + +**Items: name of column** + +**No properties.** + +**Example** + +```yaml +- fname +- lname: last_name +- address +- gender + +``` + + diff --git a/schemas/job.schema.json b/schemas/job.schema.json index 8bba4130..cc8f7407 100644 --- a/schemas/job.schema.json +++ b/schemas/job.schema.json @@ -20,25 +20,26 @@ "uses": { "enum": [ "map", - "filter", - "http.write", + "files.read_csv", + "remove_field", + "relational.lookup", + "relational.read", + "relational.write", + "jinja_template", + "rename_field", + "sequence", "http.receiver", - "redis.write", + "http.write", "redis.lookup", "redis.read_stream", - "remove_field", + "redis.write", "azure.read_event_hub", - "std.write", - "std.read", - "jinja_template", - "relational.write", - "relational.read", - "rename_field", + "filter", "add_field", - "sequence", - "files.read_csv", - "parquet.write", + "std.read", + "std.write", "parquet.read", + "parquet.write", "cassandra.write" ] } @@ -100,7 +101,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "filter" + "const": "files.read_csv" } }, "required": ["uses"] @@ -108,23 +109,63 @@ "then": { "properties": { "with": { - "title": "filter", - "description": "Filter records", + "title": "files.read_csv", + "description": "Read data from CSV", "type": "object", "properties": { - "expression": { - "description": "Expression", + "file": { + "description": "Filename. Can contain a regexp or glob expression", "type": "string" }, - "language": { - "description": "Language", + "encoding": { + "description": "Encoding to use for reading the file", "type": "string", - "enum": ["jmespath", "sql"] + "default": "utf-8" + }, + "fields": { + "type": "array", + "title": "List of columns to use", + "description": "List of columns to use for extract", + "default": null, + "examples": [["fname", "lname"]], + "minLength": 1, + "additionalItems": true, + "items": { + "type": "string", + "description": "field name", + "examples": ["fname"] + } + }, + "skip": { + "description": "Number of lines to skip", + "type": "number", + "minimum": 0, + "default": 0 + }, + "delimiter": { + "description": "Delimiter to use for splitting the csv records", + "type": "string", + "minLength": 1, + "maxLength": 1, + "default": "," + }, + "batch_size": { + "description": "Number of records to read per batch", + "type": "number", + "minimum": 1, + "default": 1000 + }, + "quotechar": { + "description": "A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters. It defaults to '", + "type": "string", + "minLength": 1, + "maxLength": 1, + "default": "\"" } }, "additionalProperties": false, - "required": ["expression", "language"], - "examples": [{ "language": "sql", "expression": "age>20" }] + "required": ["file"], + "examples": [{ "file": "archive.csv", "delimiter": ";" }] } } } @@ -135,7 +176,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "http.write" + "const": "remove_field" } }, "required": ["uses"] @@ -143,115 +184,638 @@ "then": { "properties": { "with": { - "title": "http.write", - "description": "Write data using an HTTP request", + "title": "remove_field", + "description": "Remove fields", "type": "object", - "properties": { - "connection": { - "type": "string", - "title": "The connection to use for the HTTP request", - "description": "Logical connection name as defined in the connections.dy.yaml", - "examples": ["api_connection", "external_service"] - }, - "endpoint": { - "oneOf": [ - { - "type": "string", - "title": "API Endpoint", - "description": "The endpoint URL for the HTTP request", - "examples": ["/users"] - }, - { - "type": "object", - "properties": { - "expression": { - "description": "Expression", - "type": "string" - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - } - }, - "required": ["expression", "language"], - "examples": [ - { - "expression": "concat(['/users/', id]')", - "language": "jmespath" - } - ] - } - ] - }, - "method": { - "type": "string", - "title": "HTTP Method", - "description": "HTTP method to be used for the request", - "enum": ["GET", "PUT", "POST", "DELETE"], - "examples": ["GET", "POST"] - }, - "payload": { - "type": "object", - "title": "Request Payload", - "description": "Data to be sent in the request body", - "additionalProperties": { - "oneOf": [ - { "type": "string" }, - { + "oneOf": [ + { + "description": "Remove multiple fields", + "properties": { + "fields": { + "type": "array", + "description": "Fields", + "items": { "type": "object", "properties": { - "expression": { - "description": "Expression", + "field": { + "description": "Field", "type": "string" - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] } }, - "required": ["expression", "language"] + "additionalProperties": false, + "required": ["field"] } - ] - } + } + }, + "required": ["fields"], + "additionalProperties": false, + "examples": [ + { + "fields": [ + { "field": "credit_card" }, + { "field": "name.mname" } + ] + } + ] }, - "extra_headers": { - "type": "object", - "title": "Additional HTTP Headers", - "description": "Extra headers to be included in the HTTP request", - "additionalProperties": { - "oneOf": [ - { "type": "string" }, - { - "type": "object", - "properties": { - "expression": { - "description": "Expression", - "type": "string" - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - } - }, - "required": ["expression", "language"] - } - ] + { + "description": "Remove one field", + "properties": { + "field": { "description": "Field", "type": "string" } + }, + "additionalProperties": false, + "required": ["field"], + "examples": [{ "field": "credit_card" }] + } + ] + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "relational.lookup" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "relational.write", + "description": "Lookup in a relational table", + "type": "object", + "additionalProperties": false, + "examples": [ + { + "id": "load_snowflake", + "type": "relational.write", + "properties": { + "connection": "eu_datalake", + "table": "employees", + "schema": "dbo", + "load_strategy": "APPEND" } + } + ], + "properties": { + "condition": { + "type": "string", + "title": "The lookup condition", + "description": "Use any valid SQL syntax. Use the alias `lookup` for the lookup table and `incoming` for the base table", + "examples": ["lookup.account_number = incoming.i_acct_no"] }, - "extra_query_parameters": { - "description": "Extra query parameters to be included in the HTTP request", - "type": "object", - "additionalProperties": { - "oneOf": [ - { "type": "string" }, - { - "type": "object", - "properties": { - "expression": { - "description": "Expression", + "query": { + "type": "string", + "title": "Query string to use as an override to the built in query", + "description": "Use any valid SQL syntax. Use the alias `lookup` for the lookup table and `incoming` for the base table", + "examples": [ + "select country_code,country_name from countries where is_active=1" + ] + }, + "schema": { + "type": "string", + "title": "The table schema of the lookup table", + "description": "If not specified, no specific schema will be used when connecting to the database.", + "examples": ["dbo"] + }, + "table": { + "type": "string", + "title": "The lookup table name", + "description": "Lookup table name", + "examples": ["countries"] + }, + "order_by": { + "type": "array", + "title": "List of keys to use for ordering. Applicable for multiple matches", + "items": { "type": ["string"], "title": "name of column" }, + "examples": [["country_name"]] + }, + "fields": { + "type": "array", + "title": "Columns to add to the output from the lookup table", + "items": { + "type": ["string", "object"], + "title": "name of column" + }, + "examples": [ + ["fname", { "lname": "last_name" }, "address", "gender"] + ] + }, + "multiple_match_policy": { + "type": "string", + "enum": ["first", "last", "all"], + "description": "How to handle multiple matches in the lookup table", + "default": "" + } + }, + "required": [] + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "relational.read" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "relational.read", + "description": "Read a table from an SQL-compatible data store", + "type": "object", + "additionalProperties": false, + "examples": [ + { + "id": "read_snowflake", + "type": "relational.read", + "properties": { + "connection": "eu_datalake", + "table": "employees", + "schema": "dbo" + } + } + ], + "properties": { + "connection": { + "type": "string", + "title": "The connection to use for loading", + "description": "Logical connection name as defined in the connections.dy.yaml", + "examples": ["europe_db", "target", "eu_dwh"] + }, + "schema": { + "type": "string", + "title": "The table schema of the table", + "description": "If left blank, the default schema of this connection will be used as defined in the connections.dy.yaml", + "examples": ["dbo"] + }, + "table": { + "type": "string", + "title": "The table name", + "description": "Table name", + "examples": ["employees"] + }, + "columns": { + "type": "array", + "title": "Optional subset of columns to load", + "items": { + "type": ["string", "object"], + "title": "name of column" + }, + "examples": [["fname", { "lname": "last_name" }]] + } + }, + "required": ["connection", "table"] + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "relational.write" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "relational.write", + "description": "Write into a SQL-compatible data store", + "type": "object", + "additionalProperties": false, + "examples": [ + { + "id": "load_snowflake", + "type": "relational.write", + "properties": { + "connection": "eu_datalake", + "table": "employees", + "schema": "dbo", + "load_strategy": "APPEND" + } + } + ], + "properties": { + "connection": { + "type": "string", + "title": "The connection to use for loading", + "description": "Logical connection name as defined in the connections.dy.yaml", + "examples": ["europe_db", "target", "eu_dwh"] + }, + "schema": { + "type": "string", + "title": "The table schema of the target table", + "description": "If not specified, no specific schema will be used when connecting to the database.", + "examples": ["dbo"] + }, + "table": { + "type": "string", + "title": "The target table name", + "description": "Target table name", + "examples": ["employees"] + }, + "keys": { + "type": "array", + "title": "Business keys to use in case of `load_strategy` is UPSERT or working with `opcode_field`", + "items": { + "type": ["string", "object"], + "title": "name of column" + }, + "examples": [["fname", { "lname": "last_name" }]] + }, + "mapping": { + "type": "array", + "title": "Fields to write", + "items": { + "type": ["string", "object"], + "title": "name of column" + }, + "examples": [ + ["fname", { "lname": "last_name" }, "address", "gender"] + ] + }, + "foreach": { + "type": "string", + "title": "Split a column into multiple records with a JMESPath expression", + "description": "Use a JMESPath expression to split a column into multiple records. The expression should be in the format column: expression.", + "pattern": "^(?!:).*:.*(?20" }] } } } @@ -1075,7 +1238,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "sequence" + "const": "std.read" } }, "required": ["uses"] @@ -1083,29 +1246,8 @@ "then": { "properties": { "with": { - "title": "sequence", - "description": "Add a sequence number field to data", - "type": "object", - "additionalProperties": false, - "examples": [], - "required": [], - "properties": { - "field": { - "type": "string", - "title": "Name of new sequence field" - }, - "start": { - "type": "number", - "title": "Start entry", - "default": 1, - "examples": [] - }, - "increment": { - "type": "number", - "title": "Increment between sequences", - "examples": [] - } - } + "title": "std.read", + "description": "Read from the standard input" } } } @@ -1116,7 +1258,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "files.read_csv" + "const": "std.write" } }, "required": ["uses"] @@ -1124,63 +1266,8 @@ "then": { "properties": { "with": { - "title": "files.read_csv", - "description": "Read data from CSV", - "type": "object", - "properties": { - "file": { - "description": "Filename. Can contain a regexp or glob expression", - "type": "string" - }, - "encoding": { - "description": "Encoding to use for reading the file", - "type": "string", - "default": "utf-8" - }, - "fields": { - "type": "array", - "title": "List of columns to use", - "description": "List of columns to use for extract", - "default": null, - "examples": [["fname", "lname"]], - "minLength": 1, - "additionalItems": true, - "items": { - "type": "string", - "description": "field name", - "examples": ["fname"] - } - }, - "skip": { - "description": "Number of lines to skip", - "type": "number", - "minimum": 0, - "default": 0 - }, - "delimiter": { - "description": "Delimiter to use for splitting the csv records", - "type": "string", - "minLength": 1, - "maxLength": 1, - "default": "," - }, - "batch_size": { - "description": "Number of records to read per batch", - "type": "number", - "minimum": 1, - "default": 1000 - }, - "quotechar": { - "description": "A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters. It defaults to '", - "type": "string", - "minLength": 1, - "maxLength": 1, - "default": "\"" - } - }, - "additionalProperties": false, - "required": ["file"], - "examples": [{ "file": "archive.csv", "delimiter": ";" }] + "title": "std.write", + "description": "Write to the standard output" } } } @@ -1191,7 +1278,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "parquet.write" + "const": "parquet.read" } }, "required": ["uses"] @@ -1199,8 +1286,8 @@ "then": { "properties": { "with": { - "title": "parquet.write", - "description": "Write data to parquet", + "title": "parquet.read", + "description": "Read data from parquet", "type": "object", "properties": { "file": { @@ -1221,7 +1308,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "parquet.read" + "const": "parquet.write" } }, "required": ["uses"] @@ -1229,8 +1316,8 @@ "then": { "properties": { "with": { - "title": "parquet.read", - "description": "Read data from parquet", + "title": "parquet.write", + "description": "Write data to parquet", "type": "object", "properties": { "file": {