From b8896f1128f0119d5295d2e1cd1cee6c0c4ad66a Mon Sep 17 00:00:00 2001 From: spicy-sauce Date: Wed, 13 Mar 2024 09:44:07 +0200 Subject: [PATCH 01/10] Changed jobs to *.dy.yaml --- cli/src/datayoga/__main__.py | 2 +- core/README.md | 4 +- core/src/datayoga_core/__init__.py | 37 +++++++++---------- .../resources/scaffold/README.md | 4 +- .../jobs/sample/{hello.yaml => hello.dy.yaml} | 0 docs/creating-jobs.md | 12 +++--- docs/directory-structure.md | 4 +- docs/library.md | 2 +- ...redis.yaml => airbnb_csv_to_redis.dy.yaml} | 0 .../{connections.yaml => connections.dy.yaml} | 0 ...roducer.yaml => test_csv_producer.dy.yaml} | 0 ...ducer.yaml => test_redis_producer.dy.yaml} | 0 integration-tests/common/utils.py | 10 +++-- .../{csv_to_pg.yaml => csv_to_pg.dy.yaml} | 0 ...csv_to_redis.yaml => csv_to_redis.dy.yaml} | 0 ...tp_to_redis.yaml => http_to_redis.dy.yaml} | 0 .../{pg_to_redis.yaml => pg_to_redis.dy.yaml} | 0 ...to_stdout.yaml => redis_to_stdout.dy.yaml} | 0 ...to_stdout.yaml => redis_to_stdout.dy.yaml} | 0 ...up_hash.yaml => redis_lookup_hash.dy.yaml} | 0 ...up_list.yaml => redis_lookup_list.dy.yaml} | 0 ...okup_set.yaml => redis_lookup_set.dy.yaml} | 0 ...t.yaml => redis_lookup_sorted_set.dy.yaml} | 0 ...tring.yaml => redis_lookup_string.dy.yaml} | 0 ...aml => redis_lookup_string_nested.dy.yaml} | 0 ...sandra.yaml => redis_to_cassandra.dy.yaml} | 0 ...redis_to_db2.yaml => redis_to_db2.dy.yaml} | 0 ...s_to_mysql.yaml => redis_to_mysql.dy.yaml} | 0 ...to_oracle.yaml => redis_to_oracle.dy.yaml} | 0 .../{redis_to_pg.yaml => redis_to_pg.dy.yaml} | 0 ...server.yaml => redis_to_sqlserver.dy.yaml} | 0 ...to_stdout.yaml => stdin_to_stdout.dy.yaml} | 0 32 files changed, 38 insertions(+), 37 deletions(-) rename core/src/datayoga_core/resources/scaffold/jobs/sample/{hello.yaml => hello.dy.yaml} (100%) rename examples/{airbnb_csv_to_redis.yaml => airbnb_csv_to_redis.dy.yaml} (100%) rename examples/{connections.yaml => connections.dy.yaml} (100%) rename examples/{test_csv_producer.yaml => test_csv_producer.dy.yaml} (100%) rename examples/{test_redis_producer.yaml => test_redis_producer.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{csv_to_pg.yaml => csv_to_pg.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{csv_to_redis.yaml => csv_to_redis.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{http_to_redis.yaml => http_to_redis.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{pg_to_redis.yaml => pg_to_redis.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/redis/abort/{redis_to_stdout.yaml => redis_to_stdout.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/redis/ignore/{redis_to_stdout.yaml => redis_to_stdout.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_lookup_hash.yaml => redis_lookup_hash.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_lookup_list.yaml => redis_lookup_list.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_lookup_set.yaml => redis_lookup_set.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_lookup_sorted_set.yaml => redis_lookup_sorted_set.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_lookup_string.yaml => redis_lookup_string.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_lookup_string_nested.yaml => redis_lookup_string_nested.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_to_cassandra.yaml => redis_to_cassandra.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_to_db2.yaml => redis_to_db2.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_to_mysql.yaml => redis_to_mysql.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_to_oracle.yaml => redis_to_oracle.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_to_pg.yaml => redis_to_pg.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{redis_to_sqlserver.yaml => redis_to_sqlserver.dy.yaml} (100%) rename integration-tests/resources/jobs/tests/{stdin_to_stdout.yaml => stdin_to_stdout.dy.yaml} (100%) diff --git a/cli/src/datayoga/__main__.py b/cli/src/datayoga/__main__.py index 1f760613..60b40d3c 100644 --- a/cli/src/datayoga/__main__.py +++ b/cli/src/datayoga/__main__.py @@ -109,7 +109,7 @@ def run( sys.exit(1) # validate the job - job_file = path.join(directory, "jobs", job_name.replace(".", os.sep) + ".yaml") + job_file = path.join(directory, "jobs", job_name.replace(".", os.sep) + ".dy.yaml") try: job_settings = utils.read_yaml(job_file) logger.debug(f"job_settings: {job_settings}") diff --git a/core/README.md b/core/README.md index c4dbe06a..4a5ae701 100644 --- a/core/README.md +++ b/core/README.md @@ -16,7 +16,7 @@ This demonstrates how to transform data using a DataYoga job. ### Create a Job -Use this `example.yaml`: +Use this `example.dy.yaml`: ```yaml steps: @@ -65,7 +65,7 @@ from datayoga_core.job import Job from datayoga_core.result import Result, Status from datayoga_core.utils import read_yaml -job_settings = read_yaml("example.yaml") +job_settings = read_yaml("example.dy.yaml") job = dy.compile(job_settings) assert job.transform([{"fname": "jane", "lname": "smith", "country_code": 1, "country_name": "usa", "credit_card": "1234-5678-0000-9999", "gender": "F"}]).processed == [ diff --git a/core/src/datayoga_core/__init__.py b/core/src/datayoga_core/__init__.py index bf0928e8..1476e2d8 100644 --- a/core/src/datayoga_core/__init__.py +++ b/core/src/datayoga_core/__init__.py @@ -11,33 +11,29 @@ logger = logging.getLogger("dy") -def compile( - job_settings: Dict[str, Any], - whitelisted_blocks: Optional[List[str]] = None) -> Job: - """ - Compiles a job in YAML +def compile(job_settings: Dict[str, Any], whitelisted_blocks: Optional[List[str]] = None) -> Job: + """Compiles a job in YAML. Args: - job_settings (Dict[str, Any]): Job settings + job_settings (Dict[str, Any]): Job settings. whitelisted_blocks: (Optional[List[str]], optional): Whitelisted blocks. Defaults to None. Returns: - Job: Compiled job + Job: Compiled job. """ logger.debug("Compiling job") return Job.compile(job_settings, whitelisted_blocks) def validate(job_settings: Dict[str, Any], whitelisted_blocks: Optional[List[str]] = None): - """ - Validates a job in YAML + """Validates a job in YAML. Args: - job_settings (Dict[str, Any]): Job settings + job_settings (Dict[str, Any]): Job settings. whitelisted_blocks: (Optional[List[str]], optional): Whitelisted blocks. Defaults to None. Raises: - ValueError: When the job is invalid + ValueError: When the job is invalid. """ logger.debug("Validating job") try: @@ -49,21 +45,22 @@ def validate(job_settings: Dict[str, Any], whitelisted_blocks: Optional[List[str raise ValueError(e) -def transform(job_settings: Dict[str, Any], - data: List[Dict[str, Any]], - context: Optional[Context] = None, - whitelisted_blocks: Optional[List[str]] = None) -> JobResult: - """ - Transforms data against a certain job +def transform( + job_settings: Dict[str, Any], + data: List[Dict[str, Any]], + context: Optional[Context] = None, + whitelisted_blocks: Optional[List[str]] = None +) -> JobResult: + """Transforms data against a certain job. Args: - job_settings (Dict[str, Any]): Job settings - data (List[Dict[str, Any]]): Data to transform + job_settings (Dict[str, Any]): Job settings. + data (List[Dict[str, Any]]): Data to transform. context (Optional[Context]): Context. Defaults to None. whitelisted_blocks: (Optional[List[str]]): Whitelisted blocks. Defaults to None. Returns: - JobResult: Job result + JobResult: Job result. """ job = compile(job_settings, whitelisted_blocks) job.init(context) diff --git a/core/src/datayoga_core/resources/scaffold/README.md b/core/src/datayoga_core/resources/scaffold/README.md index e076b341..ecb58b3f 100644 --- a/core/src/datayoga_core/resources/scaffold/README.md +++ b/core/src/datayoga_core/resources/scaffold/README.md @@ -10,13 +10,13 @@ ├── connections.yaml └── jobs └── sample - └── hello.yaml + └── hello.dy.yaml ``` - `.gitignore`: For convenience, this is used to ignore the data folder. - `data`: Folder to store data input files or output. This folder can be located anywhere as long as the runner has access to it. - `connections.yaml`: Contains definitions of source and target connectors and other general settings. -- `jobs`: Source job YAMLs. These can be nested and referenced as modules using a dot notation. e.g. `jobs/sample/hello.yaml` is referenced as `sample.hello` when running the job. +- `jobs`: Source job YAMLs. These can be nested and referenced as modules using a dot notation. e.g. `jobs/sample/hello.dy.yaml` is referenced as `sample.hello` when running the job. ## Run a Job diff --git a/core/src/datayoga_core/resources/scaffold/jobs/sample/hello.yaml b/core/src/datayoga_core/resources/scaffold/jobs/sample/hello.dy.yaml similarity index 100% rename from core/src/datayoga_core/resources/scaffold/jobs/sample/hello.yaml rename to core/src/datayoga_core/resources/scaffold/jobs/sample/hello.dy.yaml diff --git a/docs/creating-jobs.md b/docs/creating-jobs.md index 09d76b11..888224b4 100644 --- a/docs/creating-jobs.md +++ b/docs/creating-jobs.md @@ -4,9 +4,9 @@ nav_order: 5 # Creating Jobs -Jobs are created by creating yaml files in the `jobs` folder. Each job is composed of several `steps` that activate `blocks`. A `block` defines the business logic of an action. For example, a `block` can write to a Kafka stream, can read from a cloud API, can transform structure, or enrich a message with external data. A `steps` activates the `block` with a set of parameters. +Jobs are created by creating `dy.yaml` files in the `jobs` folder. Each job is composed of several `steps` that activate `blocks`. A `block` defines the business logic of an action. For example, a `block` can write to a Kafka stream, can read from a cloud API, can transform structure, or enrich a message with external data. A `steps` activates the `block` with a set of parameters. -## Overview of the Job Yaml Structure +## Overview of the Job YAML Structure Each Job must start with a block that either produces data or accepts data from external sources. The subsequent blocks each receive the output of the previous step as an input. The data will be streamed through these blocks as data flows through the chain. @@ -33,7 +33,7 @@ It supports both async processing, multi-threading, and multi-processing to enab To deploy a job to the DataYoga Runner, use the DataYoga CLI. ```bash -datayoga run jobname.yaml +datayoga run jobname ``` ## Tutorial - a Job that Reads from Redis and Writes to Postgres @@ -58,7 +58,7 @@ docker run -p 5432:5432 -e POSTGRES_PASSWORD=mysecretpassword -d postgres DataYoga manages connections in a special file named `connections.yaml`. Each connection is defined with a logical name and can define properties needed for the connection. Reference to environment variables, interpolation, and secrets is available. -Add the connections to Redis and Postgres above to the connections.yaml: +Add the connections to Redis and Postgres above to the `connections.yaml`: ```bash cat << EOF > connections.yaml @@ -79,7 +79,7 @@ EOF ### Create the Job ```bash -cat << EOF > redis_to_pg.yaml +cat << EOF > redis_to_pg.dy.yaml steps: - uses: redis.read_stream with: @@ -114,5 +114,5 @@ EOF ### Run the Job in the DataYoga Runner ```bash -datayoga run redis_to_pg.yaml +datayoga run redis_to_pg ``` diff --git a/docs/directory-structure.md b/docs/directory-structure.md index 3366a358..2a73dede 100644 --- a/docs/directory-structure.md +++ b/docs/directory-structure.md @@ -14,10 +14,10 @@ The `datayoga init` command produces the following directory structure: ├── connections.yaml └── jobs └── sample - └── hello.yaml + └── hello.dy.yaml ``` - `.gitignore`: For convenience, this is used to ignore the data folder. - `data`: Folder to store data input files or output. This folder can be located anywhere as long as the runner has access to it. - `connections.yaml`: Contains definitions of source and target connectors and other general settings. -- `jobs`: Source job YAMLs. These can be nested and referenced as modules using a dot notation. e.g. `jobs/sample/hello.yaml` is referenced as `sample.hello` when running the job. +- `jobs`: Source job YAMLs. These can be nested and referenced as modules using a dot notation. e.g. `jobs/sample/hello.dy.yaml` is referenced as `sample.hello` when running the job. diff --git a/docs/library.md b/docs/library.md index 5b0973d3..bbdcdc9c 100644 --- a/docs/library.md +++ b/docs/library.md @@ -72,7 +72,7 @@ import datayoga_core as dy from datayoga_core.job import Job from datayoga_core.utils import read_yaml -job_settings = read_yaml("example.yaml") +job_settings = read_yaml("example.dy.yaml") job = dy.compile(job_settings) assert job.transform({"fname": "jane", "lname": "smith", "country_code": 1, "country_name": "usa", "credit_card": "1234-5678-0000-9999", "gender": "F"})[0] == {"first_name": "jane", "last_name": "smith", "country": "1 - USA", "full_name": "jane smith", "greeting": "Hello Ms. jane smith"} diff --git a/examples/airbnb_csv_to_redis.yaml b/examples/airbnb_csv_to_redis.dy.yaml similarity index 100% rename from examples/airbnb_csv_to_redis.yaml rename to examples/airbnb_csv_to_redis.dy.yaml diff --git a/examples/connections.yaml b/examples/connections.dy.yaml similarity index 100% rename from examples/connections.yaml rename to examples/connections.dy.yaml diff --git a/examples/test_csv_producer.yaml b/examples/test_csv_producer.dy.yaml similarity index 100% rename from examples/test_csv_producer.yaml rename to examples/test_csv_producer.dy.yaml diff --git a/examples/test_redis_producer.yaml b/examples/test_redis_producer.dy.yaml similarity index 100% rename from examples/test_redis_producer.yaml rename to examples/test_redis_producer.dy.yaml diff --git a/integration-tests/common/utils.py b/integration-tests/common/utils.py index fbf10851..7cafeba7 100644 --- a/integration-tests/common/utils.py +++ b/integration-tests/common/utils.py @@ -31,7 +31,7 @@ def execute_program(command: str, background: bool = False) -> Optional[Popen]: def wait_program(process: Popen, sig: Optional[int] = signal.SIGTERM, ignore_errors: bool = False): """Waits a child program to finish and logs its output. - Sends a signal to the process if it set + Sends a signal to the process if it set. Args: process (Popen): Process to kill. @@ -57,8 +57,12 @@ def wait_program(process: Popen, sig: Optional[int] = signal.SIGTERM, ignore_err raise ValueError("command failed") -def run_job(job_name: str, piped_from: Optional[str] = None, piped_to: Optional[str] = None, - background: bool = False) -> Optional[Popen]: +def run_job( + job_name: str, + piped_from: Optional[str] = None, + piped_to: Optional[str] = None, + background: bool = False +) -> Optional[Popen]: """Runs a job using the `datayoga` command-line tool. Args: diff --git a/integration-tests/resources/jobs/tests/csv_to_pg.yaml b/integration-tests/resources/jobs/tests/csv_to_pg.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/csv_to_pg.yaml rename to integration-tests/resources/jobs/tests/csv_to_pg.dy.yaml diff --git a/integration-tests/resources/jobs/tests/csv_to_redis.yaml b/integration-tests/resources/jobs/tests/csv_to_redis.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/csv_to_redis.yaml rename to integration-tests/resources/jobs/tests/csv_to_redis.dy.yaml diff --git a/integration-tests/resources/jobs/tests/http_to_redis.yaml b/integration-tests/resources/jobs/tests/http_to_redis.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/http_to_redis.yaml rename to integration-tests/resources/jobs/tests/http_to_redis.dy.yaml diff --git a/integration-tests/resources/jobs/tests/pg_to_redis.yaml b/integration-tests/resources/jobs/tests/pg_to_redis.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/pg_to_redis.yaml rename to integration-tests/resources/jobs/tests/pg_to_redis.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis/abort/redis_to_stdout.yaml b/integration-tests/resources/jobs/tests/redis/abort/redis_to_stdout.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis/abort/redis_to_stdout.yaml rename to integration-tests/resources/jobs/tests/redis/abort/redis_to_stdout.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis/ignore/redis_to_stdout.yaml b/integration-tests/resources/jobs/tests/redis/ignore/redis_to_stdout.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis/ignore/redis_to_stdout.yaml rename to integration-tests/resources/jobs/tests/redis/ignore/redis_to_stdout.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_lookup_hash.yaml b/integration-tests/resources/jobs/tests/redis_lookup_hash.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_lookup_hash.yaml rename to integration-tests/resources/jobs/tests/redis_lookup_hash.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_lookup_list.yaml b/integration-tests/resources/jobs/tests/redis_lookup_list.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_lookup_list.yaml rename to integration-tests/resources/jobs/tests/redis_lookup_list.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_lookup_set.yaml b/integration-tests/resources/jobs/tests/redis_lookup_set.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_lookup_set.yaml rename to integration-tests/resources/jobs/tests/redis_lookup_set.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_lookup_sorted_set.yaml b/integration-tests/resources/jobs/tests/redis_lookup_sorted_set.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_lookup_sorted_set.yaml rename to integration-tests/resources/jobs/tests/redis_lookup_sorted_set.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_lookup_string.yaml b/integration-tests/resources/jobs/tests/redis_lookup_string.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_lookup_string.yaml rename to integration-tests/resources/jobs/tests/redis_lookup_string.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_lookup_string_nested.yaml b/integration-tests/resources/jobs/tests/redis_lookup_string_nested.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_lookup_string_nested.yaml rename to integration-tests/resources/jobs/tests/redis_lookup_string_nested.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_to_cassandra.yaml b/integration-tests/resources/jobs/tests/redis_to_cassandra.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_to_cassandra.yaml rename to integration-tests/resources/jobs/tests/redis_to_cassandra.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_to_db2.yaml b/integration-tests/resources/jobs/tests/redis_to_db2.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_to_db2.yaml rename to integration-tests/resources/jobs/tests/redis_to_db2.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_to_mysql.yaml b/integration-tests/resources/jobs/tests/redis_to_mysql.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_to_mysql.yaml rename to integration-tests/resources/jobs/tests/redis_to_mysql.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_to_oracle.yaml b/integration-tests/resources/jobs/tests/redis_to_oracle.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_to_oracle.yaml rename to integration-tests/resources/jobs/tests/redis_to_oracle.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_to_pg.yaml b/integration-tests/resources/jobs/tests/redis_to_pg.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_to_pg.yaml rename to integration-tests/resources/jobs/tests/redis_to_pg.dy.yaml diff --git a/integration-tests/resources/jobs/tests/redis_to_sqlserver.yaml b/integration-tests/resources/jobs/tests/redis_to_sqlserver.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/redis_to_sqlserver.yaml rename to integration-tests/resources/jobs/tests/redis_to_sqlserver.dy.yaml diff --git a/integration-tests/resources/jobs/tests/stdin_to_stdout.yaml b/integration-tests/resources/jobs/tests/stdin_to_stdout.dy.yaml similarity index 100% rename from integration-tests/resources/jobs/tests/stdin_to_stdout.yaml rename to integration-tests/resources/jobs/tests/stdin_to_stdout.dy.yaml From e2ba3d3d694aba7a6aa54684be004ccbf0c9ea21 Mon Sep 17 00:00:00 2001 From: spicy-sauce Date: Wed, 13 Mar 2024 09:59:47 +0200 Subject: [PATCH 02/10] revert connections --- examples/{connections.dy.yaml => connections.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/{connections.dy.yaml => connections.yaml} (100%) diff --git a/examples/connections.dy.yaml b/examples/connections.yaml similarity index 100% rename from examples/connections.dy.yaml rename to examples/connections.yaml From 84141015f74f2842bf6f735b836af2f7ca2ec717 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 13 Mar 2024 08:00:32 +0000 Subject: [PATCH 03/10] update json schemas --- schemas/job.schema.json | 1158 +++++++++++++++++++-------------------- 1 file changed, 579 insertions(+), 579 deletions(-) diff --git a/schemas/job.schema.json b/schemas/job.schema.json index 683bd708..36dac71a 100644 --- a/schemas/job.schema.json +++ b/schemas/job.schema.json @@ -19,31 +19,116 @@ "properties": { "uses": { "enum": [ + "map", + "filter", "http.write", "http.receiver", - "filter", - "std.write", - "std.read", - "jinja_template", - "rename_field", - "sequence", - "add_field", "redis.write", "redis.lookup", "redis.read_stream", "remove_field", "azure.read_event_hub", + "std.write", + "std.read", + "jinja_template", "relational.write", "relational.read", - "map", + "rename_field", + "add_field", + "sequence", + "files.read_csv", "parquet.write", "parquet.read", - "cassandra.write", - "files.read_csv" + "cassandra.write" ] } }, "allOf": [ + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "map" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "map", + "description": "Maps a record into a new output based on expressions", + "type": "object", + "properties": { + "expression": { + "description": "Expression", + "type": ["object", "string"] + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "additionalProperties": false, + "required": ["expression", "language"], + "examples": [ + { + "expression": { + "first_name": "first_name", + "last_name": "last_name", + "greeting": "'Hello ' || CASE WHEN gender = 'F' THEN 'Ms.' WHEN gender = 'M' THEN 'Mr.' ELSE 'N/A' END || ' ' || full_name", + "country": "country", + "full_name": "full_name" + }, + "language": "sql" + }, + { + "expression": "{\"CustomerId\": \"customer_id\", \"FirstName\": \"first_name\", \"LastName\": \"last_name\", \"Company\": \"company\", \"Location\": {\"Street\": \"address\", \"City\": \"city\", \"State\": \"state\", \"Country\": \"country\", \"PostalCode\": \"postal_code\"}, \"Phone\": \"phone\", \"Fax\": \"fax\", \"Email\": \"email\"}", + "language": "jmespath" + } + ] + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "filter" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "filter", + "description": "Filter records", + "type": "object", + "properties": { + "expression": { + "description": "Expression", + "type": "string" + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "additionalProperties": false, + "required": ["expression", "language"], + "examples": [{ "language": "sql", "expression": "age>20" }] + } + } + } + }, { "if": { "properties": { @@ -290,7 +375,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "filter" + "const": "redis.write" } }, "required": ["uses"] @@ -298,43 +383,48 @@ "then": { "properties": { "with": { - "title": "filter", - "description": "Filter records", + "title": "redis.write", + "description": "Write to a Redis data structure", "type": "object", "properties": { - "expression": { - "description": "Expression", + "connection": { + "title": "Connection name", "type": "string" }, - "language": { - "description": "Language", + "command": { + "enum": [ + "HSET", + "SADD", + "XADD", + "RPUSH", + "LPUSH", + "SET", + "ZADD" + ], + "default": "HSET", "type": "string", - "enum": ["jmespath", "sql"] + "title": "Redis command", + "description": "Redis command" + }, + "key": { + "description": "Field to use as the Redis key", + "type": "object", + "properties": { + "expression": { + "description": "Expression", + "type": "string" + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "required": ["expression", "language"] } }, "additionalProperties": false, - "required": ["expression", "language"], - "examples": [{ "language": "sql", "expression": "age>20" }] - } - } - } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "std.write" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "std.write", - "description": "Write to the standard output" + "required": ["connection", "key"] } } } @@ -345,7 +435,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "std.read" + "const": "redis.lookup" } }, "required": ["uses"] @@ -353,8 +443,38 @@ "then": { "properties": { "with": { - "title": "std.read", - "description": "Read from the standard input" + "title": "redis.lookup", + "description": "Lookup data from Redis using the given command and key", + "type": "object", + "properties": { + "connection": { + "title": "Connection name", + "type": "string" + }, + "cmd": { + "title": "Redis command", + "description": "The command to execute", + "type": "string" + }, + "args": { + "title": "Redis command arguments", + "description": "The list of expressions produces arguments", + "type": "array", + "items": { "type": "string" } + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + }, + "field": { + "type": "string", + "title": "Target field", + "description": "The field to write the result to" + } + }, + "additionalProperties": false, + "required": ["connection", "cmd", "args", "language", "field"] } } } @@ -365,7 +485,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "jinja_template" + "const": "redis.read_stream" } }, "required": ["uses"] @@ -373,28 +493,28 @@ "then": { "properties": { "with": { - "title": "jinja_template", - "description": "Apply Jinja template to a field", + "title": "redis.read_stream", + "description": "Read from Redis stream", "type": "object", "properties": { - "field": { "description": "Field", "type": "string" }, - "template": { - "description": "Jinja Template", + "connection": { + "description": "Connection name", "type": "string" + }, + "stream_name": { + "type": "string", + "title": "Source stream name", + "description": "Source stream name" + }, + "snapshot": { + "type": "boolean", + "title": "Snapshot current entries and quit", + "description": "Snapshot current entries and quit", + "default": false } }, "additionalProperties": false, - "required": ["field", "template"], - "examples": [ - { - "field": "name.full_name", - "template": "{{ name.fname }} {{ name.lname }}" - }, - { - "field": "name.fname_upper", - "template": "{{ name.fname | upper }}" - } - ] + "required": ["connection", "stream_name"] } } } @@ -405,7 +525,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "rename_field" + "const": "remove_field" } }, "required": ["uses"] @@ -413,12 +533,12 @@ "then": { "properties": { "with": { - "title": "rename_field", - "description": "Renames fields. All other fields remain unchanged", + "title": "remove_field", + "description": "Remove fields", "type": "object", "oneOf": [ { - "description": "Rename multiple fields", + "description": "Remove multiple fields", "properties": { "fields": { "type": "array", @@ -426,147 +546,13 @@ "items": { "type": "object", "properties": { - "from_field": { - "description": "From field", - "type": "string" - }, - "to_field": { - "description": "To field", + "field": { + "description": "Field", "type": "string" } }, "additionalProperties": false, - "required": ["from_field", "to_field"], - "examples": [ - { - "fields": [ - { - "from_field": "name.lname", - "to_field": "name.last_name" - }, - { - "from_field": "name.fname", - "to_field": "name.first_name" - } - ] - } - ] - } - } - }, - "required": ["fields"], - "additionalProperties": false - }, - { - "description": "Rename one field", - "properties": { - "from_field": { - "description": "From field", - "type": "string" - }, - "to_field": { - "description": "To field", - "type": "string" - } - }, - "additionalProperties": false, - "required": ["from_field", "to_field"], - "examples": [ - { - "from_field": "name.lname", - "to_field": "name.last_name" - } - ] - } - ] - } - } - } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "sequence" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "sequence", - "description": "Add a sequence number field to data", - "type": "object", - "additionalProperties": false, - "examples": [], - "required": [], - "properties": { - "field": { - "type": "string", - "title": "Name of new sequence field" - }, - "start": { - "type": "number", - "title": "Start entry", - "default": 1, - "examples": [] - }, - "increment": { - "type": "number", - "title": "Increment between sequences", - "examples": [] - } - } - } - } - } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "add_field" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "add_field", - "description": "Add fields to a record", - "type": "object", - "oneOf": [ - { - "description": "Add multiple fields", - "properties": { - "fields": { - "type": "array", - "description": "Fields", - "items": { - "type": "object", - "properties": { - "field": { - "description": "Field", - "type": "string" - }, - "expression": { - "description": "Expression", - "type": "string" - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - } - }, - "additionalProperties": false, - "required": ["field", "expression", "language"] + "required": ["field"] } } }, @@ -575,43 +561,20 @@ "examples": [ { "fields": [ - { - "field": "name.full_name", - "language": "jmespath", - "expression": "concat([name.fname, ' ', name.lname])" - }, - { - "field": "name.fname_upper", - "language": "jmespath", - "expression": "upper(name.fname)" - } + { "field": "credit_card" }, + { "field": "name.mname" } ] } ] }, { - "description": "Add one field", + "description": "Remove one field", "properties": { - "field": { "description": "Field", "type": "string" }, - "expression": { - "description": "Expression", - "type": "string" - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - } + "field": { "description": "Field", "type": "string" } }, "additionalProperties": false, - "required": ["field", "expression", "language"], - "examples": [ - { - "field": "country", - "language": "sql", - "expression": "country_code || ' - ' || UPPER(country_name)" - } - ] + "required": ["field"], + "examples": [{ "field": "credit_card" }] } ] } @@ -624,7 +587,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "redis.write" + "const": "azure.read_event_hub" } }, "required": ["uses"] @@ -632,98 +595,43 @@ "then": { "properties": { "with": { - "title": "redis.write", - "description": "Write to a Redis data structure", + "title": "azure.read_event_hub", + "description": "Read from Azure Event Hub", "type": "object", "properties": { - "connection": { - "title": "Connection name", - "type": "string" - }, - "command": { - "enum": [ - "HSET", - "SADD", - "XADD", - "RPUSH", - "LPUSH", - "SET", - "ZADD" - ], - "default": "HSET", + "event_hub_connection_string": { "type": "string", - "title": "Redis command", - "description": "Redis command" - }, - "key": { - "description": "Field to use as the Redis key", - "type": "object", - "properties": { - "expression": { - "description": "Expression", - "type": "string" - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - } - }, - "required": ["expression", "language"] - } - }, - "additionalProperties": false, - "required": ["connection", "key"] - } - } - } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "redis.lookup" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "redis.lookup", - "description": "Lookup data from Redis using the given command and key", - "type": "object", - "properties": { - "connection": { - "title": "Connection name", - "type": "string" + "description": "The connection string for the Azure Event Hub namespace." }, - "cmd": { - "title": "Redis command", - "description": "The command to execute", - "type": "string" + "event_hub_consumer_group_name": { + "type": "string", + "description": "The name of the consumer group to read events from." }, - "args": { - "title": "Redis command arguments", - "description": "The list of expressions produces arguments", - "type": "array", - "items": { "type": "string" } + "event_hub_name": { + "type": "string", + "description": "The name of the Azure Event Hub." }, - "language": { - "description": "Language", + "checkpoint_store_connection_string": { "type": "string", - "enum": ["jmespath", "sql"] + "description": "The connection string for the Azure Storage account used as the checkpoint store." }, - "field": { + "checkpoint_store_container_name": { "type": "string", - "title": "Target field", - "description": "The field to write the result to" + "description": "The name of the container within the checkpoint store to store the checkpoints." + }, + "batch_size": { + "type": "integer", + "description": "The maximum number of events to receive in each batch.", + "default": 300 } }, - "additionalProperties": false, - "required": ["connection", "cmd", "args", "language", "field"] + "required": [ + "event_hub_connection_string", + "event_hub_consumer_group_name", + "event_hub_name", + "checkpoint_store_connection_string", + "checkpoint_store_container_name" + ] } } } @@ -734,7 +642,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "redis.read_stream" + "const": "std.write" } }, "required": ["uses"] @@ -742,28 +650,8 @@ "then": { "properties": { "with": { - "title": "redis.read_stream", - "description": "Read from Redis stream", - "type": "object", - "properties": { - "connection": { - "description": "Connection name", - "type": "string" - }, - "stream_name": { - "type": "string", - "title": "Source stream name", - "description": "Source stream name" - }, - "snapshot": { - "type": "boolean", - "title": "Snapshot current entries and quit", - "description": "Snapshot current entries and quit", - "default": false - } - }, - "additionalProperties": false, - "required": ["connection", "stream_name"] + "title": "std.write", + "description": "Write to the standard output" } } } @@ -774,7 +662,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "remove_field" + "const": "std.read" } }, "required": ["uses"] @@ -782,104 +670,47 @@ "then": { "properties": { "with": { - "title": "remove_field", - "description": "Remove fields", - "type": "object", - "oneOf": [ - { - "description": "Remove multiple fields", - "properties": { - "fields": { - "type": "array", - "description": "Fields", - "items": { - "type": "object", - "properties": { - "field": { - "description": "Field", - "type": "string" - } - }, - "additionalProperties": false, - "required": ["field"] - } - } - }, - "required": ["fields"], - "additionalProperties": false, - "examples": [ - { - "fields": [ - { "field": "credit_card" }, - { "field": "name.mname" } - ] - } - ] - }, - { - "description": "Remove one field", - "properties": { - "field": { "description": "Field", "type": "string" } - }, - "additionalProperties": false, - "required": ["field"], - "examples": [{ "field": "credit_card" }] - } - ] + "title": "std.read", + "description": "Read from the standard input" } } } }, { "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "azure.read_event_hub" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "azure.read_event_hub", - "description": "Read from Azure Event Hub", - "type": "object", - "properties": { - "event_hub_connection_string": { - "type": "string", - "description": "The connection string for the Azure Event Hub namespace." - }, - "event_hub_consumer_group_name": { - "type": "string", - "description": "The name of the consumer group to read events from." - }, - "event_hub_name": { - "type": "string", - "description": "The name of the Azure Event Hub." - }, - "checkpoint_store_connection_string": { - "type": "string", - "description": "The connection string for the Azure Storage account used as the checkpoint store." - }, - "checkpoint_store_container_name": { - "type": "string", - "description": "The name of the container within the checkpoint store to store the checkpoints." - }, - "batch_size": { - "type": "integer", - "description": "The maximum number of events to receive in each batch.", - "default": 300 + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "jinja_template" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "jinja_template", + "description": "Apply Jinja template to a field", + "type": "object", + "properties": { + "field": { "description": "Field", "type": "string" }, + "template": { + "description": "Jinja Template", + "type": "string" } }, - "required": [ - "event_hub_connection_string", - "event_hub_consumer_group_name", - "event_hub_name", - "checkpoint_store_connection_string", - "checkpoint_store_container_name" + "additionalProperties": false, + "required": ["field", "template"], + "examples": [ + { + "field": "name.full_name", + "template": "{{ name.fname }} {{ name.lname }}" + }, + { + "field": "name.fname_upper", + "template": "{{ name.fname | upper }}" + } ] } } @@ -976,22 +807,263 @@ "description": "Used for `TYPE2` load_strategy. An SQL expression used to identify which rows are active", "examples": ["is_active='Y'", "deletedAt is null"] }, - "inactive_record_mapping": { - "type": "array", - "title": "Used for `TYPE2` load_strategy. The columns mapping to use to close out an active record", - "description": "A list of columns to use. Use any valid SQL expression for the source. If 'target' is omitted, will default to the name of the source column", - "default": [], + "inactive_record_mapping": { + "type": "array", + "title": "Used for `TYPE2` load_strategy. The columns mapping to use to close out an active record", + "description": "A list of columns to use. Use any valid SQL expression for the source. If 'target' is omitted, will default to the name of the source column", + "default": [], + "examples": [ + [ + { "source": "CURRENT_DATE", "target": "deletedAt" }, + { "source": "'Y'", "target": "is_active" } + ] + ] + } + }, + "required": ["connection", "table"], + "allOf": [ + { "not": { "required": ["opcode_field", "load_strategy"] } } + ] + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "relational.read" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "relational.read", + "description": "Read a table from an SQL-compatible data store", + "type": "object", + "additionalProperties": false, + "examples": [ + { + "id": "read_snowflake", + "type": "relational.read", + "properties": { + "connection": "eu_datalake", + "table": "employees", + "schema": "dbo" + } + } + ], + "properties": { + "connection": { + "type": "string", + "title": "The connection to use for loading", + "description": "Logical connection name as defined in the connections.yaml", + "examples": ["europe_db", "target", "eu_dwh"] + }, + "schema": { + "type": "string", + "title": "The table schema of the table", + "description": "If left blank, the default schema of this connection will be used as defined in the connections.yaml", + "examples": ["dbo"] + }, + "table": { + "type": "string", + "title": "The table name", + "description": "Table name", + "examples": ["employees"] + }, + "columns": { + "type": "array", + "title": "Optional subset of columns to load", + "items": { + "type": ["string", "object"], + "title": "name of column" + }, + "examples": [["fname", { "lname": "last_name" }]] + } + }, + "required": ["connection", "table"] + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "rename_field" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "rename_field", + "description": "Renames fields. All other fields remain unchanged", + "type": "object", + "oneOf": [ + { + "description": "Rename multiple fields", + "properties": { + "fields": { + "type": "array", + "description": "Fields", + "items": { + "type": "object", + "properties": { + "from_field": { + "description": "From field", + "type": "string" + }, + "to_field": { + "description": "To field", + "type": "string" + } + }, + "additionalProperties": false, + "required": ["from_field", "to_field"], + "examples": [ + { + "fields": [ + { + "from_field": "name.lname", + "to_field": "name.last_name" + }, + { + "from_field": "name.fname", + "to_field": "name.first_name" + } + ] + } + ] + } + } + }, + "required": ["fields"], + "additionalProperties": false + }, + { + "description": "Rename one field", + "properties": { + "from_field": { + "description": "From field", + "type": "string" + }, + "to_field": { + "description": "To field", + "type": "string" + } + }, + "additionalProperties": false, + "required": ["from_field", "to_field"], + "examples": [ + { + "from_field": "name.lname", + "to_field": "name.last_name" + } + ] + } + ] + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "add_field" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "add_field", + "description": "Add fields to a record", + "type": "object", + "oneOf": [ + { + "description": "Add multiple fields", + "properties": { + "fields": { + "type": "array", + "description": "Fields", + "items": { + "type": "object", + "properties": { + "field": { + "description": "Field", + "type": "string" + }, + "expression": { + "description": "Expression", + "type": "string" + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "additionalProperties": false, + "required": ["field", "expression", "language"] + } + } + }, + "required": ["fields"], + "additionalProperties": false, + "examples": [ + { + "fields": [ + { + "field": "name.full_name", + "language": "jmespath", + "expression": "concat([name.fname, ' ', name.lname])" + }, + { + "field": "name.fname_upper", + "language": "jmespath", + "expression": "upper(name.fname)" + } + ] + } + ] + }, + { + "description": "Add one field", + "properties": { + "field": { "description": "Field", "type": "string" }, + "expression": { + "description": "Expression", + "type": "string" + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "additionalProperties": false, + "required": ["field", "expression", "language"], "examples": [ - [ - { "source": "CURRENT_DATE", "target": "deletedAt" }, - { "source": "'Y'", "target": "is_active" } - ] + { + "field": "country", + "language": "sql", + "expression": "country_code || ' - ' || UPPER(country_name)" + } ] } - }, - "required": ["connection", "table"], - "allOf": [ - { "not": { "required": ["opcode_field", "load_strategy"] } } ] } } @@ -1003,7 +1075,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "relational.read" + "const": "sequence" } }, "required": ["uses"] @@ -1011,51 +1083,29 @@ "then": { "properties": { "with": { - "title": "relational.read", - "description": "Read a table from an SQL-compatible data store", + "title": "sequence", + "description": "Add a sequence number field to data", "type": "object", "additionalProperties": false, - "examples": [ - { - "id": "read_snowflake", - "type": "relational.read", - "properties": { - "connection": "eu_datalake", - "table": "employees", - "schema": "dbo" - } - } - ], + "examples": [], + "required": [], "properties": { - "connection": { - "type": "string", - "title": "The connection to use for loading", - "description": "Logical connection name as defined in the connections.yaml", - "examples": ["europe_db", "target", "eu_dwh"] - }, - "schema": { + "field": { "type": "string", - "title": "The table schema of the table", - "description": "If left blank, the default schema of this connection will be used as defined in the connections.yaml", - "examples": ["dbo"] + "title": "Name of new sequence field" }, - "table": { - "type": "string", - "title": "The table name", - "description": "Table name", - "examples": ["employees"] + "start": { + "type": "number", + "title": "Start entry", + "default": 1, + "examples": [] }, - "columns": { - "type": "array", - "title": "Optional subset of columns to load", - "items": { - "type": ["string", "object"], - "title": "name of column" - }, - "examples": [["fname", { "lname": "last_name" }]] + "increment": { + "type": "number", + "title": "Increment between sequences", + "examples": [] } - }, - "required": ["connection", "table"] + } } } } @@ -1066,7 +1116,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "map" + "const": "files.read_csv" } }, "required": ["uses"] @@ -1074,38 +1124,63 @@ "then": { "properties": { "with": { - "title": "map", - "description": "Maps a record into a new output based on expressions", + "title": "files.read_csv", + "description": "Read data from CSV", "type": "object", "properties": { - "expression": { - "description": "Expression", - "type": ["object", "string"] + "file": { + "description": "Filename. Can contain a regexp or glob expression", + "type": "string" }, - "language": { - "description": "Language", + "encoding": { + "description": "Encoding to use for reading the file", "type": "string", - "enum": ["jmespath", "sql"] + "default": "utf-8" + }, + "fields": { + "type": "array", + "title": "List of columns to use", + "description": "List of columns to use for extract", + "default": null, + "examples": [["fname", "lname"]], + "minLength": 1, + "additionalItems": true, + "items": { + "type": "string", + "description": "field name", + "examples": ["fname"] + } + }, + "skip": { + "description": "Number of lines to skip", + "type": "number", + "minimum": 0, + "default": 0 + }, + "delimiter": { + "description": "Delimiter to use for splitting the csv records", + "type": "string", + "minLength": 1, + "maxLength": 1, + "default": "," + }, + "batch_size": { + "description": "Number of records to read per batch", + "type": "number", + "minimum": 1, + "default": 1000 + }, + "quotechar": { + "description": "A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters. It defaults to '", + "type": "string", + "minLength": 1, + "maxLength": 1, + "default": "\"" } }, "additionalProperties": false, - "required": ["expression", "language"], - "examples": [ - { - "expression": { - "first_name": "first_name", - "last_name": "last_name", - "greeting": "'Hello ' || CASE WHEN gender = 'F' THEN 'Ms.' WHEN gender = 'M' THEN 'Mr.' ELSE 'N/A' END || ' ' || full_name", - "country": "country", - "full_name": "full_name" - }, - "language": "sql" - }, - { - "expression": "{\"CustomerId\": \"customer_id\", \"FirstName\": \"first_name\", \"LastName\": \"last_name\", \"Company\": \"company\", \"Location\": {\"Street\": \"address\", \"City\": \"city\", \"State\": \"state\", \"Country\": \"country\", \"PostalCode\": \"postal_code\"}, \"Phone\": \"phone\", \"Fax\": \"fax\", \"Email\": \"email\"}", - "language": "jmespath" - } - ] + "required": ["file"], + "examples": [{ "file": "archive.csv", "delimiter": ";" }] } } } @@ -1255,81 +1330,6 @@ } } } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "files.read_csv" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "files.read_csv", - "description": "Read data from CSV", - "type": "object", - "properties": { - "file": { - "description": "Filename. Can contain a regexp or glob expression", - "type": "string" - }, - "encoding": { - "description": "Encoding to use for reading the file", - "type": "string", - "default": "utf-8" - }, - "fields": { - "type": "array", - "title": "List of columns to use", - "description": "List of columns to use for extract", - "default": null, - "examples": [["fname", "lname"]], - "minLength": 1, - "additionalItems": true, - "items": { - "type": "string", - "description": "field name", - "examples": ["fname"] - } - }, - "skip": { - "description": "Number of lines to skip", - "type": "number", - "minimum": 0, - "default": 0 - }, - "delimiter": { - "description": "Delimiter to use for splitting the csv records", - "type": "string", - "minLength": 1, - "maxLength": 1, - "default": "," - }, - "batch_size": { - "description": "Number of records to read per batch", - "type": "number", - "minimum": 1, - "default": 1000 - }, - "quotechar": { - "description": "A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters. It defaults to '", - "type": "string", - "minLength": 1, - "maxLength": 1, - "default": "\"" - } - }, - "additionalProperties": false, - "required": ["file"], - "examples": [{ "file": "archive.csv", "delimiter": ";" }] - } - } - } } ] } From c7644e9e0d6fc44781c342865c0aba63a94dffb8 Mon Sep 17 00:00:00 2001 From: spicy-sauce Date: Wed, 13 Mar 2024 10:06:37 +0200 Subject: [PATCH 04/10] fixed examples --- examples/test_csv_producer.dy.yaml | 48 +++++++++++++++------------- examples/test_redis_producer.dy.yaml | 48 +++++++++++++++------------- 2 files changed, 50 insertions(+), 46 deletions(-) diff --git a/examples/test_csv_producer.dy.yaml b/examples/test_csv_producer.dy.yaml index 5c489329..05de6932 100644 --- a/examples/test_csv_producer.dy.yaml +++ b/examples/test_csv_producer.dy.yaml @@ -1,25 +1,27 @@ -- uses: files.read_csv +input: + uses: files.read_csv with: file: examples/test.csv -- uses: add_field - with: - fields: - - field: full_name - language: jmespath - expression: concat([fname, ' ' , lname]) -- uses: map - with: - expression: - { - first_name: fname, - last_name: lname, - country: country_code || ' - ' || UPPER(country_name), - full_name: full_name, - greeting: "'Hello ' || CASE WHEN gender = 'F' THEN 'Ms.' WHEN gender = 'M' THEN 'Mr.' ELSE 'N/A' END || ' ' || full_name" - } - language: sql -- uses: relational.write - with: - connection: hr - schema: hr - table: emp +steps: + - uses: add_field + with: + fields: + - field: full_name + language: jmespath + expression: concat([fname, ' ' , lname]) + - uses: map + with: + expression: + { + first_name: fname, + last_name: lname, + country: country_code || ' - ' || UPPER(country_name), + full_name: full_name, + greeting: "'Hello ' || CASE WHEN gender = 'F' THEN 'Ms.' WHEN gender = 'M' THEN 'Mr.' ELSE 'N/A' END || ' ' || full_name" + } + language: sql + - uses: relational.write + with: + connection: hr + schema: hr + table: emp diff --git a/examples/test_redis_producer.dy.yaml b/examples/test_redis_producer.dy.yaml index eea0ac60..f7a3a517 100644 --- a/examples/test_redis_producer.dy.yaml +++ b/examples/test_redis_producer.dy.yaml @@ -1,26 +1,28 @@ -- uses: redis.read_stream +input: + uses: redis.read_stream with: connection: cache stream_name: emp -- uses: add_field - with: - fields: - - field: full_name - language: jmespath - expression: concat([fname, ' ' , lname]) -- uses: map - with: - expression: - { - first_name: fname, - last_name: lname, - country: country_code || ' - ' || UPPER(country_name), - full_name: full_name, - greeting: "'Hello ' || CASE WHEN gender = 'F' THEN 'Ms.' WHEN gender = 'M' THEN 'Mr.' ELSE 'N/A' END || ' ' || full_name" - } - language: sql -- uses: relational.write - with: - connection: hr - schema: hr - table: emp +steps: + - uses: add_field + with: + fields: + - field: full_name + language: jmespath + expression: concat([fname, ' ' , lname]) + - uses: map + with: + expression: + { + first_name: fname, + last_name: lname, + country: country_code || ' - ' || UPPER(country_name), + full_name: full_name, + greeting: "'Hello ' || CASE WHEN gender = 'F' THEN 'Ms.' WHEN gender = 'M' THEN 'Mr.' ELSE 'N/A' END || ' ' || full_name" + } + language: sql + - uses: relational.write + with: + connection: hr + schema: hr + table: emp From 2e15008ffed84138a1e7a4e00781464a53543a2c Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 13 Mar 2024 08:07:41 +0000 Subject: [PATCH 05/10] update json schemas --- schemas/job.schema.json | 1146 +++++++++++++++++++-------------------- 1 file changed, 573 insertions(+), 573 deletions(-) diff --git a/schemas/job.schema.json b/schemas/job.schema.json index 36dac71a..683bd708 100644 --- a/schemas/job.schema.json +++ b/schemas/job.schema.json @@ -19,116 +19,31 @@ "properties": { "uses": { "enum": [ - "map", - "filter", "http.write", "http.receiver", + "filter", + "std.write", + "std.read", + "jinja_template", + "rename_field", + "sequence", + "add_field", "redis.write", "redis.lookup", "redis.read_stream", "remove_field", "azure.read_event_hub", - "std.write", - "std.read", - "jinja_template", "relational.write", "relational.read", - "rename_field", - "add_field", - "sequence", - "files.read_csv", + "map", "parquet.write", "parquet.read", - "cassandra.write" + "cassandra.write", + "files.read_csv" ] } }, "allOf": [ - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "map" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "map", - "description": "Maps a record into a new output based on expressions", - "type": "object", - "properties": { - "expression": { - "description": "Expression", - "type": ["object", "string"] - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - } - }, - "additionalProperties": false, - "required": ["expression", "language"], - "examples": [ - { - "expression": { - "first_name": "first_name", - "last_name": "last_name", - "greeting": "'Hello ' || CASE WHEN gender = 'F' THEN 'Ms.' WHEN gender = 'M' THEN 'Mr.' ELSE 'N/A' END || ' ' || full_name", - "country": "country", - "full_name": "full_name" - }, - "language": "sql" - }, - { - "expression": "{\"CustomerId\": \"customer_id\", \"FirstName\": \"first_name\", \"LastName\": \"last_name\", \"Company\": \"company\", \"Location\": {\"Street\": \"address\", \"City\": \"city\", \"State\": \"state\", \"Country\": \"country\", \"PostalCode\": \"postal_code\"}, \"Phone\": \"phone\", \"Fax\": \"fax\", \"Email\": \"email\"}", - "language": "jmespath" - } - ] - } - } - } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "filter" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "filter", - "description": "Filter records", - "type": "object", - "properties": { - "expression": { - "description": "Expression", - "type": "string" - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - } - }, - "additionalProperties": false, - "required": ["expression", "language"], - "examples": [{ "language": "sql", "expression": "age>20" }] - } - } - } - }, { "if": { "properties": { @@ -375,7 +290,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "redis.write" + "const": "filter" } }, "required": ["uses"] @@ -383,48 +298,23 @@ "then": { "properties": { "with": { - "title": "redis.write", - "description": "Write to a Redis data structure", + "title": "filter", + "description": "Filter records", "type": "object", "properties": { - "connection": { - "title": "Connection name", + "expression": { + "description": "Expression", "type": "string" }, - "command": { - "enum": [ - "HSET", - "SADD", - "XADD", - "RPUSH", - "LPUSH", - "SET", - "ZADD" - ], - "default": "HSET", + "language": { + "description": "Language", "type": "string", - "title": "Redis command", - "description": "Redis command" - }, - "key": { - "description": "Field to use as the Redis key", - "type": "object", - "properties": { - "expression": { - "description": "Expression", - "type": "string" - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - } - }, - "required": ["expression", "language"] + "enum": ["jmespath", "sql"] } }, "additionalProperties": false, - "required": ["connection", "key"] + "required": ["expression", "language"], + "examples": [{ "language": "sql", "expression": "age>20" }] } } } @@ -435,7 +325,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "redis.lookup" + "const": "std.write" } }, "required": ["uses"] @@ -443,38 +333,8 @@ "then": { "properties": { "with": { - "title": "redis.lookup", - "description": "Lookup data from Redis using the given command and key", - "type": "object", - "properties": { - "connection": { - "title": "Connection name", - "type": "string" - }, - "cmd": { - "title": "Redis command", - "description": "The command to execute", - "type": "string" - }, - "args": { - "title": "Redis command arguments", - "description": "The list of expressions produces arguments", - "type": "array", - "items": { "type": "string" } - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - }, - "field": { - "type": "string", - "title": "Target field", - "description": "The field to write the result to" - } - }, - "additionalProperties": false, - "required": ["connection", "cmd", "args", "language", "field"] + "title": "std.write", + "description": "Write to the standard output" } } } @@ -485,7 +345,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "redis.read_stream" + "const": "std.read" } }, "required": ["uses"] @@ -493,28 +353,48 @@ "then": { "properties": { "with": { - "title": "redis.read_stream", - "description": "Read from Redis stream", + "title": "std.read", + "description": "Read from the standard input" + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "jinja_template" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "jinja_template", + "description": "Apply Jinja template to a field", "type": "object", "properties": { - "connection": { - "description": "Connection name", + "field": { "description": "Field", "type": "string" }, + "template": { + "description": "Jinja Template", "type": "string" - }, - "stream_name": { - "type": "string", - "title": "Source stream name", - "description": "Source stream name" - }, - "snapshot": { - "type": "boolean", - "title": "Snapshot current entries and quit", - "description": "Snapshot current entries and quit", - "default": false } }, "additionalProperties": false, - "required": ["connection", "stream_name"] + "required": ["field", "template"], + "examples": [ + { + "field": "name.full_name", + "template": "{{ name.fname }} {{ name.lname }}" + }, + { + "field": "name.fname_upper", + "template": "{{ name.fname | upper }}" + } + ] } } } @@ -525,7 +405,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "remove_field" + "const": "rename_field" } }, "required": ["uses"] @@ -533,12 +413,12 @@ "then": { "properties": { "with": { - "title": "remove_field", - "description": "Remove fields", + "title": "rename_field", + "description": "Renames fields. All other fields remain unchanged", "type": "object", "oneOf": [ { - "description": "Remove multiple fields", + "description": "Rename multiple fields", "properties": { "fields": { "type": "array", @@ -546,35 +426,57 @@ "items": { "type": "object", "properties": { - "field": { - "description": "Field", + "from_field": { + "description": "From field", + "type": "string" + }, + "to_field": { + "description": "To field", "type": "string" } }, "additionalProperties": false, - "required": ["field"] - } - } - }, - "required": ["fields"], - "additionalProperties": false, - "examples": [ - { - "fields": [ - { "field": "credit_card" }, - { "field": "name.mname" } - ] + "required": ["from_field", "to_field"], + "examples": [ + { + "fields": [ + { + "from_field": "name.lname", + "to_field": "name.last_name" + }, + { + "from_field": "name.fname", + "to_field": "name.first_name" + } + ] + } + ] + } } - ] + }, + "required": ["fields"], + "additionalProperties": false }, { - "description": "Remove one field", + "description": "Rename one field", "properties": { - "field": { "description": "Field", "type": "string" } + "from_field": { + "description": "From field", + "type": "string" + }, + "to_field": { + "description": "To field", + "type": "string" + } }, "additionalProperties": false, - "required": ["field"], - "examples": [{ "field": "credit_card" }] + "required": ["from_field", "to_field"], + "examples": [ + { + "from_field": "name.lname", + "to_field": "name.last_name" + } + ] } ] } @@ -587,7 +489,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "azure.read_event_hub" + "const": "sequence" } }, "required": ["uses"] @@ -595,63 +497,29 @@ "then": { "properties": { "with": { - "title": "azure.read_event_hub", - "description": "Read from Azure Event Hub", + "title": "sequence", + "description": "Add a sequence number field to data", "type": "object", + "additionalProperties": false, + "examples": [], + "required": [], "properties": { - "event_hub_connection_string": { - "type": "string", - "description": "The connection string for the Azure Event Hub namespace." - }, - "event_hub_consumer_group_name": { - "type": "string", - "description": "The name of the consumer group to read events from." - }, - "event_hub_name": { - "type": "string", - "description": "The name of the Azure Event Hub." - }, - "checkpoint_store_connection_string": { + "field": { "type": "string", - "description": "The connection string for the Azure Storage account used as the checkpoint store." + "title": "Name of new sequence field" }, - "checkpoint_store_container_name": { - "type": "string", - "description": "The name of the container within the checkpoint store to store the checkpoints." + "start": { + "type": "number", + "title": "Start entry", + "default": 1, + "examples": [] }, - "batch_size": { - "type": "integer", - "description": "The maximum number of events to receive in each batch.", - "default": 300 + "increment": { + "type": "number", + "title": "Increment between sequences", + "examples": [] } - }, - "required": [ - "event_hub_connection_string", - "event_hub_consumer_group_name", - "event_hub_name", - "checkpoint_store_connection_string", - "checkpoint_store_container_name" - ] - } - } - } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "std.write" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "std.write", - "description": "Write to the standard output" + } } } } @@ -662,7 +530,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "std.read" + "const": "add_field" } }, "required": ["uses"] @@ -670,8 +538,82 @@ "then": { "properties": { "with": { - "title": "std.read", - "description": "Read from the standard input" + "title": "add_field", + "description": "Add fields to a record", + "type": "object", + "oneOf": [ + { + "description": "Add multiple fields", + "properties": { + "fields": { + "type": "array", + "description": "Fields", + "items": { + "type": "object", + "properties": { + "field": { + "description": "Field", + "type": "string" + }, + "expression": { + "description": "Expression", + "type": "string" + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "additionalProperties": false, + "required": ["field", "expression", "language"] + } + } + }, + "required": ["fields"], + "additionalProperties": false, + "examples": [ + { + "fields": [ + { + "field": "name.full_name", + "language": "jmespath", + "expression": "concat([name.fname, ' ', name.lname])" + }, + { + "field": "name.fname_upper", + "language": "jmespath", + "expression": "upper(name.fname)" + } + ] + } + ] + }, + { + "description": "Add one field", + "properties": { + "field": { "description": "Field", "type": "string" }, + "expression": { + "description": "Expression", + "type": "string" + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "additionalProperties": false, + "required": ["field", "expression", "language"], + "examples": [ + { + "field": "country", + "language": "sql", + "expression": "country_code || ' - ' || UPPER(country_name)" + } + ] + } + ] } } } @@ -682,7 +624,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "jinja_template" + "const": "redis.write" } }, "required": ["uses"] @@ -690,28 +632,48 @@ "then": { "properties": { "with": { - "title": "jinja_template", - "description": "Apply Jinja template to a field", + "title": "redis.write", + "description": "Write to a Redis data structure", "type": "object", "properties": { - "field": { "description": "Field", "type": "string" }, - "template": { - "description": "Jinja Template", + "connection": { + "title": "Connection name", "type": "string" + }, + "command": { + "enum": [ + "HSET", + "SADD", + "XADD", + "RPUSH", + "LPUSH", + "SET", + "ZADD" + ], + "default": "HSET", + "type": "string", + "title": "Redis command", + "description": "Redis command" + }, + "key": { + "description": "Field to use as the Redis key", + "type": "object", + "properties": { + "expression": { + "description": "Expression", + "type": "string" + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "required": ["expression", "language"] } }, "additionalProperties": false, - "required": ["field", "template"], - "examples": [ - { - "field": "name.full_name", - "template": "{{ name.fname }} {{ name.lname }}" - }, - { - "field": "name.fname_upper", - "template": "{{ name.fname | upper }}" - } - ] + "required": ["connection", "key"] } } } @@ -722,7 +684,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "relational.write" + "const": "redis.lookup" } }, "required": ["uses"] @@ -730,100 +692,38 @@ "then": { "properties": { "with": { - "title": "relational.write", - "description": "Write into a SQL-compatible data store", + "title": "redis.lookup", + "description": "Lookup data from Redis using the given command and key", "type": "object", - "additionalProperties": false, - "examples": [ - { - "id": "load_snowflake", - "type": "relational.write", - "properties": { - "connection": "eu_datalake", - "table": "employees", - "schema": "dbo", - "load_strategy": "APPEND" - } - } - ], "properties": { "connection": { - "type": "string", - "title": "The connection to use for loading", - "description": "Logical connection name as defined in the connections.yaml", - "examples": ["europe_db", "target", "eu_dwh"] - }, - "schema": { - "type": "string", - "title": "The table schema of the target table", - "description": "If not specified, no specific schema will be used when connecting to the database.", - "examples": ["dbo"] - }, - "table": { - "type": "string", - "title": "The target table name", - "description": "Target table name", - "examples": ["employees"] + "title": "Connection name", + "type": "string" }, - "keys": { - "type": "array", - "title": "Business keys to use in case of `load_strategy` is UPSERT or working with `opcode_field`", - "items": { - "type": ["string", "object"], - "title": "name of column" - }, - "examples": [["fname", { "lname": "last_name" }]] + "cmd": { + "title": "Redis command", + "description": "The command to execute", + "type": "string" }, - "mapping": { + "args": { + "title": "Redis command arguments", + "description": "The list of expressions produces arguments", "type": "array", - "title": "Fields to write", - "items": { - "type": ["string", "object"], - "title": "name of column" - }, - "examples": [ - ["fname", { "lname": "last_name" }, "address", "gender"] - ] - }, - "foreach": { - "type": "string", - "title": "Split a column into multiple records with a JMESPath expression", - "description": "Use a JMESPath expression to split a column into multiple records. The expression should be in the format column: expression.", - "pattern": "^(?!:).*:.*(? Date: Wed, 13 Mar 2024 10:11:31 +0200 Subject: [PATCH 06/10] git pull in case the jsonschema has been updated --- .github/workflows/generate-docs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/generate-docs.yml b/.github/workflows/generate-docs.yml index 16706f93..3bfbf6ed 100644 --- a/.github/workflows/generate-docs.yml +++ b/.github/workflows/generate-docs.yml @@ -76,6 +76,7 @@ jobs: run: | git config user.name github-actions git config user.email github-actions@github.com + git pull git add . git diff --cached --exit-code || git commit -m "update autogenerated docs" git push From 1c09ca07837472e616c12df5c4d1e83df50b21f9 Mon Sep 17 00:00:00 2001 From: spicy-sauce Date: Wed, 13 Mar 2024 10:13:31 +0200 Subject: [PATCH 07/10] push only if there are changes --- .github/workflows/generate-jsonschema.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/generate-jsonschema.yml b/.github/workflows/generate-jsonschema.yml index a188a82c..60bba160 100644 --- a/.github/workflows/generate-jsonschema.yml +++ b/.github/workflows/generate-jsonschema.yml @@ -49,4 +49,4 @@ jobs: git config user.email github-actions@github.com git add . git diff --cached --exit-code || git commit -m "update json schemas" - git push --force + git diff --exit-code || git push From e79b27a829e56df07d1f7f6cf59a2a37944a7e69 Mon Sep 17 00:00:00 2001 From: spicy-sauce Date: Wed, 13 Mar 2024 10:19:52 +0200 Subject: [PATCH 08/10] unify conditions --- .github/workflows/generate-jsonschema.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/generate-jsonschema.yml b/.github/workflows/generate-jsonschema.yml index 60bba160..e45c84b3 100644 --- a/.github/workflows/generate-jsonschema.yml +++ b/.github/workflows/generate-jsonschema.yml @@ -48,5 +48,8 @@ jobs: git config user.name github-actions git config user.email github-actions@github.com git add . - git diff --cached --exit-code || git commit -m "update json schemas" - git diff --exit-code || git push + + if git diff --cached --exit-code; then + git commit -m "update json schemas" + git push + fi From e76433fe7fc14eb0bb8490109cccd1748eb6b936 Mon Sep 17 00:00:00 2001 From: spicy-sauce Date: Wed, 13 Mar 2024 10:22:22 +0200 Subject: [PATCH 09/10] fix condition --- .github/workflows/generate-jsonschema.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/generate-jsonschema.yml b/.github/workflows/generate-jsonschema.yml index e45c84b3..d1bb7d7c 100644 --- a/.github/workflows/generate-jsonschema.yml +++ b/.github/workflows/generate-jsonschema.yml @@ -48,8 +48,7 @@ jobs: git config user.name github-actions git config user.email github-actions@github.com git add . - - if git diff --cached --exit-code; then + if ! git diff --cached --exit-code; then git commit -m "update json schemas" git push fi From 9755eb7037455b005d9de122adb975d6157f7409 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 13 Mar 2024 08:23:03 +0000 Subject: [PATCH 10/10] update json schemas --- schemas/job.schema.json | 1158 +++++++++++++++++++-------------------- 1 file changed, 579 insertions(+), 579 deletions(-) diff --git a/schemas/job.schema.json b/schemas/job.schema.json index 683bd708..36dac71a 100644 --- a/schemas/job.schema.json +++ b/schemas/job.schema.json @@ -19,31 +19,116 @@ "properties": { "uses": { "enum": [ + "map", + "filter", "http.write", "http.receiver", - "filter", - "std.write", - "std.read", - "jinja_template", - "rename_field", - "sequence", - "add_field", "redis.write", "redis.lookup", "redis.read_stream", "remove_field", "azure.read_event_hub", + "std.write", + "std.read", + "jinja_template", "relational.write", "relational.read", - "map", + "rename_field", + "add_field", + "sequence", + "files.read_csv", "parquet.write", "parquet.read", - "cassandra.write", - "files.read_csv" + "cassandra.write" ] } }, "allOf": [ + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "map" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "map", + "description": "Maps a record into a new output based on expressions", + "type": "object", + "properties": { + "expression": { + "description": "Expression", + "type": ["object", "string"] + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "additionalProperties": false, + "required": ["expression", "language"], + "examples": [ + { + "expression": { + "first_name": "first_name", + "last_name": "last_name", + "greeting": "'Hello ' || CASE WHEN gender = 'F' THEN 'Ms.' WHEN gender = 'M' THEN 'Mr.' ELSE 'N/A' END || ' ' || full_name", + "country": "country", + "full_name": "full_name" + }, + "language": "sql" + }, + { + "expression": "{\"CustomerId\": \"customer_id\", \"FirstName\": \"first_name\", \"LastName\": \"last_name\", \"Company\": \"company\", \"Location\": {\"Street\": \"address\", \"City\": \"city\", \"State\": \"state\", \"Country\": \"country\", \"PostalCode\": \"postal_code\"}, \"Phone\": \"phone\", \"Fax\": \"fax\", \"Email\": \"email\"}", + "language": "jmespath" + } + ] + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "filter" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "filter", + "description": "Filter records", + "type": "object", + "properties": { + "expression": { + "description": "Expression", + "type": "string" + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "additionalProperties": false, + "required": ["expression", "language"], + "examples": [{ "language": "sql", "expression": "age>20" }] + } + } + } + }, { "if": { "properties": { @@ -290,7 +375,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "filter" + "const": "redis.write" } }, "required": ["uses"] @@ -298,43 +383,48 @@ "then": { "properties": { "with": { - "title": "filter", - "description": "Filter records", + "title": "redis.write", + "description": "Write to a Redis data structure", "type": "object", "properties": { - "expression": { - "description": "Expression", + "connection": { + "title": "Connection name", "type": "string" }, - "language": { - "description": "Language", + "command": { + "enum": [ + "HSET", + "SADD", + "XADD", + "RPUSH", + "LPUSH", + "SET", + "ZADD" + ], + "default": "HSET", "type": "string", - "enum": ["jmespath", "sql"] + "title": "Redis command", + "description": "Redis command" + }, + "key": { + "description": "Field to use as the Redis key", + "type": "object", + "properties": { + "expression": { + "description": "Expression", + "type": "string" + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "required": ["expression", "language"] } }, "additionalProperties": false, - "required": ["expression", "language"], - "examples": [{ "language": "sql", "expression": "age>20" }] - } - } - } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "std.write" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "std.write", - "description": "Write to the standard output" + "required": ["connection", "key"] } } } @@ -345,7 +435,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "std.read" + "const": "redis.lookup" } }, "required": ["uses"] @@ -353,8 +443,38 @@ "then": { "properties": { "with": { - "title": "std.read", - "description": "Read from the standard input" + "title": "redis.lookup", + "description": "Lookup data from Redis using the given command and key", + "type": "object", + "properties": { + "connection": { + "title": "Connection name", + "type": "string" + }, + "cmd": { + "title": "Redis command", + "description": "The command to execute", + "type": "string" + }, + "args": { + "title": "Redis command arguments", + "description": "The list of expressions produces arguments", + "type": "array", + "items": { "type": "string" } + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + }, + "field": { + "type": "string", + "title": "Target field", + "description": "The field to write the result to" + } + }, + "additionalProperties": false, + "required": ["connection", "cmd", "args", "language", "field"] } } } @@ -365,7 +485,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "jinja_template" + "const": "redis.read_stream" } }, "required": ["uses"] @@ -373,28 +493,28 @@ "then": { "properties": { "with": { - "title": "jinja_template", - "description": "Apply Jinja template to a field", + "title": "redis.read_stream", + "description": "Read from Redis stream", "type": "object", "properties": { - "field": { "description": "Field", "type": "string" }, - "template": { - "description": "Jinja Template", + "connection": { + "description": "Connection name", "type": "string" + }, + "stream_name": { + "type": "string", + "title": "Source stream name", + "description": "Source stream name" + }, + "snapshot": { + "type": "boolean", + "title": "Snapshot current entries and quit", + "description": "Snapshot current entries and quit", + "default": false } }, "additionalProperties": false, - "required": ["field", "template"], - "examples": [ - { - "field": "name.full_name", - "template": "{{ name.fname }} {{ name.lname }}" - }, - { - "field": "name.fname_upper", - "template": "{{ name.fname | upper }}" - } - ] + "required": ["connection", "stream_name"] } } } @@ -405,7 +525,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "rename_field" + "const": "remove_field" } }, "required": ["uses"] @@ -413,12 +533,12 @@ "then": { "properties": { "with": { - "title": "rename_field", - "description": "Renames fields. All other fields remain unchanged", + "title": "remove_field", + "description": "Remove fields", "type": "object", "oneOf": [ { - "description": "Rename multiple fields", + "description": "Remove multiple fields", "properties": { "fields": { "type": "array", @@ -426,147 +546,13 @@ "items": { "type": "object", "properties": { - "from_field": { - "description": "From field", - "type": "string" - }, - "to_field": { - "description": "To field", + "field": { + "description": "Field", "type": "string" } }, "additionalProperties": false, - "required": ["from_field", "to_field"], - "examples": [ - { - "fields": [ - { - "from_field": "name.lname", - "to_field": "name.last_name" - }, - { - "from_field": "name.fname", - "to_field": "name.first_name" - } - ] - } - ] - } - } - }, - "required": ["fields"], - "additionalProperties": false - }, - { - "description": "Rename one field", - "properties": { - "from_field": { - "description": "From field", - "type": "string" - }, - "to_field": { - "description": "To field", - "type": "string" - } - }, - "additionalProperties": false, - "required": ["from_field", "to_field"], - "examples": [ - { - "from_field": "name.lname", - "to_field": "name.last_name" - } - ] - } - ] - } - } - } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "sequence" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "sequence", - "description": "Add a sequence number field to data", - "type": "object", - "additionalProperties": false, - "examples": [], - "required": [], - "properties": { - "field": { - "type": "string", - "title": "Name of new sequence field" - }, - "start": { - "type": "number", - "title": "Start entry", - "default": 1, - "examples": [] - }, - "increment": { - "type": "number", - "title": "Increment between sequences", - "examples": [] - } - } - } - } - } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "add_field" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "add_field", - "description": "Add fields to a record", - "type": "object", - "oneOf": [ - { - "description": "Add multiple fields", - "properties": { - "fields": { - "type": "array", - "description": "Fields", - "items": { - "type": "object", - "properties": { - "field": { - "description": "Field", - "type": "string" - }, - "expression": { - "description": "Expression", - "type": "string" - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - } - }, - "additionalProperties": false, - "required": ["field", "expression", "language"] + "required": ["field"] } } }, @@ -575,43 +561,20 @@ "examples": [ { "fields": [ - { - "field": "name.full_name", - "language": "jmespath", - "expression": "concat([name.fname, ' ', name.lname])" - }, - { - "field": "name.fname_upper", - "language": "jmespath", - "expression": "upper(name.fname)" - } + { "field": "credit_card" }, + { "field": "name.mname" } ] } ] }, { - "description": "Add one field", + "description": "Remove one field", "properties": { - "field": { "description": "Field", "type": "string" }, - "expression": { - "description": "Expression", - "type": "string" - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - } + "field": { "description": "Field", "type": "string" } }, "additionalProperties": false, - "required": ["field", "expression", "language"], - "examples": [ - { - "field": "country", - "language": "sql", - "expression": "country_code || ' - ' || UPPER(country_name)" - } - ] + "required": ["field"], + "examples": [{ "field": "credit_card" }] } ] } @@ -624,7 +587,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "redis.write" + "const": "azure.read_event_hub" } }, "required": ["uses"] @@ -632,98 +595,43 @@ "then": { "properties": { "with": { - "title": "redis.write", - "description": "Write to a Redis data structure", + "title": "azure.read_event_hub", + "description": "Read from Azure Event Hub", "type": "object", "properties": { - "connection": { - "title": "Connection name", - "type": "string" - }, - "command": { - "enum": [ - "HSET", - "SADD", - "XADD", - "RPUSH", - "LPUSH", - "SET", - "ZADD" - ], - "default": "HSET", + "event_hub_connection_string": { "type": "string", - "title": "Redis command", - "description": "Redis command" - }, - "key": { - "description": "Field to use as the Redis key", - "type": "object", - "properties": { - "expression": { - "description": "Expression", - "type": "string" - }, - "language": { - "description": "Language", - "type": "string", - "enum": ["jmespath", "sql"] - } - }, - "required": ["expression", "language"] - } - }, - "additionalProperties": false, - "required": ["connection", "key"] - } - } - } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "redis.lookup" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "redis.lookup", - "description": "Lookup data from Redis using the given command and key", - "type": "object", - "properties": { - "connection": { - "title": "Connection name", - "type": "string" + "description": "The connection string for the Azure Event Hub namespace." }, - "cmd": { - "title": "Redis command", - "description": "The command to execute", - "type": "string" + "event_hub_consumer_group_name": { + "type": "string", + "description": "The name of the consumer group to read events from." }, - "args": { - "title": "Redis command arguments", - "description": "The list of expressions produces arguments", - "type": "array", - "items": { "type": "string" } + "event_hub_name": { + "type": "string", + "description": "The name of the Azure Event Hub." }, - "language": { - "description": "Language", + "checkpoint_store_connection_string": { "type": "string", - "enum": ["jmespath", "sql"] + "description": "The connection string for the Azure Storage account used as the checkpoint store." }, - "field": { + "checkpoint_store_container_name": { "type": "string", - "title": "Target field", - "description": "The field to write the result to" + "description": "The name of the container within the checkpoint store to store the checkpoints." + }, + "batch_size": { + "type": "integer", + "description": "The maximum number of events to receive in each batch.", + "default": 300 } }, - "additionalProperties": false, - "required": ["connection", "cmd", "args", "language", "field"] + "required": [ + "event_hub_connection_string", + "event_hub_consumer_group_name", + "event_hub_name", + "checkpoint_store_connection_string", + "checkpoint_store_container_name" + ] } } } @@ -734,7 +642,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "redis.read_stream" + "const": "std.write" } }, "required": ["uses"] @@ -742,28 +650,8 @@ "then": { "properties": { "with": { - "title": "redis.read_stream", - "description": "Read from Redis stream", - "type": "object", - "properties": { - "connection": { - "description": "Connection name", - "type": "string" - }, - "stream_name": { - "type": "string", - "title": "Source stream name", - "description": "Source stream name" - }, - "snapshot": { - "type": "boolean", - "title": "Snapshot current entries and quit", - "description": "Snapshot current entries and quit", - "default": false - } - }, - "additionalProperties": false, - "required": ["connection", "stream_name"] + "title": "std.write", + "description": "Write to the standard output" } } } @@ -774,7 +662,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "remove_field" + "const": "std.read" } }, "required": ["uses"] @@ -782,104 +670,47 @@ "then": { "properties": { "with": { - "title": "remove_field", - "description": "Remove fields", - "type": "object", - "oneOf": [ - { - "description": "Remove multiple fields", - "properties": { - "fields": { - "type": "array", - "description": "Fields", - "items": { - "type": "object", - "properties": { - "field": { - "description": "Field", - "type": "string" - } - }, - "additionalProperties": false, - "required": ["field"] - } - } - }, - "required": ["fields"], - "additionalProperties": false, - "examples": [ - { - "fields": [ - { "field": "credit_card" }, - { "field": "name.mname" } - ] - } - ] - }, - { - "description": "Remove one field", - "properties": { - "field": { "description": "Field", "type": "string" } - }, - "additionalProperties": false, - "required": ["field"], - "examples": [{ "field": "credit_card" }] - } - ] + "title": "std.read", + "description": "Read from the standard input" } } } }, { "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "azure.read_event_hub" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "azure.read_event_hub", - "description": "Read from Azure Event Hub", - "type": "object", - "properties": { - "event_hub_connection_string": { - "type": "string", - "description": "The connection string for the Azure Event Hub namespace." - }, - "event_hub_consumer_group_name": { - "type": "string", - "description": "The name of the consumer group to read events from." - }, - "event_hub_name": { - "type": "string", - "description": "The name of the Azure Event Hub." - }, - "checkpoint_store_connection_string": { - "type": "string", - "description": "The connection string for the Azure Storage account used as the checkpoint store." - }, - "checkpoint_store_container_name": { - "type": "string", - "description": "The name of the container within the checkpoint store to store the checkpoints." - }, - "batch_size": { - "type": "integer", - "description": "The maximum number of events to receive in each batch.", - "default": 300 + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "jinja_template" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "jinja_template", + "description": "Apply Jinja template to a field", + "type": "object", + "properties": { + "field": { "description": "Field", "type": "string" }, + "template": { + "description": "Jinja Template", + "type": "string" } }, - "required": [ - "event_hub_connection_string", - "event_hub_consumer_group_name", - "event_hub_name", - "checkpoint_store_connection_string", - "checkpoint_store_container_name" + "additionalProperties": false, + "required": ["field", "template"], + "examples": [ + { + "field": "name.full_name", + "template": "{{ name.fname }} {{ name.lname }}" + }, + { + "field": "name.fname_upper", + "template": "{{ name.fname | upper }}" + } ] } } @@ -976,22 +807,263 @@ "description": "Used for `TYPE2` load_strategy. An SQL expression used to identify which rows are active", "examples": ["is_active='Y'", "deletedAt is null"] }, - "inactive_record_mapping": { - "type": "array", - "title": "Used for `TYPE2` load_strategy. The columns mapping to use to close out an active record", - "description": "A list of columns to use. Use any valid SQL expression for the source. If 'target' is omitted, will default to the name of the source column", - "default": [], + "inactive_record_mapping": { + "type": "array", + "title": "Used for `TYPE2` load_strategy. The columns mapping to use to close out an active record", + "description": "A list of columns to use. Use any valid SQL expression for the source. If 'target' is omitted, will default to the name of the source column", + "default": [], + "examples": [ + [ + { "source": "CURRENT_DATE", "target": "deletedAt" }, + { "source": "'Y'", "target": "is_active" } + ] + ] + } + }, + "required": ["connection", "table"], + "allOf": [ + { "not": { "required": ["opcode_field", "load_strategy"] } } + ] + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "relational.read" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "relational.read", + "description": "Read a table from an SQL-compatible data store", + "type": "object", + "additionalProperties": false, + "examples": [ + { + "id": "read_snowflake", + "type": "relational.read", + "properties": { + "connection": "eu_datalake", + "table": "employees", + "schema": "dbo" + } + } + ], + "properties": { + "connection": { + "type": "string", + "title": "The connection to use for loading", + "description": "Logical connection name as defined in the connections.yaml", + "examples": ["europe_db", "target", "eu_dwh"] + }, + "schema": { + "type": "string", + "title": "The table schema of the table", + "description": "If left blank, the default schema of this connection will be used as defined in the connections.yaml", + "examples": ["dbo"] + }, + "table": { + "type": "string", + "title": "The table name", + "description": "Table name", + "examples": ["employees"] + }, + "columns": { + "type": "array", + "title": "Optional subset of columns to load", + "items": { + "type": ["string", "object"], + "title": "name of column" + }, + "examples": [["fname", { "lname": "last_name" }]] + } + }, + "required": ["connection", "table"] + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "rename_field" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "rename_field", + "description": "Renames fields. All other fields remain unchanged", + "type": "object", + "oneOf": [ + { + "description": "Rename multiple fields", + "properties": { + "fields": { + "type": "array", + "description": "Fields", + "items": { + "type": "object", + "properties": { + "from_field": { + "description": "From field", + "type": "string" + }, + "to_field": { + "description": "To field", + "type": "string" + } + }, + "additionalProperties": false, + "required": ["from_field", "to_field"], + "examples": [ + { + "fields": [ + { + "from_field": "name.lname", + "to_field": "name.last_name" + }, + { + "from_field": "name.fname", + "to_field": "name.first_name" + } + ] + } + ] + } + } + }, + "required": ["fields"], + "additionalProperties": false + }, + { + "description": "Rename one field", + "properties": { + "from_field": { + "description": "From field", + "type": "string" + }, + "to_field": { + "description": "To field", + "type": "string" + } + }, + "additionalProperties": false, + "required": ["from_field", "to_field"], + "examples": [ + { + "from_field": "name.lname", + "to_field": "name.last_name" + } + ] + } + ] + } + } + } + }, + { + "if": { + "properties": { + "uses": { + "description": "Block type", + "type": "string", + "const": "add_field" + } + }, + "required": ["uses"] + }, + "then": { + "properties": { + "with": { + "title": "add_field", + "description": "Add fields to a record", + "type": "object", + "oneOf": [ + { + "description": "Add multiple fields", + "properties": { + "fields": { + "type": "array", + "description": "Fields", + "items": { + "type": "object", + "properties": { + "field": { + "description": "Field", + "type": "string" + }, + "expression": { + "description": "Expression", + "type": "string" + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "additionalProperties": false, + "required": ["field", "expression", "language"] + } + } + }, + "required": ["fields"], + "additionalProperties": false, + "examples": [ + { + "fields": [ + { + "field": "name.full_name", + "language": "jmespath", + "expression": "concat([name.fname, ' ', name.lname])" + }, + { + "field": "name.fname_upper", + "language": "jmespath", + "expression": "upper(name.fname)" + } + ] + } + ] + }, + { + "description": "Add one field", + "properties": { + "field": { "description": "Field", "type": "string" }, + "expression": { + "description": "Expression", + "type": "string" + }, + "language": { + "description": "Language", + "type": "string", + "enum": ["jmespath", "sql"] + } + }, + "additionalProperties": false, + "required": ["field", "expression", "language"], "examples": [ - [ - { "source": "CURRENT_DATE", "target": "deletedAt" }, - { "source": "'Y'", "target": "is_active" } - ] + { + "field": "country", + "language": "sql", + "expression": "country_code || ' - ' || UPPER(country_name)" + } ] } - }, - "required": ["connection", "table"], - "allOf": [ - { "not": { "required": ["opcode_field", "load_strategy"] } } ] } } @@ -1003,7 +1075,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "relational.read" + "const": "sequence" } }, "required": ["uses"] @@ -1011,51 +1083,29 @@ "then": { "properties": { "with": { - "title": "relational.read", - "description": "Read a table from an SQL-compatible data store", + "title": "sequence", + "description": "Add a sequence number field to data", "type": "object", "additionalProperties": false, - "examples": [ - { - "id": "read_snowflake", - "type": "relational.read", - "properties": { - "connection": "eu_datalake", - "table": "employees", - "schema": "dbo" - } - } - ], + "examples": [], + "required": [], "properties": { - "connection": { - "type": "string", - "title": "The connection to use for loading", - "description": "Logical connection name as defined in the connections.yaml", - "examples": ["europe_db", "target", "eu_dwh"] - }, - "schema": { + "field": { "type": "string", - "title": "The table schema of the table", - "description": "If left blank, the default schema of this connection will be used as defined in the connections.yaml", - "examples": ["dbo"] + "title": "Name of new sequence field" }, - "table": { - "type": "string", - "title": "The table name", - "description": "Table name", - "examples": ["employees"] + "start": { + "type": "number", + "title": "Start entry", + "default": 1, + "examples": [] }, - "columns": { - "type": "array", - "title": "Optional subset of columns to load", - "items": { - "type": ["string", "object"], - "title": "name of column" - }, - "examples": [["fname", { "lname": "last_name" }]] + "increment": { + "type": "number", + "title": "Increment between sequences", + "examples": [] } - }, - "required": ["connection", "table"] + } } } } @@ -1066,7 +1116,7 @@ "uses": { "description": "Block type", "type": "string", - "const": "map" + "const": "files.read_csv" } }, "required": ["uses"] @@ -1074,38 +1124,63 @@ "then": { "properties": { "with": { - "title": "map", - "description": "Maps a record into a new output based on expressions", + "title": "files.read_csv", + "description": "Read data from CSV", "type": "object", "properties": { - "expression": { - "description": "Expression", - "type": ["object", "string"] + "file": { + "description": "Filename. Can contain a regexp or glob expression", + "type": "string" }, - "language": { - "description": "Language", + "encoding": { + "description": "Encoding to use for reading the file", "type": "string", - "enum": ["jmespath", "sql"] + "default": "utf-8" + }, + "fields": { + "type": "array", + "title": "List of columns to use", + "description": "List of columns to use for extract", + "default": null, + "examples": [["fname", "lname"]], + "minLength": 1, + "additionalItems": true, + "items": { + "type": "string", + "description": "field name", + "examples": ["fname"] + } + }, + "skip": { + "description": "Number of lines to skip", + "type": "number", + "minimum": 0, + "default": 0 + }, + "delimiter": { + "description": "Delimiter to use for splitting the csv records", + "type": "string", + "minLength": 1, + "maxLength": 1, + "default": "," + }, + "batch_size": { + "description": "Number of records to read per batch", + "type": "number", + "minimum": 1, + "default": 1000 + }, + "quotechar": { + "description": "A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters. It defaults to '", + "type": "string", + "minLength": 1, + "maxLength": 1, + "default": "\"" } }, "additionalProperties": false, - "required": ["expression", "language"], - "examples": [ - { - "expression": { - "first_name": "first_name", - "last_name": "last_name", - "greeting": "'Hello ' || CASE WHEN gender = 'F' THEN 'Ms.' WHEN gender = 'M' THEN 'Mr.' ELSE 'N/A' END || ' ' || full_name", - "country": "country", - "full_name": "full_name" - }, - "language": "sql" - }, - { - "expression": "{\"CustomerId\": \"customer_id\", \"FirstName\": \"first_name\", \"LastName\": \"last_name\", \"Company\": \"company\", \"Location\": {\"Street\": \"address\", \"City\": \"city\", \"State\": \"state\", \"Country\": \"country\", \"PostalCode\": \"postal_code\"}, \"Phone\": \"phone\", \"Fax\": \"fax\", \"Email\": \"email\"}", - "language": "jmespath" - } - ] + "required": ["file"], + "examples": [{ "file": "archive.csv", "delimiter": ";" }] } } } @@ -1255,81 +1330,6 @@ } } } - }, - { - "if": { - "properties": { - "uses": { - "description": "Block type", - "type": "string", - "const": "files.read_csv" - } - }, - "required": ["uses"] - }, - "then": { - "properties": { - "with": { - "title": "files.read_csv", - "description": "Read data from CSV", - "type": "object", - "properties": { - "file": { - "description": "Filename. Can contain a regexp or glob expression", - "type": "string" - }, - "encoding": { - "description": "Encoding to use for reading the file", - "type": "string", - "default": "utf-8" - }, - "fields": { - "type": "array", - "title": "List of columns to use", - "description": "List of columns to use for extract", - "default": null, - "examples": [["fname", "lname"]], - "minLength": 1, - "additionalItems": true, - "items": { - "type": "string", - "description": "field name", - "examples": ["fname"] - } - }, - "skip": { - "description": "Number of lines to skip", - "type": "number", - "minimum": 0, - "default": 0 - }, - "delimiter": { - "description": "Delimiter to use for splitting the csv records", - "type": "string", - "minLength": 1, - "maxLength": 1, - "default": "," - }, - "batch_size": { - "description": "Number of records to read per batch", - "type": "number", - "minimum": 1, - "default": 1000 - }, - "quotechar": { - "description": "A one-character string used to quote fields containing special characters, such as the delimiter or quotechar, or which contain new-line characters. It defaults to '", - "type": "string", - "minLength": 1, - "maxLength": 1, - "default": "\"" - } - }, - "additionalProperties": false, - "required": ["file"], - "examples": [{ "file": "archive.csv", "delimiter": ";" }] - } - } - } } ] }