From 92572ef8a516d4051d46c7b1001b0c1a8998e9be Mon Sep 17 00:00:00 2001 From: Matyas Manninger Date: Mon, 23 May 2022 14:27:13 +0200 Subject: [PATCH 1/3] add map type conversion --- avro_to_bigquery/converter.py | 16 ++++++++++++++++ tests/unit/test_converter.py | 26 +++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/avro_to_bigquery/converter.py b/avro_to_bigquery/converter.py index 015324b..268b561 100644 --- a/avro_to_bigquery/converter.py +++ b/avro_to_bigquery/converter.py @@ -91,6 +91,22 @@ def _convert_complex_type(avro_type): field_type = AVRO_TO_BIGQUERY_TYPES[avro_type["items"]] elif avro_type["type"] == "enum": field_type = AVRO_TO_BIGQUERY_TYPES[avro_type["type"]] + elif avro_type["type"] == "map": + field_type = "RECORD" + mode = "REPEATED" + # Create artificial fields to represent map in BQ + key_field = { + "name": "key", + "type": "string", + "doc": "Key for map avro field" + } + value_field = { + "name": "value", + "type": avro_type["values"], + "doc": "Value for map avro field" + } + fields = tuple(map(lambda f: _convert_field(f), + [key_field, value_field])) elif "logicalType" in avro_type: field_type = AVRO_TO_BIGQUERY_TYPES[avro_type["logicalType"]] else: diff --git a/tests/unit/test_converter.py b/tests/unit/test_converter.py index 805d5fe..3a37f16 100644 --- a/tests/unit/test_converter.py +++ b/tests/unit/test_converter.py @@ -100,6 +100,20 @@ def test_convert_avro_schema_to_bigquery_schema(): }, ], }, + { + "name": "map_field", + "type": {"type": "map", "values": "int"} + }, + { + "name": "complex_map", + "type": { + "type": "map", + "values": { + "type": "array", + "items": "int" + } + } + }, ], } @@ -107,7 +121,7 @@ def test_convert_avro_schema_to_bigquery_schema(): s = convert_schema(avs) # assert - assert len(s) == 15 + assert len(s) == 17 assert s[0].name == "full_name" assert s[1].field_type == "INTEGER" assert s[2].description == "Just a boolean tester" @@ -132,6 +146,16 @@ def test_convert_avro_schema_to_bigquery_schema(): assert s[13].field_type == "DATE" assert s[13].mode == "REPEATED" assert s[14].field_type == "STRING" + assert s[15].name == "map_field" + assert s[15].field_type == "RECORD" + assert s[15].mode == "REPEATED" + assert s[15].fields[0].field_type == "STRING" + assert s[15].fields[1].field_type == "INTEGER" + assert s[15].fields[0].name == "key" + assert s[15].fields[1].name == "value" + assert s[16].fields[0].field_type == "STRING" + assert s[16].fields[1].field_type == "INTEGER" + assert s[16].fields[1].mode == "REPEATED" def test_incorrect_nullable_field(): From eefe1da86bf3a7dbf2ea37f77a85038b47132359 Mon Sep 17 00:00:00 2001 From: Matyas Manninger Date: Thu, 30 Jun 2022 16:19:09 +0200 Subject: [PATCH 2/3] black test_converter.py --- tests/unit/test_converter.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/unit/test_converter.py b/tests/unit/test_converter.py index 3a37f16..ea4e243 100644 --- a/tests/unit/test_converter.py +++ b/tests/unit/test_converter.py @@ -100,19 +100,13 @@ def test_convert_avro_schema_to_bigquery_schema(): }, ], }, - { - "name": "map_field", - "type": {"type": "map", "values": "int"} - }, + {"name": "map_field", "type": {"type": "map", "values": "int"}}, { "name": "complex_map", "type": { "type": "map", - "values": { - "type": "array", - "items": "int" - } - } + "values": {"type": "array", "items": "int"}, + }, }, ], } From ba62a49b2e3abd6cf6bbd20743846e1e6c796541 Mon Sep 17 00:00:00 2001 From: Matyas Manninger Date: Thu, 30 Jun 2022 16:25:18 +0200 Subject: [PATCH 3/3] black converter.py --- avro_to_bigquery/converter.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/avro_to_bigquery/converter.py b/avro_to_bigquery/converter.py index 268b561..38c814a 100644 --- a/avro_to_bigquery/converter.py +++ b/avro_to_bigquery/converter.py @@ -98,15 +98,16 @@ def _convert_complex_type(avro_type): key_field = { "name": "key", "type": "string", - "doc": "Key for map avro field" + "doc": "Key for map avro field", } value_field = { "name": "value", "type": avro_type["values"], - "doc": "Value for map avro field" + "doc": "Value for map avro field", } - fields = tuple(map(lambda f: _convert_field(f), - [key_field, value_field])) + fields = tuple( + map(lambda f: _convert_field(f), [key_field, value_field]) + ) elif "logicalType" in avro_type: field_type = AVRO_TO_BIGQUERY_TYPES[avro_type["logicalType"]] else: