From d1982d921516aea33b5dcc7db70006d29562dc63 Mon Sep 17 00:00:00 2001 From: spenes Date: Fri, 28 Jun 2024 01:05:28 +0300 Subject: [PATCH] RDB Databricks Loader: remove atomic field lengths when RDB Loader creates Databricks table --- .../databricks/DatabricksEventsTable.scala | 170 +++++++++--------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/modules/databricks-loader/src/main/scala/com/snowplowanalytics/snowplow/loader/databricks/DatabricksEventsTable.scala b/modules/databricks-loader/src/main/scala/com/snowplowanalytics/snowplow/loader/databricks/DatabricksEventsTable.scala index 45e767d5a..2cbaa20a3 100644 --- a/modules/databricks-loader/src/main/scala/com/snowplowanalytics/snowplow/loader/databricks/DatabricksEventsTable.scala +++ b/modules/databricks-loader/src/main/scala/com/snowplowanalytics/snowplow/loader/databricks/DatabricksEventsTable.scala @@ -14,88 +14,88 @@ object DatabricksEventsTable { def statement(tableName: String): String = s""" |CREATE TABLE IF NOT EXISTS $tableName ( - | app_id VARCHAR(255), - | platform VARCHAR(255), + | app_id STRING, + | platform STRING, | etl_tstamp TIMESTAMP, - | collector_tstamp TIMESTAMP NOT NULL, + | collector_tstamp TIMESTAMP NOT NULL, | dvce_created_tstamp TIMESTAMP, - | event VARCHAR(128), - | event_id VARCHAR(36) NOT NULL, + | event STRING, + | event_id STRING NOT NULL, | txn_id INTEGER, - | name_tracker VARCHAR(128), - | v_tracker VARCHAR(100), - | v_collector VARCHAR(100) NOT NULL, - | v_etl VARCHAR(100) NOT NULL, - | user_id VARCHAR(255), - | user_ipaddress VARCHAR(128), - | user_fingerprint VARCHAR(128), - | domain_userid VARCHAR(128), + | name_tracker STRING, + | v_tracker STRING, + | v_collector STRING NOT NULL, + | v_etl STRING NOT NULL, + | user_id STRING, + | user_ipaddress STRING, + | user_fingerprint STRING, + | domain_userid STRING, | domain_sessionidx SMALLINT, - | network_userid VARCHAR(128), - | geo_country VARCHAR(2), - | geo_region VARCHAR(3), - | geo_city VARCHAR(75), - | geo_zipcode VARCHAR(15), + | network_userid STRING, + | geo_country STRING, + | geo_region STRING, + | geo_city STRING, + | geo_zipcode STRING, | geo_latitude DOUBLE, | geo_longitude DOUBLE, - | geo_region_name VARCHAR(100), - | ip_isp VARCHAR(100), - | ip_organization VARCHAR(128), - | ip_domain VARCHAR(128), - | ip_netspeed VARCHAR(100), - | page_url VARCHAR(4096), - | page_title VARCHAR(2000), - | page_referrer VARCHAR(4096), - | page_urlscheme VARCHAR(16), - | page_urlhost VARCHAR(255), + | geo_region_name STRING, + | ip_isp STRING, + | ip_organization STRING, + | ip_domain STRING, + | ip_netspeed STRING, + | page_url STRING, + | page_title STRING, + | page_referrer STRING, + | page_urlscheme STRING, + | page_urlhost STRING, | page_urlport INTEGER, - | page_urlpath VARCHAR(3000), - | page_urlquery VARCHAR(6000), - | page_urlfragment VARCHAR(3000), - | refr_urlscheme VARCHAR(16), - | refr_urlhost VARCHAR(255), + | page_urlpath STRING, + | page_urlquery STRING, + | page_urlfragment STRING, + | refr_urlscheme STRING, + | refr_urlhost STRING, | refr_urlport INTEGER, - | refr_urlpath VARCHAR(6000), - | refr_urlquery VARCHAR(6000), - | refr_urlfragment VARCHAR(3000), - | refr_medium VARCHAR(25), - | refr_source VARCHAR(50), - | refr_term VARCHAR(255), - | mkt_medium VARCHAR(255), - | mkt_source VARCHAR(255), - | mkt_term VARCHAR(255), - | mkt_content VARCHAR(500), - | mkt_campaign VARCHAR(255), - | se_category VARCHAR(1000), - | se_action VARCHAR(1000), - | se_label VARCHAR(4096), - | se_property VARCHAR(1000), + | refr_urlpath STRING, + | refr_urlquery STRING, + | refr_urlfragment STRING, + | refr_medium STRING, + | refr_source STRING, + | refr_term STRING, + | mkt_medium STRING, + | mkt_source STRING, + | mkt_term STRING, + | mkt_content STRING, + | mkt_campaign STRING, + | se_category STRING, + | se_action STRING, + | se_label STRING, + | se_property STRING, | se_value DOUBLE, - | tr_orderid VARCHAR(255), - | tr_affiliation VARCHAR(255), + | tr_orderid STRING, + | tr_affiliation STRING, | tr_total DECIMAL(18,2), | tr_tax DECIMAL(18,2), | tr_shipping DECIMAL(18,2), - | tr_city VARCHAR(255), - | tr_state VARCHAR(255), - | tr_country VARCHAR(255), - | ti_orderid VARCHAR(255), - | ti_sku VARCHAR(255), - | ti_name VARCHAR(255), - | ti_category VARCHAR(255), + | tr_city STRING, + | tr_state STRING, + | tr_country STRING, + | ti_orderid STRING, + | ti_sku STRING, + | ti_name STRING, + | ti_category STRING, | ti_price DECIMAL(18,2), | ti_quantity INTEGER, | pp_xoffset_min INTEGER, | pp_xoffset_max INTEGER, | pp_yoffset_min INTEGER, | pp_yoffset_max INTEGER, - | useragent VARCHAR(1000), - | br_name VARCHAR(50), - | br_family VARCHAR(50), - | br_version VARCHAR(50), - | br_type VARCHAR(50), - | br_renderengine VARCHAR(50), - | br_lang VARCHAR(255), + | useragent STRING, + | br_name STRING, + | br_family STRING, + | br_version STRING, + | br_type STRING, + | br_renderengine STRING, + | br_lang STRING, | br_features_pdf BOOLEAN, | br_features_flash BOOLEAN, | br_features_java BOOLEAN, @@ -106,41 +106,41 @@ object DatabricksEventsTable { | br_features_gears BOOLEAN, | br_features_silverlight BOOLEAN, | br_cookies BOOLEAN, - | br_colordepth VARCHAR(12), + | br_colordepth STRING, | br_viewwidth INTEGER, | br_viewheight INTEGER, - | os_name VARCHAR(50), - | os_family VARCHAR(50), - | os_manufacturer VARCHAR(50), - | os_timezone VARCHAR(255), - | dvce_type VARCHAR(50), + | os_name STRING, + | os_family STRING, + | os_manufacturer STRING, + | os_timezone STRING, + | dvce_type STRING, | dvce_ismobile BOOLEAN, | dvce_screenwidth INTEGER, | dvce_screenheight INTEGER, - | doc_charset VARCHAR(128), + | doc_charset STRING, | doc_width INTEGER, | doc_height INTEGER, - | tr_currency VARCHAR(3), + | tr_currency STRING, | tr_total_base DECIMAL(18, 2), | tr_tax_base DECIMAL(18, 2), | tr_shipping_base DECIMAL(18, 2), - | ti_currency VARCHAR(3), + | ti_currency STRING, | ti_price_base DECIMAL(18, 2), - | base_currency VARCHAR(3), - | geo_timezone VARCHAR(64), - | mkt_clickid VARCHAR(128), - | mkt_network VARCHAR(64), - | etl_tags VARCHAR(500), + | base_currency STRING, + | geo_timezone STRING, + | mkt_clickid STRING, + | mkt_network STRING, + | etl_tags STRING, | dvce_sent_tstamp TIMESTAMP, - | refr_domain_userid VARCHAR(128), + | refr_domain_userid STRING, | refr_dvce_tstamp TIMESTAMP, - | domain_sessionid VARCHAR(128), + | domain_sessionid STRING, | derived_tstamp TIMESTAMP, - | event_vendor VARCHAR(1000), - | event_name VARCHAR(1000), - | event_format VARCHAR(128), - | event_version VARCHAR(128), - | event_fingerprint VARCHAR(128), + | event_vendor STRING, + | event_name STRING, + | event_format STRING, + | event_version STRING, + | event_fingerprint STRING, | true_tstamp TIMESTAMP, | load_tstamp TIMESTAMP, | collector_tstamp_date DATE GENERATED ALWAYS AS (DATE(collector_tstamp))