From 805b5e6fa1fe559c51099b14bf7d8170b1f42a97 Mon Sep 17 00:00:00 2001 From: PhillipHenry Date: Sat, 9 Dec 2023 10:27:58 +0000 Subject: [PATCH] rebuilt site --- docs/ChangeDataFlow.html | 49 ++++++++------- docs/ChangeDataFlowStreaming.html | 7 ++- docs/DeltaLakeCRUD.html | 99 +++++++++++++++++++++++++++++++ docs/MergingData.html | 58 ++++++++++++++++++ docs/index.html | 6 +- 5 files changed, 189 insertions(+), 30 deletions(-) create mode 100644 docs/DeltaLakeCRUD.html create mode 100644 docs/MergingData.html diff --git a/docs/ChangeDataFlow.html b/docs/ChangeDataFlow.html index 862cfa7..bcdf66a 100644 --- a/docs/ChangeDataFlow.html +++ b/docs/ChangeDataFlow.html @@ -46,8 +46,7 @@ -
Run starting. Expected test count is: 0
-ChangeDataFlowSpec:
+
ChangeDataFlowSpec:
 + See https://www.databricks.com/blog/2021/06/09/how-to-simplify-cdc-with-delta-lakes-change-data-feed.html 
 A dataset that is CDC enabled
 - should be created and populated
@@ -66,9 +65,9 @@
 +-------+-----------------------+------+--------+------------+----------------------------------------------------------------------------------------------------------------+----+--------+---------+-----------+--------------+-------------+------------------------------------------------------------+------------+-----------------------------------+
 |version|timestamp              |userId|userName|operation   |operationParameters                                                                                             |job |notebook|clusterId|readVersion|isolationLevel|isBlindAppend|operationMetrics                                            |userMetadata|engineInfo                         |
 +-------+-----------------------+------+--------+------------+----------------------------------------------------------------------------------------------------------------+----+--------+---------+-----------+--------------+-------------+------------------------------------------------------------+------------+-----------------------------------+
-|2      |2023-12-06 14:05:09.157|NULL  |NULL    |WRITE       |{mode -> Append, partitionBy -> []}                                                                             |NULL|NULL    |NULL     |1          |Serializable  |true         |{numFiles -> 2, numOutputRows -> 20, numOutputBytes -> 3371}|NULL        |Apache-Spark/3.5.0 Delta-Lake/3.0.0|
-|1      |2023-12-06 14:05:07.713|NULL  |NULL    |WRITE       |{mode -> Append, partitionBy -> []}                                                                             |NULL|NULL    |NULL     |0          |Serializable  |true         |{numFiles -> 2, numOutputRows -> 20, numOutputBytes -> 3371}|NULL        |Apache-Spark/3.5.0 Delta-Lake/3.0.0|
-|0      |2023-12-06 14:05:01.405|NULL  |NULL    |CREATE TABLE|{isManaged -> true, description -> NULL, partitionBy -> [], properties -> {"delta.enableChangeDataFeed":"true"}}|NULL|NULL    |NULL     |NULL       |Serializable  |true         |{}                                                          |NULL        |Apache-Spark/3.5.0 Delta-Lake/3.0.0|
+|2      |2023-12-09 10:26:50.665|NULL  |NULL    |WRITE       |{mode -> Append, partitionBy -> []}                                                                             |NULL|NULL    |NULL     |1          |Serializable  |true         |{numFiles -> 2, numOutputRows -> 20, numOutputBytes -> 3371}|NULL        |Apache-Spark/3.5.0 Delta-Lake/3.0.0|
+|1      |2023-12-09 10:26:49.617|NULL  |NULL    |WRITE       |{mode -> Append, partitionBy -> []}                                                                             |NULL|NULL    |NULL     |0          |Serializable  |true         |{numFiles -> 2, numOutputRows -> 20, numOutputBytes -> 3371}|NULL        |Apache-Spark/3.5.0 Delta-Lake/3.0.0|
+|0      |2023-12-09 10:26:48.229|NULL  |NULL    |CREATE TABLE|{isManaged -> true, description -> NULL, partitionBy -> [], properties -> {"delta.enableChangeDataFeed":"true"}}|NULL|NULL    |NULL     |NULL       |Serializable  |true         |{}                                                          |NULL        |Apache-Spark/3.5.0 Delta-Lake/3.0.0|
 +-------+-----------------------+------+--------+------------+----------------------------------------------------------------------------------------------------------------+----+--------+---------+-----------+--------------+-------------+------------------------------------------------------------+------------+-----------------------------------+
 
  
@@ -88,26 +87,26 @@
 +---+-------+------------+----------+-----------------------+
 |id |label  |partitionKey|date      |timestamp              |
 +---+-------+------------+----------+-----------------------+
-|0  |label_0|0           |2023-12-06|2023-12-06 14:04:52.936|
-|0  |label_0|0           |2023-12-06|2023-12-06 14:04:52.936|
-|1  |label_1|1           |2023-12-05|2023-12-06 14:04:53.136|
-|1  |label_1|1           |2023-12-05|2023-12-06 14:04:53.136|
-|2  |label_2|2           |2023-12-04|2023-12-06 14:04:53.336|
-|2  |label_2|2           |2023-12-04|2023-12-06 14:04:53.336|
-|3  |label_3|3           |2023-12-03|2023-12-06 14:04:53.536|
-|3  |label_3|3           |2023-12-03|2023-12-06 14:04:53.536|
-|4  |label_4|4           |2023-12-02|2023-12-06 14:04:53.736|
-|4  |label_4|4           |2023-12-02|2023-12-06 14:04:53.736|
-|5  |label_5|0           |2023-12-01|2023-12-06 14:04:53.936|
-|5  |label_5|0           |2023-12-01|2023-12-06 14:04:53.936|
-|6  |label_6|1           |2023-11-30|2023-12-06 14:04:54.136|
-|6  |label_6|1           |2023-11-30|2023-12-06 14:04:54.136|
-|7  |label_7|2           |2023-11-29|2023-12-06 14:04:54.336|
-|7  |label_7|2           |2023-11-29|2023-12-06 14:04:54.336|
-|8  |label_8|3           |2023-11-28|2023-12-06 14:04:54.536|
-|8  |label_8|3           |2023-11-28|2023-12-06 14:04:54.536|
-|9  |label_9|4           |2023-11-27|2023-12-06 14:04:54.736|
-|9  |label_9|4           |2023-11-27|2023-12-06 14:04:54.736|
+|0  |label_0|0           |2023-12-09|2023-12-09 10:26:19.581|
+|0  |label_0|0           |2023-12-09|2023-12-09 10:26:19.581|
+|1  |label_1|1           |2023-12-08|2023-12-09 10:26:19.781|
+|1  |label_1|1           |2023-12-08|2023-12-09 10:26:19.781|
+|2  |label_2|2           |2023-12-07|2023-12-09 10:26:19.981|
+|2  |label_2|2           |2023-12-07|2023-12-09 10:26:19.981|
+|3  |label_3|3           |2023-12-06|2023-12-09 10:26:20.181|
+|3  |label_3|3           |2023-12-06|2023-12-09 10:26:20.181|
+|4  |label_4|4           |2023-12-05|2023-12-09 10:26:20.381|
+|4  |label_4|4           |2023-12-05|2023-12-09 10:26:20.381|
+|5  |label_5|0           |2023-12-04|2023-12-09 10:26:20.581|
+|5  |label_5|0           |2023-12-04|2023-12-09 10:26:20.581|
+|6  |label_6|1           |2023-12-03|2023-12-09 10:26:20.781|
+|6  |label_6|1           |2023-12-03|2023-12-09 10:26:20.781|
+|7  |label_7|2           |2023-12-02|2023-12-09 10:26:20.981|
+|7  |label_7|2           |2023-12-02|2023-12-09 10:26:20.981|
+|8  |label_8|3           |2023-12-01|2023-12-09 10:26:21.181|
+|8  |label_8|3           |2023-12-01|2023-12-09 10:26:21.181|
+|9  |label_9|4           |2023-11-30|2023-12-09 10:26:21.381|
+|9  |label_9|4           |2023-11-30|2023-12-09 10:26:21.381|
 +---+-------+------------+----------+-----------------------+
 only showing top 20 rows
 
diff --git a/docs/ChangeDataFlowStreaming.html b/docs/ChangeDataFlowStreaming.html
index bbd3a19..ed9560c 100644
--- a/docs/ChangeDataFlowStreaming.html
+++ b/docs/ChangeDataFlowStreaming.html
@@ -46,7 +46,8 @@
 
 
 
-
ChangeDataFlowStreamingSpec:
+
Run starting. Expected test count is: 0
+ChangeDataFlowStreamingSpec:
 + https://docs.databricks.com/en/structured-streaming/delta-lake.html 
 A dataset that is updated
 - should write its deltas to another table as a stream
@@ -68,9 +69,9 @@
 ) USING DELTA 
   + When we start streaming from ChangeDataFlowStreamingSpec to streamsink with a watermark of 4 seconds and a trigger processing time of 4000 ms 
   + And the initial count in streamsink is 0 
-  + And we append 100 rows with a timestamp ranging from 2023-12-06 14:05:20.663 to 2023-12-06 14:06:59.663 
+  + And we append 100 rows with a timestamp ranging from 2023-12-09 10:26:36.489 to 2023-12-09 10:28:15.489 
   + And we wait 4000 ms 
-  + Then the final row count at Wed Dec 06 14:05:32 UTC 2023 in streamsink is 100 rows 
+  + Then the final row count at Sat Dec 09 10:26:47 UTC 2023 in streamsink is 100 rows 
 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  
 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  
diff --git a/docs/DeltaLakeCRUD.html b/docs/DeltaLakeCRUD.html new file mode 100644 index 0000000..272f582 --- /dev/null +++ b/docs/DeltaLakeCRUD.html @@ -0,0 +1,99 @@ + + + + + + +
DeltaLakeCRUDSpec:
+A Delta table
+- should be created and populated
+  + Given a table created with SQL
+CREATE TABLE DeltaLakeCRUDSpec (
+  id int,
+  label String,
+  partitionKey long,
+  date Date,
+  timestamp Timestamp
+) USING DELTA 
+  + When we write 20 rows to DeltaLakeCRUDSpec 
+  + Then the table indeed contains all the data 
+  + And the metastore contains a reference to the table DeltaLakeCRUDSpec 
++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  
+- should support updates with 'update DeltaLakeCRUDSpec set label='ipse locum''
+  + Given SQL 
+update
+  DeltaLakeCRUDSpec
+set
+  label = 'ipse locum' 
+  + When we execute it 
+  + Then all rows are updated 
+  + And look like:
+			Datum(10,ipse locum,0,2023-11-29,2023-12-09 10:26:21.581)
+			Datum(11,ipse locum,1,2023-11-28,2023-12-09 10:26:21.781)
+			Datum(12,ipse locum,2,2023-11-27,2023-12-09 10:26:21.981)
+			... 
++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  
+- should be able to have its schema updated
+  + Given SQL 
+ALTER TABLE
+  DeltaLakeCRUDSpec
+ADD
+  COLUMNS (new_string string comment 'new_string docs') 
+  + When we execute it 
+  + Then all rows are updated 
+  + And look like:
+			[10,ipse locum,0,2023-11-29,2023-12-09 10:26:21.581,null]
+			[11,ipse locum,1,2023-11-28,2023-12-09 10:26:21.781,null]
+			[12,ipse locum,2,2023-11-27,2023-12-09 10:26:21.981,null]
+			... 
++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  
++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  
+Run completed in 48 seconds, 54 milliseconds.
+Total number of tests run: 7
+Suites: completed 4, aborted 0
+Tests: succeeded 7, failed 0, canceled 0, ignored 0, pending 0
+All tests passed.
+ + diff --git a/docs/MergingData.html b/docs/MergingData.html new file mode 100644 index 0000000..a11ecaa --- /dev/null +++ b/docs/MergingData.html @@ -0,0 +1,58 @@ + + + + + + +
MergingDataSpec:
+Data
+- should be merged
+  + Given a table with 20 rows 
+  + When we use 'replaceWhere' to write those partitions where partitionKey = 0 
+  + Then the partition IDs that are not 0 will not change but partition 0 will have the new rows 
++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  
++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +  
+ + diff --git a/docs/index.html b/docs/index.html index d862122..cbb8414 100644 --- a/docs/index.html +++ b/docs/index.html @@ -11,7 +11,8 @@ +DeltaLakeCRUD +MergingData" /> @@ -102,7 +103,8 @@ The code for these tests can be found in GitHub

ChangeDataFlow

ChangeDataFlowStreaming

-

Crud

+

DeltaLakeCRUD

+

MergingData