diff --git a/docs/ChangeDataFlow.html b/docs/ChangeDataFlow.html index f249e59..4fe31fd 100644 --- a/docs/ChangeDataFlow.html +++ b/docs/ChangeDataFlow.html @@ -65,9 +65,9 @@ +-------+-----------------------+------+--------+------------+----------------------------------------------------------------------------------------------------------------+----+--------+---------+-----------+--------------+-------------+------------------------------------------------------------+------------+-----------------------------------+ |version|timestamp |userId|userName|operation |operationParameters |job |notebook|clusterId|readVersion|isolationLevel|isBlindAppend|operationMetrics |userMetadata|engineInfo | +-------+-----------------------+------+--------+------------+----------------------------------------------------------------------------------------------------------------+----+--------+---------+-----------+--------------+-------------+------------------------------------------------------------+------------+-----------------------------------+ -|2 |2023-12-11 10:37:41.307|NULL |NULL |WRITE |{mode -> Append, partitionBy -> []} |NULL|NULL |NULL |1 |Serializable |true |{numFiles -> 2, numOutputRows -> 20, numOutputBytes -> 3373}|NULL |Apache-Spark/3.5.0 Delta-Lake/3.0.0| -|1 |2023-12-11 10:37:39.671|NULL |NULL |WRITE |{mode -> Append, partitionBy -> []} |NULL|NULL |NULL |0 |Serializable |true |{numFiles -> 2, numOutputRows -> 20, numOutputBytes -> 3373}|NULL |Apache-Spark/3.5.0 Delta-Lake/3.0.0| -|0 |2023-12-11 10:37:37.655|NULL |NULL |CREATE TABLE|{isManaged -> true, description -> NULL, partitionBy -> [], properties -> {"delta.enableChangeDataFeed":"true"}}|NULL|NULL |NULL |NULL |Serializable |true |{} |NULL |Apache-Spark/3.5.0 Delta-Lake/3.0.0| +|2 |2023-12-11 13:43:48.923|NULL |NULL |WRITE |{mode -> Append, partitionBy -> []} |NULL|NULL |NULL |1 |Serializable |true |{numFiles -> 2, numOutputRows -> 20, numOutputBytes -> 3373}|NULL |Apache-Spark/3.5.0 Delta-Lake/3.0.0| +|1 |2023-12-11 13:43:47.723|NULL |NULL |WRITE |{mode -> Append, partitionBy -> []} |NULL|NULL |NULL |0 |Serializable |true |{numFiles -> 2, numOutputRows -> 20, numOutputBytes -> 3373}|NULL |Apache-Spark/3.5.0 Delta-Lake/3.0.0| +|0 |2023-12-11 13:43:46.439|NULL |NULL |CREATE TABLE|{isManaged -> true, description -> NULL, partitionBy -> [], properties -> {"delta.enableChangeDataFeed":"true"}}|NULL|NULL |NULL |NULL |Serializable |true |{} |NULL |Apache-Spark/3.5.0 Delta-Lake/3.0.0| +-------+-----------------------+------+--------+------------+----------------------------------------------------------------------------------------------------------------+----+--------+---------+-----------+--------------+-------------+------------------------------------------------------------+------------+-----------------------------------+ @@ -87,26 +87,26 @@ +---+-------+------------+----------+-----------------------+ |id |label |partitionKey|date |timestamp | +---+-------+------------+----------+-----------------------+ -|0 |label_0|0 |2023-12-11|2023-12-11 10:37:08.564| -|0 |label_0|0 |2023-12-11|2023-12-11 10:37:08.564| -|1 |label_1|1 |2023-12-10|2023-12-11 10:37:08.764| -|1 |label_1|1 |2023-12-10|2023-12-11 10:37:08.764| -|2 |label_2|2 |2023-12-09|2023-12-11 10:37:08.964| -|2 |label_2|2 |2023-12-09|2023-12-11 10:37:08.964| -|3 |label_3|3 |2023-12-08|2023-12-11 10:37:09.164| -|3 |label_3|3 |2023-12-08|2023-12-11 10:37:09.164| -|4 |label_4|4 |2023-12-07|2023-12-11 10:37:09.364| -|4 |label_4|4 |2023-12-07|2023-12-11 10:37:09.364| -|5 |label_5|0 |2023-12-06|2023-12-11 10:37:09.564| -|5 |label_5|0 |2023-12-06|2023-12-11 10:37:09.564| -|6 |label_6|1 |2023-12-05|2023-12-11 10:37:09.764| -|6 |label_6|1 |2023-12-05|2023-12-11 10:37:09.764| -|7 |label_7|2 |2023-12-04|2023-12-11 10:37:09.964| -|7 |label_7|2 |2023-12-04|2023-12-11 10:37:09.964| -|8 |label_8|3 |2023-12-03|2023-12-11 10:37:10.164| -|8 |label_8|3 |2023-12-03|2023-12-11 10:37:10.164| -|9 |label_9|4 |2023-12-02|2023-12-11 10:37:10.364| -|9 |label_9|4 |2023-12-02|2023-12-11 10:37:10.364| +|0 |label_0|0 |2023-12-11|2023-12-11 13:43:15.664| +|0 |label_0|0 |2023-12-11|2023-12-11 13:43:15.664| +|1 |label_1|1 |2023-12-10|2023-12-11 13:43:15.864| +|1 |label_1|1 |2023-12-10|2023-12-11 13:43:15.864| +|2 |label_2|2 |2023-12-09|2023-12-11 13:43:16.064| +|2 |label_2|2 |2023-12-09|2023-12-11 13:43:16.064| +|3 |label_3|3 |2023-12-08|2023-12-11 13:43:16.264| +|3 |label_3|3 |2023-12-08|2023-12-11 13:43:16.264| +|4 |label_4|4 |2023-12-07|2023-12-11 13:43:16.464| +|4 |label_4|4 |2023-12-07|2023-12-11 13:43:16.464| +|5 |label_5|0 |2023-12-06|2023-12-11 13:43:16.664| +|5 |label_5|0 |2023-12-06|2023-12-11 13:43:16.664| +|6 |label_6|1 |2023-12-05|2023-12-11 13:43:16.864| +|6 |label_6|1 |2023-12-05|2023-12-11 13:43:16.864| +|7 |label_7|2 |2023-12-04|2023-12-11 13:43:17.064| +|7 |label_7|2 |2023-12-04|2023-12-11 13:43:17.064| +|8 |label_8|3 |2023-12-03|2023-12-11 13:43:17.264| +|8 |label_8|3 |2023-12-03|2023-12-11 13:43:17.264| +|9 |label_9|4 |2023-12-02|2023-12-11 13:43:17.464| +|9 |label_9|4 |2023-12-02|2023-12-11 13:43:17.464| +---+-------+------------+----------+-----------------------+ only showing top 20 rows diff --git a/docs/ChangeDataFlowStreaming.html b/docs/ChangeDataFlowStreaming.html index d451dd0..5074969 100644 --- a/docs/ChangeDataFlowStreaming.html +++ b/docs/ChangeDataFlowStreaming.html @@ -69,9 +69,9 @@ ) USING DELTA + When we start streaming from ChangeDataFlowStreamingSpec to streamsink with a watermark of 4 seconds and a trigger processing time of 4000 ms + And the initial count in streamsink is 0 - + And we append 100 rows with a timestamp ranging from 2023-12-11 10:37:25.731 to 2023-12-11 10:39:04.731 + + And we append 100 rows with a timestamp ranging from 2023-12-11 13:43:35.293 to 2023-12-11 13:45:14.293 + And we wait 4000 ms - + Then the final row count at Mon Dec 11 10:37:36 UTC 2023 in streamsink is 100 rows + + Then the final row count at Mon Dec 11 13:43:45 UTC 2023 in streamsink is 100 rows + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/DeltaLakeCRUD.html b/docs/DeltaLakeCRUD.html index 659fa42..f41f5cb 100644 --- a/docs/DeltaLakeCRUD.html +++ b/docs/DeltaLakeCRUD.html @@ -70,9 +70,9 @@ + When we execute it + Then all rows are updated + And look like: - Datum(10,ipse locum,0,2023-12-01,2023-12-11 10:37:10.564) - Datum(11,ipse locum,1,2023-11-30,2023-12-11 10:37:10.764) - Datum(12,ipse locum,2,2023-11-29,2023-12-11 10:37:10.964) + Datum(10,ipse locum,0,2023-12-01,2023-12-11 13:43:17.664) + Datum(11,ipse locum,1,2023-11-30,2023-12-11 13:43:17.864) + Datum(12,ipse locum,2,2023-11-29,2023-12-11 13:43:18.064) ... + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - should be able to have its schema updated @@ -84,16 +84,16 @@ + When we execute it + Then all rows are updated + And look like: - [10,ipse locum,0,2023-12-01,2023-12-11 10:37:10.564,null] - [11,ipse locum,1,2023-11-30,2023-12-11 10:37:10.764,null] - [12,ipse locum,2,2023-11-29,2023-12-11 10:37:10.964,null] + [10,ipse locum,0,2023-12-01,2023-12-11 13:43:17.664,null] + [11,ipse locum,1,2023-11-30,2023-12-11 13:43:17.864,null] + [12,ipse locum,2,2023-11-29,2023-12-11 13:43:18.064,null] ... + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -Run completed in 50 seconds, 656 milliseconds. -Total number of tests run: 7 +Run completed in 51 seconds, 680 milliseconds. +Total number of tests run: 8 Suites: completed 4, aborted 0 -Tests: succeeded 7, failed 0, canceled 0, ignored 0, pending 0 +Tests: succeeded 8, failed 0, canceled 0, ignored 0, pending 0 All tests passed. diff --git a/docs/MergingData.html b/docs/MergingData.html index 60b4594..1e3ab05 100644 --- a/docs/MergingData.html +++ b/docs/MergingData.html @@ -50,6 +50,7 @@ Data - should be merged + Given a table with 20 rows + + When we use 'replaceWhere' to write 20 new rows that have partition keys {0} and where the merge condition is 'partitionKey = 0' + And the distribution of partition keys to row counts looks like: Partition Key Count ------------- ----- @@ -58,7 +59,6 @@ 2 4 3 4 4 4 - + When we use 'replaceWhere' to write 20 new rows where partitionKey = 0 + Then the partition IDs that are not 0 will not change but partition 0 will have the new rows + And the distribution of partition keys to row counts looks like: Partition Key Count @@ -67,7 +67,13 @@ 1 4 2 4 3 4 -4 4 +4 4 +where the data with partition key '0' has been upserted ++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +- should not be merged if the partition key is not defined + + Given a table with partition keys 0, 1, 2, 3, 4 + + When we use 'replaceWhere' to write 20 new rows that have partition keys {0, 1, 2, 3, 4} and where the merge condition is 'partitionKey = 0' + + Then a DeltaInvariantViolationException$ is thrown + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +