From 43be3c680433d16a6daa20c5d6dcc770d69246b5 Mon Sep 17 00:00:00 2001 From: fjetter Date: Mon, 16 Dec 2019 18:25:17 +0100 Subject: [PATCH] Remove _KTK_HASH_BUCKET if exists --- CHANGES.rst | 11 ++++++++--- kartothek/io/dask/_update.py | 2 ++ tests/io/dask/dataframe/test_update.py | 6 ++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index ec4493a8..18b58543 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,13 +2,18 @@ Changelog ========= -Version Unreleased -================== +Version 3.6.2 (2019-12-17) +========================== Improvements ^^^^^^^^^^^^ -- Add more explicit typing to :mod:`kartothek.io.eager`. +* Add more explicit typing to :mod:`kartothek.io.eager`. + +Bug fixes +^^^^^^^^^ +* Fix an issue where :func:`~kartothek.io.dask.dataframe.update_dataset_from_ddf` would create a column named "_KTK_HASH_BUCKET" in the dataset. + Version 3.6.1 (2019-12-11) ========================== diff --git a/kartothek/io/dask/_update.py b/kartothek/io/dask/_update.py index 7c1017af..f60e3d10 100644 --- a/kartothek/io/dask/_update.py +++ b/kartothek/io/dask/_update.py @@ -126,6 +126,8 @@ def _store_partition( df_serializer, metadata_version, ): + if _KTK_HASH_BUCKET in df: + df = df.drop(_KTK_HASH_BUCKET, axis=1) store = store_factory() # I don't have access to the group values mps = parse_input_to_metapartition( diff --git a/tests/io/dask/dataframe/test_update.py b/tests/io/dask/dataframe/test_update.py index 0353a150..c2975f1e 100644 --- a/tests/io/dask/dataframe/test_update.py +++ b/tests/io/dask/dataframe/test_update.py @@ -171,6 +171,12 @@ def test_update_shuffle_buckets( range(unique_secondaries) ) + assert set(dataset.table_meta["core"].names) == { + "primary", + "secondary", + "sorted_column", + } + factory = DatasetFactory("output_dataset_uuid", store_factory) factory.load_all_indices()