-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[flink] Introduce zorder/order sort compact for dynamic bucket table (#…
- Loading branch information
1 parent
7e684f9
commit cef2fc1
Showing
17 changed files
with
689 additions
and
42 deletions.
There are no files selected for viewing
29 changes: 29 additions & 0 deletions
29
paimon-core/src/main/java/org/apache/paimon/index/BucketAssigner.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.paimon.index; | ||
|
||
import org.apache.paimon.data.BinaryRow; | ||
|
||
/** Assigner a bucket for a record, just used in dynamic bucket table. */ | ||
public interface BucketAssigner { | ||
|
||
int assign(BinaryRow partition, int hash); | ||
|
||
void prepareCommit(long commitIdentifier); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
93 changes: 93 additions & 0 deletions
93
paimon-core/src/main/java/org/apache/paimon/index/SimpleHashBucketAssigner.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.paimon.index; | ||
|
||
import org.apache.paimon.data.BinaryRow; | ||
import org.apache.paimon.utils.Int2ShortHashMap; | ||
|
||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
/** When we need to overwrite the table, we should use this to avoid loading index. */ | ||
public class SimpleHashBucketAssigner implements BucketAssigner { | ||
|
||
private final int numAssigners; | ||
private final int assignId; | ||
private final long targetBucketRowNumber; | ||
|
||
private final Map<BinaryRow, SimplePartitionIndex> partitionIndex; | ||
|
||
public SimpleHashBucketAssigner(int numAssigners, int assignId, long targetBucketRowNumber) { | ||
this.numAssigners = numAssigners; | ||
this.assignId = assignId; | ||
this.targetBucketRowNumber = targetBucketRowNumber; | ||
this.partitionIndex = new HashMap<>(); | ||
} | ||
|
||
@Override | ||
public int assign(BinaryRow partition, int hash) { | ||
SimplePartitionIndex index = | ||
this.partitionIndex.computeIfAbsent(partition, p -> new SimplePartitionIndex()); | ||
return index.assign(hash); | ||
} | ||
|
||
@Override | ||
public void prepareCommit(long commitIdentifier) { | ||
// do nothing | ||
} | ||
|
||
/** Simple partition bucket hash assigner. */ | ||
private class SimplePartitionIndex { | ||
|
||
public final Int2ShortHashMap hash2Bucket = new Int2ShortHashMap(); | ||
private final Map<Integer, Long> bucketInformation; | ||
private int currentBucket; | ||
|
||
private SimplePartitionIndex() { | ||
bucketInformation = new HashMap<>(); | ||
loadNewBucket(); | ||
} | ||
|
||
public int assign(int hash) { | ||
// the same hash should go into the same bucket | ||
if (hash2Bucket.containsKey(hash)) { | ||
return hash2Bucket.get(hash); | ||
} | ||
|
||
Long num = bucketInformation.computeIfAbsent(currentBucket, i -> 0L); | ||
if (num >= targetBucketRowNumber) { | ||
loadNewBucket(); | ||
} | ||
bucketInformation.compute(currentBucket, (i, l) -> l == null ? 1L : l + 1); | ||
hash2Bucket.put(hash, (short) currentBucket); | ||
return currentBucket; | ||
} | ||
|
||
private void loadNewBucket() { | ||
for (int i = 0; i < Short.MAX_VALUE; i++) { | ||
if (i % numAssigners == assignId && !bucketInformation.containsKey(i)) { | ||
currentBucket = i; | ||
return; | ||
} | ||
} | ||
throw new RuntimeException( | ||
"Can't find a suitable bucket to assign, all the bucket are assigned?"); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
69 changes: 69 additions & 0 deletions
69
paimon-core/src/test/java/org/apache/paimon/index/SimpleHashBucketAssignerTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.paimon.index; | ||
|
||
import org.apache.paimon.data.BinaryRow; | ||
|
||
import org.assertj.core.api.Assertions; | ||
import org.junit.jupiter.api.Test; | ||
|
||
/** Tests for {@link SimpleHashBucketAssigner}. */ | ||
public class SimpleHashBucketAssignerTest { | ||
|
||
@Test | ||
public void testAssign() { | ||
SimpleHashBucketAssigner simpleHashBucketAssigner = new SimpleHashBucketAssigner(2, 0, 100); | ||
|
||
BinaryRow binaryRow = BinaryRow.EMPTY_ROW; | ||
int hash = 0; | ||
|
||
for (int i = 0; i < 100; i++) { | ||
int bucket = simpleHashBucketAssigner.assign(binaryRow, hash++); | ||
Assertions.assertThat(bucket).isEqualTo(0); | ||
} | ||
|
||
for (int i = 0; i < 100; i++) { | ||
int bucket = simpleHashBucketAssigner.assign(binaryRow, hash++); | ||
Assertions.assertThat(bucket).isEqualTo(2); | ||
} | ||
|
||
int bucket = simpleHashBucketAssigner.assign(binaryRow, hash++); | ||
Assertions.assertThat(bucket).isEqualTo(4); | ||
} | ||
|
||
@Test | ||
public void testAssignWithSameHash() { | ||
SimpleHashBucketAssigner simpleHashBucketAssigner = new SimpleHashBucketAssigner(2, 0, 100); | ||
|
||
BinaryRow binaryRow = BinaryRow.EMPTY_ROW; | ||
int hash = 0; | ||
|
||
for (int i = 0; i < 100; i++) { | ||
int bucket = simpleHashBucketAssigner.assign(binaryRow, hash++); | ||
Assertions.assertThat(bucket).isEqualTo(0); | ||
} | ||
|
||
// reset hash, the record will go into bucket 0 | ||
hash = 0; | ||
for (int i = 0; i < 100; i++) { | ||
int bucket = simpleHashBucketAssigner.assign(binaryRow, hash++); | ||
Assertions.assertThat(bucket).isEqualTo(0); | ||
} | ||
} | ||
} |
Oops, something went wrong.