Skip to content

Commit

Permalink
improve range based size
Browse files Browse the repository at this point in the history
  • Loading branch information
wg1026688210 committed Jan 29, 2024
1 parent 99fd2b4 commit dceed11
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -552,22 +552,24 @@ static <T> T[] allocateRangeBaseSize(List<Tuple2<T, Integer>> sampledData, int r
int boundarySize = rangesNum - 1;
@SuppressWarnings("unchecked")
T[] boundaries = (T[]) new Object[boundarySize];
long totalSize = sampledData.stream().mapToLong(t -> (long) t.f1).sum();

if (!sampledData.isEmpty()) {
double stepRange = totalSize / (double) rangesNum;
long restSize = sampledData.stream().mapToLong(t -> (long) t.f1).sum();
double stepRange = restSize / (double) rangesNum;

int currentWeight = 0;
int index = 0;

for (int i = 0; i < boundarySize; i++) {
while (currentWeight < stepRange && index < sampeNum) {
boundaries[i] = sampledData.get(Math.min(index, sampeNum - 1)).f0;
currentWeight += sampledData.get(index++).f1;
Integer sampleWeight = sampledData.get(index++).f1;
currentWeight += sampleWeight;
restSize -= sampleWeight;
}

currentWeight = 0;
stepRange = (sampeNum - index) / (double) (rangesNum - i - 1);
stepRange = restSize / (double) (rangesNum - i - 1);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,10 @@ void testAllocateRange() {
new Tuple2<>(3, 1),
new Tuple2<>(4, 1),
new Tuple2<>(5, 4),
new Tuple2<>(6, 4));
new Tuple2<>(6, 4),
new Tuple2<>(7, 4));
Assertions.assertEquals(
"[4, 5]", Arrays.deepToString(RangeShuffle.allocateRangeBaseSize(test2, 3)));
"[4, 5, 6]", Arrays.deepToString(RangeShuffle.allocateRangeBaseSize(test2, 4)));

// the size of test data is uneven,and can not be evenly split
List<Tuple2<Integer, Integer>> test1 =
Expand Down

0 comments on commit dceed11

Please sign in to comment.