Skip to content

Commit

Permalink
polish
Browse files Browse the repository at this point in the history
  • Loading branch information
yuzelin committed Mar 8, 2024
1 parent 8c64198 commit 934a7d6
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.types.RowKind;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.function.Function;

import static org.apache.paimon.utils.Preconditions.checkArgument;
Expand All @@ -47,7 +46,6 @@
public class LookupChangelogMergeFunctionWrapper implements MergeFunctionWrapper<ChangelogResult> {

private final LookupMergeFunction mergeFunction;
private final MergeFunction<KeyValue> mergeFunction2;
private final Function<InternalRow, KeyValue> lookup;

private final ChangelogResult reusedResult = new ChangelogResult();
Expand All @@ -67,7 +65,6 @@ public LookupChangelogMergeFunctionWrapper(
"Merge function should be a LookupMergeFunction, but is %s, there is a bug.",
mergeFunction.getClass().getName());
this.mergeFunction = (LookupMergeFunction) mergeFunction;
this.mergeFunction2 = mergeFunctionFactory.create();
this.lookup = lookup;
this.valueEqualiser = valueEqualiser;
this.changelogRowDeduplicate = changelogRowDeduplicate;
Expand All @@ -85,41 +82,19 @@ public void add(KeyValue kv) {

@Override
public ChangelogResult getResult() {
// 1. Compute the latest high level record and containLevel0 of candidates
LinkedList<KeyValue> candidates = mergeFunction.candidates();
Iterator<KeyValue> descending = candidates.descendingIterator();
KeyValue highLevel = null;
boolean containLevel0 = false;
while (descending.hasNext()) {
KeyValue kv = descending.next();
if (kv.level() > 0) {
descending.remove();
if (highLevel == null) {
highLevel = kv;
}
} else {
containLevel0 = true;
}
}
LookupMergeRecordManager recordManager = mergeFunction.recordManager();
mergeFunction.reset();

// 2. Lookup if latest high level record is absent
if (highLevel == null) {
InternalRow lookupKey = candidates.get(0).key();
highLevel = lookup.apply(lookupKey);
}
// 1. Lookup if latest high level record is absent
List<KeyValue> candidates = recordManager.getCandidates(lookup);

// 3. Calculate result
mergeFunction2.reset();
if (highLevel != null) {
mergeFunction2.add(highLevel);
}
candidates.forEach(mergeFunction2::add);
KeyValue result = mergeFunction2.getResult();
// 2. Calculate result
KeyValue result = mergeFunction.getResult(candidates);

// 4. Set changelog when there's level-0 records
// 3. Set changelog when there's level-0 records
reusedResult.reset();
if (containLevel0) {
setChangelog(highLevel, result);
if (recordManager.containsLevel0()) {
setChangelog(recordManager.highLevel(), result);
}

return reusedResult.setResult(result);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

import javax.annotation.Nullable;

import java.util.LinkedList;
import java.util.List;

/**
* A {@link MergeFunction} for lookup, this wrapper only considers the latest high level record,
Expand All @@ -35,36 +35,41 @@
public class LookupMergeFunction implements MergeFunction<KeyValue> {

private final MergeFunction<KeyValue> mergeFunction;
private final LinkedList<KeyValue> candidates = new LinkedList<>();
private final InternalRowSerializer keySerializer;
private final InternalRowSerializer valueSerializer;
private final LookupMergeRecordManager recordManager;

/**
 * Creates a lookup merge function around the given delegate.
 *
 * @param mergeFunction the merge function that performs the actual merging
 * @param keyType row type used to build the serializer that copies record keys
 * @param valueType row type used to build the serializer that copies record values
 */
public LookupMergeFunction(
        MergeFunction<KeyValue> mergeFunction, RowType keyType, RowType valueType) {
    // Serializers are created in the same order as before (key first, then value).
    this.keySerializer = new InternalRowSerializer(keyType);
    this.valueSerializer = new InternalRowSerializer(valueType);
    // Added records are buffered in a dedicated manager.
    this.recordManager = new LookupMergeRecordManager();
    this.mergeFunction = mergeFunction;
}

@Override
public void reset() {
candidates.clear();
recordManager.reset();
}

@Override
public void add(KeyValue kv) {
candidates.add(kv.copy(keySerializer, valueSerializer));
recordManager.add(kv.copy(keySerializer, valueSerializer));
}

/** Merges the candidates currently held by the record manager, without performing a lookup. */
@Override
public KeyValue getResult() {
    List<KeyValue> retained = recordManager.getCandidates();
    return getResult(retained);
}

/**
 * Resets the wrapped merge function, feeds it the given records in list order and returns
 * the value it produces.
 *
 * @param candidates records to merge, in the order they should be added
 * @return the result reported by the wrapped merge function
 */
KeyValue getResult(List<KeyValue> candidates) {
    mergeFunction.reset();
    for (KeyValue candidate : candidates) {
        mergeFunction.add(candidate);
    }
    return mergeFunction.getResult();
}

LinkedList<KeyValue> candidates() {
return candidates;
LookupMergeRecordManager recordManager() {
return recordManager.copy();
}

public static MergeFunctionFactory<KeyValue> wrap(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.mergetree.compact;

import org.apache.paimon.KeyValue;
import org.apache.paimon.data.InternalRow;

import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Collectors;

/**
 * A maintainer of records added to {@link LookupMergeFunction}.
 *
 * <p>Records are kept in insertion order. Every record whose level is not 0 is tracked as a
 * "high level" record; when candidates are produced, only the most recently added high level
 * record is retained and the earlier ones are dropped.
 */
class LookupMergeRecordManager {

    /** All added records, in insertion order. */
    private final List<KeyValue> candidates;

    /** Positions in {@link #candidates} of the records whose level is not 0. */
    private final List<Integer> highLevelIndex;

    /** Latest high level record, either added directly or obtained through a lookup. */
    private KeyValue highLevel = null;

    LookupMergeRecordManager() {
        this.candidates = new ArrayList<>();
        this.highLevelIndex = new ArrayList<>();
    }

    private LookupMergeRecordManager(List<KeyValue> candidates, List<Integer> highLevelIndex) {
        this.candidates = candidates;
        this.highLevelIndex = highLevelIndex;
    }

    /** Appends a record and remembers its position if its level is not 0. */
    void add(KeyValue kv) {
        candidates.add(kv);
        if (kv.level() != 0) {
            highLevelIndex.add(candidates.size() - 1);
            highLevel = kv;
        }
    }

    /** Drops all records and forgets the current high level record. */
    void reset() {
        candidates.clear();
        highLevelIndex.clear();
        highLevel = null;
    }

    /** Same as {@link #getCandidates(Function)} without a lookup function. */
    List<KeyValue> getCandidates() {
        return getCandidates(null);
    }

    /**
     * Returns the records that should take part in the merge: every level-0 record plus the
     * latest high level record, in insertion order.
     *
     * <p>If no high level record was added and {@code lookup} is given, the key of the first
     * record is looked up. A non-null lookup result becomes the high level record and is placed
     * in front of the returned records, so that it is merged before the level-0 records.
     *
     * @param lookup optional function resolving a key to an existing record; may return null
     * @return a new list; modifying it does not affect this manager
     * @throws IndexOutOfBoundsException if {@code lookup} is given, no high level record is
     *     known and no record has been added
     */
    List<KeyValue> getCandidates(@Nullable Function<InternalRow, KeyValue> lookup) {
        setHighLevel();

        if (lookup != null && highLevel == null) {
            InternalRow lookupKey = candidates.get(0).key();
            highLevel = lookup.apply(lookupKey);
        }

        // Null out every high level record except the latest one. The positions come from
        // highLevelIndex; the loop counter itself is NOT a position in the candidate list.
        List<KeyValue> copy = new ArrayList<>(candidates);
        for (int i = 0; i < highLevelIndex.size() - 1; i++) {
            int superseded = highLevelIndex.get(i);
            copy.set(superseded, null);
        }

        List<KeyValue> result = new ArrayList<>(copy.size() + 1);
        // A high level record that exists while no high level record was added can only have
        // come from a lookup; it is not part of the candidate list, so add it explicitly.
        if (lookup != null && highLevelIndex.isEmpty() && highLevel != null) {
            result.add(highLevel);
        }
        result.addAll(copy.stream().filter(Objects::nonNull).collect(Collectors.toList()));
        return result;
    }

    /** Points {@link #highLevel} at the most recently added high level record, if any. */
    private void setHighLevel() {
        if (!highLevelIndex.isEmpty()) {
            this.highLevel = candidates.get(highLevelIndex.get(highLevelIndex.size() - 1));
        }
    }

    /** Returns true if at least one added record is not a high level record. */
    boolean containsLevel0() {
        return candidates.size() > highLevelIndex.size();
    }

    /** Returns the current high level record, or null if there is none. */
    @Nullable
    KeyValue highLevel() {
        return highLevel;
    }

    /**
     * Returns an independent snapshot of this manager. The lists are copied (the records
     * themselves are shared) and the current high level record is carried over.
     */
    LookupMergeRecordManager copy() {
        LookupMergeRecordManager snapshot =
                new LookupMergeRecordManager(
                        new ArrayList<>(this.candidates), new ArrayList<>(this.highLevelIndex));
        snapshot.highLevel = this.highLevel;
        return snapshot;
    }
}

0 comments on commit 934a7d6

Please sign in to comment.