-
Notifications
You must be signed in to change notification settings - Fork 988
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add data file root location in DataFileMeta #4751
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,7 +82,8 @@ public class DataFileMeta { | |
new DataField( | ||
16, | ||
"_VALUE_STATS_COLS", | ||
DataTypes.ARRAY(DataTypes.STRING().notNull())))); | ||
DataTypes.ARRAY(DataTypes.STRING().notNull())), | ||
new DataField(17, "_DATA_ROOT_LOCATION", newStringType(true)))); | ||
|
||
public static final BinaryRow EMPTY_MIN_KEY = EMPTY_ROW; | ||
public static final BinaryRow EMPTY_MAX_KEY = EMPTY_ROW; | ||
|
@@ -120,6 +121,13 @@ public class DataFileMeta { | |
|
||
private final @Nullable List<String> valueStatsCols; | ||
|
||
/** | ||
* The data root location that the file resides in. If it is null, the file is in the default | |
* warehouse path. When {@link CoreOptions#DATA_FILE_PATH_DIRECTORY} is set, newly written files | |
* will be persisted in {@link CoreOptions#DATA_FILE_PATH_DIRECTORY}. | |
*/ | ||
private final @Nullable String dataRootLocation; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We need to introduce a new version for CommitMessage and DataSplit. You can refer to #4322 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see that since version 0.9 [1], the versions of CommitMessage and DataSplit have been changed. Do I need to make another change? I think it only needs to be changed once in version 1.0. [1] https://github.com/apache/paimon/blob/release-0.9/paimon-core/src/main/java/org/apache/paimon/table/source/DataSplit.java There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Keep compatibility, even if it is just in 1.0-SNAPSHOT, for example, creating There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Understood, if it's for compatibility with 1.0-SNAPSHOT, it's worth doing this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. rename to externalPath? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No problem, I wanted to express that this path is the same root path as the warehouse, not the full path. But it doesn't affect the modification of DataFileMeta this time. I'll change it to externalPath first There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. maybe it is better to just use the full path? |
||
|
||
public static DataFileMeta forAppend( | ||
String fileName, | ||
long fileSize, | ||
|
@@ -149,7 +157,8 @@ public static DataFileMeta forAppend( | |
0L, | ||
embeddedIndex, | ||
fileSource, | ||
valueStatsCols); | ||
valueStatsCols, | ||
null); | ||
} | ||
|
||
public DataFileMeta( | ||
|
@@ -186,7 +195,8 @@ public DataFileMeta( | |
deleteRowCount, | ||
embeddedIndex, | ||
fileSource, | ||
valueStatsCols); | ||
valueStatsCols, | ||
null); | ||
} | ||
|
||
public DataFileMeta( | ||
|
@@ -222,7 +232,8 @@ public DataFileMeta( | |
deleteRowCount, | ||
embeddedIndex, | ||
fileSource, | ||
valueStatsCols); | ||
valueStatsCols, | ||
null); | ||
} | ||
|
||
public DataFileMeta( | ||
|
@@ -242,7 +253,8 @@ public DataFileMeta( | |
@Nullable Long deleteRowCount, | ||
@Nullable byte[] embeddedIndex, | ||
@Nullable FileSource fileSource, | ||
@Nullable List<String> valueStatsCols) { | ||
@Nullable List<String> valueStatsCols, | ||
@Nullable String dataRootLocation) { | ||
this.fileName = fileName; | ||
this.fileSize = fileSize; | ||
|
||
|
@@ -264,6 +276,7 @@ public DataFileMeta( | |
this.deleteRowCount = deleteRowCount; | ||
this.fileSource = fileSource; | ||
this.valueStatsCols = valueStatsCols; | ||
this.dataRootLocation = dataRootLocation; | ||
} | ||
|
||
public String fileName() { | ||
|
@@ -357,6 +370,19 @@ public String fileFormat() { | |
return split[split.length - 1]; | ||
} | ||
|
||
@Nullable | ||
public String getDataRootLocationString() { | ||
return dataRootLocation; | ||
} | ||
|
||
@Nullable | ||
public Path getDataRootLocation() { | ||
if (dataRootLocation == null) { | ||
return null; | ||
} | ||
return new Path(dataRootLocation); | ||
} | ||
|
||
public Optional<FileSource> fileSource() { | ||
return Optional.ofNullable(fileSource); | ||
} | ||
|
@@ -385,7 +411,8 @@ public DataFileMeta upgrade(int newLevel) { | |
deleteRowCount, | ||
embeddedIndex, | ||
fileSource, | ||
valueStatsCols); | ||
valueStatsCols, | ||
dataRootLocation); | ||
} | ||
|
||
public DataFileMeta rename(String newFileName) { | ||
|
@@ -406,7 +433,8 @@ public DataFileMeta rename(String newFileName) { | |
deleteRowCount, | ||
embeddedIndex, | ||
fileSource, | ||
valueStatsCols); | ||
valueStatsCols, | ||
dataRootLocation); | ||
} | ||
|
||
public DataFileMeta copyWithoutStats() { | ||
|
@@ -427,7 +455,8 @@ public DataFileMeta copyWithoutStats() { | |
deleteRowCount, | ||
embeddedIndex, | ||
fileSource, | ||
Collections.emptyList()); | ||
Collections.emptyList(), | ||
dataRootLocation); | ||
} | ||
|
||
public List<Path> collectFiles(DataFilePathFactory pathFactory) { | ||
|
@@ -455,7 +484,8 @@ public DataFileMeta copy(List<String> newExtraFiles) { | |
deleteRowCount, | ||
embeddedIndex, | ||
fileSource, | ||
valueStatsCols); | ||
valueStatsCols, | ||
dataRootLocation); | ||
} | ||
|
||
public DataFileMeta copy(byte[] newEmbeddedIndex) { | ||
|
@@ -476,7 +506,8 @@ public DataFileMeta copy(byte[] newEmbeddedIndex) { | |
deleteRowCount, | ||
newEmbeddedIndex, | ||
fileSource, | ||
valueStatsCols); | ||
valueStatsCols, | ||
dataRootLocation); | ||
} | ||
|
||
@Override | ||
|
@@ -504,7 +535,8 @@ public boolean equals(Object o) { | |
&& Objects.equals(creationTime, that.creationTime) | ||
&& Objects.equals(deleteRowCount, that.deleteRowCount) | ||
&& Objects.equals(fileSource, that.fileSource) | ||
&& Objects.equals(valueStatsCols, that.valueStatsCols); | ||
&& Objects.equals(valueStatsCols, that.valueStatsCols) | ||
&& Objects.equals(dataRootLocation, that.dataRootLocation); | ||
} | ||
|
||
@Override | ||
|
@@ -526,7 +558,8 @@ public int hashCode() { | |
creationTime, | ||
deleteRowCount, | ||
fileSource, | ||
valueStatsCols); | ||
valueStatsCols, | ||
dataRootLocation); | ||
} | ||
|
||
@Override | ||
|
@@ -536,7 +569,7 @@ public String toString() { | |
+ "minKey: %s, maxKey: %s, keyStats: %s, valueStats: %s, " | ||
+ "minSequenceNumber: %d, maxSequenceNumber: %d, " | ||
+ "schemaId: %d, level: %d, extraFiles: %s, creationTime: %s, " | ||
+ "deleteRowCount: %d, fileSource: %s, valueStatsCols: %s}", | ||
+ "deleteRowCount: %d, fileSource: %s, valueStatsCols: %s, dataRootLocation: %s}", | ||
fileName, | ||
fileSize, | ||
rowCount, | ||
|
@@ -553,7 +586,8 @@ public String toString() { | |
creationTime, | ||
deleteRowCount, | ||
fileSource, | ||
valueStatsCols); | ||
valueStatsCols, | ||
dataRootLocation); | ||
} | ||
|
||
public static long getMaxSequenceNumber(List<DataFileMeta> fileMetas) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -155,6 +155,7 @@ public DataFileMeta fromRow(InternalRow row) { | |
null, | ||
null, | ||
null, | ||
null, | ||
null); | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you not introduce any option in this pr? Just modify data file meta.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok