Skip to content

Commit

Permalink
Throw error before AIOUB
Browse files Browse the repository at this point in the history
  • Loading branch information
psainics committed Dec 5, 2023
1 parent 55af0b7 commit 3984b8e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public void initialize(InputSplit genericSplit, TaskAttemptContext context) thro
rowIndex = 0;
isRowNull = false;

boolean skipFirstRow = job.getBoolean("skipHeader", true);
boolean skipFirstRow = job.getBoolean(NAME_SKIP_HEADER, true);
if (skipFirstRow) {
Preconditions.checkArgument(lastRowNum != -1, "No rows found on sheet %s", sheetValue);
rowIndex = 1;
Expand All @@ -140,16 +140,23 @@ public boolean nextKeyValue() {

isRowNull = true;
for (int cellIndex = 0; cellIndex < row.getLastCellNum(); cellIndex++) {
if (cellIndex >= fields.size()) {
throw new IllegalArgumentException(
String.format("Schema contains less fields than the number of columns in the excel file. " +
"Schema fields: %s, Excel columns: %s", fields.size(), row.getLastCellNum()));
}
Cell cell = row.getCell(cellIndex, Row.MissingCellPolicy.RETURN_BLANK_AS_NULL);
if (cell == null) {
continue;
}
isRowNull = false;
Schema.Field field = fields.get(cellIndex);
String result = formatter.formatCellValue(cell);
Schema.Type type = field.getSchema().isNullable() ?
field.getSchema().getNonNullable().getType() : field.getSchema().getType();
String result = formatter.formatCellValue(cell, type);
if (result == null) {
continue;
}
isRowNull = false;
builder.convertAndSet(field.getName(), result);
}
value = builder.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public class XlsInputFormatConfig extends PathTrackingConfig {
@Nullable
@Name(NAME_TERMINATE_IF_EMPTY_ROW)
@Description(DESC_TERMINATE_ROW)
private static Boolean terminateIfEmptyRow;
private Boolean terminateIfEmptyRow;

public XlsInputFormatConfig() {
super();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public XlsInputFormatDataFormatter(FormulaEvaluator evaluator) {
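* @param cell the cell to format
* @param type the schema type of the field the cell maps to
* @return the formatted cell value, or null if the cell is null or is a string cell whose schema type is double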
*/
public String formatCellValue(Cell cell) {
public String formatCellValue(Cell cell, Schema.Type type) {
if (cell == null) {
return null;
}
Expand All @@ -66,6 +66,11 @@ public String formatCellValue(Cell cell) {
}
return Double.toString(cell.getNumericCellValue());
case STRING:
if (type == Schema.Type.DOUBLE) {
// Edge case: the schema was inferred as double but the cell is actually a string.
// This can be caused by an error formula cell, because the error value is stored as a string.
return null;
}
return cell.getRichStringCellValue().getString();
case BOOLEAN:
return cell.getBooleanCellValue() ? "TRUE" : "FALSE";
Expand Down

0 comments on commit 3984b8e

Please sign in to comment.