Skip to content

Commit

Permalink
[S] Rework
Browse files Browse the repository at this point in the history
  • Loading branch information
psainics committed Dec 11, 2023
1 parent e6ed78f commit edb65ca
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,17 +88,17 @@ public static class XlsRecordReader extends RecordReader<LongWritable, Structure
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {

CombineFileSplit split = (CombineFileSplit) genericSplit;
- Configuration job = context.getConfiguration();
+ Configuration jobConf = context.getConfiguration();
// Path of input file.
Path file = split.getPath(0);
String schema = context.getConfiguration().get("schema");
outputSchema = schema != null ? Schema.parseJson(schema) : null;
- FileSystem fs = file.getFileSystem(job);
+ FileSystem fs = file.getFileSystem(jobConf);
fileIn = fs.open(split.getPath(0));

- String sheet = job.get(SHEET_NO);
- String sheetValue = job.get(SHEET_VALUE, "0");
- terminateIfEmptyRow = job.getBoolean(TERMINATE_IF_EMPTY_ROW, false);
+ String sheet = jobConf.get(SHEET_NO);
+ String sheetValue = jobConf.get(SHEET_VALUE, "0");
+ terminateIfEmptyRow = jobConf.getBoolean(TERMINATE_IF_EMPTY_ROW, false);

try (Workbook workbook = WorkbookFactory.create(fileIn)) {
formulaEvaluator = workbook.getCreationHelper().createFormulaEvaluator();
Expand All @@ -118,7 +118,7 @@ public void initialize(InputSplit genericSplit, TaskAttemptContext context) thro
rowIndex = 0;
isRowNull = false;

- boolean skipFirstRow = job.getBoolean(NAME_SKIP_HEADER, true);
+ boolean skipFirstRow = jobConf.getBoolean(NAME_SKIP_HEADER, true);
if (skipFirstRow) {
Preconditions.checkArgument(lastRowNum != -1, "No rows found on sheet %s", sheetValue);
rowIndex = 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public String formatCellValue(Cell cell, Schema.Type type) {
case ERROR:
return null;
default:
- throw new IllegalStateException("Unexpected celltype (" + cellType + ")");
+ throw new IllegalStateException(String.format("Unexpected celltype (%s)", cellType));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import io.cdap.cdap.etl.api.validation.ValidatingInputFormat;
import io.cdap.plugin.format.input.PathTrackingConfig;
import io.cdap.plugin.format.input.PathTrackingInputFormatProvider;
+ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
Expand Down Expand Up @@ -97,6 +98,7 @@ protected void addFormatProperties(Map<String, String> properties) {
properties.put(XlsInputFormat.SHEET_VALUE, conf.getSheetValue());
properties.put(XlsInputFormat.NAME_SKIP_HEADER, String.valueOf(conf.getSkipHeader()));
properties.put(XlsInputFormat.TERMINATE_IF_EMPTY_ROW, String.valueOf(conf.getTerminateIfEmptyRow()));
+ properties.put(FileInputFormat.SPLIT_MINSIZE, Long.toString(Long.MAX_VALUE));
}

@Override
Expand Down

0 comments on commit edb65ca

Please sign in to comment.