Skip to content

Commit

Permalink
TEZ-4407: Misleading split info in TezSplitGrouper logs when adjusting small splits (#202) (Stamatis Zampetakis reviewed by Laszlo Bodor)
Browse files Browse the repository at this point in the history
  • Loading branch information
zabetak authored Dec 23, 2024
1 parent 56426d5 commit b95defc
Showing 1 changed file with 14 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -253,36 +253,28 @@ public List<GroupedSplitContainer> getGroupedSplits(Configuration conf,
"Invalid max/min group lengths. Required min>0, max>=min. " +
" max: " + maxLengthPerGroup + " min: " + minLengthPerGroup);
}
int newDesiredNumSplits = -1;
if (lengthPerGroup > maxLengthPerGroup) {
// splits too big to work. Need to override with max size.
int newDesiredNumSplits = (int)(totalLength/maxLengthPerGroup) + 1;
LOG.info("Desired splits: " + desiredNumSplits + " too small. " +
" Desired splitLength: " + lengthPerGroup +
" Max splitLength: " + maxLengthPerGroup +
" New desired splits: " + newDesiredNumSplits +
" Total length: " + totalLength +
" Original splits: " + originalSplits.size());

desiredNumSplits = newDesiredNumSplits;
newDesiredNumSplits = (int)(totalLength/maxLengthPerGroup) + 1;
} else if (lengthPerGroup < minLengthPerGroup) {
// splits too small to work. Need to override with size.
int newDesiredNumSplits = (int)(totalLength/minLengthPerGroup) + 1;
/**
* This is a workaround for systems like S3 that pass the same
* fake hostname for all splits.
*/
if (!allSplitsHaveLocalhost) {
desiredNumSplits = newDesiredNumSplits;
newDesiredNumSplits = (int)(totalLength/minLengthPerGroup) + 1;
if (allSplitsHaveLocalhost) {
// Workaround for systems like S3 that pass the same fake hostname for all splits.
LOG.info("Ignore {} configuration cause all splits seem to be on localhost.", TEZ_GROUPING_SPLIT_MIN_SIZE);
newDesiredNumSplits = desiredNumSplits;
}

LOG.info("Desired splits: " + desiredNumSplits + " too large. " +
" Desired splitLength: " + lengthPerGroup +
}
if (newDesiredNumSplits != -1) {
LOG.info("Desired splitLength " + lengthPerGroup + " exceeds min/max bounds. " +
" Min splitLength: " + minLengthPerGroup +
" New desired splits: " + newDesiredNumSplits +
" Final desired splits: " + desiredNumSplits +
" All splits have localhost: " + allSplitsHaveLocalhost +
" Max splitLength: " + maxLengthPerGroup +
" Desired splits: " + desiredNumSplits +
" New Desired splits: " + newDesiredNumSplits +
" Total length: " + totalLength +
" Original splits: " + originalSplits.size());
desiredNumSplits = newDesiredNumSplits;
}
}

Expand Down

0 comments on commit b95defc

Please sign in to comment.