Skip to content

Commit

Permalink
fix: do not use empty line as section separator
Browse files Browse the repository at this point in the history
  • Loading branch information
matzehecht committed Dec 17, 2022
1 parent ed16643 commit 4b1be84
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 26 deletions.
2 changes: 1 addition & 1 deletion src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ function parseArgs(argv: string[]) {
utils.logInfo('OUTPUT', `Using given file name ${outputFileOrDir}`);
}
} catch (err) {
throwErr(err, true);
throwErr(err as Error, true);
}
return {
configFile,
Expand Down
56 changes: 31 additions & 25 deletions src/extractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,42 +25,48 @@ export function extract(raw: string, inputConfig: Input, parentTransaction?: Tra
inText: exInTextKeys
};

const sections = inputConfig.sections;
const sectionKeyRegExp = RegExp(`(^|[^a-zA-Z0-9])(${Object.keys(inputConfig.sections).join('|')})([^a-zA-Z0-9]|$)`);
const indexes = rawArray.reduce<[number, string][]>((prev, line, i) => {
const matches = line.match(sectionKeyRegExp);
if (matches && rawArray[i - 1].trim().length === 0) {
prev.push([i, matches[2]]);
}
return prev;
}, []);

Object.entries(sections).forEach((section) => (extractedSection[section[0]] = extractSection(rawArray, section)));
return extractedSection;
}
const sectionSlices = indexes.reduce<Record<string, string[][]>>((prev, [index, key], i) => {
const prevSectionArray = prev[key] ?? [];

function extractSection(rawArray: string[], [sectionKey, sectionType]: [string, SectionType]): ParsedSection {
// Find first line of section. If a section spans over two pages there can be two first lines.
const indexes = rawArray.reduce(
(prev, r, i) => (r.match(RegExp(`(^|[^a-zA-Z0-9])${sectionKey}([^a-zA-Z0-9]|$)`)) && rawArray[i - 1].trim().length === 0 ? prev.concat(i) : prev),
[] as number[]
);
const nextIndex = indexes[i + 1]?.[0] ?? Infinity;
const sectionSlice = rawArray.slice(index, nextIndex);
prevSectionArray.push(sectionSlice);
prev[key] = prevSectionArray;

const subSections = indexes.map((i) => extractSubSection(rawArray, sectionType, i));
return prev;
}, {});

return Array.isArray(subSections[0]) ? subSections.flat() : subSections.reduce((p, c) => ({ ...p, ...c }), {});
Object.entries(sectionSlices).forEach(([sectionKey, slices]) => (extractedSection[sectionKey] = extractSections(inputConfig.sections[sectionKey], slices)));
return extractedSection;
}

function extractSubSection(rawArray: string[], sectionType: SectionType, index: number): ParsedSection {
const trimmedLeft = rawArray.slice(index);

const endIndex = trimmedLeft.findIndex((line) => line.trim().length === 0);
function extractSections(sectionType: SectionType, slices: string[][]): ParsedSection {
const subSections = slices.map((slice) => extractSubSections(slice, sectionType));

const relevantLines = trimmedLeft.slice(0, endIndex);
return Array.isArray(subSections[0]) ? subSections.flat() : subSections.reduce((p, c) => ({ ...p, ...c }), {});
}

function extractSubSections(slice: string[], sectionType: SectionType): ParsedSection {
switch (sectionType) {
case 'keyValue':
return extractKeyVal(relevantLines);
return extractKeyVal(slice);
case 'table':
return extractTable(relevantLines);
return extractTable(slice);
case 'try':
const triedIsTable = rawArray[index].startsWith(' ');
const triedIsTable = slice[0].startsWith(' ');
if (triedIsTable) {
return extractTable(relevantLines);
return extractTable(slice);
} else {
return extractKeyVal(relevantLines);
return extractKeyVal(slice);
}
}
}
Expand Down Expand Up @@ -114,10 +120,10 @@ function extractKeyVal(rawArray: string[]): ParsedKVSection {
prev[trimmedKey] = '';
return prev;
}

const indexValue = lineWithoutKey.search(/\S/) + key.length;
const value = line.slice(indexValue).split(/\s\s/)[0];

// if the value starts with a lot white spaces (empty value but date on the right)
// skip to the stuff with rest string and well known keys.
if (indexValue - key.length < 25) {
Expand All @@ -126,7 +132,7 @@ function extractKeyVal(rawArray: string[]): ParsedKVSection {

// if the value starts with a lot white spaces (see if clause above)
// start the rest string on kex.length
const endIndexValue = (indexValue - key.length < 25 ? indexValue + value.length : key.length);
const endIndexValue = indexValue - key.length < 25 ? indexValue + value.length : key.length;
const restString = line.slice(endIndexValue).trim();

const isWellKnown = WELL_KNOWN_KEYS.find((k) => restString.startsWith(k));
Expand Down

0 comments on commit 4b1be84

Please sign in to comment.