Skip to content
This repository has been archived by the owner on Apr 13, 2024. It is now read-only.

Commit

Permalink
Extract code for reading a line at a time, from head/tail/wc builtins
Browse files Browse the repository at this point in the history
fileLines() is slightly different from what these utilities were doing:
it exposes the lines as a generator so that they can be iterated.
Internally it still reads the entire input files, but in future it can
be modified to only read one line at a time, without having to modify
the code that uses it.
  • Loading branch information
AtkinsSJ committed Mar 12, 2024
1 parent 03b15d6 commit 08f8724
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 80 deletions.
35 changes: 7 additions & 28 deletions src/puter-shell/coreutils/head.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import { Exit } from './coreutil_lib/exit.js';
import { resolveRelativePath } from '../../util/path.js';
import { fileLines } from '../../util/file.js';

export default {
name: 'head',
Expand All @@ -41,9 +41,8 @@ export default {
}
},
execute: async ctx => {
const { in_, out, err } = ctx.externs;
const { out, err } = ctx.externs;
const { positionals, values } = ctx.locals;
const { filesystem } = ctx.platform;

if (positionals.length > 1) {
// TODO: Support multiple files (this is POSIX)
Expand All @@ -63,32 +62,12 @@ export default {
lineCount = parsedLineCount;
}

// TODO: head can stop reading from the input as soon as it completes lineCount lines.
let lines = [];
if (relPath === '-') {
lines = await in_.collect();
} else {
const absPath = resolveRelativePath(ctx.vars, relPath);
const fileData = await filesystem.read(absPath);
// DRY: Similar logic in wc and tail
if (fileData instanceof Blob) {
const arrayBuffer = await fileData.arrayBuffer();
const fileText = new TextDecoder().decode(arrayBuffer);
lines = fileText.split(/\n|\r|\r\n/).map(it => it + '\n');
} else if (typeof fileData === 'string') {
lines = fileData.split(/\n|\r|\r\n/).map(it => it + '\n');
} else {
// ArrayBuffer or TypedArray
const fileText = new TextDecoder().decode(fileData);
lines = fileText.split(/\n|\r|\r\n/).map(it => it + '\n');
}
}
if ( lines.length > lineCount ) {
lines = lines.slice(0, lineCount);
}

for ( const line of lines ) {
let processedLineCount = 0;
for await (const line of fileLines(ctx, relPath)) {
await out.write(line);
processedLineCount++;
if (processedLineCount >= lineCount)
break;
}
}
};
32 changes: 11 additions & 21 deletions src/puter-shell/coreutils/tail.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import { Exit } from './coreutil_lib/exit.js';
import { resolveRelativePath } from '../../util/path.js';
import { fileLines } from '../../util/file.js';

export default {
name: 'tail',
Expand All @@ -41,9 +41,8 @@ export default {
}
},
execute: async ctx => {
const { in_, out, err } = ctx.externs;
const { out, err } = ctx.externs;
const { positionals, values } = ctx.locals;
const { filesystem } = ctx.platform;

if (positionals.length > 1) {
// TODO: Support multiple files (this is an extension to POSIX, but available in the GNU tail)
Expand All @@ -64,30 +63,21 @@ export default {
}

let lines = [];
if (relPath === '-') {
lines = await in_.collect();
} else {
const absPath = resolveRelativePath(ctx.vars, relPath);
const fileData = await filesystem.read(absPath);
// DRY: Similar logic in wc
if (fileData instanceof Blob) {
const arrayBuffer = await fileData.arrayBuffer();
const fileText = new TextDecoder().decode(arrayBuffer);
lines = fileText.split(/\n|\r|\r\n/).map(it => it + '\n');
} else if (typeof fileData === 'string') {
lines = fileData.split(/\n|\r|\r\n/).map(it => it + '\n');
} else {
// ArrayBuffer or TypedArray
const fileText = new TextDecoder().decode(fileData);
lines = fileText.split(/\n|\r|\r\n/).map(it => it + '\n');
for await (const line of fileLines(ctx, relPath)) {
lines.push(line);
// We keep lineCount+1 lines, to account for a possible trailing blank line.
if (lines.length > lineCount + 1) {
lines.shift();
}
}

// Ignore trailing blank line
if ( lines.length > 0 && lines[lines.length - 1] === '\n') {
// Ignore trailing blank line
lines.pop();
}
// Now we remove the extra line if it's there.
if ( lines.length > lineCount ) {
lines = lines.slice(-1 * lineCount);
lines.shift();
}

for ( const line of lines ) {
Expand Down
39 changes: 8 additions & 31 deletions src/puter-shell/coreutils/wc.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import { resolveRelativePath } from '../../util/path.js';
import { fileLines } from '../../util/file.js';

const TAB_SIZE = 8;

Expand Down Expand Up @@ -100,26 +101,15 @@ export default {

let inWord = false;
let currentLineLength = 0;
let accumulateData = async (input) => {
let stringInput;
if (input instanceof Blob) {
const arrayBuffer = await input.arrayBuffer();
stringInput = new TextDecoder().decode(arrayBuffer);
counts.bytes += arrayBuffer.byteLength;
} else if (typeof input === 'string') {
stringInput = input;
if (printBytes) {
const byteInput = new TextEncoder().encode(input);
counts.bytes += byteInput.length;
}
} else {
// ArrayBuffer or TypedArray
stringInput = new TextDecoder().decode(input);
counts.bytes += input.length;

for await (const line of fileLines(ctx, relPath)) {
counts.chars += line.length;
if (printBytes) {
const byteInput = new TextEncoder().encode(line);
counts.bytes += byteInput.length;
}
counts.chars += stringInput.length;

for (const char of stringInput) {
for (const char of line) {
// "The wc utility shall consider a word to be a non-zero-length string of characters delimited by white space."
if (/\s/.test(char)) {
if (char === '\r' || char === '\n') {
Expand All @@ -142,19 +132,6 @@ export default {
}
}

if (relPath === '-') {
let chunk, done;
const nextChunk = async () => {
({ value: chunk, done } = await ctx.externs.in_.read());
}
for ( await nextChunk() ; ! done ; await nextChunk() ) {
await accumulateData(chunk);
}
} else {
const absPath = resolveRelativePath(ctx.vars, relPath);
const fileData = await filesystem.read(absPath);
await accumulateData(fileData);
}
counts.maxLineLength = Math.max(counts.maxLineLength, currentLineLength);

newlinesWidth = Math.max(newlinesWidth, counts.newlines.toString().length);
Expand Down
28 changes: 28 additions & 0 deletions src/util/file.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import { resolveRelativePath } from './path.js';

/**
 * Iterate the given file, one line at a time.
 *
 * Each line read from a file is yielded with a single '\n' appended,
 * regardless of which terminator ("\r\n", "\n" or "\r") ended it in the input.
 * Because the text is split on terminators, input that ends with a terminator
 * yields one final blank line ('\n'); callers that care (e.g. tail) must
 * account for that themselves.
 *
 * When reading from stdin, the entries returned by `in_.collect()` are
 * yielded unchanged.
 *
 * TODO: Make this read one line at a time, instead of all at once.
 *
 * @param {object} ctx - Command context; uses `ctx.externs.in_`, `ctx.vars`
 *     and `ctx.platform.filesystem`.
 * @param {string} relPath - Path of the file to read, relative to the
 *     context's current directory, or '-' for stdin (see `options`).
 * @param {object} [options]
 * @param {boolean} [options.dashIsStdin] - If truthy, a `relPath` of '-'
 *     reads from stdin instead of a file named '-'.
 * @yields {string} The next line of input.
 */
export async function* fileLines(ctx, relPath, options = { dashIsStdin: true }) {
    // "\r\n" must be listed first: alternation picks the first branch that
    // matches, so with "\r" ahead of it a CRLF pair would be split twice and
    // produce a bogus empty line after every real line.
    const lineTerminator = /\r\n|\n|\r/;

    let lines;
    if (options.dashIsStdin && relPath === '-') {
        lines = await ctx.externs.in_.collect();
    } else {
        const absPath = resolveRelativePath(ctx.vars, relPath);
        const fileData = await ctx.platform.filesystem.read(absPath);

        let fileText;
        if (typeof fileData === 'string') {
            fileText = fileData;
        } else if (fileData instanceof Blob) {
            fileText = new TextDecoder().decode(await fileData.arrayBuffer());
        } else {
            // ArrayBuffer or TypedArray
            fileText = new TextDecoder().decode(fileData);
        }

        lines = fileText.split(lineTerminator).map(it => `${it}\n`);
    }

    for (const line of lines) {
        yield line;
    }
}

0 comments on commit 08f8724

Please sign in to comment.