From eb8b956cc206269d450b2f237e909552eb1e6f45 Mon Sep 17 00:00:00 2001 From: Ian Thomas Date: Tue, 21 May 2024 17:16:08 +0100 Subject: [PATCH] Simplify parser to create Node objects --- src/index.ts | 2 +- src/parse.ts | 88 +++++++++++---------------------------------- src/shell.ts | 12 +++---- tests/parse.test.ts | 64 ++++++++++++++------------------- 4 files changed, 54 insertions(+), 112 deletions(-) diff --git a/src/index.ts b/src/index.ts index cb5427b..d42c26c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -2,6 +2,6 @@ export { IFileSystem } from "./file_system" export { JupyterFileSystem } from "./jupyter_file_system" export { Context } from "./context" export { OutputCallback } from "./output_callback" -export { parse, ParseError } from "./parse" +export { parse } from "./parse" export { Shell } from "./shell" export { tokenize, Token } from "./tokenize" diff --git a/src/parse.ts b/src/parse.ts index b3bbad2..617d1ca 100644 --- a/src/parse.ts +++ b/src/parse.ts @@ -3,89 +3,43 @@ import { Token, tokenize } from "./tokenize" const endOfCommand = ";&" //const ignore_trailing = ";" -export class ParseError extends Error {} -export class AST { - // This is called an AST but is just an array of commands initially. - // Eventually will change a lot to support more complexity. - constructor(readonly tokens: Token[], readonly commandOffsets: number[]) { - this._validate() - } - - command(i: number): string[] { - if (i < 0 || i >= this.commandCount) { - throw new RangeError(`index must be in range 0 to ${this.commandCount} inclusive`) - } - const startIndex = this.commandOffsets[2*i] - const endIndex = this.commandOffsets[2*i+1] - const range = [...Array(endIndex - startIndex).keys()] - return range.map((i) => this.tokens[i + startIndex].value) - } - - get commandCount(): number { - return this.commandOffsets.length / 2 - } +export abstract class Node {} - get commands(): string[][] { - const ret: string[][] = [] - for (let i = 0; i < this.commandCount; i++) { - ret.push(this.command(i)) - } - return ret - } - - private _validate(): void { - // Almost identical to TokenizedSource._validate - const n = this.commandOffsets.length - if (n == 0) { - return - } - - if (n % 2 == 1) { - throw new ParseError("Offsets has odd length") - } - for (let i = 0; i < n; i += 2) { - const start = this.commandOffsets[i] - const end = this.commandOffsets[i+1] - if (end <= start) { - throw new ParseError(`Token ${i/2} has invalid offsets [${start}, ${end}]]`) - } - if (i > 0 && this.commandOffsets[i-1] > start) { - throw new ParseError(`Token ${i/2} overlaps previous token`) - } - } - if (this.commandOffsets[0] < 0 || this.commandOffsets[n-1] > this.tokens.length) { - throw new ParseError("Offsets are outside source string") - } +export class CommandNode extends Node { + constructor(readonly name: Token, readonly suffix: Token[]) { + super() } } -export function parse(source: string): AST { + +export function parse(source: string): Node[] { const tokens = tokenize(source) - const commandOffsets: number[] = [] - const ntokens = tokens.length - let inCommand: boolean = false + const ret: Node[] = [] + let offset: number = -1 // Offset of start of current command, -1 if not in command. + const n = tokens.length - for (let i = 0; i < ntokens; i++) { + for (let i = 0; i < n; i++) { const token = tokens[i] - if (inCommand) { + if (offset >= 0) { // In command if (endOfCommand.includes(token.value)) { // Finish current command, ignore endOfCommand token. - commandOffsets.push(i) - inCommand = false + ret.push(new CommandNode(tokens[offset], tokens.slice(offset+1, i))) + offset = -1 } - } else { // !inCommand + } else { // Not in command if (!endOfCommand.includes(token.value)) { // Start new token. - commandOffsets.push(i) - inCommand = true + offset = i } } } - if (inCommand) { - // Finish last token. - commandOffsets.push(ntokens) + + if (offset >= 0) { + // Finish last command. + ret.push(new CommandNode(tokens[offset], tokens.slice(offset+1, n))) } - return new AST(tokens, commandOffsets) + + return ret } diff --git a/src/shell.ts b/src/shell.ts index 0d01e13..ae11c65 100644 --- a/src/shell.ts +++ b/src/shell.ts @@ -2,7 +2,7 @@ import { CommandRegistry } from "./command_registry" import { Context } from "./context" import { TerminalOutput } from "./io" import { OutputCallback } from "./output_callback" -import { parse } from "./parse" +import { CommandNode, parse } from "./parse" import { IFileSystem } from "./file_system" export class Shell { @@ -92,20 +92,20 @@ export class Shell { // Keeping this public for tests. async _runCommands(cmdText: string): Promise { - const ast = parse(cmdText) - const ncmds = ast.commandCount + const cmdNodes = parse(cmdText) + const ncmds = cmdNodes.length const stdout = new TerminalOutput(this._outputCallback) try { for (let i = 0; i < ncmds; ++i) { - const cmd = ast.command(i) - const cmdName = cmd[0] + const cmd = cmdNodes[i] as CommandNode + const cmdName = cmd.name.value const command = CommandRegistry.instance().create(cmdName) if (command === null) { // Give location of command in input? throw new Error(`Unknown command: '${cmdName}'`) } - const cmdArgs = cmd.slice(1) + const cmdArgs = cmd.suffix.map((token) => token.value) const context = new Context(cmdArgs, this._filesystem, stdout, this._env) //const exit_code = await command?.run(context) await command?.run(context) diff --git a/tests/parse.test.ts b/tests/parse.test.ts index 79fdbb2..1604af7 100644 --- a/tests/parse.test.ts +++ b/tests/parse.test.ts @@ -1,46 +1,34 @@ -import { parse, tokenize, ParseError } from "../src" -import { AST } from "../src/parse" +import { CommandNode, parse } from "../src/parse" +import { Token } from "../src/tokenize" describe("parse", () => { - it("should return command offsets", () => { - expect(parse("").commandOffsets).toStrictEqual([]) - expect(parse("ls").commandOffsets).toStrictEqual([0, 1]) - expect(parse("ls -al").commandOffsets).toStrictEqual([0, 2]) - expect(parse("ls -al;").commandOffsets).toStrictEqual([0, 2]) - expect(parse("ls -al;pwd").commandOffsets).toStrictEqual([0, 2, 3, 4]) - expect(parse("ls -al; pwd").commandOffsets).toStrictEqual([0, 2, 3, 4]) + it("should support no commands", () => { + expect(parse("")).toEqual([]) + expect(parse(";")).toEqual([]) + expect(parse(" ; ; ")).toEqual([]) }) - it("should return commands", () => { - expect(parse("ls").commands).toEqual([["ls"]]) - expect(parse("ls -l -a").commands).toEqual([["ls", "-l", "-a"]]) - expect(parse("ls -l -a; pwd").commands).toEqual([["ls", "-l", "-a"], ["pwd"]]) + it("should support single command", () => { + expect(parse("ls")).toEqual([ + new CommandNode(new Token(0, "ls"), []), + ]) + expect(parse("ls -al")).toEqual([ + new CommandNode(new Token(0, "ls"), [new Token(3, "-al")]), + ]) + expect(parse("ls -al;")).toEqual([ + new CommandNode(new Token(0, "ls"), [new Token(3, "-al")]), + ]) }) - it("should raise on invalid index bounds", () => { - const ast = parse("ls -al") - expect(() => ast.command(-1)).toThrow(RangeError) - expect(() => ast.command(2)).toThrow(RangeError) - }) -}) - -describe("AST validate", () => { - const tokenizedSource = tokenize("ls -al; pwd") - - it("should raise if odd number of offsets", () => { - expect(() => new AST(tokenizedSource, [0])).toThrow(ParseError) - }) - - it("should raise if offset end not greater than start", () => { - expect(() => new AST(tokenizedSource, [0, 0])).toThrow(ParseError) - }) - - it("should raise if tokens overlap", () => { - expect(() => new AST(tokenizedSource, [0, 2, 1, 3])).toThrow(ParseError) - }) - - it("should raise if offsets out of bounds", () => { - expect(() => new AST(tokenizedSource, [3, 5])).toThrow(ParseError) - expect(() => new AST(tokenizedSource, [-1, 1])).toThrow(ParseError) + it("should support multiple commands", () => { + expect(parse("ls -al;pwd")).toEqual([ + new CommandNode(new Token(0, "ls"), [new Token(3, "-al")]), + new CommandNode(new Token(7, "pwd"), []), + ]) + expect(parse("echo abc;pwd;ls -al")).toEqual([ + new CommandNode(new Token(0, "echo"), [new Token(5, "abc")]), + new CommandNode(new Token(9, "pwd"), []), + new CommandNode(new Token(13, "ls"), [new Token(16, "-al")]), + ]) }) })