From 87364cd06061eb7727d4ca782e3f8d0e6ef6a68d Mon Sep 17 00:00:00 2001 From: Denis Carriere Date: Fri, 1 Mar 2024 21:17:38 -0500 Subject: [PATCH] improve SQL create table handling --- package.json | 4 +-- src/parseSchema.spec.ts | 56 ++++++++++++++++++++++++++++++++++++++--- src/parseSchema.ts | 56 +++++++++++++++++++++++++++++++---------- 3 files changed, 97 insertions(+), 19 deletions(-) diff --git a/package.json b/package.json index 8bdbd5b..a5d628d 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "version": "0.2.4", + "version": "0.2.5", "name": "substreams-sink-csv", "description": "Substreams Sink CSV", "type": "module", @@ -27,8 +27,8 @@ }, "scripts": { "start": "tsc && node ./dist/bin/cli.js", - "pretest": "tsc --noEmit", "test": "bun test", + "posttest": "tsc --noEmit", "prepublishOnly": "tsc" }, "dependencies": { diff --git a/src/parseSchema.spec.ts b/src/parseSchema.spec.ts index 697d519..3ebf393 100644 --- a/src/parseSchema.spec.ts +++ b/src/parseSchema.spec.ts @@ -1,7 +1,55 @@ -import { describe, expect, test } from "bun:test"; -import { parseSchema } from "./parseSchema.js"; +import { expect, test } from "bun:test"; +import { parseColumn, parseCreateTable, parseSchema } from "./parseSchema.js"; -test("parseSchema", () => { +test("parseCreateTable", () => { + expect(parseCreateTable("CREATE TABLE block_meta")).toBe("block_meta"); + expect(parseCreateTable("create table block_meta")).toBe("block_meta"); + expect(parseCreateTable("CREATE TABLE IF NOT EXISTS block_meta")).toBe("block_meta"); + expect(parseCreateTable("CREATE TABLE block_meta (")).toBe("block_meta"); + expect(parseCreateTable("FOO BAR")).toBe(""); +}) + +test("parseColumn", () => { + expect(parseColumn("id INTEGER PRIMARY KEY,")).toBe("id"); + expect(parseColumn("parent_hash TEXT,")).toBe("parent_hash"); + expect(parseColumn("timestamp INTEGER")).toBe("timestamp"); + expect(parseColumn("\"timestamp\" INTEGER")).toBe("timestamp"); + expect(parseColumn("'timestamp' 
INTEGER")).toBe("timestamp"); + + // empty columns + expect(parseColumn(");")).toBe(""); + expect(parseColumn("PRIMARY KEY(evt_tx_hash,evt_index)")).toBe(""); + expect(parseColumn("PRIMARY KEY (ID)")).toBe(""); + expect(parseColumn("CONSTRAINT PK_Person PRIMARY KEY (ID,LastName)")).toBe(""); +}) + +test("parseSchema::factory_pair_created", () => { + const sql = ` + CREATE TABLE factory_pair_created ( + "evt_tx_hash" VARCHAR(64), + "evt_index" INT, + "evt_block_time" TIMESTAMP, + "evt_block_number" DECIMAL, + "pair" VARCHAR(40), + "param3" DECIMAL, + "token0" VARCHAR(40), + "token1" VARCHAR(40), + PRIMARY KEY(evt_tx_hash,evt_index) + );` + const tables = parseSchema(sql); + expect(tables).toEqual(new Map([["factory_pair_created", [ + "evt_tx_hash", + "evt_index", + "evt_block_time", + "evt_block_number", + "pair", + "param3", + "token0", + "token1", + ]]])); +}) + +test("parseSchema::block_meta", () => { const sql = ` CREATE TABLE block_meta ( @@ -14,4 +62,4 @@ test("parseSchema", () => { );` const tables = parseSchema(sql); expect(tables).toEqual(new Map([["block_meta", ["id", "at", "number", "hash", "parent_hash", "timestamp"]]])); -}); \ No newline at end of file +}); diff --git a/src/parseSchema.ts b/src/parseSchema.ts index 0995ab4..259c3b6 100644 --- a/src/parseSchema.ts +++ b/src/parseSchema.ts @@ -1,22 +1,52 @@ export function parseSchema(sql: string) { const tables = new Map(); // - const statements = sql.split(";"); + const statements = sql.split(";") // should return `block_meta` as table and `id, at, number, hash, parent_hash, timestamp` as columns for (const statement of statements) { - const match = statement.match(/CREATE TABLE (\w+)/); - if (match) { - const table = match[1]; - const columns = new Set([]); // use Set to avoid duplicates - const columnMatches = statement.match(/\(([\w\s,]+)\)/); - if (columnMatches) { - const columnNames = columnMatches[1].split(","); - for (const columnName of columnNames) { - columns.add(columnName.trim().split(/[ 
]+/)[0]); - } - } - tables.set(table, Array.from(columns)); + const lines = statement.trim().split("\n"); + const table = parseCreateTable(lines[0]); + // console.log(table, lines); + if ( !table ) continue; // parseCreateTable returned '' for the first line, so skip this statement + const columns = new Set(); // Set drops duplicate column names + for ( const line of lines) { + const column = parseColumn(line); + if (column) columns.add(column); } + tables.set(table, Array.from(columns)); } return tables; } + +// Returns the last space-separated word of a line that starts with CREATE TABLE (case-insensitive), or '' for any other line; must match the following statements: +// CREATE TABLE block_meta +// CREATE TABLE block_meta ( +// create table block_meta +// CREATE TABLE IF NOT EXISTS block_meta +export function parseCreateTable(statement: string) { + const match = statement.match(/^CREATE TABLE/i); + if (match) { + statement = statement.replace("(", "").trim(); // a string pattern removes only the first "(" + return statement.split(" ").reverse()[0].trim(); // last word is taken as the table name + } + return ''; +} + +// Returns the first word of a column line (quotes, commas and semicolons stripped), or '' for the ignored line kinds below and for single-word lines; must match the following statements: +// id INTEGER PRIMARY KEY, +// parent_hash TEXT, +// timestamp INTEGER +export function parseColumn(statement: string) { + statement = statement.trim().replace(/[,;]/g, ''); // remove every comma and semicolon (global flag, not only trailing ones) + statement = statement.replace(/[\"\']/g, ''); // remove quotes + if ( statement.match(/^CREATE TABLE/i) ) return '' // ignore table name + if ( statement.match(/^PRIMARY KEY/i) ) return '' // ignore primary key as valid column + if ( statement.match(/^\)/) ) return '' // ignore closing parenthesis + if ( statement.match(/^\s*$/) ) return '' // ignore empty lines + if ( statement.match(/^CONSTRAINT/i) ) return '' // ignore constraints + const words = statement.split(" "); + if ( words.length > 1) { // a lone word is not treated as a column + return words[0].trim(); + } + return ''; +} \ No newline at end of file