Skip to content

Commit

Permalink
improve SQL create table handling
Browse files Browse the repository at this point in the history
DenisCarriere committed Mar 2, 2024
1 parent e5ee262 commit 87364cd
Showing 3 changed files with 97 additions and 19 deletions.
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "0.2.4",
"version": "0.2.5",
"name": "substreams-sink-csv",
"description": "Substreams Sink CSV",
"type": "module",
@@ -27,8 +27,8 @@
},
"scripts": {
"start": "tsc && node ./dist/bin/cli.js",
"pretest": "tsc --noEmit",
"test": "bun test",
"posttest": "tsc --noEmit",
"prepublishOnly": "tsc"
},
"dependencies": {
56 changes: 52 additions & 4 deletions src/parseSchema.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,55 @@
import { describe, expect, test } from "bun:test";
import { parseSchema } from "./parseSchema.js";
import { expect, test } from "bun:test";
import { parseColumn, parseCreateTable, parseSchema } from "./parseSchema.js";

test("parseSchema", () => {
// parseCreateTable: the table name is returned regardless of keyword case, an
// `IF NOT EXISTS` clause or a trailing "("; input that is not a CREATE TABLE
// statement yields an empty string.
test("parseCreateTable", () => {
expect(parseCreateTable("CREATE TABLE block_meta")).toBe("block_meta");
expect(parseCreateTable("create table block_meta")).toBe("block_meta");
expect(parseCreateTable("CREATE TABLE IF NOT EXISTS block_meta")).toBe("block_meta");
expect(parseCreateTable("CREATE TABLE block_meta (")).toBe("block_meta");
expect(parseCreateTable("FOO BAR")).toBe("");
})

// parseColumn: the first token of a column definition is returned as the
// column name, with trailing commas and surrounding single/double quotes removed.
test("parseColumn", () => {
expect(parseColumn("id INTEGER PRIMARY KEY,")).toBe("id");
expect(parseColumn("parent_hash TEXT,")).toBe("parent_hash");
expect(parseColumn("timestamp INTEGER")).toBe("timestamp");
expect(parseColumn("\"timestamp\" INTEGER")).toBe("timestamp");
expect(parseColumn("'timestamp' INTEGER")).toBe("timestamp");

// lines that do not declare a column (closing parenthesis, PRIMARY KEY and
// CONSTRAINT clauses) must produce an empty string
expect(parseColumn(");")).toBe("");
expect(parseColumn("PRIMARY KEY(evt_tx_hash,evt_index)")).toBe("");
expect(parseColumn("PRIMARY KEY (ID)")).toBe("");
expect(parseColumn("CONSTRAINT PK_Person PRIMARY KEY (ID,LastName)")).toBe("");
})

// parseSchema: a table whose columns are double-quoted and which ends with a
// table-level PRIMARY KEY clause must yield only the eight declared columns,
// in declaration order, keyed by the table name.
test("parseSchema::factory_pair_created", () => {
const sql = `
CREATE TABLE factory_pair_created (
"evt_tx_hash" VARCHAR(64),
"evt_index" INT,
"evt_block_time" TIMESTAMP,
"evt_block_number" DECIMAL,
"pair" VARCHAR(40),
"param3" DECIMAL,
"token0" VARCHAR(40),
"token1" VARCHAR(40),
PRIMARY KEY(evt_tx_hash,evt_index)
);`
const tables = parseSchema(sql);
// the PRIMARY KEY line must not appear among the parsed columns
expect(tables).toEqual(new Map([["factory_pair_created", [
"evt_tx_hash",
"evt_index",
"evt_block_time",
"evt_block_number",
"pair",
"param3",
"token0",
"token1",
]]]));
})

test("parseSchema::block_meta", () => {
const sql = `
CREATE TABLE block_meta
(
@@ -14,4 +62,4 @@ test("parseSchema", () => {
);`
const tables = parseSchema(sql);
expect(tables).toEqual(new Map([["block_meta", ["id", "at", "number", "hash", "parent_hash", "timestamp"]]]));
});
});
56 changes: 43 additions & 13 deletions src/parseSchema.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,52 @@
/**
 * Parses a SQL schema into a map of table name → ordered column names.
 *
 * The input is split on `;` into statements. A statement is considered only
 * when its first line is recognised by `parseCreateTable`; every line of that
 * statement is then offered to `parseColumn`, and the non-empty results are
 * collected (de-duplicated, in first-seen order).
 *
 * Example: a `CREATE TABLE block_meta (...)` statement declaring
 * `id, at, number, hash, parent_hash, timestamp` produces
 * `Map { "block_meta" => ["id", "at", "number", "hash", "parent_hash", "timestamp"] }`.
 *
 * @param sql - One or more `;`-separated SQL statements, one column per line.
 * @returns A map from table name to its column names; statements that are not
 *          `CREATE TABLE` are skipped.
 */
export function parseSchema(sql: string) {
  const tables = new Map<string, string[]>(); // <table, columns>

  for (const statement of sql.split(";")) {
    // Accept both LF and CRLF line endings.
    const lines = statement.trim().split(/\r?\n/);
    const table = parseCreateTable(lines[0] ?? "");
    if (!table) continue; // not a CREATE TABLE statement

    const columns = new Set<string>(); // Set avoids duplicate column names
    for (const line of lines) {
      const column = parseColumn(line);
      if (column) columns.add(column);
    }
    tables.set(table, Array.from(columns));
  }
  return tables;
}

/**
 * Extracts the table name from a line that starts a `CREATE TABLE` statement.
 *
 * Recognised forms (keywords are case-insensitive, any whitespace separates them):
 *   CREATE TABLE block_meta
 *   CREATE TABLE block_meta (
 *   create table block_meta
 *   CREATE TABLE IF NOT EXISTS block_meta
 *   CREATE TABLE "block_meta" (id INTEGER,
 *
 * @param statement - A single line of SQL.
 * @returns The table name with any quote characters removed, or `''` when the
 *          line is not a `CREATE TABLE` statement or names no table.
 */
export function parseCreateTable(statement: string) {
  // The name is the first token after the keywords; it ends at whitespace,
  // an opening parenthesis or a semicolon, so inline column definitions that
  // follow on the same line are never mistaken for the table name.
  const match = /^\s*CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?([^\s(;]+)/i.exec(statement);
  const rawName = match?.[1] ?? '';
  return rawName.replace(/["'`]/g, ''); // drop identifier quoting
}

/**
 * Line prefixes that may appear inside `CREATE TABLE (...)` but do not declare
 * a column. Patterns are anchored and require the keyword's own delimiter so
 * that column names such as `constraint_id` or `unique_key` are not rejected.
 */
const NON_COLUMN_LINE_PATTERNS: readonly RegExp[] = [
  /^CREATE\s+TABLE\b/i, // statement header carrying the table name
  /^PRIMARY\s+KEY\b/i, // table-level primary key
  /^FOREIGN\s+KEY\b/i, // table-level foreign key
  /^CONSTRAINT\s/i, // named constraint
  /^UNIQUE\s*\(/i, // table-level unique constraint
  /^CHECK\s*\(/i, // table-level check constraint
  /^\)/, // closing parenthesis of the column list
];

/**
 * Extracts the column name from one line of a `CREATE TABLE` body.
 *
 * Lines that yield a name:
 *   id INTEGER PRIMARY KEY,
 *   parent_hash TEXT,
 *   timestamp INTEGER
 *   "timestamp" INTEGER
 *
 * @param statement - A single line taken from a `CREATE TABLE` statement.
 * @returns The column name without quotes, or `''` when the line does not
 *          declare a column (blank line, SQL `--` comment, table header,
 *          constraint clause, closing parenthesis, or a lone token with no type).
 */
export function parseColumn(statement: string) {
  const line = statement
    .replace(/--.*$/, '') // drop SQL line comments
    .replace(/[,;]/g, '') // remove commas and semicolons
    .replace(/["'`]/g, '') // remove identifier quotes
    .trim(); // trim last so leading-comma style (", id INT") still parses

  if (line === '') return ''; // blank or comment-only line
  if (NON_COLUMN_LINE_PATTERNS.some((pattern) => pattern.test(line))) return '';

  // A column definition needs a name followed by at least one more token
  // (its type); any whitespace, including tabs, may separate them.
  const match = /^(\S+)\s+\S/.exec(line);
  return match?.[1] ?? '';
}

0 comments on commit 87364cd

Please sign in to comment.