From 64e63e44d7fb09eb27dc026ebd74864f409aa7c1 Mon Sep 17 00:00:00 2001 From: Pablo Rodriguez Mira <36644554+PabloRMira@users.noreply.github.com> Date: Sun, 14 Feb 2021 22:23:32 +0100 Subject: [PATCH] [FIX] Formatter does not add space after comma (#147) --- docs/core.html | 38 +++---- docs/utils.html | 238 ++++++++++++++++++++++++++++++++++++++-- nbs/00_core.ipynb | 6 +- nbs/02_utils.ipynb | 128 +++++++++++++++++++++ sql_formatter/_nbdev.py | 2 + sql_formatter/core.py | 1 + sql_formatter/utils.py | 24 +++- 7 files changed, 400 insertions(+), 37 deletions(-) diff --git a/docs/core.html b/docs/core.html index ec7339a..f4cd1cf 100644 --- a/docs/core.html +++ b/docs/core.html @@ -61,9 +61,9 @@

General formattingcreate or replace table mytable as -- Mytable example /* multi line comment */ -seLecT a.asdf, b.qwer, -- some comment here +seLecT a.asdf,b.qwer, -- some comment here /* and here is a line comment inside select */ -substr(c.asdf, 1, 2) as substr_asdf, +substr(c.asdf,1,2) as substr_asdf, /* some commenT there */ case when a.asdf= 1 then 'b' /* here a case comment */ @@ -506,7 +506,7 @@

Preformatting queries -

preformat_statements[source]

preformat_statements(s)

+

preformat_statements[source]

preformat_statements(s)

Write a newline in s for all statements and uppercase them but not if they are inside a comment

@@ -1117,7 +1117,7 @@

Lowercasing query -

lowercase_query[source]

lowercase_query(s)

+

lowercase_query[source]

lowercase_query(s)

Lowercase query but leave comments and text in quotes untouched

@@ -1375,7 +1375,7 @@

Add whitespaces between symbols -

add_whitespaces_query[source]

add_whitespaces_query(s)

+

add_whitespaces_query[source]

add_whitespaces_query(s)

Add whitespaces between symbols (=!<>) for query s but not for comments

@@ -1475,7 +1475,7 @@

PARTITION BY <
-

format_partition_by[source]

format_partition_by(s, base_indentation)

+

format_partition_by[source]

format_partition_by(s, base_indentation)

Format PARTITION BY line in SELECT (DISTINCT)

@@ -1513,7 +1513,7 @@

Remove (mistake) comma at end o
-

remove_wrong_end_comma[source]

remove_wrong_end_comma(split_s)

+

remove_wrong_end_comma[source]

remove_wrong_end_comma(split_s)

Remove mistakenly placed commas at the end of SELECT statement using split_s with keys "string", "comment" and "quote"

@@ -1688,7 +1688,7 @@

Helper function for case when -

format_case_when[source]

format_case_when(s)

+

format_case_when[source]

format_case_when(s)

Format case when statement in line s

@@ -1726,7 +1726,7 @@

SELECT

-

format_select[source]

format_select(s)

+

format_select[source]

format_select(s)

Format SELECT statement line s

@@ -3091,7 +3091,7 @@

FROM

-

format_from[source]

format_from(s)

+

format_from[source]

format_from(s)

Format FROM statement line s

@@ -3160,7 +3160,7 @@

(LEFT / RIGHT / INNER / OUTER) JOIN
-

format_join[source]

format_join(s)

+

format_join[source]

format_join(s)

Format JOIN statement line s

@@ -3229,7 +3229,7 @@

ON

-

format_on[source]

format_on(s)

+

format_on[source]

format_on(s)

Format ON statement line s

@@ -3428,7 +3428,7 @@

WHERE

-

format_where[source]

format_where(s)

+

format_where[source]

format_where(s)

Format WHERE statement line s

@@ -3577,7 +3577,7 @@

Format all statements -

format_statement_line[source]

format_statement_line(s)

+

format_statement_line[source]

format_statement_line(s)

Format statement line s

@@ -3721,7 +3721,7 @@

format_statement_line -

format_statements[source]

format_statements(s)

+

format_statements[source]

format_statements(s)

Format statements lines s

@@ -3799,7 +3799,7 @@

Format multiline comments -

format_multiline_comments[source]

format_multiline_comments(s)

+

format_multiline_comments[source]

format_multiline_comments(s)

Format multiline comments by replacing multiline comment [CI] by newline and adding indentation

@@ -3837,7 +3837,7 @@

Add semicolon at the end of query -

add_semicolon[source]

add_semicolon(s)

+

add_semicolon[source]

add_semicolon(s)

Add a semicolon at the end of query s

@@ -4017,7 +4017,7 @@

Putting everything together -

format_simple_sql[source]

format_simple_sql(s, semicolon=False)

+

format_simple_sql[source]

format_simple_sql(s, semicolon=False)

Format a simple SQL query without subqueries s

@@ -4242,7 +4242,7 @@

Main function handling q
-

format_sql[source]

format_sql(s, semicolon=False)

+

format_sql[source]

format_sql(s, semicolon=False)

Format SQL query with subqueries s

diff --git a/docs/utils.html b/docs/utils.html index cff6468..4062512 100644 --- a/docs/utils.html +++ b/docs/utils.html @@ -1755,6 +1755,222 @@

replace_newline_chars

+

+ {% endraw %} + +
+
+

Substitute regex in SQL ignoring comments and quotes

+
+
+
+ {% raw %} + +
+ +
+
+ +
+ + +
+

sub_in_sql[source]

sub_in_sql(regex, repl, s)

+
+

Substitute regex with repl in query s ignoring comments and text in quotes

+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+ {% endraw %} + + {% raw %} + +
+
+ +
+
+
assert_and_print(
+    sub_in_sql(
+        r",([\w\d])", r", \1", "select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)"
+    ),
+    "select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)"
+)
+
+ +
+
+
+ +
+
+ +
+ +
+
select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)
+
+
+
+ +
+
+ +
+ {% endraw %} + +
+
+

Add whitespaces after comma

+
+
+
+ {% raw %} + +
+ +
+
+ +
+ + +
+

add_whitespaces_after_comma[source]

add_whitespaces_after_comma(s)

+
+

Add whitespace after comma in query s if there is no whitespace

+ +
+ +
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+ +
+ {% endraw %} + + {% raw %} + +
+
+ +
+
+
assert_and_print(
+    add_whitespaces_after_comma(
+        "select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)"
+    ),
+    "select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)"
+)
+
+ +
+
+
+ +
+
+ +
+ +
+
select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)
+
+
+
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+
+ +
+
+
assert_and_print(
+    add_whitespaces_after_comma("select asdf,qwer,substr(asdf,1,2) as qwerty"),
+    "select asdf, qwer, substr(asdf, 1, 2) as qwerty"
+)
+
+ +
+
+
+ +
+
+ +
+ +
+
select asdf, qwer, substr(asdf, 1, 2) as qwerty
+
+
+
+ +
+
+ +
+ {% endraw %} + + {% raw %} + +
+
+ +
+
+
assert_and_print(
+    add_whitespaces_after_comma("select asdf, qwer, substr(asdf,1,2) as qwerty"),
+    "select asdf, qwer, substr(asdf, 1, 2) as qwerty"
+)
+
+ +
+
+
+ +
+
+ +
+ +
+
select asdf, qwer, substr(asdf, 1, 2) as qwerty
+
+
+
+ +
+
+
{% endraw %} @@ -1778,7 +1994,7 @@

replace_newline_chars -

identify_end_of_fields[source]

identify_end_of_fields(s)

+

identify_end_of_fields[source]

identify_end_of_fields(s)

Identify end of fields in query s

@@ -1973,7 +2189,7 @@

identify_end_of_fields<
-

add_newline_indentation[source]

add_newline_indentation(s, indentation)

+

add_newline_indentation[source]

add_newline_indentation(s, indentation)

Add newline and indentation for end of fields in query s

@@ -2146,7 +2362,7 @@

Handling subqueries -

extract_outer_subquery[source]

extract_outer_subquery(s)

+

extract_outer_subquery[source]

extract_outer_subquery(s)

Extract outer subquery in query s

@@ -2199,7 +2415,7 @@

extract_outer_subquery<
-

format_subquery[source]

format_subquery(s, previous_s)

+

format_subquery[source]

format_subquery(s, previous_s)

Format subquery in line s based on indentation on previous_s

@@ -2237,7 +2453,7 @@

Query identification -

check_sql_query[source]

check_sql_query(s)

+

check_sql_query[source]

check_sql_query(s)

Checks whether s is a SQL query based on match of CREATE TABLE / VIEW or SELECT ignoring comments and text in quotes

@@ -2435,7 +2651,7 @@

Marker to not format
-

check_skip_marker[source]

check_skip_marker(s)

+

check_skip_marker[source]

check_skip_marker(s)

Checks whether user set marker /skip-formatter/ to not format query

@@ -2519,7 +2735,7 @@

Check lines were CREATE
-

identify_create_table_view[source]

identify_create_table_view(s)

+

identify_create_table_view[source]

identify_create_table_view(s)

Identify positions of CREATE .. TABLE / VIEW statements

@@ -2594,7 +2810,7 @@

identify_create_tab
-

count_lines[source]

count_lines(s)

+

count_lines[source]

count_lines(s)

Count the number of lines in s

@@ -2669,7 +2885,7 @@

count_lines -

find_line_number[source]

find_line_number(s, positions)

+

find_line_number[source]

find_line_number(s, positions)

Find line number in s out of positions

@@ -2744,7 +2960,7 @@

find_line_number -

disimilarity[source]

disimilarity(str1, str2)

+

disimilarity[source]

disimilarity(str1, str2)

Calculate dissimilarity between two strings by word

@@ -2838,7 +3054,7 @@

disimilarity -

assign_comment[source]

assign_comment(fs, cds)

+

assign_comment[source]

assign_comment(fs, cds)

Assign comments in list of dictionaries cds to formatted string fs using Jaccard distance

The comment dictionaries cds should contain the keys "comment" and "preceding" (string)

diff --git a/nbs/00_core.ipynb b/nbs/00_core.ipynb index 88d25f4..ead9b25 100644 --- a/nbs/00_core.ipynb +++ b/nbs/00_core.ipynb @@ -77,9 +77,9 @@ "create or replace table mytable as -- Mytable example\n", "/* multi line\n", " comment */\n", - "seLecT a.asdf, b.qwer, -- some comment here\n", + "seLecT a.asdf,b.qwer, -- some comment here\n", "/* and here is a line comment inside select */\n", - "substr(c.asdf, 1, 2) as substr_asdf, \n", + "substr(c.asdf,1,2) as substr_asdf, \n", "/* some commenT \n", "there */\n", "case when a.asdf= 1 then 'b' /* here a case comment */\n", @@ -198,6 +198,7 @@ "#export\n", "def clean_query(s):\n", " \"Remove redundant whitespaces and mark comments boundaries and remove newlines afterwards in query `s`\"\n", + " s = add_whitespaces_after_comma(s) # add whitespaces after comma but no in comments or quotes\n", " s = remove_redundant_whitespaces(s) # remove too many whitespaces but no newlines\n", " s = mark_comments(s) # mark comments with special tokens [C], [CS] and [CI]\n", " s = replace_newline_chars(s) # remove newlines but not in the comments\n", @@ -3521,7 +3522,6 @@ "Converted 01_format_file.ipynb.\n", "Converted 02_utils.ipynb.\n", "Converted 03_validation.ipynb.\n", - "Converted 04_release.ipynb.\n", "Converted index.ipynb.\n" ] } diff --git a/nbs/02_utils.ipynb b/nbs/02_utils.ipynb index 4fec455..205caf3 100644 --- a/nbs/02_utils.ipynb +++ b/nbs/02_utils.ipynb @@ -1479,6 +1479,134 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Substitute regex in SQL ignoring comments and quotes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def sub_in_sql(regex, repl, s):\n", + " \"Subsitute `regex` with `repl` in query `s` ignoring comments and text in quotes\"\n", + " split_s = split_comment_quote(s) # split by comment / non-comment and quote / non-quote\n", + " for d in split_s: # loop on dictionaries with strings\n", + " 
if not d[\"comment\"] and not d[\"quote\"]: # only for non comments and non text in quotes\n", + " d[\"string\"] = re.sub(regex, repl, d[\"string\"])\n", + " s = \"\".join(d[\"string\"] for d in split_s)\n", + " return s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)\n" + ] + } + ], + "source": [ + "assert_and_print(\n", + " sub_in_sql(\n", + " r\",([\\w\\d])\", r\", \\1\", \"select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)\"\n", + " ),\n", + " \"select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add whitespaces after comma" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def add_whitespaces_after_comma(s):\n", + " \"Add whitespace after comma in query `s` if there is no whitespace\"\n", + " s = sub_in_sql(r\",([\\w\\d]+)\", r\", \\1\", s)\n", + " return s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)\n" + ] + } + ], + "source": [ + "assert_and_print(\n", + " add_whitespaces_after_comma(\n", + " \"select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)\"\n", + " ),\n", + " \"select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "select asdf, qwer, substr(asdf, 1, 2) as qwerty\n" + ] + } + ], + "source": [ + "assert_and_print(\n", + " add_whitespaces_after_comma(\"select asdf,qwer,substr(asdf,1,2) as qwerty\"),\n", + " \"select asdf, qwer, substr(asdf, 1, 2) as qwerty\"\n", + ")" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "select asdf, qwer, substr(asdf, 1, 2) as qwerty\n" + ] + } + ], + "source": [ + "assert_and_print(\n", + " add_whitespaces_after_comma(\"select asdf, qwer, substr(asdf,1,2) as qwerty\"),\n", + " \"select asdf, qwer, substr(asdf, 1, 2) as qwerty\"\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/sql_formatter/_nbdev.py b/sql_formatter/_nbdev.py index f9d4e30..a35f56a 100644 --- a/sql_formatter/_nbdev.py +++ b/sql_formatter/_nbdev.py @@ -41,6 +41,8 @@ "identify_in_sql": "02_utils.ipynb", "split_by_semicolon": "02_utils.ipynb", "replace_newline_chars": "02_utils.ipynb", + "sub_in_sql": "02_utils.ipynb", + "add_whitespaces_after_comma": "02_utils.ipynb", "identify_end_of_fields": "02_utils.ipynb", "add_newline_indentation": "02_utils.ipynb", "extract_outer_subquery": "02_utils.ipynb", diff --git a/sql_formatter/core.py b/sql_formatter/core.py index 9cc7488..faebc87 100644 --- a/sql_formatter/core.py +++ b/sql_formatter/core.py @@ -32,6 +32,7 @@ # Cell def clean_query(s): "Remove redundant whitespaces and mark comments boundaries and remove newlines afterwards in query `s`" + s = add_whitespaces_after_comma(s) # add whitespaces after comma but no in comments or quotes s = remove_redundant_whitespaces(s) # remove too many whitespaces but no newlines s = mark_comments(s) # mark comments with special tokens [C], [CS] and [CI] s = replace_newline_chars(s) # remove newlines but not in the comments diff --git a/sql_formatter/utils.py b/sql_formatter/utils.py index 5c888c1..5d6de90 100644 --- a/sql_formatter/utils.py +++ b/sql_formatter/utils.py @@ -3,10 +3,10 @@ __all__ = ['assert_and_print', 'compress_dicts', 'remove_whitespaces_newline', 'remove_whitespaces_comments', 'remove_redundant_whitespaces', 'remove_whitespaces_parenthesis', 'add_whitespaces_between_symbols', 'mark_ci_comments', 
'mark_comments', 'split_query', 'split_apply_concat', 'split_comment_quote', - 'split_comment', 'identify_in_sql', 'split_by_semicolon', 'replace_newline_chars', 'identify_end_of_fields', - 'add_newline_indentation', 'extract_outer_subquery', 'format_subquery', 'check_sql_query', - 'check_skip_marker', 'identify_create_table_view', 'count_lines', 'find_line_number', 'disimilarity', - 'assign_comment'] + 'split_comment', 'identify_in_sql', 'split_by_semicolon', 'replace_newline_chars', 'sub_in_sql', + 'add_whitespaces_after_comma', 'identify_end_of_fields', 'add_newline_indentation', 'extract_outer_subquery', + 'format_subquery', 'check_sql_query', 'check_skip_marker', 'identify_create_table_view', 'count_lines', + 'find_line_number', 'disimilarity', 'assign_comment'] # Cell import re @@ -473,6 +473,22 @@ def replace_newline_chars(s): clean_s = "".join([c if i not in positions else " " for i, c in enumerate(s)]) return clean_s +# Cell +def sub_in_sql(regex, repl, s): + "Subsitute `regex` with `repl` in query `s` ignoring comments and text in quotes" + split_s = split_comment_quote(s) # split by comment / non-comment and quote / non-quote + for d in split_s: # loop on dictionaries with strings + if not d["comment"] and not d["quote"]: # only for non comments and non text in quotes + d["string"] = re.sub(regex, repl, d["string"]) + s = "".join(d["string"] for d in split_s) + return s + +# Cell +def add_whitespaces_after_comma(s): + "Add whitespace after comma in query `s` if there is no whitespace" + s = sub_in_sql(r",([\w\d]+)", r", \1", s) + return s + # Cell def identify_end_of_fields(s): "Identify end of fields in query `s`"