diff --git a/docs/core.html b/docs/core.html index ec7339a..f4cd1cf 100644 --- a/docs/core.html +++ b/docs/core.html @@ -61,9 +61,9 @@
preformat_statements
[source]+
preformat_statements
(s
)
preformat_statements
[source]
preformat_statements
(s
)Write a newline in
@@ -1117,7 +1117,7 @@s
for allstatements
and uppercase them but not if they are inside a commentLowercasing query -
lowercase_query
[source]+
lowercase_query
(s
)
lowercase_query
[source]
lowercase_query
(s
)Lowercase query but leave comments and text in quotes untouched
@@ -1375,7 +1375,7 @@Add whitespaces between symbols -
add_whitespaces_query
[source]+
add_whitespaces_query
(s
)
add_whitespaces_query
[source]
add_whitespaces_query
(s
)Add whitespaces between symbols (=!<>) for query
@@ -1475,7 +1475,7 @@s
but not for commentsPARTITION BY <
-
format_partition_by
[source]+
format_partition_by
(s
,base_indentation
)
format_partition_by
[source]
format_partition_by
(s
,base_indentation
)Format PARTITION BY line in SELECT (DISTINCT)
@@ -1513,7 +1513,7 @@Remove (mistake) comma at end o
-
remove_wrong_end_comma
[source]+
remove_wrong_end_comma
(split_s
)
remove_wrong_end_comma
[source]
remove_wrong_end_comma
(split_s
)Remove mistakenly placed commas at the end of SELECT statement using
@@ -1688,7 +1688,7 @@split_s
with keys "string", "comment" and "quote"Helper function for
case when
-
format_case_when
[source]+
format_case_when
(s
)
format_case_when
[source]
format_case_when
(s
)Format case when statement in line
@@ -1726,7 +1726,7 @@s
SELECT
-
format_select
[source]+
format_select
(s
)
format_select
[source]
format_select
(s
)Format SELECT statement line
@@ -3091,7 +3091,7 @@s
FROM
-
format_from
[source]+
format_from
(s
)
format_from
[source]
format_from
(s
)Format FROM statement line
@@ -3160,7 +3160,7 @@s
(LEFT / RIGHT / INNER / OUTER) JOIN
-
format_join
[source]+
format_join
(s
)
format_join
[source]
format_join
(s
)Format JOIN statement line
@@ -3229,7 +3229,7 @@s
ON
-
format_on
[source]+
format_on
(s
)
format_on
[source]
format_on
(s
)Format ON statement line
@@ -3428,7 +3428,7 @@s
WHERE
-+ {% endraw %} + + + {% raw %} + +
format_where
[source]+
format_where
(s
)
format_where
[source]
format_where
(s
)Format WHERE statement line
@@ -3577,7 +3577,7 @@s
Format all statements -
format_statement_line
[source]+
format_statement_line
(s
)
format_statement_line
[source]
format_statement_line
(s
)Format statement line
@@ -3721,7 +3721,7 @@s
format_statement_line -
format_statements
[source]+
format_statements
(s
)
format_statements
[source]
format_statements
(s
)Format statements lines
@@ -3799,7 +3799,7 @@s
Format multiline comments -
format_multiline_comments
[source]+
format_multiline_comments
(s
)
format_multiline_comments
[source]
format_multiline_comments
(s
)Format multiline comments by replacing multiline comment [CI] by newline and adding indentation
@@ -3837,7 +3837,7 @@Add semicolon at the end of query -
add_semicolon
[source]+
add_semicolon
(s
)
add_semicolon
[source]
add_semicolon
(s
)Add a semicolon at the end of query
@@ -4017,7 +4017,7 @@s
Putting everything together -
format_simple_sql
[source]+
format_simple_sql
(s
,semicolon
=False
)
format_simple_sql
[source]
format_simple_sql
(s
,semicolon
=False
)Format a simple SQL query without subqueries
@@ -4242,7 +4242,7 @@s
Main function handling q
-+
format_sql
[source]+
format_sql
(s
,semicolon
=False
)
format_sql
[source]
format_sql
(s
,semicolon
=False
)Format SQL query with subqueries
diff --git a/docs/utils.html b/docs/utils.html index cff6468..4062512 100644 --- a/docs/utils.html +++ b/docs/utils.html @@ -1755,6 +1755,222 @@s
replace_newline_chars
+ ++ {% endraw %} + + {% raw %} + +++ ++ +++ + ++ +++ +
sub_in_sql
[source]++
sub_in_sql
(regex
,repl
,s
)Substitute
+ +regex
withrepl
in querys
ignoring comments and text in quotes+ ++ {% endraw %} + + {% raw %} + +++ {% endraw %} + + + {% raw %} + ++ ++ ++++++ +assert_and_print( + sub_in_sql( + r",([\w\d])", r", \1", "select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)" + ), + "select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)" +) +++ ++ +++ ++ +++select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2) +++ ++ {% endraw %} + + {% raw %} + +++ ++ +++ + ++ +++ +
add_whitespaces_after_comma
[source]++
add_whitespaces_after_comma
(s
)Add whitespace after comma in query
+ +s
if there is no whitespace+ ++ {% endraw %} + + {% raw %} + +++ {% endraw %} + + {% raw %} + ++ ++ ++++++ +assert_and_print( + add_whitespaces_after_comma( + "select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)" + ), + "select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)" +) +++ ++ +++ ++ +++select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2) ++++ {% endraw %} + + {% raw %} + ++ ++ ++++++ +assert_and_print( + add_whitespaces_after_comma("select asdf,qwer,substr(asdf,1,2) as qwerty"), + "select asdf, qwer, substr(asdf, 1, 2) as qwerty" +) +++ ++ +++ ++ +++select asdf, qwer, substr(asdf, 1, 2) as qwerty +++{% endraw %} @@ -1778,7 +1994,7 @@+ ++ ++++++ +assert_and_print( + add_whitespaces_after_comma("select asdf, qwer, substr(asdf,1,2) as qwerty"), + "select asdf, qwer, substr(asdf, 1, 2) as qwerty" +) ++++ +++ ++ +++select asdf, qwer, substr(asdf, 1, 2) as qwerty ++
replace_newline_chars -
identify_end_of_fields
[source]+
identify_end_of_fields
(s
)
identify_end_of_fields
[source]
identify_end_of_fields
(s
)Identify end of fields in query
@@ -1973,7 +2189,7 @@s
identify_end_of_fields<
-
add_newline_indentation
[source]+
add_newline_indentation
(s
,indentation
)
add_newline_indentation
[source]
add_newline_indentation
(s
,indentation
)Add newline and indentation for end of fields in query
@@ -2146,7 +2362,7 @@s
Handling subqueries -
extract_outer_subquery
[source]+
extract_outer_subquery
(s
)
extract_outer_subquery
[source]
extract_outer_subquery
(s
)Extract outer subquery in query
@@ -2199,7 +2415,7 @@s
extract_outer_subquery<
-
format_subquery
[source]+
format_subquery
(s
,previous_s
)
format_subquery
[source]
format_subquery
(s
,previous_s
)Format subquery in line
@@ -2237,7 +2453,7 @@s
based on indentation onprevious_s
Query identification -
check_sql_query
[source]+
check_sql_query
(s
)
check_sql_query
[source]
check_sql_query
(s
)Checks whether
@@ -2435,7 +2651,7 @@s
is a SQL query based on match of CREATE TABLE / VIEW or SELECT ignoring comments and text in quotesMarker to not format
-
check_skip_marker
[source]+
check_skip_marker
(s
)
check_skip_marker
[source]
check_skip_marker
(s
)Checks whether user set marker /skip-formatter/ to not format query
@@ -2519,7 +2735,7 @@Check lines were CREATE
-
identify_create_table_view
[source]+
identify_create_table_view
(s
)
identify_create_table_view
[source]
identify_create_table_view
(s
)Identify positions of CREATE .. TABLE / VIEW statements
@@ -2594,7 +2810,7 @@
identify_create_tab
-
count_lines
[source]+
count_lines
(s
)
count_lines
[source]
count_lines
(s
)Count the number of lines in
@@ -2669,7 +2885,7 @@s
count_lines
-
find_line_number
[source]+
find_line_number
(s
,positions
)
find_line_number
[source]
find_line_number
(s
,positions
)Find line number in
@@ -2744,7 +2960,7 @@s
out ofpositions
find_line_number
-
disimilarity
[source]+
disimilarity
(str1
,str2
)
disimilarity
[source]
disimilarity
(str1
,str2
)Calculate dissimilarity between two strings by word
@@ -2838,7 +3054,7 @@
disimilarity
-
assign_comment
[source]+
assign_comment
(fs
,cds
)
assign_comment
[source]
assign_comment
(fs
,cds
)Assign comments in list of dictionaries
cds
to formatted stringfs
using Jaccard distanceThe comment dictionaries
diff --git a/nbs/00_core.ipynb b/nbs/00_core.ipynb index 88d25f4..ead9b25 100644 --- a/nbs/00_core.ipynb +++ b/nbs/00_core.ipynb @@ -77,9 +77,9 @@ "create or replace table mytable as -- Mytable example\n", "/* multi line\n", " comment */\n", - "seLecT a.asdf, b.qwer, -- some comment here\n", + "seLecT a.asdf,b.qwer, -- some comment here\n", "/* and here is a line comment inside select */\n", - "substr(c.asdf, 1, 2) as substr_asdf, \n", + "substr(c.asdf,1,2) as substr_asdf, \n", "/* some commenT \n", "there */\n", "case when a.asdf= 1 then 'b' /* here a case comment */\n", @@ -198,6 +198,7 @@ "#export\n", "def clean_query(s):\n", " \"Remove redundant whitespaces and mark comments boundaries and remove newlines afterwards in query `s`\"\n", + " s = add_whitespaces_after_comma(s) # add whitespaces after comma but no in comments or quotes\n", " s = remove_redundant_whitespaces(s) # remove too many whitespaces but no newlines\n", " s = mark_comments(s) # mark comments with special tokens [C], [CS] and [CI]\n", " s = replace_newline_chars(s) # remove newlines but not in the comments\n", @@ -3521,7 +3522,6 @@ "Converted 01_format_file.ipynb.\n", "Converted 02_utils.ipynb.\n", "Converted 03_validation.ipynb.\n", - "Converted 04_release.ipynb.\n", "Converted index.ipynb.\n" ] } diff --git a/nbs/02_utils.ipynb b/nbs/02_utils.ipynb index 4fec455..205caf3 100644 --- a/nbs/02_utils.ipynb +++ b/nbs/02_utils.ipynb @@ -1479,6 +1479,134 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Substitute regex in SQL ignoring comments and quotes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def sub_in_sql(regex, repl, s):\n", + " \"Subsitute `regex` with `repl` in query `s` ignoring comments and text in quotes\"\n", + " split_s = split_comment_quote(s) # split by comment / non-comment and quote / non-quote\n", + " for d in split_s: # loop on dictionaries with strings\n", + " 
if not d[\"comment\"] and not d[\"quote\"]: # only for non comments and non text in quotes\n", + " d[\"string\"] = re.sub(regex, repl, d[\"string\"])\n", + " s = \"\".join(d[\"string\"] for d in split_s)\n", + " return s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)\n" + ] + } + ], + "source": [ + "assert_and_print(\n", + " sub_in_sql(\n", + " r\",([\\w\\d])\", r\", \\1\", \"select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)\"\n", + " ),\n", + " \"select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add whitespaces after comma" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#export\n", + "def add_whitespaces_after_comma(s):\n", + " \"Add whitespace after comma in query `s` if there is no whitespace\"\n", + " s = sub_in_sql(r\",([\\w\\d]+)\", r\", \\1\", s)\n", + " return s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)\n" + ] + } + ], + "source": [ + "assert_and_print(\n", + " add_whitespaces_after_comma(\n", + " \"select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)\"\n", + " ),\n", + " \"select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "select asdf, qwer, substr(asdf, 1, 2) as qwerty\n" + ] + } + ], + "source": [ + "assert_and_print(\n", + " add_whitespaces_after_comma(\"select asdf,qwer,substr(asdf,1,2) as qwerty\"),\n", + " \"select asdf, qwer, substr(asdf, 1, 2) as qwerty\"\n", + ")" + ] 
+ }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "select asdf, qwer, substr(asdf, 1, 2) as qwerty\n" + ] + } + ], + "source": [ + "assert_and_print(\n", + " add_whitespaces_after_comma(\"select asdf, qwer, substr(asdf,1,2) as qwerty\"),\n", + " \"select asdf, qwer, substr(asdf, 1, 2) as qwerty\"\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/sql_formatter/_nbdev.py b/sql_formatter/_nbdev.py index f9d4e30..a35f56a 100644 --- a/sql_formatter/_nbdev.py +++ b/sql_formatter/_nbdev.py @@ -41,6 +41,8 @@ "identify_in_sql": "02_utils.ipynb", "split_by_semicolon": "02_utils.ipynb", "replace_newline_chars": "02_utils.ipynb", + "sub_in_sql": "02_utils.ipynb", + "add_whitespaces_after_comma": "02_utils.ipynb", "identify_end_of_fields": "02_utils.ipynb", "add_newline_indentation": "02_utils.ipynb", "extract_outer_subquery": "02_utils.ipynb", diff --git a/sql_formatter/core.py b/sql_formatter/core.py index 9cc7488..faebc87 100644 --- a/sql_formatter/core.py +++ b/sql_formatter/core.py @@ -32,6 +32,7 @@ # Cell def clean_query(s): "Remove redundant whitespaces and mark comments boundaries and remove newlines afterwards in query `s`" + s = add_whitespaces_after_comma(s) # add whitespaces after comma but no in comments or quotes s = remove_redundant_whitespaces(s) # remove too many whitespaces but no newlines s = mark_comments(s) # mark comments with special tokens [C], [CS] and [CI] s = replace_newline_chars(s) # remove newlines but not in the comments diff --git a/sql_formatter/utils.py b/sql_formatter/utils.py index 5c888c1..5d6de90 100644 --- a/sql_formatter/utils.py +++ b/sql_formatter/utils.py @@ -3,10 +3,10 @@ __all__ = ['assert_and_print', 'compress_dicts', 'remove_whitespaces_newline', 'remove_whitespaces_comments', 'remove_redundant_whitespaces', 'remove_whitespaces_parenthesis', 'add_whitespaces_between_symbols', 'mark_ci_comments', 
'mark_comments', 'split_query', 'split_apply_concat', 'split_comment_quote', - 'split_comment', 'identify_in_sql', 'split_by_semicolon', 'replace_newline_chars', 'identify_end_of_fields', - 'add_newline_indentation', 'extract_outer_subquery', 'format_subquery', 'check_sql_query', - 'check_skip_marker', 'identify_create_table_view', 'count_lines', 'find_line_number', 'disimilarity', - 'assign_comment'] + 'split_comment', 'identify_in_sql', 'split_by_semicolon', 'replace_newline_chars', 'sub_in_sql', + 'add_whitespaces_after_comma', 'identify_end_of_fields', 'add_newline_indentation', 'extract_outer_subquery', + 'format_subquery', 'check_sql_query', 'check_skip_marker', 'identify_create_table_view', 'count_lines', + 'find_line_number', 'disimilarity', 'assign_comment'] # Cell import re @@ -473,6 +473,22 @@ def replace_newline_chars(s): clean_s = "".join([c if i not in positions else " " for i, c in enumerate(s)]) return clean_s +# Cell +def sub_in_sql(regex, repl, s): + "Subsitute `regex` with `repl` in query `s` ignoring comments and text in quotes" + split_s = split_comment_quote(s) # split by comment / non-comment and quote / non-quote + for d in split_s: # loop on dictionaries with strings + if not d["comment"] and not d["quote"]: # only for non comments and non text in quotes + d["string"] = re.sub(regex, repl, d["string"]) + s = "".join(d["string"] for d in split_s) + return s + +# Cell +def add_whitespaces_after_comma(s): + "Add whitespace after comma in query `s` if there is no whitespace" + s = sub_in_sql(r",([\w\d]+)", r", \1", s) + return s + # Cell def identify_end_of_fields(s): "Identify end of fields in query `s`"cds
should contain the keys "comment" and "preceding" (string)