diff --git a/docs/additional_tests.html b/docs/additional_tests.html index 4ac6f9c..64fd8f3 100644 --- a/docs/additional_tests.html +++ b/docs/additional_tests.html @@ -119,6 +119,107 @@
assert_and_print(
+ format_sql(
+"""SELECT var
+ FROM table_selection as a
+ LEFT JOIN table2 as b ON a.id = b.id
+ LEFT JOIN table3 as c ON a.id = c.id
+ ORDER BY 1
+"""),
+"""
+SELECT var
+FROM table_selection as a
+ LEFT JOIN table2 as b
+ ON a.id = b.id
+ LEFT JOIN table3 as c
+ ON a.id = c.id
+ORDER BY 1
+""".strip()
+)
+
SELECT var +FROM table_selection as a + LEFT JOIN table2 as b + ON a.id = b.id + LEFT JOIN table3 as c + ON a.id = c.id +ORDER BY 1 ++
assert_and_print(
+ split_query("select var from table_selection"),
+ [{"string": "select var ", "comment": False, "quote": False, "select": True},
+ {"string": "from table_selection", "comment": False, "quote": False, "select": False}]
+)
+
[{'string': 'select var ', 'comment': False, 'quote': False, 'select': True}, {'string': 'from table_selection', 'comment': False, 'quote': False, 'select': False}] ++
split_query
-split_apply_concat
[source]+
split_apply_concat
(s
,f
)
split_apply_concat
[source]
split_apply_concat
(s
,f
)Split query
@@ -1330,7 +1330,7 @@s
, apply functionf
and concatenate stringsSplit by comment / no
-
split_comment_quote
[source]+
split_comment_quote
(s
)
split_comment_quote
[source]
split_comment_quote
(s
)Split query
@@ -1458,7 +1458,7 @@s
into dictionaries with keys 'string', 'comment' and 'quote'Split by comment / non-comment -
split_comment
[source]+
split_comment
(s
)
split_comment
[source]
split_comment
(s
)Split query
@@ -1542,7 +1542,7 @@s
into dictionaries with keys 'string', 'comment'Get posit
-
identify_in_sql
[source]+
identify_in_sql
(regex
,s
)
identify_in_sql
[source]
identify_in_sql
(regex
,s
)Find positions of
@@ -1716,7 +1716,7 @@regex
(str or list) in strings
ignoring comment and text in quotesSplit individual queries ba
-
split_by_semicolon
[source]+
split_by_semicolon
(s
)
split_by_semicolon
[source]
split_by_semicolon
(s
)Split string
@@ -1801,7 +1801,7 @@s
by semicolon but not between parenthesis or in comments
split_by_semicolon
-
replace_newline_chars
[source]+
replace_newline_chars
(s
)
replace_newline_chars
[source]
replace_newline_chars
(s
)Replace newline characters in
@@ -1877,7 +1877,7 @@s
by whitespace but not in the commentsSubstitute regex i
-
sub_in_sql
[source]+
sub_in_sql
(regex
,repl
,s
)
sub_in_sql
[source]
sub_in_sql
(regex
,repl
,s
)Subsitute
@@ -1951,7 +1951,7 @@regex
withrepl
in querys
ignoring comments and text in quotesAdd whitespaces after comma -
add_whitespaces_after_comma
[source]+
add_whitespaces_after_comma
(s
)
add_whitespaces_after_comma
[source]
add_whitespaces_after_comma
(s
)Add whitespace after comma in query
@@ -2096,7 +2096,7 @@s
if there is no whitespace
add_whitespaces_af
-
identify_end_of_fields
[source]+
identify_end_of_fields
(s
)
identify_end_of_fields
[source]
identify_end_of_fields
(s
)Identify end of fields in query
@@ -2328,7 +2328,7 @@s
identify_end_of_fields<
-
add_newline_indentation
[source]+
add_newline_indentation
(s
,indentation
)
add_newline_indentation
[source]
add_newline_indentation
(s
,indentation
)Add newline and indentation for end of fields in query
@@ -2501,7 +2501,7 @@s
Handling subqueries -
extract_outer_subquery
[source]+
extract_outer_subquery
(s
)
extract_outer_subquery
[source]
extract_outer_subquery
(s
)Extract outer subquery in query
@@ -2554,7 +2554,7 @@s
extract_outer_subquery<
-
format_subquery
[source]+
format_subquery
(s
,previous_s
)
format_subquery
[source]
format_subquery
(s
,previous_s
)Format subquery in line
@@ -2592,7 +2592,7 @@s
based on indentation onprevious_s
Query identification -
check_sql_query
[source]+
check_sql_query
(s
)
check_sql_query
[source]
check_sql_query
(s
)Checks whether
@@ -2790,7 +2790,7 @@s
is a SQL query based on match of CREATE TABLE / VIEW or SELECT ignoring comments and text in quotesMarker to not format
-
check_skip_marker
[source]+
check_skip_marker
(s
)
check_skip_marker
[source]
check_skip_marker
(s
)Checks whether user set marker /skip-formatter/ to not format query
@@ -2874,7 +2874,7 @@Check lines were CREATE
-
identify_create_table_view
[source]+
identify_create_table_view
(s
)
identify_create_table_view
[source]
identify_create_table_view
(s
)Identify positions of CREATE .. TABLE / VIEW statements
@@ -2949,7 +2949,7 @@
identify_create_tab
-
count_lines
[source]+
count_lines
(s
)
count_lines
[source]
count_lines
(s
)Count the number of lines in
@@ -3024,7 +3024,7 @@s
count_lines
-
find_line_number
[source]+
find_line_number
(s
,positions
)
find_line_number
[source]
find_line_number
(s
,positions
)Find line number in
@@ -3099,7 +3099,7 @@s
out ofpositions
find_line_number
-
disimilarity
[source]+
disimilarity
(str1
,str2
)
disimilarity
[source]
disimilarity
(str1
,str2
)Calculate disimilarity between two strings by word
@@ -3193,7 +3193,7 @@
disimilarity
-
assign_comment
[source]+
assign_comment
(fs
,cds
)
assign_comment
[source]
assign_comment
(fs
,cds
)Assign comments in list of dictionaries
cds
to formatted stringfs
using Jaccard distanceThe comment dictionaries
diff --git a/nbs/00_core.ipynb b/nbs/00_core.ipynb index 0e1d394..359bf4f 100644 --- a/nbs/00_core.ipynb +++ b/nbs/00_core.ipynb @@ -4,16 +4,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "#hide\n", "%load_ext autoreload\n", @@ -465,8 +456,8 @@ " split_s = split_query(s) # split by comment and non comment\n", " split_s = compress_dicts(split_s, [\"comment\", \"select\"])\n", " # compile regex before loop\n", - " create_re = re.compile(\"create\", flags=re.I)\n", - " select_re = re.compile(\"select\", flags=re.I)\n", + " create_re = re.compile(r\"\\bcreate\\b\", flags=re.I)\n", + " select_re = re.compile(r\"\\bselect\\b\", flags=re.I)\n", " for statement in statements:\n", " if create_re.match(statement): # special case CREATE with AS capitalize as well\n", " create_sub = re.compile(rf\"\\s*({statement} )(.*) as\\b\", flags=re.I)\n", diff --git a/nbs/02_utils.ipynb b/nbs/02_utils.ipynb index 38afb51..9341b2e 100644 --- a/nbs/02_utils.ipynb +++ b/nbs/02_utils.ipynb @@ -911,9 +911,11 @@ " comment_region = False # start with non-quote\n", " s_comp = [] # container for string components\n", " start = 0\n", + " select_re = re.compile(r\"^[\\n\\s\\]\\(]*\\bselect\\b\\s$\")\n", + " from_re = re.compile(r\"^[\\n\\s\\]]\\bfrom\\b\\s$\")\n", " # loop over character positions\n", " for i, c in enumerate(s):\n", - " if s_low[i:i+6] == \"select\" and k == 0: # k = 0 -> no comment\n", + " if select_re.match(s_low[max(i-1, 0):i+7]) and k == 0: # k = 0 -> no comment\n", " s_comp.append({\n", " \"string\": s[start:i], \n", " \"comment\": comment_region, \n", @@ -922,7 +924,7 @@ " })\n", " start = i\n", " select_region = True # after select starts the select region\n", - " elif s_low[i:i+4] == \"from\" and k == 0:\n", + " elif from_re.match(s_low[max(i-1, 0):i+5]) and k == 0:\n", " select_open = False\n", " s_comp.append({\n", " \"string\": s[start:i], \n", @@ -2787,6 +2789,7 @@ "Converted 01_format_file.ipynb.\n", "Converted 02_utils.ipynb.\n", "Converted 03_validation.ipynb.\n", + "Converted 99_additional_tests.ipynb.\n", "Converted index.ipynb.\n" ] } diff --git a/nbs/99_additional_tests.ipynb b/nbs/99_additional_tests.ipynb index 413528e..aec8e75 100644 --- a/nbs/99_additional_tests.ipynb +++ b/nbs/99_additional_tests.ipynb @@ -103,7 +103,95 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SELECT var\n", + "FROM table_selection as a\n", + " LEFT JOIN table2 as b\n", + " ON a.id = b.id\n", + " LEFT JOIN table3 as c\n", + " ON a.id = c.id\n", + "ORDER BY 1\n" + ] + } + ], + "source": [ + "assert_and_print(\n", + " format_sql(\n", + "\"\"\"SELECT var \n", + " FROM table_selection as a \n", + " LEFT JOIN table2 as b ON a.id = b.id \n", + " LEFT JOIN table3 as c ON a.id = c.id \n", + " ORDER BY 1\n", + "\"\"\"),\n", + "\"\"\"\n", + "SELECT var\n", + "FROM table_selection as a\n", + " LEFT JOIN table2 as b\n", + " ON a.id = b.id\n", + " LEFT JOIN table3 as c\n", + " ON a.id = c.id\n", + "ORDER BY 1\n", + "\"\"\".strip()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## utils" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### split_query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'string': 'select var ', 'comment': False, 'quote': False, 'select': True}, {'string': 'from table_selection', 'comment': False, 'quote': False, 'select': False}]\n" + ] + } + ], + "source": [ + "assert_and_print(\n", + " split_query(\"select var from table_selection\"),\n", + " [{\"string\": \"select var \", \"comment\": False, \"quote\": False, \"select\": True}, \n", + " {\"string\": \"from table_selection\", \"comment\": False, \"quote\": False, \"select\": False}]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Converted 00_core.ipynb.\n", + "Converted 01_format_file.ipynb.\n", + "Converted 02_utils.ipynb.\n", + "Converted 03_validation.ipynb.\n", + "Converted 99_additional_tests.ipynb.\n", + "Converted index.ipynb.\n" + ] + } + ], "source": [ "#hide\n", "from nbdev.export import notebook2script\n", diff --git a/sql_formatter/core.py b/sql_formatter/core.py index 63178d6..ab3c3fb 100644 --- a/sql_formatter/core.py +++ b/sql_formatter/core.py @@ -51,8 +51,8 @@ def preformat_statements(s): split_s = split_query(s) # split by comment and non comment split_s = compress_dicts(split_s, ["comment", "select"]) # compile regex before loop - create_re = re.compile("create", flags=re.I) - select_re = re.compile("select", flags=re.I) + create_re = re.compile(r"\bcreate\b", flags=re.I) + select_re = re.compile(r"\bselect\b", flags=re.I) for statement in statements: if create_re.match(statement): # special case CREATE with AS capitalize as well create_sub = re.compile(rf"\s*({statement} )(.*) as\b", flags=re.I) diff --git a/sql_formatter/utils.py b/sql_formatter/utils.py index 128a44d..a9d3723 100644 --- a/sql_formatter/utils.py +++ b/sql_formatter/utils.py @@ -218,9 +218,11 @@ def split_query(s): comment_region = False # start with non-quote s_comp = [] # container for string components start = 0 + select_re = re.compile(r"^[\n\s\]\(]*\bselect\b\s$") + from_re = re.compile(r"^[\n\s\]]\bfrom\b\s$") # loop over character positions for i, c in enumerate(s): - if s_low[i:i+6] == "select" and k == 0: # k = 0 -> no comment + if select_re.match(s_low[max(i-1, 0):i+7]) and k == 0: # k = 0 -> no comment s_comp.append({ "string": s[start:i], "comment": comment_region, @@ -229,7 +231,7 @@ def split_query(s): }) start = i select_region = True # after select starts the select region - elif s_low[i:i+4] == "from" and k == 0: + elif from_re.match(s_low[max(i-1, 0):i+5]) and k == 0: select_open = False s_comp.append({ "string": s[start:i],cds
should contain the keys "comment" and "preceding" (string)