Skip to content

Commit

Permalink
[FIX] CREATE TASK wrongly identified as query (#42)
Browse files Browse the repository at this point in the history
This fixes #41
  • Loading branch information
PabloRMira authored Dec 17, 2020
1 parent a00efc7 commit 8496d96
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 17 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ repos:

To exemplify the formatting let's say you have a SQL query like this

```python
```
example_sql = """
create or replace table mytable as -- mytable example
seLecT a.asdf, b.qwer, -- some comment here
Expand All @@ -55,7 +55,7 @@ groUp by a.asdf

Then you can use this package to format it so that it is more readable

```python
```
from sql_formatter.core import format_sql
print(format_sql(example_sql))
```
Expand All @@ -79,7 +79,7 @@ print(format_sql(example_sql))

It can even deal with subqueries, and it will correct my favourite simple careless mistake (a comma at the end of the SELECT statement, right before FROM) for you on the fly :-)

```python
```
print(format_sql("""
select asdf, cast(qwer as numeric), -- some comment
qwer1
Expand Down Expand Up @@ -109,7 +109,7 @@ where qwer1 >= 0

The formatter is also robust against nested subqueries

```python
```
print(format_sql("""
select field1, field2 from (select field1,
field2 from (select field1, field2,
Expand All @@ -131,7 +131,7 @@ field3 from table1 where a=1 and b>=100))

If you do not want a particular query in your SQL file to be formatted, add the marker `/*skip-formatter*/` to that query; formatting is then disabled for just that query

```python
```
from sql_formatter.format_file import format_sql_commands
print(format_sql_commands(
"""
Expand Down
11 changes: 9 additions & 2 deletions docs/core.html
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ <h2 id="General-formatting">General formatting<a class="anchor-link" href="#Gene
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">example_sql</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;</span>
<span class="s2">create or replace table mytable as -- Mytable example</span>
<span class="s2">/* multi line</span>
<span class="s2"> comment */</span>
<span class="s2">seLecT a.asdf, b.qwer, -- some comment here</span>
<span class="s2">/* and here is a line comment inside select */</span>
<span class="s2">substr(c.asdf, 1, 2) as substr_asdf, </span>
Expand Down Expand Up @@ -99,6 +101,8 @@ <h2 id="General-formatting">General formatting<a class="anchor-link" href="#Gene
<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">expected_sql</span> <span class="o">=</span> <span class="s2">&quot;&quot;&quot;CREATE OR REPLACE TABLE mytable AS -- Mytable example</span>
<span class="s2">/* multi line</span>
<span class="s2">comment */</span>
<span class="s2">SELECT a.asdf,</span>
<span class="s2"> b.qwer, -- some comment here</span>
<span class="s2"> /* and here is a line comment inside select */</span>
Expand Down Expand Up @@ -820,7 +824,8 @@ <h4 id="preformat_statements" class="doc_header"><code>preformat_statements</cod
<div class="output_subarea output_stream output_stdout output_text">
<pre>
CREATE OR REPLACE TABLE mytable AS -- Mytable example[C]
SELECT a.asdf, b.qwer, -- some comment here[C][CS]/* and here is a line comment inside select */[C]substr(c.asdf, 1, 2) as substr_asdf,[CS]/* some commenT there */[C]case when a.asdf= 1 then &#39;b&#39; /* here a case comment */[C]when b.qwer =2 then &#39;c&#39; else &#39;d&#39; end as new_field, -- Some comment[C]b.asdf2
SELECT a.asdf, b.qwer, -- some comment here[C][CS]/* and here is a line comment inside select */[C]substr(c.asdf, 1, 2) as substr_asdf,[CS]/* some multi line
commenT there */[C]case when a.asdf= 1 then &#39;b&#39; /* here a case comment */[C]when b.qwer =2 then &#39;c&#39; else &#39;d&#39; end as new_field, -- Some comment[C]b.asdf2
FROM table1 as a
LEFT JOIN table2 as b -- and here a comment[C]
ON a.asdf = b.asdf /* joiN this way */[C]
Expand Down Expand Up @@ -2557,6 +2562,8 @@ <h4 id="format_simple_sql" class="doc_header"><code>format_simple_sql</code><a h

<div class="output_subarea output_stream output_stdout output_text">
<pre>CREATE OR REPLACE TABLE mytable AS -- Mytable example
/* multi line
comment */
SELECT a.asdf,
b.qwer, -- some comment here
/* and here is a line comment inside select */
Expand Down Expand Up @@ -2732,7 +2739,7 @@ <h3 id="Main-function-handling-queries-with-subqueries">Main function handling q


<div class="output_markdown rendered_html output_subarea ">
<h4 id="format_sql" class="doc_header"><code>format_sql</code><a href="https://github.com/PabloRMira/sql_formatter/tree/master/sql_formatter/core.py#L259" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>format_sql</code>(<strong><code>s</code></strong>, <strong><code>add_semicolon</code></strong>=<em><code>True</code></em>)</p>
<h4 id="format_sql" class="doc_header"><code>format_sql</code><a href="https://github.com/PabloRMira/sql_formatter/tree/master/sql_formatter/core.py#L260" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>format_sql</code>(<strong><code>s</code></strong>, <strong><code>add_semicolon</code></strong>=<em><code>True</code></em>)</p>
</blockquote>
<p>Format SQL query with subqueries. If <code>add_semicolon</code> is True then add a semicolon at the end</p>

Expand Down
71 changes: 67 additions & 4 deletions docs/utils.html
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ <h4 id="mark_comments" class="doc_header"><code>mark_comments</code><a href="htt


<div class="output_markdown rendered_html output_subarea ">
<h4 id="identify_queries" class="doc_header"><code>identify_queries</code><a href="https://github.com/PabloRMira/sql_formatter/tree/master/sql_formatter/utils.py#L254" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>identify_queries</code>(<strong><code>s</code></strong>)</p>
<h4 id="identify_queries" class="doc_header"><code>identify_queries</code><a href="https://github.com/PabloRMira/sql_formatter/tree/master/sql_formatter/utils.py#L255" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>identify_queries</code>(<strong><code>s</code></strong>)</p>
</blockquote>
<p>Split by queries in string <code>s</code></p>

Expand Down Expand Up @@ -1725,6 +1725,29 @@ <h4 id="check_sql_query" class="doc_header"><code>check_sql_query</code><a href=
<div class="cell border-box-sizing code_cell rendered">
<div class="input">

<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">assert</span> <span class="n">check_sql_query</span><span class="p">(</span><span class="s2">&quot;&quot;&quot;</span>
<span class="s2">--- Table 1---</span>
<span class="s2">creaTe or replace view my_table as</span>
<span class="s2">select asdf</span>
<span class="s2">from table</span>
<span class="s2">where asdf = 1</span>
<span class="s2">&quot;&quot;&quot;</span><span class="o">.</span><span class="n">strip</span><span class="p">())</span>
</pre></div>

</div>
</div>
</div>

</div>
{% endraw %}

{% raw %}

<div class="cell border-box-sizing code_cell rendered">
<div class="input">

<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">assert</span> <span class="n">check_sql_query</span><span class="p">(</span><span class="s2">&quot;&quot;&quot;</span>
Expand Down Expand Up @@ -1761,6 +1784,46 @@ <h4 id="check_sql_query" class="doc_header"><code>check_sql_query</code><a href=
<div class="cell border-box-sizing code_cell rendered">
<div class="input">

<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">assert</span> <span class="ow">not</span> <span class="n">check_sql_query</span><span class="p">(</span><span class="s2">&quot;&quot;&quot;</span>
<span class="s2">create or replace task my_task as</span>
<span class="s2">&quot;&quot;&quot;</span><span class="p">)</span>
</pre></div>

</div>
</div>
</div>

</div>
{% endraw %}

{% raw %}

<div class="cell border-box-sizing code_cell rendered">
<div class="input">

<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">assert</span> <span class="ow">not</span> <span class="n">check_sql_query</span><span class="p">(</span><span class="s2">&quot;&quot;&quot;</span>
<span class="s2">create or replace task my_task as</span>
<span class="s2">create or replace table as</span>
<span class="s2">select asdf</span>
<span class="s2">&quot;&quot;&quot;</span><span class="p">)</span>
</pre></div>

</div>
</div>
</div>

</div>
{% endraw %}

{% raw %}

<div class="cell border-box-sizing code_cell rendered">
<div class="input">

<div class="inner_cell">
<div class="input_area">
<div class=" highlight hl-ipython3"><pre><span></span><span class="k">assert</span> <span class="ow">not</span> <span class="n">check_sql_query</span><span class="p">(</span><span class="s2">&quot;&quot;&quot;</span>
Expand Down Expand Up @@ -1794,7 +1857,7 @@ <h4 id="check_sql_query" class="doc_header"><code>check_sql_query</code><a href=


<div class="output_markdown rendered_html output_subarea ">
<h4 id="check_skip_marker" class="doc_header"><code>check_skip_marker</code><a href="https://github.com/PabloRMira/sql_formatter/tree/master/sql_formatter/utils.py#L249" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>check_skip_marker</code>(<strong><code>s</code></strong>)</p>
<h4 id="check_skip_marker" class="doc_header"><code>check_skip_marker</code><a href="https://github.com/PabloRMira/sql_formatter/tree/master/sql_formatter/utils.py#L250" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>check_skip_marker</code>(<strong><code>s</code></strong>)</p>
</blockquote>
<p>Checks whether user set marker <code>/*skip-formatter*/</code> to not format query</p>

Expand Down Expand Up @@ -1878,7 +1941,7 @@ <h4 id="check_skip_marker" class="doc_header"><code>check_skip_marker</code><a h


<div class="output_markdown rendered_html output_subarea ">
<h4 id="identify_queries" class="doc_header"><code>identify_queries</code><a href="https://github.com/PabloRMira/sql_formatter/tree/master/sql_formatter/utils.py#L254" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>identify_queries</code>(<strong><code>s</code></strong>)</p>
<h4 id="identify_queries" class="doc_header"><code>identify_queries</code><a href="https://github.com/PabloRMira/sql_formatter/tree/master/sql_formatter/utils.py#L255" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>identify_queries</code>(<strong><code>s</code></strong>)</p>
</blockquote>
<p>Split by queries in string <code>s</code></p>

Expand Down Expand Up @@ -1959,7 +2022,7 @@ <h4 id="identify_queries" class="doc_header"><code>identify_queries</code><a hre


<div class="output_markdown rendered_html output_subarea ">
<h4 id="split_by_query" class="doc_header"><code>split_by_query</code><a href="https://github.com/PabloRMira/sql_formatter/tree/master/sql_formatter/utils.py#L283" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>split_by_query</code>(<strong><code>s</code></strong>)</p>
<h4 id="split_by_query" class="doc_header"><code>split_by_query</code><a href="https://github.com/PabloRMira/sql_formatter/tree/master/sql_formatter/utils.py#L284" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>split_by_query</code>(<strong><code>s</code></strong>)</p>
</blockquote>
<p>Split string <code>s</code> by its queries</p>

Expand Down
23 changes: 20 additions & 3 deletions nbs/00_core.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,16 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
}
],
"source": [
"#hide\n",
"%load_ext autoreload\n",
Expand Down Expand Up @@ -75,6 +84,8 @@
"source": [
"example_sql = \"\"\"\n",
"create or replace table mytable as -- Mytable example\n",
"/* multi line\n",
" comment */\n",
"seLecT a.asdf, b.qwer, -- some comment here\n",
"/* and here is a line comment inside select */\n",
"substr(c.asdf, 1, 2) as substr_asdf, \n",
Expand Down Expand Up @@ -107,6 +118,8 @@
"outputs": [],
"source": [
"expected_sql = \"\"\"CREATE OR REPLACE TABLE mytable AS -- Mytable example\n",
"/* multi line\n",
"comment */\n",
"SELECT a.asdf,\n",
" b.qwer, -- some comment here\n",
" /* and here is a line comment inside select */\n",
Expand Down Expand Up @@ -660,7 +673,8 @@
"text": [
"\n",
"CREATE OR REPLACE TABLE mytable AS -- Mytable example[C]\n",
"SELECT a.asdf, b.qwer, -- some comment here[C][CS]/* and here is a line comment inside select */[C]substr(c.asdf, 1, 2) as substr_asdf,[CS]/* some commenT there */[C]case when a.asdf= 1 then 'b' /* here a case comment */[C]when b.qwer =2 then 'c' else 'd' end as new_field, -- Some comment[C]b.asdf2\n",
"SELECT a.asdf, b.qwer, -- some comment here[C][CS]/* and here is a line comment inside select */[C]substr(c.asdf, 1, 2) as substr_asdf,[CS]/* some multi line\n",
"commenT there */[C]case when a.asdf= 1 then 'b' /* here a case comment */[C]when b.qwer =2 then 'c' else 'd' end as new_field, -- Some comment[C]b.asdf2\n",
"FROM table1 as a\n",
"LEFT JOIN table2 as b -- and here a comment[C]\n",
"ON a.asdf = b.asdf /* joiN this way */[C]\n",
Expand Down Expand Up @@ -1846,7 +1860,8 @@
" s = lowercase_query(s) # everything lowercased but not the comments\n",
" s = preformat_statements(s) # add breaklines for the main statements\n",
" s = format_statements(s) # format statements\n",
" s = re.sub(r\"\\[C\\]\", \"\", s) # replace remaining [EOC]\n",
" s = re.sub(r\"\\[C\\]\", \"\", s) # replace remaining [C]\n",
" s = re.sub(r\"\\[CS\\]\", \"\\n\", s) # replace remainig [CS]\n",
" if add_semicolon:\n",
" s = add_ending_semicolon(s) # add ending semicolon if not there yet\n",
" return s"
Expand All @@ -1862,6 +1877,8 @@
"output_type": "stream",
"text": [
"CREATE OR REPLACE TABLE mytable AS -- Mytable example\n",
"/* multi line\n",
"comment */\n",
"SELECT a.asdf,\n",
" b.qwer, -- some comment here\n",
" /* and here is a line comment inside select */\n",
Expand Down
42 changes: 41 additions & 1 deletion nbs/02_utils.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1191,7 +1191,8 @@
"#export\n",
"def check_sql_query(s):\n",
" \"Checks whether `s` is a SQL query\"\n",
" return bool(re.match(pattern=r\".*(?:select|create).*\", string=s, flags=re.I | re.DOTALL))"
" return (bool(re.search(pattern=r\".*(?:select|create.{0,10}(?:table|view)).*\", string=s, flags=re.I)) and\n",
" not bool(re.search(pattern=r\"create(?!.*(?:table|view))\", string=s, flags=re.I)))"
]
},
{
Expand All @@ -1209,6 +1210,21 @@
"\"\"\".strip())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"assert check_sql_query(\"\"\"\n",
"--- Table 1---\n",
"creaTe or replace view my_table as\n",
"select asdf\n",
"from table\n",
"where asdf = 1\n",
"\"\"\".strip())"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -1229,6 +1245,30 @@
"assert not check_sql_query(\"use database my_database;\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"assert not check_sql_query(\"\"\"\n",
"create or replace task my_task as\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"assert not check_sql_query(\"\"\"\n",
"create or replace task my_task as\n",
"create or replace table as\n",
"select asdf\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
3 changes: 2 additions & 1 deletion sql_formatter/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,8 @@ def format_simple_sql(s, add_semicolon=True):
s = lowercase_query(s) # everything lowercased but not the comments
s = preformat_statements(s) # add breaklines for the main statements
s = format_statements(s) # format statements
s = re.sub(r"\[C\]", "", s) # replace remaining [EOC]
s = re.sub(r"\[C\]", "", s) # replace remaining [C]
s = re.sub(r"\[CS\]", "\n", s) # replace remainig [CS]
if add_semicolon:
s = add_ending_semicolon(s) # add ending semicolon if not there yet
return s
Expand Down
3 changes: 2 additions & 1 deletion sql_formatter/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,8 @@ def format_subquery(s, previous_s):
# Cell
def check_sql_query(s):
    """Check whether `s` is a SQL query.

    `s` counts as a query when both conditions hold (case-insensitively):

    1. it contains ``select``, or ``create`` followed within at most 10
       characters (on the same line) by ``table`` or ``view``;
    2. it contains no ``create`` whose remaining line lacks both ``table``
       and ``view`` -- e.g. ``create or replace task my_task as`` -- even if
       a later line holds a regular ``create table`` / ``select``.

    Returns a bool.
    """
    # NOTE(review): " or replace " is 12 characters, so the `create.{0,10}`
    # alternative does not match "create or replace table/view"; such
    # statements are only recognised through the `select` alternative.
    # Confirm whether that limit is intended.
    has_query_keyword = re.search(
        pattern=r".*(?:select|create.{0,10}(?:table|view)).*",
        string=s,
        flags=re.I,
    )
    # re.DOTALL is deliberately not set: `.` stops at line breaks, so the
    # negative lookahead only inspects the rest of the line holding `create`.
    creates_other_object = re.search(
        pattern=r"create(?!.*(?:table|view))",
        string=s,
        flags=re.I,
    )
    return bool(has_query_keyword) and not creates_other_object

# Cell
def check_skip_marker(s):
Expand Down

0 comments on commit 8496d96

Please sign in to comment.