From a5f5c5f180e8aa20ea31954053094cf19907416c Mon Sep 17 00:00:00 2001 From: Pablo Rodriguez Mira <36644554+PabloRMira@users.noreply.github.com> Date: Fri, 30 Apr 2021 22:05:45 +0200 Subject: [PATCH] [FIX] Variable names ending with `and` get split in WHERE statements (#165) --- README.md | 10 +-- docs/_data/sidebars/home_sidebar.yml | 3 + docs/additional_tests.html | 127 +++++++++++++++++++++++++++ docs/index.html | 7 ++ docs/sidebar.json | 3 +- nbs/00_core.ipynb | 16 +++- nbs/99_additional_tests.ipynb | 123 ++++++++++++++++++++++++++ sql_formatter/core.py | 4 +- 8 files changed, 282 insertions(+), 11 deletions(-) create mode 100644 docs/additional_tests.html create mode 100644 nbs/99_additional_tests.ipynb diff --git a/README.md b/README.md index 4db237f..9d8cfde 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ for a custom maximum line length truncation of e.g. 50 To exemplify the formatting let's say you have a SQL query like this -```python +``` example_sql = """ create or replace table mytable as -- mytable example seLecT a.asdf, b.qwer, -- some comment here @@ -107,7 +107,7 @@ groUp by a.asdf Then you can use this package to format it so that it is better readable -```python +``` from sql_formatter.core import format_sql print(format_sql(example_sql)) ``` @@ -131,7 +131,7 @@ print(format_sql(example_sql)) It can even deal with subqueries and it will correct my favourite simple careless mistake (comma at the end of SELECT statement before of FROM) for you on the flow :-) -```python +``` print(format_sql(""" select asdf, cast(qwer as numeric), -- some comment qwer1 @@ -161,7 +161,7 @@ where qwer1 >= 0 The formatter is also robust against nested subqueries -```python +``` print(format_sql(""" select field1, field2 from (select field1, field2 from (select field1, field2, @@ -183,7 +183,7 @@ field3 from table1 where a=1 and b>=100)) If you do not want to get some query formatted in your SQL file then you can use the marker `/*skip-formatter*/` in your query to disable formatting for just the corresponding query -```python +``` from sql_formatter.format_file import format_sql_commands print(format_sql_commands( """ diff --git a/docs/_data/sidebars/home_sidebar.yml b/docs/_data/sidebars/home_sidebar.yml index 1e9a634..4d27a29 100644 --- a/docs/_data/sidebars/home_sidebar.yml +++ b/docs/_data/sidebars/home_sidebar.yml @@ -21,6 +21,9 @@ entries: - output: web,pdf title: validation url: validation.html + - output: web,pdf + title: Additional tests + url: additional_tests.html output: web title: sql_formatter output: web diff --git a/docs/additional_tests.html b/docs/additional_tests.html new file mode 100644 index 0000000..4ac6f9c --- /dev/null +++ b/docs/additional_tests.html @@ -0,0 +1,127 @@ +--- + +title: Additional tests + + +keywords: fastai +sidebar: home_sidebar + + + +nb_path: "nbs/99_additional_tests.ipynb" +--- + + +
+ + {% raw %} + +
+ +
+ {% endraw %} + +
+
+

core

+
+
+
+
+
+

format_where

+
+
+
+ {% raw %} + +
+
+ +
+
+
assert_and_print(
+    format_where(
+        "WHERE brand = 'my_brand'"
+    ), "WHERE  brand = 'my_brand'"
+)
+
+ +
+
+
+ +
+
+ +
+ +
+
WHERE  brand = 'my_brand'
+
+
+
+ +
+
+ +
+ {% endraw %} + +
+
+

format_sql

+
+
+
+ {% raw %} + +
+
+ +
+
+
assert_and_print(
+    format_sql("SELECT brand FROM table WHERE brand = 'my_brand'"),
+"""
+SELECT brand
+FROM   table
+WHERE  brand = 'my_brand'
+""".strip()
+)
+
+ +
+
+
+ +
+
+ +
+ +
+
SELECT brand
+FROM   table
+WHERE  brand = 'my_brand'
+
+
+
+ +
+
+ +
+ {% endraw %} + +
+ + diff --git a/docs/index.html b/docs/index.html index 4d44267..43509b5 100644 --- a/docs/index.html +++ b/docs/index.html @@ -83,6 +83,13 @@

Usage with pre-commit

pre-commit is a nice development tool to automatize the binding of pre-commit hooks. After installation and configuration pre-commit will run your hooks before you commit any change.

To add sql-formatter as a hook to your pre-commit configuration to format your SQL files before commit, just add the following lines to your .pre-commit-config.yaml:

+
repos:
+  - repo: https://github.com/PabloRMira/sql_formatter
+    rev: master
+    hooks:
+    - id: sql_formatter
+
+

If you want to install sql-formatter locally and use that instead of using pre-commit's default environment, set repo: local in your .pre-commit-config.yaml file:

repos:
   - repo: local
     hooks:
diff --git a/docs/sidebar.json b/docs/sidebar.json
index c0b4234..9229f18 100644
--- a/docs/sidebar.json
+++ b/docs/sidebar.json
@@ -4,6 +4,7 @@
     "core": "core.html",
     "format_file": "format_file.html",
     "utils": "utils.html",
-    "validation": "validation.html"
+    "validation": "validation.html",
+    "Additional tests": "additional_tests.html"
   }
 }
\ No newline at end of file
diff --git a/nbs/00_core.ipynb b/nbs/00_core.ipynb
index 3fdfbac..0e1d394 100644
--- a/nbs/00_core.ipynb
+++ b/nbs/00_core.ipynb
@@ -4,7 +4,16 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
    "source": [
     "#hide\n",
     "%load_ext autoreload\n",
@@ -3019,8 +3028,8 @@
     "    # split by comment / non comment, quote / non-quote\n",
     "    split_s = split_comment_quote(s)\n",
     "    # define regex before loop\n",
-    "    indent_and = re.compile(r\"\\s*(and)\\b\", flags=re.I)\n",
-    "    indent_or = re.compile(r\"\\s*(or)\\b\", flags=re.I)\n",
+    "    indent_and = re.compile(r\"\\s*\\b(and)\\b\", flags=re.I)\n",
+    "    indent_or = re.compile(r\"\\s*\\b(or)\\b\", flags=re.I)\n",
     "    for d in split_s:\n",
     "        if not d[\"comment\"] and not d[\"quote\"]:\n",
     "            s_aux = d[\"string\"]\n",
@@ -4060,6 +4069,7 @@
       "Converted 01_format_file.ipynb.\n",
       "Converted 02_utils.ipynb.\n",
       "Converted 03_validation.ipynb.\n",
+      "Converted 99_additional_tests.ipynb.\n",
       "Converted index.ipynb.\n"
      ]
     }
diff --git a/nbs/99_additional_tests.ipynb b/nbs/99_additional_tests.ipynb
new file mode 100644
index 0000000..413528e
--- /dev/null
+++ b/nbs/99_additional_tests.ipynb
@@ -0,0 +1,123 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#hide\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "%config Completer.use_jedi = False  # workaround for buggy jedi"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Additional tests"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#hide\n",
+    "from nbdev.showdoc import *\n",
+    "from sql_formatter.utils import *\n",
+    "from sql_formatter.core import *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## core"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### format_where"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WHERE  brand = 'my_brand'\n"
+     ]
+    }
+   ],
+   "source": [
+    "assert_and_print(\n",
+    "    format_where(\n",
+    "        \"WHERE brand = 'my_brand'\"\n",
+    "    ), \"WHERE  brand = 'my_brand'\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### format_sql"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SELECT brand\n",
+      "FROM   table\n",
+      "WHERE  brand = 'my_brand'\n"
+     ]
+    }
+   ],
+   "source": [
+    "assert_and_print(\n",
+    "    format_sql(\"SELECT brand FROM table WHERE brand = 'my_brand'\"),\n",
+    "\"\"\"\n",
+    "SELECT brand\n",
+    "FROM   table\n",
+    "WHERE  brand = 'my_brand'\n",
+    "\"\"\".strip()\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#hide\n",
+    "from nbdev.export import notebook2script\n",
+    "notebook2script()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:sql-formatter-dev] *",
+   "language": "python",
+   "name": "conda-env-sql-formatter-dev-py"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/sql_formatter/core.py b/sql_formatter/core.py
index 0826d55..63178d6 100644
--- a/sql_formatter/core.py
+++ b/sql_formatter/core.py
@@ -366,8 +366,8 @@ def format_where(s, **kwargs):
     # split by comment / non comment, quote / non-quote
     split_s = split_comment_quote(s)
     # define regex before loop
-    indent_and = re.compile(r"\s*(and)\b", flags=re.I)
-    indent_or = re.compile(r"\s*(or)\b", flags=re.I)
+    indent_and = re.compile(r"\s*\b(and)\b", flags=re.I)
+    indent_or = re.compile(r"\s*\b(or)\b", flags=re.I)
     for d in split_s:
         if not d["comment"] and not d["quote"]:
             s_aux = d["string"]