From f0bbb0f2b2594f2b1139e36c0c4c75b678647fb4 Mon Sep 17 00:00:00 2001 From: adam444555 Date: Tue, 21 Nov 2023 14:00:44 +0100 Subject: [PATCH] fix inconsistency --- nbs/02_utils.ipynb | 402 +++++++++++++++++++++++---------------------- 1 file changed, 203 insertions(+), 199 deletions(-) diff --git a/nbs/02_utils.ipynb b/nbs/02_utils.ipynb index 2bc1a33..6123363 100644 --- a/nbs/02_utils.ipynb +++ b/nbs/02_utils.ipynb @@ -9,7 +9,7 @@ "#hide\n", "%load_ext autoreload\n", "%autoreload 2\n", - "%config Completer.use_jedi = False # workaround for buggy jedi" + "%config Completer.use_jedi = False # workaround for buggy jedi\n" ] }, { @@ -18,7 +18,7 @@ "metadata": {}, "outputs": [], "source": [ - "# default_exp utils" + "# default_exp utils\n" ] }, { @@ -31,7 +31,7 @@ "import re\n", "from itertools import accumulate\n", "from collections import Counter\n", - "import operator" + "import operator\n" ] }, { @@ -50,7 +50,7 @@ "outputs": [], "source": [ "#hide\n", - "from nbdev.showdoc import *" + "from nbdev.showdoc import *\n" ] }, { @@ -88,10 +88,10 @@ " print(s_in[max(i-5, 0):i+5])\n", " print(\"\\n\")\n", " print(\"Expected:\\n\")\n", - " print(s_expected[max(i-5, 0):i+5]) \n", + " print(s_expected[max(i-5, 0):i+5])\n", " assert s_in == s_expected\n", " print(s_in)\n", - " return None" + " return None\n" ] }, { @@ -108,7 +108,7 @@ } ], "source": [ - "assert_and_print(\"some string\", \"some string\")" + "assert_and_print(\"some string\", \"some string\")\n" ] }, { @@ -140,7 +140,7 @@ " ld_out[-1][\"string\"] += d[\"string\"]\n", " else:\n", " ld_out.append(d)\n", - " return ld_out" + " return ld_out\n" ] }, { @@ -172,9 +172,9 @@ " [\n", " {\"string\": \"some string. qwerty asdf\", \"key1\": True, \"key2\": True},\n", " {\"string\": \"another string bla bla\", \"key1\": True, \"key2\": False},\n", - " {\"string\": \"1234\", \"key1\": False, \"key2\": False}, \n", + " {\"string\": \"1234\", \"key1\": False, \"key2\": False},\n", " ]\n", - ")" + ")\n" ] }, { @@ -195,7 +195,7 @@ " \"Remove whitespaces before and after newline in `s`\"\n", " s = re.sub(r\"\\n[\\r\\t\\f\\v ]+\", \"\\n\", s) # remove whitespaces after newline\n", " s = re.sub(r\"[\\r\\t\\f\\v ]+\\n\", \"\\n\", s) # remove whitespaces before newline\n", - " return s" + " return s\n" ] }, { @@ -218,7 +218,7 @@ "assert_and_print(\n", " remove_whitespaces_newline(\"select asdf\\n from table1 \\nwhere asdf = 1 \\n group by asdf\"),\n", " \"select asdf\\nfrom table1\\nwhere asdf = 1\\ngroup by asdf\"\n", - ")" + ")\n" ] }, { @@ -235,8 +235,8 @@ " s = re.sub(r\"\\[CS\\][\\r\\t\\f\\v ]+\", \"[CS]\", s) # remove whitespaces after comment token [CS]\n", " s = re.sub(r\"[\\r\\t\\f\\v ]+\\[CS\\]\", \"[CS]\", s) # remove whitespaces before comment token [CS]\n", " s = re.sub(r\"\\[CI\\][\\r\\t\\f\\v ]+\", \"[CI]\", s) # remove whitespaces after comment token [CI]\n", - " s = re.sub(r\"[\\r\\t\\f\\v ]+\\[CI\\]\", \"[CI]\", s) # remove whitespaces before comment token [CI] \n", - " return s" + " s = re.sub(r\"[\\r\\t\\f\\v ]+\\[CI\\]\", \"[CI]\", s) # remove whitespaces before comment token [CI]\n", + " return s\n" ] }, { @@ -258,7 +258,7 @@ " \"select asdf[C] from table1 [CS]where asdf = 1 [C] group by asdf\"\n", " ),\n", " \"select asdf[C]from table1[CS]where asdf = 1[C]group by asdf\"\n", - ")" + ")\n" ] }, { @@ -280,7 +280,7 @@ " \"select asdf[C][CS]/* asdf [CI] */[C] from table1 [CS]where asdf = 1 [C] group by asdf\"\n", " ),\n", " \"select asdf[C][CS]/* asdf[CI]*/[C]from table1[CS]where asdf = 1[C]group by asdf\"\n", - ")" + ")\n" ] }, { @@ -294,7 +294,7 @@ " \"Strip and remove redundant (more than 2) whitespaces in `s` but no newlines in between\"\n", " s = s.strip()\n", " s = re.sub(r\"[\\r\\t\\f\\v ]{2,}\", \" \", s) # remove too many whitespaces but not newlines\n", - " return s" + " return s\n" ] }, { @@ -321,7 +321,7 @@ "assert_and_print(\n", " remove_redundant_whitespaces(\"\\nsome string many whitespaces some more\"),\n", " \"some string many whitespaces some more\"\n", - ")" + ")\n" ] }, { @@ -351,7 +351,7 @@ "assert_and_print(\n", " remove_redundant_whitespaces(\"\\nsome string \\n many whitespaces\\n \\nsome more\\n\"),\n", " \"some string \\n many whitespaces\\n \\nsome more\"\n", - ")" + ")\n" ] }, { @@ -365,7 +365,7 @@ " \"Remove whitespaces between parenthesis in query `s`\"\n", " s = re.sub(r\"\\([\\r\\t\\f\\v ]+\", \"(\", s) # remove whitespaces after (\n", " s = re.sub(r\"[\\r\\t\\f\\v ]+\\)\", \")\", s) # remove whitespaces before )\n", - " return s" + " return s\n" ] }, { @@ -385,7 +385,7 @@ "assert_and_print(\n", " remove_whitespaces_parenthesis(\"select asdf, substr( qwer, 1, 2 ) as qwerty\"),\n", " \"select asdf, substr(qwer, 1, 2) as qwerty\"\n", - ")" + ")\n" ] }, { @@ -397,38 +397,42 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "#export\n", "def add_whitespaces_between_symbols(s):\n", " \"Add whitespaces between symbols in line `s`\"\n", - " s = re.sub(r\"([^\\s=!<>#-])(-?[=!<>]+)\", r\"\\1 \\2\", s, flags=re.I) # no space left\n", + " s = re.sub(r\"([^\\s=!<>@#-])(-?[=!<>]+)\", r\"\\1 \\2\", s, flags=re.I) # no space left\n", " s = re.sub(r\"([=!<>]+)([^\\s=!<>])\", r\"\\1 \\2\", s, flags=re.I) # no space right\n", " s = re.sub(r\"([^\\s=!<>-])(-?[=!<>]+)([^\\s=!<>])\", r\"\\1 \\2 \\3\", s, flags=re.I) # no space left and right\n", - " return s" + " return s\n" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 2, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "WHERE asdf = 1 and qwer => 1 or blabla-dfg ->> 'asdf' or asdf #>> 'asdf'\n" + "ename": "NameError", + "evalue": "name 'assert_and_print' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_91594/648113473.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m assert_and_print(\n\u001b[0m\u001b[1;32m 2\u001b[0m add_whitespaces_between_symbols(\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\"WHERE asdf= 1 and qwer=>1 or blabla-dfg ->>'asdf' or asdf #>>'asdf' or asdf @>'asdf'\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m ), \"WHERE asdf = 1 and qwer => 1 or blabla-dfg ->> 'asdf' or asdf #>> 'asdf' or asdf @> 'asdf'\"\n\u001b[1;32m 5\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'assert_and_print' is not defined" ] } ], "source": [ "assert_and_print(\n", " add_whitespaces_between_symbols(\n", - " \"WHERE asdf= 1 and qwer=>1 or blabla-dfg ->>'asdf' or asdf #>>'asdf'\"\n", - " ), \"WHERE asdf = 1 and qwer => 1 or blabla-dfg ->> 'asdf' or asdf #>> 'asdf'\"\n", - ")" + " \"WHERE asdf= 1 and qwer=>1 or blabla-dfg ->>'asdf' or asdf #>>'asdf' or asdf @>'asdf'\"\n", + " ), \"WHERE asdf = 1 and qwer => 1 or blabla-dfg ->> 'asdf' or asdf #>> 'asdf' or asdf @> 'asdf'\"\n", + ")\n" ] }, { @@ -457,7 +461,7 @@ " # loop over character positions\n", " for i, c in enumerate(s):\n", " if (\n", - " c == \"\\n\" and \n", + " c == \"\\n\" and\n", " comment_open1 and\n", " not comment_open2 and\n", " not quote_open1 and\n", @@ -465,11 +469,11 @@ " ):\n", " positions.append(i)\n", " elif (\n", - " s[i:i+2] == \"/*\" and \n", - " not comment_open1 and \n", + " s[i:i+2] == \"/*\" and\n", + " not comment_open1 and\n", " not comment_open2 and\n", - " not quote_open1 and \n", - " not quote_open2 \n", + " not quote_open1 and\n", + " not quote_open2\n", " ): # if there is an opening comment /*\n", " comment_open1 = True\n", " elif (\n", @@ -481,11 +485,11 @@ " ): # if there is a closing comment */\n", " comment_open1 = False\n", " elif (\n", - " s[i:i+2] == \"--\" and \n", - " not comment_open1 and \n", + " s[i:i+2] == \"--\" and\n", + " not comment_open1 and\n", " not comment_open2 and\n", - " not quote_open1 and \n", - " not quote_open2 \n", + " not quote_open1 and\n", + " not quote_open2\n", " ): # if there is an opening comment --\n", " comment_open2 = True\n", " elif (\n", @@ -498,41 +502,41 @@ " comment_open2 = False\n", " elif (\n", " c == \"'\" and\n", - " not comment_open1 and \n", + " not comment_open1 and\n", " not comment_open2 and\n", - " not quote_open1 and \n", - " not quote_open2 \n", + " not quote_open1 and\n", + " not quote_open2\n", " ): # if opening quote '\n", " quote_open1 = True\n", " elif (\n", " c == \"'\" and\n", - " not comment_open1 and \n", + " not comment_open1 and\n", " not comment_open2 and\n", - " quote_open1 and \n", - " not quote_open2 \n", + " quote_open1 and\n", + " not quote_open2\n", " ): # if opening quote '\n", " quote_open1 = False\n", " elif (\n", " c == '\"' and\n", - " not comment_open1 and \n", + " not comment_open1 and\n", " not comment_open2 and\n", - " not quote_open1 and \n", - " quote_open2 \n", + " not quote_open1 and\n", + " quote_open2\n", " ): # if opening quote '\n", " quote_open2 = True\n", " elif (\n", " c == '\"' and\n", - " not comment_open1 and \n", + " not comment_open1 and\n", " not comment_open2 and\n", - " not quote_open1 and \n", - " quote_open2 \n", + " not quote_open1 and\n", + " quote_open2\n", " ): # if opening quote '\n", " quote_open2 = False\n", " if len(positions) == 0:\n", " return s\n", " else:\n", " s = \"\".join([c if i not in positions else \"[CI]\" for i, c in enumerate(s)])\n", - " return s" + " return s\n" ] }, { @@ -552,7 +556,7 @@ "assert_and_print(\n", " mark_ci_comments(\"select /* asdf \\n qwer */\"),\n", " \"select /* asdf [CI] qwer */\"\n", - ")" + ")\n" ] }, { @@ -572,7 +576,7 @@ "assert_and_print(\n", " mark_ci_comments(\"select /* asdf \\n qwe \\n rqwer */\"),\n", " \"select /* asdf [CI] qwe [CI] rqwer */\"\n", - ")" + ")\n" ] }, { @@ -586,11 +590,11 @@ " \"Mark end of comments -- and begin of comments /* */ if they are in a new line with token [C]\"\n", " s = re.sub(r\"(--.*?)(\\n)\", r\"\\1[C]\\2\", s) # mark end of -- comments\n", " s = re.sub(r\"(\\/\\*.*?\\*\\/)\", r\"\\1[C]\", s, flags=re.DOTALL) # mark end of /* */ comments\n", - " s = re.sub(r\"(\\n)\\s*(--.*?)\", r\"\\1[CS]\\2\", s, flags=re.DOTALL) # mark start of comment line with -- \n", + " s = re.sub(r\"(\\n)\\s*(--.*?)\", r\"\\1[CS]\\2\", s, flags=re.DOTALL) # mark start of comment line with --\n", " s = re.sub(r\"(\\n)\\s*(\\/\\*.*\\*\\/)\", r\"\\1[CS]\\2\", s) # mark start of comment line with /*\n", " s = re.sub(r\"(\\n)\\s*(\\/\\*.*?\\*\\/)\", r\"\\1[CS]\\2\", s, flags=re.DOTALL) # mark start of comment line with /*\n", " s = mark_ci_comments(s) # replace intercomment new lines by [CI]\n", - " return s" + " return s\n" ] }, { @@ -630,7 +634,7 @@ "qwer3,\n", "qwer4 -- comment[C]\n", "\"\"\"\n", - ")" + ")\n" ] }, { @@ -673,7 +677,7 @@ "[CS]-- another comment line[C]\n", "qwer4 -- comment[C]\n", "\"\"\"\n", - ")" + ")\n" ] }, { @@ -707,7 +711,7 @@ "qwer, qwer2\n", "\n", "from table1\n", - "\"\"\" \n", + "\"\"\"\n", " ),\n", "\"\"\"\n", "create or replace my_table as\n", @@ -716,8 +720,8 @@ "qwer, qwer2\n", "\n", "from table1\n", - "\"\"\" \n", - ")" + "\"\"\"\n", + ")\n" ] }, { @@ -744,14 +748,14 @@ " mark_comments(\n", "\"\"\"\n", "SELECT asdf, qwer, /* another comment */\n", - "qwer1, \n", + "qwer1,\n", "/* inline comment */\n", "qwer2\n", "FROM table1\n", "WHERE asdf=1\"\"\"\n", " ),\n", " \"\\nSELECT asdf, qwer, /* another comment */[C]\\nqwer1, \\n[CS]/* inline comment */[C]\\nqwer2\\nFROM table1\\nWHERE asdf=1\"\n", - ")" + ")\n" ] }, { @@ -778,14 +782,14 @@ "\"\"\"\n", "select a.asdf, b.qwer, -- some comment here\n", "/* and here is a line comment inside select */\n", - "substr(c.asdf, 1, 2) as substr_asdf, \n", + "substr(c.asdf, 1, 2) as substr_asdf,\n", "/* some commenT there */\n", "\"\"\"\n", " ),\n", "\"\"\"\n", "select a.asdf, b.qwer, -- some comment here[C]\n", "[CS]/* and here is a line comment inside select */[C]\n", - "substr(c.asdf, 1, 2) as substr_asdf, \n", + "substr(c.asdf, 1, 2) as substr_asdf,\n", "[CS]/* some commenT there */[C]\n", "\"\"\"\n", ")\n" @@ -819,8 +823,8 @@ "comment */\n", "select a.asdf, b.qwer, -- some comment here\n", "/* and here is a line comment inside select */\n", - "substr(c.asdf, 1, 2) as substr_asdf, \n", - "/* some commenT \n", + "substr(c.asdf, 1, 2) as substr_asdf,\n", + "/* some commenT\n", "there */\n", "case when a.asdf= 1 then 'b' /* here a case comment */\n", "\"\"\"\n", @@ -829,11 +833,11 @@ "[CS]/* multi line[CI]comment */[C]\n", "select a.asdf, b.qwer, -- some comment here[C]\n", "[CS]/* and here is a line comment inside select */[C]\n", - "substr(c.asdf, 1, 2) as substr_asdf, \n", + "substr(c.asdf, 1, 2) as substr_asdf,\n", "[CS]/* some commenT [CI]there */[C]\n", "case when a.asdf= 1 then 'b' /* here a case comment */[C]\n", "\"\"\"\n", - ")" + ")\n" ] }, { @@ -870,7 +874,7 @@ "[CS]/* some commenT there */[C]\n", "case when a.asdf= 1 then 'b' /* here a case comment */[C]\n", "\"\"\"\n", - ")" + ")\n" ] }, { @@ -896,8 +900,8 @@ "#export\n", "def split_query(s):\n", " \"\"\"Split query into comment / non-comment, quote / non-quote, select / non-select\n", - " \n", - " Return a dict with keys \"string\", \"comment\" in (True, False) \"quote\" in (True, False) \n", + "\n", + " Return a dict with keys \"string\", \"comment\" in (True, False) \"quote\" in (True, False)\n", " and \"select\" in (True, False)\n", " \"\"\"\n", " s_low = s.lower() # lowercased string\n", @@ -923,9 +927,9 @@ " balanced_parentheses -= 1\n", " if select_re.match(s_low[max(i-1, 0):i+7]) and k == 0: # k = 0 -> no comment\n", " s_comp.append({\n", - " \"string\": s[start:i], \n", - " \"comment\": comment_region, \n", - " \"quote\": quote_region, \n", + " \"string\": s[start:i],\n", + " \"comment\": comment_region,\n", + " \"quote\": quote_region,\n", " \"select\": select_region\n", " })\n", " start = i\n", @@ -934,9 +938,9 @@ " balanced_parentheses = 0 #reset parentheses value\n", " elif from_re.match(s_low[max(i-1, 0):i+5]) and k == 0 and balanced_parentheses == 0:\n", " s_comp.append({\n", - " \"string\": s[start:i], \n", - " \"comment\": comment_region, \n", - " \"quote\": quote_region, \n", + " \"string\": s[start:i],\n", + " \"comment\": comment_region,\n", + " \"quote\": quote_region,\n", " \"select\": select_region\n", " })\n", " start = i\n", @@ -944,14 +948,14 @@ " balanced_parentheses = global_balanced_parentheses #get back the global paretheses value\n", " elif (\n", " s[i:i+4] == \"[CS]\" and\n", - " not comment_open1 and \n", + " not comment_open1 and\n", " not comment_open2 and\n", - " not quote_open1 and \n", + " not quote_open1 and\n", " not quote_open2\n", " ): # if there is an opening full line comment\n", " k += 1\n", " s_comp.append({\n", - " \"string\": s[start:i], \n", + " \"string\": s[start:i],\n", " \"comment\": comment_region,\n", " \"quote\": quote_region,\n", " \"select\": select_region\n", @@ -963,16 +967,16 @@ " else:\n", " comment_open2 = True\n", " elif (\n", - " s[i:i+2] == \"/*\" and \n", - " not comment_open1 and \n", + " s[i:i+2] == \"/*\" and\n", + " not comment_open1 and\n", " not comment_open2 and\n", - " not quote_open1 and \n", - " not quote_open2 \n", + " not quote_open1 and\n", + " not quote_open2\n", " ): # if there is an opening comment /*\n", " k += 1\n", " # before opening comment it was no comment\n", " s_comp.append({\n", - " \"string\": s[start:i], \n", + " \"string\": s[start:i],\n", " \"comment\": comment_region,\n", " \"quote\": quote_region,\n", " \"select\": select_region\n", @@ -1015,17 +1019,17 @@ " comment_region = False\n", " start = i+2\n", " elif (\n", - " s[i:i+2] == \"--\" and \n", - " not comment_open1 and \n", + " s[i:i+2] == \"--\" and\n", + " not comment_open1 and\n", " not comment_open2 and\n", - " not quote_open1 and \n", - " not quote_open2 \n", + " not quote_open1 and\n", + " not quote_open2\n", " ): # if there is an opening comment --\n", " k += 1\n", " s_comp.append({\n", - " \"string\": s[start:i], \n", - " \"comment\": comment_region, \n", - " \"quote\": quote_region, \n", + " \"string\": s[start:i],\n", + " \"comment\": comment_region,\n", + " \"quote\": quote_region,\n", " \"select\": select_region\n", " }) # before opening comment it was no comment\n", " comment_open2 = True\n", @@ -1042,19 +1046,19 @@ " comment_open2 = False\n", " if c == \"\\n\":\n", " s_comp.append({\n", - " \"string\": s[start:i], \n", - " \"comment\": comment_region, \n", - " \"quote\": quote_region, \n", + " \"string\": s[start:i],\n", + " \"comment\": comment_region,\n", + " \"quote\": quote_region,\n", " \"select\": select_region\n", " }) # before closing comment it was comment\n", " start = i\n", " else: # [C]\n", " s_comp.append({\n", - " \"string\": s[start:i+3], \n", + " \"string\": s[start:i+3],\n", " \"comment\": comment_region,\n", " \"quote\": quote_region,\n", " \"select\": select_region\n", - " }) # before closing comment it was comment \n", + " }) # before closing comment it was comment\n", " start = i+3\n", " comment_region = False\n", " elif (\n", @@ -1065,9 +1069,9 @@ " not quote_open2\n", " ):\n", " s_comp.append({\n", - " \"string\": s[start:i+1], \n", - " \"comment\": comment_region, \n", - " \"quote\": quote_region, \n", + " \"string\": s[start:i+1],\n", + " \"comment\": comment_region,\n", + " \"quote\": quote_region,\n", " \"select\": select_region\n", " }) # before opening comment it was no comment\n", " quote_open1 = True\n", @@ -1081,9 +1085,9 @@ " not quote_open2\n", " ):\n", " s_comp.append({\n", - " \"string\": s[start:i], \n", - " \"comment\": comment_region, \n", - " \"quote\": quote_region, \n", + " \"string\": s[start:i],\n", + " \"comment\": comment_region,\n", + " \"quote\": quote_region,\n", " \"select\": select_region\n", " }) # before opening comment it was no comment\n", " quote_open1 = False\n", @@ -1097,9 +1101,9 @@ " not quote_open2\n", " ):\n", " s_comp.append({\n", - " \"string\": s[start:i + 1], \n", - " \"comment\": comment_region, \n", - " \"quote\": quote_region, \n", + " \"string\": s[start:i + 1],\n", + " \"comment\": comment_region,\n", + " \"quote\": quote_region,\n", " \"select\": select_region\n", " }) # before opening comment it was no comment\n", " quote_open2 = True\n", @@ -1113,22 +1117,22 @@ " quote_open2\n", " ):\n", " s_comp.append({\n", - " \"string\": s[start:i], \n", - " \"comment\": comment_region, \n", - " \"quote\": quote_region, \n", + " \"string\": s[start:i],\n", + " \"comment\": comment_region,\n", + " \"quote\": quote_region,\n", " \"select\": select_region\n", " }) # before opening comment it was no comment\n", " quote_open2 = False\n", " quote_region = False\n", " start = i\n", " s_comp.append({\n", - " \"string\": s[start:], \n", + " \"string\": s[start:],\n", " \"comment\": comment_region,\n", " \"quote\": quote_region,\n", " \"select\": select_region\n", " })\n", " s_comp = [d for d in s_comp if d[\"string\"] != \"\"] # remove empty strings\n", - " return s_comp" + " return s_comp\n" ] }, { @@ -1161,10 +1165,10 @@ " {\"string\": \"[CS]/* some comment */\", \"comment\": True, \"quote\": False, \"select\": True},\n", " {\"string\": \" qwer, '\", \"comment\": False, \"quote\": False, \"select\": True},\n", " {\"string\": \"blabla\", \"comment\": False, \"quote\": True, \"select\": True},\n", - " {\"string\": \"' as qwerty\\n\", \"comment\": False, \"quote\": False, \"select\": True}, \n", + " {\"string\": \"' as qwerty\\n\", \"comment\": False, \"quote\": False, \"select\": True},\n", " {\"string\": \"from table1\", \"comment\": False, \"quote\": False, \"select\": False},\n", " ]\n", - ")" + ")\n" ] }, { @@ -1183,7 +1187,7 @@ "#export\n", "def split_apply_concat(s, f):\n", " \"Split query `s`, apply function `f` and concatenate strings\"\n", - " return \"\".join([d[\"string\"] for d in f(split_query(s))])" + " return \"\".join([d[\"string\"] for d in f(split_query(s))])\n" ] }, { @@ -1205,7 +1209,7 @@ " \"select asdf, /* some comment */\", lambda split_s: [d for d in split_s if not d[\"comment\"]]\n", " ),\n", " \"select asdf, \"\n", - ")" + ")\n" ] }, { @@ -1227,7 +1231,7 @@ " split_s = split_query(s)\n", " # compress all strings with same keys\n", " split_s = compress_dicts(split_s, keys=[\"comment\", \"quote\"])\n", - " return split_s " + " return split_s\n" ] }, { @@ -1259,9 +1263,9 @@ " {\"string\": \"[CS]/* some comment */\", \"comment\": True, \"quote\": False},\n", " {\"string\": \" qwer, '\", \"comment\": False, \"quote\": False},\n", " {\"string\": \"blabla\", \"comment\": False, \"quote\": True},\n", - " {\"string\": \"' as qwerty\\nfrom table1\", \"comment\": False, \"quote\": False}, \n", + " {\"string\": \"' as qwerty\\nfrom table1\", \"comment\": False, \"quote\": False},\n", " ]\n", - ")" + ")\n" ] }, { @@ -1287,9 +1291,9 @@ " {\"string\": \"\"\"\"\";\"\"\", \"comment\": False, \"quote\": True},\n", " {\"string\": \"\"\"', \\\"\"\"\", \"comment\": False, \"quote\": False},\n", " {\"string\": \"\"\"';'\"\"\", \"comment\": False, \"quote\": True},\n", - " {\"string\": \"\"\"\\\", asdf2) as asdf5\"\"\", \"comment\": False, \"quote\": False} \n", + " {\"string\": \"\"\"\\\", asdf2) as asdf5\"\"\", \"comment\": False, \"quote\": False}\n", " ]\n", - ")" + ")\n" ] }, { @@ -1311,7 +1315,7 @@ " split_s = split_query(s)\n", " # compress all strings with same keys\n", " split_s = compress_dicts(split_s, keys=[\"comment\"])\n", - " return split_s" + " return split_s\n" ] }, { @@ -1343,7 +1347,7 @@ " {\"string\": \"[CS]/* some comment */\", \"comment\": True},\n", " {\"string\": \" qwer, 'blabla' as qwerty\\nfrom table1\", \"comment\": False},\n", " ]\n", - ")" + ")\n" ] }, { @@ -1377,7 +1381,7 @@ " # increase the cumulative length\n", " cumul_len += len(d[\"string\"])\n", " positions = sorted(positions) # sort positions before returning\n", - " return positions" + " return positions\n" ] }, { @@ -1397,7 +1401,7 @@ "assert_and_print(\n", " identify_in_sql(r\"\\bcase\\b\", \"select asdf, qwer, case when blabla case\"),\n", " [19, 36]\n", - ")" + ")\n" ] }, { @@ -1417,7 +1421,7 @@ "assert_and_print(\n", " identify_in_sql(r\"\\n\", \"select asdf,\\nqwer\"),\n", " [12]\n", - ")" + ")\n" ] }, { @@ -1437,7 +1441,7 @@ "assert_and_print(\n", " identify_in_sql(r\"\\n\", \"select asdf,\\nqwer, -- some comment\\n\"),\n", " [12, 34]\n", - ")" + ")\n" ] }, { @@ -1457,7 +1461,7 @@ "assert_and_print(\n", " identify_in_sql(r\"\\n\", \"select asdf,\\nqwer,[CS]-- some comment[C]\\nqwer2\"),\n", " [12, 40]\n", - ")" + ")\n" ] }, { @@ -1489,7 +1493,7 @@ " split_s.append(s[start:end])\n", " else:\n", " split_s.append(s[start+1:end]) # do not take the semicolon\n", - " return split_s" + " return split_s\n" ] }, { @@ -1528,7 +1532,7 @@ " \"\\n\\nuse schema_another_schema\",\n", " \"\\n\"\n", " ]\n", - ")" + ")\n" ] }, { @@ -1542,7 +1546,7 @@ " \"Replace newline characters in `s` by whitespace but not in the comments\"\n", " positions = identify_in_sql(\"\\n\", s)\n", " clean_s = \"\".join([c if i not in positions else \" \" for i, c in enumerate(s)])\n", - " return clean_s" + " return clean_s\n" ] }, { @@ -1566,7 +1570,7 @@ " \"select asdf,\\nqwer, /* some comment \\n with multiple lines \\n*/[C], some_field from\\n table\"\n", " ),\n", " \"select asdf, qwer, /* some comment \\n with multiple lines \\n*/[C], some_field from table\"\n", - ")" + ")\n" ] }, { @@ -1590,7 +1594,7 @@ " if not d[\"comment\"] and not d[\"quote\"]: # only for non comments and non text in quotes\n", " d[\"string\"] = re.sub(regex, repl, d[\"string\"])\n", " s = \"\".join(d[\"string\"] for d in split_s)\n", - " return s" + " return s\n" ] }, { @@ -1612,7 +1616,7 @@ " r\",([\\w\\d])\", r\", \\1\", \"select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)\"\n", " ),\n", " \"select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)\"\n", - ")" + ")\n" ] }, { @@ -1632,7 +1636,7 @@ "def add_whitespaces_after_comma(s):\n", " \"Add whitespace after comma in query `s` if there is no whitespace\"\n", " s = sub_in_sql(r\",([\\w\\d]+)\", r\", \\1\", s)\n", - " return s" + " return s\n" ] }, { @@ -1654,7 +1658,7 @@ " \"select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)\"\n", " ),\n", " \"select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)\"\n", - ")" + ")\n" ] }, { @@ -1674,7 +1678,7 @@ "assert_and_print(\n", " add_whitespaces_after_comma(\"select asdf,qwer,substr(asdf,1,2) as qwerty\"),\n", " \"select asdf, qwer, substr(asdf, 1, 2) as qwerty\"\n", - ")" + ")\n" ] }, { @@ -1694,7 +1698,7 @@ "assert_and_print(\n", " add_whitespaces_after_comma(\"select asdf, qwer, substr(asdf,1,2) as qwerty\"),\n", " \"select asdf, qwer, substr(asdf, 1, 2) as qwerty\"\n", - ")" + ")\n" ] }, { @@ -1740,8 +1744,8 @@ " elif c == '\"' and not quote_open1 and not quote_open2:\n", " quote_open2 = True\n", " elif c == '\"' and not quote_open1 and quote_open2:\n", - " quote_open2 = False \n", - " return end_of_fields" + " quote_open2 = False\n", + " return end_of_fields\n" ] }, { @@ -1762,9 +1766,9 @@ " identify_end_of_fields(\n", "\"\"\"\n", "select asdf, substr(asdf, 1, 2) as qwer, concat(substr(asdf, 1, 2), substr(asdf, 3, 2)) as qwer2, asdf5\n", - "\"\"\"), \n", + "\"\"\"),\n", " [12, 40, 97]\n", - ")" + ")\n" ] }, { @@ -1785,9 +1789,9 @@ " identify_end_of_fields(\n", "\"\"\"\n", "select concat(substr(concat(')0', substr(asdf, 1, 2)), -2, 2), substr(concat('(0', substr(asdf, 3, 2)), -2, 2)) as qwer, asdf\n", - "\"\"\"), \n", + "\"\"\"),\n", " [120]\n", - ")" + ")\n" ] }, { @@ -1815,7 +1819,7 @@ "\"\"\"\n", "select asdf, /* Some commnent */[C]qwerty, substr(asdf, 1, 2) as qwer, -- Some comment[C] asdf5\n", "\"\"\"), [42]\n", - ")" + ")\n" ] }, { @@ -1836,7 +1840,7 @@ "\"\"\"\n", "select asdf, [CS]/* Some commnent */[C]qwerty, substr(asdf, 1, 2) as qwer, -- Some comment[C] asdf5\n", "\"\"\"), [46]\n", - ")" + ")\n" ] }, { @@ -1864,7 +1868,7 @@ "\"\"\"\n", "select asdf, /* Some, commnent */[C]qwerty, substr(asdf, 1, 2) as qwer, -- Some, comment[C] asdf5\n", "\"\"\"), [43]\n", - ")" + ")\n" ] }, { @@ -1895,8 +1899,8 @@ " split_s.append(s[start:end].lstrip()) # get string part\n", " split_s.append(\"\\n\" + \" \" * indentation) # add indentation\n", " s = \"\".join(split_s)\n", - " s = s.strip() \n", - " return s" + " s = s.strip()\n", + " return s\n" ] }, { @@ -1927,7 +1931,7 @@ " concat(substr(asdf, 1, 2), substr(asdf, 3, 2)) as qwer2,\n", " asdf5\n", "\"\"\".strip()\n", - ")" + ")\n" ] }, { @@ -1958,7 +1962,7 @@ " lead(qwer) OVER (partition by asdf order by qwer),\n", " asdf2\n", "\"\"\".strip()\n", - ")" + ")\n" ] }, { @@ -1989,7 +1993,7 @@ " lead(qwer) OVER (partition by asdf order by qwer),\n", " asdf2\n", "\"\"\".strip()\n", - ")" + ")\n" ] }, { @@ -2029,7 +2033,7 @@ " subquery_pos.append(i)\n", " return subquery_pos\n", " elif c == \")\":\n", - " k -= 1" + " k -= 1\n" ] }, { @@ -2042,7 +2046,7 @@ " extract_outer_subquery(\n", " \"() () (\\nSELECT () (\\nSELECT ())) ()\"\n", " ) == [6, 30]\n", - ")" + ")\n" ] }, { @@ -2088,7 +2092,7 @@ "\n", " # add new line and indentation before the end of the \")\"\n", " formatted_s = re.sub(r\"\\s*(\\))$\", \"\\n\" + \" \" * last_line_indent + r\"\\1\", formatted_s)\n", - " return formatted_s" + " return formatted_s\n" ] }, { @@ -2109,7 +2113,7 @@ " \"Extract outer subquery in query `li`\"\n", " # only process if the line is longer than max_len\n", " first_line = s.split(\"\\n\")[0]\n", - " if len(first_line) > max_len and \"(\" in first_line: \n", + " if len(first_line) > max_len and \"(\" in first_line:\n", " # initialize container for subquery positions\n", " # in string `s`\n", " subquery_pos = []\n", @@ -2119,7 +2123,7 @@ " k = -1\n", " # counter for '\n", " d = 0\n", - " # loop over string characters \n", + " # loop over string characters\n", " for i, c in enumerate(s):\n", " if c == \"(\" and k == -1 and d%2 == 0: # The first (\n", " subquery_pos.append(i)\n", @@ -2140,7 +2144,7 @@ " elif s[i:i+2] == \"OR\" and k == 0 and d%2 == 0:\n", " subquery_pos_and_or.append(i-1)\n", " elif c == \"'\":\n", - " d += 1" + " d += 1\n" ] }, { @@ -2165,7 +2169,7 @@ " # add new line and indentation before the end of the \")\"\n", " if is_end:\n", " formatted_s = re.sub(r\"\\s*(\\))$\", \"\\n\" + \" \" * last_line_indent + r\"\\1\", formatted_s)\n", - " return formatted_s" + " return formatted_s\n" ] }, { @@ -2187,8 +2191,8 @@ " in quotes\"\"\"\n", " split_s = split_query(s) # split in comment / non-comment, quote / non-quote regions\n", " s_code = \"\".join([d[\"string\"] for d in split_s if not d[\"comment\"] and not d[\"quote\"]])\n", - " return (bool(re.search(pattern=r\"\\bselect\\b|\\bcreate\\b.{0,27}(\\btable\\b|\\bview\\b)\", string=s_code, flags=re.I)) and \n", - " not bool(re.search(pattern=r\"\\bcreate\\b(?!.*(\\btable\\b|\\bview\\b))\", string=s_code, flags=re.I)))" + " return (bool(re.search(pattern=r\"\\bselect\\b|\\bcreate\\b.{0,27}(\\btable\\b|\\bview\\b)\", string=s_code, flags=re.I)) and\n", + " not bool(re.search(pattern=r\"\\bcreate\\b(?!.*(\\btable\\b|\\bview\\b))\", string=s_code, flags=re.I)))\n" ] }, { @@ -2203,7 +2207,7 @@ "select asdf\n", "from table\n", "where asdf = 1\n", - "\"\"\".strip())" + "\"\"\".strip())\n" ] }, { @@ -2218,7 +2222,7 @@ "select asdf\n", "from table\n", "where asdf = 1\n", - "\"\"\".strip())" + "\"\"\".strip())\n" ] }, { @@ -2229,7 +2233,7 @@ "source": [ "assert check_sql_query(\"\"\"\n", "SELECT qwer, asdf\n", - "\"\"\")" + "\"\"\")\n" ] }, { @@ -2238,7 +2242,7 @@ "metadata": {}, "outputs": [], "source": [ - "assert not check_sql_query(\"use database my_database;\")" + "assert not check_sql_query(\"use database my_database;\")\n" ] }, { @@ -2247,7 +2251,7 @@ "metadata": {}, "outputs": [], "source": [ - "assert not check_sql_query(\"use database my_database; /* create table */\")" + "assert not check_sql_query(\"use database my_database; /* create table */\")\n" ] }, { @@ -2258,7 +2262,7 @@ "source": [ "assert not check_sql_query(\"\"\"\n", "create or replace task my_task as\n", - "\"\"\")" + "\"\"\")\n" ] }, { @@ -2271,7 +2275,7 @@ "create or replace task my_task as\n", "create or replace table as\n", "select asdf\n", - "\"\"\")" + "\"\"\")\n" ] }, { @@ -2283,7 +2287,7 @@ "assert not check_sql_query(\"\"\"\n", "\n", "use schema my_schema;\n", - "\"\"\")" + "\"\"\")\n" ] }, { @@ -2302,7 +2306,7 @@ "#export\n", "def check_skip_marker(s):\n", " \"Checks whether user set marker /*skip-formatter*/ to not format query\"\n", - " return bool(re.search(r\"\\/\\*skip-formatter\\*\\/\", s))" + " return bool(re.search(r\"\\/\\*skip-formatter\\*\\/\", s))\n" ] }, { @@ -2317,7 +2321,7 @@ " /*skip-formatter*/\n", " asdf2\n", "FRoM table1\n", - "\"\"\")" + "\"\"\")\n" ] }, { @@ -2331,7 +2335,7 @@ " qwer,\n", " asdf2\n", "FRoM table1\n", - "\"\"\")" + "\"\"\")\n" ] }, { @@ -2358,9 +2362,9 @@ " line_numbers = [\n", " i + 1\n", " for i, line in enumerate(s_lines)\n", - " if re.search(\"(?:create.*?table|create.*?view)\", line, flags=re.I) \n", + " if re.search(\"(?:create.*?table|create.*?view)\", line, flags=re.I)\n", " ]\n", - " return line_numbers" + " return line_numbers\n" ] }, { @@ -2389,7 +2393,7 @@ "from table2;\n", "\"\"\"\n", " ), [2, 6]\n", - ")" + ")\n" ] }, { @@ -2401,7 +2405,7 @@ "#export\n", "def count_lines(s):\n", " \"Count the number of lines in `s`\"\n", - " return s.count(\"\\n\")" + " return s.count(\"\\n\")\n" ] }, { @@ -2430,7 +2434,7 @@ "from table2;\n", "\"\"\"\n", " ), 8\n", - ")" + ")\n" ] }, { @@ -2442,7 +2446,7 @@ "#export\n", "def find_line_number(s, positions):\n", " \"Find line number in `s` out of `positions`\"\n", - " return [s[0:pos].count(\"\\n\") + 1 for pos in positions]" + " return [s[0:pos].count(\"\\n\") + 1 for pos in positions]\n" ] }, { @@ -2471,7 +2475,7 @@ "from table2;\n", "\"\"\", [1, 68]),\n", " [2, 6]\n", - ")" + ")\n" ] }, { @@ -2487,14 +2491,14 @@ " split1 = re.split(r\"(?:\\s|,)\", str1)\n", " split1 = [sp for sp in split1 if sp != \"\"]\n", " split2 = re.split(r\"(?:\\s|,)\", str2)\n", - " split2 = [sp for sp in split2 if sp != \"\"] \n", + " split2 = [sp for sp in split2 if sp != \"\"]\n", " count1 = Counter(split1)\n", " count2 = Counter(split2)\n", " all_words = set(list(count1.keys()) + list(count2.keys()))\n", " disimilarity = 0\n", " for w in all_words:\n", " disimilarity += abs(count1[w] - count2[w])\n", - " return disimilarity" + " return disimilarity\n" ] }, { @@ -2511,7 +2515,7 @@ } ], "source": [ - "assert_and_print(disimilarity(\"hello world\", \"hello world\"), 0)" + "assert_and_print(disimilarity(\"hello world\", \"hello world\"), 0)\n" ] }, { @@ -2528,7 +2532,7 @@ } ], "source": [ - "assert_and_print(disimilarity(\"hello world\", \"hello world!\"), 2)" + "assert_and_print(disimilarity(\"hello world\", \"hello world!\"), 2)\n" ] }, { @@ -2540,7 +2544,7 @@ "#export\n", "def assign_comment(fs, cds):\n", " \"\"\"Assign comments in list of dictionaries `cds` to formatted string `fs` using Jaccard distance\n", - " \n", + "\n", " The comment dictionaries `cds` should contain the keys \"comment\" and \"preceding\" (string)\n", " \"\"\"\n", " fsplit_s = fs.split(\"\\n\")\n", @@ -2571,12 +2575,12 @@ " # add comment to it and replace [C] by empty string and [CS] by newline + proper indentation\n", " whitespace = \"\" if match_beginn_cs.match(d[\"comment\"]) else \" \"\n", " fsplit_s_out[line_number] += whitespace + re.sub(\n", - " \"\\[CS\\]\", \n", - " \"\\n\" + \" \" * indentation, \n", + " \"\\[CS\\]\",\n", + " \"\\n\" + \" \" * indentation,\n", " replace_c.sub(\"\", d[\"comment\"])\n", - " ) \n", + " )\n", " s_out = \"\\n\".join(fsplit_s_out)\n", - " return s_out" + " return s_out\n" ] }, { @@ -2626,7 +2630,7 @@ " else 0 end as qwerty,\n", " qwer2\n", "\"\"\".strip()\n", - ")" + ")\n" ] }, { @@ -2678,7 +2682,7 @@ " else 0 end as qwerty,\n", " qwer2\n", "\"\"\".strip()\n", - ")" + ")\n" ] }, { @@ -2730,7 +2734,7 @@ " else 0 end as qwerty,\n", " qwer2\n", "\"\"\".strip()\n", - ")" + ")\n" ] }, { @@ -2788,7 +2792,7 @@ " /* another whole line comment */\n", " qwer2\n", "\"\"\".strip()\n", - ")" + ")\n" ] }, { @@ -2843,7 +2847,7 @@ " else 0 end as qwerty,\n", " qwer2\n", "\"\"\".strip()\n", - ")" + ")\n" ] }, { @@ -2883,12 +2887,12 @@ "#export\n", "def remove_prefix(text, prefix):\n", " \"\"\"Remove prefix of a string\n", - " \n", - " Same behavior as removeprefix() in py3.9+. Compatible for python version < 3.9 \n", + "\n", + " Same behavior as removeprefix() in py3.9+. Compatible for python version < 3.9\n", " \"\"\"\n", " if text.startswith(prefix):\n", " return text[len(prefix):]\n", - " return text" + " return text\n" ] }, { @@ -2912,7 +2916,7 @@ "source": [ "#hide\n", "from nbdev.export import notebook2script\n", - "notebook2script()" + "notebook2script()\n" ] } ],