Skip to content

Commit

Permalink
fix inconsistency
Browse files Browse the repository at this point in the history
  • Loading branch information
adam444555 committed Nov 21, 2023
1 parent f0bbb0f commit cca979d
Showing 1 changed file with 34 additions and 2 deletions.
36 changes: 34 additions & 2 deletions ptypysql/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from collections import Counter
import operator


# Cell
def assert_and_print(s_in, s_expected):
"Assert equality of `s_in` and `s_expected` and print the result of `s_in` if the assertion worked"
Expand Down Expand Up @@ -42,6 +43,7 @@ def assert_and_print(s_in, s_expected):
print(s_in)
return None


# Cell
def compress_dicts(ld, keys):
"Compress list of dicts `ld` with same `keys` concatenating key 'string'"
Expand All @@ -60,13 +62,15 @@ def compress_dicts(ld, keys):
ld_out.append(d)
return ld_out


# Cell
def remove_whitespaces_newline(s):
    """Remove whitespaces before and after newline in `s`.

    Only the characters ``\\r``, ``\\t``, ``\\f``, ``\\v`` and space are
    stripped; the newline characters themselves are kept, so consecutive
    newlines (blank lines) are preserved.

    Returns the cleaned string.
    """
    s = re.sub(r"\n[\r\t\f\v ]+", "\n", s)  # remove whitespaces after newline
    s = re.sub(r"[\r\t\f\v ]+\n", "\n", s)  # remove whitespaces before newline
    return s


# Cell
def remove_whitespaces_comments(s):
"Remove whitespaces before and after comment tokens in `s`"
Expand All @@ -78,28 +82,32 @@ def remove_whitespaces_comments(s):
s = re.sub(r"[\r\t\f\v ]+\[CI\]", "[CI]", s) # remove whitespaces before comment token [CI]
return s


# Cell
def remove_redundant_whitespaces(s):
    """Strip `s` and collapse runs of two or more whitespaces into one space.

    Newlines are not part of the collapsed character class, so line breaks
    inside `s` are kept (leading / trailing ones are removed by the strip).

    Returns the cleaned string.
    """
    s = s.strip()
    # `{2,}` means "two or more": a single whitespace character is left as is
    s = re.sub(r"[\r\t\f\v ]{2,}", " ", s)
    return s


# Cell
def remove_whitespaces_parenthesis(s):
    """Remove whitespaces directly inside parenthesis in query `s`.

    Whitespace following ``(`` and whitespace preceding ``)`` is dropped.
    Newlines are not in the matched character class and are left untouched.

    Returns the cleaned string.
    """
    s = re.sub(r"\([\r\t\f\v ]+", "(", s)  # remove whitespaces after (
    s = re.sub(r"[\r\t\f\v ]+\)", ")", s)  # remove whitespaces before )
    return s


# Cell
def add_whitespaces_between_symbols(s):
    """Add whitespaces around comparison symbols (``= ! < >``) in line `s`.

    Three passes are applied in order:

    1. insert a space before an operator that directly follows a character,
    2. insert a space after an operator that is directly followed by a
       character,
    3. handle an operator that still has no space on either side.

    Returns the line with the spaces inserted.
    """
    # No space left. `@` and `#` are excluded from the preceding character so
    # that a prefix such as `@>` or `#>` is kept together instead of being
    # torn apart. The older variant of this substitution (without `@` in the
    # exclusion set) must not run as well: it would insert exactly the space
    # this pattern is meant to avoid.
    s = re.sub(r"([^\s=!<>@#-])(-?[=!<>]+)", r"\1 \2", s, flags=re.I)
    # no space right
    s = re.sub(r"([=!<>]+)([^\s=!<>])", r"\1 \2", s, flags=re.I)
    # no space left and right
    s = re.sub(r"([^\s=!<>-])(-?[=!<>]+)([^\s=!<>])", r"\1 \2 \3", s, flags=re.I)
    return s


# Cell
def mark_ci_comments(s):
"Replace new lines in multiline comments by special token [CI]"
Expand Down Expand Up @@ -190,6 +198,7 @@ def mark_ci_comments(s):
s = "".join([c if i not in positions else "[CI]" for i, c in enumerate(s)])
return s


# Cell
def mark_comments(s):
"Mark end of comments -- and begin of comments /* */ if they are in a new line with token [C]"
Expand All @@ -201,6 +210,7 @@ def mark_comments(s):
s = mark_ci_comments(s) # replace intercomment new lines by [CI]
return s


# Cell
def split_query(s):
"""Split query into comment / non-comment, quote / non-quote, select / non-select
Expand Down Expand Up @@ -438,11 +448,13 @@ def split_query(s):
s_comp = [d for d in s_comp if d["string"] != ""] # remove empty strings
return s_comp


# Cell
def split_apply_concat(s, f):
    """Split query `s`, apply function `f` and concatenate strings.

    `f` is called with the result of `split_query(s)` and must return an
    iterable of dictionaries that each carry a "string" key; those strings
    are joined without a separator and returned.
    """
    return "".join(d["string"] for d in f(split_query(s)))


# Cell
def split_comment_quote(s):
"Split query `s` into dictionaries with keys 'string', 'comment' and 'quote'"
Expand All @@ -451,6 +463,7 @@ def split_comment_quote(s):
split_s = compress_dicts(split_s, keys=["comment", "quote"])
return split_s


# Cell
def split_comment(s):
"Split query `s` into dictionaries with keys 'string', 'comment'"
Expand All @@ -459,6 +472,7 @@ def split_comment(s):
split_s = compress_dicts(split_s, keys=["comment"])
return split_s


# Cell
def identify_in_sql(regex, s):
"Find positions of `regex` (str or list) in string `s` ignoring comment and text in quotes"
Expand All @@ -479,6 +493,7 @@ def identify_in_sql(regex, s):
positions = sorted(positions) # sort positions before returning
return positions


# Cell
def split_by_semicolon(s):
"Split string `s` by semicolon but not between parenthesis or in comments"
Expand All @@ -497,13 +512,15 @@ def split_by_semicolon(s):
split_s.append(s[start+1:end]) # do not take the semicolon
return split_s


# Cell
def replace_newline_chars(s):
    """Replace newline characters in `s` by whitespace but not in the comments.

    The newline positions to replace come from `identify_in_sql`; every
    character of `s` at one of those positions is replaced by a single space,
    all other characters are kept.

    Returns the resulting string (same length as `s`).
    """
    # Membership is tested once per character of `s`; a set keeps each test
    # O(1) instead of scanning the whole position list every time.
    # NOTE(review): assumes the positions are hashable (integer indexes).
    positions = set(identify_in_sql("\n", s))
    return "".join(" " if i in positions else c for i, c in enumerate(s))


# Cell
def sub_in_sql(regex, repl, s):
"Subsitute `regex` with `repl` in query `s` ignoring comments and text in quotes"
Expand All @@ -514,12 +531,14 @@ def sub_in_sql(regex, repl, s):
s = "".join(d["string"] for d in split_s)
return s


# Cell
def add_whitespaces_after_comma(s):
    """Add whitespace after comma in query `s` if there is no whitespace.

    Only a comma that is directly followed by a word character is affected;
    the substitution is delegated to `sub_in_sql`.

    Returns the modified query.
    """
    return sub_in_sql(r",([\w\d]+)", r", \1", s)


# Cell
def identify_end_of_fields(s):
"Identify end of fields in query `s`"
Expand Down Expand Up @@ -549,6 +568,7 @@ def identify_end_of_fields(s):
quote_open2 = False
return end_of_fields


# Cell
def add_newline_indentation(s, indentation):
"Add newline and indentation for end of fields in query `s`"
Expand All @@ -567,6 +587,7 @@ def add_newline_indentation(s, indentation):
s = s.strip()
return s


# Cell
def extract_outer_subquery(s):
"Extract outer subquery in query `s`"
Expand All @@ -593,6 +614,7 @@ def extract_outer_subquery(s):
elif c == ")":
k -= 1


# Cell
def format_subquery(s, previous_s):
"Format subquery in line `s` based on indentation on `previous_s`"
Expand Down Expand Up @@ -632,6 +654,7 @@ def format_subquery(s, previous_s):
formatted_s = re.sub(r"\s*(\))$", "\n" + " " * last_line_indent + r"\1", formatted_s)
return formatted_s


# Cell
def extract_outer_subquery_too_long(s, max_len=99):
"Extract outer subquery in query `li`"
Expand Down Expand Up @@ -670,6 +693,7 @@ def extract_outer_subquery_too_long(s, max_len=99):
elif c == "'":
d += 1


# Cell
def format_subquery_too_long(s, previous_s, is_end):
"Format subquery in line `s` based on indentation on `previous_s`"
Expand All @@ -688,6 +712,7 @@ def format_subquery_too_long(s, previous_s, is_end):
formatted_s = re.sub(r"\s*(\))$", "\n" + " " * last_line_indent + r"\1", formatted_s)
return formatted_s


# Cell
def check_sql_query(s):
"""Checks whether `s` is a SQL query based on match of CREATE TABLE / VIEW or SELECT ignoring comments and text
Expand All @@ -697,11 +722,13 @@ def check_sql_query(s):
return (bool(re.search(pattern=r"\bselect\b|\bcreate\b.{0,27}(\btable\b|\bview\b)", string=s_code, flags=re.I)) and
not bool(re.search(pattern=r"\bcreate\b(?!.*(\btable\b|\bview\b))", string=s_code, flags=re.I)))


# Cell
def check_skip_marker(s):
    """Check whether the marker /*skip-formatter*/ occurs anywhere in `s`.

    The marker must be written exactly like that (no inner spaces, lower
    case). Returns True if it is found, False otherwise.
    """
    return bool(re.search(r"\/\*skip-formatter\*\/", s))


# Cell
def identify_create_table_view(s):
"Identify positions of CREATE .. TABLE / VIEW statements"
Expand All @@ -715,16 +742,19 @@ def identify_create_table_view(s):
]
return line_numbers


# Cell
def count_lines(s):
    """Count the newline characters in `s`.

    Note that this is the number of line breaks, not the number of lines:
    a string without a trailing newline yields one less than its line count
    (e.g. "a\\nb" gives 1) and the empty string gives 0.
    """
    return s.count("\n")


# Cell
def find_line_number(s, positions):
    """Find the 1-based line number in `s` for each index in `positions`.

    The line number of a position is one plus the number of newline
    characters that occur before it in `s`.

    Returns a list of line numbers in the same order as `positions`.
    """
    return [s[0:pos].count("\n") + 1 for pos in positions]


# Cell
def disimilarity(str1, str2):
"Calculate disimilarity between two strings by word"
Expand All @@ -741,6 +771,7 @@ def disimilarity(str1, str2):
disimilarity += abs(count1[w] - count2[w])
return disimilarity


# Cell
def assign_comment(fs, cds):
"""Assign comments in list of dictionaries `cds` to formatted string `fs` using Jaccard distance
Expand Down Expand Up @@ -782,6 +813,7 @@ def assign_comment(fs, cds):
s_out = "\n".join(fsplit_s_out)
return s_out


# Cell
def remove_prefix(text, prefix):
"""Remove prefix of a string
Expand All @@ -790,4 +822,4 @@ def remove_prefix(text, prefix):
"""
if text.startswith(prefix):
return text[len(prefix):]
return text
return text

0 comments on commit cca979d

Please sign in to comment.