Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create mysqldump_to_csv_and_txt.py #13

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions mysqldump_to_csv_and_txt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import fileinput
import csv
import sys

# Restore the default SIGPIPE handler (SIG_DFL) so that a pipe closed early
# by the reader (for example when the output is piped into `head`) does not
# surface as an exception inside Python.
from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE, SIG_DFL)

# Raise the csv module's per-field size limit to sys.maxsize so that very
# large values in the dump can still be parsed.
csv.field_size_limit(sys.maxsize)

def is_create(line: str) -> bool:
    """
    Return True if the line begins with a SQL CREATE TABLE statement.

    The check is case-sensitive and requires the keyword at column 0.
    """
    # str.startswith already returns a bool, so no `or False` is needed.
    return line.startswith('CREATE TABLE')

def is_insert(line: str) -> bool:
    """
    Return True if the line begins with a SQL INSERT INTO statement.

    The check is case-sensitive and requires the keyword at column 0.
    """
    # str.startswith already returns a bool, so no `or False` is needed.
    return line.startswith('INSERT INTO')

def get_tablename(line: str) -> str:
    """
    Return the table name referenced by a CREATE TABLE or INSERT INTO line.

    The name is taken from the first backtick-quoted identifier on the
    line. A qualified name such as `db`.`table` is returned as "db.table".
    All backticks are removed from the result.

    Raises:
        ValueError: if the line contains no backtick-quoted identifier.
    """
    import re

    # Match only the first identifier (optionally dot-qualified). A greedy
    # "`.*`" would run on to the last backtick of the line and swallow any
    # backtick that appears inside the inserted data. A doubled backtick is
    # accepted inside an identifier, as an escaped backtick.
    identifier = r"`(?:[^`]|``)*`"
    match = re.search(identifier + r"(?:\." + identifier + r")*", line)
    if match is None:
        raise ValueError(
            "no backtick-quoted table name found in line: %r" % line[:80]
        )
    return match.group(0).replace("`", "")

def get_values(line):
    """
    Extract the VALUES payload of an INSERT statement.

    Everything after the first occurrence of "` VALUES " is returned; an
    empty string is returned when that marker is absent.
    """
    _head, _marker, payload = line.partition('` VALUES ')
    return payload


def values_sanity_check(values: str) -> bool:
    """
    Ensure that the values from an INSERT statement meet basic checks.

    Returns:
        True when `values` is non-empty and starts with an opening
        parenthesis.

    Raises:
        ValueError: if `values` is empty or does not start with "(".
    """
    # Explicit raises instead of `assert`: assertions are stripped when
    # Python runs with -O, which would let malformed input through.
    if not values:
        raise ValueError("INSERT statement has no VALUES payload")
    if values[0] != '(':
        raise ValueError(
            "VALUES payload must start with '(': %r" % values[:40]
        )
    return True


def parse_values(table_name,values, outfile):
"""
Given a file handle and the raw values from a MySQL INSERT
statement, write the equivalent CSV to the file
"""
latest_row = []
reader = csv.reader([values], delimiter=',',
doublequote=False,
escapechar='\\',
quotechar="'",
strict=True
)
outfile=open(outfile,"a")
writer = csv.writer(outfile, quoting=csv.QUOTE_MINIMAL)
for reader_row in reader:
for column in reader_row:
# If our current string is empty...
if len(column) == 0 or column == 'NULL':
latest_row.append(chr(0))
continue
# If our string starts with an open paren
if column[0] == "(":
# Assume that this column does not begin
# a new row.
new_row = False
# If we've been filling out a row
if len(latest_row) > 0:
# Check if the previous entry ended in
# a close paren. If so, the row we've
# been filling out has been COMPLETED
# as:
# 1) the previous entry ended in a )
# 2) the current entry starts with a (
if latest_row[-1][-1] == ")":
# Remove the close paren.
latest_row[-1] = latest_row[-1][:-1]
new_row = True
# If we've found a new row, write it out
# and begin our new one
if new_row:
writer.writerow(latest_row)
latest_row = []
# If we're beginning a new row, eliminate the
# opening parentheses.
if len(latest_row) == 0:
column = column[1:]
# Add our column to the row we're working on.
latest_row.append(column)
# At the end of an INSERT statement, we'll
# have the semicolon.
# Make sure to remove the semicolon and
# the close paren.
if latest_row[-1][-2:] == ");":
latest_row[-1] = latest_row[-1][:-2]
writer.writerow(latest_row)


def main():
    """
    Split a MySQL dump into per-table schema and data files.

    Iterates over all lines of the files listed in sys.argv[1:], or stdin
    if no arguments are given. Every CREATE TABLE statement is appended
    verbatim to "<table>.txt", and the rows of every INSERT statement are
    appended as CSV to "<table>.csv". A KeyboardInterrupt ends the program
    with exit status 0.
    """
    # Flag indicating whether a CREATE TABLE statement is being processed.
    create_table = False
    # Name of the most recently seen table.
    tn = ""
    try:
        for line in fileinput.input():
            # Look for a CREATE statement and copy it to the schema file.
            if is_create(line) or create_table:
                if not create_table:
                    # First line of the statement: remember the table name.
                    tn = get_tablename(line)
                    create_table = True
                with open(tn + ".txt", 'a') as outFile:
                    outFile.write(line)
                # The statement ends on the line that ends with ";". A
                # plain `";" in line` test would stop too early when a
                # column default or comment contains a semicolon.
                if line.rstrip().endswith(";"):
                    create_table = False
            # Look for an INSERT statement and parse it.
            if is_insert(line):
                tn = get_tablename(line)
                values = get_values(line)
                if values_sanity_check(values):
                    # Each new table name results in a new file.
                    parse_values(tn, values, tn + ".csv")
    except KeyboardInterrupt:
        sys.exit(0)


if __name__ == "__main__":
    main()