"""Split a mysqldump SQL dump into per-table ``.txt`` and ``.csv`` files.

For every table found in the input, the lines of its ``CREATE TABLE``
statement are appended to ``<table>.txt`` and the rows of each
``INSERT INTO`` statement are appended to ``<table>.csv``.  Input is read
from the files named on the command line, or from stdin when none are given.
"""
import fileinput
import csv
import re
import sys

# This prevents prematurely closed pipes from raising
# an exception in Python
from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE, SIG_DFL)

# allow large content in the dump
csv.field_size_limit(sys.maxsize)

# First back-quoted identifier on a line, optionally schema-qualified
# (`db`.`table`).  Excluding back-quotes from the identifier body keeps the
# match from running on into back-quotes that appear later in the row data.
_TABLE_NAME_RE = re.compile(r"`[^`]*`(?:\.`[^`]*`)*")


def is_create(line):
    """
    Returns True if the line begins with a SQL create statement.
    """
    return line.startswith('CREATE TABLE')


def is_insert(line):
    """
    Returns True if the line begins a SQL insert statement.
    """
    return line.startswith('INSERT INTO')


def get_tablename(line):
    """
    Returns the name of the table a CREATE TABLE / INSERT INTO line refers to.

    The name is the first back-quoted identifier on the line with the
    back-quotes removed.

    Raises ValueError if the line contains no back-quoted identifier.
    """
    match = _TABLE_NAME_RE.search(line)
    if match is None:
        raise ValueError(
            "no back-quoted table name found in line: %r" % line[:80]
        )
    return match.group(0).replace("`", "")


def get_values(line):
    """
    Returns the portion of an INSERT statement containing values.

    This is everything that follows the first "` VALUES " marker, or an
    empty string when the marker is not present.
    """
    return line.partition('` VALUES ')[2]


def values_sanity_check(values):
    """
    Ensures that values from the INSERT statement meet basic checks.

    Returns True when the checks pass.  Raises AssertionError when
    ``values`` is empty or does not start with an opening parenthesis.
    """
    # Raised explicitly (instead of via ``assert``) so that the checks are
    # still performed when Python runs with -O.
    if not values:
        raise AssertionError("INSERT statement carries no values")
    if values[0] != '(':
        raise AssertionError("INSERT values do not start with '('")
    return True


def parse_values(table_name, values, outfile):
    """
    Given the raw values from a MySQL INSERT statement, append the
    equivalent CSV rows to a file.

    table_name -- name of the table being processed (not used here; kept
                  for interface compatibility).
    values     -- the text following "VALUES " in the INSERT statement.
    outfile    -- path of the CSV file to append to.

    Empty columns and NULL columns are written as a single NUL character.
    """
    # NOTE(review): the NULL test below runs before any parenthesis is
    # stripped, so a NULL in the first or last column of a row ("(NULL" /
    # "NULL)") is written as the text NULL rather than as the NUL marker.
    latest_row = []
    reader = csv.reader([values], delimiter=',',
                        doublequote=False,
                        escapechar='\\',
                        quotechar="'",
                        strict=True
                        )
    # The context manager guarantees the file is flushed and closed even
    # when the csv module raises part-way through the statement.
    with open(outfile, "a") as handle:
        writer = csv.writer(handle, quoting=csv.QUOTE_MINIMAL)
        for reader_row in reader:
            for column in reader_row:
                # If our current string is empty...
                if len(column) == 0 or column == 'NULL':
                    latest_row.append(chr(0))
                    continue
                # If our string starts with an open paren
                if column[0] == "(":
                    # Assume that this column does not begin
                    # a new row.
                    new_row = False
                    # If we've been filling out a row
                    if len(latest_row) > 0:
                        # Check if the previous entry ended in
                        # a close paren. If so, the row we've
                        # been filling out has been COMPLETED
                        # as:
                        #    1) the previous entry ended in a )
                        #    2) the current entry starts with a (
                        if latest_row[-1][-1] == ")":
                            # Remove the close paren.
                            latest_row[-1] = latest_row[-1][:-1]
                            new_row = True
                    # If we've found a new row, write it out
                    # and begin our new one
                    if new_row:
                        writer.writerow(latest_row)
                        latest_row = []
                    # If we're beginning a new row, eliminate the
                    # opening parentheses.
                    if len(latest_row) == 0:
                        column = column[1:]
                # Add our column to the row we're working on.
                latest_row.append(column)
            # At the end of an INSERT statement, we'll
            # have the semicolon.
            # Make sure to remove the semicolon and
            # the close paren.
            if latest_row[-1][-2:] == ");":
                latest_row[-1] = latest_row[-1][:-2]
                writer.writerow(latest_row)


def main():
    """
    Read the dump and write the per-table .txt and .csv files.
    """
    # Iterate over all lines in all files
    # listed in sys.argv[1:]
    # or stdin if no args given.
    # Flag: True while we are inside a multi-line CREATE TABLE statement.
    create_table = False
    # Name of the table most recently seen.
    tn = ""
    try:
        for line in fileinput.input():
            # Look for a create statement and copy it out
            if is_create(line) or create_table:
                if not create_table:
                    # Getting table name
                    tn = get_tablename(line)
                    create_table = True
                with open(tn + ".txt", 'a') as schema_file:
                    schema_file.write(line)
                # The statement ends on the line that finishes with ';'.
                # A semicolon elsewhere in the line (for instance inside a
                # column COMMENT) must not end the capture early.
                if line.rstrip().endswith(";"):
                    create_table = False
            # Look for an INSERT statement and parse it.
            if is_insert(line):
                # Getting table name
                tn = get_tablename(line)
                values = get_values(line)
                if values_sanity_check(values):
                    # Each new table name will result in a new file
                    parse_values(tn, values, tn + ".csv")
    except KeyboardInterrupt:
        sys.exit(0)


if __name__ == "__main__":
    main()