-
Notifications
You must be signed in to change notification settings - Fork 1
/
clean_linebreaks.py
44 lines (36 loc) · 1.2 KB
/
clean_linebreaks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#-------------------------------------------------------------------------------
# Name: clean_linebreaks
#
# Author: Leoson, Lily
#-------------------------------------------------------------------------------
# Clean csvs to remove newlines from long text fields.
import csv
import argparse
# Module-level dictionary, initialised empty.
# NOTE(review): nothing in the visible code reads or writes this name; confirm
# no external importer relies on it before removing it.
holder_dict = {}
def clean_csv(old_path, new_path):
    '''
    Copy a csv to a new csv file, removing line breaks from one text field.

    For every row with at least four fields, each line break inside the
    fourth-from-last field is replaced by a single space. The row written
    out consists of the fields that precede that field, followed by the
    cleaned field; the last three input fields are not written.
    NOTE(review): dropping the trailing three fields is inherited from the
    previous implementation -- confirm that it is intended.

    Rows with fewer than four fields (including blank lines) have no
    fourth-from-last field and are copied through unchanged instead of
    raising IndexError.

    Args:
        old_path: Path of the UTF-8 encoded csv file to read.
        new_path: Path of the UTF-8 encoded csv file to create or overwrite.
    '''
    # newline='' is what the csv module requires for both reading and
    # writing: it keeps embedded line breaks in quoted fields intact for the
    # parser and stops the writer's '\r\n' row terminator from being
    # translated into '\r\r\n' on Windows.
    with open(old_path, 'r', encoding='utf-8', newline='') as source, \
            open(new_path, 'w', encoding='utf-8', newline='') as target:
        writer = csv.writer(target)
        for row in csv.reader(source):
            if len(row) < 4:
                # No fourth-from-last field to clean; keep the row as it is.
                writer.writerow(row)
                continue
            # With newline='' the reader no longer normalises line endings,
            # so handle '\r\n' first (one space per break), then lone '\r'
            # and '\n'.
            cleaned = (
                row[-4]
                .replace('\r\n', ' ')
                .replace('\r', ' ')
                .replace('\n', ' ')
            )
            writer.writerow(row[:-4] + [cleaned])
if __name__ == '__main__':
    # Command-line entry point: takes the path of the csv to clean and writes
    # the cleaned copy next to it as '<name>_cleaned.csv'.
    parser = argparse.ArgumentParser(
        description='Clean csvs to remove newlines within cells',
    )
    parser.add_argument(
        'old_csv',
        type=str,
        help='The csv file to clean.',
    )
    args = parser.parse_args()
    old_csv = args.old_csv
    # Strip only a trailing '.csv' extension. Splitting on '.csv' would also
    # delete that text from directory names or from the middle of the file
    # name (e.g. 'exports.csv.d/data.csv').
    extension = '.csv'
    if old_csv.endswith(extension):
        stem = old_csv[:-len(extension)]
    else:
        stem = old_csv
    new_csv = '{0}_cleaned.csv'.format(stem)
    clean_csv(old_csv, new_csv)