Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conservar integridad de floats #61

Merged
merged 5 commits into from
Feb 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,4 @@ target/

*.md~
tests/output
.idea
2 changes: 1 addition & 1 deletion data_cleaner/data_cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def __init__(self, input_path, ignore_dups=False, **kwargs):

# lee el CSV a limpiar
elif input_path.endswith('.csv'):
self.df = pd.read_csv(input_path, **default_args)
self.df = pd.read_csv(input_path, dtype=str, **default_args)

# lee el XLSX a limpiar
elif input_path.endswith('.xlsx'):
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
chardet==3.0.4
pandas>=0.20.3
pandas==0.20.3
geopandas==0.2.1
parsley>=1.2
arrow>=0.7.0
Expand Down
2 changes: 2 additions & 0 deletions tests/input/to_clean_coordinates.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lat,long
-34.6091725340289,-58.3842972567038
20 changes: 20 additions & 0 deletions tests/test_data_cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from data_cleaner import DataCleaner
from data_cleaner.data_cleaner import DuplicatedField
from rules.integration import rules
import csv

BASE_DIR = os.path.dirname(__file__)
VCR = vcr.VCR(path_transformer=vcr.VCR.ensure_suffix('.yaml'),
Expand Down Expand Up @@ -61,6 +62,11 @@ def nan_to_empty_string_list(iterable):
return [i if pd.notnull(i) else "" for i in iterable]


def raw_csv(file_path):
    """Read a CSV file and return its rows without any type conversion.

    Every cell is returned exactly as the ``csv`` module parses it, so the
    result can be used to compare two files value by value.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list of rows, each row being a list of cell strings. An empty
        file yields an empty list.
    """
    import sys

    if sys.version_info[0] >= 3:
        # Python 3's csv module needs a text-mode file; newline='' lets the
        # reader handle line endings (and newlines inside quoted fields).
        mode, open_kwargs = 'r', {'newline': ''}
    else:
        # Python 2's csv module expects the file opened in binary mode.
        mode, open_kwargs = 'rb', {}

    with open(file_path, mode, **open_kwargs) as csvfile:
        return list(csv.reader(csvfile))


# @unittest.skip("skip")
class DataCleanerIntegrationTestCase(unittest.TestCase):
"""Testea el funcionamiento integral del paquete."""
Expand Down Expand Up @@ -545,5 +551,19 @@ def test_get_api_response(self):
self.assertEqual(res_test, res)


class DataCleanerDataIntegrityTestCase(unittest.TestCase):
    """Tests that the integrity of the datasets is preserved."""
    input_path = BASE_DIR + '/input/to_clean_coordinates.csv'

    def test_float_integrity(self):
        # Clean the coordinates fixture with an empty rule list, then check
        # that the raw rows written out match the raw rows read in.
        cleaned_path = BASE_DIR + '/output/clean_coordinates.csv'

        cleaner = DataCleaner(self.input_path)
        cleaner.clean_file([], cleaned_path)

        self.assertEqual(raw_csv(self.input_path), raw_csv(cleaned_path))

# When executed directly as a script, run this module's tests through nose.
if __name__ == '__main__':
nose.run(defaultTest=__name__)