From 318bcefdfa6f565dff752f46a17a3bee1962733d Mon Sep 17 00:00:00 2001 From: Ted Janney Date: Sat, 30 Oct 2021 13:20:12 +1000 Subject: [PATCH 1/5] Changed the logic of the parseN method to return a "NaN" value for entries of length 1 which cannot be converted to an integer or a float. --- dbfread/field_parser.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/dbfread/field_parser.py b/dbfread/field_parser.py index b462a2d..ca50cb6 100644 --- a/dbfread/field_parser.py +++ b/dbfread/field_parser.py @@ -171,8 +171,13 @@ def parseN(self, field, data): if not data.strip(): return None else: - # Account for , in numeric fields - return float(data.replace(b',', b'.')) + + # Eliminate the lone comma entry + if len ( data ) >= 2: + return float ( data.replace ( b',', b'.' ) ) + + # The default ultimate failure should be a NaN value + return float ( "NaN" ) def parseO(self, field, data): """Parse long field (O) and return float.""" From 7ef5a9722746dc6e048e9ba3b5a7b884b8bce67c Mon Sep 17 00:00:00 2001 From: Ted Janney Date: Sat, 30 Oct 2021 13:27:24 +1000 Subject: [PATCH 2/5] Added a test for parsing b',' --- tests/test_field_parser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_field_parser.py b/tests/test_field_parser.py index c16f6fb..194f184 100644 --- a/tests/test_field_parser.py +++ b/tests/test_field_parser.py @@ -135,6 +135,7 @@ def test_N(): assert parse(b'1') == 1 assert parse(b'-99') == -99 assert parse(b'3.14') == 3.14 + assert parse(b',') == 'NaN' # In some files * is used for padding. assert parse(b'0.01**') == 0.01 From 8270a357f1a937b230287fc6ed78ae8a57451b9e Mon Sep 17 00:00:00 2001 From: Ted Janney Date: Sun, 14 Nov 2021 09:37:05 +1000 Subject: [PATCH 3/5] Changed test_D to make sure b'000' returns as None. Changed parse_L to return None for invalid characters as this is not misleading. --- dbfread/field_parser.py | 29 ++++++++++++++++++++--------- tests/test_field_parser.py | 8 ++++++-- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/dbfread/field_parser.py b/dbfread/field_parser.py index ca50cb6..857ec0d 100644 --- a/dbfread/field_parser.py +++ b/dbfread/field_parser.py @@ -92,12 +92,12 @@ def parseD(self, field, data): try: return datetime.date(int(data[:4]), int(data[4:6]), int(data[6:8])) except ValueError: - if data.strip(b' 0\0') == b'': + + if data == b' ' or data == b'\0': # A record containing only spaces and/or zeros is # a NULL value. return None - else: - raise ValueError('invalid date {!r}'.format(data)) + raise ValueError('invalid date {!r}'.format(data)) def parseF(self, field, data): """Parse float field and return float or None""" @@ -111,8 +111,16 @@ def parseF(self, field, data): def parseI(self, field, data): """Parse integer or autoincrement field and return int.""" - # Todo: is this 4 bytes on every platform? - return struct.unpack('= 4: + + return struct.unpack('= 2: - return float ( data.replace ( b',', b'.' ) ) + if str(data).count(',') == 1: + return float(data.replace(b',',b'')) + elif len ( data ) >= 2: + return float ( data.replace ( b',', b'' ) ) # The default ultimate failure should be a NaN value - return float ( "NaN" ) + return float ( 'NaN' ) def parseO(self, field, data): """Parse long field (O) and return float.""" @@ -200,7 +211,7 @@ def parseT(self, field, data): # Offset from julian days (used in the file) to proleptic Gregorian # ordinals (used by the datetime module) - offset = 1721425 # Todo: will this work? + offset = 1721425 if data.strip(): # Note: if the day number is 0, we return None diff --git a/tests/test_field_parser.py b/tests/test_field_parser.py index 194f184..494ea09 100644 --- a/tests/test_field_parser.py +++ b/tests/test_field_parser.py @@ -56,6 +56,7 @@ def test_D(): assert parse(b'19700101') == epoch with raises(ValueError): + assert parse(b' 0\0') is None parse(b'NotIntgr') def test_F(): @@ -85,6 +86,8 @@ def test_I(): assert parse(b'\x01\x00\x00\x00') == 1 assert parse(b'\xff\xff\xff\xff') == -1 + + def test_L(): parse = make_field_parser('L') @@ -100,7 +103,7 @@ def test_L(): # Some invalid values. for char in b'!0': with raises(ValueError): - parse(char) + assert parse(char) is None # This also tests B, G and P. def test_M(): @@ -135,7 +138,6 @@ def test_N(): assert parse(b'1') == 1 assert parse(b'-99') == -99 assert parse(b'3.14') == 3.14 - assert parse(b',') == 'NaN' # In some files * is used for padding. assert parse(b'0.01**') == 0.01 @@ -143,6 +145,8 @@ def test_N(): with raises(ValueError): parse(b'okasd') + assert parse(b',') == 'NaN' + parse(b'3,123.4') == 3123.4 def test_O(): """Test double field.""" From e944b425647f6195a93c77202bf7ff50ac112768 Mon Sep 17 00:00:00 2001 From: Ted Janney Date: Sun, 14 Nov 2021 10:03:45 +1000 Subject: [PATCH 4/5] Changed dbf.py, _read_header method to handle a header file not being present. I added a blank dictionary for the header file and raised a FileNotFound error. --- dbfread/dbf.py | 48 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/dbfread/dbf.py b/dbfread/dbf.py index 65d45ac..0f0f57b 100644 --- a/dbfread/dbf.py +++ b/dbfread/dbf.py @@ -212,13 +212,47 @@ def deleted(self): def _read_header(self, infile): # Todo: more checks? - self.header = DBFHeader.read(infile) - - if self.encoding is None: - try: - self.encoding = guess_encoding(self.header.language_driver) - except LookupError: - self.encoding = 'ascii' + ''' + Reads the dbf header into memory. + + What could go wrong with reading the DBF header? The structure of the header is found + here: https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm. + + 1. The read operation could fail because the DBF Header does not exist. In this case, + the program should either generate a user message or generate a blank header file. + :param infile: + :return: + ''' + + try: + self.header = DBFHeader.read(infile) + + if self.encoding is None: + try: + self.encoding = guess_encoding ( self.header.language_driver ) + except LookupError: + self.encoding = 'ascii' + except FileNotFoundError: + self.header = dict('DBFHeader', + ' Date: Sun, 14 Nov 2021 12:08:36 +1000 Subject: [PATCH 5/5] Added tests for codepage.py coverage. --- dbfread/dbf.py | 38 +++++++++++++++++------------------ dbfread/field_parser.py | 12 +++++------ tests/__init__.py | 0 tests/test_codepages.py | 12 +++++++++++ tests/test_field_parser.py | 23 ++++++++++++++++++--- tests/test_ifiles.py | 2 +- tests/test_invalid_value.py | 1 + tests/test_memo.py | 7 +++++-- tests/test_read_and_length.py | 5 ++++- 9 files changed, 67 insertions(+), 33 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/test_codepages.py diff --git a/dbfread/dbf.py b/dbfread/dbf.py index 0f0f57b..90f670d 100644 --- a/dbfread/dbf.py +++ b/dbfread/dbf.py @@ -234,25 +234,25 @@ def _read_header(self, infile): self.encoding = 'ascii' except FileNotFoundError: self.header = dict('DBFHeader', - '= 4: - return struct.unpack('