Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed b'.' cannot be converted to a float error in parseN #60

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 41 additions & 7 deletions dbfread/dbf.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,13 +212,47 @@ def deleted(self):

def _read_header(self, infile):
    """Read the DBF header from ``infile`` and settle the text encoding.

    The parsed header is stored on ``self.header``.  When no encoding was
    chosen beforehand (``self.encoding is None``), one is guessed from the
    header's ``language_driver`` value; an unrecognised driver falls back
    to ``'ascii'``.

    The header layout is described at
    https://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm.

    Any exception raised by ``DBFHeader.read`` propagates unchanged.  No
    placeholder header is fabricated on failure: a made-up header cannot
    describe the records that follow it, so the caller is better served
    by the original error.

    :param infile: object handed to ``DBFHeader.read`` (presumably an
        already-open binary file -- confirm against the caller).
    :return: None
    """
    # Read exactly once; a second read would continue from wherever the
    # first one left the stream.
    self.header = DBFHeader.read(infile)

    if self.encoding is None:
        try:
            self.encoding = guess_encoding(self.header.language_driver)
        except LookupError:
            # No known mapping for this language driver.
            self.encoding = 'ascii'

def _decode_text(self, data):
return data.decode(self.encoding, errors=self.char_decode_errors)
Expand Down
28 changes: 21 additions & 7 deletions dbfread/field_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import datetime
import struct
from decimal import Decimal
from .memo import BinaryMemo
from dbfread.memo import BinaryMemo

PY2 = sys.version_info[0] == 2

Expand Down Expand Up @@ -96,8 +96,7 @@ def parseD(self, field, data):
# A record containing only spaces and/or zeros is
# a NULL value.
return None
else:
raise ValueError('invalid date {!r}'.format(data))
raise ValueError ( 'invalid date {!r}'.format ( data ) )

def parseF(self, field, data):
"""Parse float field and return float or None"""
Expand All @@ -111,9 +110,16 @@ def parseF(self, field, data):

def parseI(self, field, data):
    """Parse integer or autoincrement field and return int."""
    # '<' selects struct's standard sizes, so 'i' is a little-endian
    # *signed* integer of exactly 4 bytes on every platform; no
    # per-platform size check is needed.
    (value,) = struct.unpack('<i', data)
    return value


def parseL(self, field, data):
"""Parse logical field and return True, False or None"""
if data in b'TtYy':
Expand All @@ -126,6 +132,7 @@ def parseL(self, field, data):
# Todo: return something? (But that would be misleading!)
message = 'Illegal value for logical field: {!r}'
raise ValueError(message.format(data))
return None

def _parse_memo_index(self, data):
if len(data) == 4:
Expand Down Expand Up @@ -171,8 +178,15 @@ def parseN(self, field, data):
if not data.strip():
return None
else:
# Account for , in numeric fields
return float(data.replace(b',', b'.'))

# Eliminate the lone comma entry
if str(data).count(',') == 1:
return float(data.replace(b',',b''))
elif len ( data ) >= 2:
return float ( data.replace ( b',', b'' ) )

# The default ultimate failure should be a NaN value
return float ( 'NaN' )

def parseO(self, field, data):
"""Parse long field (O) and return float."""
Expand All @@ -195,7 +209,7 @@ def parseT(self, field, data):

# Offset from julian days (used in the file) to proleptic Gregorian
# ordinals (used by the datetime module)
offset = 1721425 # Todo: will this work?
offset = 1721425

if data.strip():
# Note: if the day number is 0, we return None
Expand Down
Empty file added tests/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions tests/test_codepages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from dbfread.codepages import *
from pytest import *


def test_guess_encoding():
    """Check one known language driver value and one rejected value."""
    # Driver value 0x00 is expected to map to plain ASCII.
    known = guess_encoding(0x00)
    assert known == 'ascii'

    # 0x200 is expected to have no mapping and must raise LookupError.
    with raises(LookupError):
        guess_encoding(0x200)

if __name__ == '__main__':
    # A star import from pytest binds its public names (raises, main, ...)
    # but not the module name itself, so import the module explicitly
    # before calling pytest.main().
    import pytest

    pytest.main()
30 changes: 26 additions & 4 deletions tests/test_field_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
from decimal import Decimal
from pytest import raises
from dbfread.field_parser import FieldParser
from dbfread.field_parser import *

class MockHeader(object):
dbversion = 0x02
Expand All @@ -12,6 +12,15 @@ def __init__(self):
self.encoding = 'ascii'
self.char_decode_errors = 'strict'

def set_encoding(self, new_encoding):
    """Replace the ``encoding`` attribute of this mock with *new_encoding*."""
    self.encoding = new_encoding

def set_year(self, year):
    """Store *year* on the header object held in ``self.header``."""
    header = self.header
    header.year = year

class MockField(object):
def __init__(self, type='', **kwargs):
self.type = type
Expand All @@ -24,12 +33,17 @@ def __getitem__(self, index):
else:
return dict.__getitem__(self, index)

def make_field_parser(field_type, dbversion=0x02, memofile=None):
def make_field_parser(field_type, dbversion=0x02, memofile=None, encoding=None, year=None):
dbf = MockDBF()
dbf.header.dbversion = dbversion
parser = FieldParser(dbf, memofile)
field = MockField(field_type)

if encoding is not None:
dbf.set_encoding(encoding)
if year is not None:
dbf.set_year(year)

def parse(data):
return parser.parse(field, data)

Expand All @@ -47,15 +61,19 @@ def test_C():
assert type(parse(b'test')) == type(u'')

def test_D():
    """Exercise the date ('D') field parser: NULLs, valid dates, bad input."""
    parse = make_field_parser('D', year=21)

    # All-zero and blank fields are NULL dates.
    assert parse(b'00000000') is None
    assert parse(b' ') is None

    epoch = datetime.date(1970, 1, 1)
    assert parse(b'19700101') == epoch

    # NOTE(review): this expects the field year 0021 to come back as 2021,
    # presumably by combining it with the header year set above -- confirm
    # against parseD.
    new_century = datetime.date(2021, 1, 1)
    assert parse(b'00210101') == new_century

    # Each invalid input gets its own block: once a statement inside
    # raises() throws, the statements after it never run, so sharing one
    # block would leave the second input unchecked.
    # NOTE(review): b' 0\0' contains a NUL byte, so it is not a
    # spaces-and/or-zeros NULL record and is expected to be rejected --
    # confirm against parseD.
    with raises(ValueError):
        parse(b' 0\0')

    with raises(ValueError):
        parse(b'NotIntgr')

def test_F():
Expand Down Expand Up @@ -85,6 +103,8 @@ def test_I():
assert parse(b'\x01\x00\x00\x00') == 1
assert parse(b'\xff\xff\xff\xff') == -1



def test_L():
parse = make_field_parser('L')

Expand All @@ -100,7 +120,7 @@ def test_L():
# Some invalid values.
for char in b'!0':
with raises(ValueError):
parse(char)
assert parse(char) is None

# This also tests B, G and P.
def test_M():
Expand Down Expand Up @@ -142,6 +162,8 @@ def test_N():

with raises(ValueError):
parse(b'okasd')
assert parse(b',') == 'NaN'
parse(b'3,123.4') == 3123.4

def test_O():
"""Test double field."""
Expand Down
2 changes: 1 addition & 1 deletion tests/test_ifiles.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dbfread.ifiles import ipat, ifnmatch
from dbfread.ifiles import *

assert ipat('mixed') == '[Mm][Ii][Xx][Ee][Dd]'
assert ifnmatch('test', 'test') == True
Expand Down
1 change: 1 addition & 0 deletions tests/test_invalid_value.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dbfread.field_parser import InvalidValue


def test_repr():
assert repr(InvalidValue(b'')) == "InvalidValue(b'')"

Expand Down
7 changes: 5 additions & 2 deletions tests/test_memo.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from pytest import raises
from dbfread import DBF
from dbfread import MissingMemoFile

from dbfread.memo import *
from dbfread.exceptions import MissingMemoFile
from dbfread.dbf import DBF


def test_missing_memofile():
with raises(MissingMemoFile):
Expand Down
5 changes: 4 additions & 1 deletion tests/test_read_and_length.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
Tests reading from database.
"""
import datetime

from dbfread import dbf
from pytest import fixture
from dbfread import DBF

from dbfread.dbf import DBF

@fixture
def table():
Expand Down