diff --git a/csv_reconcile/__init__.py b/csv_reconcile/__init__.py
index 928a955..3575fc2 100644
--- a/csv_reconcile/__init__.py
+++ b/csv_reconcile/__init__.py
@@ -22,7 +22,7 @@ except:
     from importlib import metadata
 
 
-__version__ = '0.3.1'
+__version__ = '0.3.2'
 #------------------------------------------------------------------
 # Implement reconciliation API
 # [[https://reconciliation-api.github.io/specs/latest/]]
diff --git a/csv_reconcile/initdb.py b/csv_reconcile/initdb.py
index 807e435..15d5668 100644
--- a/csv_reconcile/initdb.py
+++ b/csv_reconcile/initdb.py
@@ -73,9 +73,16 @@ def init_db(db,
     with db:
         # Create a table with ids (as PRIMARY ID), words and bigrams
         with open(csvfilenm, newline='', **enckwarg) as csvfile:
-            dialect = csv.Sniffer().sniff(csvfile.read(1024))
+            dialect = None
+            try:
+                dialect = csv.Sniffer().sniff(csvfile.read(1024))
+            except csv.Error:
+                # Sniffing is best-effort: when no dialect can be determined,
+                # keep dialect=None and let csvkwargs drive the parsing.
+                pass
+
             csvfile.seek(0)
-            reader = csv.reader(csvfile, dialect, **csvkwargs)
+            reader = csv.reader(csvfile, dialect=dialect, **csvkwargs)
             header = next(reader)
 
             # Throws if col doesn't exist
diff --git a/pyproject.toml b/pyproject.toml
index 8db7102..5538297 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "csv-reconcile"
-version = "0.3.1"
+version = "0.3.2"
 description = "OpenRefine reconciliation service backed by csv resource"
 authors = ["Douglas Mennella "]
 license = "MIT"
diff --git a/tests/conftest.py b/tests/conftest.py
index 6359a4e..8e6cabb 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -34,6 +34,13 @@ def ambiguous_csvcontents():
 However, above all, columns matter most
 '''.strip()
 
+@pytest.fixture
+def sniffer_throwing_csvcontents():
+    '''Try to throw off csv.Sniffer() to test overrides'''
+    return '''
+a,b,c\n1,2
+'''.strip()
+
 
 @pytest.fixture
 def formContentHeader():
@@ -78,6 +85,17 @@ def getSetup(idnm):
 
     return getSetup
 
+@pytest.fixture
+def sniffer_throwing_setup(tmp_path, sniffer_throwing_csvcontents):
+    '''mock csv file with id and name columns indicated'''
+
+    def getSetup(idnm):
+        p = tmp_path / "snfthrw_csvfile"
+        p.write_text(sniffer_throwing_csvcontents)
+        return (p, *idnm)
+
+    return getSetup
+
 @pytest.fixture
 def cfgContents():
     return '''
diff --git a/tests/main/test_csv_reconcile.py b/tests/main/test_csv_reconcile.py
index 8d80d8a..36cd001 100644
--- a/tests/main/test_csv_reconcile.py
+++ b/tests/main/test_csv_reconcile.py
@@ -8,7 +8,7 @@
 
 
 def test_version():
-    assert __version__ == '0.3.1'
+    assert __version__ == '0.3.2'
 
 
 def test_manifest(basicClient):
@@ -305,3 +305,22 @@ def test_csv_sniffer_overrides(app, ambiguous_setup, ambiguous_csvcontents, conf
     with chk.app_context():
         headernms = [name for _,name in getCSVCols()]
         assert headernms == items(SEP)
+
+def test_csv_sniffer_throwing(app, sniffer_throwing_setup, sniffer_throwing_csvcontents, config, mkConfig):
+    '''Check header parsing of the sniffer-throwing CSV, with and without a delimiter override'''
+    topline = sniffer_throwing_csvcontents.splitlines()[0]
+    items = lambda sep: [ h.strip() for h in topline.split(sep)]
+
+    # First guess is that the , is a separator
+    SEP = ','
+    chk = app(sniffer_throwing_setup(items(SEP)[:2]), config)
+    with chk.app_context():
+        headernms = [name for _,name in getCSVCols()]
+        assert headernms == items(SEP)
+
+    # Now parse with override
+    cfg = mkConfig('CSVKWARGS = {"delimiter": ","}')
+    chk = app(sniffer_throwing_setup(items(SEP)[:2]), cfg)
+    with chk.app_context():
+        headernms = [name for _,name in getCSVCols()]
+        assert headernms == items(SEP)