Skip to content

Commit

Permalink
UrlReader: fix issue with double extensions
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed May 27, 2020
1 parent 49e433c commit 77b3d06
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 2 deletions.
5 changes: 3 additions & 2 deletions Orange/data/io.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import contextlib
import csv
import locale
import os
import pickle
import re
import sys
Expand All @@ -16,6 +15,7 @@
from tempfile import NamedTemporaryFile
from urllib.parse import urlparse, urlsplit, urlunsplit, unquote as urlunquote
from urllib.request import urlopen, Request
from pathlib import Path

import numpy as np

Expand Down Expand Up @@ -400,7 +400,8 @@ def read(self):
self.filename = self._trim(self._resolve_redirects(self.filename))
with contextlib.closing(self.urlopen(self.filename)) as response:
name = self._suggest_filename(response.headers['content-disposition'])
extension = os.path.splitext(name)[1] # get only file extension
# use Path.suffixes: os.path.splitext returns only the last extension (".xz" for "x.csv.xz")
extension = ''.join(Path(name).suffixes) # get only file extension
with NamedTemporaryFile(suffix=extension, delete=False) as f:
f.write(response.read())
# delete=False is a workaround for https://bugs.python.org/issue14243
Expand Down
19 changes: 19 additions & 0 deletions Orange/tests/test_url_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import unittest

from Orange.data.io import UrlReader


class TestUrlReader(unittest.TestCase):
    """Exercise UrlReader against remote data files (requires network access)."""

    def test_basic_file(self):
        """Files with a single extension are read and have the expected length."""
        titanic = UrlReader("https://datasets.biolab.si/core/titanic.tab").read()
        self.assertEqual(2201, len(titanic))

        grades = UrlReader("https://datasets.biolab.si/core/grades.xlsx").read()
        self.assertEqual(16, len(grades))

    def test_zipped(self):
        """A compressed file whose name has two extensions (.csv.xz) is read."""
        url = "http://datasets.orange.biolab.si/core/philadelphia-crime.csv.xz"
        crime = UrlReader(url).read()
        self.assertEqual(9666, len(crime))

0 comments on commit 77b3d06

Please sign in to comment.