Skip to content

Commit

Permalink
[FIX] Speed-up slow table_to_frame
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Apr 29, 2021
1 parent 98af325 commit f92b98e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
3 changes: 1 addition & 2 deletions Orange/data/pandas_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,8 +368,7 @@ def _column_to_series(col, vals):
elif col.is_continuous:
dt = float
# np.nan are not compatible with int column
nan_values_in_column = [t for t in vals if np.isnan(t)]
if col.number_of_decimals == 0 and len(nan_values_in_column) == 0:
if col.number_of_decimals == 0 and not np.any(np.isnan(vals)):
dt = int
result = (col.name, pd.Series(vals).astype(dt))
elif col.is_string:
Expand Down
19 changes: 19 additions & 0 deletions Orange/data/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,25 @@ def test_table_to_frame(self):
self.assertEqual(list(df['sepal length'])[0:4], [5.1, 4.9, 4.7, 4.6])
self.assertEqual(list(df['iris'])[0:2], ['Iris-setosa', 'Iris-setosa'])

def test_table_to_frame_nans(self):
    """Check dtypes produced by table_to_frame when a column holds a NaN.

    Column "a" is declared with ``number_of_decimals=0`` and contains only
    ones, so it is expected to come out as an int column. Column "b" ends
    with ``np.nan`` and is expected to stay a float column.
    """
    from Orange.data.pandas_compat import table_to_frame

    var_a = ContinuousVariable("a", number_of_decimals=0)
    var_b = ContinuousVariable("b")
    domain = Domain([var_a, var_b])

    # "a": ten ones; "b": nine ones followed by a single missing value.
    values_a = np.ones(10)
    values_b = np.hstack((np.ones(9), [np.nan]))
    table = Table(domain, np.column_stack((values_a, values_b)))

    df = table_to_frame(table)

    # The frame must expose exactly the variables of the table's domain.
    expected_names = sorted(var.name for var in table.domain.variables)
    self.assertEqual(expected_names, sorted(df.columns))

    column_a = df["a"]
    column_b = df["b"]
    self.assertEqual(column_a.dtype, int)
    self.assertEqual(column_b.dtype, float)
    self.assertEqual([1, 1, 1], list(column_a.iloc[-3:]))
    self.assertTrue(np.isnan(column_b.iloc[-1]))

def test_table_to_frame_metas(self):
from Orange.data.pandas_compat import table_to_frame

Expand Down

0 comments on commit f92b98e

Please sign in to comment.