Skip to content

Commit

Permalink
use pyarrow instead of fastparquet
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhrisca committed Nov 18, 2024
1 parent 5e2504b commit 400e2aa
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 18 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ optional dependencies needed for exports

* h5py : for HDF5 export
* hdf5storage : for Matlab v7.3 .mat export
* fastparquet : for parquet export
* pyarrow : for parquet export
* scipy: for Matlab v4 and v5 .mat export

other optional dependencies
Expand Down
2 changes: 1 addition & 1 deletion doc/intro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ optional dependencies needed for exports

* h5py : for HDF5 export
* hdf5storage : for Matlab v7.3 .mat export
* fastparquet : for parquet export
* pyarrow : for parquet export
* scipy: for Matlab v4 and v5 .mat export

other optional dependencies
Expand Down
1 change: 1 addition & 0 deletions requirements_exe_build.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ keyring
pyinstaller
scipy
sympy
pyarrow
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def _get_ext_modules():
extras_require={
"decode": ["faust-cchardet==2.1.19", "chardet"],
"export": [
"fastparquet",
"pyarrow",
"h5py",
"hdf5storage>=0.1.19",
"python-snappy",
Expand Down
13 changes: 7 additions & 6 deletions src/asammdf/gui/widgets/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def __init__(
pass

try:
from fastparquet import write as write_parquet # noqa: F401
from pyarrow.parquet import write_table as write_parquet # noqa: F401

formats.append("Parquet")
except ImportError:
Expand Down Expand Up @@ -220,7 +220,7 @@ def export_changed(self, name):
if name == "parquet":
self.export_compression.setEnabled(True)
self.export_compression.clear()
self.export_compression.addItems(["GZIP", "SNAPPY"])
self.export_compression.addItems(["GZIP", "SNAPPY", "LZ4"])
self.export_compression.setCurrentIndex(-1)
elif name == "hdf5":
self.export_compression.setEnabled(True)
Expand Down Expand Up @@ -1362,12 +1362,13 @@ def apply_processing(self, event):

elif output_format == "Parquet":
try:
from fastparquet import write as write_parquet # noqa: F401
from pyarrow.parquet import write_table as write_parquet # noqa: F401

except ImportError:
MessageBox.critical(
self,
"export_batch to parquet unavailale",
"fastparquet package not found; export to parquet is unavailable",
"pyarrow package not found; export to parquet is unavailable",
)
return

Expand Down Expand Up @@ -1407,7 +1408,7 @@ def apply_processing_thread(self, progress):

elif output_format == "Parquet":
suffix = ".parquet"
from fastparquet import write as write_parquet # noqa: F401
from pyarrow.parquet import write_table as write_parquet # noqa: F401

elif output_format == "CSV":
suffix = ".csv"
Expand Down Expand Up @@ -1696,7 +1697,7 @@ def output_format_changed(self, name):
if name == "Parquet":
self.export_compression.setEnabled(True)
self.export_compression.clear()
self.export_compression.addItems(["GZIP", "SNAPPY"])
self.export_compression.addItems(["GZIP", "SNAPPY", "LZ4"])
self.export_compression.setCurrentIndex(0)
elif name == "HDF5":
self.export_compression.setEnabled(True)
Expand Down
9 changes: 5 additions & 4 deletions src/asammdf/gui/widgets/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ def __init__(
pass

try:
from fastparquet import write as write_parquet # noqa: F401
from pyarrow.parquet import write_table as write_parquet # noqa: F401

formats.append("Parquet")
except ImportError:
Expand Down Expand Up @@ -758,7 +758,7 @@ def output_format_changed(self, name):
if name == "Parquet":
self.export_compression.setEnabled(True)
self.export_compression.clear()
self.export_compression.addItems(["GZIP", "SNAPPY"])
self.export_compression.addItems(["GZIP", "SNAPPY", "LZ4"])
self.export_compression.setCurrentIndex(0)
elif name == "HDF5":
self.export_compression.setEnabled(True)
Expand Down Expand Up @@ -2704,12 +2704,13 @@ def apply_processing(self, event):

elif output_format == "Parquet":
try:
from fastparquet import write as write_parquet # noqa: F401
from pyarrow.parquet import write_table as write_parquet # noqa: F401

except ImportError:
MessageBox.critical(
self,
"Export to parquet unavailale",
"fastparquet package not found; export to parquet is unavailable",
"pyarrow package not found; export to parquet is unavailable",
)
return

Expand Down
15 changes: 11 additions & 4 deletions src/asammdf/mdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,10 +1151,14 @@ def export(
* `compression` : str
compression to be used
* for ``parquet`` : "GZIP" or "SNAPPY"
* for ``parquet`` : "GZIP", "SNAPPY" or "LZ4"
* for ``hdf5`` : "gzip", "lzf" or "szip"
* for ``mat`` : bool
.. versionadded:: 8.1.0
added LZ4 compression after changing to pyarrow
* `time_as_date` (False) : bool
export time as local timezone datetime; only valid for CSV export
Expand Down Expand Up @@ -1251,9 +1255,11 @@ def export(

if fmt == "parquet":
try:
from fastparquet import write as write_parquet
from pyarrow import table
from pyarrow.parquet import write_table as write_parquet

except ImportError:
logger.warning("fastparquet not found; export to parquet is unavailable")
logger.warning("pyarrow not found; export to parquet is unavailable")
return

elif fmt == "hdf5":
Expand Down Expand Up @@ -1944,13 +1950,14 @@ def decompose(samples):

elif fmt == "parquet":
filename = filename.with_suffix(".parquet")
df = table(df)
if compression:
write_parquet(filename, df, compression=compression)
else:
write_parquet(filename, df)

else:
message = 'Unsopported export type "{}". ' 'Please select "csv", "excel", "hdf5", "mat" or "pandas"'
message = 'Unsupported export type "{}". ' 'Please select "csv", "excel", "hdf5", "mat" or "pandas"'
message.format(fmt)
logger.warning(message)

Expand Down
2 changes: 1 addition & 1 deletion src/asammdf/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
""" asammdf version module """

__version__ = "8.0.2.dev4"
__version__ = "8.1.0.dev1"

0 comments on commit 400e2aa

Please sign in to comment.