Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
blaylockbk committed Jun 17, 2022
1 parent 8ca5dda commit 6004645
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 19 deletions.
12 changes: 11 additions & 1 deletion .vscode/tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,20 @@
"type": "shell",
"command": "conda env update -f environment.yml --prune"
},
{
"label": "✨ update all packages",
"type": "shell",
"command": "conda update --name herbie --all -c conda-forge"
},
{
"label": "🗿 search outdated packages",
"type": "shell",
"command": "eval \"$(conda shell.bash hook)\" && conda activate herbie && conda search --outdated -c conda-forge"
},
{
"label": "💣 destroy conda environment",
"type": "shell",
"command": "conda activate base && conda env remove --name herbie"
"command": "eval \"$(conda shell.bash hook)\" && conda activate base && conda env remove --name herbie"
},
{
"label": "🚀 launch JupyterLab",
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
"sphinx.ext.autosectionlabel",
"sphinx.ext.napoleon",
"sphinx.ext.viewcode",
"sphinx_panels",
"sphinx_design",
"autodocsumm",
"sphinx_markdown_tables",
"myst_parser",
Expand Down
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ dependencies:
# -------------
- sphinx>=4.4.0
- nbsphinx
- nbconvert>=6.5
- pydata-sphinx-theme # PyData Sphinx Theme (e.g., NumPy, pandas, MetPy)
- sphinx-panels # Add bootstrap elements to Sphinx
- sphinx-design
- recommonmark
- sphinx-markdown-tables
- sphinxcontrib-mermaid # For mermaid diagram support
Expand Down
2 changes: 1 addition & 1 deletion herbie/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,4 @@ def _expand(self):


from herbie.archive import Herbie
from herbie.tools import fast_Herbie, fast_Herbie_download, fast_Herbie_xarray
from herbie.tools import FastHerbie
48 changes: 33 additions & 15 deletions herbie/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@


"""
🧵 Notice! Multithreading is used
🧵🤹🏻‍♂️ Notice! Multithreading and Multiprocessing are used
This is my first implementation of multithreading to create, download,
and read many Herbie objects. This drastically reduces the time it takes
Expand Down Expand Up @@ -145,7 +145,7 @@ def df(self):
ds_list, index=self.DATES, columns=[f"F{i:02d}" for i in self.fxx]
)

def download(self, searchString=None, max_threads=20, **download_kwargs):
def download(self, searchString=None, *, max_threads=20, **download_kwargs):
r"""Download many Herbie objects
Uses multithreading.
Expand Down Expand Up @@ -183,13 +183,19 @@ def download(self, searchString=None, max_threads=20, **download_kwargs):

return outFiles

def xarray(self, searchString, max_threads=2, **xarray_kwargs):
def xarray(
self,
searchString,
*,
max_threads=None,
**xarray_kwargs,
):
"""Read many Herbie objects into an xarray Dataset
# TODO: The Jupyter cell sometimes crashes when I run this.
# TODO: "fatal flex scanner internal error--end of buffer missed"
Uses multithreading.
Uses multithreading (or multiprocessing).
This would likely benefit from multiprocessing instead.
Parameters
Expand All @@ -207,19 +213,31 @@ def xarray(self, searchString, max_threads=2, **xarray_kwargs):
- 10 threads took 39 s
- 50 threads took 37 s
"""
###########################
# Multithread the downloads
threads = min(self.tasks, max_threads)
log.info(f"🧵 Working on {self.tasks} tasks with {threads} threads.")
xarray_kwargs = dict(searchString=searchString, **xarray_kwargs)

with ThreadPoolExecutor(max_threads) as exe:
futures = [
exe.submit(H.xarray, searchString, **xarray_kwargs)
for H in self.file_exists
]
# NOTE: Multiprocessing does not seem to work because it looks
# NOTE: like xarray objects are not pickleable.
# NOTE: ``Reason: 'TypeError("cannot pickle '_thread.lock' object")'``

# Return list of Herbie objects in order completed
ds_list = [future.result() for future in as_completed(futures)]
if max_threads:
###########################
# Multithread the downloads
# ! Only works sometimes
# ! I get this error: "'EntryPoint' object has no attribute '_key'"

threads = min(self.tasks, max_threads)
log.info(f"🧵 Working on {self.tasks} tasks with {threads} threads.")

with ThreadPoolExecutor(max_threads) as exe:
futures = [
exe.submit(H.xarray, **xarray_kwargs) for H in self.file_exists
]

# Return list of Herbie objects in order completed
ds_list = [future.result() for future in as_completed(futures)]

else:
ds_list = [H.xarray(**xarray_kwargs) for H in self.file_exists]

# Sort the DataSets, first by lead time (step), then by run time (time)
ds_list.sort(key=lambda x: x.step.data.max())
Expand Down
20 changes: 20 additions & 0 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
Tests for Herbie tools like FastHerbie
"""

from herbie import FastHerbie
import pandas as pd


def test_FastHerbie():
    """Smoke-test the FastHerbie workflow: construct, download, then read."""
    # Hourly timestamps from 00:00 through 02:00 on 2022-01-01.
    run_times = pd.date_range("2022-01-01", "2022-01-01 02:00", freq="1H")
    lead_times = range(0, 3)

    # Build the FastHerbie collection; nine entries are expected for
    # these dates and lead times.
    fast_herbie = FastHerbie(run_times, fxx=lead_times)
    assert len(fast_herbie) == 9

    # Fetch the files for every entry in the collection.
    fast_herbie.download()

    # Load the data selected by the "TMP:2 m" search string.
    fast_herbie.xarray("TMP:2 m")

0 comments on commit 6004645

Please sign in to comment.