updates

blaylockbk · Jun 17, 2022 · 6004645 · 6004645
1 parent 8ca5dda
commit 6004645
Show file tree

Hide file tree

Showing 6 changed files with 68 additions and 19 deletions.
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
@@ -23,10 +23,20 @@
             "type": "shell",
             "command": "conda env update -f environment.yml --prune"
         },
+        {
+            "label": "✨ update all packages",
+            "type": "shell",
+            "command": "conda update --name herbie --all -c conda-forge"
+        },
+        {
+            "label": "🗿 search outdated packages",
+            "type": "shell",
+            "command": "eval \"$(conda shell.bash hook)\" && conda activate herbie && conda search --outdated -c conda-forge"
+        },
         {
             "label": "💣 destroy conda environment",
             "type": "shell",
-            "command": "conda activate base && conda env remove --name herbie"
+            "command": "eval \"$(conda shell.bash hook)\" && conda activate base && conda env remove --name herbie"
         },
         {
             "label": "🚀 launch JupyterLab",

diff --git a/docs/conf.py b/docs/conf.py
@@ -42,7 +42,7 @@
     "sphinx.ext.autosectionlabel",
     "sphinx.ext.napoleon",
     "sphinx.ext.viewcode",
-    "sphinx_panels",
+    "sphinx_design",
     "autodocsumm",
     "sphinx_markdown_tables",
     "myst_parser",

diff --git a/environment.yml b/environment.yml
@@ -47,8 +47,9 @@ dependencies:
   # -------------
   - sphinx>=4.4.0
   - nbsphinx
+  - nbconvert>=6.5
   - pydata-sphinx-theme # PyData Sphinx Theme (i.e, Numpy, Pandas, MetPy)
-  - sphinx-panels # Add bootstrap elements to Sphinx
+  - sphinx-design
   - recommonmark
   - sphinx-markdown-tables
   - sphinxcontrib-mermaid # For mermaid diagram support

diff --git a/herbie/__init__.py b/herbie/__init__.py
@@ -99,4 +99,4 @@ def _expand(self):
 
 
 from herbie.archive import Herbie
-from herbie.tools import fast_Herbie, fast_Herbie_download, fast_Herbie_xarray
+from herbie.tools import FastHerbie
diff --git a/herbie/tools.py b/herbie/tools.py
@@ -27,7 +27,7 @@
 
 
 """
-🧵 Notice! Multithreading is use
+🧵🤹🏻‍♂️ Notice! Multithreading and Multiprocessing are used
 
 This is my first implementation of multithreading to create, download,
 and read many Herbie objects. This drastically reduces the time it takes
@@ -145,7 +145,7 @@ def df(self):
             ds_list, index=self.DATES, columns=[f"F{i:02d}" for i in self.fxx]
         )
 
-    def download(self, searchString=None, max_threads=20, **download_kwargs):
+    def download(self, searchString=None, *, max_threads=20, **download_kwargs):
         r"""Download many Herbie objects
 
         Uses multithreading.
@@ -183,13 +183,19 @@ def download(self, searchString=None, max_threads=20, **download_kwargs):
 
         return outFiles
 
-    def xarray(self, searchString, max_threads=2, **xarray_kwargs):
+    def xarray(
+        self,
+        searchString,
+        *,
+        max_threads=None,
+        **xarray_kwargs,
+    ):
         """Read many Herbie objects into an xarray Dataset
 
         # TODO: Sometimes the Jupyter cell crashes when I run this.
         # TODO: "fatal flex scanner internal error--end of buffer missed"
 
-        Uses multithreading.
+        Uses multithreading when ``max_threads`` is set; otherwise reads serially.
         This would likely benefit from multiprocessing instead.
 
         Parameters
@@ -207,19 +213,31 @@ def xarray(self, searchString, max_threads=2, **xarray_kwargs):
             - 10 threads took 39 s
             - 50 threads took 37 s
         """
-        ###########################
-        # Multithread the downloads
-        threads = min(self.tasks, max_threads)
-        log.info(f"🧵 Working on {self.tasks} tasks with {threads} threads.")
+        xarray_kwargs = dict(searchString=searchString, **xarray_kwargs)
 
-        with ThreadPoolExecutor(max_threads) as exe:
-            futures = [
-                exe.submit(H.xarray, searchString, **xarray_kwargs)
-                for H in self.file_exists
-            ]
+        # NOTE: Multiprocessing does not seem to work because it looks
+        # NOTE: like xarray objects are not pickleable.
+        # NOTE: ``Reason: 'TypeError("cannot pickle '_thread.lock' object"``
 
-            # Return list of Herbie objects in order completed
-            ds_list = [future.result() for future in as_completed(futures)]
+        if max_threads:
+            ###########################
+            # Multithread the xarray reads
+            # ! Only works sometimes
+            # ! I get this error: "'EntryPoint' object has no attribute '_key'"
+
+            threads = min(self.tasks, max_threads)
+            log.info(f"🧵 Working on {self.tasks} tasks with {threads} threads.")
+
+            with ThreadPoolExecutor(max_threads) as exe:
+                futures = [
+                    exe.submit(H.xarray, **xarray_kwargs) for H in self.file_exists
+                ]
+
+                # Collect the H.xarray results in the order they complete
+                ds_list = [future.result() for future in as_completed(futures)]
+
+        else:
+            ds_list = [H.xarray(**xarray_kwargs) for H in self.file_exists]
 
         # Sort the DataSets, first by lead time (step), then by run time (time)
         ds_list.sort(key=lambda x: x.step.data.max())

diff --git a/tests/test_tools.py b/tests/test_tools.py
@@ -0,0 +1,20 @@
+"""
+Tests for Herbie tools like FastHerbie
+"""
+
+from herbie import FastHerbie
+import pandas as pd
+
+
+def test_FastHerbie():
+    DATES = pd.date_range("2022-01-01", "2022-01-01 02:00", freq="1H")
+
+    # Create Fast Herbie
+    FH = FastHerbie(DATES, fxx=range(0, 3))
+    assert len(FH) == 9
+
+    # Download these files
+    FH.download()
+
+    # Load these files
+    FH.xarray("TMP:2 m")
Original file line number	Diff line number	Diff line change
Expand Up		@@ -99,4 +99,4 @@ def _expand(self):


		from herbie.archive import Herbie
		from herbie.tools import fast_Herbie, fast_Herbie_download, fast_Herbie_xarray
		from herbie.tools import FastHerbie