diff --git a/workflows/prognostic_c48_run/docs/development.rst b/workflows/prognostic_c48_run/docs/development.rst index e10c46386d..52d61d57ec 100644 --- a/workflows/prognostic_c48_run/docs/development.rst +++ b/workflows/prognostic_c48_run/docs/development.rst @@ -3,35 +3,26 @@ Developer's Guide ----------------- -The prognostic run is developed via docker and docker-compose. This -environment is based off the `prognostic_run` docker image, but has -bind-mounts to the packages in "/external" of this repository and this -directory, which allows locally developing this workflow and its -dependencies. +The prognostic run is developed via docker. This environment is based off the +`prognostic_run` docker image, but has bind-mounts to the packages in "/external" +of this repository and this directory, which allows locally developing this workflow +and its dependencies. It is usually fastest to use the latest docker image from Google Container Repository. Pull the image:: - docker pull us.gcr.io/vcm-ml/prognostic_run:latest + make pull_image_prognostic_run .. note:: If you run into problems, it would be best to rebuild the docker image from scratch:: - docker-compose build fv3 + make build_image_prognostic_run Enter a bash shell in the image:: - docker-compose run fv3net bash + make enter_prognostic_run -.. note :: - - This docker-compose will propagate key-based authentication to Google - Cloud Platform into the docker image. It expects that environmental variable - ``GOOGLE_APPLICATION_CREDENTIALS`` points to a json key. See Google's - `documentation `_ - on how to generate one. - -Run the tests:: +Then run the tests:: pytest \ No newline at end of file diff --git a/workflows/prognostic_c48_run/runtime/loop.py b/workflows/prognostic_c48_run/runtime/loop.py index 40b1d5673d..b8fdb154e1 100644 --- a/workflows/prognostic_c48_run/runtime/loop.py +++ b/workflows/prognostic_c48_run/runtime/loop.py @@ -338,10 +338,6 @@ def _open_model(self, ml_config: MachineLearningConfig, step: str): def time(self) -> cftime.DatetimeJulian: return self._state.time - def cleanup(self): - self._print_global_timings() - self._fv3gfs.cleanup() - def _step_dynamics(self) -> Diagnostics: self._log_debug(f"Dynamics Step") self._fv3gfs.step_dynamics() @@ -378,17 +374,21 @@ def _apply_physics(self) -> Diagnostics: "total_precip_after_physics": self._state[TOTAL_PRECIP], } - def _print_timing(self, name, min_val, max_val, mean_val): - self._print(f"{name:<30}{min_val:15.4f}{max_val:15.4f}{mean_val:15.4f}") - - def _print_global_timings(self, root=0): - is_root = self.rank == root - recvbuf = np.array(0.0) - reduced = {} + def _print_timings(self, reduced): self._print("-----------------------------------------------------------------") self._print(" Reporting clock statistics from python ") self._print("-----------------------------------------------------------------") self._print(f"{' ':<30}{'min (s)':>15}{'max (s)':>15}{'mean (s)':>15}") + for name, timing in reduced.items(): + self._print( + f"{name:<30}{timing['min']:15.4f}" + f"{timing['max']:15.4f}{timing['mean']:15.4f}" + ) + + def log_global_timings(self, root=0): + is_root = self.rank == root + recvbuf = np.array(0.0) + reduced = {} for name, value in self._timer.times.items(): reduced[name] = {} for label, op in [("min", MPI.MIN), ("max", MPI.MAX), ("mean", MPI.SUM)]: @@ -396,10 +396,12 @@ def _print_global_timings(self, root=0): if is_root and label == "mean": recvbuf /= self.comm.Get_size() reduced[name][label] = recvbuf.copy().item() - self._print_timing( - name, reduced[name]["min"], reduced[name]["max"], reduced[name]["mean"] - ) - self._log_info(f"python_timing:{json.dumps(reduced)}") + self._print_timings(reduced) + log_out = { + "steps": reduced, + "units": "[s], cumulative and reduced across ranks", + } + self._log_info(json.dumps({"python_timing": log_out})) def _step_prephysics(self) -> Diagnostics: @@ -529,6 +531,11 @@ def _apply_postphysics_to_dycore_state(self) -> Diagnostics: ) return diagnostics + def _intermediate_restarts(self) -> Diagnostics: + self._log_info("Saving intermediate restarts if enabled.") + self._fv3gfs.save_intermediate_restart_if_enabled() + return {} + def __iter__( self, ) -> Iterator[Tuple[cftime.DatetimeJulian, Dict[str, xr.DataArray]]]: @@ -551,6 +558,7 @@ def __iter__( ), self._compute_postphysics, self.monitor("python", self._apply_postphysics_to_dycore_state), + self._intermediate_restarts, ]: with self._timer.clock(substep.__name__): diagnostics.update(substep()) diff --git a/workflows/prognostic_c48_run/runtime/main.py b/workflows/prognostic_c48_run/runtime/main.py index 7a3134f77f..2b58b2b820 100644 --- a/workflows/prognostic_c48_run/runtime/main.py +++ b/workflows/prognostic_c48_run/runtime/main.py @@ -71,6 +71,8 @@ def main(): for diag_file in diag_files: diag_file.flush() + loop.log_global_timings() + if __name__ == "__main__":