From eb6bcca697ccadb0fee647091d9f44520d970b74 Mon Sep 17 00:00:00 2001
From: Luca Bertagna <lbertag@sandia.gov>
Date: Tue, 30 Jul 2024 17:19:29 -0600
Subject: [PATCH 1/3] EAMxx: add routine to reset unlimited dim length

This allows future snapshots to effectively overwrite what was already stored.
---
 .../src/share/io/scream_scorpio_interface.cpp | 24 +++++++++++++++++++
 .../src/share/io/scream_scorpio_interface.hpp |  1 +
 2 files changed, 25 insertions(+)

diff --git a/components/eamxx/src/share/io/scream_scorpio_interface.cpp b/components/eamxx/src/share/io/scream_scorpio_interface.cpp
index cb2b501f2b5..8d2f64994dd 100644
--- a/components/eamxx/src/share/io/scream_scorpio_interface.cpp
+++ b/components/eamxx/src/share/io/scream_scorpio_interface.cpp
@@ -717,6 +717,30 @@ std::string get_time_name (const std::string& filename)
   return pf.file->time_dim->name;
 }
 
+// Shrink the in-memory length of the unlimited (time) dim of `filename` to
+// new_length (>=0 and < current length), so subsequent snapshots overwrite the rest.
+void reset_unlimited_dim_len(const std::string& filename, const int new_length)
+{
+  auto& f = impl::get_file(filename,"scorpio::reset_unlimited_dim_len");
+  EKAT_REQUIRE_MSG (f.time_dim!=nullptr,
+      "Error! Cannot reset unlimited dim length. No unlimited dim stored.\n"
+      "  - file name: " + filename + "\n");
+  EKAT_REQUIRE_MSG (new_length>=0 and new_length<f.time_dim->length,
+      "Error! New time dimension length must be non-negative and shorter than the current one.\n"
+      "  - file name: " + filename + "\n"
+      "  - curr len : " + std::to_string(f.time_dim->length) + "\n"
+      "  - new len  : " + std::to_string(new_length) + "\n");
+  f.time_dim->length = new_length;
+
+  // Keep the record counter of every time-dependent var in sync with the new
+  // dim length. Bind entries by const ref: copying each map entry is needless.
+  for (const auto& it : f.vars) {
+    if (it.second->time_dep) {
+      it.second->num_records = new_length;
+    }
+  }
+}
+
 // =================== Decompositions operations ==================== //
 
 // NOTES:
diff --git a/components/eamxx/src/share/io/scream_scorpio_interface.hpp b/components/eamxx/src/share/io/scream_scorpio_interface.hpp
index 549de680fe4..54e3d902765 100644
--- a/components/eamxx/src/share/io/scream_scorpio_interface.hpp
+++ b/components/eamxx/src/share/io/scream_scorpio_interface.hpp
@@ -108,6 +108,7 @@ bool is_dim_unlimited (const std::string& filename,
 // NOTE: these throw if time dim is not present. Use has_dim to check first.
 int get_time_len (const std::string& filename);
 std::string get_time_name (const std::string& filename);
+void reset_unlimited_dim_len(const std::string& filename, const int new_length);
 
 // =================== Decompositions operations ==================== //
 

From 0bdead86c0f3a3b240d92efd3076a62cdc67dd65 Mon Sep 17 00:00:00 2001
From: Luca Bertagna <lbertag@sandia.gov>
Date: Tue, 30 Jul 2024 17:21:19 -0600
Subject: [PATCH 2/3] EAMxx: two fixes to hist restart logic

* Only open last output file if the next write timestamp will fit
* When resuming a file, reset time dim length to what was stored
  in the rhist file (will overwrite any timestamp that was written
  to file after rhist was written)
---
 .../src/share/io/scream_output_manager.cpp    | 36 +++++++++++++++----
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/components/eamxx/src/share/io/scream_output_manager.cpp b/components/eamxx/src/share/io/scream_output_manager.cpp
index 13e2e97101b..d6be54d0918 100644
--- a/components/eamxx/src/share/io/scream_output_manager.cpp
+++ b/components/eamxx/src/share/io/scream_output_manager.cpp
@@ -242,17 +242,18 @@ setup (const ekat::Comm& io_comm, const ekat::ParameterList& params,
       const auto& last_output_filename = get_attribute<std::string>(rhist_file,"GLOBAL","last_output_filename");
       m_resume_output_file = last_output_filename!="" and not restart_pl.get("force_new_file",false);
       if (m_resume_output_file) {
-        scorpio::register_file(last_output_filename,scorpio::Read,m_output_file_specs.iotype);
-        int num_snaps = scorpio::get_dimlen(last_output_filename,"time");
-        scorpio::release_file(last_output_filename);
+        int num_snaps = scorpio::get_attribute<int>(rhist_file,"GLOBAL","last_output_file_num_snaps");
 
         m_output_file_specs.filename = last_output_filename;
         m_output_file_specs.is_open = true;
         m_output_file_specs.storage.num_snapshots_in_file = num_snaps;
-        // The setup_file call will not register any new variable (the file is in Append mode,
-        // so all dims/vars must already be in the file). However, it will register decompositions,
-        // since those are a property of the run, not of the file.
-        setup_file(m_output_file_specs,m_output_control);
+
+        if (m_output_file_specs.storage.snapshot_fits(m_output_control.next_write_ts)) {
+          // The setup_file call will not register any new variable (the file is in Append mode,
+          // so all dims/vars must already be in the file). However, it will register decompositions,
+          // since those are a property of the run, not of the file.
+          setup_file(m_output_file_specs,m_output_control);
+        }
       }
       scorpio::release_file(rhist_file);
     }
@@ -494,6 +495,9 @@ void OutputManager::run(const util::TimeStamp& timestamp)
           write_timestamp (filespecs.filename,"last_write",m_output_control.last_write_ts,true);
           scorpio::set_attribute (filespecs.filename,"GLOBAL","last_output_filename",m_output_file_specs.filename);
           scorpio::set_attribute (filespecs.filename,"GLOBAL","num_snapshots_since_last_write",m_output_control.nsamples_since_last_write);
+
+          int nsnaps = scorpio::get_dimlen(m_output_file_specs.filename,"time");
+          scorpio::set_attribute (filespecs.filename,"GLOBAL","last_output_file_num_snaps",nsnaps);
         }
         // Write these in both output and rhist file. The former, b/c we need these info when we postprocess
         // output, and the latter b/c we want to make sure these params don't change across restarts
@@ -789,6 +793,24 @@ setup_file (      IOFileSpecs& filespecs,
   auto mode = m_resume_output_file ? scorpio::Append : scorpio::Write;
   scorpio::register_file(filename,mode,filespecs.iotype);
   if (m_resume_output_file) {
+    // We may have resumed an output file that contains extra snapshots *after* the restart time.
+    // E.g., if we output every step and the run crashed a few steps after writing the restart.
+    // In that case, we need to reset the time dimension in the output file, so that the extra
+    // snapshots will be overwritten.
+    const auto all_times = scorpio::get_all_times(filename);
+    int ntimes = all_times.size();
+    int ngood  = 0;
+    for (const auto& t : all_times) {
+      auto keep = t<=m_output_control.last_write_ts.days_from(m_case_t0);
+      if (keep) {
+        ++ngood;
+      } else {
+        break;
+      }
+    }
+    if (ngood<ntimes) {
+      scorpio::reset_unlimited_dim_len(filename,ngood);
+    }
     scorpio::redef(filename);
   } else {
     // Register time (and possibly time_bnds) var(s)

From cd19cdfe58780516c55f7776ff9f4c61f379a719 Mon Sep 17 00:00:00 2001
From: Luca Bertagna <lbertag@sandia.gov>
Date: Wed, 7 Aug 2024 10:10:31 -0600
Subject: [PATCH 3/3] EAMxx: fix access to output file in IO

Avoid calling scorpio interfaces on a file that is not open
---
 components/eamxx/src/share/io/scream_output_manager.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/components/eamxx/src/share/io/scream_output_manager.cpp b/components/eamxx/src/share/io/scream_output_manager.cpp
index d6be54d0918..0b738efe648 100644
--- a/components/eamxx/src/share/io/scream_output_manager.cpp
+++ b/components/eamxx/src/share/io/scream_output_manager.cpp
@@ -393,7 +393,7 @@ void OutputManager::run(const util::TimeStamp& timestamp)
       snapshot_start = m_case_t0;
       snapshot_start += m_time_bnds[0];
     }
-    if (not filespecs.storage.snapshot_fits(snapshot_start)) {
+    if (filespecs.is_open and not filespecs.storage.snapshot_fits(snapshot_start)) {
       release_file(filespecs.filename);
       filespecs.close();
     }
@@ -496,7 +496,8 @@ void OutputManager::run(const util::TimeStamp& timestamp)
           scorpio::set_attribute (filespecs.filename,"GLOBAL","last_output_filename",m_output_file_specs.filename);
           scorpio::set_attribute (filespecs.filename,"GLOBAL","num_snapshots_since_last_write",m_output_control.nsamples_since_last_write);
 
-          int nsnaps = scorpio::get_dimlen(m_output_file_specs.filename,"time");
+          int nsnaps = m_output_file_specs.is_open
+                     ? scorpio::get_dimlen(m_output_file_specs.filename,"time") : 0;
           scorpio::set_attribute (filespecs.filename,"GLOBAL","last_output_file_num_snaps",nsnaps);
         }
         // Write these in both output and rhist file. The former, b/c we need these info when we postprocess