From e45ed43bd0df8a8ec2f825431dde696be794f3f3 Mon Sep 17 00:00:00 2001
From: lbonaldo <bonaldo.luca12@gmail.com>
Date: Tue, 23 Apr 2024 19:10:19 -0400
Subject: [PATCH] Allow writing of multistage stats during optimization

---
 src/case_runners/case_runner.jl               | 12 +--
 src/multi_stage/dual_dynamic_programming.jl   | 51 ++--------
 src/multi_stage/write_multi_stage_outputs.jl  | 30 ++++++
 src/multi_stage/write_multi_stage_stats.jl    | 84 ++++++++++++++--
 test/runtests.jl                              |  7 ++
 test/writing_outputs/test_writing_stats_ms.jl | 99 +++++++++++++++++++
 6 files changed, 228 insertions(+), 55 deletions(-)
 create mode 100644 src/multi_stage/write_multi_stage_outputs.jl
 create mode 100644 test/writing_outputs/test_writing_stats_ms.jl

diff --git a/src/case_runners/case_runner.jl b/src/case_runners/case_runner.jl
index c50b00fcbf..4426ff5e76 100644
--- a/src/case_runners/case_runner.jl
+++ b/src/case_runners/case_runner.jl
@@ -140,12 +140,7 @@ function run_genx_case_multistage!(case::AbstractString, mysetup::Dict)
     ### Solve model
     println("Solving Model")
 
-    # Step 3) Run DDP Algorithm
-    ## Solve Model
-    model_dict, mystats_d, inputs_dict = run_ddp(model_dict, mysetup, inputs_dict)
-
-    # Step 4) Write final outputs from each stage
-
+    # Prepare folder for results
     outpath = get_default_output_folder(case)
 
     if mysetup["OverwriteResults"] == 1
@@ -160,6 +155,11 @@ function run_genx_case_multistage!(case::AbstractString, mysetup::Dict)
         mkdir(outpath)
     end
 
+    # Step 3) Run DDP Algorithm
+    ## Solve Model
+    model_dict, mystats_d, inputs_dict = run_ddp(outpath, model_dict, mysetup, inputs_dict)
+    
+    # Step 4) Write final outputs from each stage
     for p in 1:mysetup["MultiStageSettingsDict"]["NumStages"]
         outpath_cur = joinpath(outpath, "Results_p$p")
         write_outputs(model_dict[p], outpath_cur, mysetup, inputs_dict[p])
diff --git a/src/multi_stage/dual_dynamic_programming.jl b/src/multi_stage/dual_dynamic_programming.jl
index c749476d98..3a0e7f40e9 100644
--- a/src/multi_stage/dual_dynamic_programming.jl
+++ b/src/multi_stage/dual_dynamic_programming.jl
@@ -63,8 +63,7 @@ returns:
   * stats\_d – Dictionary which contains the run time, upper bound, and lower bound of each DDP iteration.
   * inputs\_d – Dictionary of inputs for each model stage, generated by the load\_inputs() method, modified by this method.
 """
-function run_ddp(models_d::Dict, setup::Dict, inputs_d::Dict)
-
+function run_ddp(outpath::AbstractString, models_d::Dict, setup::Dict, inputs_d::Dict)
     settings_d = setup["MultiStageSettingsDict"]
     num_stages = settings_d["NumStages"]  # Total number of investment planning stages
     EPSILON = settings_d["ConvergenceTolerance"] # Tolerance
@@ -75,10 +74,13 @@ function run_ddp(models_d::Dict, setup::Dict, inputs_d::Dict)
     ic = 0 # Iteration Counter
 
     results_d = Dict() # Dictionary to store the results to return
-    stats_d = Dict() # Dictionary to store the statistics (total time, upper bound, and lower bound for each iteration)
     times_a = [] # Array to store the total time of each iteration
     upper_bounds_a = [] # Array to store the upper bound of each iteration
     lower_bounds_a = [] # Array to store the lower bound of each iteration
+    stats_d = Dict() # Dictionary to store the statistics (total time, upper bound, and lower bound for each iteration)
+    stats_d["TIMES"] = times_a
+    stats_d["UPPER_BOUNDS"] = upper_bounds_a
+    stats_d["LOWER_BOUNDS"] = lower_bounds_a
 
     # Step a.i) Initialize cost-to-go function for t = 1:num_stages
     for t in 1:num_stages
@@ -116,10 +118,6 @@ function run_ddp(models_d::Dict, setup::Dict, inputs_d::Dict)
             println(string("Lower Bound = ", z_lower))
             println("***********")
 
-            stats_d["TIMES"] = times_a
-            stats_d["UPPER_BOUNDS"] = upper_bounds_a
-            statd_d["LOWER_BOUNDS"] = lower_bounds_a
-
             return models_d, stats_d, inputs_d
         end
 
@@ -163,10 +161,6 @@ function run_ddp(models_d::Dict, setup::Dict, inputs_d::Dict)
             println(string("Upper Bound = ", z_upper))
             println(string("Lower Bound = ", z_lower))
             println("***********")
-
-            stats_d["TIMES"] = times_a
-            stats_d["UPPER_BOUNDS"] = upper_bounds_a
-            stats_d["LOWER_BOUNDS"] = lower_bounds_a
             return models_d, stats_d, inputs_d
         end
         ###
@@ -183,6 +177,7 @@ function run_ddp(models_d::Dict, setup::Dict, inputs_d::Dict)
         end
 
         append!(upper_bounds_a, z_upper) # Store current iteration upper bound
+        update_multi_stage_stats_file(outpath, ic, z_upper, z_lower, NaN, new_row=true)
 
         # Step f) Backward pass for t = num_stages:2
         for t in num_stages:-1:2
@@ -202,10 +197,13 @@ function run_ddp(models_d::Dict, setup::Dict, inputs_d::Dict)
         # Step g) Recalculate lower bound and go back to c)
         z_lower = objective_value(models_d[1])
         append!(lower_bounds_a, z_lower) # Store current iteration lower bound
+        update_multi_stage_stats_file(outpath, ic, z_upper, z_lower, NaN)
 
         # Step h) Store the total time of the current iteration (in seconds)
         ddp_iteration_time = time() - ddp_prev_time
         append!(times_a, ddp_iteration_time)
+        update_multi_stage_stats_file(outpath, ic, z_upper, z_lower, ddp_iteration_time)
+
         ddp_prev_time = time()
     end
 
@@ -239,40 +237,9 @@ function run_ddp(models_d::Dict, setup::Dict, inputs_d::Dict)
     end
     ##### END of final forward pass
 
-    stats_d["TIMES"] = times_a
-    stats_d["UPPER_BOUNDS"] = upper_bounds_a
-    stats_d["LOWER_BOUNDS"] = lower_bounds_a
-
     return models_d, stats_d, inputs_d
 end
 
-@doc raw"""
-	function write_multi_stage_outputs(stats_d::Dict, outpath::String, settings_d::Dict)
-
-This function calls various methods which write multi-stage modeling outputs as .csv files.
-
-inputs:
-
-  * stats\_d – Dictionary which contains the run time, upper bound, and lower bound of each DDP iteration.
-  * outpath – String which represents the path to the Results directory.
-  * settings\_d - Dictionary containing settings configured in the GenX settings genx\_settings.yml file as well as the multi-stage settings file multi\_stage\_settings.yml.
-"""
-function write_multi_stage_outputs(stats_d::Dict, outpath::String, settings_d::Dict, inputs_dict::Dict)
-
-    multi_stage_settings_d = settings_d["MultiStageSettingsDict"]
-
-    write_multi_stage_capacities_discharge(outpath, multi_stage_settings_d)
-    write_multi_stage_capacities_charge(outpath, multi_stage_settings_d)
-    write_multi_stage_capacities_energy(outpath, multi_stage_settings_d)
-    if settings_d["NetworkExpansion"] == 1
-    	write_multi_stage_network_expansion(outpath, multi_stage_settings_d)
-    end
-    write_multi_stage_costs(outpath, multi_stage_settings_d, inputs_dict)
-    write_multi_stage_stats(outpath, stats_d)
-    write_multi_stage_settings(outpath, settings_d)
-
-end
-
 @doc raw"""
 	function fix_initial_investments(EP_prev::Model, EP_cur::Model, start_cap_d::Dict)
 
diff --git a/src/multi_stage/write_multi_stage_outputs.jl b/src/multi_stage/write_multi_stage_outputs.jl
new file mode 100644
index 0000000000..4e6d5612d1
--- /dev/null
+++ b/src/multi_stage/write_multi_stage_outputs.jl
@@ -0,0 +1,30 @@
+@doc raw"""
+    write_multi_stage_outputs(stats_d::Dict, outpath::String,
+        settings_d::Dict, inputs_dict::Dict)
+
+Call the individual writers that save the multi-stage modeling outputs as .csv files.
+The network expansion output is written only when `settings_d["NetworkExpansion"] == 1`,
+and the DDP statistics only when the multi-stage setting `"Myopic"` is `0`.
+
+# Arguments
+  * `stats_d`: Dictionary with the run time, upper bound, and lower bound of each DDP iteration.
+  * `outpath`: Path to the Results directory.
+  * `settings_d`: Settings from `genx_settings.yml`; its `"MultiStageSettingsDict"` entry holds the multi-stage settings.
+  * `inputs_dict`: Dictionary containing the input data for the multi-stage model.
+"""
+function write_multi_stage_outputs(stats_d::Dict,
+        outpath::String,
+        settings_d::Dict,
+        inputs_dict::Dict)
+    ms_settings = settings_d["MultiStageSettingsDict"]
+
+    write_multi_stage_capacities_discharge(outpath, ms_settings)
+    write_multi_stage_capacities_charge(outpath, ms_settings)
+    write_multi_stage_capacities_energy(outpath, ms_settings)
+    settings_d["NetworkExpansion"] == 1 && write_multi_stage_network_expansion(outpath, ms_settings)
+    write_multi_stage_costs(outpath, ms_settings, inputs_dict)
+    if ms_settings["Myopic"] == 0
+        write_multi_stage_stats(outpath, stats_d)
+    end
+    write_multi_stage_settings(outpath, settings_d)
+end
diff --git a/src/multi_stage/write_multi_stage_stats.jl b/src/multi_stage/write_multi_stage_stats.jl
index 6a04612404..3581e4544f 100644
--- a/src/multi_stage/write_multi_stage_stats.jl
+++ b/src/multi_stage/write_multi_stage_stats.jl
@@ -1,3 +1,6 @@
+_get_multi_stage_stats_filename() = "stats_multi_stage.csv" # name of the multi-stage stats CSV file
+_get_multi_stage_stats_header() = ["Iteration_Number", "Seconds", "Upper_Bound", "Lower_Bound", "Relative_Gap"] # column names of that file, in order
+
 @doc raw"""
 	function write_multi_stage_stats(outpath::String, stats_d::Dict)
 
@@ -10,22 +13,89 @@ inputs:
 """
 function write_multi_stage_stats(outpath::String, stats_d::Dict)
 
+    filename = _get_multi_stage_stats_filename()
+
+    # don't overwrite an existing file (e.g. one written by update_multi_stage_stats_file during DDP)
+    isfile(joinpath(outpath, filename)) && return nothing
+
     times_a = stats_d["TIMES"] # Time (seconds) of each iteration
     upper_bounds_a = stats_d["UPPER_BOUNDS"] # Upper bound of each iteration
     lower_bounds_a = stats_d["LOWER_BOUNDS"] # Lower bound of each iteration
 
     # Create an array of numbers 1 through total number of iterations
     iteration_count_a = collect(1:length(times_a))
-
+    
     realtive_gap_a = (upper_bounds_a .- lower_bounds_a) ./ lower_bounds_a
 
     # Construct dataframe where first column is iteration number, second is iteration time
-    df_stats = DataFrame(Iteration_Number=iteration_count_a,
-        Seconds=times_a,
-        Upper_Bound=upper_bounds_a,
-        Lower_Bound=lower_bounds_a,
-        Relative_Gap=realtive_gap_a)
+    header = _get_multi_stage_stats_header()
+    df_stats = DataFrame(header .=> [iteration_count_a, times_a, upper_bounds_a, lower_bounds_a, realtive_gap_a])
+
+    CSV.write(joinpath(outpath, filename), df_stats)
+    return nothing
+end
 
-    CSV.write(joinpath(outpath, "stats_multi_stage.csv"), df_stats)
+@doc raw"""
+    create_multi_stage_stats_file(outpath::AbstractString)
+
+Create an empty CSV file named `stats_multi_stage.csv` in the directory `outpath`.
+The file is written from an empty DataFrame whose columns are named after
+`_get_multi_stage_stats_header()` and all typed as `Float64`, so it holds only the header.
+An existing file with the same name is overwritten.
+
+# Arguments
+- `outpath::AbstractString`: The output directory where the statistics file will be written.
 
+# Returns
+- `nothing`. A CSV file is written to the `outpath`.
+"""
+function create_multi_stage_stats_file(outpath::AbstractString)
+    filepath = joinpath(outpath, _get_multi_stage_stats_filename())
+    empty_cols = [col_name => Float64[] for col_name in _get_multi_stage_stats_header()]
+    CSV.write(filepath, DataFrame(empty_cols))
+    # Return `nothing`, as documented, rather than the path returned by `CSV.write`
+    return nothing
 end
+
+@doc raw"""
+    update_multi_stage_stats_file(outpath::AbstractString, ic::Integer, upper_bound::Real,
+        lower_bound::Real, iteration_time::Real; new_row::Bool=false)
+
+Update the multi-stage statistics file `stats_multi_stage.csv` in `outpath` with the
+values of iteration `ic`.
+
+The file is created first if it does not exist. Its content is then read into a
+DataFrame, the relative gap `(upper_bound - lower_bound) / lower_bound` is computed, and
+the row `[ic, iteration_time, upper_bound, lower_bound, relative_gap]` is either appended
+or written over row `ic`. Finally, the whole DataFrame is written back to the file.
+
+# Arguments
+- `outpath::AbstractString`: The output directory where the statistics file is stored.
+- `ic::Integer`: The iteration count.
+- `upper_bound::Real`: The upper bound value.
+- `lower_bound::Real`: The lower bound value.
+- `iteration_time::Real`: The iteration time value (`NaN` while not yet known).
+- `new_row::Bool=false`: If `true`, append a new row; if `false`, overwrite row `ic`
+  (a row is appended instead when the file has fewer than `ic` rows).
+
+# Returns
+- `nothing`. A CSV file is updated or created at the `outpath`.
+"""
+function update_multi_stage_stats_file(outpath::AbstractString, ic::Integer,
+        upper_bound::Real, lower_bound::Real, iteration_time::Real; new_row::Bool = false)
+    filepath = joinpath(outpath, _get_multi_stage_stats_filename())
+
+    # If the file does not exist, create it (header only)
+    isfile(filepath) || create_multi_stage_stats_file(String(outpath))
+
+    df_stats = CSV.read(filepath, DataFrame, types = Float64)
+    relative_gap = (upper_bound - lower_bound) / lower_bound
+    new_values = Float64[ic, iteration_time, upper_bound, lower_bound, relative_gap]
+
+    # Append when asked to, or when row `ic` is missing (e.g. the file was recreated
+    # mid-run): indexing a missing row would throw a BoundsError and abort the run.
+    append_row = new_row || ic > size(df_stats, 1)
+    append_row ? push!(df_stats, new_values) : (df_stats[ic, :] = new_values)
+    CSV.write(filepath, df_stats)
+    return nothing
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index d14ab3c1a7..6a98f1f8e9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -16,3 +16,10 @@ using Test
     @test isa(inputs_gen["VRE"], Int64)
     =#
 end
+
+# Test writing outputs (test files are looked up next to this file, like `include` does)
+@testset "Writing outputs " begin
+    for test_file in filter!(x -> endswith(x, ".jl"), readdir(joinpath(@__DIR__, "writing_outputs")))
+        include("writing_outputs/$test_file")
+    end
+end
\ No newline at end of file
diff --git a/test/writing_outputs/test_writing_stats_ms.jl b/test/writing_outputs/test_writing_stats_ms.jl
new file mode 100644
index 0000000000..6160963944
--- /dev/null
+++ b/test/writing_outputs/test_writing_stats_ms.jl
@@ -0,0 +1,99 @@
+module TestWritingStatsMs
+
+using Test
+using CSV, DataFrames
+using GenX
+
+# Scratch directory outside the source tree, independent of the current working directory
+const outpath = mktempdir()
+const stats_file = joinpath(outpath, GenX._get_multi_stage_stats_filename())
+
+# Relative gap that GenX is expected to store for the given bounds
+expected_gap(upper_bound, lower_bound) = (upper_bound - lower_bound) / lower_bound
+
+# Check that row `i` of `df_stats` holds the given statistics for iteration `i`
+function test_stats_d(df_stats, i, iteration_time, upper_bound, lower_bound, relative_gap)
+    header = GenX._get_multi_stage_stats_header()
+    @test df_stats[i, header[1]] == i
+    @test df_stats[i, header[2]] ≈ iteration_time
+    @test df_stats[i, header[3]] ≈ upper_bound
+    @test df_stats[i, header[4]] ≈ lower_bound
+    @test df_stats[i, header[5]] ≈ relative_gap
+end
+
+# Write the statistics of iteration `i` to the stats file, read it back and check row `i`
+function update_and_check(i, upper_bound, lower_bound, iteration_time; new_row = false)
+    GenX.update_multi_stage_stats_file(outpath, i, upper_bound, lower_bound, iteration_time, new_row = new_row)
+    df_stats = CSV.read(stats_file, DataFrame)
+    test_stats_d(df_stats, i, iteration_time, upper_bound, lower_bound, expected_gap(upper_bound, lower_bound))
+    return df_stats
+end
+
+function test_header()
+    # Note: if this test fails, it means that the header in the function _get_multi_stage_stats_header() has been changed.
+    # Make sure to check that the code is consistent with the new header, and update the test accordingly.
+    header = GenX._get_multi_stage_stats_header()
+    @test header == ["Iteration_Number", "Seconds", "Upper_Bound", "Lower_Bound", "Relative_Gap"]
+end
+
+function test_skip_existing_file()
+    touch(stats_file)
+    # If the file already exists, it must not be overwritten: the empty file stays empty
+    @test isnothing(GenX.write_multi_stage_stats(outpath, Dict()))
+    @test filesize(stats_file) == 0
+    rm(stats_file; force = true)
+end
+
+function test_write_multi_stage_stats(iter::Int = 10)
+    # test writing stats to file for `iter` number of iterations
+    times_a, upper_bounds_a, lower_bounds_a = rand(iter), rand(iter), rand(iter)
+    stats_d = Dict("TIMES" => times_a, "UPPER_BOUNDS" => upper_bounds_a, "LOWER_BOUNDS" => lower_bounds_a)
+    @test isnothing(GenX.write_multi_stage_stats(outpath, stats_d))
+    df_stats = CSV.read(stats_file, DataFrame)
+    @test size(df_stats) == (iter, length(GenX._get_multi_stage_stats_header()))
+    for (i, (t, ub, lb)) in enumerate(zip(times_a, upper_bounds_a, lower_bounds_a))
+        test_stats_d(df_stats, i, t, ub, lb, expected_gap(ub, lb))
+    end
+    rm(stats_file; force = true)
+end
+
+function test_create_multi_stage_stats_file()
+    GenX.create_multi_stage_stats_file(outpath)
+    df_stats = CSV.read(stats_file, DataFrame)
+    @test size(df_stats, 1) == 0
+    @test size(df_stats, 2) == 5
+    @test names(df_stats) == GenX._get_multi_stage_stats_header()
+    rm(stats_file; force = true)
+end
+
+function test_update_multi_stage_stats_file(iter::Int = 10)
+    # test updating the stats file with new values
+    GenX.create_multi_stage_stats_file(outpath)
+    lower_bound = rand()
+    iteration_time = rand()
+    for i in 1:iter
+        # upper bound is updated
+        upper_bound = rand()
+        update_and_check(i, upper_bound, lower_bound, iteration_time; new_row = true)
+        # lower bound is updated
+        lower_bound = rand()
+        update_and_check(i, upper_bound, lower_bound, iteration_time)
+        # iteration time is updated
+        iteration_time = rand()
+        df_stats = update_and_check(i, upper_bound, lower_bound, iteration_time)
+        @test size(df_stats) == (i, length(GenX._get_multi_stage_stats_header())) # one row per iteration
+    end
+    rm(stats_file; force = true)
+end
+
+@testset "Test writing multi-stage stats" begin
+    test_header()
+    test_skip_existing_file()
+    test_write_multi_stage_stats()
+    test_create_multi_stage_stats_file()
+    test_update_multi_stage_stats_file()
+end
+
+rm(outpath; force = true, recursive = true) # tolerate leftovers from a failed test
+
+end # module TestWritingStatsMs
\ No newline at end of file