Skip to content

Commit

Permalink
Add serialization for TimeVortex and several action types
Browse files Browse the repository at this point in the history
Add CheckpointAction to trigger checkpoints
  • Loading branch information
gvoskuilen committed Apr 16, 2024
1 parent 2eba1c4 commit 796079e
Show file tree
Hide file tree
Showing 40 changed files with 800 additions and 53 deletions.
4 changes: 4 additions & 0 deletions src/sst/core/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ nobase_dist_sst_HEADERS = \
activity.h \
clock.h \
baseComponent.h \
checkpointAction.h \
component.h \
componentExtension.h \
componentInfo.h \
Expand Down Expand Up @@ -92,11 +93,13 @@ nobase_dist_sst_HEADERS = \
serialization/serialize.h \
serialization/serialize_impl_fwd.h \
serialization/serialize_array.h \
serialization/serialize_atomic.h \
serialization/serialize_buffer_accessor.h \
serialization/serialize_deque.h \
serialization/serialize_list.h \
serialization/serialize_map.h \
serialization/serialize_packer.h \
serialization/serialize_priority_queue.h \
serialization/serialize_serializable.h \
serialization/serialize_set.h \
serialization/serialize_sizer.h \
Expand Down Expand Up @@ -160,6 +163,7 @@ sst_core_sources = \
action.cc \
clock.cc \
baseComponent.cc \
checkpointAction.cc \
component.cc \
componentExtension.cc \
componentInfo.cc \
Expand Down
6 changes: 6 additions & 0 deletions src/sst/core/action.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,10 @@ Action::endSimulation(SimTime_t end)
Simulation_impl::getSimulation()->endSimulation(end);
}

// Serialization hook for Action. The body only forwards to the Activity
// base-class implementation; nothing else is serialized at this level.
void
Action::serialize_order(SST::Core::Serialization::serializer& ser)
{
    Activity::serialize_order(ser);
}

} // namespace SST
4 changes: 3 additions & 1 deletion src/sst/core/action.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ class Action : public Activity
protected:
/** Called to signal to the Simulation object to end the simulation */
void endSimulation();

/** Called to signal to the Simulation object to end the simulation
* @param end Simulation cycle when the simulation finishes
*/
void endSimulation(SimTime_t end);

NotSerializable(SST::Action)
void serialize_order(SST::Core::Serialization::serializer& ser) override;
ImplementVirtualSerializable(SST::Action)
};

} // namespace SST
Expand Down
4 changes: 3 additions & 1 deletion src/sst/core/cfgoutput/jsonConfigOutput.cc
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,14 @@ JSONConfigGraphOutput::generate(const Config* cfg, ConfigGraph* graph)
outputJson["program_options"]["print-timing-info"] = cfg->print_timing() ? "true" : "false";
// Ignore stopAfter for now
// outputJson["program_options"]["stopAfter"] = cfg->stopAfterSec();
outputJson["program_options"]["heartbeat-period"] = cfg->heartbeatPeriod();
outputJson["program_options"]["heartbeat-period"] = cfg->heartbeat_period();
outputJson["program_options"]["timebase"] = cfg->timeBase();
outputJson["program_options"]["partitioner"] = cfg->partitioner();
outputJson["program_options"]["timeVortex"] = cfg->timeVortex();
outputJson["program_options"]["interthread-links"] = cfg->interthread_links() ? "true" : "false";
outputJson["program_options"]["output-prefix-core"] = cfg->output_core_prefix();
outputJson["program_options"]["checkpoint-period"] = cfg->checkpoint_period();


// Put in the global param sets
for ( const auto& set : getGlobalParamSetNames() ) {
Expand Down
4 changes: 3 additions & 1 deletion src/sst/core/cfgoutput/pythonConfigOutput.cc
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ PythonConfigGraphOutput::generate(const Config* cfg, ConfigGraph* graph)
outputFile, "sst.setProgramOption(\"print-timing-info\", \"%s\")\n", cfg->print_timing() ? "true" : "false");
// Ignore stopAfter for now
// fprintf(outputFile, "sst.setProgramOption(\"stopAfter\", \"%" PRIu32 "\")\n", cfg->stopAfterSec);
fprintf(outputFile, "sst.setProgramOption(\"heartbeat-period\", \"%s\")\n", cfg->heartbeatPeriod().c_str());
fprintf(outputFile, "sst.setProgramOption(\"heartbeat-period\", \"%s\")\n", cfg->heartbeat_period().c_str());
fprintf(outputFile, "sst.setProgramOption(\"timebase\", \"%s\")\n", cfg->timeBase().c_str());
fprintf(outputFile, "sst.setProgramOption(\"partitioner\", \"%s\")\n", cfg->partitioner().c_str());
fprintf(outputFile, "sst.setProgramOption(\"timeVortex\", \"%s\")\n", cfg->timeVortex().c_str());
Expand All @@ -238,6 +238,8 @@ PythonConfigGraphOutput::generate(const Config* cfg, ConfigGraph* graph)
cfg->interthread_links() ? "true" : "false");
fprintf(outputFile, "sst.setProgramOption(\"output-prefix-core\", \"%s\")\n", cfg->output_core_prefix().c_str());

fprintf(outputFile, "sst.setProgramOption(\"checkpoint-period\", \"%s\")\n", cfg->checkpoint_period().c_str());

// Output the global params
fprintf(outputFile, "# Define the global parameter sets:\n");
std::vector<std::string> global_param_sets = getGlobalParamSetNames();
Expand Down
69 changes: 69 additions & 0 deletions src/sst/core/checkpointAction.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright 2009-2023 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2023, NTESS
// All rights reserved.
//
// This file is part of the SST software package. For license
// information, see the LICENSE file in the top level directory of the
// distribution.

#include "sst_config.h"

#include "sst/core/checkpointAction.h"

#include "sst/core/component.h"
#include "sst/core/mempoolAccessor.h"
#include "sst/core/simulation_impl.h"
#include "sst/core/stringize.h"
#include "sst/core/timeConverter.h"
#include "sst/core/warnmacros.h"

#ifdef SST_CONFIG_HAVE_MPI
DISABLE_WARN_MISSING_OVERRIDE
#include <mpi.h>
REENABLE_WARNING
#endif

namespace SST {

/**
   Build the checkpoint action and schedule its first execution.

   @param cfg       Currently unused (wrapped in UNUSED)
   @param this_rank Rank this action belongs to; stored in `rank`
   @param sim       Simulation object the action inserts itself into
   @param period    Converter whose getFactor() value is used as the time of
                    the first firing and as the spacing of later ones
*/
CheckpointAction::CheckpointAction(Config* UNUSED(cfg), int this_rank, Simulation_impl* sim, TimeConverter* period) :
    Action(),
    rank(this_rank),
    m_period(period),
    lastTime(0.0) // give every rank a defined value; only rank 0 overwrites it below
{
    // Every rank inserts the action; only rank 0 records the CPU-time baseline
    // that execute() uses for its "time since last checkpoint" report.
    sim->insertActivity(period->getFactor(), this);
    if ( 0 == this_rank ) { lastTime = sst_get_cpu_time(); }
    // if( (0 == this_rank) ) {
    // sim->insertActivity( period->getFactor(), this );
    // lastTime = sst_get_cpu_time();
    // }
}

// Nothing to release here: the destructor body is empty.
CheckpointAction::~CheckpointAction() = default;

/**
   Fire the checkpoint action.

   On rank 0 a status line is printed with the elapsed simulated time and the
   CPU time since the previous report. Every rank then calls
   Simulation_impl::checkpoint() and re-inserts this action one period
   (m_period->getFactor()) after the current simulation cycle.
*/
void
CheckpointAction::execute(void)
{
    Simulation_impl* sim = Simulation_impl::getSimulation();
    const double     now = sst_get_cpu_time();

    Output& sim_output = sim->getSimulationOutput();
    if ( 0 == rank ) {
        // Report through the reference obtained above instead of fetching the
        // Output object a second time.
        sim_output.output(
            "# Simulation Checkpoint: Simulated Time %s (Real CPU time since last checkpoint %.5f seconds)\n",
            sim->getElapsedSimTime().toStringBestSI().c_str(), (now - lastTime));

        lastTime = now;
    }

    sim->checkpoint();

    // Re-arm: schedule the next firing one period after the current cycle.
    SimTime_t next = sim->getCurrentSimCycle() + m_period->getFactor();
    sim->insertActivity(next, this);

    // Print some resource usage
}

} // namespace SST
55 changes: 55 additions & 0 deletions src/sst/core/checkpointAction.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Copyright 2009-2023 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2023, NTESS
// All rights reserved.
//
// This file is part of the SST software package. For license
// information, see the LICENSE file in the top level directory of the
// distribution.

#ifndef SST_CORE_CHECKPOINT_ACTION_H
#define SST_CORE_CHECKPOINT_ACTION_H

#include "sst/core/action.h"
#include "sst/core/config.h"
#include "sst/core/cputimer.h"
#include "sst/core/output.h"
#include "sst/core/sst_types.h"

#include <set>

namespace SST {

class Simulation_impl;
class TimeConverter;

/**
  \class CheckpointAction
    A recurring event to trigger checkpoint generation
*/
class CheckpointAction : public Action
{
public:
    /**
    Create a new checkpoint object for the simulation core to initiate checkpoints
    @param cfg       Simulation configuration
    @param this_rank Rank that owns this action
    @param sim       Simulation object the action is inserted into
    @param period    Time converter describing the interval between checkpoints
    */
    CheckpointAction(Config* cfg, int this_rank, Simulation_impl* sim, TimeConverter* period);
    ~CheckpointAction();

    NotSerializable(SST::CheckpointAction) // Going to have to fix this

private:
    // Private default constructor; the member initializers below keep the
    // object in a defined state when it is used.
    CheckpointAction() {}

    // Copying is not supported.
    CheckpointAction(const CheckpointAction&) = delete;
    void operator=(CheckpointAction const&)   = delete;

    /** Perform one checkpoint step */
    void execute(void) override;

    int            rank     = 0;       // rank passed to the constructor
    TimeConverter* m_period = nullptr; // checkpoint interval; not allocated by this class
    double         lastTime = 0.0;     // CPU-time value recorded at the previous report
};

} // namespace SST

#endif // SST_CORE_CHECKPOINT_ACTION_H
2 changes: 1 addition & 1 deletion src/sst/core/componentInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class ComponentInfo
ComponentInfo();

/**
Funciton used to serialize the class
Function used to serialize the class
*/
void serialize_order(SST::Core::Serialization::serializer& ser);

Expand Down
54 changes: 43 additions & 11 deletions src/sst/core/config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class ConfigHelper
// Store the heartbeat period option. The argument string is copied as given;
// no validation is performed (see the TODO below). Always returns 0.
// NOTE(review): the two assignments below appear to be the removed/added pair
// of a diff rendering (heartbeatPeriod_ renamed to heartbeat_period_) - confirm
// which one the actual file contains.
static int setHeartbeat(Config* cfg, const std::string& arg)
{
/* TODO: Error checking */
cfg->heartbeatPeriod_ = arg;
cfg->heartbeat_period_ = arg;
return 0;
}

Expand Down Expand Up @@ -451,6 +451,22 @@ class ConfigHelper
return 0;
}

// Advanced options - checkpointing

// Record the checkpoint period option.
// The argument string is copied into the config unchanged; always returns 0.
static int setCheckpointPeriod(Config* cfg, const std::string& arg)
{
    /* TODO: Error checking */
    cfg->checkpoint_period_ = arg;

    return 0;
}

// Set whether to load from checkpoint.
// The flag is raised only when the option's argument is the empty string;
// any other value leaves it untouched. Always returns 0.
// NOTE(review): the option is registered with a "FILE" argument placeholder,
// yet the flag is only set for an empty argument - confirm this is intended.
static int setLoadFromCheckpoint(Config* cfg, const std::string& arg)
{
    // `arg` is read below, so it is no longer wrapped in UNUSED().
    if ( arg == "" ) { cfg->load_from_checkpoint_ = true; }
    return 0;
}

// Advanced options - environment

Expand All @@ -477,7 +493,7 @@ Config::print()
std::cout << "stop_at = " << stop_at_ << std::endl;
std::cout << "exit_after = " << exit_after_ << std::endl;
std::cout << "partitioner = " << partitioner_ << std::endl;
std::cout << "heartbeatPeriod = " << heartbeatPeriod_ << std::endl;
std::cout << "heartbeat_period = " << heartbeat_period_ << std::endl;
std::cout << "output_directory = " << output_directory_ << std::endl;
std::cout << "output_core_prefix = " << output_core_prefix_ << std::endl;
std::cout << "output_config_graph = " << output_config_graph_ << std::endl;
Expand All @@ -489,6 +505,7 @@ Config::print()
std::cout << "output_partition = " << output_partition_ << std::endl;
std::cout << "timeBase = " << timeBase_ << std::endl;
std::cout << "parallel_load = " << parallel_load_ << std::endl;
std::cout << "load_checkpoint = " << load_from_checkpoint_ << std::endl;
std::cout << "timeVortex = " << timeVortex_ << std::endl;
std::cout << "interthread_links = " << interthread_links_ << std::endl;
#ifdef USE_MEMPOOL
Expand Down Expand Up @@ -535,15 +552,15 @@ Config::Config(uint32_t num_ranks, bool first_rank) : ConfigShared(!first_rank,
// Basic Options
first_rank_ = first_rank;

num_ranks_ = num_ranks;
num_threads_ = 1;
configFile_ = "NONE";
model_options_ = "";
print_timing_ = false;
stop_at_ = "0 ns";
exit_after_ = 0;
partitioner_ = "sst.linear";
heartbeatPeriod_ = "";
num_ranks_ = num_ranks;
num_threads_ = 1;
configFile_ = "NONE";
model_options_ = "";
print_timing_ = false;
stop_at_ = "0 ns";
exit_after_ = 0;
partitioner_ = "sst.linear";
heartbeat_period_ = "";

char* wd_buf = (char*)malloc(sizeof(char) * PATH_MAX);
getcwd(wd_buf, PATH_MAX);
Expand Down Expand Up @@ -590,6 +607,10 @@ Config::Config(uint32_t num_ranks, bool first_rank) : ConfigShared(!first_rank,
#endif
rank_seq_startup_ = false;

// Advanced Options - Checkpointing
checkpoint_period_ = "";
load_from_checkpoint_ = false;

// Advanced Options - environment
enable_sig_handling_ = true;

Expand Down Expand Up @@ -780,6 +801,17 @@ Config::insertOptions()
"disable-signal-handlers", 0, "Disable signal handlers",
std::bind(&ConfigHelper::disableSigHandlers, this, _1));

/* Advanced Features - Checkpoint */
DEF_SECTION_HEADING("Advanced Options - Checkpointing (EXPERIMENTAL)");
DEF_ARG(
"checkpoint-period", 0, "PERIOD",
"Set frequency for checkpoints to be generated (this is an approximate timing and specified in simulated "
"time.\n ",
std::bind(&ConfigHelper::setCheckpointPeriod, this, _1), true);
DEF_ARG(
"load-checkpoint", 0, "FILE", "Set file to load checkpoint from",
std::bind(&ConfigHelper::setLoadFromCheckpoint, this, _1), true);

enableDashDashSupport(std::bind(&ConfigHelper::setModelOptions, this, _1));
addPositionalCallback(std::bind(&Config::positionalCallback, this, _1, _2));
};
Expand Down
Loading

0 comments on commit 796079e

Please sign in to comment.