diff --git a/src/sst/core/Makefile.am b/src/sst/core/Makefile.am index c7d6ec697..2038e17ef 100644 --- a/src/sst/core/Makefile.am +++ b/src/sst/core/Makefile.am @@ -21,6 +21,7 @@ nobase_dist_sst_HEADERS = \ activity.h \ clock.h \ baseComponent.h \ + checkpointAction.h \ component.h \ componentExtension.h \ componentInfo.h \ @@ -92,11 +93,13 @@ nobase_dist_sst_HEADERS = \ serialization/serialize.h \ serialization/serialize_impl_fwd.h \ serialization/serialize_array.h \ + serialization/serialize_atomic.h \ serialization/serialize_buffer_accessor.h \ serialization/serialize_deque.h \ serialization/serialize_list.h \ serialization/serialize_map.h \ serialization/serialize_packer.h \ + serialization/serialize_priority_queue.h \ serialization/serialize_serializable.h \ serialization/serialize_set.h \ serialization/serialize_sizer.h \ @@ -160,6 +163,7 @@ sst_core_sources = \ action.cc \ clock.cc \ baseComponent.cc \ + checkpointAction.cc \ component.cc \ componentExtension.cc \ componentInfo.cc \ diff --git a/src/sst/core/action.cc b/src/sst/core/action.cc index 3d1af57d5..b577f2a6d 100644 --- a/src/sst/core/action.cc +++ b/src/sst/core/action.cc @@ -29,4 +29,10 @@ Action::endSimulation(SimTime_t end) Simulation_impl::getSimulation()->endSimulation(end); } +void +Action::serialize_order(SST::Core::Serialization::serializer& ser) +{ + SST::Activity::serialize_order(ser); +} + } // namespace SST diff --git a/src/sst/core/action.h b/src/sst/core/action.h index 67d6a7131..9bfd96d6b 100644 --- a/src/sst/core/action.h +++ b/src/sst/core/action.h @@ -32,12 +32,14 @@ class Action : public Activity protected: /** Called to signal to the Simulation object to end the simulation */ void endSimulation(); + /** Called to signal to the Simulation object to end the simulation * @param end Simulation cycle when the simulation finishes */ void endSimulation(SimTime_t end); - NotSerializable(SST::Action) + void serialize_order(SST::Core::Serialization::serializer& ser) override; + ImplementVirtualSerializable(SST::Action) }; } // namespace SST diff --git a/src/sst/core/cfgoutput/jsonConfigOutput.cc b/src/sst/core/cfgoutput/jsonConfigOutput.cc index 540ae04c4..1dd4dac9d 100644 --- a/src/sst/core/cfgoutput/jsonConfigOutput.cc +++ b/src/sst/core/cfgoutput/jsonConfigOutput.cc @@ -154,12 +154,14 @@ JSONConfigGraphOutput::generate(const Config* cfg, ConfigGraph* graph) outputJson["program_options"]["print-timing-info"] = cfg->print_timing() ? "true" : "false"; // Ignore stopAfter for now // outputJson["program_options"]["stopAfter"] = cfg->stopAfterSec(); - outputJson["program_options"]["heartbeat-period"] = cfg->heartbeatPeriod(); + outputJson["program_options"]["heartbeat-period"] = cfg->heartbeat_period(); outputJson["program_options"]["timebase"] = cfg->timeBase(); outputJson["program_options"]["partitioner"] = cfg->partitioner(); outputJson["program_options"]["timeVortex"] = cfg->timeVortex(); outputJson["program_options"]["interthread-links"] = cfg->interthread_links() ? "true" : "false"; outputJson["program_options"]["output-prefix-core"] = cfg->output_core_prefix(); + outputJson["program_options"]["checkpoint-period"] = cfg->checkpoint_period(); + // Put in the global param sets for ( const auto& set : getGlobalParamSetNames() ) { diff --git a/src/sst/core/cfgoutput/pythonConfigOutput.cc b/src/sst/core/cfgoutput/pythonConfigOutput.cc index 909194ff1..9ca9fb0d3 100644 --- a/src/sst/core/cfgoutput/pythonConfigOutput.cc +++ b/src/sst/core/cfgoutput/pythonConfigOutput.cc @@ -229,7 +229,7 @@ PythonConfigGraphOutput::generate(const Config* cfg, ConfigGraph* graph) outputFile, "sst.setProgramOption(\"print-timing-info\", \"%s\")\n", cfg->print_timing() ? "true" : "false"); // Ignore stopAfter for now // fprintf(outputFile, "sst.setProgramOption(\"stopAfter\", \"%" PRIu32 "\")\n", cfg->stopAfterSec); - fprintf(outputFile, "sst.setProgramOption(\"heartbeat-period\", \"%s\")\n", cfg->heartbeatPeriod().c_str()); + fprintf(outputFile, "sst.setProgramOption(\"heartbeat-period\", \"%s\")\n", cfg->heartbeat_period().c_str()); fprintf(outputFile, "sst.setProgramOption(\"timebase\", \"%s\")\n", cfg->timeBase().c_str()); fprintf(outputFile, "sst.setProgramOption(\"partitioner\", \"%s\")\n", cfg->partitioner().c_str()); fprintf(outputFile, "sst.setProgramOption(\"timeVortex\", \"%s\")\n", cfg->timeVortex().c_str()); @@ -238,6 +238,8 @@ PythonConfigGraphOutput::generate(const Config* cfg, ConfigGraph* graph) cfg->interthread_links() ? "true" : "false"); fprintf(outputFile, "sst.setProgramOption(\"output-prefix-core\", \"%s\")\n", cfg->output_core_prefix().c_str()); + fprintf(outputFile, "sst.setProgramOption(\"checkpoint-period\", \"%s\")\n", cfg->checkpoint_period().c_str()); + // Output the global params fprintf(outputFile, "# Define the global parameter sets:\n"); std::vector global_param_sets = getGlobalParamSetNames(); diff --git a/src/sst/core/checkpointAction.cc b/src/sst/core/checkpointAction.cc new file mode 100644 index 000000000..72de1ad58 --- /dev/null +++ b/src/sst/core/checkpointAction.cc @@ -0,0 +1,69 @@ +// Copyright 2009-2023 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2023, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#include "sst_config.h" + +#include "sst/core/checkpointAction.h" + +#include "sst/core/component.h" +#include "sst/core/mempoolAccessor.h" +#include "sst/core/simulation_impl.h" +#include "sst/core/stringize.h" +#include "sst/core/timeConverter.h" +#include "sst/core/warnmacros.h" + +#ifdef SST_CONFIG_HAVE_MPI +DISABLE_WARN_MISSING_OVERRIDE +#include +REENABLE_WARNING +#endif + +namespace SST { + +CheckpointAction::CheckpointAction(Config* UNUSED(cfg), int this_rank, Simulation_impl* sim, TimeConverter* period) : + Action(), + rank(this_rank), + m_period(period) +{ + sim->insertActivity(period->getFactor(), this); + if ( (0 == this_rank) ) { lastTime = sst_get_cpu_time(); } + // if( (0 == this_rank) ) { + // sim->insertActivity( period->getFactor(), this ); + // lastTime = sst_get_cpu_time(); + // } +} + +CheckpointAction::~CheckpointAction() {} + +void +CheckpointAction::execute(void) +{ + Simulation_impl* sim = Simulation_impl::getSimulation(); + const double now = sst_get_cpu_time(); + + Output& sim_output = sim->getSimulationOutput(); + if ( 0 == rank ) { + sim->getSimulationOutput().output( + "# Simulation Checkpoint: Simulated Time %s (Real CPU time since last checkpoint %.5f seconds)\n", + sim->getElapsedSimTime().toStringBestSI().c_str(), (now - lastTime)); + + lastTime = now; + } + + sim->checkpoint(); + + SimTime_t next = sim->getCurrentSimCycle() + m_period->getFactor(); + sim->insertActivity(next, this); + + // Print some resource usage +} + +} // namespace SST diff --git a/src/sst/core/checkpointAction.h b/src/sst/core/checkpointAction.h new file mode 100644 index 000000000..095120b1c --- /dev/null +++ b/src/sst/core/checkpointAction.h @@ -0,0 +1,55 @@ +// Copyright 2009-2023 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2023, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_CORE_CHECKPOINT_ACTION_H +#define SST_CORE_CHECKPOINT_ACTION_H + +#include "sst/core/action.h" +#include "sst/core/config.h" +#include "sst/core/cputimer.h" +#include "sst/core/output.h" +#include "sst/core/sst_types.h" + +#include + +namespace SST { + +class Simulation_impl; +class TimeConverter; + +/** + \class CheckpointAction + A recurring event to trigger checkpoint generation +*/ +class CheckpointAction : public Action +{ +public: + /** + Create a new checkpoint object for the simulation core to initiate checkpoints + */ + CheckpointAction(Config* cfg, int this_rank, Simulation_impl* sim, TimeConverter* period); + ~CheckpointAction(); + + NotSerializable(SST::CheckpointAction) // Going to have to fix this + + private : CheckpointAction() {}; + CheckpointAction(const CheckpointAction&); + + void operator=(CheckpointAction const&); + void execute(void) override; + int rank; + TimeConverter* m_period; + double lastTime; +}; + +} // namespace SST + +#endif // SST_CORE_CHECKPOINT_ACTION_H diff --git a/src/sst/core/componentInfo.h b/src/sst/core/componentInfo.h index a26480243..ed643d436 100644 --- a/src/sst/core/componentInfo.h +++ b/src/sst/core/componentInfo.h @@ -168,7 +168,7 @@ class ComponentInfo ComponentInfo(); /** - Funciton used to serialize the class + Function used to serialize the class */ void serialize_order(SST::Core::Serialization::serializer& ser); diff --git a/src/sst/core/config.cc b/src/sst/core/config.cc index 8b6b5019b..c3a7bb2b9 100644 --- a/src/sst/core/config.cc +++ b/src/sst/core/config.cc @@ -162,7 +162,7 @@ class ConfigHelper static int setHeartbeat(Config* cfg, const std::string& arg) { /* TODO: Error checking */ - cfg->heartbeatPeriod_ = arg; + cfg->heartbeat_period_ = arg; return 0; } @@ -451,6 +451,22 @@ class ConfigHelper return 0; } + // Advanced options - checkpointing + + // Set frequency of checkpoint generation + static int setCheckpointPeriod(Config* cfg, const std::string& arg) + { + /* TODO: Error checking */ + cfg->checkpoint_period_ = arg; + return 0; + } + + // Set whether to load from checkpoint + static int setLoadFromCheckpoint(Config* cfg, const std::string& UNUSED(arg)) + { + if ( arg == "" ) { cfg->load_from_checkpoint_ = true; } + return 0; + } // Advanced options - environment @@ -477,7 +493,7 @@ Config::print() std::cout << "stop_at = " << stop_at_ << std::endl; std::cout << "exit_after = " << exit_after_ << std::endl; std::cout << "partitioner = " << partitioner_ << std::endl; - std::cout << "heartbeatPeriod = " << heartbeatPeriod_ << std::endl; + std::cout << "heartbeat_period = " << heartbeat_period_ << std::endl; std::cout << "output_directory = " << output_directory_ << std::endl; std::cout << "output_core_prefix = " << output_core_prefix_ << std::endl; std::cout << "output_config_graph = " << output_config_graph_ << std::endl; @@ -489,6 +505,7 @@ Config::print() std::cout << "output_partition = " << output_partition_ << std::endl; std::cout << "timeBase = " << timeBase_ << std::endl; std::cout << "parallel_load = " << parallel_load_ << std::endl; + std::cout << "load_checkpoint = " << load_from_checkpoint_ << std::endl; std::cout << "timeVortex = " << timeVortex_ << std::endl; std::cout << "interthread_links = " << interthread_links_ << std::endl; #ifdef USE_MEMPOOL @@ -535,15 +552,15 @@ Config::Config(uint32_t num_ranks, bool first_rank) : ConfigShared(!first_rank, // Basic Options first_rank_ = first_rank; - num_ranks_ = num_ranks; - num_threads_ = 1; - configFile_ = "NONE"; - model_options_ = ""; - print_timing_ = false; - stop_at_ = "0 ns"; - exit_after_ = 0; - partitioner_ = "sst.linear"; - heartbeatPeriod_ = ""; + num_ranks_ = num_ranks; + num_threads_ = 1; + configFile_ = "NONE"; + model_options_ = ""; + print_timing_ = false; + stop_at_ = "0 ns"; + exit_after_ = 0; + partitioner_ = "sst.linear"; + heartbeat_period_ = ""; char* wd_buf = (char*)malloc(sizeof(char) * PATH_MAX); getcwd(wd_buf, PATH_MAX); @@ -590,6 +607,10 @@ Config::Config(uint32_t num_ranks, bool first_rank) : ConfigShared(!first_rank, #endif rank_seq_startup_ = false; + // Advanced Options - Checkpointing + checkpoint_period_ = ""; + load_from_checkpoint_ = false; + // Advanced Options - environment enable_sig_handling_ = true; @@ -780,6 +801,17 @@ Config::insertOptions() "disable-signal-handlers", 0, "Disable signal handlers", std::bind(&ConfigHelper::disableSigHandlers, this, _1)); + /* Advanced Features - Checkpoint */ + DEF_SECTION_HEADING("Advanced Options - Checkpointing (EXPERIMENTAL)"); + DEF_ARG( + "checkpoint-period", 0, "PERIOD", + "Set frequency for checkpoints to be generated (this is an approximate timing and specified in simulated " + "time.\n ", + std::bind(&ConfigHelper::setCheckpointPeriod, this, _1), true); + DEF_ARG( + "load-checkpoint", 0, "FILE", "Set file to load checkpoint from", + std::bind(&ConfigHelper::setLoadFromCheckpoint, this, _1), true); + enableDashDashSupport(std::bind(&ConfigHelper::setModelOptions, this, _1)); addPositionalCallback(std::bind(&Config::positionalCallback, this, _1, _2)); }; diff --git a/src/sst/core/config.h b/src/sst/core/config.h index 7b1352013..60b42591e 100644 --- a/src/sst/core/config.h +++ b/src/sst/core/config.h @@ -134,7 +134,7 @@ class Config : public ConfigShared, public SST::Core::Serialization::serializabl /** Simulation period at which to print out a "heartbeat" message */ - const std::string& heartbeatPeriod() const { return heartbeatPeriod_; } + const std::string& heartbeat_period() const { return heartbeat_period_; } /** The directory to be used for writting output files @@ -216,7 +216,7 @@ class Config : public ConfigShared, public SST::Core::Serialization::serializabl bool parallel_load_mode_multi() const { return parallel_load_mode_multi_; } /** - Retruns the string equivalent for parallel-load: NONE (if + Returns the string equivalent for parallel-load: NONE (if parallel load is off), SINGLE or MULTI. */ std::string parallel_load_str() const @@ -226,6 +226,16 @@ class Config : public ConfigShared, public SST::Core::Serialization::serializabl return "SINGLE"; } + /** + Simulation period at which to create a checkpoint + */ + const std::string& checkpoint_period() const { return checkpoint_period_; } + + /** + * Returns whether the simulation will begin from a checkpoint (true) or not (false). + */ + bool load_from_checkpoint() const { return load_from_checkpoint_; } + /** TimeVortex implementation to use */ @@ -360,7 +370,7 @@ class Config : public ConfigShared, public SST::Core::Serialization::serializabl ser& stop_at_; ser& exit_after_; ser& partitioner_; - ser& heartbeatPeriod_; + ser& heartbeat_period_; ser& output_directory_; ser& output_core_prefix_; @@ -390,6 +400,8 @@ class Config : public ConfigShared, public SST::Core::Serialization::serializabl #ifdef USE_MEMPOOL ser& event_dump_file_; #endif + ser& load_from_checkpoint_; + ser& checkpoint_period_; ser& print_env_; ser& enable_sig_handling_; @@ -444,7 +456,7 @@ class Config : public ConfigShared, public SST::Core::Serialization::serializabl std::string stop_at_; /*!< When to stop the simulation */ uint32_t exit_after_; /*!< When (wall-time) to stop the simulation */ std::string partitioner_; /*!< Partitioner to use */ - std::string heartbeatPeriod_; /*!< Sets the heartbeat period for the simulation */ + std::string heartbeat_period_; /*!< Sets the heartbeat period for the simulation */ std::string output_directory_; /*!< Output directory to dump all files to */ std::string output_core_prefix_; /*!< Set the SST::Output prefix for the core */ @@ -483,6 +495,10 @@ class Config : public ConfigShared, public SST::Core::Serialization::serializabl #endif bool rank_seq_startup_; /*!< Run simulation initialization phases one rank at a time */ + // Advanced options - checkpoint + bool load_from_checkpoint_; /*!< If true, load from checkpoint instead of config file */ + std::string checkpoint_period_; /*!< Simulated time interval to generate checkpoints at */ + // Advanced options - envrionment bool enable_sig_handling_; /*!< Enable signal handling */ // bool print_env_; ** in ConfigShared diff --git a/src/sst/core/exit.h b/src/sst/core/exit.h index 11942e0fc..3876f7be4 100644 --- a/src/sst/core/exit.h +++ b/src/sst/core/exit.h @@ -90,6 +90,26 @@ class Exit : public Action unsigned int getGlobalCount() { return global_count; } + /** + * + * TODO to enable different partitioning on restart, will need to associate m_thread_counts and + * m_idSet back to components so that a new Exit event can be generated on restart + */ + void serialize_order(SST::Core::Serialization::serializer& ser) override + { + Action::serialize_order(ser); + ser& num_threads; + for ( unsigned int i = 0; i < m_refCount; i++ ) { + ser& m_thread_counts[i]; + } + ser& m_refCount; + ser& global_count; + ser& m_idSet; + ser& end_time; + ser& single_rank; + // TBD spinlock -> can re-create + } + ImplementSerializable(SST::Exit) private: Exit() {} // for serialization only Exit(const Exit&); // Don't implement diff --git a/src/sst/core/heartbeat.h b/src/sst/core/heartbeat.h index 258265494..50762dfed 100644 --- a/src/sst/core/heartbeat.h +++ b/src/sst/core/heartbeat.h @@ -38,8 +38,9 @@ class SimulatorHeartbeat : public Action SimulatorHeartbeat(Config* cfg, int this_rank, Simulation_impl* sim, TimeConverter* period); ~SimulatorHeartbeat(); -private: - SimulatorHeartbeat() {}; + NotSerializable(SST::SimulatorHeartbeat) + + private : SimulatorHeartbeat() {}; SimulatorHeartbeat(const SimulatorHeartbeat&); void operator=(SimulatorHeartbeat const&); diff --git a/src/sst/core/impl/timevortex/timeVortexPQ.cc b/src/sst/core/impl/timevortex/timeVortexPQ.cc index 0a0e31432..4493c2e04 100644 --- a/src/sst/core/impl/timevortex/timeVortexPQ.cc +++ b/src/sst/core/impl/timevortex/timeVortexPQ.cc @@ -27,6 +27,10 @@ TimeVortexPQBase::TimeVortexPQBase(Params& UNUSED(params)) : current_depth(0) {} +template +TimeVortexPQBase::TimeVortexPQBase() : TimeVortex(), insertOrder(0), max_depth(0), current_depth(0) +{} + template TimeVortexPQBase::~TimeVortexPQBase() { @@ -100,11 +104,34 @@ TimeVortexPQBase::print(Output& out) const out.output("TimeVortex state:\n"); // STL's priority_queue does not support iteration. - // - // dataType_t::iterator it; - // for ( it = data.begin(); it != data.end(); it++ ) { - // (*it)->print(" ", out); - // } + /*std::vector& act = const_cast*>(this)->getContainer(data); + for ( auto it = act.begin(); it != act.end(); it++ ) { + (*it)->print(" ", out); + }*/ +} + +template +void +TimeVortexPQBase::dbg_print(Output& out) +{ + out.output("TimeVortex state:\n"); + + // STL's priority_queue does not support iteration. + std::vector& act = getContainer(data); + for ( auto it = act.begin(); it != act.end(); it++ ) { + (*it)->print(" ", out); + } +} + +template +void +TimeVortexPQBase::serialize_order(SST::Core::Serialization::serializer& ser) +{ + TimeVortex::serialize_order(ser); + ser& insertOrder; + ser& max_depth; + ser& current_depth; + ser& data; } class TimeVortexPQ : public TimeVortexPQBase @@ -120,7 +147,11 @@ class TimeVortexPQ : public TimeVortexPQBase TimeVortexPQ(Params& params) : TimeVortexPQBase(params) {} + TimeVortexPQ() : TimeVortexPQBase() {} // For serialization only ~TimeVortexPQ() {} + + void serialize_order(SST::Core::Serialization::serializer& ser) { TimeVortexPQBase::serialize_order(ser); } + SST_ELI_EXPORT(TimeVortexPQ) }; @@ -137,7 +168,11 @@ class TimeVortexPQ_ts : public TimeVortexPQBase TimeVortexPQ_ts(Params& params) : TimeVortexPQBase(params) {} + TimeVortexPQ_ts() : TimeVortexPQBase() {} // For serialization only ~TimeVortexPQ_ts() {} + + void serialize_order(SST::Core::Serialization::serializer& ser) { TimeVortexPQBase::serialize_order(ser); } + SST_ELI_EXPORT(TimeVortexPQ_ts) }; diff --git a/src/sst/core/impl/timevortex/timeVortexPQ.h b/src/sst/core/impl/timevortex/timeVortexPQ.h index 967d6c08c..7c6ecf758 100644 --- a/src/sst/core/impl/timevortex/timeVortexPQ.h +++ b/src/sst/core/impl/timevortex/timeVortexPQ.h @@ -25,6 +25,7 @@ class Output; namespace IMPL { + /** * Primary Event Queue */ @@ -35,6 +36,7 @@ class TimeVortexPQBase : public TimeVortex public: // TimeVortexPQ(); TimeVortexPQBase(Params& params); + TimeVortexPQBase(); // For serialization only ~TimeVortexPQBase(); bool empty() override; @@ -49,9 +51,23 @@ class TimeVortexPQBase : public TimeVortex uint64_t getCurrentDepth() const override { return current_depth; } uint64_t getMaxDepth() const override { return max_depth; } + void dbg_print(Output& out) override; + + void serialize_order(SST::Core::Serialization::serializer& ser) override; + private: typedef std::priority_queue, Activity::greater> dataType_t; + template + S& getContainer(std::priority_queue& q) + { + struct UnderlyingContainer : std::priority_queue + { + static S& getUnderlyingContainer(std::priority_queue& q) { return q.*&UnderlyingContainer::c; } + }; + return UnderlyingContainer::getUnderlyingContainer(q); + } + // Data dataType_t data; uint64_t insertOrder; diff --git a/src/sst/core/link.cc b/src/sst/core/link.cc index 63dba2b6d..296b0782c 100644 --- a/src/sst/core/link.cc +++ b/src/sst/core/link.cc @@ -89,7 +89,7 @@ SST::Core::Serialization::serialize_impl::operator()(Link*& s, SST::Core: // MULTI-PARELLEL RESTART: When supporting different // restart parallelism, will also need to store the rank // of the links in order to have unique identifies for - // each link. For pair linnks on another rank, we will + // each link. For pair links on another rank, we will // use the delivery_info field as the pointer part of the // tag (this is the uintptr_t representation of the link // pointer on the remote rank). This will also require diff --git a/src/sst/core/link.h b/src/sst/core/link.h index 808b1be3c..72f7aa808 100644 --- a/src/sst/core/link.h +++ b/src/sst/core/link.h @@ -41,7 +41,7 @@ class SST::Core::Serialization::serialize_impl { template friend class serialize; - // Function implemented in timeLord.cc + // Function implemented in link.cc void operator()(Link*& s, SST::Core::Serialization::serializer& ser); }; diff --git a/src/sst/core/model/python/pymodel.cc b/src/sst/core/model/python/pymodel.cc index b5e2caa98..3abb81db0 100644 --- a/src/sst/core/model/python/pymodel.cc +++ b/src/sst/core/model/python/pymodel.cc @@ -312,7 +312,8 @@ getProgramOptions(PyObject* UNUSED(self), PyObject* UNUSED(args)) PyDict_SetItem( dict, SST_ConvertToPythonString("partitioner"), SST_ConvertToPythonString(cfg->partitioner().c_str())); PyDict_SetItem( - dict, SST_ConvertToPythonString("heartbeat-period"), SST_ConvertToPythonString(cfg->heartbeatPeriod().c_str())); + dict, SST_ConvertToPythonString("heartbeat-period"), + SST_ConvertToPythonString(cfg->heartbeat_period().c_str())); PyDict_SetItem( dict, SST_ConvertToPythonString("output-directory"), SST_ConvertToPythonString(cfg->output_directory().c_str())); @@ -365,6 +366,11 @@ getProgramOptions(PyObject* UNUSED(self), PyObject* UNUSED(args)) PyDict_SetItem( dict, SST_ConvertToPythonString("force-rank-seq-startup"), SST_ConvertToPythonBool(cfg->rank_seq_startup())); + // Advanced options - checkpointing + PyDict_SetItem( + dict, SST_ConvertToPythonString("checkpoint-period"), + SST_ConvertToPythonString(cfg->checkpoint_period().c_str())); + return dict; } diff --git a/src/sst/core/oneshot.h b/src/sst/core/oneshot.h index 3e0806e74..b3c0b8297 100644 --- a/src/sst/core/oneshot.h +++ b/src/sst/core/oneshot.h @@ -78,8 +78,9 @@ class OneShot : public Action /** Print details about the OneShot */ void print(const std::string& header, Output& out) const override; -private: - typedef std::vector HandlerList_t; + NotSerializable(SST::OneShot) + + private : typedef std::vector HandlerList_t; // Since this only gets fixed latency events, the times will fire // in order of arrival. No need to use a full map, a double ended diff --git a/src/sst/core/serialization/CMakeLists.txt b/src/sst/core/serialization/CMakeLists.txt index e2caadd97..c3eca5122 100644 --- a/src/sst/core/serialization/CMakeLists.txt +++ b/src/sst/core/serialization/CMakeLists.txt @@ -25,6 +25,7 @@ set(SSTSerializationHeaders serialize_list.h serialize_map.h serialize_packer.h + serialize_priority_queue.h serializer_fwd.h serializer.h serialize_serializable.h diff --git a/src/sst/core/serialization/serialize.h b/src/sst/core/serialization/serialize.h index 1b682dcd1..664d82f79 100644 --- a/src/sst/core/serialization/serialize.h +++ b/src/sst/core/serialization/serialize.h @@ -15,6 +15,7 @@ #include "sst/core/serialization/serializer.h" #include "sst/core/warnmacros.h" +#include #include #include @@ -328,9 +329,11 @@ operator|(serializer& ser, T& t) } // namespace SST #include "sst/core/serialization/serialize_array.h" +#include "sst/core/serialization/serialize_atomic.h" #include "sst/core/serialization/serialize_deque.h" #include "sst/core/serialization/serialize_list.h" #include "sst/core/serialization/serialize_map.h" +#include "sst/core/serialization/serialize_priority_queue.h" #include "sst/core/serialization/serialize_set.h" #include "sst/core/serialization/serialize_string.h" #include "sst/core/serialization/serialize_vector.h" diff --git a/src/sst/core/serialization/serialize_atomic.h b/src/sst/core/serialization/serialize_atomic.h new file mode 100644 index 000000000..1ed2284b2 --- /dev/null +++ b/src/sst/core/serialization/serialize_atomic.h @@ -0,0 +1,60 @@ +// Copyright 2009-2023 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2023, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_CORE_SERIALIZATION_SERIALIZE_ATOMIC_H +#define SST_CORE_SERIALIZATION_SERIALIZE_ATOMIC_H + +#include "sst/core/serialization/serializer.h" + +#include + +namespace SST { +namespace Core { +namespace Serialization { + +template +class serialize> +{ + typedef std::atomic Value; + +public: + void operator()(Value& v, serializer& ser) + { + switch ( ser.mode() ) { + case serializer::SIZER: + { + T t = v.load(); + ser& t; + // ser.size(t); + break; + } + case serializer::PACK: + { + T t = v.load(); + ser& t; + break; + } + case serializer::UNPACK: + { + T val; + ser& val; + v.store(val); + break; + } + } + } +}; + +} // namespace Serialization +} // namespace Core +} // namespace SST + +#endif // SST_CORE_SERIALIZATION_SERIALIZE_VECTOR_H diff --git a/src/sst/core/serialization/serialize_priority_queue.h b/src/sst/core/serialization/serialize_priority_queue.h new file mode 100644 index 000000000..6dae9bd3d --- /dev/null +++ b/src/sst/core/serialization/serialize_priority_queue.h @@ -0,0 +1,85 @@ +// Copyright 2009-2023 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2023, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_CORE_SERIALIZATION_SERIALIZE_PRIORITY_QUEUE_H +#define SST_CORE_SERIALIZATION_SERIALIZE_PRIORITY_QUEUE_H + +#include "sst/core/serialization/serializer.h" + +#include + +namespace SST { +namespace Core { +namespace Serialization { + + +template +class serialize> +{ + typedef std::priority_queue Pqueue; + +public: + S& getContainer(std::priority_queue& q) + { + struct UnderlyingContainer : std::priority_queue + { + static S& getUnderlyingContainer(std::priority_queue& q) { return q.*&UnderlyingContainer::c; } + }; + return UnderlyingContainer::getUnderlyingContainer(q); + } + + void operator()(Pqueue& v, serializer& ser) + { + switch ( ser.mode() ) { + case serializer::SIZER: + { + size_t size = v.size(); + ser.size(size); + + auto container = getContainer(v); + for ( auto it = container.begin(); it != container.end(); ++it ) { + T& t = const_cast(*it); + ser& t; + } + break; + } + case serializer::PACK: + { + size_t size = v.size(); + ser.pack(size); + + auto container = getContainer(v); + for ( auto it = container.begin(); it != container.end(); ++it ) { + T& t = const_cast(*it); + ser& t; + } + break; + } + case serializer::UNPACK: + { + size_t size; + ser.unpack(size); + for ( size_t i = 0; i < size; ++i ) { + T t = {}; + ser& t; + v.push(t); + } + break; + } + } + } +}; + +} // namespace Serialization +} // namespace Core +} // namespace SST + +#endif // SST_CORE_SERIALIZATION_SERIALIZE_PRIORITY_QUEUE_H diff --git a/src/sst/core/serialization/serialize_set.h b/src/sst/core/serialization/serialize_set.h index 5fac41655..8504890d7 100644 --- a/src/sst/core/serialization/serialize_set.h +++ b/src/sst/core/serialization/serialize_set.h @@ -105,7 +105,7 @@ class serialize> size_t size; ser.unpack(size); for ( size_t i = 0; i < size; ++i ) { - T t; + T t = {}; ser& t; v.insert(t); } diff --git a/src/sst/core/simulation.cc b/src/sst/core/simulation.cc index ed3be19fe..283a74f08 100644 --- a/src/sst/core/simulation.cc +++ b/src/sst/core/simulation.cc @@ -14,6 +14,7 @@ #include "sst/core/simulation_impl.h" // simulation_impl header should stay here +#include "sst/core/checkpointAction.h" #include "sst/core/clock.h" #include "sst/core/config.h" #include "sst/core/configGraph.h" @@ -184,15 +185,21 @@ Simulation_impl::Simulation_impl(Config* cfg, RankInfo my_rank, RankInfo num_ran Params p; // params get passed twice - both the params and a ctor argument direct_interthread = cfg->interthread_links(); - std::string timevortex_type(cfg->timeVortex()); - if ( direct_interthread && num_ranks.thread > 1 ) timevortex_type = timevortex_type + ".ts"; - timeVortex = factory->Create(timevortex_type, p); + timeVortexType = cfg->timeVortex(); + if ( direct_interthread && num_ranks.thread > 1 ) timeVortexType = timeVortexType + ".ts"; + timeVortex = factory->Create(timeVortexType, p); if ( my_rank.thread == 0 ) { m_exit = new Exit(num_ranks.thread, num_ranks.rank == 1); } - if ( cfg->heartbeatPeriod() != "" && my_rank.thread == 0 ) { - sim_output.output("# Creating simulation heartbeat at period of %s.\n", cfg->heartbeatPeriod().c_str()); + if ( cfg->heartbeat_period() != "" && my_rank.thread == 0 ) { + sim_output.output("# Creating simulation heartbeat at period of %s.\n", cfg->heartbeat_period().c_str()); m_heartbeat = - new SimulatorHeartbeat(cfg, my_rank.rank, this, timeLord.getTimeConverter(cfg->heartbeatPeriod())); + new SimulatorHeartbeat(cfg, my_rank.rank, this, timeLord.getTimeConverter(cfg->heartbeat_period())); + } + + if ( cfg->checkpoint_period() != "" ) { + sim_output.output("# Creating simulation checkpoint at period of %s.\n", cfg->checkpoint_period().c_str()); + m_checkpoint = + new CheckpointAction(cfg, my_rank.rank, this, timeLord.getTimeConverter(cfg->checkpoint_period())); } // Need to create the thread sync if there is more than one thread @@ -1233,6 +1240,94 @@ Simulation_impl::intializeProfileTools(const std::string& config) #endif } +void +Simulation_impl::checkpoint() +{ + sim_output.output("Checkpoint triggered at time %" PRIu64 "\n", currentSimCycle); + + printf("Printing original TV, size = %zu\n", timeVortex->getCurrentDepth()); + timeVortex->dbg_print(sim_output); + + SST::Core::Serialization::serializer ser; + ser.enable_pointer_tracking(); + + // Get the size + ser.start_sizing(); + ser& timeVortex; + + size_t size = ser.size(); + char* buffer = new char[size + 10]; + + // Serialize + ser.start_packing(buffer, size); + ser& timeVortex; + + // Unpack into a new thing + Params p; + TimeVortex* test_tv; + ser.start_unpacking(buffer, size); + ser& test_tv; + + printf("Printing new TV\n"); + test_tv->dbg_print(sim_output); + + + /* + * State to checkpoint: + TimeVortex* timeVortex; - yes but do it later + TimeConverter* threadMinPartTC; - yes? + Activity* current_activity; - no + static SimTime_t minPart; - yes + static TimeConverter* minPartTC; - yes + std::vector interThreadLatencies; - yes + SimTime_t interThreadMinLatency; - yes + SyncManager* syncManager; - yes + ComponentInfoMap compInfoMap; - yes + clockMap_t clockMap; - yes + oneShotMap_t oneShotMap; - yes, eventually + static Exit* m_exit; - yes + SimulatorHeartbeat* m_heartbeat; - yes + bool endSim; - yes + bool independent; - yes + static std::atomic untimed_msg_count; - no, only valid in untimed phases? + unsigned int untimed_phase; - no, only valid in untimed phases? + volatile sig_atomic_t lastRecvdSignal; - no + ShutdownMode_t shutdown_mode; - not sure if we need this + bool wireUpFinished; - yes + static TimeLord timeLord; - yes + static Output sim_output; - yes + SST::Statistics::StatisticProcessingEngine stat_engine; - yes but not now + std::map profile_tools; - no + std::map> profiler_map; - no + SimulationRunMode runMode; - yes?? + SimTime_t currentSimCycle; - yes + int currentPriority; - yes + SimTime_t endSimCycle; - i guess but probably don't need it + RankInfo my_rank; - yes + RankInfo num_ranks; - yes + std::string output_directory; - yes + static SharedRegionManager* sharedRegionManager; - yes... + + double run_phase_start_time; - no + double run_phase_total_time; - no + double init_phase_start_time; - no + double init_phase_total_time; - no + double complete_phase_start_time; - no + double complete_phase_total_time; - no + + static std::unordered_map instanceMap; - no + static std::vector instanceVec; - no + + std::map link_restart_tracking; - no + std::map event_handler_restart_tracking; - no + CheckpointAction* m_checkpoint; - no + */ + // Checkpointing! + // Create serializer + // SST::Core::Serialization::serializer ser; + // ser.enable_pointer_tracking(); +} + void Simulation_impl::printProfilingInfo(FILE* fp) { diff --git a/src/sst/core/simulation_impl.h b/src/sst/core/simulation_impl.h index fe226de7e..ce8f2fde3 100644 --- a/src/sst/core/simulation_impl.h +++ b/src/sst/core/simulation_impl.h @@ -41,12 +41,12 @@ namespace SST { #define STATALLFLAG "--ALLSTATS--" class Activity; +class CheckpointAction; class Component; class Config; class ConfigGraph; class Exit; class Factory; -class SimulatorHeartbeat; class Link; class LinkMap; class Params; @@ -311,6 +311,8 @@ class Simulation_impl : public Simulation */ TimeConverter* minPartToTC(SimTime_t cycles) const; + void checkpoint(); + /** Factory used to generate the simulation components */ static Factory* factory; @@ -350,6 +352,7 @@ class Simulation_impl : public Simulation friend class SyncManager; TimeVortex* timeVortex; + std::string timeVortexType; TimeConverter* threadMinPartTC; Activity* current_activity; static SimTime_t minPart; @@ -505,6 +508,7 @@ class Simulation_impl : public Simulation /******** Checkpoint/restart tracking data structures ***********/ std::map link_restart_tracking; std::map event_handler_restart_tracking; + CheckpointAction* m_checkpoint; friend void wait_my_turn_start(); diff --git a/src/sst/core/stopAction.h b/src/sst/core/stopAction.h index 28d0f99b1..72aeccb94 100644 --- a/src/sst/core/stopAction.h +++ b/src/sst/core/stopAction.h @@ -56,6 +56,14 @@ class StopAction : public Action { out.output("%s StopAction to be delivered at %" PRIu64 "\n", header.c_str(), getDeliveryTime()); } + + void serialize_order(SST::Core::Serialization::serializer& ser) + { + Action::serialize_order(ser); + ser& message; + ser& print_message; + } + ImplementSerializable(SST::StopAction) }; } // namespace SST diff --git a/src/sst/core/sync/rankSyncParallelSkip.cc b/src/sst/core/sync/rankSyncParallelSkip.cc index 756dd0cb3..f4105e5e7 100644 --- a/src/sst/core/sync/rankSyncParallelSkip.cc +++ b/src/sst/core/sync/rankSyncParallelSkip.cc @@ -483,4 +483,29 @@ RankSyncParallelSkip::deserializeMessage(comm_recv_pair* msg) deserializeTime += SST::Core::Profile::getElapsed(deserialStart); } +void +RankSyncParallelSkip::serialize_order(SST::Core::Serialization::serializer& ser) +{ + RankSync::serialize_order(ser); + ser& myNextSyncTime; + ser& mpiWaitTime; + ser& deserializeTime; + ser& send_count; + for ( uint32_t i = 0; i < num_ranks.thread; i++ ) + ser& recv_count[i]; + + ser& comm_send_map; + ser& comm_recv_map; + + // Unused + // link_map + + // No need to serialize + // remaining_deser + // queues (deserialize_queue, link_send_queue, serialize_queue, send_queue) + // barriers (serializeReadyBarrier, slaveExchangeDoneBarrier, allDoneBarrier) + // lock +} + + } // namespace SST diff --git a/src/sst/core/sync/rankSyncParallelSkip.h b/src/sst/core/sync/rankSyncParallelSkip.h index 6889f0130..8ca1c34b2 100644 --- a/src/sst/core/sync/rankSyncParallelSkip.h +++ b/src/sst/core/sync/rankSyncParallelSkip.h @@ -35,6 +35,7 @@ class RankSyncParallelSkip : public RankSync public: /** Create a new Sync object which fires with a specified period */ RankSyncParallelSkip(RankInfo num_ranks, TimeConverter* minPartTC); + RankSyncParallelSkip() {} // For serialization virtual ~RankSyncParallelSkip(); /** Register a Link which this Sync Object is responsible for */ @@ -53,6 +54,9 @@ class RankSyncParallelSkip : public RankSync uint64_t getDataSize() const override; + void serialize_order(SST::Core::Serialization::serializer& ser) override; + ImplementSerializable(SST::RankSyncParallelSkip) + private: static SimTime_t myNextSyncTime; @@ -60,15 +64,24 @@ class RankSyncParallelSkip : public RankSync void exchange_master(int thread); void exchange_slave(int thread); - struct comm_send_pair + struct comm_send_pair : public SST::Core::Serialization::serializable { RankInfo to_rank; SyncQueue* squeue; // SyncQueue char* sbuf; uint32_t remote_size; + + void serialize_order(SST::Core::Serialization::serializer& ser) + { + ser& to_rank; + // squeue - empty so recreate on restart + // sbuf - empty so recreate on restart + // remote_size - don't need + } + ImplementSerializable(comm_send_pair) }; - struct comm_recv_pair + struct comm_recv_pair : public SST::Core::Serialization::serializable { uint32_t remote_rank; uint32_t local_thread; @@ -79,6 +92,16 @@ class RankSyncParallelSkip : public RankSync #ifdef SST_CONFIG_HAVE_MPI MPI_Request req; #endif + void serialize_order(SST::Core::Serialization::serializer& ser) + { + ser& remote_rank; + ser& local_thread; + // activity_vec - empty so recreate on restart + // rbuf - empty so recreate on restart + // recv_done - don't need + // req - don't need + } + ImplementSerializable(comm_recv_pair) }; typedef std::map comm_send_map_t; diff --git a/src/sst/core/sync/rankSyncSerialSkip.cc b/src/sst/core/sync/rankSyncSerialSkip.cc index cea687084..c6499fdfa 100644 --- a/src/sst/core/sync/rankSyncSerialSkip.cc +++ b/src/sst/core/sync/rankSyncSerialSkip.cc @@ -333,4 +333,15 @@ RankSyncSerialSkip::exchangeLinkUntimedData(int UNUSED_WO_MPI(thread), std::atom #endif } +void +RankSyncSerialSkip::serialize_order(SST::Core::Serialization::serializer& ser) +{ + RankSync::serialize_order(ser); + // TODO Do we need to keep anything from comm_map? Certainly don't need the struct + // ser& comm_map; + ser& mpiWaitTime; + ser& deserializeTime; +} + + } // namespace SST diff --git a/src/sst/core/sync/rankSyncSerialSkip.h b/src/sst/core/sync/rankSyncSerialSkip.h index 71a4e62ca..6430194ca 100644 --- a/src/sst/core/sync/rankSyncSerialSkip.h +++ b/src/sst/core/sync/rankSyncSerialSkip.h @@ -28,6 +28,7 @@ class RankSyncSerialSkip : public RankSync public: /** Create a new Sync object which fires with a specified period */ RankSyncSerialSkip(RankInfo num_ranks, TimeConverter* minPartTC); + RankSyncSerialSkip() {} // For serialization virtual ~RankSyncSerialSkip(); /** Register a Link which this Sync Object is responsible for */ @@ -46,18 +47,24 @@ class RankSyncSerialSkip : public RankSync uint64_t getDataSize() const override; + void serialize_order(SST::Core::Serialization::serializer& ser) override; + ImplementSerializable(SST::RankSyncSerialSkip) + private: static SimTime_t myNextSyncTime; // Function that actually does the exchange during run void exchange(); - struct comm_pair + struct comm_pair : public SST::Core::Serialization::serializable { SyncQueue* squeue; // SyncQueue char* rbuf; // receive buffer uint32_t local_size; uint32_t remote_size; + + void serialize_order(SST::Core::Serialization::serializer& UNUSED(ser)) {} + ImplementSerializable(comm_pair) }; typedef std::map comm_map_t; diff --git a/src/sst/core/sync/syncManager.cc b/src/sst/core/sync/syncManager.cc index 4090d9adb..e9ef77ad2 100644 --- a/src/sst/core/sync/syncManager.cc +++ b/src/sst/core/sync/syncManager.cc @@ -82,6 +82,7 @@ class EmptyRankSync : public RankSync { public: EmptyRankSync(const RankInfo& num_ranks) : RankSync(num_ranks) { nextSyncTime = MAX_SIMTIME_T; } + EmptyRankSync() {} // For serialization ~EmptyRankSync() {} /** Register a Link which this Sync Object is responsible for */ @@ -119,6 +120,9 @@ class EmptyRankSync : public RankSync TimeConverter* getMaxPeriod() { return max_period; } uint64_t getDataSize() const override { return 0; } + + void serialize_order(SST::Core::Serialization::serializer& ser) override { RankSync::serialize_order(ser); } + ImplementSerializable(SST::EmptyRankSync) }; class EmptyThreadSync : public ThreadSync @@ -128,6 +132,7 @@ class EmptyThreadSync : public ThreadSync public: EmptyThreadSync(Simulation_impl* sim) : sim(sim) { nextSyncTime = MAX_SIMTIME_T; } + EmptyThreadSync() {} // For serialization ~EmptyThreadSync() {} void before() override {} @@ -143,6 +148,10 @@ class EmptyThreadSync : public ThreadSync { return nullptr; } + + /** Serialization for checkpoint support */ + void serialize_order(SST::Core::Serialization::serializer& ser) override { ThreadSync::serialize_order(ser); } + ImplementSerializable(EmptyThreadSync) }; void @@ -449,4 +458,27 @@ SyncManager::addProfileTool(Profile::SyncProfileTool* tool) profile_tools->addProfileTool(tool); } +void +SyncManager::serialize_order(SST::Core::Serialization::serializer& ser) +{ + Action::serialize_order(ser); + + // AHHHHHHHHHHHHHHHHH + ser& rank; // const causes problems + ser& num_ranks; // const again + + ser& next_rankSync; + ser& threadSync; + ser& next_sync_type; + ser& min_part; + + // No need to serialize + // RankExecBarrier + // LinkUntimedBarrier + // sim + // exit + // profile_tools + + // static RankSync* rankSync; +} } // namespace SST diff --git a/src/sst/core/sync/syncManager.h b/src/sst/core/sync/syncManager.h index e4effd08d..8bac59e8d 100644 --- a/src/sst/core/sync/syncManager.h +++ b/src/sst/core/sync/syncManager.h @@ -34,10 +34,11 @@ namespace Profile { class SyncProfileTool; } -class RankSync +class RankSync : public SST::Core::Serialization::serializable { public: RankSync(RankInfo num_ranks) : num_ranks(num_ranks) { link_maps.resize(num_ranks.rank); } + RankSync() {} virtual ~RankSync() {} /** Register a Link which this Sync Object is responsible for */ @@ -57,8 +58,14 @@ class RankSync virtual uint64_t getDataSize() const = 0; -protected: - SimTime_t nextSyncTime; + void serialize_order(SST::Core::Serialization::serializer& ser) override + { + ser& nextSyncTime; + ser& max_period; // Unused + // ser& num_ranks; // const so a pain to serialize but don't need it + ser& link_maps; + } + ImplementVirtualSerializable(SST::RankSync) protected : SimTime_t nextSyncTime; TimeConverter* max_period; const RankInfo num_ranks; @@ -77,7 +84,7 @@ class RankSync private: }; -class ThreadSync +class ThreadSync : public SST::Core::Serialization::serializable { public: ThreadSync() {} @@ -99,8 +106,14 @@ class ThreadSync virtual void registerLink(const std::string& name, Link* link) = 0; virtual ActivityQueue* registerRemoteLink(int tid, const std::string& name, Link* link) = 0; -protected: - SimTime_t nextSyncTime; + void serialize_order(SST::Core::Serialization::serializer& ser) override + { + ser& nextSyncTime; + ser& max_period; // Unused + } + ImplementVirtualSerializable(SST::ThreadSync) + + protected : SimTime_t nextSyncTime; TimeConverter* max_period; void finalizeConfiguration(Link* link) { link->finalizeConfiguration(); } @@ -122,6 +135,7 @@ class SyncManager : public Action SyncManager( const RankInfo& rank, const RankInfo& num_ranks, TimeConverter* minPartTC, SimTime_t min_part, const std::vector& interThreadLatencies); + SyncManager() {} // For serialization only virtual ~SyncManager(); /** Register a Link which this Sync Object is responsible for */ @@ -142,6 +156,8 @@ class SyncManager : public Action void addProfileTool(Profile::SyncProfileTool* tool); + void serialize_order(SST::Core::Serialization::serializer& ser) override; + ImplementSerializable(SST::SyncManager) private: enum sync_type_t { RANK, THREAD }; @@ -164,8 +180,6 @@ class SyncManager : public Action SyncProfileToolList* profile_tools = nullptr; void computeNextInsert(); - - NotSerializable(SST::SyncManager) }; } // namespace SST diff --git a/src/sst/core/sync/threadSyncDirectSkip.cc b/src/sst/core/sync/threadSyncDirectSkip.cc index fb4a5a8f6..064be1a6d 100644 --- a/src/sst/core/sync/threadSyncDirectSkip.cc +++ b/src/sst/core/sync/threadSyncDirectSkip.cc @@ -84,6 +84,22 @@ ThreadSyncDirectSkip::getDataSize() const return count; } +void +ThreadSyncDirectSkip::serialize_order(SST::Core::Serialization::serializer& ser) +{ + ThreadSync::serialize_order(ser); + ser& my_max_period; + ser& num_threads; + ser& thread; + ser& localMinimumNextActivityTime; + ser& totalWaitTime; + ser& single_rank; + + // No need to serialize + // sim + // barrier +} + Core::ThreadSafe::Barrier ThreadSyncDirectSkip::barrier[3]; diff --git a/src/sst/core/sync/threadSyncDirectSkip.h b/src/sst/core/sync/threadSyncDirectSkip.h index c031224a1..08701b23c 100644 --- a/src/sst/core/sync/threadSyncDirectSkip.h +++ b/src/sst/core/sync/threadSyncDirectSkip.h @@ -34,6 +34,7 @@ class ThreadSyncDirectSkip : public ThreadSync public: /** Create a new ThreadSync object */ ThreadSyncDirectSkip(int num_threads, int thread, Simulation_impl* sim); + ThreadSyncDirectSkip() {} // For serialization only ~ThreadSyncDirectSkip(); void setMaxPeriod(TimeConverter* period); @@ -59,6 +60,9 @@ class ThreadSyncDirectSkip : public ThreadSync uint64_t getDataSize() const; + void serialize_order(SST::Core::Serialization::serializer& ser) override; + ImplementSerializable(SST::ThreadSyncDirectSkip) + private: SimTime_t my_max_period; int num_threads; diff --git a/src/sst/core/sync/threadSyncSimpleSkip.cc b/src/sst/core/sync/threadSyncSimpleSkip.cc index 10318246d..33c1cf170 100644 --- a/src/sst/core/sync/threadSyncSimpleSkip.cc +++ b/src/sst/core/sync/threadSyncSimpleSkip.cc @@ -177,6 +177,25 @@ ThreadSyncSimpleSkip::getDataSize() const return count; } +void +ThreadSyncSimpleSkip::serialize_order(SST::Core::Serialization::serializer& ser) +{ + ThreadSync::serialize_order(ser); + ser& my_max_period; + ser& num_threads; + ser& thread; + ser& localMinimumNextActivityTime; + ser& totalWaitTime; + ser& single_rank; + + // No need to serialize + // link_map - unused after construction + // sim - regenerate + // barrier - regenerate & guarantee empty during checkpoint + // lock - regenerate + // queues - empty +} + Core::ThreadSafe::Barrier ThreadSyncSimpleSkip::barrier[3]; } // namespace SST diff --git a/src/sst/core/sync/threadSyncSimpleSkip.h b/src/sst/core/sync/threadSyncSimpleSkip.h index a9eaa2320..5d9b8e368 100644 --- a/src/sst/core/sync/threadSyncSimpleSkip.h +++ b/src/sst/core/sync/threadSyncSimpleSkip.h @@ -35,6 +35,7 @@ class ThreadSyncSimpleSkip : public ThreadSync public: /** Create a new ThreadSync object */ ThreadSyncSimpleSkip(int num_threads, int thread, Simulation_impl* sim); + ThreadSyncSimpleSkip() {} // For serialization only ~ThreadSyncSimpleSkip(); void setMaxPeriod(TimeConverter* period); @@ -57,6 +58,9 @@ class ThreadSyncSimpleSkip : public ThreadSync // static void disable() { disabled = true; barrier.disable(); } + void serialize_order(SST::Core::Serialization::serializer& ser) override; + ImplementSerializable(SST::ThreadSyncSimpleSkip) + private: // Stores the links until they can be intialized with the right // remote data. It will hold whichever thread registers the link diff --git a/src/sst/core/testElements/coreTest_Serialization.cc b/src/sst/core/testElements/coreTest_Serialization.cc index 485032cae..e8afacbfc 100644 --- a/src/sst/core/testElements/coreTest_Serialization.cc +++ b/src/sst/core/testElements/coreTest_Serialization.cc @@ -67,6 +67,27 @@ checkContainerSerializeDeserialize(T& data) return true; }; +// For ordered but non-iterable contaienrs +template +bool +checkNonIterableContainerSerializeDeserialize(T& data) +{ + auto buffer = SST::Comms::serialize(data); + T result; + SST::Comms::deserialize(buffer, result); + + if ( data.size() != result.size() ) return false; + + while ( !data.empty() ) { + auto data_val = data.top(); + auto result_val = result.top(); + if ( data_val != result_val ) return false; + data.pop(); + result.pop(); + } + return true; +}; + // For unordered containers template bool @@ -320,6 +341,12 @@ coreTestSerialization::coreTestSerialization(ComponentId_t id, Params& params) : deque_in.push_back(rng->generateNextInt32()); passed = checkContainerSerializeDeserialize(deque_in); if ( !passed ) out.output("ERROR: deque did not serialize/deserialize properly\n"); + + std::priority_queue priority_queue_in; + for ( int i = 0; i < 10; ++i ) + priority_queue_in.push(rng->generateNextInt32()); + passed = checkNonIterableContainerSerializeDeserialize(priority_queue_in); + if ( !passed ) out.output("ERROR: priority_queue did not serialize/deserialize properly\n"); } else if ( test == "unordered_containers" ) { // Unordered Containers @@ -629,7 +656,15 @@ coreTestSerialization::coreTestSerialization(ComponentId_t id, Params& params) : info2.test_printComponentInfoHierarchy(); } - else { + else if ( test == "atomic" ) { + std::atomic atom(12); + + auto buffer = SST::Comms::serialize(atom); + std::atomic result; + SST::Comms::deserialize(buffer, result); + passed = (atom.load() == result.load()) ? true : false; + if ( !passed ) out.output("ERROR: std::atomic did not serialize/deserialize properly\n"); + } else { out.fatal(CALL_INFO_LONG, 1, "ERROR: Unknown serialization test specified: %s\n", test.c_str()); } } diff --git a/src/sst/core/timeVortex.h b/src/sst/core/timeVortex.h index 438329146..966e3edf0 100644 --- a/src/sst/core/timeVortex.h +++ b/src/sst/core/timeVortex.h @@ -14,6 +14,8 @@ #include "sst/core/activityQueue.h" #include "sst/core/module.h" +#include "sst/core/serialization/serialize_impl_fwd.h" +#include "sst/core/simulation_impl.h" namespace SST { @@ -43,11 +45,39 @@ class TimeVortex : public ActivityQueue virtual void print(Output& out) const = 0; virtual uint64_t getMaxDepth() const { return max_depth; } virtual uint64_t getCurrentDepth() const = 0; + virtual void dbg_print(Output& out) { print(out); } + + virtual void serialize_order(SST::Core::Serialization::serializer& ser) { ser& max_depth; } protected: uint64_t max_depth; }; +template <> +class SST::Core::Serialization::serialize_impl +{ + + template + friend class serialize; + void operator()(TimeVortex*& s, SST::Core::Serialization::serializer& ser) + { + switch ( ser.mode() ) { + case serializer::SIZER: + case serializer::PACK: + ser& Simulation_impl::getSimulation()->timeVortexType; + s->serialize_order(ser); + break; + case serializer::UNPACK: + std::string tv_type; + ser& tv_type; + Params p; + s = Factory::getFactory()->Create(tv_type, p); + s->serialize_order(ser); + break; + } + } +}; + } // namespace SST #endif // SST_CORE_TIMEVORTEX_H diff --git a/tests/test_Checkpoint.py b/tests/test_Checkpoint.py index 6d8b29e8d..17d882fc9 100644 --- a/tests/test_Checkpoint.py +++ b/tests/test_Checkpoint.py @@ -1,5 +1,7 @@ import sst +sst.setProgramOption("checkpoint-period", "500us") + # Define the simulation components comp_c0 = sst.Component("c0", "coreTestElement.coreTestCheckpoint") comp_c0.addParams({ diff --git a/tests/testsuite_default_Serialization.py b/tests/testsuite_default_Serialization.py index a62bc05e3..b6d173f7b 100644 --- a/tests/testsuite_default_Serialization.py +++ b/tests/testsuite_default_Serialization.py @@ -35,7 +35,7 @@ def initializeTestModule_SingleInstance(class_inst): ################################################################################ -class testcase_StatisticComponent(SSTTestCase): +class testcase_Serialization(SSTTestCase): def initializeClass(self, testName): super(type(self), self).initializeClass(testName) @@ -74,6 +74,9 @@ def test_Serialization_handler(self): def test_Serialization_component_info(self): self.serialization_test_template("componentinfo", False) + def test_Serialization_atomic(self): + self.serialization_test_template("atomic") + ##### def serialization_test_template(self, testtype, default_reffile = True): @@ -89,7 +92,6 @@ def serialization_test_template(self, testtype, default_reffile = True): outfile = "{0}/test_Serialization_{1}.out".format(outdir,testtype) options = "--model-options=\"{0}\"".format(testtype) - # Force serial run since the serialization is all done in-situ self.run_sst(sdlfile, outfile, num_ranks=1, num_threads=1, other_args=options)