Skip to content

Commit

Permalink
Allow starting environments in single (serial) mode without mpirun if required. Par…
Browse files Browse the repository at this point in the history
…ameter added to configuration file.
  • Loading branch information
m-kurz committed Aug 4, 2022
1 parent 3380227 commit e2c50b5
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 21 deletions.
3 changes: 2 additions & 1 deletion examples/HIT_24_DOF/prm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ environment:
mesh_file: ./simulation_files/CART_HEX_PERIODIC_004_mesh.h5
#local_dir: /var/tmp
mpi_launch_mpmd: False
env_launcher: local

# All parameters for setting up the reward
reward:
Expand All @@ -28,7 +29,7 @@ reward:
training:
train_num_epochs: 5
train_learning_rate: 1.e-4
train_num_iterations: 10
train_num_iterations: 1
train_buffer_capacity: 2000
train_files:
- ./simulation_files/run_f200_N5_4Elems_State_0000003.000000000.h5
Expand Down
11 changes: 6 additions & 5 deletions examples/HIT_32_DOF/prm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ environment:
mesh_file: ./simulation_files/CART_HEX_PERIODIC_004_mesh.h5
#local_dir: /var/tmp
mpi_launch_mpmd: False
env_launcher: local

# All parameters for setting up the reward
reward:
Expand All @@ -31,10 +32,10 @@ training:
train_num_iterations: 10
train_buffer_capacity: 2000
train_files:
- ./simulation_files/N7_4Elems/run_f200_N7_4Elems_State_0000003.000000000.h5
- ./simulation_files/N7_4Elems/run_f200_N7_4Elems_State_0000004.000000000.h5
- ./simulation_files/N7_4Elems/run_f200_N7_4Elems_State_0000005.000000000.h5
- ./simulation_files/N7_4Elems/run_f200_N7_4Elems_State_0000006.000000000.h5
- ./simulation_files/run_f200_N7_4Elems_State_0000003.000000000.h5
- ./simulation_files/run_f200_N7_4Elems_State_0000004.000000000.h5
- ./simulation_files/run_f200_N7_4Elems_State_0000005.000000000.h5
- ./simulation_files/run_f200_N7_4Elems_State_0000006.000000000.h5

# All parameters for the agent
agent:
Expand All @@ -53,7 +54,7 @@ evaluation:
eval_num_episodes: 1
eval_interval: 10
eval_files:
- ./simulation_files/N7_4Elems/run_f200_N7_4Elems_State_0000008.000000000.h5
- ./simulation_files/run_f200_N7_4Elems_State_0000008.000000000.h5

# Performance
performance:
Expand Down
44 changes: 30 additions & 14 deletions src/flexiEnvSmartSim.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from smartsim import Experiment
from smartsim.database import PBSOrchestrator,Orchestrator
from smartsim.settings import MpirunSettings
from smartsim.settings import MpirunSettings,RunSettings

from smartredis import Client

Expand Down Expand Up @@ -67,6 +67,7 @@ def __init__( self
,hosts = None
,rankfiles = None
,mpi_launch_mpmd = False
,env_launcher = 'mpirun'
):
"""Initialize TF and FLEXI specific properties"""

Expand All @@ -83,7 +84,18 @@ def __init__( self
self.flexi_path = flexi_path
self.hosts = hosts
self.rankfiles = rankfiles
self.mpi_launch_mpmd = mpi_launch_mpmd

# Sanity Check Launcher
self.env_launcher = env_launcher
if ((self.env_launcher == 'local') and (n_procs != 1)):
printWarning("For env_launcher 'local', only single execution is allowed! Setting 'n_procs=1'!")
printWarning("To run evironments in parallel with MPI, use env_launcher='mpi'!")
n_procs = 1

if (self.env_launcher == 'mpirun'):
self.mpi_launch_mpmd = mpi_launch_mpmd
else:
self.mpi_launch_mpmd = False

# Save list of restart files
self.random_restart_file = random_restart_file
Expand Down Expand Up @@ -216,21 +228,25 @@ def _start_flexi(self,exp,n_procs,n_envs):
args.append('--tag')
args.append(self.tag[i])

run_args = {"rankfile" : self.rankfiles[i] ,"report-bindings" : ""}
mpi = MpirunSettings(exe=self.flexi_path, exe_args=args, run_args=run_args)
mpi.set_tasks(n_procs)
mpi.set_hostlist(self.hosts[hosts_per_flexi[i,0]:hosts_per_flexi[i,1]+1])
if (self.env_launcher == 'mpirun'):
run_args = {"rankfile" : self.rankfiles[i] ,"report-bindings" : ""}
run = MpirunSettings(exe=self.flexi_path, exe_args=args, run_args=run_args)
run.set_tasks(n_procs)
run.set_hostlist(self.hosts[hosts_per_flexi[i,0]:hosts_per_flexi[i,1]+1])

# Create MPMD Settings and start later in single command
if self.mpi_launch_mpmd:
if i==0:
f_mpmd = mpi
else:
f_mpmd.make_mpmd(mpi)
# Create MPMD Settings and start later in single command
if self.mpi_launch_mpmd:
if i==0:
f_mpmd = run
else:
f_mpmd.make_mpmd(run)

else: # Otherwise do not use launcher
run = RunSettings(exe=self.flexi_path, exe_args=args)

# Create and directly start FLEXI instances
else:
flexi_instance = exp.create_model(self.tag[i]+"flexi", mpi)
if not self.mpi_launch_mpmd:
flexi_instance = exp.create_model(self.tag[i]+"flexi", run)
exp.start(flexi_instance, block=False,summary=False)
flexi.append(flexi_instance)

Expand Down
5 changes: 4 additions & 1 deletion src/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ def train( config_file
,smartsim_num_dbs = 1
,smartsim_launcher = "local"
,smartsim_orchestrator = "local"
,env_launcher = "mpirun"
,mpi_launch_mpmd = False
,local_dir = None
,n_procs_per_node=128 # Hawk
Expand Down Expand Up @@ -483,6 +484,7 @@ def train( config_file
,reward_scale = reward_scale
,restart_files = train_files
,rankfiles = rank_files
,env_launcher = env_launcher
,mpi_launch_mpmd = mpi_launch_mpmd
,debug = debug
))
Expand All @@ -509,8 +511,9 @@ def train( config_file
,reward_scale = reward_scale
,restart_files = eval_files
,random_restart_file = False
,debug = debug
,rankfiles = rank_files
,env_launcher = env_launcher
,debug = debug
))


Expand Down

0 comments on commit e2c50b5

Please sign in to comment.