-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Geoffroy Vallee <[email protected]>
- Loading branch information
Showing
16 changed files
with
916 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
module github.com/gvallee/go_hpc_jobmgr | ||
|
||
go 1.13 | ||
|
||
require ( | ||
github.com/gvallee/go_exec v0.0.1 | ||
github.com/gvallee/go_util v1.0.1 | ||
github.com/gvallee/kv v1.0.0 | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
github.com/gvallee/go_exec v0.0.1 h1:yNP5/fTWnnym4wT17JIEBiRAxOEhclOePdwxCxWsEZ8= | ||
github.com/gvallee/go_exec v0.0.1/go.mod h1:4AwegK9oPhkgwkd0rjlTwxRw//8cW4pPcCSFLZ6+LZg= | ||
github.com/gvallee/go_util v1.0.1/go.mod h1:fTexpwdH/n05Ziu0TXJIQsr7E+46QpBxNdeOOsyC0/s= | ||
github.com/gvallee/kv v1.0.0/go.mod h1:sfSclfFfLV+Y+9e9FayIbBUOtvbt1779S6q52bSSU5E= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
// Copyright (c) 2019, Sylabs Inc. All rights reserved. | ||
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. | ||
// This software is licensed under a 3-clause BSD license. Please consult the | ||
// LICENSE.md file distributed with the sources of this project regarding your | ||
// rights to use or distribute this software. | ||
|
||
package job | ||
|
||
import ( | ||
"bytes" | ||
|
||
"github.com/gvallee/go_hpc_jobmgr/internal/pkg/sys" | ||
"github.com/gvallee/go_hpc_jobmgr/pkg/app" | ||
"github.com/gvallee/go_hpc_jobmgr/pkg/implem" | ||
"github.com/gvallee/go_hpc_jobmgr/pkg/mpi" | ||
) | ||
|
||
// CleanUpFn is the signature of the callback to call to clean up the system after the completion of a job. | ||
// It accepts an arbitrary list of arguments and reports a failure through the returned error. | ||
type CleanUpFn func(...interface{}) error | ||
|
||
// GetOutputFn is the signature of the callback to call to gather the output of an application after completion of a job. | ||
// It receives the job and the system configuration and returns the output as a string. | ||
type GetOutputFn func(*Job, *sys.Config) string | ||
|
||
// GetErrorFn is the signature of the callback to call to gather stderr from an application after completion of a job. | ||
// It receives the job and the system configuration and returns the stderr content as a string. | ||
type GetErrorFn func(*Job, *sys.Config) string | ||
|
||
// Job represents a job, i.e., everything needed to launch an application through a job manager | ||
// and to gather its output once completed | ||
type Job struct { | ||
// Name is the name of the job | ||
Name string | ||
|
||
// NP is the number of ranks | ||
NP int | ||
|
||
// NNodes is the number of nodes | ||
NNodes int | ||
|
||
// CleanUp is the function to call once the job is completed to clean the system | ||
CleanUp CleanUpFn | ||
|
||
// BatchScript is the path to the script required to start a job (optional) | ||
BatchScript string | ||
|
||
// HostCfg is the MPI configuration to use on the host | ||
HostCfg *implem.Info | ||
|
||
// App describes the application to start: its name, the name and path of its binary, | ||
// and the arguments of the binary | ||
App app.Info | ||
|
||
// OutBuffer is a buffer with the output of the job | ||
OutBuffer bytes.Buffer | ||
|
||
// ErrBuffer is a buffer with the stderr of the job | ||
ErrBuffer bytes.Buffer | ||
|
||
// GetOutput is the function to call to gather the output of the application based on the use of a given job manager | ||
GetOutput GetOutputFn | ||
|
||
// GetError is the function to call to gather stderr of the application based on the use of a given job manager | ||
GetError GetErrorFn | ||
|
||
// Args is a set of arguments to be used for launching the job | ||
Args []string | ||
|
||
// MPICfg is the MPI configuration to use for the execution of the job | ||
MPICfg *mpi.Config | ||
|
||
// Partition is the name of the partition to use with the jobmgr (optional) | ||
Partition string | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
// Copyright (c) 2019, Sylabs Inc. All rights reserved. | ||
// This software is licensed under a 3-clause BSD license. Please consult the | ||
// LICENSE.md file distributed with the sources of this project regarding your | ||
// rights to use or distribute this software. | ||
|
||
package mpich | ||
|
||
// Tags used to refer to MPI-specific values in MPICH template(s) | ||
const ( | ||
// VersionTag is the tag used to refer to the MPI version in MPICH template(s) | ||
VersionTag = "MPICHVERSION" | ||
// URLTag is the tag used to refer to the MPI URL in MPICH template(s) | ||
URLTag = "MPICHURL" | ||
// TarballTag is the tag used to refer to the MPI tarball in MPICH template(s) | ||
TarballTag = "MPICHTARBALL" | ||
) | ||
|
||
// MPICHGetExtraMpirunArgs reports the additional mpirun arguments that MPICH
// requires for a specific configuration. No extra argument is needed at the
// moment, so the returned slice is always nil.
func MPICHGetExtraMpirunArgs() []string {
	return nil
}
|
||
// MPICHGetConfigureExtraArgs reports the additional arguments needed when
// configuring MPICH. No extra argument is needed at the moment, so the
// returned slice is always nil.
func MPICHGetConfigureExtraArgs() []string {
	return nil
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
// Copyright (c) 2019, Sylabs Inc. All rights reserved. | ||
// This software is licensed under a 3-clause BSD license. Please consult the | ||
// LICENSE.md file distributed with the sources of this project regarding your | ||
// rights to use or distribute this software. | ||
|
||
package openmpi | ||
|
||
import ( | ||
"github.com/gvallee/go_hpc_jobmgr/internal/pkg/sys" | ||
) | ||
|
||
// Tags used to refer to MPI-specific values in Open MPI template(s) | ||
const ( | ||
// VersionTag is the tag used to refer to the MPI version in Open MPI template(s) | ||
VersionTag = "OMPIVERSION" | ||
|
||
// URLTag is the tag used to refer to the MPI URL in Open MPI template(s) | ||
URLTag = "OMPIURL" | ||
|
||
// TarballTag is the tag used to refer to the MPI tarball in Open MPI template(s) | ||
TarballTag = "OMPITARBALL" | ||
) | ||
|
||
// GetExtraMpirunArgs returns the set of arguments required for the mpirun command for the target platform | ||
func GetExtraMpirunArgs(sys *sys.Config) []string { | ||
var extraArgs []string | ||
/* | ||
if sys.IBEnabled { | ||
extraArgs = append(extraArgs, "--mca") | ||
extraArgs = append(extraArgs, "btl") | ||
extraArgs = append(extraArgs, "openib,self,vader") | ||
} | ||
*/ | ||
|
||
return extraArgs | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
// Copyright (c) 2019, Sylabs Inc. All rights reserved. | ||
// This software is licensed under a 3-clause BSD license. Please consult the | ||
// LICENSE.md file distributed with the sources of this project regarding your | ||
// rights to use or distribute this software. | ||
|
||
package slurm | ||
|
||
const ( | ||
// PartitionKey is the key to use to retrieve the optional partition id that | ||
// can be specified in the tool's configuration file. | ||
PartitionKey = "slurm_partition" | ||
|
||
// EnabledKey is the key used in the singularity-mpi.conf file to specify if Slurm shall be used | ||
EnabledKey = "enable_slurm" | ||
|
||
// ScriptCmdPrefix is the prefix to add to a script | ||
ScriptCmdPrefix = "#SBATCH" | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. | ||
// This software is licensed under a 3-clause BSD license. Please consult the | ||
// LICENSE.md file distributed with the sources of this project regarding your | ||
// rights to use or distribute this software. | ||
|
||
package sys | ||
|
||
// Config gathers the system-level settings handed to the job managers and to the job callbacks | ||
type Config struct { | ||
// ScratchDir is presumably the directory to use for scratch data; it is not | ||
// read by any code visible in this change (TODO confirm) | ||
ScratchDir string | ||
|
||
// Persistent selects where batch scripts are stored: when empty, jm.TempFile | ||
// creates the script as a temporary file; otherwise the script path is placed | ||
// in the MPI installation directory under a name derived from the job name | ||
Persistent string | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. | ||
// This software is licensed under a 3-clause BSD license. Please consult the | ||
// LICENSE.md file distributed with the sources of this project regarding your | ||
// rights to use or distribute this software. | ||
|
||
package app | ||
|
||
// Info gathers information about a given application | ||
type Info struct { | ||
// Name is the name of the application | ||
Name string | ||
|
||
// BinName is the name of the binary to start executing the application | ||
BinName string | ||
|
||
// BinPath is the path to the binary to start executing the application | ||
BinPath string | ||
|
||
// BinArgs is the list of arguments that the application's binary needs | ||
BinArgs []string | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
// Copyright (c) 2019, Sylabs Inc. All rights reserved. | ||
// This software is licensed under a 3-clause BSD license. Please consult the | ||
// LICENSE.md file distributed with the sources of this project regarding your | ||
// rights to use or distribute this software. | ||
|
||
package implem | ||
|
||
const (
	// OMPI identifies the Open MPI implementation
	OMPI = "openmpi"

	// MPICH identifies the MPICH implementation
	MPICH = "mpich"
)

// Info describes a specific MPI implementation
type Info struct {
	// ID is the string identifying the MPI implementation (e.g., OMPI or MPICH)
	ID string

	// Version is the version of the MPI implementation
	Version string

	// InstallDir is the directory where the MPI implementation is installed
	InstallDir string
}

// IsMPI reports whether i describes a known MPI implementation, i.e., whether
// i is non-nil and its ID is either OMPI or MPICH.
func IsMPI(i *Info) bool {
	if i == nil {
		return false
	}

	switch i.ID {
	case OMPI, MPICH:
		return true
	default:
		return false
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
// Copyright (c) 2019, Sylabs Inc. All rights reserved. | ||
// This software is licensed under a 3-clause BSD license. Please consult the | ||
// LICENSE.md file distributed with the sources of this project regarding your | ||
// rights to use or distribute this software. | ||
|
||
package jm | ||
|
||
import ( | ||
"fmt" | ||
"io/ioutil" | ||
"log" | ||
"os" | ||
"path/filepath" | ||
|
||
"github.com/gvallee/go_exec/pkg/advexec" | ||
"github.com/gvallee/go_hpc_jobmgr/internal/pkg/job" | ||
"github.com/gvallee/go_hpc_jobmgr/internal/pkg/sys" | ||
"github.com/gvallee/go_util/pkg/util" | ||
) | ||
|
||
// Identifiers of the supported job managers; one of them is stored in JM.ID | ||
const ( | ||
// NativeID is the value set to JM.ID when mpirun shall be used to submit a job | ||
NativeID = "native" | ||
|
||
// SlurmID is the value set to JM.ID when Slurm shall be used to submit a job | ||
SlurmID = "slurm" | ||
|
||
// PrunID is the value set to JM.ID when prun shall be used to submit a job | ||
PrunID = "prun" | ||
) | ||
|
||
// Environment gathers an installation directory and an MPI binary. | ||
// NOTE(review): nothing in the visible code uses this type; confirm its intended purpose. | ||
type Environment struct { | ||
// InstallDir is an installation directory, presumably where MPI or the job manager is installed (TODO confirm) | ||
InstallDir string | ||
|
||
// mpiBin is presumably the path to the MPI launcher binary (TODO confirm) | ||
mpiBin string | ||
} | ||
|
||
// Loader checks whether a given job manager is applicable or not | ||
type Loader interface { | ||
// Load presumably reports true when the job manager is applicable (TODO confirm) | ||
Load() bool | ||
} | ||
|
||
// LoadFn is the signature of the function that loads a specific job manager once detected. | ||
// It receives the job manager and the system configuration and reports a failure through the returned error. | ||
type LoadFn func(*JM, *sys.Config) error | ||
|
||
// SubmitFn is a "function pointer" that lets us submit a new job. | ||
// It receives the job and the system configuration and returns an advexec.Advcmd and an error. | ||
type SubmitFn func(*job.Job, *sys.Config) (advexec.Advcmd, error) | ||
|
||
// JM is the structure representing a specific job manager (JM) | ||
type JM struct { | ||
// ID identifies which job manager has been detected on the system | ||
// (NativeID, SlurmID or PrunID) | ||
ID string | ||
|
||
// Load is the function to call to load the current job manager | ||
Load LoadFn | ||
|
||
// Submit is the function to submit a job through the current job manager | ||
Submit SubmitFn | ||
} | ||
|
||
// Detect figures out which job manager must be used on the system and return a | ||
// structure that gather all the data necessary to interact with it | ||
func Detect() JM { | ||
// Default job manager | ||
loaded, comp := NativeDetect() | ||
if !loaded { | ||
log.Fatalln("unable to find a default job manager") | ||
} | ||
|
||
// Now we check if we can find better | ||
loaded, slurmComp := SlurmDetect() | ||
if loaded { | ||
return slurmComp | ||
} | ||
|
||
loaded, prunComp := PrunDetect() | ||
if loaded { | ||
return prunComp | ||
} | ||
|
||
return comp | ||
} | ||
|
||
// Load is the function to use to load the JM component. | ||
// It currently does nothing: jm is not used and nil is always returned. | ||
func Load(jm *JM) error { | ||
return nil | ||
} | ||
|
||
// TempFile creates a temporary file that is used to store a batch script | ||
func TempFile(j *job.Job, sysCfg *sys.Config) error { | ||
filePrefix := "sbash-" + j.Name | ||
path := "" | ||
if sysCfg.Persistent == "" { | ||
f, err := ioutil.TempFile("", filePrefix+"-") | ||
if err != nil { | ||
return fmt.Errorf("failed to create temporary file: %s", err) | ||
} | ||
path = f.Name() | ||
f.Close() | ||
j.BatchScript = path | ||
} else { | ||
fileName := filePrefix + ".sh" | ||
path = filepath.Join(j.MPICfg.Implem.InstallDir, fileName) | ||
j.BatchScript = path | ||
if util.PathExists(path) { | ||
return fmt.Errorf("file %s already exists", path) | ||
} | ||
} | ||
|
||
j.CleanUp = func(...interface{}) error { | ||
err := os.RemoveAll(path) | ||
if err != nil { | ||
return fmt.Errorf("unable to delete %s: %s", path, err) | ||
} | ||
return nil | ||
} | ||
|
||
return nil | ||
} |
Oops, something went wrong.