Skip to content

Commit

Permalink
First version as separate package
Browse files Browse the repository at this point in the history
Signed-off-by: Geoffroy Vallee <[email protected]>
  • Loading branch information
gvallee committed Jan 19, 2021
1 parent 31a81c3 commit ebd0d37
Show file tree
Hide file tree
Showing 16 changed files with 916 additions and 0 deletions.
9 changes: 9 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module github.com/gvallee/go_hpc_jobmgr

go 1.13

require (
github.com/gvallee/go_exec v0.0.1
github.com/gvallee/go_util v1.0.1
github.com/gvallee/kv v1.0.0
)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
github.com/gvallee/go_exec v0.0.1 h1:yNP5/fTWnnym4wT17JIEBiRAxOEhclOePdwxCxWsEZ8=
github.com/gvallee/go_exec v0.0.1/go.mod h1:4AwegK9oPhkgwkd0rjlTwxRw//8cW4pPcCSFLZ6+LZg=
github.com/gvallee/go_util v1.0.1/go.mod h1:fTexpwdH/n05Ziu0TXJIQsr7E+46QpBxNdeOOsyC0/s=
github.com/gvallee/kv v1.0.0/go.mod h1:sfSclfFfLV+Y+9e9FayIbBUOtvbt1779S6q52bSSU5E=
70 changes: 70 additions & 0 deletions internal/pkg/job/job.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright (c) 2019, Sylabs Inc. All rights reserved.
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.

package job

import (
"bytes"

"github.com/gvallee/go_hpc_jobmgr/internal/pkg/sys"
"github.com/gvallee/go_hpc_jobmgr/pkg/app"
"github.com/gvallee/go_hpc_jobmgr/pkg/implem"
"github.com/gvallee/go_hpc_jobmgr/pkg/mpi"
)

// CleanUpFn is the signature of the callback to invoke to clean up the system
// after the completion of a job. It accepts an arbitrary list of arguments and
// returns an error when the clean-up fails.
type CleanUpFn func(...interface{}) error

// GetOutputFn is the signature of the callback to invoke to gather the output
// of an application after the completion of a job. It receives the job and the
// system configuration and returns the output as a string.
type GetOutputFn func(*Job, *sys.Config) string

// GetErrorFn is the signature of the callback to invoke to gather stderr from
// an application after the completion of a job. It receives the job and the
// system configuration and returns the content of stderr as a string.
type GetErrorFn func(*Job, *sys.Config) string

// Job represents a job: the application to run, the resources it requests,
// the MPI configuration to use and the hooks used to collect its results.
type Job struct {
// Name is the name of the job
Name string

// NP is the number of ranks
NP int

// NNodes is the number of nodes
NNodes int

// CleanUp is the function to call once the job is completed to clean the system
CleanUp CleanUpFn

// BatchScript is the path to the script required to start a job (optional)
BatchScript string

// HostCfg is the MPI configuration to use on the host
HostCfg *implem.Info

// App describes the application to start (name, binary name, binary path
// and binary arguments, see app.Info)
App app.Info

// OutBuffer is a buffer with the output of the job
OutBuffer bytes.Buffer

// ErrBuffer is a buffer with the stderr of the job
ErrBuffer bytes.Buffer

// GetOutput is the function to call to gather the output of the application based on the use of a given job manager
GetOutput GetOutputFn

// GetError is the function to call to gather stderr of the application based on the use of a given job manager
GetError GetErrorFn

// Args is a set of arguments to be used for launching the job
Args []string

// MPICfg is the MPI configuration to use for the execution of the job
MPICfg *mpi.Config

// Partition is the name of the partition to use with the jobmgr (optional)
Partition string
}
27 changes: 27 additions & 0 deletions internal/pkg/mpich/mpich.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) 2019, Sylabs Inc. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.

package mpich

// Tags used as placeholders in the MPICH template(s).
const (
// VersionTag is the tag used to refer to the MPI version in MPICH template(s).
VersionTag = "MPICHVERSION"
// URLTag is the tag used to refer to the MPI URL in MPICH template(s).
URLTag = "MPICHURL"
// TarballTag is the tag used to refer to the MPI tarball in MPICH template(s).
TarballTag = "MPICHTARBALL"
)

// MPICHGetExtraMpirunArgs returns the extra mpirun arguments required by MPICH
// for a specific configuration. MPICH does not need any at the moment, so the
// result is always a nil (empty) slice.
func MPICHGetExtraMpirunArgs() []string {
	return nil
}

// MPICHGetConfigureExtraArgs returns the extra arguments required to configure
// MPICH. None are needed at the moment, so the result is always a nil (empty)
// slice.
func MPICHGetConfigureExtraArgs() []string {
	return nil
}
35 changes: 35 additions & 0 deletions internal/pkg/openmpi/openmpi.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (c) 2019, Sylabs Inc. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.

package openmpi

import (
"github.com/gvallee/go_hpc_jobmgr/internal/pkg/sys"
)

// Tags used as placeholders in the Open MPI template(s).
const (
// VersionTag is the tag used to refer to the MPI version in Open MPI template(s).
VersionTag = "OMPIVERSION"

// URLTag is the tag used to refer to the MPI URL in Open MPI template(s).
URLTag = "OMPIURL"

// TarballTag is the tag used to refer to the MPI tarball in Open MPI template(s).
TarballTag = "OMPITARBALL"
)

// GetExtraMpirunArgs returns the set of extra arguments required for the
// mpirun command on the target platform.
//
// No extra argument is generated at the moment: the sys argument is not
// consulted and the returned slice is always nil.
func GetExtraMpirunArgs(sys *sys.Config) []string {
	var mpirunArgs []string

	// Disabled: selection of the Infiniband BTL. Kept for reference only; the
	// condition relies on an IBEnabled field that sys.Config does not declare.
	/*
		if sys.IBEnabled {
			mpirunArgs = append(mpirunArgs, "--mca", "btl", "openib,self,vader")
		}
	*/

	return mpirunArgs
}
18 changes: 18 additions & 0 deletions internal/pkg/slurm/slurm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (c) 2019, Sylabs Inc. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.

package slurm

const (
// PartitionKey is the key to use to retrieve the optional partition id that
// can be specified in the tool's configuration file.
PartitionKey = "slurm_partition"

// EnabledKey is the key used in the singularity-mpi.conf file to specify if Slurm shall be used
EnabledKey = "enable_slurm"

// ScriptCmdPrefix is the prefix to add to a Slurm directive in a batch script
ScriptCmdPrefix = "#SBATCH"
)
12 changes: 12 additions & 0 deletions internal/pkg/sys/sys.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.

package sys

// Config gathers the system-level settings that are passed to the job
// callbacks and to the job managers.
type Config struct {
// ScratchDir is presumably the path to a scratch directory.
// NOTE(review): not used by the code visible in this commit - confirm.
ScratchDir string

// Persistent selects where batch scripts are stored: when empty,
// jm.TempFile creates them as temporary files; when non-empty, they are
// placed in the install directory of the MPI implementation.
// NOTE(review): the meaning of the value itself is not visible here - confirm.
Persistent string
}
21 changes: 21 additions & 0 deletions pkg/app/app.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.

package app

// Info gathers information about a given application.
type Info struct {
// Name is the name of the application
Name string

// BinName is the name of the binary to start executing the application
BinName string

// BinPath is the path to the binary to start executing the application
BinPath string

// BinArgs is the list of arguments that the application's binary needs
BinArgs []string
}
35 changes: 35 additions & 0 deletions pkg/implem/implem.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (c) 2019, Sylabs Inc. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.

package implem

// Identifiers of the MPI implementations recognized by IsMPI.
const (
// OMPI is the identifier for Open MPI
OMPI = "openmpi"

// MPICH is the identifier for MPICH
MPICH = "mpich"
)

// Info gathers all data about a specific MPI implementation.
type Info struct {
// ID is the string identifying the MPI implementation (e.g., OMPI or MPICH)
ID string

// Version is the version of the MPI implementation
Version string

// InstallDir is where the MPI implementation is installed
InstallDir string
}

// IsMPI reports whether the information passed in describes a known MPI
// implementation, i.e., whether its ID is OMPI or MPICH. A nil argument is
// never an MPI implementation.
func IsMPI(i *Info) bool {
	if i == nil {
		return false
	}

	switch i.ID {
	case OMPI, MPICH:
		return true
	default:
		return false
	}
}
118 changes: 118 additions & 0 deletions pkg/jm/jm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Copyright (c) 2019, Sylabs Inc. All rights reserved.
// This software is licensed under a 3-clause BSD license. Please consult the
// LICENSE.md file distributed with the sources of this project regarding your
// rights to use or distribute this software.

package jm

import (
"fmt"
"io/ioutil"
"log"
"os"
"path/filepath"

"github.com/gvallee/go_exec/pkg/advexec"
"github.com/gvallee/go_hpc_jobmgr/internal/pkg/job"
"github.com/gvallee/go_hpc_jobmgr/internal/pkg/sys"
"github.com/gvallee/go_util/pkg/util"
)

// Identifiers of the supported job managers, as stored in JM.ID.
const (
// NativeID is the value set to JM.ID when mpirun shall be used to submit a job
NativeID = "native"

// SlurmID is the value set to JM.ID when Slurm shall be used to submit a job
SlurmID = "slurm"

// PrunID is the value set to JM.ID when prun shall be used to submit a job
PrunID = "prun"
)

// Environment describes an execution environment.
// NOTE(review): this type is not referenced by the code visible in this file;
// the field descriptions below are inferred from their names - confirm.
type Environment struct {
// InstallDir is presumably the installation directory of the environment - TODO confirm
InstallDir string

// mpiBin is unexported; presumably the MPI launcher binary - TODO confirm
mpiBin string
}

// Loader checks whether a given job manager is applicable or not
type Loader interface {
// Load returns a boolean result.
// NOTE(review): presumably true when the job manager is applicable - confirm against implementations.
Load() bool
}

// LoadFn is the signature of the function that loads a specific job manager
// once detected. It receives the job manager and the system configuration and
// returns an error when loading fails.
type LoadFn func(*JM, *sys.Config) error

// SubmitFn is the signature of the function that lets us submit a new job. It
// receives the job and the system configuration and returns an advexec.Advcmd
// together with an error.
// NOTE(review): the returned command is presumably the one to execute to
// actually launch the job - confirm against the job manager implementations.
type SubmitFn func(*job.Job, *sys.Config) (advexec.Advcmd, error)

// JM is the structure representing a specific job manager (JM)
type JM struct {
// ID identifies which job manager has been detected on the system
// (NativeID, SlurmID or PrunID)
ID string

// Load is the function to call to load the job manager (see LoadFn)
Load LoadFn

// Submit is the function to submit a job through the current job manager
Submit SubmitFn
}

// Detect figures out which job manager must be used on the system and returns
// a structure that gathers all the data necessary to interact with it.
//
// The native job manager is the mandatory fallback: when it cannot be
// detected the program is terminated through log.Fatalln. Slurm is preferred
// over prun, which is itself preferred over the native job manager.
func Detect() JM {
	// The native job manager must always be available.
	nativeFound, nativeJM := NativeDetect()
	if !nativeFound {
		log.Fatalln("unable to find a default job manager")
	}

	// Look for a better option, in order of preference.
	if found, slurmJM := SlurmDetect(); found {
		return slurmJM
	}

	if found, prunJM := PrunDetect(); found {
		return prunJM
	}

	return nativeJM
}

// Load is the function to use to load the JM component.
// It currently performs no work: the jm argument is ignored and nil is always
// returned.
func Load(jm *JM) error {
return nil
}

// TempFile selects the path of the batch script of job j, stores it in
// j.BatchScript and installs in j.CleanUp a function that deletes it.
//
// When sysCfg.Persistent is empty, an empty temporary file whose name starts
// with "sbash-<job name>-" is created in the default temporary directory.
// Otherwise the path is "sbash-<job name>.sh" in the install directory of the
// job's MPI implementation; in that case the file itself is not created and
// it must not already exist.
//
// An error is returned when the temporary file cannot be created or closed,
// when j.MPICfg is nil in persistent mode, or when the persistent script
// already exists. j.CleanUp is left untouched when an error is returned.
func TempFile(j *job.Job, sysCfg *sys.Config) error {
	filePrefix := "sbash-" + j.Name
	path := ""
	if sysCfg.Persistent == "" {
		f, err := ioutil.TempFile("", filePrefix+"-")
		if err != nil {
			return fmt.Errorf("failed to create temporary file: %w", err)
		}
		path = f.Name()
		// Only the path is needed at this point; the file is closed right away
		// and a failure to do so is reported instead of being silently dropped.
		if err := f.Close(); err != nil {
			// Best effort: do not leave a stray file behind. The close error
			// is the one worth reporting, so the removal error is ignored.
			_ = os.Remove(path)
			return fmt.Errorf("failed to close temporary file %s: %w", path, err)
		}
		j.BatchScript = path
	} else {
		// Fail with an error rather than a nil-pointer panic when the job
		// has no MPI configuration.
		if j.MPICfg == nil {
			return fmt.Errorf("undefined MPI configuration for job %s", j.Name)
		}
		fileName := filePrefix + ".sh"
		path = filepath.Join(j.MPICfg.Implem.InstallDir, fileName)
		j.BatchScript = path
		// Never reuse (and later delete) a script we did not create.
		if util.PathExists(path) {
			return fmt.Errorf("file %s already exists", path)
		}
	}

	// The closure captures the final path so that the clean-up targets the
	// script selected above.
	j.CleanUp = func(...interface{}) error {
		err := os.RemoveAll(path)
		if err != nil {
			return fmt.Errorf("unable to delete %s: %w", path, err)
		}
		return nil
	}

	return nil
}
Loading

0 comments on commit ebd0d37

Please sign in to comment.