forked from University-of-Delaware-IT-RCI/auto_tmpdir
-
Notifications
You must be signed in to change notification settings - Fork 0
/
auto_tmpdir.c
244 lines (210 loc) · 6.81 KB
/
auto_tmpdir.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
/*
* auto_tmpdir
*
* SLURM SPANK plugin that automates the process of creating/destroying
* temporary directories for jobs/steps.
*/
#include "fs-utils.h"
/*
 * All spank plugins must define this macro for the SLURM plugin loader.
 */
SPANK_PLUGIN(auto_tmpdir, 1)
/*
 * Options bit vector:
 *
 * Starts out empty.  The command-line option callbacks in this file OR
 * auto_tmpdir_fs_options_* flags into it, and the prolog, init_post_opt
 * and epilog entry points pass it along to the auto_tmpdir_fs_init*()
 * functions.
 */
static auto_tmpdir_fs_options_t auto_tmpdir_options = 0;
/*
 * Filesystem bind mount info:
 *
 * NULL until one of the prolog, init_post_opt or epilog entry points
 * assigns it the result of an auto_tmpdir_fs_init*() call.
 */
static auto_tmpdir_fs_ref auto_tmpdir_fs_info = NULL;
/*
 * Which job step should cleanup?
 *
 * NOTE(review): initialized to SLURM_EXTERN_CONT but nothing else in this
 * file reads or writes it -- confirm whether it is still needed.
 */
static uint32_t auto_tmpdir_cleanup_in_step = SLURM_EXTERN_CONT;
/*
 * @function _opt_no_rm_tmpdir
 *
 * Callback for the --no-rm-tmpdir option.  Turns on the
 * auto_tmpdir_fs_options_should_not_delete flag in the plugin's options
 * bit vector and notes the choice in the verbose log.
 *
 * None of the arguments are consulted; the function always returns
 * ESPANK_SUCCESS.
 */
static int
_opt_no_rm_tmpdir(
    int         val,
    const char  *optarg,
    int         remote
)
{
    /* The option carries no value, so the callback arguments are unused: */
    (void)val;
    (void)optarg;
    (void)remote;

    auto_tmpdir_options = auto_tmpdir_options | auto_tmpdir_fs_options_should_not_delete;
    slurm_verbose("auto_tmpdir: will not remove tempororary directories");

    return ESPANK_SUCCESS;
}
#ifdef AUTO_TMPDIR_ENABLE_SHARED_TMPDIR
/*
 * @function _opt_use_shared_tmpdir
 *
 * Callback for the --use-shared-tmpdir option.
 *
 * The option takes an optional value.  A NULL optarg or the literal
 * string "(null)" is treated as "no value given".  The only accepted
 * value is "per-node", which additionally turns on the
 * auto_tmpdir_fs_options_should_use_per_host flag.  Any other value is
 * rejected with ESPANK_BAD_ARG and leaves the options bit vector
 * untouched.
 *
 * On success the auto_tmpdir_fs_options_should_use_shared flag is set
 * and ESPANK_SUCCESS is returned.
 */
static int
_opt_use_shared_tmpdir(
    int         val,
    const char  *optarg,
    int         remote
)
{
    int         has_value = (optarg != NULL) && (strcmp(optarg, "(null)") != 0);

    (void)val;
    (void)remote;

    /* Reject anything other than the one recognized optional value: */
    if ( has_value && (strcmp(optarg, "per-node") != 0) ) {
        slurm_error("auto_tmpdir: invalid --use-shared-tmpdir optional value: %s", optarg);
        return ESPANK_BAD_ARG;
    }

    /* Having passed the check above, a value can only be "per-node": */
    if ( has_value ) {
        auto_tmpdir_options |= auto_tmpdir_fs_options_should_use_per_host;
    }

    auto_tmpdir_options |= auto_tmpdir_fs_options_should_use_shared;
    slurm_verbose("auto_tmpdir: will use shared tempororary directory under `%s`", AUTO_TMPDIR_DEFAULT_SHARED_PREFIX);

    return ESPANK_SUCCESS;
}
#endif
/*
 * Options available to this spank plugin.
 *
 * One entry per command-line option, each naming the callback in this
 * file that handles it.  The --use-shared-tmpdir entry is only compiled
 * in when AUTO_TMPDIR_ENABLE_SHARED_TMPDIR is defined.  The table is
 * closed by SPANK_OPTIONS_TABLE_END.
 */
struct spank_option spank_options[] =
    {
        {
            .name    = "no-rm-tmpdir",
            .arginfo = NULL,
            .usage   = "Do not automatically remove temporary directories for the job/steps.",
            .has_arg = 0,
            .val     = 0,
            .cb      = (spank_opt_cb_f) _opt_no_rm_tmpdir
        },
#ifdef AUTO_TMPDIR_ENABLE_SHARED_TMPDIR
        {
            .name    = "use-shared-tmpdir",
            .arginfo = NULL,
            .usage   = "Create temporary directories on shared storage. Use \"--use-shared-tmpdir=per-node\" to create unique sub-directories for each node allocated to the job (e.g. <base><job-id>/<nodename>).",
            /* presumably 2 marks the value as optional, matching the usage text -- confirm against spank.h */
            .has_arg = 2,
            .val     = 0,
            .cb      = (spank_opt_cb_f) _opt_use_shared_tmpdir
        },
#endif
        SPANK_OPTIONS_TABLE_END
    };
/**/
/*
 * @function slurm_spank_init
 *
 * In the ALLOCATOR context, the 'spank_options' don't get automatically
 * registered as they do under LOCAL and REMOTE.  So under that context
 * we explicitly register our cli options; registration stops at the
 * first failure and that failure code is returned.
 *
 * In the REMOTE context, go ahead and check the SPANK env for our options:
 * for each variable that is present, the matching option callback is
 * invoked with the variable's value and remote = 1.
 *
 * Every other context is a no-op.
 *
 * Returns ESPANK_SUCCESS, or the first non-success code produced by
 * spank_option_register() or by one of the option callbacks.
 */
int
slurm_spank_init(
    spank_t     spank_ctxt,
    int         argc,
    char        *argv[]
)
{
    int         rc = ESPANK_SUCCESS;

    switch ( spank_context() ) {

        case S_CTX_ALLOCATOR: {
            struct spank_option *o = spank_options;

            /* Walk the table up to its terminating (NULL-named) entry: */
            while ( o->name && (rc == ESPANK_SUCCESS) ) {
                rc = spank_option_register(spank_ctxt, o++);
            }
            break;
        }

        case S_CTX_REMOTE: {
            char                v[PATH_MAX];

            //
            // Check for our arguments in the environment:
            //
            if ( spank_getenv(spank_ctxt, "SLURM_SPANK__SLURM_SPANK_OPTION_auto_tmpdir_no_rm_tmpdir", v, sizeof(v)) == ESPANK_SUCCESS ) {
                rc = _opt_no_rm_tmpdir(0, v, 1);
            }
#ifdef AUTO_TMPDIR_ENABLE_SHARED_TMPDIR
            if ( (rc == ESPANK_SUCCESS) && (spank_getenv(spank_ctxt, "SLURM_SPANK__SLURM_SPANK_OPTION_auto_tmpdir_use_shared_tmpdir", v, sizeof(v)) == ESPANK_SUCCESS) ) {
                rc = _opt_use_shared_tmpdir(0, v, 1);
            }
#endif
            break;
        }

        default:
            /* Nothing to do in any other context. */
            break;

    }
    return rc;
}
/*
 * @function slurm_spank_job_prolog
 *
 * In the prolog we create the hierarchy of bind-mounted directories for the
 * job but we DO NOT bind-mount them.  It wouldn't do much anyway, since the
 * job_script context isn't where we'll end up running the job steps.
 *
 * If we're able to create the hierarchy, it is serialized to a file so it
 * can be reconstituted in the job step and later in the epilog context.
 *
 * Returns ESPANK_SUCCESS outside the job_script context or when both the
 * creation and the serialization succeed; ESPANK_ERROR (after logging)
 * when either of them fails.
 */
int
slurm_spank_job_prolog(
    spank_t     spank_ctxt,
    int         argc,
    char        *argv[]
)
{
    /* We only want to run in the job_script context: */
    if ( spank_context() != S_CTX_JOB_SCRIPT ) {
        return ESPANK_SUCCESS;
    }

    auto_tmpdir_fs_info = auto_tmpdir_fs_init(spank_ctxt, argc, argv, auto_tmpdir_options);
    if ( ! auto_tmpdir_fs_info ) {
        slurm_error("auto_tmpdir::slurm_spank_job_prolog: failure to create fs info");
        return ESPANK_ERROR;
    }

    if ( auto_tmpdir_fs_serialize_to_file(auto_tmpdir_fs_info, spank_ctxt, argc, argv, NULL) != 0 ) {
        slurm_error("auto_tmpdir::slurm_spank_job_prolog: failure to serialize fs info");
        return ESPANK_ERROR;
    }

    return ESPANK_SUCCESS;
}
/*
 * @function slurm_spank_init_post_opt
 *
 * At this point we're in a slurmstepd just prior to transitioning to the user
 * credentials.  Now's the right time to pull the cached bind-mount hierarchy
 * back off disk and do all the bind mounts.  Once that has succeeded, TMPDIR
 * is set in the job environment to the path reported by
 * auto_tmpdir_fs_get_tmpdir(), replacing any value already present.
 *
 * Returns ESPANK_SUCCESS when not in the remote context or when every step
 * succeeds.  Returns ESPANK_ERROR if the fs info cannot be loaded, the bind
 * mounts fail, or no TMPDIR path is available; returns the spank_setenv()
 * error code if setting TMPDIR fails.  Every failure is logged.
 */
int
slurm_spank_init_post_opt(
    spank_t     spank_ctxt,
    int         argc,
    char        *argv[]
)
{
    const char  *tmpdir;
    int         rc;

    /* We only want to run in the remote context: */
    if ( ! spank_remote(spank_ctxt) ) {
        return ESPANK_SUCCESS;
    }

    auto_tmpdir_fs_info = auto_tmpdir_fs_init_with_file(spank_ctxt, argc, argv, auto_tmpdir_options, NULL, 0);
    if ( ! auto_tmpdir_fs_info ) {
        slurm_error("auto_tmpdir::slurm_spank_init_post_opt: failure to load fs info");
        return ESPANK_ERROR;
    }

    if ( auto_tmpdir_fs_bind_mount(auto_tmpdir_fs_info) != 0 ) {
        slurm_error("auto_tmpdir::slurm_spank_init_post_opt: failure to bind-mount directories");
        return ESPANK_ERROR;
    }

    tmpdir = auto_tmpdir_fs_get_tmpdir(auto_tmpdir_fs_info);
    if ( ! tmpdir ) {
        slurm_error("auto_tmpdir::slurm_spank_init_post_opt: no TMPDIR path available");
        return ESPANK_ERROR;
    }

    /*
     * The last argument to spank_setenv() is an "overwrite" flag, not a
     * length:  pass 1 so an inherited TMPDIR is always replaced.
     */
    rc = spank_setenv(spank_ctxt, "TMPDIR", tmpdir, 1);
    if ( rc != ESPANK_SUCCESS ) {
        /* spank_setenv() reports failure via its return code, not errno: */
        slurm_error("auto_tmpdir::slurm_spank_init_post_opt: setenv(TMPDIR, \"%s\") failed (%s)", tmpdir, spank_strerror((spank_err_t)rc));
    }
    return rc;
}
/*
 * @function slurm_spank_job_epilog
 *
 * In the epilog we pull the cached bind-mount hierarchy back off disk and
 * hand it to auto_tmpdir_fs_fini() to destroy all the directories we created.
 *
 * Returns ESPANK_SUCCESS outside the job_script context or when both the
 * reload and auto_tmpdir_fs_fini() succeed; ESPANK_ERROR otherwise.
 */
int
slurm_spank_job_epilog(
    spank_t     spank_ctxt,
    int         argc,
    char        *argv[]
)
{
    /* Only the job_script context has any cleanup to do: */
    if ( spank_context() != S_CTX_JOB_SCRIPT ) {
        return ESPANK_SUCCESS;
    }

    auto_tmpdir_fs_info = auto_tmpdir_fs_init_with_file(spank_ctxt, argc, argv, auto_tmpdir_options, NULL, 1);
    if ( ! auto_tmpdir_fs_info ) {
        return ESPANK_ERROR;
    }

    if ( auto_tmpdir_fs_fini(auto_tmpdir_fs_info, 0) != 0 ) {
        return ESPANK_ERROR;
    }

    return ESPANK_SUCCESS;
}