Skip to content

Commit

Permalink
chore: add runtime-dev
Browse files Browse the repository at this point in the history
  • Loading branch information
chenzhuofu committed Dec 16, 2024
1 parent f48e6f5 commit 26f907a
Show file tree
Hide file tree
Showing 7 changed files with 278 additions and 0 deletions.
1 change: 1 addition & 0 deletions .proj.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ build_targets = [
"models",
"export-model-arch",
"substitution-to-dot",
"runtime-dev",
]

test_targets = [
Expand Down
1 change: 1 addition & 0 deletions lib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
add_subdirectory(pcg)
add_subdirectory(compiler)
add_subdirectory(runtime)
add_subdirectory(runtime-dev)
add_subdirectory(op-attrs)
add_subdirectory(kernels)
add_subdirectory(local-execution)
Expand Down
34 changes: 34 additions & 0 deletions lib/runtime-dev/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Declares the `runtime-dev` library target from every src/*.cc file in this
# directory, exporting include/ publicly and keeping src/ private.
# NOTE(review): src/cpp_driver.cc defines main(), so this "library" carries a
# program entry point -- confirm that is intended.
ff_add_library(
NAME
runtime-dev
SRC_PATTERNS
src/*.cc
PUBLIC_INCLUDE
include/
# Legion headers, referenced by a path relative to this directory.
# NOTE(review): may be redundant with the `legion` entry in DEPS -- confirm.
${CMAKE_CURRENT_SOURCE_DIR}/../../deps/legion/runtime

PRIVATE_INCLUDE
src/
DEPS
op-attrs
utils
legion
compiler
kernels
pcg
local-execution
)

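# TODO: tests are disabled for now.
# NOTE(review): the stanza below still uses the target name `runtime-test` and
# DEPS `runtime`; it presumably needs renaming for runtime-dev before enabling.
# ff_add_test_executable(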
# NAME
# runtime-test
# SRC_PATTERNS
# test/src/*.cc
# PUBLIC_INCLUDE
# include/
# PRIVATE_INCLUDE
# test/src/ src/
# DEPS
# runtime
# doctest
# )
52 changes: 52 additions & 0 deletions lib/runtime-dev/include/runtime/tasks.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _RUNTIME_TASK_H_
#define _RUNTIME_TASK_H_
#include "legion.h"
#ifdef FF_USE_NCCL
#if defined(FF_USE_CUDA) || defined(FF_USE_HIP_CUDA)
#include <nccl.h>
#else
#include <rccl/rccl.h>
#endif
#endif

namespace FlexFlow {

// Legion task identifiers used by the runtime. The numeric values are spelled
// out so that inserting or reordering entries cannot silently renumber them.
// The enum is intentionally unscoped: the enumerators are passed directly to
// Legion APIs such as Runtime::set_top_level_task_id and TaskVariantRegistrar.
enum TaskIDs {
  // Top-level task of the application.
  TOP_LEVEL_TASK_ID = 0,
  // Registered in tasks.cc as "cuda_init_task".
  CUDA_INIT_TASK_ID = 1,
  // NCCL tasks; only registered when FF_USE_NCCL is defined.
  NCCL_GETUNIQUEID_TASK_ID = 2,
  NCCL_INIT_COMMS_TASK_ID = 3,
  NCCL_FINISH_COMMS_TASK_ID = 4,
  // Presumably the forward / backward / optimizer execution tasks -- TODO
  // confirm once they are registered.
  EXEC_FORWARD_TASK_ID = 5,
  EXEC_BACKWARD_TASK_ID = 6,
  EXEC_OPTIMIZE_TASK_ID = 7,
};

// Top-level Legion task of the application.
// Only declared here: main() in cpp_driver.cc pre-registers it under
// TOP_LEVEL_TASK_ID, and the definition is supplied by the application
// (dummy_app.cc in this library).
// Parameters follow the standard Legion task signature: the task descriptor,
// the physical regions mapped for it, the task context, and the runtime.
void top_level_task(Legion::Task const *task,
std::vector<Legion::PhysicalRegion> const &regions,
Legion::Context ctx,
Legion::Runtime *runtime);

// Registers FlexFlow's internal Legion task variants (CUDA init and, when
// FF_USE_NCCL is defined, the NCCL tasks).
//
//   runtime                     Runtime instance used for dynamic
//                               registration. May be null only when
//                               pre_register is true; the definition asserts
//                               this.
//   pre_register                true: register statically through
//                               Runtime::preregister_task_variant.
//                               false: register through
//                               runtime->register_task_variant.
//   enable_control_replication  Only consulted when pre_register is false; if
//                               true, each registrar's global_registration
//                               flag is set to false before registering.
//
// The default argument uses nullptr rather than the NULL macro so this header
// does not rely on a transitive include to define it.
void register_flexflow_internal_tasks(Legion::Runtime *runtime = nullptr,
                                      bool pre_register = true,
                                      bool enable_control_replication = true);

// Hook for application-specific task registration.
// Only declared here: main() in cpp_driver.cc calls it after
// register_flexflow_internal_tasks() and before Runtime::start(); the
// definition is supplied by the application (empty in dummy_app.cc).
void register_custom_tasks();

} // namespace FlexFlow

#endif // _RUNTIME_TASK_H_
47 changes: 47 additions & 0 deletions lib/runtime-dev/src/cpp_driver.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "dirent.h"
#include "legion.h"
#include "runtime/tasks.h"

using namespace Legion;
using namespace FlexFlow;

// ========================================================
// Task and mapper registrations
// ========================================================
int main(int argc, char **argv) {
// This needs to be set, otherwise NCCL will try to use group kernel launches,
// which are not compatible with the Realm CUDA hijack.
setenv("NCCL_LAUNCH_MODE", "PARALLEL", true);

Runtime::set_top_level_task_id(TOP_LEVEL_TASK_ID);
{
TaskVariantRegistrar registrar(TOP_LEVEL_TASK_ID, "top_level");
registrar.add_constraint(ProcessorConstraint(Processor::LOC_PROC));
registrar.set_replicable();
Runtime::preregister_task_variant<top_level_task>(registrar, "top_level");
}

register_flexflow_internal_tasks();

// Register custom tasks
register_custom_tasks();

// TODO: add mapper implementations
// Runtime::add_registration_callback(FFMapper::update_mappers);
return Runtime::start(argc, argv);
}
40 changes: 40 additions & 0 deletions lib/runtime-dev/src/dummy_app.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "legion.h"
#include "runtime/tasks.h"
#include <iostream>
#include <vector>

using namespace Legion;

// File-level logger for the dummy application, constructed with the name
// "dummy"; used by top_level_task below.
Legion::Logger log_app("dummy");

// Top-level task of the dummy application. It launches no work of its own:
// it issues an execution fence, blocks until that fence completes, and then
// prints a banner. `task` and `regions` are unused.
void FlexFlow::top_level_task(Task const *task,
                              std::vector<PhysicalRegion> const &regions,
                              Context ctx,
                              Runtime *runtime) {
  // Issue the fence and wait on its future in a single expression.
  runtime->issue_execution_fence(ctx).get_void_result();

  log_app.print("----------dummy app started--------------");
}

// The dummy application defines no tasks of its own, so this hook does
// nothing.
void FlexFlow::register_custom_tasks() {
  // Intentionally empty.
}
103 changes: 103 additions & 0 deletions lib/runtime-dev/src/tasks.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/* Copyright 2023 CMU, Facebook, LANL, MIT, NVIDIA, and Stanford (alphabetical)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "runtime/tasks.h"

using namespace Legion;

namespace FlexFlow {

// Registers FlexFlow's internal Legion task variants: the CUDA init task and,
// when FF_USE_NCCL is defined, the three NCCL tasks. Every variant is a leaf
// task constrained to TOC_PROC processors.
//
//   runtime                     Runtime used for dynamic registration; must
//                               be non-null when pre_register is false.
//   pre_register                true: use the static
//                               Runtime::preregister_task_variant.
//                               false: use runtime->register_task_variant.
//   enable_control_replication  With dynamic registration only: when true,
//                               each registrar's global_registration is set
//                               to false.
//
// NOTE(review): FFHandler, UtilityTasks and Op are not declared by
// runtime/tasks.h as visible in this commit -- confirm where they come from.
void register_flexflow_internal_tasks(Runtime *runtime,
                                      bool pre_register,
                                      bool enable_control_replication) {
  // Dynamic registration goes through the runtime instance, so it must exist.
  if (!pre_register) {
    assert(runtime != NULL);
  }

  // global_registration is only ever cleared on the dynamic-registration
  // path, and only when control replication is enabled.
  bool const register_locally = !pre_register && enable_control_replication;

  // CUDA_INIT_TASK
  {
    TaskVariantRegistrar cuda_init(CUDA_INIT_TASK_ID, "cuda_init_task");
    cuda_init.add_constraint(ProcessorConstraint(Processor::TOC_PROC));
    cuda_init.set_leaf();
    if (register_locally) {
      cuda_init.global_registration = false;
    }
    if (pre_register) {
      Runtime::preregister_task_variant<FFHandler,
                                        UtilityTasks::init_cuda_task>(
          cuda_init, "cuda_init_task");
    } else {
      runtime->register_task_variant<FFHandler, UtilityTasks::init_cuda_task>(
          cuda_init);
    }
  }
#ifdef FF_USE_NCCL
  // NCCL: unique-id generation
  {
    TaskVariantRegistrar get_unique_id(NCCL_GETUNIQUEID_TASK_ID,
                                       "NCCL GetUniqueId");
    get_unique_id.add_constraint(ProcessorConstraint(Processor::TOC_PROC));
    get_unique_id.set_leaf();
    if (register_locally) {
      get_unique_id.global_registration = false;
    }
    if (pre_register) {
      Runtime::preregister_task_variant<ncclUniqueId,
                                        Op::get_nccl_unique_id_task>(
          get_unique_id, "NCCL GetUniqueId Task");
    } else {
      runtime->register_task_variant<ncclUniqueId, Op::get_nccl_unique_id_task>(
          get_unique_id);
    }
  }
  // NCCL: communicator setup (concurrent; explicit variant ID 111)
  {
    TaskVariantRegistrar init_comms(NCCL_INIT_COMMS_TASK_ID,
                                    "NCCL Init Communicators");
    init_comms.add_constraint(ProcessorConstraint(Processor::TOC_PROC));
    init_comms.set_leaf();
    init_comms.set_concurrent();
    // init_comms.set_concurrent_barrier();
    if (register_locally) {
      init_comms.global_registration = false;
    }
    if (pre_register) {
      Runtime::preregister_task_variant<ncclComm_t, Op::init_nccl_comms_task>(
          init_comms, "NCCL Init Communicators Task", 111 /*variant ID*/);
    } else {
      runtime->register_task_variant<ncclComm_t, Op::init_nccl_comms_task>(
          init_comms, 111 /*variant ID*/);
    }
  }
  // NCCL: communicator teardown (concurrent; explicit variant ID 111)
  {
    TaskVariantRegistrar finish_comms(NCCL_FINISH_COMMS_TASK_ID,
                                      "NCCL Finish Communicators");
    finish_comms.add_constraint(ProcessorConstraint(Processor::TOC_PROC));
    finish_comms.set_leaf();
    finish_comms.set_concurrent();
    // finish_comms.set_concurrent_barrier();
    if (register_locally) {
      finish_comms.global_registration = false;
    }
    if (pre_register) {
      Runtime::preregister_task_variant<Op::finish_nccl_comms_task>(
          finish_comms, "NCCL Finish Communicators Task", 111 /*variant ID*/);
    } else {
      runtime->register_task_variant<Op::finish_nccl_comms_task>(
          finish_comms, 111 /*variant ID*/);
    }
  }
#endif
}

} // namespace FlexFlow

0 comments on commit 26f907a

Please sign in to comment.