Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New MachineView representation #1458

Merged
merged 40 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
15dd787
containers helper functions
Aug 6, 2024
1f43d69
Additional support for unordered_multiset
Aug 6, 2024
0f2cb56
format fix
Aug 6, 2024
1eeeba8
Unordered Machine Mapping and adjacent changes
Aug 7, 2024
53dca87
repo-refactor merge
Aug 7, 2024
d3f1657
formatting
Aug 7, 2024
67a94a2
Minor fixes
Aug 9, 2024
5a5d276
Update to StridedRectangle interface
Aug 9, 2024
b302903
Minor updates
Aug 9, 2024
775cb90
added get_allowed_machine_views
Aug 10, 2024
c0c1c00
formatting
Aug 10, 2024
fdd556e
minor fix
Aug 10, 2024
075f4de
Added StartInvariantMachineView
Aug 10, 2024
878954f
formatting
Aug 10, 2024
340f441
Containers fix
Aug 13, 2024
1e8fa90
Implemented tensor to machine view injection
Aug 13, 2024
6e6adaa
small refactor
Aug 13, 2024
4b8600d
formatting
Aug 13, 2024
94e2dc2
Merge branch 'repo-refactor' into unordered-machine-view
lockshaw Aug 22, 2024
c9532d1
Cleaning Up
Aug 28, 2024
7d078a1
Formatting fix
Aug 28, 2024
40ab5ef
new machine-view interface
Sep 9, 2024
f7a50d4
repo-refactor merge
Sep 13, 2024
3cbc6be
update to allowed machine views
Sep 13, 2024
7eff4f5
PR review fixes
Sep 13, 2024
21ca265
update to machine view and getting allowed machine view to match new …
Sep 15, 2024
0810bce
merge with `repo-refactor`
Oct 4, 2024
0385e01
formatting
Oct 4, 2024
b438b49
minor fix
Oct 4, 2024
52b7a26
PR fixes
Oct 5, 2024
3a5fbf4
PR fixes
Oct 5, 2024
c1e1c8c
machineview interface change
Oct 5, 2024
5cc2a2f
Minor PR fixes
Oct 9, 2024
e1cd5a2
.cc machine view fixes + added StartInvariantMachineView
Oct 9, 2024
1744555
Merge remote-tracking branch 'origin/repo-refactor' into pietro-machi…
lockshaw Oct 9, 2024
93f9bb4
minor PR fixes
Oct 9, 2024
3c3518a
minor fixes
Oct 9, 2024
9fc8712
Post-merge fixes
lockshaw Oct 9, 2024
aa2bb4b
Merge remote-tracking branch 'refs/remotes/Marsella8/unordered-machin…
lockshaw Oct 9, 2024
20f6a75
Format
lockshaw Oct 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions lib/compiler/include/compiler/machine_mapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "pcg/machine_specification.h"
#include "pcg/machine_view.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
#include "pcg/start_invariant_machine_view.h"
#include "substitutions/sub_parallel_computation_graph.h"
#include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h"

Expand Down Expand Up @@ -53,17 +54,15 @@ OptimalCostResult optimal_cost(
MachineSpecification const &resources,
OptimalCostCache &cached_subgraph_costs);

} // namespace FlexFlow
// Returns the set of machine views that may be used for a tensor with the
// given parallel shape on the given machine.
//
// machinespec: description of the machine the views must fit on.
// shape:       parallel shape of the tensor being placed.
std::unordered_set<MachineView>
get_allowed_machine_views(MachineSpecification const &machinespec,
ParallelTensorShape const &shape);

// namespace std {
//
// template <>
// struct hash<std::unordered_map<FlexFlow::Node, FlexFlow::MachineMapping>> {
// size_t operator()(
// std::unordered_map<FlexFlow::Node, FlexFlow::MachineMapping> const &g)
// const;
// };
// Start-invariant counterpart of get_allowed_machine_views: returns the
// allowed machine views for the given machine and tensor shape expressed as
// StartInvariantMachineView values.
//
// machinespec: description of the machine the views must fit on.
// shape:       parallel shape of the tensor being placed.
std::unordered_set<StartInvariantMachineView>
get_allowed_start_invariant_machine_views(
MachineSpecification const &machinespec,
ParallelTensorShape const &shape);

// }; // namespace std
} // namespace FlexFlow

#endif
97 changes: 97 additions & 0 deletions lib/compiler/src/machine_mapping.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,20 @@
#include "pcg/machine_view.dtg.h"
#include "pcg/machine_view.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
#include "pcg/start_invariant_machine_view.h"
#include "utils/containers.h"
#include "utils/containers/are_disjoint.h"
#include "utils/containers/as_vector.h"
#include "utils/containers/cartesian_product.h"
#include "utils/containers/contains_key.h"
#include "utils/containers/get_only.h"
#include "utils/containers/keys.h"
#include "utils/containers/product.h"
#include "utils/containers/range.h"
#include "utils/containers/replicate.h"
#include "utils/containers/transform.h"
#include "utils/containers/without_order.h"
#include "utils/containers/zip.h"
#include "utils/exception.h"
#include "utils/graph/graph_split.dtg.h"
#include "utils/graph/node/algorithms.h"
Expand Down Expand Up @@ -358,4 +366,93 @@
return searcher.optimal_cost(subpcg, resources, sp_decomposition);
}

// Checks whether a machine view fits on the given machine.
//
// mv:          the machine view to check.
// machinespec: the machine specification to check against.
//
// Returns true when the raw id of the view's last device is strictly smaller
// than the total number of devices of the view's device type
// (num_nodes * devices-per-node).
bool is_valid_machine_view(MachineView const &mv,

Check warning on line 369 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L369

Added line #L369 was not covered by tests
MachineSpecification const &machinespec) {
// Pick the per-node device count matching the view's device type: GPU count
// for GPU views, CPU count for everything else.
int num_devices_per_node = ((get_device_type(mv) == DeviceType::GPU)
? machinespec.num_gpus_per_node
: machinespec.num_cpus_per_node);
int num_devices = machinespec.num_nodes * num_devices_per_node;
// The comparison is strict, so a last raw id equal to num_devices is rejected.
return (num_devices > get_raw_id(get_last_device_id(mv)));

Check warning on line 375 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L371-L375

Added lines #L371 - L375 were not covered by tests
}

// Collects every parallel degree of a tensor shape into a single vector.
//
// The result holds the shard degrees (as returned by
// ff_ordered_shard_degrees), followed by the sum degree and then the
// discard-copy degree. Degrees equal to 1 are NOT removed here.
std::vector<int> get_tensor_parallel_degrees(ParallelTensorShape const &shape) {
std::vector<int> degrees = as_vector(ff_ordered_shard_degrees(shape));
degrees.push_back(get_sum_degree(shape));
degrees.push_back(get_discard_copy_degree(shape));
return degrees;

Check warning on line 382 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L378-L382

Added lines #L378 - L382 were not covered by tests
}

// Checks whether a machine view is compatible with a tensor's parallel shape.
//
// mv:    the machine view to check.
// shape: the parallel tensor shape to check against.
//
// Returns true when the view's per-dimension device counts equal the tensor's
// parallel degrees (ignoring degrees equal to 1) once both are passed through
// without_order.
// NOTE(review): without_order presumably yields an order-insensitive
// multiset comparison — confirm against utils/containers/without_order.h.
bool is_valid_machine_view(MachineView const &mv,

Check warning on line 385 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L385

Added line #L385 was not covered by tests
ParallelTensorShape const &shape) {
// Unwrap the strongly-typed point counts into plain ints.
std::vector<int> mv_degrees =
transform(get_num_devices_per_dim(mv),
[](num_points_t degree) { return degree.unwrapped; });
std::vector<int> tensor_degrees = get_tensor_parallel_degrees(shape);
// Degrees of 1 are dropped from the tensor side before comparing.
tensor_degrees =
filter(tensor_degrees, [](int degree) { return degree != 1; });
return without_order(mv_degrees) == without_order(tensor_degrees);

Check warning on line 393 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L387-L393

Added lines #L387 - L393 were not covered by tests
}

// Enumerates candidate machine views for a tensor shape on a machine.
//
// One view is produced for every combination of candidate stride vector and
// start device id in [0, total_devices). No validity check is performed
// here, so the returned set may contain views that do not fit on the machine;
// filtering is left to the caller.
//
// Only GPU devices are considered: total_devices is computed from
// num_gpus_per_node and every view starts at a gpu_id_t.
// TODO(@pietro): add support for both CPU and GPU
static std::unordered_set<MachineView>
get_candidate_machine_views(MachineSpecification const &machinespec,

Check warning on line 398 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L398

Added line #L398 was not covered by tests
ParallelTensorShape const &shape) {

// Builds all stride vectors to try: one stride per tensor dimension, each
// drawn from range(1, max_stride_upper_bound + 1).
// NOTE(review): range is presumably half-open, i.e. strides run from 1 to
// max_stride_upper_bound inclusive — confirm.
auto candidate_strides = [](std::vector<int> tensor_dims, int total_devices) {
// Integer division of (total_devices + 1) by the product of (degree - 1)
// over all dimensions.
int max_stride_upper_bound =
(total_devices + 1) /
product(transform(tensor_dims, [](int degree) { return degree - 1; }));
std::unordered_multiset<std::vector<int>> strides = cartesian_product(
replicate(tensor_dims.size(), range(1, max_stride_upper_bound + 1)));
return strides;

Check warning on line 407 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L401-L407

Added lines #L401 - L407 were not covered by tests
};

// Only parallel degrees different from 1 become dimensions of the view.
std::vector<int> tensor_dims = filter(get_tensor_parallel_degrees(shape),
[](int degree) { return degree != 1; });
std::unordered_set<MachineView> machine_views;
int total_devices = machinespec.num_nodes * machinespec.num_gpus_per_node;
for (std::vector<int> stride :
candidate_strides(tensor_dims, total_devices)) {

Check warning on line 415 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L410-L415

Added lines #L410 - L415 were not covered by tests
for (int start_id = 0; start_id < total_devices; start_id++) {
// Pair each tensor dimension (number of points) with its stride.
std::vector<StridedRectangleSide> sides =
transform(zip(tensor_dims, stride), [&](auto const &pair) {
return StridedRectangleSide(num_points_t(pair.first),
stride_t(pair.second));
});
MachineView mv =
MachineView{device_id_t(gpu_id_t(start_id)), StridedRectangle{sides}};
machine_views.insert(mv);

Check warning on line 424 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L417-L424

Added lines #L417 - L424 were not covered by tests
}
}
return machine_views;

Check warning on line 427 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L427

Added line #L427 was not covered by tests
}

// Returns the machine views allowed for a tensor shape on a machine.
//
// Starts from get_candidate_machine_views(machinespec, shape) and keeps only
// the views for which both is_valid_machine_view overloads return true: the
// one taking the tensor shape and the one taking the machine specification.
std::unordered_set<MachineView>
get_allowed_machine_views(MachineSpecification const &machinespec,

Check warning on line 431 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L431

Added line #L431 was not covered by tests
ParallelTensorShape const &shape) {

std::unordered_set<MachineView> views =
get_candidate_machine_views(machinespec, shape);
// First pass: keep views valid with respect to the tensor shape.
views = filter(views, [&](MachineView const &view) {
return is_valid_machine_view(view, shape);
});
// Second pass: keep views valid with respect to the machine.
views = filter(views, [&](MachineView const &view) {
return is_valid_machine_view(view, machinespec);
});
return views;

Check warning on line 442 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L434-L442

Added lines #L434 - L442 were not covered by tests
}

// Returns the allowed machine views for the given machine and tensor shape,
// each converted with to_start_invariant.
//
// Because the result is a set, allowed views that map to the same
// StartInvariantMachineView collapse into a single element.
std::unordered_set<StartInvariantMachineView>
get_allowed_start_invariant_machine_views(

Check warning on line 446 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L446

Added line #L446 was not covered by tests
MachineSpecification const &machinespec,
ParallelTensorShape const &shape) {
return transform(get_allowed_machine_views(machinespec, shape),
to_start_invariant);

Check warning on line 450 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L449-L450

Added lines #L449 - L450 were not covered by tests
}

// Placeholder: the body only invokes NOT_IMPLEMENTED().
// NOTE(review): judging by the name, this is meant to enumerate the
// bijections between the dimensions of a machine view and the parallel
// dimensions of a tensor — confirm the intended return type before
// implementing, since it is currently declared as `auto`.
auto get_all_machine_views_to_tensor_dim_bijections(

Check warning on line 453 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L453

Added line #L453 was not covered by tests
MachineView const &mv, ParallelTensorShape const &shape) {
NOT_IMPLEMENTED();

Check warning on line 455 in lib/compiler/src/machine_mapping.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/machine_mapping.cc#L455

Added line #L455 was not covered by tests
}

} // namespace FlexFlow
160 changes: 160 additions & 0 deletions lib/compiler/test/src/machine_mapping.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#include "compiler/machine_mapping.h"
#include "doctest/doctest.h"
#include "pcg/machine_specification.dtg.h"
#include "test_generator.h"
#include "utils/containers/extend.h"

TEST_SUITE(FF_TEST_SUITE) {

TEST_CASE("get_allowed_machine_view") {

  // Checks get_allowed_machine_views against hand-enumerated expected sets.

  SUBCASE("1 degree of parallelism") {
    MachineSpecification const machine_spec =
        MachineSpecification(5, 1, 1, 0, 0);

    // One shard dimension of degree 3; both replica degrees are 1.
    FFOrdered<ShardParallelDim> const shard_dims{
        ShardParallelDim{10, 3},
    };
    ReplicaParallelDimSet const replica_dims{
        SumDegree{1},
        DiscardCopyDegree{1},
    };
    ParallelTensorShape const tensor_shape{
        ParallelTensorDims{shard_dims, replica_dims},
        DataType::FLOAT,
    };

    // Expected 1d views: three with stride 1 and one with stride 2.
    std::unordered_set<MachineView> correct;
    correct.insert(make_1d_machine_view(gpu_id_t(0), gpu_id_t(3), stride_t(1)));
    correct.insert(make_1d_machine_view(gpu_id_t(1), gpu_id_t(4), stride_t(1)));
    correct.insert(make_1d_machine_view(gpu_id_t(2), gpu_id_t(5), stride_t(1)));
    correct.insert(make_1d_machine_view(gpu_id_t(0), gpu_id_t(6), stride_t(2)));

    std::unordered_set<MachineView> const result =
        get_allowed_machine_views(machine_spec, tensor_shape);
    CHECK(correct == result);
  }

  SUBCASE("2 degrees of parallelism") {
    MachineSpecification const machine_spec =
        MachineSpecification(18, 1, 1, 0, 0);

    // Shard degree 3 plus sum degree 2 gives two parallel dimensions.
    FFOrdered<ShardParallelDim> const shard_dims{
        ShardParallelDim{10, 3},
    };
    ReplicaParallelDimSet const replica_dims{
        SumDegree{2},
        DiscardCopyDegree{1},
    };
    ParallelTensorShape const tensor_shape{
        ParallelTensorDims{shard_dims, replica_dims},
        DataType::FLOAT,
    };

    // Each entry describes one family of expected 2x3 views: a stride pair
    // and the number of consecutive start devices (0 .. num_starts - 1) for
    // which a view with those strides is expected.
    struct ViewFamily {
      int num_starts;
      int stride1;
      int stride2;
    };
    ViewFamily const families[] = {
        {/*num_starts*/ 13, /*stride1*/ 1, /*stride2*/ 1},
        {/*num_starts*/ 8, /*stride1*/ 2, /*stride2*/ 1},
        {/*num_starts*/ 9, /*stride1*/ 1, /*stride2*/ 2},
        {/*num_starts*/ 3, /*stride1*/ 3, /*stride2*/ 1},
        {/*num_starts*/ 5, /*stride1*/ 1, /*stride2*/ 3},
        {/*num_starts*/ 1, /*stride1*/ 1, /*stride2*/ 4},
    };

    std::unordered_set<MachineView> correct;
    for (ViewFamily const &family : families) {
      for (int start = 0; start < family.num_starts; start++) {
        StridedRectangle rect = StridedRectangle{
            {StridedRectangleSide{num_points_t(2), stride_t(family.stride1)},
             StridedRectangleSide{num_points_t(3), stride_t(family.stride2)}}};
        correct.insert(MachineView{device_id_t(gpu_id_t(start)), rect});
      }
    }

    std::unordered_set<MachineView> const result =
        get_allowed_machine_views(machine_spec, tensor_shape);
    CHECK(result == correct);
  }
}

TEST_CASE("get_allowed_start_invariant_machine_views") {

  // Checks get_allowed_start_invariant_machine_views against hand-enumerated
  // expected sets.

  SUBCASE("1 degree of parallelism") {
    MachineSpecification const machine_spec =
        MachineSpecification(5, 1, 1, 0, 0);

    // One shard dimension of degree 3; both replica degrees are 1.
    FFOrdered<ShardParallelDim> const shard_dims{
        ShardParallelDim{10, 3},
    };
    ReplicaParallelDimSet const replica_dims{
        SumDegree{1},
        DiscardCopyDegree{1},
    };
    ParallelTensorShape const tensor_shape{
        ParallelTensorDims{shard_dims, replica_dims},
        DataType::FLOAT,
    };

    // Expected: 3-point 1d views with stride 1 and stride 2.
    int const expected_strides[] = {1, 2};
    std::unordered_set<StartInvariantMachineView> correct;
    for (int stride : expected_strides) {
      correct.insert(make_1d_start_invariant_machine_view(num_points_t(3),
                                                          stride_t(stride)));
    }

    std::unordered_set<StartInvariantMachineView> const result =
        get_allowed_start_invariant_machine_views(machine_spec, tensor_shape);
    CHECK(correct == result);
  }

  SUBCASE("2 degrees of parallelism") {
    MachineSpecification const machine_spec =
        MachineSpecification(18, 1, 1, 0, 0);

    // Shard degree 3 plus sum degree 2 gives two parallel dimensions.
    FFOrdered<ShardParallelDim> const shard_dims{
        ShardParallelDim{10, 3},
    };
    ReplicaParallelDimSet const replica_dims{
        SumDegree{2},
        DiscardCopyDegree{1},
    };
    ParallelTensorShape const tensor_shape{
        ParallelTensorDims{shard_dims, replica_dims},
        DataType::FLOAT,
    };

    // Expected (stride1, stride2) pairs for the 2x3 start-invariant views.
    struct StridePair {
      int stride1;
      int stride2;
    };
    StridePair const expected_strides[] = {
        {/*stride1*/ 1, /*stride2*/ 1},
        {/*stride1*/ 2, /*stride2*/ 1},
        {/*stride1*/ 1, /*stride2*/ 2},
        {/*stride1*/ 3, /*stride2*/ 1},
        {/*stride1*/ 1, /*stride2*/ 3},
        {/*stride1*/ 1, /*stride2*/ 4},
    };

    std::unordered_set<StartInvariantMachineView> correct;
    for (StridePair const &strides : expected_strides) {
      StridedRectangle rect = StridedRectangle{
          {StridedRectangleSide{num_points_t(2), stride_t(strides.stride1)},
           StridedRectangleSide{num_points_t(3), stride_t(strides.stride2)}}};
      correct.insert(StartInvariantMachineView{rect});
    }

    std::unordered_set<StartInvariantMachineView> const result =
        get_allowed_start_invariant_machine_views(machine_spec, tensor_shape);
    CHECK(result == correct);
  }
}

// TEST_CASE("MachineMapping::combine") {
// RC_SUBCASE([](MachineMapping const &m0, MachineMapping const &m1) {
// RC_PRE(MachineMapping::nodes_are_disjoint(m0, m1));

// MachineMapping comb = MachineMapping::combine(m0, m1);

// RC_ASSERT(comb.machine_views.size() ==
// m0.machine_views.size() + m1.machine_views.size());
// RC_ASSERT(is_submap(comb.machine_views, m0.machine_views));
// RC_ASSERT(is_submap(comb.machine_views, m1.machine_views));
// });
// }

// TEST_CASE("OptimalCostResult::infinity") {
// RC_SUBCASE([](OptimalCostResult const &c) {
// RC_ASSERT(c.runtime <= OptimalCostResult::infinity().runtime);
// });
// }
}
23 changes: 0 additions & 23 deletions lib/compiler/test/src/test_machine_mapping.cc

This file was deleted.

18 changes: 18 additions & 0 deletions lib/pcg/include/pcg/device_coordinates.struct.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Specification of the DeviceCoordinates struct in the FlexFlow namespace.
# NOTE(review): presumably consumed by the project's struct code generator
# (the one producing the *.dtg.h headers) — confirm.
namespace = "FlexFlow"
name = "DeviceCoordinates"
# Features requested for the struct; "rapidcheck" is listed but commented out.
features = [
"eq",
"ord",
"hash",
"json",
# "rapidcheck",
"fmt",
]

# Header needed by the field type below.
includes = [
"op-attrs/dim_ordered.h",
]

# Single field: an FFOrdered sequence of integer coordinates.
[[fields]]
name = "coords"
type = "::FlexFlow::FFOrdered<int>"
1 change: 1 addition & 0 deletions lib/pcg/include/pcg/device_id.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ device_id_t operator+(device_id_t, size_t);
DeviceType get_device_type(device_id_t const &device_id);
gpu_id_t unwrap_gpu(device_id_t);
cpu_id_t unwrap_cpu(device_id_t);
int get_raw_id(device_id_t);

device_id_t device_id_from_index(int, DeviceType);

Expand Down
Loading
Loading