Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New MachineView representation #1458

Merged
merged 40 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
15dd787
containers helper functions
Aug 6, 2024
1f43d69
Additional support for unordered_multiset
Aug 6, 2024
0f2cb56
format fix
Aug 6, 2024
1eeeba8
Unordered Machine Mapping and adjacent changes
Aug 7, 2024
53dca87
repo-refactor merge
Aug 7, 2024
d3f1657
formatting
Aug 7, 2024
67a94a2
Minor fixes
Aug 9, 2024
5a5d276
Update to StridedRectangle interface
Aug 9, 2024
b302903
Minor updates
Aug 9, 2024
775cb90
added get_allowed_machine_views
Aug 10, 2024
c0c1c00
formatting
Aug 10, 2024
fdd556e
minor fix
Aug 10, 2024
075f4de
Added StartInvariantMachineView
Aug 10, 2024
878954f
formatting
Aug 10, 2024
340f441
Containers fix
Aug 13, 2024
1e8fa90
Implemented tensor to machine view injection
Aug 13, 2024
6e6adaa
small refactor
Aug 13, 2024
4b8600d
formatting
Aug 13, 2024
94e2dc2
Merge branch 'repo-refactor' into unordered-machine-view
lockshaw Aug 22, 2024
c9532d1
Cleaning Up
Aug 28, 2024
7d078a1
Formatting fix
Aug 28, 2024
40ab5ef
new machine-view interface
Sep 9, 2024
f7a50d4
repo-refactor merge
Sep 13, 2024
3cbc6be
update to allowed machine views
Sep 13, 2024
7eff4f5
PR review fixes
Sep 13, 2024
21ca265
update to machine view and getting allowed machine view to match new …
Sep 15, 2024
0810bce
merge with `repo-refactor`
Oct 4, 2024
0385e01
formatting
Oct 4, 2024
b438b49
minor fix
Oct 4, 2024
52b7a26
PR fixes
Oct 5, 2024
3a5fbf4
PR fixes
Oct 5, 2024
c1e1c8c
machineview interface change
Oct 5, 2024
5cc2a2f
Minor PR fixes
Oct 9, 2024
e1cd5a2
.cc machine view fixes + added StartInvariantMachineView
Oct 9, 2024
1744555
Merge remote-tracking branch 'origin/repo-refactor' into pietro-machi…
lockshaw Oct 9, 2024
93f9bb4
minor PR fixes
Oct 9, 2024
3c3518a
minor fixes
Oct 9, 2024
9fc8712
Post-merge fixes
lockshaw Oct 9, 2024
aa2bb4b
Merge remote-tracking branch 'refs/remotes/Marsella8/unordered-machin…
lockshaw Oct 9, 2024
20f6a75
Format
lockshaw Oct 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions lib/compiler/include/compiler/allowed_machine_views.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#ifndef _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H
#define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H

#include "compiler/machine_view_to_tensor_mapping.dtg.h"
#include "op-attrs/parallel_tensor_shape.dtg.h"
#include "pcg/machine_specification.h"
#include "pcg/machine_view.h"
#include "pcg/start_invariant_machine_view.dtg.h"

namespace FlexFlow {

// Checks `mv` against the machine described by `machine_spec`.
// NOTE(review): the definition in allowed_machine_views.cc is currently a
// stub that unconditionally returns false (marked "TODO: fix").
bool is_valid_machine_view(MachineView const &mv,
MachineSpecification const &machine_spec);

// Checks `mv` against `shape`: returns true when the multiset of per-dimension
// point counts of `mv` equals the multiset of the shape's shard, sum and
// discard-copy degrees, ignoring degrees equal to 1.
bool is_valid_machine_view(MachineView const &mv,
ParallelTensorShape const &shape);

// Returns the candidate machine views for (`machine_spec`, `shape`,
// `device_type`) that pass both `is_valid_machine_view` overloads.
// `device_type` defaults to `DeviceType::GPU`.
std::unordered_set<MachineView>
get_allowed_machine_views(MachineSpecification const &machine_spec,
ParallelTensorShape const &shape,
DeviceType device_type = DeviceType::GPU);

// Returns the result of `get_allowed_machine_views` with every view converted
// through `start_invariant_from_machine_view`.
// `device_type` defaults to `DeviceType::GPU`.
std::unordered_set<StartInvariantMachineView>
get_allowed_start_invariant_machine_views(
MachineSpecification const &machine_spec,
ParallelTensorShape const &shape,
DeviceType device_type = DeviceType::GPU);

} // namespace FlexFlow

#endif
13 changes: 0 additions & 13 deletions lib/compiler/include/compiler/machine_mapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
#include "compiler/machine_mapping.dtg.h"
#include "compiler/optimal_cost_state.dtg.h"
#include "cost_estimate.h"
#include "pcg/machine_specification.dtg.h"
#include "pcg/machine_specification.h"
#include "pcg/machine_view.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
#include "substitutions/sub_parallel_computation_graph.h"
Expand Down Expand Up @@ -55,15 +53,4 @@ OptimalCostResult optimal_cost(

} // namespace FlexFlow

// namespace std {
//
// template <>
// struct hash<std::unordered_map<FlexFlow::Node, FlexFlow::MachineMapping>> {
// size_t operator()(
// std::unordered_map<FlexFlow::Node, FlexFlow::MachineMapping> const &g)
// const;
// };

// }; // namespace std

#endif
22 changes: 22 additions & 0 deletions lib/compiler/include/compiler/machine_view_to_tensor_mapping.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef _FLEXFLOW_COMPILER_MACHINE_VIEW_TO_TENSOR_MAPPING_H
#define _FLEXFLOW_COMPILER_MACHINE_VIEW_TO_TENSOR_MAPPING_H

#include "compiler/machine_view_to_tensor_mapping.dtg.h"
#include "op-attrs/parallel_tensor_shape.dtg.h"
#include "pcg/machine_view.h"

#include <unordered_set>

namespace FlexFlow {

// Returns true when, for every (machine-view dim, tensor dim) pair stored in
// `mapping`, the number of points of that side of `mv` equals the degree of
// the corresponding parallel dimension of `shape`.
bool is_valid_mapping(MachineViewToTensorMapping const &mapping,
MachineView const &mv,
ParallelTensorShape const &shape);

// Enumerates the mappings between the dimensions of `mv` and the dimensions of
// `shape` whose degree is not 1, keeping only those accepted by
// `is_valid_mapping`. The definition asserts
// `is_valid_machine_view(mv, shape)` on entry.
std::unordered_set<MachineViewToTensorMapping>
get_all_machine_view_to_tensor_mappings(MachineView const &mv,
ParallelTensorShape const &shape);

} // namespace FlexFlow

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
namespace = "FlexFlow"
name = "MachineViewToTensorMapping"
features = [
"eq",
"hash",
"fmt",
]

includes = [
"pcg/machine_view_dim_idx_t.dtg.h",
"op-attrs/parallel_tensor_dim_idx_t.dtg.h",
"utils/bidict/bidict.h",
]

[[fields]]
name = "raw_bidict"
type = "::FlexFlow::bidict<::FlexFlow::machine_view_dim_idx_t, ::FlexFlow::parallel_tensor_dim_idx_t>"
151 changes: 151 additions & 0 deletions lib/compiler/src/compiler/allowed_machine_views.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#include "compiler/allowed_machine_views.h"
#include "op-attrs/parallel_tensor_dim_idx_t.h"
#include "op-attrs/parallel_tensor_dims.h"
#include "op-attrs/parallel_tensor_shape.h"
#include "pcg/machine_specification.h"
#include "pcg/machine_view.h"
#include "pcg/machine_view_dim_idx_t.h"
#include "pcg/multi_dimensional_stride.dtg.h"
#include "pcg/start_invariant_machine_view.h"
#include "utils/containers/all_of.h"
#include "utils/containers/cartesian_product.h"
#include "utils/containers/extend.h"
#include "utils/containers/filter.h"
#include "utils/containers/get_all_permutations.h"
#include "utils/containers/product.h"
#include "utils/containers/range.h"
#include "utils/containers/replicate.h"
#include "utils/containers/sorted.h"
#include "utils/containers/transform.h"
#include "utils/containers/unordered_multiset_of.h"
#include "utils/containers/unordered_set_of.h"
#include "utils/containers/zip.h"
#include "utils/graph/serial_parallel/serial_parallel_decomposition.h"
#include "utils/overload.h"

namespace FlexFlow {

// Collects the degrees of `shape` (shard degrees, sum degree and discard-copy
// degree), drops those equal to 1 (non-parallel dims), and wraps each
// remaining degree in a `num_points_t`.
static std::unordered_multiset<num_points_t>
    get_num_devices_per_parallel_dim(ParallelTensorShape const &shape) {
  std::unordered_multiset<int> all_degrees =
      unordered_multiset_of(ff_ordered_shard_degrees(shape));
  all_degrees.insert(get_sum_degree(shape));
  all_degrees.insert(get_discard_copy_degree(shape));

  // A degree of 1 means the dimension is not parallelized.
  auto is_parallel = [](int degree) { return degree != 1; };
  auto as_num_points = [](int degree) { return num_points_t{degree}; };

  std::unordered_multiset<int> parallel_degrees =
      filter(all_degrees, is_parallel);
  return transform(parallel_degrees, as_num_points);
}

// Placeholder for the check of `mv` against `machine_spec`.
//
// NOTE: not implemented yet; neither argument is inspected and the result is
// always false. Because `get_allowed_machine_views` in this file requires this
// predicate to hold for every view it keeps, it returns an empty set until
// this stub is replaced.
bool is_valid_machine_view(MachineView const &mv,
MachineSpecification const &machine_spec) {
return false; // TODO: fix -- implement the real check against machine_spec
}

// A machine view matches a tensor shape when the multiset of its per-dimension
// device counts equals the multiset of the shape's parallel degrees
// (order-insensitive comparison).
bool is_valid_machine_view(MachineView const &mv,
                           ParallelTensorShape const &shape) {
  std::unordered_multiset<num_points_t> view_counts =
      unordered_multiset_of(get_num_devices_per_dim(mv));
  std::unordered_multiset<num_points_t> shape_counts =
      get_num_devices_per_parallel_dim(shape);

  return view_counts == shape_counts;
}

/* Generates a set of candidate `MachineView`s.
 *
 * The returned set includes all valid machine views, and might contain
 * invalid ones. This function should never be used externally (see
 * `get_allowed_machine_views` instead). There is no guarantee that a
 * non-empty returned set contains a valid machine view (i.e. it's possible
 * for all `MachineView`s to be invalid).
 *
 * Candidates are built as the cross product of
 *   - every stride vector whose entries lie in [1, max_stride_upper_bound],
 *   - every start coordinate produced by `candidate_starts`,
 * using the sorted parallel degrees of `shape` as the per-dimension number of
 * points.
 */
static std::unordered_set<MachineView>
    get_candidate_machine_views(MachineSpecification const &machine_spec,
                                ParallelTensorShape const &shape,
                                DeviceType const &device_type) {

  // All stride vectors with one entry per tensor dim, each entry in
  // [1, max_stride_upper_bound].
  auto candidate_strides =
      [](std::vector<num_points_t> const &dims,
         int total_devices) -> std::unordered_multiset<MultiDimensionalStride> {
    // Each factor is (num_devices - 1); dims of degree 1 were filtered out by
    // get_num_devices_per_parallel_dim, so for positive degrees every factor
    // is at least 1.
    int min_num_devices_with_full_stride_volume =
        product(transform(dims, [](num_points_t const &num_devices) {
          return num_devices.unwrapped - 1;
        }));
    // Integer ceiling division. The previous
    // `std::ceil(total_devices / volume)` truncated in the int division
    // before std::ceil ran, so the bound was rounded down instead of up.
    int max_stride_upper_bound =
        (total_devices + min_num_devices_with_full_stride_volume - 1) /
        min_num_devices_with_full_stride_volume;

    std::vector<stride_t> single_stride_range =
        transform(range(1, max_stride_upper_bound + 1),
                  [](int stride) { return stride_t(stride); });
    std::unordered_multiset<std::vector<stride_t>> raw_stride_vectors =
        cartesian_product(replicate(dims.size(), single_stride_range));
    std::unordered_multiset<MultiDimensionalStride> strides =
        transform(raw_stride_vectors, [](auto const &stride_vec) {
          return MultiDimensionalStride{stride_vec};
        });
    return strides;
  };

  // All start coordinates whose i-th component lies in [0, dims[i]).
  // NOTE(review): the start range is bounded by the tensor's parallel degree
  // in each dimension, not by the machine's extent -- confirm this is the
  // intended search space.
  auto candidate_starts = [](std::vector<num_points_t> const &dims) {
    std::vector<std::vector<int>> coordinate_ranges =
        transform(dims, [&](num_points_t const &num_points) {
          return range(num_points.unwrapped);
        });

    std::unordered_set<std::vector<int>> raw_coordinates =
        unordered_set_of(cartesian_product(coordinate_ranges));
    std::unordered_set<DeviceCoordinates> device_coordinates =
        transform(raw_coordinates, [](std::vector<int> const &point) {
          return DeviceCoordinates(point);
        });
    return device_coordinates;
  };

  std::unordered_multiset<num_points_t> tensor_dims =
      get_num_devices_per_parallel_dim(shape);
  int total_devices = get_num_devices(machine_spec, device_type);

  // Loop-invariant inputs: computed once instead of once per stride.
  std::vector<num_points_t> ordered_tensor_dims = sorted(tensor_dims);
  std::unordered_set<DeviceCoordinates> starts =
      candidate_starts(ordered_tensor_dims);

  std::unordered_set<MachineView> machine_views;

  for (MultiDimensionalStride const &strides :
       candidate_strides(ordered_tensor_dims, total_devices)) {
    StridedRectangle rect = get_strided_rectangle(strides, ordered_tensor_dims);
    StartInvariantMachineView start_inv_mv =
        StartInvariantMachineView{rect, device_type};

    for (DeviceCoordinates const &start : starts) {
      machine_views.insert(
          machine_view_from_start_invariant(start_inv_mv, start));
    }
  }

  return machine_views;
}

// Builds the candidate machine views for the given machine/shape/device type
// and keeps only those that are valid with respect to both the tensor shape
// and the machine specification.
std::unordered_set<MachineView>
    get_allowed_machine_views(MachineSpecification const &machine_spec,
                              ParallelTensorShape const &shape,
                              DeviceType device_type) {
  // The shape check runs first; the machine check only runs if it passes.
  auto is_allowed = [&](MachineView const &candidate) {
    if (!is_valid_machine_view(candidate, shape)) {
      return false;
    }
    return is_valid_machine_view(candidate, machine_spec);
  };

  std::unordered_set<MachineView> candidates =
      get_candidate_machine_views(machine_spec, shape, device_type);
  return filter(candidates, is_allowed);
}

// Same as `get_allowed_machine_views`, but each resulting view is converted to
// its start-invariant form; the results are collected in a set.
std::unordered_set<StartInvariantMachineView>
    get_allowed_start_invariant_machine_views(
        MachineSpecification const &machine_spec,
        ParallelTensorShape const &shape,
        DeviceType device_type) {
  std::unordered_set<MachineView> allowed_views =
      get_allowed_machine_views(machine_spec, shape, device_type);
  return transform(allowed_views, [](MachineView const &view) {
    return start_invariant_from_machine_view(view);
  });
}

} // namespace FlexFlow
47 changes: 47 additions & 0 deletions lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#include "compiler/machine_view_to_tensor_mapping.h"
#include "compiler/allowed_machine_views.h"
#include "op-attrs/parallel_tensor_dim_idx_t.h"
#include "pcg/machine_view_dim_idx_t.h"
#include "utils/containers/all_of.h"
#include "utils/containers/filter.h"
#include "utils/containers/get_all_permutations.h"
#include "utils/containers/sorted.h"
#include "utils/containers/zip.h"

namespace FlexFlow {

// Enumerates every valid assignment of machine-view dimensions to the
// parallel (degree != 1) dimensions of `shape`.
//
// The machine-view dimensions are taken in sorted order and zipped against
// each permutation of the parallel tensor dimensions; a pairing is kept only
// if `is_valid_mapping` accepts it. Asserts that `mv` is valid for `shape`.
std::unordered_set<MachineViewToTensorMapping>
    get_all_machine_view_to_tensor_mappings(MachineView const &mv,
                                            ParallelTensorShape const &shape) {
  assert(is_valid_machine_view(mv, shape));

  // Only dimensions that are actually parallelized take part in the mapping.
  auto has_parallelism = [&](parallel_tensor_dim_idx_t const &idx) {
    return get_degree(get_parallel_dim_at_idx(shape, idx)) != 1;
  };
  std::unordered_set<parallel_tensor_dim_idx_t> all_tensor_dims =
      get_parallel_tensor_indices(shape);
  std::unordered_set<parallel_tensor_dim_idx_t> parallel_tensor_dims =
      filter(all_tensor_dims, has_parallelism);

  std::vector<machine_view_dim_idx_t> mv_dims =
      sorted(get_machine_view_indices(mv));

  std::unordered_set<MachineViewToTensorMapping> mappings;
  for (std::vector<parallel_tensor_dim_idx_t> const &tensor_dim_order :
       get_all_permutations(parallel_tensor_dims)) {
    MachineViewToTensorMapping candidate =
        MachineViewToTensorMapping(bidict(zip(mv_dims, tensor_dim_order)));
    if (!is_valid_mapping(candidate, mv, shape)) {
      continue;
    }
    mappings.insert(candidate);
  }
  return mappings;
}

// A mapping is valid when every (machine-view dim, tensor dim) pair it holds
// connects a machine-view side and a tensor dimension of equal degree.
bool is_valid_mapping(MachineViewToTensorMapping const &mapping,
                      MachineView const &mv,
                      ParallelTensorShape const &shape) {
  auto degrees_agree = [&](auto const &entry) {
    int const side_points =
        get_side_at_idx(mv, entry.first).num_points.unwrapped;
    int const dim_degree =
        get_degree(get_parallel_dim_at_idx(shape, entry.second));
    return dim_degree == side_points;
  };
  return all_of(mapping.raw_bidict, degrees_agree);
}
} // namespace FlexFlow
1 change: 0 additions & 1 deletion lib/compiler/src/graph_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
#include "substitutions/sub_parallel_computation_graph.dtg.h"
#include "utils/containers/without_order.h"
#include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h"
namespace FlexFlow {

Expand Down
Loading