flexflow · lockshaw · Oct 9, 2024 · Aug 6, 2024 · Aug 6, 2024 · Aug 6, 2024
diff --git a/lib/compiler/include/compiler/allowed_machine_views.h b/lib/compiler/include/compiler/allowed_machine_views.h
@@ -0,0 +1,31 @@
+#ifndef _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H
+#define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H
+
+#include "compiler/machine_view_to_tensor_mapping.dtg.h"
+#include "op-attrs/parallel_tensor_shape.dtg.h"
+#include "pcg/machine_specification.h"
+#include "pcg/machine_view.h"
+#include "pcg/start_invariant_machine_view.dtg.h"
+
+namespace FlexFlow {
+
+/**
+ * @brief Checks whether `mv` is valid for the machine described by
+ * `machine_spec`.
+ *
+ * @note The definition in allowed_machine_views.cc is currently a stub
+ * (marked TODO) that always returns false.
+ */
+bool is_valid_machine_view(MachineView const &mv,
+                           MachineSpecification const &machine_spec);
+
+/**
+ * @brief Checks whether `mv` is compatible with the parallelism of `shape`.
+ *
+ * Returns true iff the multiset of per-dimension device counts of `mv`
+ * equals the multiset of the parallel degrees of `shape` (shard degrees, sum
+ * degree and discard-copy degree) that are different from 1.
+ */
+bool is_valid_machine_view(MachineView const &mv,
+                           ParallelTensorShape const &shape);
+
+/**
+ * @brief Returns the machine views allowed for `shape` on `machine_spec`.
+ *
+ * A set of candidate views is generated internally and only the candidates
+ * that satisfy both `is_valid_machine_view` overloads are returned.
+ *
+ * @param device_type Device type of the generated views (defaults to GPU).
+ */
+std::unordered_set<MachineView>
+    get_allowed_machine_views(MachineSpecification const &machine_spec,
+                              ParallelTensorShape const &shape,
+                              DeviceType device_type = DeviceType::GPU);
+
+/**
+ * @brief Same as `get_allowed_machine_views`, with every resulting view
+ * converted through `start_invariant_from_machine_view`.
+ *
+ * @param device_type Device type of the generated views (defaults to GPU).
+ */
+std::unordered_set<StartInvariantMachineView>
+    get_allowed_start_invariant_machine_views(
+        MachineSpecification const &machine_spec,
+        ParallelTensorShape const &shape,
+        DeviceType device_type = DeviceType::GPU);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/compiler/include/compiler/machine_mapping.h b/lib/compiler/include/compiler/machine_mapping.h
@@ -4,8 +4,6 @@
 #include "compiler/machine_mapping.dtg.h"
 #include "compiler/optimal_cost_state.dtg.h"
 #include "cost_estimate.h"
-#include "pcg/machine_specification.dtg.h"
-#include "pcg/machine_specification.h"
 #include "pcg/machine_view.h"
 #include "pcg/parallel_computation_graph/parallel_computation_graph.h"
 #include "substitutions/sub_parallel_computation_graph.h"
@@ -55,15 +53,4 @@ OptimalCostResult optimal_cost(
 
 } // namespace FlexFlow
 
-// namespace std {
-//
-// template <>
-// struct hash<std::unordered_map<FlexFlow::Node, FlexFlow::MachineMapping>> {
-//   size_t operator()(
-//       std::unordered_map<FlexFlow::Node, FlexFlow::MachineMapping> const &g)
-//       const;
-// };
-
-// }; // namespace std
-
 #endif
diff --git a/lib/compiler/include/compiler/machine_view_to_tensor_mapping.h b/lib/compiler/include/compiler/machine_view_to_tensor_mapping.h
@@ -0,0 +1,22 @@
+#ifndef _FLEXFLOW_COMPILER_MACHINE_VIEW_TO_TENSOR_MAPPING_H
+#define _FLEXFLOW_COMPILER_MACHINE_VIEW_TO_TENSOR_MAPPING_H
+
+#include "compiler/machine_view_to_tensor_mapping.dtg.h"
+#include "op-attrs/parallel_tensor_shape.dtg.h"
+#include "pcg/machine_view.h"
+
+#include <unordered_set>
+
+namespace FlexFlow {
+
+/**
+ * @brief Checks that `mapping` only pairs dimensions of equal size.
+ *
+ * Returns true iff, for every (machine view dim, tensor dim) pair stored in
+ * `mapping`, the number of points of `mv` along the machine view dim equals
+ * the degree of the corresponding parallel dim of `shape`.
+ */
+bool is_valid_mapping(MachineViewToTensorMapping const &mapping,
+                      MachineView const &mv,
+                      ParallelTensorShape const &shape);
+
+/**
+ * @brief Enumerates the valid mappings between the dimensions of `mv` and
+ * the parallel dimensions of `shape`.
+ *
+ * The sorted machine view dimension indices are paired with every
+ * permutation of the tensor dimension indices whose degree is not 1; only
+ * the pairings accepted by `is_valid_mapping` are returned.
+ *
+ * @pre `is_valid_machine_view(mv, shape)` holds (checked with `assert`).
+ */
+std::unordered_set<MachineViewToTensorMapping>
+    get_all_machine_view_to_tensor_mappings(MachineView const &mv,
+                                            ParallelTensorShape const &shape);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/compiler/include/compiler/machine_view_to_tensor_mapping.struct.toml b/lib/compiler/include/compiler/machine_view_to_tensor_mapping.struct.toml
@@ -0,0 +1,17 @@
+# Struct specification for MachineViewToTensorMapping: a wrapper around a
+# single bidict that associates machine view dimension indices with parallel
+# tensor dimension indices.
+namespace = "FlexFlow"
+name = "MachineViewToTensorMapping"
+features = [
+  "eq",
+  "hash",
+  "fmt",
+]
+
+includes = [
+  "pcg/machine_view_dim_idx_t.dtg.h",
+  "op-attrs/parallel_tensor_dim_idx_t.dtg.h",
+  "utils/bidict/bidict.h",
+]
+
+# The underlying machine-view-dim <-> tensor-dim association.
+[[fields]]
+name = "raw_bidict"
+type = "::FlexFlow::bidict<::FlexFlow::machine_view_dim_idx_t, ::FlexFlow::parallel_tensor_dim_idx_t>"
diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc
@@ -0,0 +1,151 @@
+#include "compiler/allowed_machine_views.h"
+#include "op-attrs/parallel_tensor_dim_idx_t.h"
+#include "op-attrs/parallel_tensor_dims.h"
+#include "op-attrs/parallel_tensor_shape.h"
+#include "pcg/machine_specification.h"
+#include "pcg/machine_view.h"
+#include "pcg/machine_view_dim_idx_t.h"
+#include "pcg/multi_dimensional_stride.dtg.h"
+#include "pcg/start_invariant_machine_view.h"
+#include "utils/containers/all_of.h"
+#include "utils/containers/cartesian_product.h"
+#include "utils/containers/extend.h"
+#include "utils/containers/filter.h"
+#include "utils/containers/get_all_permutations.h"
+#include "utils/containers/product.h"
+#include "utils/containers/range.h"
+#include "utils/containers/replicate.h"
+#include "utils/containers/sorted.h"
+#include "utils/containers/transform.h"
+#include "utils/containers/unordered_multiset_of.h"
+#include "utils/containers/unordered_set_of.h"
+#include "utils/containers/zip.h"
+#include "utils/graph/serial_parallel/serial_parallel_decomposition.h"
+#include "utils/overload.h"
+
+namespace FlexFlow {
+
+/* Collects the parallel degrees of `shape` (shard degrees, sum degree and
+ * discard-copy degree) as a multiset of `num_points_t`, leaving out every
+ * degree equal to 1 since such a dimension is not parallelized.
+ */
+static std::unordered_multiset<num_points_t>
+    get_num_devices_per_parallel_dim(ParallelTensorShape const &shape) {
+  std::unordered_multiset<num_points_t> parallel_degrees;
+
+  // Record a degree only when it actually splits work (degree != 1).
+  auto record_if_parallel = [&parallel_degrees](int degree) {
+    if (degree != 1) {
+      parallel_degrees.insert(num_points_t{degree});
+    }
+  };
+
+  for (int shard_degree : ff_ordered_shard_degrees(shape)) {
+    record_if_parallel(shard_degree);
+  }
+  record_if_parallel(get_sum_degree(shape));
+  record_if_parallel(get_discard_copy_degree(shape));
+
+  return parallel_degrees;
+}
+
+/* Intended to check `mv` against the machine described by `machine_spec`.
+ *
+ * NOTE(review): this is an unimplemented stub that unconditionally returns
+ * false. `get_allowed_machine_views` requires this check to pass, so it
+ * yields an empty set until the real check is written.
+ */
+bool is_valid_machine_view(MachineView const &mv,
+                           MachineSpecification const &machine_spec) {
+  return false; // TODO: fix
+}
+
+/* A machine view matches a tensor shape when both expose the same multiset
+ * of dimension sizes: the per-dimension device counts of `mv` on one side,
+ * the parallel degrees of `shape` that differ from 1 on the other. The
+ * comparison ignores dimension order.
+ */
+bool is_valid_machine_view(MachineView const &mv,
+                           ParallelTensorShape const &shape) {
+  std::unordered_multiset<num_points_t> view_degrees =
+      unordered_multiset_of(get_num_devices_per_dim(mv));
+  std::unordered_multiset<num_points_t> shape_degrees =
+      get_num_devices_per_parallel_dim(shape);
+  return view_degrees == shape_degrees;
+}
+
+/* Generates a set of candidate `MachineView`s.
+ *
+ * The returned set includes all valid machine views, and might contain
+ * invalid ones. This function should never be used externally (see
+ * `get_allowed_machine_views` instead). There is no guarantee that a
+ * non-empty returned set contains a valid machine view (i.e. it's possible
+ * for all `MachineView`s to be invalid).
+ *
+ * Candidates are built from the sorted parallel degrees of `shape`: every
+ * candidate stride vector is turned into a strided rectangle, which is then
+ * combined with every candidate start coordinate.
+ */
+static std::unordered_set<MachineView>
+    get_candidate_machine_views(MachineSpecification const &machine_spec,
+                                ParallelTensorShape const &shape,
+                                DeviceType const &device_type) {
+
+  // All stride vectors (one stride per tensor dim) whose entries lie in
+  // [1, max_stride_upper_bound].
+  auto candidate_strides =
+      [](std::vector<num_points_t> const &tensor_dims,
+         int total_devices) -> std::unordered_multiset<MultiDimensionalStride> {
+    // Degree-1 dims are removed by get_num_devices_per_parallel_dim, so no
+    // factor of this product is zero.
+    int min_num_devices_with_full_stride_volume =
+        product(transform(tensor_dims, [](num_points_t const &num_devices) {
+          return num_devices.unwrapped - 1;
+        }));
+    // Round the quotient up using integer arithmetic. Applying std::ceil to
+    // an int / int expression has no effect, because the division has
+    // already truncated. Assumes both operands are positive.
+    int max_stride_upper_bound =
+        (total_devices + min_num_devices_with_full_stride_volume - 1) /
+        min_num_devices_with_full_stride_volume;
+
+    std::vector<stride_t> single_stride_range =
+        transform(range(1, max_stride_upper_bound + 1),
+                  [](int stride) { return stride_t(stride); });
+    std::unordered_multiset<std::vector<stride_t>> raw_stride_vectors =
+        cartesian_product(replicate(tensor_dims.size(), single_stride_range));
+    std::unordered_multiset<MultiDimensionalStride> strides =
+        transform(raw_stride_vectors, [](auto const &stride_vec) {
+          return MultiDimensionalStride{stride_vec};
+        });
+    return strides;
+  };
+
+  // All start coordinates whose i-th entry lies in [0, num_points of dim i).
+  auto candidate_starts =
+      [](std::vector<num_points_t> const &ordered_tensor_dims) {
+        std::vector<std::vector<int>> coordinate_ranges = transform(
+            ordered_tensor_dims, [&](num_points_t const &num_points) {
+              return range(num_points.unwrapped);
+            });
+
+        std::unordered_set<std::vector<int>> raw_coordinates =
+            unordered_set_of(cartesian_product(coordinate_ranges));
+        std::unordered_set<DeviceCoordinates> device_coordinates =
+            transform(raw_coordinates, [](std::vector<int> const &point) {
+              return DeviceCoordinates(point);
+            });
+        return device_coordinates;
+      };
+
+  // Both the sorted dims and the start coordinates are independent of the
+  // stride being visited, so compute them once instead of per iteration.
+  std::vector<num_points_t> const ordered_tensor_dims =
+      sorted(get_num_devices_per_parallel_dim(shape));
+  int total_devices = get_num_devices(machine_spec, device_type);
+  std::unordered_set<DeviceCoordinates> const starts =
+      candidate_starts(ordered_tensor_dims);
+
+  std::unordered_set<MachineView> machine_views;
+
+  for (MultiDimensionalStride const &strides :
+       candidate_strides(ordered_tensor_dims, total_devices)) {
+    StridedRectangle rect =
+        get_strided_rectangle(strides, ordered_tensor_dims);
+    StartInvariantMachineView start_inv_mv =
+        StartInvariantMachineView{rect, device_type};
+
+    for (DeviceCoordinates const &start : starts) {
+      machine_views.insert(
+          machine_view_from_start_invariant(start_inv_mv, start));
+    }
+  }
+
+  return machine_views;
+}
+
+/* Returns the candidate machine views for `shape` on `machine_spec` that
+ * pass both validity checks: first against the tensor shape, then against
+ * the machine specification.
+ */
+std::unordered_set<MachineView>
+    get_allowed_machine_views(MachineSpecification const &machine_spec,
+                              ParallelTensorShape const &shape,
+                              DeviceType device_type) {
+  auto is_allowed = [&](MachineView const &candidate) {
+    if (!is_valid_machine_view(candidate, shape)) {
+      return false;
+    }
+    return is_valid_machine_view(candidate, machine_spec);
+  };
+
+  std::unordered_set<MachineView> candidates =
+      get_candidate_machine_views(machine_spec, shape, device_type);
+  return filter(candidates, is_allowed);
+}
+
+/* Start-invariant counterpart of `get_allowed_machine_views`: computes the
+ * allowed machine views and converts each one with
+ * `start_invariant_from_machine_view`.
+ */
+std::unordered_set<StartInvariantMachineView>
+    get_allowed_start_invariant_machine_views(
+        MachineSpecification const &machine_spec,
+        ParallelTensorShape const &shape,
+        DeviceType device_type) {
+  std::unordered_set<MachineView> allowed_views =
+      get_allowed_machine_views(machine_spec, shape, device_type);
+  return transform(allowed_views, start_invariant_from_machine_view);
+}
+
+} // namespace FlexFlow
diff --git a/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc b/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc
@@ -0,0 +1,47 @@
+#include "compiler/machine_view_to_tensor_mapping.h"
+#include "compiler/allowed_machine_views.h"
+#include "op-attrs/parallel_tensor_dim_idx_t.h"
+#include "pcg/machine_view_dim_idx_t.h"
+#include "utils/containers/all_of.h"
+#include "utils/containers/filter.h"
+#include "utils/containers/get_all_permutations.h"
+#include "utils/containers/sorted.h"
+#include "utils/containers/zip.h"
+
+namespace FlexFlow {
+
+/* Enumerates every valid assignment of machine view dimensions to the
+ * parallel (degree != 1) dimensions of `shape`.
+ *
+ * The machine view dimensions are kept in sorted order and zipped with each
+ * permutation of the parallel tensor dimensions; a pairing is kept only if
+ * `is_valid_mapping` accepts it. Requires `is_valid_machine_view(mv, shape)`
+ * (enforced with `assert`).
+ */
+std::unordered_set<MachineViewToTensorMapping>
+    get_all_machine_view_to_tensor_mappings(MachineView const &mv,
+                                            ParallelTensorShape const &shape) {
+  assert(is_valid_machine_view(mv, shape));
+
+  std::vector<machine_view_dim_idx_t> mv_dims =
+      sorted(get_machine_view_indices(mv));
+
+  // Only tensor dims that are actually split take part in the mapping.
+  auto is_parallel_dim = [&](parallel_tensor_dim_idx_t const &idx) {
+    return get_degree(get_parallel_dim_at_idx(shape, idx)) != 1;
+  };
+  std::unordered_set<parallel_tensor_dim_idx_t> all_tensor_dims =
+      get_parallel_tensor_indices(shape);
+  std::unordered_set<parallel_tensor_dim_idx_t> parallel_tensor_dims =
+      filter(all_tensor_dims, is_parallel_dim);
+
+  std::unordered_set<MachineViewToTensorMapping> mappings;
+  for (std::vector<parallel_tensor_dim_idx_t> const &permuted_tensor_dims :
+       get_all_permutations(parallel_tensor_dims)) {
+    MachineViewToTensorMapping candidate = MachineViewToTensorMapping(
+        bidict(zip(mv_dims, permuted_tensor_dims)));
+    if (!is_valid_mapping(candidate, mv, shape)) {
+      continue;
+    }
+    mappings.insert(candidate);
+  }
+  return mappings;
+}
+
+/* A mapping is valid when every (machine view dim, tensor dim) pair it
+ * contains joins dimensions of the same size: the number of points of `mv`
+ * along the machine view dim must equal the degree of the tensor dim.
+ */
+bool is_valid_mapping(MachineViewToTensorMapping const &mapping,
+                      MachineView const &mv,
+                      ParallelTensorShape const &shape) {
+  auto sizes_agree = [&](auto const &dim_pair) {
+    int points_in_view =
+        get_side_at_idx(mv, dim_pair.first).num_points.unwrapped;
+    int degree_in_tensor =
+        get_degree(get_parallel_dim_at_idx(shape, dim_pair.second));
+    return points_in_view == degree_in_tensor;
+  };
+  return all_of(mapping.raw_bidict, sizes_agree);
+}
+} // namespace FlexFlow
diff --git a/lib/compiler/src/graph_utils.cc b/lib/compiler/src/graph_utils.cc
@@ -3,7 +3,6 @@
 #include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h"
 #include "pcg/parallel_computation_graph/parallel_computation_graph.h"
 #include "substitutions/sub_parallel_computation_graph.dtg.h"
-#include "utils/containers/without_order.h"
 #include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h"
 namespace FlexFlow {