From 15dd787dc6d9eeabfa845b3965e9d50f4809a097 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 6 Aug 2024 15:31:50 -0700 Subject: [PATCH 01/34] containers helper functions --- .../utils/containers/cartesian_product.h | 42 +++++++++++++ lib/utils/include/utils/containers/foldl.h | 31 ++++++++++ lib/utils/include/utils/containers/range.h | 34 +++++++++++ .../include/utils/containers/replicate.h | 19 ++++++ lib/utils/include/utils/containers/scanl.h | 40 +++++++++++++ .../src/utils/containers/cartesian_product.cc | 1 + lib/utils/src/utils/containers/foldl.cc | 1 + lib/utils/src/utils/containers/range.cc | 1 + lib/utils/src/utils/containers/replicate.cc | 1 + lib/utils/src/utils/containers/scanl.cc | 1 + .../src/utils/containers/cartesian_product.cc | 60 +++++++++++++++++++ lib/utils/test/src/utils/containers/foldl.cc | 40 +++++++++++++ lib/utils/test/src/utils/containers/range.cc | 47 +++++++++++++++ .../test/src/utils/containers/replicate.cc | 25 ++++++++ lib/utils/test/src/utils/containers/scanl.cc | 54 +++++++++++++++++ 15 files changed, 397 insertions(+) create mode 100644 lib/utils/include/utils/containers/cartesian_product.h create mode 100644 lib/utils/include/utils/containers/foldl.h create mode 100644 lib/utils/include/utils/containers/range.h create mode 100644 lib/utils/include/utils/containers/replicate.h create mode 100644 lib/utils/include/utils/containers/scanl.h create mode 100644 lib/utils/src/utils/containers/cartesian_product.cc create mode 100644 lib/utils/src/utils/containers/foldl.cc create mode 100644 lib/utils/src/utils/containers/range.cc create mode 100644 lib/utils/src/utils/containers/replicate.cc create mode 100644 lib/utils/src/utils/containers/scanl.cc create mode 100644 lib/utils/test/src/utils/containers/cartesian_product.cc create mode 100644 lib/utils/test/src/utils/containers/foldl.cc create mode 100644 lib/utils/test/src/utils/containers/range.cc create mode 100644 lib/utils/test/src/utils/containers/replicate.cc 
create mode 100644 lib/utils/test/src/utils/containers/scanl.cc diff --git a/lib/utils/include/utils/containers/cartesian_product.h b/lib/utils/include/utils/containers/cartesian_product.h new file mode 100644 index 0000000000..dd143bc302 --- /dev/null +++ b/lib/utils/include/utils/containers/cartesian_product.h @@ -0,0 +1,42 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_CARTESIAN_PRODUCT_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_CARTESIAN_PRODUCT_H + +#include "utils/containers/as_vector.h" +#include "utils/hash/vector.h" +#include +#include +#include + +namespace FlexFlow { + +template +auto cartesian_product(Container const &containers) { + using ValueType = typename Container::value_type::value_type; + using VectorType = std::vector; + using SetType = std::unordered_multiset; + auto ordered = as_vector(containers); + SetType result; + + std::function recurse = [&](VectorType ¤t, + std::size_t depth) { + if (depth == ordered.size()) { + result.insert(current); + return; + } + + for (const auto &item : ordered[depth]) { + current.push_back(item); + recurse(current, depth + 1); + current.pop_back(); + } + }; + + VectorType current; + recurse(current, 0); + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/foldl.h b/lib/utils/include/utils/containers/foldl.h new file mode 100644 index 0000000000..bdcb6d1270 --- /dev/null +++ b/lib/utils/include/utils/containers/foldl.h @@ -0,0 +1,31 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H + +#include +#include + +namespace FlexFlow { + +template +T foldl(C const &c, T init, F func) { + T result = init; + for (auto const &elem : c) { + result = func(result, elem); + } + return result; +} + +template +auto foldl1(C const &c, F func) -> typename C::value_type { + auto it = c.begin(); + assert(it != c.cend()); + + typename C::value_type init = *it; + ++it; + C 
remaining(it, c.end()); + return foldl(remaining, init, func); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/range.h b/lib/utils/include/utils/containers/range.h new file mode 100644 index 0000000000..b7cd3f93e6 --- /dev/null +++ b/lib/utils/include/utils/containers/range.h @@ -0,0 +1,34 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_RANGE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_RANGE_H + +#include +#include +#include + +namespace FlexFlow { + +template +std::vector range(T start, T end, T step = 1) { + assert(step != 0); + + std::vector result; + if (step > 0) { + for (T i = start; i < end; i += step) { + result.push_back(i); + } + } else { + for (T i = start; i > end; i += step) { + result.push_back(i); + } + } + return result; +} + +template +std::vector range(T end) { + return range(T(0), end); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/replicate.h b/lib/utils/include/utils/containers/replicate.h new file mode 100644 index 0000000000..8a8fca532e --- /dev/null +++ b/lib/utils/include/utils/containers/replicate.h @@ -0,0 +1,19 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H + +#include + +namespace FlexFlow { + +template +std::unordered_multiset replicate(std::size_t n, T const &element) { + std::unordered_multiset result; + for (std::size_t i = 0; i < n; ++i) { + result.insert(element); + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/scanl.h b/lib/utils/include/utils/containers/scanl.h new file mode 100644 index 0000000000..54acf2d743 --- /dev/null +++ b/lib/utils/include/utils/containers/scanl.h @@ -0,0 +1,40 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANL_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANL_H + +#include + +namespace FlexFlow { + +template +std::vector scanl(C 
const &c, T init, F const &op) { + std::vector result; + + result.push_back(init); + + for (auto const &elem : c) { + init = op(init, elem); + result.push_back(init); + } + + return result; +} + +template +auto scanl1(C const &c, F op) { + using T = typename C::value_type; + + if (c.empty()) { + return std::vector(); + } + + auto it = c.begin(); + T init = *it; + ++it; + + C remaining(it, c.end()); + return scanl(remaining, init, op); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/src/utils/containers/cartesian_product.cc b/lib/utils/src/utils/containers/cartesian_product.cc new file mode 100644 index 0000000000..b716a49ad5 --- /dev/null +++ b/lib/utils/src/utils/containers/cartesian_product.cc @@ -0,0 +1 @@ +#include "utils/containers/cartesian_product.h" diff --git a/lib/utils/src/utils/containers/foldl.cc b/lib/utils/src/utils/containers/foldl.cc new file mode 100644 index 0000000000..85db79a34d --- /dev/null +++ b/lib/utils/src/utils/containers/foldl.cc @@ -0,0 +1 @@ +#include "utils/containers/foldl.h" \ No newline at end of file diff --git a/lib/utils/src/utils/containers/range.cc b/lib/utils/src/utils/containers/range.cc new file mode 100644 index 0000000000..8612f334b7 --- /dev/null +++ b/lib/utils/src/utils/containers/range.cc @@ -0,0 +1 @@ +#include "utils/containers/range.h" diff --git a/lib/utils/src/utils/containers/replicate.cc b/lib/utils/src/utils/containers/replicate.cc new file mode 100644 index 0000000000..2fb2f079f6 --- /dev/null +++ b/lib/utils/src/utils/containers/replicate.cc @@ -0,0 +1 @@ +#include "utils/containers/replicate.h" diff --git a/lib/utils/src/utils/containers/scanl.cc b/lib/utils/src/utils/containers/scanl.cc new file mode 100644 index 0000000000..4f7ff78b9f --- /dev/null +++ b/lib/utils/src/utils/containers/scanl.cc @@ -0,0 +1 @@ +#include "utils/containers/scanl.h" diff --git a/lib/utils/test/src/utils/containers/cartesian_product.cc b/lib/utils/test/src/utils/containers/cartesian_product.cc new file mode 
100644 index 0000000000..7809f0c513 --- /dev/null +++ b/lib/utils/test/src/utils/containers/cartesian_product.cc @@ -0,0 +1,60 @@ +#include "utils/containers/cartesian_product.h" +#include +#include +#include + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("cartesian_product") { + SUBCASE("empty") { + std::vector> containers = {}; + std::unordered_multiset> result = + cartesian_product(containers); + std::unordered_multiset> correct = {{}}; + CHECK(result == correct); + } + + SUBCASE("single container, one element") { + std::vector> containers = {{1}}; + std::unordered_multiset> result = + cartesian_product(containers); + std::unordered_multiset> correct = {{1}}; + CHECK(result == correct); + } + + SUBCASE("single container, multiple elements") { + std::vector> containers = {{1, 2, 3}}; + std::unordered_multiset> result = + cartesian_product(containers); + std::unordered_multiset> correct = {{1}, {2}, {3}}; + CHECK(result == correct); + } + + SUBCASE("multiple containers, one element each") { + std::vector> containers = {{1}, {2}, {3}}; + std::unordered_multiset> result = + cartesian_product(containers); + std::unordered_multiset> correct = {{1, 2, 3}}; + CHECK(result == correct); + } + + SUBCASE("multiple containers, multiple elements") { + std::vector> containers = {{1, 2}, {3, 4}}; + std::unordered_multiset> result = + cartesian_product(containers); + std::unordered_multiset> correct = { + {1, 3}, {1, 4}, {2, 3}, {2, 4}}; + CHECK(result == correct); + } + + SUBCASE("multiple containers, duplicate elements") { + std::vector> containers = {{1, 1}, {2, 3}}; + std::unordered_multiset> result = + cartesian_product(containers); + std::unordered_multiset> correct = { + {1, 2}, {1, 3}, {1, 3}, {1, 2}}; + CHECK(result == correct); + } + } +} diff --git a/lib/utils/test/src/utils/containers/foldl.cc b/lib/utils/test/src/utils/containers/foldl.cc new file mode 100644 index 0000000000..ff43299539 --- /dev/null +++ 
b/lib/utils/test/src/utils/containers/foldl.cc @@ -0,0 +1,40 @@ +#include "utils/containers/foldl.h" +#include +#include +#include + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("foldl") { + SUBCASE("product") { + std::vector container = {1, 2, 3, 4, 5}; + int result = foldl(container, 1, [](int acc, int elem) { return acc * elem; }); + int correct = 120; + CHECK(result == correct); + } + + SUBCASE("string concat") { + std::vector container = {1, 2, 3, 4, 5}; + std::string result = foldl(container, std::string(""), [](std::string acc, int elem) { return acc + std::to_string(elem); }); + std::string correct = "12345"; + CHECK(result == correct); + } + } + + TEST_CASE("foldl1") { + SUBCASE("product") { + std::vector container = {1, 2, 3, 4, 5}; + int result = foldl1(container, [](int acc, int elem) { return acc * elem; }); + int correct = 120; + CHECK(result == correct); + } + + SUBCASE("string concat") { + std::vector container = {"1", "2", "3", "4", "5"}; + std::string result = foldl1(container, [](std::string acc, std::string elem) { return acc + elem; }); + std::string correct = "12345"; + CHECK(result == correct); + } + } +} diff --git a/lib/utils/test/src/utils/containers/range.cc b/lib/utils/test/src/utils/containers/range.cc new file mode 100644 index 0000000000..dbc9b50073 --- /dev/null +++ b/lib/utils/test/src/utils/containers/range.cc @@ -0,0 +1,47 @@ +#include "utils/containers/range.h" +#include "utils/hash/unordered_set.h" +#include +#include +#include + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("range") { + SUBCASE("basic integer range") { + std::vector result = range(0, 5); + std::vector correct = {0, 1, 2, 3, 4}; + CHECK(result == correct); + } + + SUBCASE("integer range with step") { + std::vector result = range(-2, 10, 2); + std::vector correct = {-2, 0, 2, 4, 6, 8}; + CHECK(result == correct); + } + + SUBCASE("negative integer range") { + std::vector result = range(5, 0, -1); + std::vector 
correct = {5, 4, 3, 2, 1}; + CHECK(result == correct); + } + + SUBCASE("single argument range") { + std::vector result = range(5); + std::vector correct = {0, 1, 2, 3, 4}; + CHECK(result == correct); + } + + SUBCASE("empty range") { + std::vector result = range(5, 5); + std::vector correct = {}; + CHECK(result == correct); + } + + SUBCASE("empty range") { + std::vector result = range(5, 4); + std::vector correct = {}; + CHECK(result == correct); + } + } +} diff --git a/lib/utils/test/src/utils/containers/replicate.cc b/lib/utils/test/src/utils/containers/replicate.cc new file mode 100644 index 0000000000..b992a26487 --- /dev/null +++ b/lib/utils/test/src/utils/containers/replicate.cc @@ -0,0 +1,25 @@ +#include "utils/containers/replicate.h" +#include "utils/hash/unordered_set.h" +#include +#include + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("replicate") { + SUBCASE("ints") { + int x = 42; + std::unordered_multiset result = replicate(5, x); + std::unordered_multiset correct = {42, 42, 42, 42, 42}; + CHECK(result == correct); + } + SUBCASE("unordered_set") { + std::unordered_set x = {1.0, 1.5}; + std::unordered_multiset> result = + replicate(3, x); + std::unordered_multiset> correct = { + {1.0, 1.5}, {1.0, 1.5}, {1.0, 1.5}}; + CHECK(result == correct); + } + } +} diff --git a/lib/utils/test/src/utils/containers/scanl.cc b/lib/utils/test/src/utils/containers/scanl.cc new file mode 100644 index 0000000000..d8c31bfb92 --- /dev/null +++ b/lib/utils/test/src/utils/containers/scanl.cc @@ -0,0 +1,54 @@ +#include "utils/containers/scanl.h" +#include +#include +#include +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + + TEST_CASE("scanl") { + SUBCASE("sum") { + std::vector input = {1, 2, 3, 4}; + std::vector result = + scanl(input, 0, [](int a, int b) { return a + b; }); + std::vector correct = {0, 1, 3, 6, 10}; + CHECK(result == correct); + } + + SUBCASE("custom function") { + std::vector input = {1, 3, 1, 2}; + auto op = [](int a, int 
b) { return (a + 1) * (b + 1); }; + std::vector result = scanl(input, 1, op); + std::vector correct = {1, 4, 20, 42, 129}; + CHECK(result == correct); + } + + SUBCASE("heterogenous types") { + std::vector input = {1, 2, 3, 4}; + auto op = [](std::string const &a, int b) { + return a + std::to_string(b); + }; + std::vector result = scanl(input, std::string(""), op); + std::vector correct = {"", "1", "12", "123", "1234"}; + CHECK(result == correct); + } + } + + TEST_CASE("scanl1") { + SUBCASE("sum") { + std::vector input = {1, 2, 3, 4}; + std::vector result = + scanl1(input, [](int a, int b) { return a + b; }); + std::vector correct = {1, 3, 6, 10}; + CHECK(result == correct); + } + + SUBCASE("custom function") { + std::vector input = {1, 2, 5, 2}; + auto op = [](int a, int b) { return a * b + 1; }; + std::vector result = scanl1(input, op); + std::vector correct = {1, 3, 16, 33}; + CHECK(result == correct); + } + } +} From 1f43d69ae65f0c2528cee09c65e959fb630ad482 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 6 Aug 2024 16:31:02 -0700 Subject: [PATCH 02/34] Additional support for unordered_multiset --- .../include/utils/containers/transform.h | 11 +++++ .../include/utils/fmt/unordered_multiset.h | 44 +++++++++++++++++++ lib/utils/include/utils/fmt/unordered_set.h | 1 - .../include/utils/hash/unordered_multiset.h | 20 +++++++++ lib/utils/src/utils/fmt/unordered_multiset.cc | 1 + .../src/utils/hash/unordered_multiset.cc | 1 + 6 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 lib/utils/include/utils/fmt/unordered_multiset.h create mode 100644 lib/utils/include/utils/hash/unordered_multiset.h create mode 100644 lib/utils/src/utils/fmt/unordered_multiset.cc create mode 100644 lib/utils/src/utils/hash/unordered_multiset.cc diff --git a/lib/utils/include/utils/containers/transform.h b/lib/utils/include/utils/containers/transform.h index c40e05b591..374dd92da0 100644 --- a/lib/utils/include/utils/containers/transform.h +++ 
b/lib/utils/include/utils/containers/transform.h @@ -31,6 +31,17 @@ std::unordered_set transform(std::unordered_set const &v, F const &f) { return result; } +template ()(std::declval()))> +std::unordered_multiset transform(std::unordered_multiset const &v, F const &f) { + std::unordered_multiset result; + for (auto const &e : v) { + result.insert(f(e)); + } + return result; +} + template std::string transform(std::string const &s, F const &f) { std::string result; diff --git a/lib/utils/include/utils/fmt/unordered_multiset.h b/lib/utils/include/utils/fmt/unordered_multiset.h new file mode 100644 index 0000000000..8b13378519 --- /dev/null +++ b/lib/utils/include/utils/fmt/unordered_multiset.h @@ -0,0 +1,44 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_FMT_UNORDERED_MULTISET_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_FMT_UNORDERED_MULTISET_H + +#include "utils/check_fmtable.h" +#include "utils/join_strings.h" +#include "utils/type_traits_core.h" +#include +#include + +namespace fmt { + +template +struct formatter< + ::std::unordered_multiset, + Char, + std::enable_if_t>::value>> + : formatter<::std::string> { + template + auto format(::std::unordered_multiset const &m, FormatContext &ctx) + -> decltype(ctx.out()) { + CHECK_FMTABLE(T); + + std::string result = + ::FlexFlow::join_strings(m.cbegin(), m.cend(), ", ", [](T const &t) { + return fmt::to_string(t); + }); + return formatter::format("{" + result + "}", ctx); + } +}; + +} // namespace fmt + +namespace FlexFlow { + +template +std::ostream &operator<<(std::ostream &s, std::unordered_multiset const &x) { + CHECK_FMTABLE(T); + + return s << fmt::to_string(x); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/fmt/unordered_set.h b/lib/utils/include/utils/fmt/unordered_set.h index 257545af1b..be347ec5ea 100644 --- a/lib/utils/include/utils/fmt/unordered_set.h +++ b/lib/utils/include/utils/fmt/unordered_set.h @@ -24,7 +24,6 @@ struct formatter< ::FlexFlow::join_strings(m.cbegin(), 
m.cend(), ", ", [](T const &t) { return fmt::to_string(t); }); - // } return formatter::format("{" + result + "}", ctx); } }; diff --git a/lib/utils/include/utils/hash/unordered_multiset.h b/lib/utils/include/utils/hash/unordered_multiset.h new file mode 100644 index 0000000000..1727ae548d --- /dev/null +++ b/lib/utils/include/utils/hash/unordered_multiset.h @@ -0,0 +1,20 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_HASH_UNORDERED_SET_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_HASH_UNORDERED_SET_H + +#include "utils/hash-utils.h" +#include + +namespace std { + +template +struct hash> { + size_t operator()(std::unordered_multiset const &s) const { + size_t result = 0; + ::FlexFlow::unordered_container_hash(result, s); + return result; + } +}; + +} // namespace std + +#endif diff --git a/lib/utils/src/utils/fmt/unordered_multiset.cc b/lib/utils/src/utils/fmt/unordered_multiset.cc new file mode 100644 index 0000000000..4e07b010c1 --- /dev/null +++ b/lib/utils/src/utils/fmt/unordered_multiset.cc @@ -0,0 +1 @@ +#include "utils/fmt/unordered_multiset.h" \ No newline at end of file diff --git a/lib/utils/src/utils/hash/unordered_multiset.cc b/lib/utils/src/utils/hash/unordered_multiset.cc new file mode 100644 index 0000000000..7e534e5518 --- /dev/null +++ b/lib/utils/src/utils/hash/unordered_multiset.cc @@ -0,0 +1 @@ +#include "utils/hash/unordered_multiset.h" \ No newline at end of file From 0f2cb567addc4c55f48f99b509ce5f9130693739 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 6 Aug 2024 16:37:48 -0700 Subject: [PATCH 03/34] format fix --- lib/utils/include/utils/containers/transform.h | 3 ++- lib/utils/src/utils/containers/foldl.cc | 2 +- lib/utils/src/utils/fmt/unordered_multiset.cc | 2 +- lib/utils/src/utils/hash/unordered_multiset.cc | 2 +- lib/utils/test/src/utils/containers/foldl.cc | 17 ++++++++++++----- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/lib/utils/include/utils/containers/transform.h 
b/lib/utils/include/utils/containers/transform.h index 374dd92da0..4699b61a9e 100644 --- a/lib/utils/include/utils/containers/transform.h +++ b/lib/utils/include/utils/containers/transform.h @@ -34,7 +34,8 @@ std::unordered_set transform(std::unordered_set const &v, F const &f) { template ()(std::declval()))> -std::unordered_multiset transform(std::unordered_multiset const &v, F const &f) { +std::unordered_multiset transform(std::unordered_multiset const &v, + F const &f) { std::unordered_multiset result; for (auto const &e : v) { result.insert(f(e)); diff --git a/lib/utils/src/utils/containers/foldl.cc b/lib/utils/src/utils/containers/foldl.cc index 85db79a34d..a4c32e83cc 100644 --- a/lib/utils/src/utils/containers/foldl.cc +++ b/lib/utils/src/utils/containers/foldl.cc @@ -1 +1 @@ -#include "utils/containers/foldl.h" \ No newline at end of file +#include "utils/containers/foldl.h" diff --git a/lib/utils/src/utils/fmt/unordered_multiset.cc b/lib/utils/src/utils/fmt/unordered_multiset.cc index 4e07b010c1..cf463296cc 100644 --- a/lib/utils/src/utils/fmt/unordered_multiset.cc +++ b/lib/utils/src/utils/fmt/unordered_multiset.cc @@ -1 +1 @@ -#include "utils/fmt/unordered_multiset.h" \ No newline at end of file +#include "utils/fmt/unordered_multiset.h" diff --git a/lib/utils/src/utils/hash/unordered_multiset.cc b/lib/utils/src/utils/hash/unordered_multiset.cc index 7e534e5518..7f6f73f428 100644 --- a/lib/utils/src/utils/hash/unordered_multiset.cc +++ b/lib/utils/src/utils/hash/unordered_multiset.cc @@ -1 +1 @@ -#include "utils/hash/unordered_multiset.h" \ No newline at end of file +#include "utils/hash/unordered_multiset.h" diff --git a/lib/utils/test/src/utils/containers/foldl.cc b/lib/utils/test/src/utils/containers/foldl.cc index ff43299539..9ed9768a92 100644 --- a/lib/utils/test/src/utils/containers/foldl.cc +++ b/lib/utils/test/src/utils/containers/foldl.cc @@ -1,7 +1,7 @@ #include "utils/containers/foldl.h" #include -#include #include +#include using namespace 
FlexFlow; @@ -9,14 +9,18 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("foldl") { SUBCASE("product") { std::vector container = {1, 2, 3, 4, 5}; - int result = foldl(container, 1, [](int acc, int elem) { return acc * elem; }); + int result = + foldl(container, 1, [](int acc, int elem) { return acc * elem; }); int correct = 120; CHECK(result == correct); } SUBCASE("string concat") { std::vector container = {1, 2, 3, 4, 5}; - std::string result = foldl(container, std::string(""), [](std::string acc, int elem) { return acc + std::to_string(elem); }); + std::string result = + foldl(container, std::string(""), [](std::string acc, int elem) { + return acc + std::to_string(elem); + }); std::string correct = "12345"; CHECK(result == correct); } @@ -25,14 +29,17 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("foldl1") { SUBCASE("product") { std::vector container = {1, 2, 3, 4, 5}; - int result = foldl1(container, [](int acc, int elem) { return acc * elem; }); + int result = + foldl1(container, [](int acc, int elem) { return acc * elem; }); int correct = 120; CHECK(result == correct); } SUBCASE("string concat") { std::vector container = {"1", "2", "3", "4", "5"}; - std::string result = foldl1(container, [](std::string acc, std::string elem) { return acc + elem; }); + std::string result = + foldl1(container, + [](std::string acc, std::string elem) { return acc + elem; }); std::string correct = "12345"; CHECK(result == correct); } From 1eeeba8e2f01a3b23ed742e6c642b77faea077b2 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 6 Aug 2024 18:23:08 -0700 Subject: [PATCH 04/34] Unordered Machine Mapping and adjacent changes --- lib/compiler/src/machine_mapping.cc | 153 ++++++++++++++++++ .../pcg/device_coordinates.struct.toml | 19 +++ lib/pcg/include/pcg/device_id.h | 1 + lib/pcg/include/pcg/machine_view.h | 27 ++-- lib/pcg/include/pcg/machine_view.struct.toml | 2 +- lib/pcg/include/pcg/stride_t.struct.toml | 14 ++ lib/pcg/include/pcg/strided_rectangle.h | 8 +- 
.../include/pcg/strided_rectangle.struct.toml | 8 +- lib/pcg/include/pcg/strided_rectangle_side.h | 4 +- .../pcg/strided_rectangle_side.struct.toml | 3 +- lib/pcg/src/pcg/device_id.cc | 11 ++ lib/pcg/src/pcg/machine_view.cc | 89 ++++++++-- lib/pcg/src/pcg/strided_rectangle_side.cc | 10 +- lib/pcg/src/strided_rectangle.cc | 24 ++- .../computation_graph_builder.cc} | 2 +- lib/pcg/test/src/pcg/machine_view.cc | 133 +++++++++++++++ lib/pcg/test/src/pcg/strided_rectangle.cc | 27 ++++ .../test/src/pcg/strided_rectangle_side.cc | 19 +++ lib/pcg/test/src/test_machine_view.cc | 76 --------- lib/pcg/test/src/test_strided_rectangle.cc | 39 ----- 20 files changed, 499 insertions(+), 170 deletions(-) create mode 100644 lib/pcg/include/pcg/device_coordinates.struct.toml create mode 100644 lib/pcg/include/pcg/stride_t.struct.toml rename lib/pcg/test/src/{test_computation_graph_builder.cc => pcg/computation_graph_builder.cc} (100%) create mode 100644 lib/pcg/test/src/pcg/machine_view.cc create mode 100644 lib/pcg/test/src/pcg/strided_rectangle.cc create mode 100644 lib/pcg/test/src/pcg/strided_rectangle_side.cc delete mode 100644 lib/pcg/test/src/test_machine_view.cc delete mode 100644 lib/pcg/test/src/test_strided_rectangle.cc diff --git a/lib/compiler/src/machine_mapping.cc b/lib/compiler/src/machine_mapping.cc index 2b08e9fe23..fe9db1cab7 100644 --- a/lib/compiler/src/machine_mapping.cc +++ b/lib/compiler/src/machine_mapping.cc @@ -1,6 +1,7 @@ #include "compiler/machine_mapping.h" #include "compiler/cost_estimate.h" #include "graph_utils.h" +#include "pcg/machine_view.h" #include "pcg/parallel_computation_graph.h" #include "utils/exception.h" #include "utils/graph/serialparallel.h" @@ -323,4 +324,156 @@ OptimalCostResult return searcher.optimal_cost(subpcg, resources, sp_decomposition); } +bool is_valid_machine_view(MachineSpecification const &machinespec, + MachineView const &mv) { + // Note: we're checking the size of the machineview, not the last device id + // (e.g. 
consider 2D machine view of size 4x4 with stride 2 along each + // dimension, technically it could fit into a 3x3 machine spec but the machine + // view is really describing a 4x4 space) + if (get_device_type(mv) == DeviceType::GPU) { + return get_size(mv) <= + machinespec.num_nodes * machinespec.num_gpus_per_node; + } else if (get_device_type(mv) == DeviceType::CPU) { + return get_size(mv) <= + machinespec.num_nodes * machinespec.num_cpus_per_node; + } else { + assert(false && "Unsupported DeviceType"); + return false; + } + + // Add check that the last one doesn't go over + // Add other check that you can cram them into the 2D frame (there has to + // exist a bipartition of the dimension that fits) +} + +bool is_valid_machine_view(MachineView const &mv, + ParallelTensorShape const &shape) { + std::unordered_set unordered_mv_degrees = + without_order(get_point_dims(mv)); + std::unordered_set unordered_tensor_degrees = + without_order(ff_ordered_shard_degrees(shape)) + {get_sum_degree(shape)} + + {get_discard_copy_degree(shape)}; // filter for the 1s (no parallelism) + return unordered_mv_dims == unordered_tensor_dims; +} + +// WARNING: some machine_views returned are invalid, get allowed_machine_views +// for valid ones. 
+static std::unordered_set + get_all_machine_views(MachineSpecification const &machinespec, + ParallelTensorShape const &shape) { + + auto all_possible_strides = + [](std::vector tensor_dims, + size_t total_devices, + size_t num_devices_used_by_tensor) { + size_t room_for_stride = total_devices / num_devices_used_by_tensor; + auto x = cartesian_product(replicate(range(1, room_for_stride + 1)), + tensor_dims.size()); + return filter(x, product((elem-1 for elem in x)) <= room_for_stride); + } + + size_t total_devices = machinespec.num_nodes * machinespec.num_gpus_per_nodes; + std::unordered_set machine_views; + std::vector tensor_dims; + size_t num_devices_used_by_tensor = product(tensor_dims); + std::unordered_set> stride_sets = + make_stride_sets(tensor_dims, total_devices); + for (std::vector stride : + all_possible_strides(tensor_dims, total_devices)) { + for (int start_id = 0 : + start_id <= total_devices - num_devices_used_by_tensor + 1; + start_id++) { + std::vector sides = + transform(zip(tensor_dims, stride)); + MachineView mv = {start, StridedRectangle{sides}}; + machine_views.insert(mv); + } + } + return machine_views; +} + +static std::unordered_set + get_all_start_invariant_machine_views( + MachineSpecification const &machinespec, + ParallelTensorShape const &shape) { + NOT_IMPLEMENTED(); +} + +auto get_all_machine_views_to_tensor_dim_bijections(MachineView const &mv, ) { + NOT_IMPLEMENTED(); +} + +// // do product (total num of devices vs total num of elements in the tensor). 
+// bool is_valid_machine_view(Operator const &op, ParallelTensorShape const& +// output_shape, MachineView const& view) { +// int is_dim = 0; +// for (int i = 0; i < num_dims; i++) { +// if (dims[i].parallel_idx != -1) { +// is_dim++; +// if (dims[i].parallel_idx > view.ndims) { +// return false; +// } +// if (view.dim[dims[i].parallel_idx] != dims[i].degree) { +// return false; +// } +// } +// } +// if (is_dim == 0) { +// is_dim = 1; +// } +// if (is_dim != view.ndims) { +// return false; +// } +// if (get_total_num_parts() != view.num_parts()) { +// return false; +// } +// return true; +// } + +std::unordered_set + get_allowed_machine_views(MachineSpecification const &machinespec, + ParallelTensorShape const &shape) { + std::unordered_set operator_views = + get_all_machine_views(machinespec); + // operator_views = filter(operator_views, [&](MachineView const& view) + // {return all_of(outputs(op), [&](ParallelTensorShape const& output){return + // is_valid_machine_view(op, output, view);}); + operator_views = filter(operator_views, [&](MachineView const &view) { + return is_valid_machine_view(shape, view); + }); + operator_views = filter(operator_views, [&](MachineView const &view) { + return is_valid_machine_view(machinespec, view); + }); + return operator_views; +} + +// // Ask the output shapes +// // Get the PCG + +// std::vector SearchHelper::get_valid_machine_views( +// Op const *op, MachineResource const &resource) const { +// std::vector const cached_op_views; +// std::vector valid_views; +// for (size_t i = 0; i < this->model->all_valid_views.size(); i++) { +// bool valid = true; +// for (int j = 0; j < op->numOutputs; j++) { +// if (!op->outputs[j]->is_valid_machine_view( +// this->model->all_valid_views[i])) { +// valid = false; +// break; +// } +// } +// if (valid) { +// cached_op_views.push_back(this->model->all_valid_views[i]); +// } +// } + +// for (size_t i = 0; i < cached_op_views->size(); i++) { +// if (resource.is_valid_machine_view(view)) { 
+// valid_views.push_back(view); +// } +// } +// return valid_views; +// } + } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/device_coordinates.struct.toml b/lib/pcg/include/pcg/device_coordinates.struct.toml new file mode 100644 index 0000000000..80d8ce45c9 --- /dev/null +++ b/lib/pcg/include/pcg/device_coordinates.struct.toml @@ -0,0 +1,19 @@ +# DeviceCoordinates is exclusive to machine_view.cc, must not be used outside of it. +namespace = "FlexFlow" +name = "DeviceCoordinates" +features = [ + "eq", + "ord", + "hash", + "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "op-attrs/dim_ordered.h", +] + +[[fields]] +name = "coords" +type = "::FlexFlow::FFOrdered" diff --git a/lib/pcg/include/pcg/device_id.h b/lib/pcg/include/pcg/device_id.h index 1157a2932a..28cf30eaba 100644 --- a/lib/pcg/include/pcg/device_id.h +++ b/lib/pcg/include/pcg/device_id.h @@ -13,6 +13,7 @@ device_id_t operator+(device_id_t, size_t); DeviceType get_device_type(device_id_t const &device_id); gpu_id_t unwrap_gpu(device_id_t); cpu_id_t unwrap_cpu(device_id_t); +int get_raw_id(device_id_t); device_id_t device_id_from_index(int, DeviceType); diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 56abf5aa20..1006e20186 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -2,6 +2,7 @@ #define _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_H #include "pcg/cpu_id_t.dtg.h" +#include "pcg/device_coordinates.dtg.h" #include "pcg/device_id.h" #include "pcg/device_id_t.dtg.h" #include "pcg/device_type.dtg.h" @@ -14,38 +15,44 @@ namespace FlexFlow { -std::vector device_ids(MachineView const &); -size_t num_dims(MachineView const &); -std::size_t num_devices(MachineView const &); -DeviceType get_device_type(MachineView const &); +std::unordered_multiset get_device_ids(MachineView const &mv); +device_id_t get_last_device_id(MachineView const &mv); + +size_t num_dims(MachineView const &mv); +size_t num_devices(MachineView 
const &mv); +size_t get_size(MachineView const &mv); +std::unordered_multiset + get_num_devices_per_dim(MachineView const &mv); +std::unordered_multiset + get_side_size_per_dim(MachineView const &mv); + +DeviceType get_device_type(MachineView const &mv); MachineView make_1d_machine_view(gpu_id_t start, gpu_id_t stop, int stride = 1); MachineView make_1d_machine_view(cpu_id_t start, cpu_id_t stop, int stride = 1); MachineView make_1d_machine_view(device_id_t start, device_id_t stop, int stride = 1); -MachineView make_1d_machine_view(gpu_id_t start, +MachineView make_1d_machine_view(cpu_id_t start, num_points_t num_points, int stride = 1); -MachineView make_1d_machine_view(cpu_id_t start, +MachineView make_1d_machine_view(gpu_id_t start, num_points_t num_points, int stride = 1); MachineView make_1d_machine_view(device_id_t start, num_points_t num_points, int stride = 1); -MachineView make_1d_machine_view(gpu_id_t start, +MachineView make_1d_machine_view(cpu_id_t start, side_size_t interval_size, int stride = 1); -MachineView make_1d_machine_view(cpu_id_t start, +MachineView make_1d_machine_view(gpu_id_t start, side_size_t interval_size, int stride = 1); MachineView make_1d_machine_view(device_id_t start, side_size_t interval_size, int stride = 1); -MachineView make_1d_machine_view(device_id_t start, size_t interval_size); - } // namespace FlexFlow #endif diff --git a/lib/pcg/include/pcg/machine_view.struct.toml b/lib/pcg/include/pcg/machine_view.struct.toml index c97731991f..07418f7825 100644 --- a/lib/pcg/include/pcg/machine_view.struct.toml +++ b/lib/pcg/include/pcg/machine_view.struct.toml @@ -2,7 +2,7 @@ namespace = "FlexFlow" name = "MachineView" features = [ "eq", - "ord", + # "ord", "hash", "json", # "rapidcheck", diff --git a/lib/pcg/include/pcg/stride_t.struct.toml b/lib/pcg/include/pcg/stride_t.struct.toml new file mode 100644 index 0000000000..a764497b8b --- /dev/null +++ b/lib/pcg/include/pcg/stride_t.struct.toml @@ -0,0 +1,14 @@ +namespace = "FlexFlow" 
+name = "stride_t" +features = [ + "eq", + "ord", + "hash", + "json", + "rapidcheck", + "fmt", +] + +[[fields]] +name = "unwrapped" +type = "int" diff --git a/lib/pcg/include/pcg/strided_rectangle.h b/lib/pcg/include/pcg/strided_rectangle.h index 9c3b8eeda9..ced747d5d3 100644 --- a/lib/pcg/include/pcg/strided_rectangle.h +++ b/lib/pcg/include/pcg/strided_rectangle.h @@ -2,16 +2,18 @@ #define _FLEXFLOW_PCG_INCLUDE_PCG_STRIDED_RECTANGLE_H #include "op-attrs/ff_dim.dtg.h" +#include "pcg/device_id_t.dtg.h" #include "pcg/side_size_t.dtg.h" #include "pcg/strided_rectangle.dtg.h" namespace FlexFlow { -size_t get_num_dims(StridedRectangle const &); -StridedRectangleSide get_side_at_idx(StridedRectangle const &rect, - ff_dim_t const &idx); +size_t get_num_dims(StridedRectangle const &rect); + num_points_t get_num_points(StridedRectangle const &rect); +size_t get_size(StridedRectangle const &rect); + } // namespace FlexFlow #endif diff --git a/lib/pcg/include/pcg/strided_rectangle.struct.toml b/lib/pcg/include/pcg/strided_rectangle.struct.toml index 3dfd90e296..1b2318173d 100644 --- a/lib/pcg/include/pcg/strided_rectangle.struct.toml +++ b/lib/pcg/include/pcg/strided_rectangle.struct.toml @@ -2,7 +2,7 @@ namespace = "FlexFlow" name = "StridedRectangle" features = [ "eq", - "ord", + # "ord", "hash", "json", "rapidcheck", @@ -11,9 +11,11 @@ features = [ includes = [ "pcg/strided_rectangle_side.dtg.h", - "op-attrs/dim_ordered.h", + "", + "utils/hash/unordered_multiset.h", + "utils/fmt/unordered_multiset.h", ] [[fields]] name = "sides" -type = "::FlexFlow::FFOrdered<::FlexFlow::StridedRectangleSide>" +type = "std::unordered_multiset<::FlexFlow::StridedRectangleSide>" diff --git a/lib/pcg/include/pcg/strided_rectangle_side.h b/lib/pcg/include/pcg/strided_rectangle_side.h index 1486b73143..fb18b6d44e 100644 --- a/lib/pcg/include/pcg/strided_rectangle_side.h +++ b/lib/pcg/include/pcg/strided_rectangle_side.h @@ -2,11 +2,13 @@ #define 
_FLEXFLOW_LIB_PCG_INCLUDE_PCG_STRIDED_RECTANGLE_SIDE_H #include "pcg/side_size_t.dtg.h" +#include "pcg/stride_t.dtg.h" #include "pcg/strided_rectangle_side.dtg.h" namespace FlexFlow { -StridedRectangleSide strided_side_from_size_and_stride(side_size_t, int stride); +StridedRectangleSide strided_side_from_size_and_stride(side_size_t, + stride_t stride); side_size_t get_side_size(StridedRectangleSide const &); diff --git a/lib/pcg/include/pcg/strided_rectangle_side.struct.toml b/lib/pcg/include/pcg/strided_rectangle_side.struct.toml index f26adfafd5..3481ebcf16 100644 --- a/lib/pcg/include/pcg/strided_rectangle_side.struct.toml +++ b/lib/pcg/include/pcg/strided_rectangle_side.struct.toml @@ -11,6 +11,7 @@ features = [ includes = [ "pcg/num_points_t.dtg.h", + "pcg/stride_t.dtg.h", ] [[fields]] @@ -19,4 +20,4 @@ type = "::FlexFlow::num_points_t" [[fields]] name = "stride" -type = "int" +type = "::FlexFlow::stride_t" diff --git a/lib/pcg/src/pcg/device_id.cc b/lib/pcg/src/pcg/device_id.cc index 35b0c9aeda..86c943045b 100644 --- a/lib/pcg/src/pcg/device_id.cc +++ b/lib/pcg/src/pcg/device_id.cc @@ -25,6 +25,17 @@ cpu_id_t unwrap_cpu(device_id_t device_id) { return device_id.get(); } +int get_raw_id(device_id_t device_id) { + if (get_device_type(device_id) == DeviceType::GPU) { + return unwrap_gpu(device_id).gpu_index; + } else if (get_device_type(device_id) == DeviceType::CPU) { + return unwrap_cpu(device_id).cpu_index; + } else { + assert(false && "Unsupported DeviceType"); + return -1; + } +} + device_id_t device_id_from_index(int, DeviceType) { NOT_IMPLEMENTED(); } diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index c09ab1a3c9..bccaa84bd5 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,23 +1,90 @@ #include "pcg/machine_view.h" +#include "pcg/device_coordinates.dtg.h" #include "pcg/device_id.h" #include "pcg/strided_rectangle.dtg.h" #include "pcg/strided_rectangle.h" #include 
"pcg/strided_rectangle_side.h" +#include "utils/containers.h" +#include "utils/containers/as_vector.h" +#include "utils/containers/cartesian_product.h" +#include "utils/containers/product.h" +#include "utils/containers/range.h" +#include "utils/containers/reversed.h" +#include "utils/containers/scanl.h" +#include "utils/containers/transform.h" +#include "utils/containers/zip.h" +#include "utils/hash/vector.h" namespace FlexFlow { -std::vector device_ids(MachineView const &) { - NOT_IMPLEMENTED(); +static device_id_t get_device_id(MachineView const &mv, + DeviceCoordinates const &point) { + assert(point.coords.size() == get_num_dims(mv.rect)); + std::vector coefficients = + scanl(sorted(mv.rect.sides), + 1, + [](size_t const &result, StridedRectangleSide const &side) { + return result * get_side_size(side).unwrapped; + }); + size_t raw_id = + sum(transform(zip(coefficients, as_vector(point.coords)), + [](auto const pair) { return pair.first * pair.second; })) + + get_raw_id(mv.start); + + return ((get_device_type(mv) == DeviceType::CPU) + ? 
device_id_t(cpu_id_t(raw_id)) + : device_id_t(gpu_id_t(raw_id))); +} + +std::unordered_multiset get_device_ids(MachineView const &mv) { + std::vector> ranges = + transform(sorted(mv.rect.sides), [](StridedRectangleSide const &side) { + return range(size_t(0), + size_t(get_side_size(side).unwrapped), + size_t(side.stride.unwrapped)); + }); + std::unordered_multiset devices_as_points = + transform(cartesian_product(ranges), + [](auto const &point) { return DeviceCoordinates(point); }); + std::unordered_multiset ids = + transform(devices_as_points, [&](DeviceCoordinates const &dc) { + return get_device_id(mv, dc); + }); + return ids; +} + +device_id_t get_last_device_id(MachineView const &mv) { + DeviceCoordinates last_device = DeviceCoordinates( + transform(sorted(mv.rect.sides), [](StridedRectangleSide const &s) { + return size_t(s.stride.unwrapped); + })); + return maximum(get_device_ids(mv)); +} + +size_t num_dims(MachineView const &mv) { + return get_num_dims(mv.rect); } -std::size_t num_dims(MachineView const &mv) { - return get_num_dims(mv.rect); +std::unordered_multiset + get_num_devices_per_dim(MachineView const &mv) { + return transform(mv.rect.sides, [](StridedRectangleSide const &side) { + return side.num_points; + }); +} + +std::unordered_multiset + get_side_size_per_dim(MachineView const &mv) { + return transform(mv.rect.sides, get_side_size); } size_t num_devices(MachineView const &mv) { return get_num_points(mv.rect).unwrapped; } +size_t get_size(MachineView const &mv) { + return get_size(mv.rect); +} + DeviceType get_device_type(MachineView const &mv) { return get_device_type(mv.start); } @@ -25,10 +92,10 @@ DeviceType get_device_type(MachineView const &mv) { static StridedRectangle make_1d_rect(int start, int stop, int stride) { assert(stop > start); assert(stride > 0); - StridedRectangleSide side = - strided_side_from_size_and_stride(side_size_t{stop - start}, stride); + StridedRectangleSide side = strided_side_from_size_and_stride( + 
side_size_t{stop - start}, stride_t{stride}); StridedRectangle rect = - StridedRectangle{std::vector{side}}; + StridedRectangle{std::unordered_multiset{side}}; return rect; } @@ -109,13 +176,5 @@ MachineView make_1d_machine_view(device_id_t start, return make_1d_machine_view(unwrap_gpu(start), interval_size, stride); } } -MachineView make_1d_machine_view(device_id_t start, size_t interval_size) { - NOT_IMPLEMENTED(); -} - -/* device_id_t MachineView::at(FFOrdered const &coord) const { */ -/* size_t offset = this->rect.at(coord); */ -/* return this->start + offset; */ -/* } */ } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/strided_rectangle_side.cc b/lib/pcg/src/pcg/strided_rectangle_side.cc index e6caf4cb86..fcfc0e854a 100644 --- a/lib/pcg/src/pcg/strided_rectangle_side.cc +++ b/lib/pcg/src/pcg/strided_rectangle_side.cc @@ -4,14 +4,14 @@ namespace FlexFlow { StridedRectangleSide strided_side_from_size_and_stride(side_size_t side_size, - int stride) { - assert((side_size.unwrapped % stride) == 0); - return StridedRectangleSide{num_points_t{side_size.unwrapped / stride}, - stride}; + stride_t stride) { + assert((side_size.unwrapped % stride.unwrapped) == 0); + return StridedRectangleSide{ + num_points_t{side_size.unwrapped / stride.unwrapped}, stride}; } side_size_t get_side_size(StridedRectangleSide const &s) { - return side_size_t{s.num_points.unwrapped * s.stride}; + return side_size_t{s.num_points.unwrapped * s.stride.unwrapped}; } } // namespace FlexFlow diff --git a/lib/pcg/src/strided_rectangle.cc b/lib/pcg/src/strided_rectangle.cc index dfb5d0af12..c2b4d77f60 100644 --- a/lib/pcg/src/strided_rectangle.cc +++ b/lib/pcg/src/strided_rectangle.cc @@ -1,21 +1,14 @@ #include "pcg/strided_rectangle.h" #include "op-attrs/dim_ordered/transform.h" +#include "pcg/device_coordinates.dtg.h" +#include "pcg/device_id_t.dtg.h" +#include "pcg/strided_rectangle_side.h" +#include "utils/containers/as_vector.h" #include "utils/containers/product.h" +#include 
"utils/containers/transform.h" namespace FlexFlow { -/* size_t StridedRectangle::at(FFOrdered const &coord) const { */ -/* assert(coord.size() == this->num_dims()); */ - -/* size_t _1d_stride = 1; */ -/* size_t idx = 0; */ -/* for (auto dim : inner_to_outer_idxs(this->sides)) { */ -/* idx += this->sides.at(dim).at(coord.at(dim)).value() * _1d_stride; */ -/* _1d_stride *= this->sides.at(dim).get_size().value(); */ -/* } */ -/* return idx; */ -/* } */ - size_t get_num_dims(StridedRectangle const &rect) { return rect.sides.size(); } @@ -27,9 +20,10 @@ num_points_t get_num_points(StridedRectangle const &rect) { }))}; } -StridedRectangleSide get_side_at_idx(StridedRectangle const &rect, - ff_dim_t const &idx) { - return rect.sides.at(idx); +size_t get_size(StridedRectangle const &rect) { + return product(transform(rect.sides, [](StridedRectangleSide const &side) { + return get_side_size(side).unwrapped; + })); } } // namespace FlexFlow diff --git a/lib/pcg/test/src/test_computation_graph_builder.cc b/lib/pcg/test/src/pcg/computation_graph_builder.cc similarity index 100% rename from lib/pcg/test/src/test_computation_graph_builder.cc rename to lib/pcg/test/src/pcg/computation_graph_builder.cc index 936c2de00d..c0bc844489 100644 --- a/lib/pcg/test/src/test_computation_graph_builder.cc +++ b/lib/pcg/test/src/pcg/computation_graph_builder.cc @@ -1,6 +1,6 @@ +#include "pcg/computation_graph_builder.h" #include "doctest/doctest.h" #include "pcg/computation_graph.h" -#include "pcg/computation_graph_builder.h" using namespace ::FlexFlow; diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc new file mode 100644 index 0000000000..71b07b23a9 --- /dev/null +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -0,0 +1,133 @@ +#include "pcg/machine_view.h" +#include "pcg/strided_rectangle.h" +#include "pcg/strided_rectangle_side.h" +#include "test/utils/doctest.h" +#include "utils/containers/transform.h" + +std::unordered_multiset + 
make_gpu_device_ids(std::unordered_multiset ids) { + return transform(ids, [](int id) { return device_id_t(gpu_id_t(id)); }); +} + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + + TEST_CASE("MachineView utility functions") { + StridedRectangle rect{{StridedRectangleSide(num_points_t(7), stride_t(5)), + StridedRectangleSide(num_points_t(10), stride_t(2)), + StridedRectangleSide(num_points_t(1), stride_t(4))}}; + gpu_id_t start(1); + MachineView mv{device_id_t{start}, rect}; + + SUBCASE("num_dims") { + CHECK(num_dims(mv) == 3); + } + SUBCASE("num_devices") { + CHECK(num_devices(mv) == 7 * 10 * 1); + } + SUBCASE("get_size") { + CHECK(get_size(mv) == (7 * 5) * (10 * 2) * (1 * 4)); + } + + SUBCASE("get_side_size_per_dim") { + std::unordered_multiset expected = { + side_size_t(7 * 5), side_size_t(10 * 2), side_size_t(1 * 4)}; + std::unordered_multiset result = get_side_size_per_dim(mv); + CHECK(expected == result); + } + SUBCASE("get_num_devices_per_dim") { + std::unordered_multiset expected = { + num_points_t(7), num_points_t(10), num_points_t(1)}; + std::unordered_multiset result = + get_num_devices_per_dim(mv); + CHECK(expected == result); + } + + SUBCASE("get_device_type") { + CHECK(get_device_type(mv) == DeviceType::GPU); + } + } + + TEST_CASE("MachineView - device ids") { + SUBCASE("MachineView #1") { + StridedRectangle rect{{ + StridedRectangleSide(num_points_t(2), stride_t(3)), + StridedRectangleSide(num_points_t(2), stride_t(2)), + }}; + gpu_id_t start(0); + MachineView mv{device_id_t{start}, rect}; + SUBCASE("get_device_ids") { + std::unordered_multiset expected = + make_gpu_device_ids({0, 2, 12, 14}); + std::unordered_multiset result = get_device_ids(mv); + CHECK(expected == result); + } + SUBCASE("get_last_device_id") { + CHECK(get_last_device_id(mv) == device_id_t(gpu_id_t(14))); + } + } + SUBCASE("MachineView #2") { + StridedRectangle rect{{ + StridedRectangleSide(num_points_t(1), stride_t(3)), + StridedRectangleSide(num_points_t(2), 
stride_t(1)), + StridedRectangleSide(num_points_t(2), stride_t(2)), + }}; + gpu_id_t start(1); + MachineView mv{device_id_t{start}, rect}; + + SUBCASE("get_device_ids") { + std::unordered_multiset expected = + make_gpu_device_ids({1, 4, 13, 16}); + std::unordered_multiset result = get_device_ids(mv); + + CHECK(expected == result); + } + + SUBCASE("get_last_device_id") { + CHECK(get_last_device_id(mv) == device_id_t(gpu_id_t(16))); + } + } + } + + TEST_CASE("MachineView make_1d_machine_view - GPU") { + + StridedRectangle rect{{StridedRectangleSide{num_points_t{7}, stride_t{5}}}}; + device_id_t start_gpu{gpu_id_t{1}}; + MachineView gpu_mv{start_gpu, rect}; + + SUBCASE("make_1d_machine_view(gpu_id_t start, gpu_id_t stop, int stride)") { + MachineView result = + make_1d_machine_view(start_gpu, device_id_t{gpu_id_t(1 + 7 * 5)}, 5); + MachineView correct = gpu_mv; + CHECK(result == correct); + } + + SUBCASE("make_1d_machine_view(gpu_id_t start, num_points_t num_points, int " + "stride)") { + MachineView result = make_1d_machine_view(start_gpu, num_points_t{7}, 5); + MachineView correct = gpu_mv; + CHECK(result == correct); + } + } + + TEST_CASE("MachineView make_1d_machine_view - CPU") { + StridedRectangle rect{ + {StridedRectangleSide{num_points_t{11}, stride_t{4}}}}; + device_id_t start_cpu{cpu_id_t{2}}; + MachineView cpu_mv{start_cpu, rect}; + + SUBCASE("make_1d_machine_view(cpu_id_t start, cpu_id_t stop, int stride)") { + MachineView result = + make_1d_machine_view(start_cpu, device_id_t{cpu_id_t(2 + 11 * 4)}, 4); + MachineView correct = cpu_mv; + CHECK(result == correct); + } + SUBCASE("make_1d_machine_view(cpu_id_t start, num_points_t num_points, int " + "stride)") { + MachineView result = make_1d_machine_view(start_cpu, num_points_t{11}, 4); + MachineView correct = cpu_mv; + CHECK(result == correct); + } + } +} diff --git a/lib/pcg/test/src/pcg/strided_rectangle.cc b/lib/pcg/test/src/pcg/strided_rectangle.cc new file mode 100644 index 0000000000..ed48502263 --- 
/dev/null +++ b/lib/pcg/test/src/pcg/strided_rectangle.cc @@ -0,0 +1,27 @@ +#include "pcg/strided_rectangle.h" +#include "pcg/strided_rectangle_side.h" +#include "test/utils/doctest.h" + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + + TEST_CASE("StridedRectangle - helper functions") { + + StridedRectangleSide s0{num_points_t{7}, stride_t{5}}; + StridedRectangleSide s1{num_points_t{10}, stride_t{2}}; + StridedRectangleSide s2{num_points_t{8}, stride_t{1}}; + StridedRectangle rect{{s0, s1, s2}}; + + SUBCASE("get_num_dims") { + CHECK(get_num_dims(rect) == 3); + } + SUBCASE("get_num_points") { + CHECK(get_num_points(rect) == num_points_t{7 * 8 * 10}); + } + + SUBCASE("get_size") { + CHECK(get_size(rect) == size_t{(7 * 5) * (10 * 2) * (8 * 1)}); + } + } +} diff --git a/lib/pcg/test/src/pcg/strided_rectangle_side.cc b/lib/pcg/test/src/pcg/strided_rectangle_side.cc new file mode 100644 index 0000000000..e45cc576e9 --- /dev/null +++ b/lib/pcg/test/src/pcg/strided_rectangle_side.cc @@ -0,0 +1,19 @@ +#include "pcg/strided_rectangle_side.h" +#include "pcg/strided_rectangle.h" +#include "test/utils/doctest.h" + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_side_size(StridedRectangleSide)") { + StridedRectangleSide side{num_points_t{7}, stride_t{5}}; + + CHECK(get_side_size(side) == side_size_t{7 * 5}); + } + TEST_CASE("strided_side_from_size_and_stride") { + StridedRectangleSide correct{num_points_t{10}, stride_t{3}}; + StridedRectangleSide result = + strided_side_from_size_and_stride(side_size_t{10 * 3}, stride_t{3}); + CHECK(result == correct); + } +} diff --git a/lib/pcg/test/src/test_machine_view.cc b/lib/pcg/test/src/test_machine_view.cc deleted file mode 100644 index 70fe958d8c..0000000000 --- a/lib/pcg/test/src/test_machine_view.cc +++ /dev/null @@ -1,76 +0,0 @@ -#include "pcg/machine_view.h" -#include "pcg/strided_rectangle.h" -#include "pcg/strided_rectangle_side.h" -#include "test/utils/doctest.h" - -using namespace 
::FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("MachineView general util functions") { - StridedRectangle rect{{StridedRectangleSide{num_points_t{7}, 5}, - StridedRectangleSide{num_points_t{10}, 2}}}; - gpu_id_t start(1); - MachineView mv{device_id_t{start}, rect}; - SUBCASE("num_dims") { - CHECK(num_dims(mv) == 2); - } - SUBCASE("num_devices") { - CHECK(num_devices(mv) == 7 * 10); - } - SUBCASE("get_device_type") { - CHECK(get_device_type(mv) == DeviceType::GPU); - } - } - - TEST_CASE("MachineView make_1d_machine_view - GPU") { - StridedRectangle rect{{StridedRectangleSide{num_points_t{7}, 5}}}; - device_id_t start_gpu{gpu_id_t{1}}; - MachineView gpu_mv{start_gpu, rect}; - - SUBCASE("make_1d_machine_view(gpu_id_t start, gpu_id_t stop, int stride)") { - MachineView result = - make_1d_machine_view(start_gpu, device_id_t{gpu_id_t(1 + 7 * 5)}, 5); - MachineView correct = gpu_mv; - CHECK(result == correct); - } - SUBCASE("make_1d_machine_view(gpu_id_t start, num_points_t num_points, int " - "stride)") { - MachineView result = make_1d_machine_view(start_gpu, num_points_t{7}, 5); - MachineView correct = gpu_mv; - CHECK(result == correct); - } - SUBCASE("make_1d_machine_view(gpu_id_t start, side_size_t interval_size, " - "int stride)") { - MachineView result = make_1d_machine_view( - start_gpu, get_side_size(rect.sides.at(ff_dim_t{0})), 5); - MachineView correct = gpu_mv; - CHECK(result == correct); - } - } - - TEST_CASE("MachineView make_1d_machine_view - CPU") { - StridedRectangle rect{{StridedRectangleSide{num_points_t{11}, 4}}}; - device_id_t start_cpu{cpu_id_t{2}}; - MachineView cpu_mv{start_cpu, rect}; - - SUBCASE("make_1d_machine_view(cpu_id_t start, cpu_id_t stop, int stride)") { - MachineView result = - make_1d_machine_view(start_cpu, device_id_t{cpu_id_t(2 + 11 * 4)}, 4); - MachineView correct = cpu_mv; - CHECK(result == correct); - } - SUBCASE("make_1d_machine_view(cpu_id_t start, num_points_t num_points, int " - "stride)") { - MachineView result = 
make_1d_machine_view(start_cpu, num_points_t{11}, 4); - MachineView correct = cpu_mv; - CHECK(result == correct); - } - SUBCASE("make_1d_machine_view(cpu_id_t start, side_size_t interval_size, " - "int stride)") { - MachineView result = make_1d_machine_view( - start_cpu, get_side_size(rect.sides.at(ff_dim_t{0})), 4); - MachineView correct = cpu_mv; - CHECK(result == correct); - } - } -} diff --git a/lib/pcg/test/src/test_strided_rectangle.cc b/lib/pcg/test/src/test_strided_rectangle.cc deleted file mode 100644 index 2fe3005b15..0000000000 --- a/lib/pcg/test/src/test_strided_rectangle.cc +++ /dev/null @@ -1,39 +0,0 @@ -#include "pcg/strided_rectangle.h" -#include "pcg/strided_rectangle_side.h" -#include "test/utils/doctest.h" - -using namespace ::FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_side_size(StridedRectangleSide)") { - StridedRectangleSide side{num_points_t{7}, 5}; - - CHECK(get_side_size(side) == side_size_t{7 * 5}); - } - TEST_CASE("strided_side_from_size_and_stride") { - StridedRectangleSide correct{num_points_t{10}, 3}; - StridedRectangleSide result = - strided_side_from_size_and_stride(side_size_t{10 * 3}, 3); - CHECK(result == correct); - } - - TEST_CASE("StridedRectangle - helper functions") { - - StridedRectangleSide s0{num_points_t{7}, 5}; - StridedRectangleSide s1{num_points_t{10}, 2}; - StridedRectangleSide s2{num_points_t{8}, 1}; - StridedRectangle rect{{s0, s1, s2}}; - - SUBCASE("get_num_dims") { - CHECK(get_num_dims(rect) == 3); - } - SUBCASE("get_num_points") { - CHECK(get_num_points(rect) == num_points_t{7 * 8 * 10}); - } - SUBCASE("get_side_at_idx") { - CHECK(get_side_at_idx(rect, ff_dim_t{0}) == s0); - CHECK(get_side_at_idx(rect, ff_dim_t{1}) == s1); - CHECK(get_side_at_idx(rect, ff_dim_t{2}) == s2); - } - } -} From d3f16577e7d05d95fe7ec49251ed1fb84c544cc3 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 6 Aug 2024 20:01:51 -0700 Subject: [PATCH 05/34] formatting --- lib/compiler/src/machine_mapping.cc | 110 
++++++++++++++-------------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/lib/compiler/src/machine_mapping.cc b/lib/compiler/src/machine_mapping.cc index c69ad83ff6..1e9d6ee2eb 100644 --- a/lib/compiler/src/machine_mapping.cc +++ b/lib/compiler/src/machine_mapping.cc @@ -359,35 +359,35 @@ OptimalCostResult optimal_cost( } // bool is_valid_machine_view(MachineSpecification const &machinespec, - // MachineView const &mv) { - // Note: we're checking the size of the machineview, not the last device id - // (e.g. consider 2D machine view of size 4x4 with stride 2 along each - // dimension, technically it could fit into a 3x3 machine spec but the machine - // view is really describing a 4x4 space) - // if (get_device_type(mv) == DeviceType::GPU) { - // return get_size(mv) <= - // machinespec.num_nodes * machinespec.num_gpus_per_node; - // } else if (get_device_type(mv) == DeviceType::CPU) { - // return get_size(mv) <= - // machinespec.num_nodes * machinespec.num_cpus_per_node; - // } else { - // assert(false && "Unsupported DeviceType"); - // return false; - // } +// MachineView const &mv) { +// Note: we're checking the size of the machineview, not the last device id +// (e.g. 
consider 2D machine view of size 4x4 with stride 2 along each +// dimension, technically it could fit into a 3x3 machine spec but the machine +// view is really describing a 4x4 space) +// if (get_device_type(mv) == DeviceType::GPU) { +// return get_size(mv) <= +// machinespec.num_nodes * machinespec.num_gpus_per_node; +// } else if (get_device_type(mv) == DeviceType::CPU) { +// return get_size(mv) <= +// machinespec.num_nodes * machinespec.num_cpus_per_node; +// } else { +// assert(false && "Unsupported DeviceType"); +// return false; +// } - // Add check that the last one doesn't go over - // Add other check that you can cram them into the 2D frame (there has to - // exist a bipartition of the dimension that fits) +// Add check that the last one doesn't go over +// Add other check that you can cram them into the 2D frame (there has to +// exist a bipartition of the dimension that fits) // } // bool is_valid_machine_view(MachineView const &mv, // ParallelTensorShape const &shape) { - // std::unordered_set unordered_mv_degrees = - // without_order(get_point_dims(mv)); - // std::unordered_set unordered_tensor_degrees = - // without_order(ff_ordered_shard_degrees(shape)) + {get_sum_degree(shape)} + - // {get_discard_copy_degree(shape)}; // filter for the 1s (no parallelism) - // return unordered_mv_dims == unordered_tensor_dims; +// std::unordered_set unordered_mv_degrees = +// without_order(get_point_dims(mv)); +// std::unordered_set unordered_tensor_degrees = +// without_order(ff_ordered_shard_degrees(shape)) + {get_sum_degree(shape)} +// + {get_discard_copy_degree(shape)}; // filter for the 1s (no parallelism) +// return unordered_mv_dims == unordered_tensor_dims; // } // WARNING: some machine_views returned are invalid, get allowed_machine_views @@ -396,34 +396,34 @@ OptimalCostResult optimal_cost( // get_all_machine_views(MachineSpecification const &machinespec, // ParallelTensorShape const &shape) { - // auto all_possible_strides = - // [](std::vector 
tensor_dims, - // size_t total_devices, - // size_t num_devices_used_by_tensor) { - // size_t room_for_stride = total_devices / num_devices_used_by_tensor; - // auto x = cartesian_product(replicate(range(1, room_for_stride + 1)), - // tensor_dims.size()); - // return filter(x, product((elem-1 for elem in x)) <= room_for_stride); - // } - - // size_t total_devices = machinespec.num_nodes * machinespec.num_gpus_per_nodes; - // std::unordered_set machine_views; - // std::vector tensor_dims; - // size_t num_devices_used_by_tensor = product(tensor_dims); - // std::unordered_set> stride_sets = - // make_stride_sets(tensor_dims, total_devices); - // for (std::vector stride : - // all_possible_strides(tensor_dims, total_devices)) { - // for (int start_id = 0 : - // start_id <= total_devices - num_devices_used_by_tensor + 1; - // start_id++) { - // std::vector sides = - // transform(zip(tensor_dims, stride)); - // MachineView mv = {start, StridedRectangle{sides}}; - // machine_views.insert(mv); - // } - // } - // return machine_views; +// auto all_possible_strides = +// [](std::vector tensor_dims, +// size_t total_devices, +// size_t num_devices_used_by_tensor) { +// size_t room_for_stride = total_devices / num_devices_used_by_tensor; +// auto x = cartesian_product(replicate(range(1, room_for_stride + 1)), +// tensor_dims.size()); +// return filter(x, product((elem-1 for elem in x)) <= room_for_stride); +// } + +// size_t total_devices = machinespec.num_nodes * +// machinespec.num_gpus_per_nodes; std::unordered_set +// machine_views; std::vector tensor_dims; size_t +// num_devices_used_by_tensor = product(tensor_dims); +// std::unordered_set> stride_sets = +// make_stride_sets(tensor_dims, total_devices); +// for (std::vector stride : +// all_possible_strides(tensor_dims, total_devices)) { +// for (int start_id = 0 : +// start_id <= total_devices - num_devices_used_by_tensor + 1; +// start_id++) { +// std::vector sides = +// transform(zip(tensor_dims, stride)); +// 
MachineView mv = {start, StridedRectangle{sides}}; +// machine_views.insert(mv); +// } +// } +// return machine_views; // } // static std::unordered_set @@ -433,7 +433,8 @@ OptimalCostResult optimal_cost( // NOT_IMPLEMENTED(); // } -// auto get_all_machine_views_to_tensor_dim_bijections(MachineView const &mv, ParallelTensorShape const &shape) { +// auto get_all_machine_views_to_tensor_dim_bijections(MachineView const &mv, +// ParallelTensorShape const &shape) { // NOT_IMPLEMENTED(); // } @@ -470,7 +471,8 @@ OptimalCostResult optimal_cost( // std::unordered_set operator_views = // get_all_machine_views(machinespec, shape); // // operator_views = filter(operator_views, [&](MachineView const& view) -// // {return all_of(outputs(op), [&](ParallelTensorShape const& output){return +// // {return all_of(outputs(op), [&](ParallelTensorShape const& +// output){return // // is_valid_machine_view(op, output, view);}); // operator_views = filter(operator_views, [&](MachineView const &view) { // return is_valid_machine_view(shape, view); From 67a94a29fdafe9336584286ed64a037f3d5031c7 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Thu, 8 Aug 2024 22:03:25 -0700 Subject: [PATCH 06/34] Minor fixes --- lib/utils/include/utils/containers/foldl.h | 57 +++++++++++++++++--- lib/utils/include/utils/containers/range.h | 14 +++-- lib/utils/include/utils/containers/scanl.h | 55 +++++++++++++++---- lib/utils/test/src/utils/containers/range.cc | 18 ++++--- 4 files changed, 113 insertions(+), 31 deletions(-) diff --git a/lib/utils/include/utils/containers/foldl.h b/lib/utils/include/utils/containers/foldl.h index bdcb6d1270..16851d7d9b 100644 --- a/lib/utils/include/utils/containers/foldl.h +++ b/lib/utils/include/utils/containers/foldl.h @@ -1,11 +1,29 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H +#include "utils/exception.h" +#include "utils/fmt/vector.h" #include +#include #include +#include 
namespace FlexFlow { +/** + * @brief + * Iteratively applies `func` to the elements of `c` from left to right. + * `init` is used as the starting value. + * + * @example + * std::vector nums = {1, 2, 3, 4}; + * int result = foldl(nums, 0, [](int a, int b) { return a + b; }); + * result -> ((((0+1)+2)+3)+4) = 10 + * + * @note + * For more information, see + * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:foldl + */ template T foldl(C const &c, T init, F func) { T result = init; @@ -15,15 +33,38 @@ T foldl(C const &c, T init, F func) { return result; } -template -auto foldl1(C const &c, F func) -> typename C::value_type { - auto it = c.begin(); - assert(it != c.cend()); +/** + * @brief + * Applies `func` to the elements of `c` from left to right, accumulating the + * result. The first element of `c` is used as the starting point for the + * accumulation. + * + * @example + * std::vector nums = {1, 2, 3, 4}; + * int result = foldl1(nums, [](int a, int b) { return a + b; }); + * result -> (((1+2)+3)+4) = 10 + * + * @note + * For more information, see + * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:foldl1 + * @throws std::runtime_error if the container is empty. 
+ */ +template +E foldl1(C const &c, F func) { + if (c.empty()) { + throw mk_runtime_error( + fmt::format("foldl1 received empty container: {}", c)); + } + std::optional result = std::nullopt; - typename C::value_type init = *it; - ++it; - C remaining(it, c.end()); - return foldl(remaining, init, func); + for (E const &e : c) { + if (!result.has_value()) { + result = e; + } else { + result = func(result.value(), e); + } + } + return result.value(); } } // namespace FlexFlow diff --git a/lib/utils/include/utils/containers/range.h b/lib/utils/include/utils/containers/range.h index b7cd3f93e6..ca6352be25 100644 --- a/lib/utils/include/utils/containers/range.h +++ b/lib/utils/include/utils/containers/range.h @@ -7,26 +7,24 @@ namespace FlexFlow { -template -std::vector range(T start, T end, T step = 1) { +std::vector range(int start, int end, int step = 1) { assert(step != 0); - std::vector result; + std::vector result; if (step > 0) { - for (T i = start; i < end; i += step) { + for (int i = start; i < end; i += step) { result.push_back(i); } } else { - for (T i = start; i > end; i += step) { + for (int i = start; i > end; i += step) { result.push_back(i); } } return result; } -template -std::vector range(T end) { - return range(T(0), end); +std::vector range(int end) { + return range(0, end); } } // namespace FlexFlow diff --git a/lib/utils/include/utils/containers/scanl.h b/lib/utils/include/utils/containers/scanl.h index 54acf2d743..a30a9e1576 100644 --- a/lib/utils/include/utils/containers/scanl.h +++ b/lib/utils/include/utils/containers/scanl.h @@ -1,16 +1,32 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANL_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_SCANL_H +#include #include namespace FlexFlow { +/** + * @brief + * Applies `op` to the elements of `c` from left to right, accumulating + * the intermediate results in a vector. `init` is used as the starting point + * for the accumulation. 
+ * + * @example + * std::vector nums = {1, 2, 3, 4}; + * auto result = scanl(nums, 0, [](int a, int b) {return a+b;}); + * result -> {0,1,3,6,10} + * + * @note + * Essentially a foldl which stores the intermediate results + * For more information, see + * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:scanl + */ template std::vector scanl(C const &c, T init, F const &op) { std::vector result; result.push_back(init); - for (auto const &elem : c) { init = op(init, elem); result.push_back(init); @@ -19,20 +35,41 @@ std::vector scanl(C const &c, T init, F const &op) { return result; } -template -auto scanl1(C const &c, F op) { - using T = typename C::value_type; +/** + * @brief + * Applies `op` to the elements of `c` from left to right, accumulating + * the intermediate results in a vector. The first item of `c` is used as the + * starting point for the accumulation. + * + * @example + * std::vector nums = {1, 2, 3, 4}; + * auto result = scanl1(nums, [](int a, int b) {return a+b;}); + * result -> {1,3,6,10} + * + * @note + * Essentially a foldl1 which stores the intermediate results. 
+ * For more information, see + * https://hackage.haskell.org/package/base-4.20.0.1/docs/Prelude.html#v:scanl1 + */ +template +std::vector scanl1(C const &c, F op) { if (c.empty()) { return std::vector(); } - auto it = c.begin(); - T init = *it; - ++it; + std::optional init = std::nullopt; + std::vector result; - C remaining(it, c.end()); - return scanl(remaining, init, op); + for (T const &elem : c) { + if (!init.has_value()) { + init = elem; + } else { + init = op(init.value(), elem); + } + result.push_back(init.value()); + } + return result; } } // namespace FlexFlow diff --git a/lib/utils/test/src/utils/containers/range.cc b/lib/utils/test/src/utils/containers/range.cc index dbc9b50073..15244278de 100644 --- a/lib/utils/test/src/utils/containers/range.cc +++ b/lib/utils/test/src/utils/containers/range.cc @@ -8,40 +8,46 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("range") { - SUBCASE("basic integer range") { + SUBCASE("step=1") { std::vector result = range(0, 5); std::vector correct = {0, 1, 2, 3, 4}; CHECK(result == correct); } - SUBCASE("integer range with step") { + SUBCASE("step = 2") { std::vector result = range(-2, 10, 2); std::vector correct = {-2, 0, 2, 4, 6, 8}; CHECK(result == correct); } - SUBCASE("negative integer range") { + SUBCASE("step = -1") { std::vector result = range(5, 0, -1); std::vector correct = {5, 4, 3, 2, 1}; CHECK(result == correct); } - SUBCASE("single argument range") { + SUBCASE("single argument") { std::vector result = range(5); std::vector correct = {0, 1, 2, 3, 4}; CHECK(result == correct); } - SUBCASE("empty range") { + SUBCASE("start = end") { std::vector result = range(5, 5); std::vector correct = {}; CHECK(result == correct); } - SUBCASE("empty range") { + SUBCASE("start > end") { std::vector result = range(5, 4); std::vector correct = {}; CHECK(result == correct); } + + SUBCASE("start < end, step < 0") { + std::vector result = range(0, 10, -1); + std::vector correct = {}; + CHECK(result == correct); + 
} } } From 5a5d27681bee6d0b3c1d49c2cccfe1d103e061bb Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Thu, 8 Aug 2024 22:08:29 -0700 Subject: [PATCH 07/34] Update to StridedRectangle interface --- lib/compiler/src/machine_mapping.cc | 174 ++++++++---------- .../pcg/device_coordinates.struct.toml | 3 +- lib/pcg/include/pcg/machine_view.h | 6 +- lib/pcg/include/pcg/strided_rectangle.h | 40 +++- .../include/pcg/strided_rectangle.struct.toml | 21 --- lib/pcg/src/pcg/device_id.cc | 17 +- lib/pcg/src/pcg/machine_view.cc | 17 +- lib/pcg/src/pcg/strided_rectangle.cc | 60 ++++++ lib/pcg/test/src/pcg/machine_view.cc | 55 ++++-- lib/pcg/test/src/pcg/strided_rectangle.cc | 38 ++-- 10 files changed, 261 insertions(+), 170 deletions(-) delete mode 100644 lib/pcg/include/pcg/strided_rectangle.struct.toml diff --git a/lib/compiler/src/machine_mapping.cc b/lib/compiler/src/machine_mapping.cc index 1e9d6ee2eb..a8f7bfdf71 100644 --- a/lib/compiler/src/machine_mapping.cc +++ b/lib/compiler/src/machine_mapping.cc @@ -9,9 +9,13 @@ #include "utils/containers.h" #include "utils/containers/are_disjoint.h" #include "utils/containers/as_vector.h" +#include "utils/containers/cartesian_product.h" #include "utils/containers/contains_key.h" #include "utils/containers/get_only.h" #include "utils/containers/keys.h" +#include "utils/containers/product.h" +#include "utils/containers/replicate.h" +#include "utils/containers/zip.h" #include "utils/exception.h" #include "utils/graph/graph_split.dtg.h" #include "utils/graph/node/algorithms.h" @@ -359,126 +363,108 @@ OptimalCostResult optimal_cost( } // bool is_valid_machine_view(MachineSpecification const &machinespec, -// MachineView const &mv) { -// Note: we're checking the size of the machineview, not the last device id -// (e.g. 
consider 2D machine view of size 4x4 with stride 2 along each -// dimension, technically it could fit into a 3x3 machine spec but the machine -// view is really describing a 4x4 space) -// if (get_device_type(mv) == DeviceType::GPU) { -// return get_size(mv) <= -// machinespec.num_nodes * machinespec.num_gpus_per_node; -// } else if (get_device_type(mv) == DeviceType::CPU) { -// return get_size(mv) <= -// machinespec.num_nodes * machinespec.num_cpus_per_node; -// } else { -// assert(false && "Unsupported DeviceType"); -// return false; -// } - -// Add check that the last one doesn't go over -// Add other check that you can cram them into the 2D frame (there has to -// exist a bipartition of the dimension that fits) +// MachineView const &mv) { +// auto get_all_bipartition_products = [&](std::vector elements) { +// std::unordered_multiset> mappings = +// transform(cartesian_product(replicate(elements.size(), +// std::vector{0,1}))); std::pair products; for (const auto +// &mapping : mappings) { +// int prod1 = product(transform(zip(elements, mapping), [&](auto elem) +// {return (elem.second ? 1 : elem.first);})); int prod2 = +// product(transform(zip(elements, mapping), [&](auto elem) {return +// (!elem.second ? 1 : elem.first);})); products.push_back({prod1 +// ,prod2}); assert(prod1*prod2 == product(elements)); +// } +// return products; +// } +// // assert(contains({DeviceType::GPU, DeviceType::CPU}, +// get_device_type(mv)); int num_devices_per_node = ((get_device_type(mv) == +// DeviceType::GPU) ? 
machinespec.num_gpus_per_node : +// machinespec.num_cpus_per_node); int num_devices = machinespec.num_nodes * +// num_devices_per_node; if (num_devices >= +// get_raw_id(get_last_device_id(mv))) {return false;} if +// (!any_of(get_all_bipartition_products(as_vector(get_num_devices_per_dim(mv))), +// [&](auto pair) {return (pair.first <= machinespec.num_nodes) && +// (pair.second <= num_devices_per_node);})) { +// return false; +// } +// return true; // } // bool is_valid_machine_view(MachineView const &mv, // ParallelTensorShape const &shape) { -// std::unordered_set unordered_mv_degrees = -// without_order(get_point_dims(mv)); -// std::unordered_set unordered_tensor_degrees = -// without_order(ff_ordered_shard_degrees(shape)) + {get_sum_degree(shape)} -// + {get_discard_copy_degree(shape)}; // filter for the 1s (no parallelism) -// return unordered_mv_dims == unordered_tensor_dims; +// std::unordered_set unordered_mv_degrees = +// without_order(get_point_dims(mv)); +// std::unordered_set unordered_tensor_degrees = +// without_order(ff_ordered_shard_degrees(shape)) + +// {get_sum_degree(shape)} + {get_discard_copy_degree(shape)}; // filter +// for the 1s (no parallelism) +// return unordered_mv_dims == unordered_tensor_dims; // } -// WARNING: some machine_views returned are invalid, get allowed_machine_views -// for valid ones. +// // WARNING: some machine_views returned are invalid, get +// allowed_machine_views +// // for valid ones. 
+ +// //TODO: add support for both CPU and GPU // static std::unordered_set -// get_all_machine_views(MachineSpecification const &machinespec, +// get_all_candidate_machine_views(MachineSpecification const &machinespec, // ParallelTensorShape const &shape) { -// auto all_possible_strides = -// [](std::vector tensor_dims, -// size_t total_devices, -// size_t num_devices_used_by_tensor) { -// size_t room_for_stride = total_devices / num_devices_used_by_tensor; -// auto x = cartesian_product(replicate(range(1, room_for_stride + 1)), -// tensor_dims.size()); -// return filter(x, product((elem-1 for elem in x)) <= room_for_stride); +// auto all_possible_strides = +// [](std::vector tensor_dims, +// size_t num_total_devices, +// size_t num_devices_used_by_tensor) { +// size_t room_for_stride = num_total_devices / +// num_devices_used_by_tensor; std::unordered_multiset> +// strides = cartesian_product(replicate(range(1, room_for_stride + 1)), +// tensor_dims.size()); +// return strides; +// // return filter(strides, (std::vector const &stride) {return +// product((elem-1 for elem in x)) <= room_for_stride); +// }; + +// size_t num_total_devices = machinespec.num_nodes * +// machinespec.num_gpus_per_node; std::unordered_set +// machine_views; std::vector tensor_dims; size_t +// num_devices_used_by_tensor = product(tensor_dims); for (std::vector +// stride : +// all_possible_strides(tensor_dims, num_total_devices, +// num_devices_used_by_tensor)) { +// for (int start_id = 0 ; +// start_id <= num_total_devices - num_devices_used_by_tensor + 1; +// start_id++) { +// std::vector sides = +// transform(zip(tensor_dims, stride)); +// MachineView mv = {device_id_t(gpu_id_t(start_id)), +// StridedRectangle{sides}}; machine_views.insert(mv); // } - -// size_t total_devices = machinespec.num_nodes * -// machinespec.num_gpus_per_nodes; std::unordered_set -// machine_views; std::vector tensor_dims; size_t -// num_devices_used_by_tensor = product(tensor_dims); -// std::unordered_set> 
stride_sets = -// make_stride_sets(tensor_dims, total_devices); -// for (std::vector stride : -// all_possible_strides(tensor_dims, total_devices)) { -// for (int start_id = 0 : -// start_id <= total_devices - num_devices_used_by_tensor + 1; -// start_id++) { -// std::vector sides = -// transform(zip(tensor_dims, stride)); -// MachineView mv = {start, StridedRectangle{sides}}; -// machine_views.insert(mv); // } -// } -// return machine_views; +// return machine_views; // } -// static std::unordered_set -// get_all_start_invariant_machine_views( -// MachineSpecification const &machinespec, -// ParallelTensorShape const &shape) { -// NOT_IMPLEMENTED(); -// } +// // static std::unordered_set +// // get_all_start_invariant_machine_views( +// // MachineSpecification const &machinespec, +// // ParallelTensorShape const &shape) { +// // NOT_IMPLEMENTED(); +// // } // auto get_all_machine_views_to_tensor_dim_bijections(MachineView const &mv, // ParallelTensorShape const &shape) { // NOT_IMPLEMENTED(); // } -// // do product (total num of devices vs total num of elements in the tensor). 
-// bool is_valid_machine_view(ParallelTensorShape const& -// output_shape, MachineView const& view) { -// int is_dim = 0; -// for (int i = 0; i < num_dims; i++) { -// if (dims[i].parallel_idx != -1) { -// is_dim++; -// if (dims[i].parallel_idx > view.ndims) { -// return false; -// } -// if (view.dim[dims[i].parallel_idx] != dims[i].degree) { -// return false; -// } -// } -// } -// if (is_dim == 0) { -// is_dim = 1; -// } -// if (is_dim != view.ndims) { -// return false; -// } -// if (get_total_num_parts() != view.num_parts()) { -// return false; -// } -// return true; -// } - // std::unordered_set // get_allowed_machine_views(MachineSpecification const &machinespec, // ParallelTensorShape const &shape) { // std::unordered_set operator_views = -// get_all_machine_views(machinespec, shape); -// // operator_views = filter(operator_views, [&](MachineView const& view) -// // {return all_of(outputs(op), [&](ParallelTensorShape const& -// output){return -// // is_valid_machine_view(op, output, view);}); +// get_all_candidate_machine_views(machinespec, shape); // operator_views = filter(operator_views, [&](MachineView const &view) { -// return is_valid_machine_view(shape, view); +// return is_valid_machine_view(view, shape); // }); // operator_views = filter(operator_views, [&](MachineView const &view) { -// return is_valid_machine_view(machinespec, view); +// return is_valid_machine_view(view, machinespec); // }); // return operator_views; // } diff --git a/lib/pcg/include/pcg/device_coordinates.struct.toml b/lib/pcg/include/pcg/device_coordinates.struct.toml index 80d8ce45c9..a19d324270 100644 --- a/lib/pcg/include/pcg/device_coordinates.struct.toml +++ b/lib/pcg/include/pcg/device_coordinates.struct.toml @@ -1,4 +1,3 @@ -# DeviceCoordinates is exclusive to machine_view.cc, must not be used outside of it. 
namespace = "FlexFlow" name = "DeviceCoordinates" features = [ @@ -16,4 +15,4 @@ includes = [ [[fields]] name = "coords" -type = "::FlexFlow::FFOrdered" +type = "::FlexFlow::FFOrdered" diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 1006e20186..b1e29e2a07 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -21,10 +21,8 @@ device_id_t get_last_device_id(MachineView const &mv); size_t num_dims(MachineView const &mv); size_t num_devices(MachineView const &mv); size_t get_size(MachineView const &mv); -std::unordered_multiset - get_num_devices_per_dim(MachineView const &mv); -std::unordered_multiset - get_side_size_per_dim(MachineView const &mv); +std::vector get_num_devices_per_dim(MachineView const &mv); +std::vector get_side_size_per_dim(MachineView const &mv); DeviceType get_device_type(MachineView const &mv); diff --git a/lib/pcg/include/pcg/strided_rectangle.h b/lib/pcg/include/pcg/strided_rectangle.h index ced747d5d3..030bec423a 100644 --- a/lib/pcg/include/pcg/strided_rectangle.h +++ b/lib/pcg/include/pcg/strided_rectangle.h @@ -3,11 +3,27 @@ #include "op-attrs/ff_dim.dtg.h" #include "pcg/device_id_t.dtg.h" +#include "pcg/num_points_t.dtg.h" #include "pcg/side_size_t.dtg.h" -#include "pcg/strided_rectangle.dtg.h" +#include "pcg/strided_rectangle_side.dtg.h" namespace FlexFlow { +struct StridedRectangle { + StridedRectangle() = delete; + explicit StridedRectangle(std::vector const &sides); + + bool operator==(StridedRectangle const &) const; + bool operator!=(StridedRectangle const &) const; + + std::vector const &sides; + +private: + std::vector _sides; +}; +std::string format_as(StridedRectangle const &); +std::ostream &operator<<(std::ostream &, StridedRectangle const &); + size_t get_num_dims(StridedRectangle const &rect); num_points_t get_num_points(StridedRectangle const &rect); @@ -16,4 +32,26 @@ size_t get_size(StridedRectangle const &rect); } // namespace FlexFlow 
+namespace std { +template <> +struct hash<::FlexFlow::StridedRectangle> { + size_t operator()(::FlexFlow::StridedRectangle const &) const; +}; +} // namespace std + +namespace nlohmann { +template <> +struct adl_serializer<::FlexFlow::StridedRectangle> { + static ::FlexFlow::StridedRectangle from_json(json const &); + static void to_json(json &, ::FlexFlow::StridedRectangle const &); +}; +} // namespace nlohmann + +namespace rc { +template <> +struct Arbitrary<::FlexFlow::StridedRectangle> { + static Gen<::FlexFlow::StridedRectangle> arbitrary(); +}; +} // namespace rc + #endif diff --git a/lib/pcg/include/pcg/strided_rectangle.struct.toml b/lib/pcg/include/pcg/strided_rectangle.struct.toml deleted file mode 100644 index 1b2318173d..0000000000 --- a/lib/pcg/include/pcg/strided_rectangle.struct.toml +++ /dev/null @@ -1,21 +0,0 @@ -namespace = "FlexFlow" -name = "StridedRectangle" -features = [ - "eq", - # "ord", - "hash", - "json", - "rapidcheck", - "fmt", -] - -includes = [ - "pcg/strided_rectangle_side.dtg.h", - "", - "utils/hash/unordered_multiset.h", - "utils/fmt/unordered_multiset.h", -] - -[[fields]] -name = "sides" -type = "std::unordered_multiset<::FlexFlow::StridedRectangleSide>" diff --git a/lib/pcg/src/pcg/device_id.cc b/lib/pcg/src/pcg/device_id.cc index 86c943045b..590d5545ed 100644 --- a/lib/pcg/src/pcg/device_id.cc +++ b/lib/pcg/src/pcg/device_id.cc @@ -26,13 +26,16 @@ cpu_id_t unwrap_cpu(device_id_t device_id) { } int get_raw_id(device_id_t device_id) { - if (get_device_type(device_id) == DeviceType::GPU) { - return unwrap_gpu(device_id).gpu_index; - } else if (get_device_type(device_id) == DeviceType::CPU) { - return unwrap_cpu(device_id).cpu_index; - } else { - assert(false && "Unsupported DeviceType"); - return -1; + switch (get_device_type(device_id)) { + case DeviceType::GPU: + return unwrap_gpu(device_id).gpu_index; + case DeviceType::CPU: + return unwrap_cpu(device_id).cpu_index; + default: + throw mk_runtime_error( + fmt::format("Unsupported 
DeviceType {} for device_id_t {}", + get_device_type(device_id), + device_id)); } } diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index bccaa84bd5..79a60ee5fd 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,7 +1,6 @@ #include "pcg/machine_view.h" #include "pcg/device_coordinates.dtg.h" #include "pcg/device_id.h" -#include "pcg/strided_rectangle.dtg.h" #include "pcg/strided_rectangle.h" #include "pcg/strided_rectangle_side.h" #include "utils/containers.h" @@ -37,11 +36,9 @@ static device_id_t get_device_id(MachineView const &mv, } std::unordered_multiset get_device_ids(MachineView const &mv) { - std::vector> ranges = + std::vector> ranges = transform(sorted(mv.rect.sides), [](StridedRectangleSide const &side) { - return range(size_t(0), - size_t(get_side_size(side).unwrapped), - size_t(side.stride.unwrapped)); + return range(0, get_side_size(side).unwrapped, side.stride.unwrapped); }); std::unordered_multiset devices_as_points = transform(cartesian_product(ranges), @@ -56,7 +53,7 @@ std::unordered_multiset get_device_ids(MachineView const &mv) { device_id_t get_last_device_id(MachineView const &mv) { DeviceCoordinates last_device = DeviceCoordinates( transform(sorted(mv.rect.sides), [](StridedRectangleSide const &s) { - return size_t(s.stride.unwrapped); + return s.stride.unwrapped; })); return maximum(get_device_ids(mv)); } @@ -65,15 +62,13 @@ size_t num_dims(MachineView const &mv) { return get_num_dims(mv.rect); } -std::unordered_multiset - get_num_devices_per_dim(MachineView const &mv) { +std::vector get_num_devices_per_dim(MachineView const &mv) { return transform(mv.rect.sides, [](StridedRectangleSide const &side) { return side.num_points; }); } -std::unordered_multiset - get_side_size_per_dim(MachineView const &mv) { +std::vector get_side_size_per_dim(MachineView const &mv) { return transform(mv.rect.sides, get_side_size); } @@ -95,7 +90,7 @@ static StridedRectangle make_1d_rect(int 
start, int stop, int stride) { StridedRectangleSide side = strided_side_from_size_and_stride( side_size_t{stop - start}, stride_t{stride}); StridedRectangle rect = - StridedRectangle{std::unordered_multiset{side}}; + StridedRectangle{std::vector{side}}; return rect; } diff --git a/lib/pcg/src/pcg/strided_rectangle.cc b/lib/pcg/src/pcg/strided_rectangle.cc index c2b4d77f60..46a7caa92f 100644 --- a/lib/pcg/src/pcg/strided_rectangle.cc +++ b/lib/pcg/src/pcg/strided_rectangle.cc @@ -2,13 +2,40 @@ #include "op-attrs/dim_ordered/transform.h" #include "pcg/device_coordinates.dtg.h" #include "pcg/device_id_t.dtg.h" +#include "pcg/strided_rectangle_side.dtg.h" #include "pcg/strided_rectangle_side.h" #include "utils/containers/as_vector.h" #include "utils/containers/product.h" +#include "utils/containers/sorted.h" #include "utils/containers/transform.h" +#include "utils/fmt/vector.h" +#include "utils/hash/vector.h" namespace FlexFlow { +StridedRectangle::StridedRectangle( + std::vector<::FlexFlow::StridedRectangleSide> const &sides) + : _sides(sorted(sides)), sides(_sides) {} + +bool StridedRectangle::operator==(StridedRectangle const &other) const { + return std::tie(this->_sides) == std::tie(other._sides); +} +bool StridedRectangle::operator!=(StridedRectangle const &other) const { + return std::tie(this->_sides) != std::tie(other._sides); +} + +std::string format_as(StridedRectangle const &x) { + std::ostringstream oss; + oss << ""; + return oss.str(); +} + +std::ostream &operator<<(std::ostream &s, StridedRectangle const &x) { + return s << fmt::to_string(x); +} + size_t get_num_dims(StridedRectangle const &rect) { return rect.sides.size(); } @@ -27,3 +54,36 @@ size_t get_size(StridedRectangle const &rect) { } } // namespace FlexFlow + +namespace std { +size_t hash::operator()( + ::FlexFlow::StridedRectangle const &x) const { + size_t result = 0; + result ^= + std::hash>{}(x.sides) + + 0x9e3779b9 + (result << 6) + (result >> 2); + return result; +} +} // namespace std + 
+namespace nlohmann { +::FlexFlow::StridedRectangle + adl_serializer<::FlexFlow::StridedRectangle>::from_json(json const &j) { + return ::FlexFlow::StridedRectangle{ + j.at("sides") + .template get>()}; +} +void adl_serializer<::FlexFlow::StridedRectangle>::to_json( + json &j, ::FlexFlow::StridedRectangle const &v) { + j["__type"] = "StridedRectangle"; + j["sides"] = v.sides; +} +} // namespace nlohmann + +namespace rc { +Gen<::FlexFlow::StridedRectangle> + Arbitrary<::FlexFlow::StridedRectangle>::arbitrary() { + return gen::construct<::FlexFlow::StridedRectangle>( + gen::arbitrary>()); +} +} // namespace rc diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index 71b07b23a9..11ed85b889 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -13,7 +13,7 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("MachineView utility functions") { + TEST_CASE("MachineView - utility functions") { StridedRectangle rect{{StridedRectangleSide(num_points_t(7), stride_t(5)), StridedRectangleSide(num_points_t(10), stride_t(2)), StridedRectangleSide(num_points_t(1), stride_t(4))}}; @@ -31,16 +31,15 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("get_side_size_per_dim") { - std::unordered_multiset expected = { - side_size_t(7 * 5), side_size_t(10 * 2), side_size_t(1 * 4)}; - std::unordered_multiset result = get_side_size_per_dim(mv); + std::vector expected = { + side_size_t(1 * 4), side_size_t(7 * 5), side_size_t(10 * 2)}; + std::vector result = get_side_size_per_dim(mv); CHECK(expected == result); } SUBCASE("get_num_devices_per_dim") { - std::unordered_multiset expected = { - num_points_t(7), num_points_t(10), num_points_t(1)}; - std::unordered_multiset result = - get_num_devices_per_dim(mv); + std::vector expected = { + num_points_t(1), num_points_t(7), num_points_t(10)}; + std::vector result = get_num_devices_per_dim(mv); CHECK(expected == result); } @@ -49,8 +48,9 @@ 
TEST_SUITE(FF_TEST_SUITE) { } } - TEST_CASE("MachineView - device ids") { - SUBCASE("MachineView #1") { + TEST_CASE("get_device_ids") { + + SUBCASE("2D MachineView") { StridedRectangle rect{{ StridedRectangleSide(num_points_t(2), stride_t(3)), StridedRectangleSide(num_points_t(2), stride_t(2)), @@ -63,11 +63,8 @@ TEST_SUITE(FF_TEST_SUITE) { std::unordered_multiset result = get_device_ids(mv); CHECK(expected == result); } - SUBCASE("get_last_device_id") { - CHECK(get_last_device_id(mv) == device_id_t(gpu_id_t(14))); - } } - SUBCASE("MachineView #2") { + SUBCASE("3D MachineView") { StridedRectangle rect{{ StridedRectangleSide(num_points_t(1), stride_t(3)), StridedRectangleSide(num_points_t(2), stride_t(1)), @@ -80,9 +77,33 @@ TEST_SUITE(FF_TEST_SUITE) { std::unordered_multiset expected = make_gpu_device_ids({1, 4, 13, 16}); std::unordered_multiset result = get_device_ids(mv); - CHECK(expected == result); } + } + } + + TEST_CASE("get_last_device_id") { + SUBCASE("2D MachineView") { + StridedRectangle rect{{ + StridedRectangleSide(num_points_t(2), stride_t(3)), + StridedRectangleSide(num_points_t(2), stride_t(2)), + }}; + gpu_id_t start(0); + MachineView mv{device_id_t{start}, rect}; + + SUBCASE("get_last_device_id") { + CHECK(get_last_device_id(mv) == device_id_t(gpu_id_t(14))); + } + } + + SUBCASE("3D MachineView") { + StridedRectangle rect{{ + StridedRectangleSide(num_points_t(1), stride_t(3)), + StridedRectangleSide(num_points_t(2), stride_t(1)), + StridedRectangleSide(num_points_t(2), stride_t(2)), + }}; + gpu_id_t start(1); + MachineView mv{device_id_t{start}, rect}; SUBCASE("get_last_device_id") { CHECK(get_last_device_id(mv) == device_id_t(gpu_id_t(16))); @@ -90,7 +111,7 @@ TEST_SUITE(FF_TEST_SUITE) { } } - TEST_CASE("MachineView make_1d_machine_view - GPU") { + TEST_CASE("make_1d_machine_view - GPU") { StridedRectangle rect{{StridedRectangleSide{num_points_t{7}, stride_t{5}}}}; device_id_t start_gpu{gpu_id_t{1}}; @@ -111,7 +132,7 @@ TEST_SUITE(FF_TEST_SUITE) { 
} } - TEST_CASE("MachineView make_1d_machine_view - CPU") { + TEST_CASE("make_1d_machine_view - CPU") { StridedRectangle rect{ {StridedRectangleSide{num_points_t{11}, stride_t{4}}}}; device_id_t start_cpu{cpu_id_t{2}}; diff --git a/lib/pcg/test/src/pcg/strided_rectangle.cc b/lib/pcg/test/src/pcg/strided_rectangle.cc index ed48502263..51fa13c2b9 100644 --- a/lib/pcg/test/src/pcg/strided_rectangle.cc +++ b/lib/pcg/test/src/pcg/strided_rectangle.cc @@ -6,22 +6,34 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("StridedRectangle - helper functions") { + TEST_CASE("StridedRectangle") { + SUBCASE("constructor sorts the StridedRectangleSides") { + StridedRectangleSide s0{num_points_t{7}, stride_t{5}}; + StridedRectangleSide s1{num_points_t{10}, stride_t{2}}; - StridedRectangleSide s0{num_points_t{7}, stride_t{5}}; - StridedRectangleSide s1{num_points_t{10}, stride_t{2}}; - StridedRectangleSide s2{num_points_t{8}, stride_t{1}}; - StridedRectangle rect{{s0, s1, s2}}; - - SUBCASE("get_num_dims") { - CHECK(get_num_dims(rect) == 3); - } - SUBCASE("get_num_points") { - CHECK(get_num_points(rect) == num_points_t{7 * 8 * 10}); + StridedRectangle r0 = StridedRectangle{{s0, s1}}; + StridedRectangle r1 = StridedRectangle{{s1, s0}}; + CHECK(r0 == r1); + CHECK(r1.sides == std::vector{s0, s1}); + CHECK(r1.sides != std::vector{s1, s0}); } - SUBCASE("get_size") { - CHECK(get_size(rect) == size_t{(7 * 5) * (10 * 2) * (8 * 1)}); + SUBCASE("helper functions") { + StridedRectangleSide s0{num_points_t{7}, stride_t{5}}; + StridedRectangleSide s1{num_points_t{10}, stride_t{2}}; + StridedRectangleSide s2{num_points_t{8}, stride_t{1}}; + StridedRectangle rect{{s0, s1, s2}}; + + SUBCASE("get_num_dims") { + CHECK(get_num_dims(rect) == 3); + } + SUBCASE("get_num_points") { + CHECK(get_num_points(rect) == num_points_t{7 * 8 * 10}); + } + + SUBCASE("get_size") { + CHECK(get_size(rect) == size_t{(7 * 5) * (10 * 2) * (8 * 1)}); + } } } } From b30290386345dff303463c2b05cf11aace8014a0 
Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Fri, 9 Aug 2024 13:15:10 -0700 Subject: [PATCH 08/34] Minor updates --- lib/compiler/test/src/test_machine_mapping.cc | 23 -------- lib/pcg/include/pcg/machine_view.h | 25 +++++---- lib/pcg/src/pcg/machine_view.cc | 43 ++++++++------- lib/pcg/test/src/pcg/machine_view.cc | 54 ++++++++++--------- lib/utils/include/utils/containers/any_of.h | 18 +++++++ .../utils/containers/cartesian_product.h | 3 +- lib/utils/src/utils/containers/any_of.h | 1 + 7 files changed, 89 insertions(+), 78 deletions(-) delete mode 100644 lib/compiler/test/src/test_machine_mapping.cc create mode 100644 lib/utils/include/utils/containers/any_of.h create mode 100644 lib/utils/src/utils/containers/any_of.h diff --git a/lib/compiler/test/src/test_machine_mapping.cc b/lib/compiler/test/src/test_machine_mapping.cc deleted file mode 100644 index 4f9b879574..0000000000 --- a/lib/compiler/test/src/test_machine_mapping.cc +++ /dev/null @@ -1,23 +0,0 @@ -#include "doctest/doctest.h" -#include "test_generator.h" - -TEST_SUITE(FF_TEST_SUITE) { - // TEST_CASE("MachineMapping::combine") { - // RC_SUBCASE([](MachineMapping const &m0, MachineMapping const &m1) { - // RC_PRE(MachineMapping::nodes_are_disjoint(m0, m1)); - - // MachineMapping comb = MachineMapping::combine(m0, m1); - - // RC_ASSERT(comb.machine_views.size() == - // m0.machine_views.size() + m1.machine_views.size()); - // RC_ASSERT(is_submap(comb.machine_views, m0.machine_views)); - // RC_ASSERT(is_submap(comb.machine_views, m1.machine_views)); - // }); - // } - - // TEST_CASE("OptimalCostResult::infinity") { - // RC_SUBCASE([](OptimalCostResult const &c) { - // RC_ASSERT(c.runtime <= OptimalCostResult::infinity().runtime); - // }); - // } -} diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index b1e29e2a07..0bf80e691a 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -26,30 +26,35 @@ std::vector 
get_side_size_per_dim(MachineView const &mv); DeviceType get_device_type(MachineView const &mv); -MachineView make_1d_machine_view(gpu_id_t start, gpu_id_t stop, int stride = 1); -MachineView make_1d_machine_view(cpu_id_t start, cpu_id_t stop, int stride = 1); -MachineView - make_1d_machine_view(device_id_t start, device_id_t stop, int stride = 1); +MachineView make_1d_machine_view(gpu_id_t start, + gpu_id_t stop, + stride_t stride = stride_t{1}); +MachineView make_1d_machine_view(cpu_id_t start, + cpu_id_t stop, + stride_t stride = stride_t{1}); +MachineView make_1d_machine_view(device_id_t start, + device_id_t stop, + stride_t stride = stride_t{1}); MachineView make_1d_machine_view(cpu_id_t start, num_points_t num_points, - int stride = 1); + stride_t stride = stride_t{1}); MachineView make_1d_machine_view(gpu_id_t start, num_points_t num_points, - int stride = 1); + stride_t stride = stride_t{1}); MachineView make_1d_machine_view(device_id_t start, num_points_t num_points, - int stride = 1); + stride_t stride = stride_t{1}); MachineView make_1d_machine_view(cpu_id_t start, side_size_t interval_size, - int stride = 1); + stride_t stride = stride_t{1}); MachineView make_1d_machine_view(gpu_id_t start, side_size_t interval_size, - int stride = 1); + stride_t stride = stride_t{1}); MachineView make_1d_machine_view(device_id_t start, side_size_t interval_size, - int stride = 1); + stride_t stride = stride_t{1}); } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 79a60ee5fd..ac319a6b0e 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -84,28 +84,30 @@ DeviceType get_device_type(MachineView const &mv) { return get_device_type(mv.start); } -static StridedRectangle make_1d_rect(int start, int stop, int stride) { +static StridedRectangle make_1d_rect(int start, int stop, stride_t stride) { assert(stop > start); - assert(stride > 0); - StridedRectangleSide side = 
strided_side_from_size_and_stride( - side_size_t{stop - start}, stride_t{stride}); + assert(stride > stride_t(0)); + StridedRectangleSide side = + strided_side_from_size_and_stride(side_size_t{stop - start}, stride); StridedRectangle rect = StridedRectangle{std::vector{side}}; return rect; } -MachineView make_1d_machine_view(gpu_id_t start, gpu_id_t stop, int stride) { +MachineView + make_1d_machine_view(gpu_id_t start, gpu_id_t stop, stride_t stride) { StridedRectangle rect = make_1d_rect(start.gpu_index, stop.gpu_index, stride); return MachineView{device_id_t{start}, rect}; } -MachineView make_1d_machine_view(cpu_id_t start, cpu_id_t stop, int stride) { +MachineView + make_1d_machine_view(cpu_id_t start, cpu_id_t stop, stride_t stride) { StridedRectangle rect = make_1d_rect(start.cpu_index, stop.cpu_index, stride); return MachineView{device_id_t{start}, rect}; } MachineView - make_1d_machine_view(device_id_t start, device_id_t stop, int stride) { + make_1d_machine_view(device_id_t start, device_id_t stop, stride_t stride) { assert(get_device_type(start) == get_device_type(stop)); if (get_device_type(start) == DeviceType::CPU) { return make_1d_machine_view(unwrap_cpu(start), unwrap_cpu(stop), stride); @@ -114,26 +116,31 @@ MachineView return make_1d_machine_view(unwrap_gpu(start), unwrap_gpu(stop), stride); } +// TODO(@pietro) change from int to stride_t, makes more sense + static StridedRectangle - make_1d_rect(int start, num_points_t num_points, int stride) { - return make_1d_rect(start, start + num_points.unwrapped * stride, stride); + make_1d_rect(int start, num_points_t num_points, stride_t stride) { + return make_1d_rect( + start, start + num_points.unwrapped * stride.unwrapped, stride); } -MachineView - make_1d_machine_view(cpu_id_t start, num_points_t num_points, int stride) { +MachineView make_1d_machine_view(cpu_id_t start, + num_points_t num_points, + stride_t stride) { StridedRectangle rect = make_1d_rect(start.cpu_index, num_points, stride); return 
MachineView{device_id_t{start}, rect}; } -MachineView - make_1d_machine_view(gpu_id_t start, num_points_t num_points, int stride) { +MachineView make_1d_machine_view(gpu_id_t start, + num_points_t num_points, + stride_t stride) { StridedRectangle rect = make_1d_rect(start.gpu_index, num_points, stride); return MachineView{device_id_t{start}, rect}; } MachineView make_1d_machine_view(device_id_t start, num_points_t num_points, - int stride) { + stride_t stride) { if (get_device_type(start) == DeviceType::CPU) { return make_1d_machine_view(unwrap_cpu(start), num_points, stride); } else { @@ -143,26 +150,26 @@ MachineView make_1d_machine_view(device_id_t start, } static StridedRectangle - make_1d_rect(int start, side_size_t interval_size, int stride) { + make_1d_rect(int start, side_size_t interval_size, stride_t stride) { return make_1d_rect(start, start + interval_size.unwrapped, stride); } MachineView make_1d_machine_view(cpu_id_t start, side_size_t interval_size, - int stride) { + stride_t stride) { StridedRectangle rect = make_1d_rect(start.cpu_index, interval_size, stride); return MachineView{device_id_t{start}, rect}; } MachineView make_1d_machine_view(gpu_id_t start, side_size_t interval_size, - int stride) { + stride_t stride) { StridedRectangle rect = make_1d_rect(start.gpu_index, interval_size, stride); return MachineView{device_id_t{start}, rect}; } MachineView make_1d_machine_view(device_id_t start, side_size_t interval_size, - int stride) { + stride_t stride) { if (get_device_type(start) == DeviceType::CPU) { return make_1d_machine_view(unwrap_cpu(start), interval_size, stride); diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index 11ed85b889..a9175aaad4 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -14,9 +14,9 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("MachineView - utility functions") { - StridedRectangle 
rect{{StridedRectangleSide(num_points_t(7), stride_t(5)), - StridedRectangleSide(num_points_t(10), stride_t(2)), - StridedRectangleSide(num_points_t(1), stride_t(4))}}; + StridedRectangle rect{{StridedRectangleSide(num_points_t(7), stride_t{5}), + StridedRectangleSide(num_points_t(10), stride_t{2}), + StridedRectangleSide(num_points_t(1), stride_t{4})}}; gpu_id_t start(1); MachineView mv{device_id_t{start}, rect}; @@ -52,8 +52,8 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("2D MachineView") { StridedRectangle rect{{ - StridedRectangleSide(num_points_t(2), stride_t(3)), - StridedRectangleSide(num_points_t(2), stride_t(2)), + StridedRectangleSide(num_points_t(2), stride_t{3}), + StridedRectangleSide(num_points_t(2), stride_t{2}), }}; gpu_id_t start(0); MachineView mv{device_id_t{start}, rect}; @@ -66,9 +66,9 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("3D MachineView") { StridedRectangle rect{{ - StridedRectangleSide(num_points_t(1), stride_t(3)), - StridedRectangleSide(num_points_t(2), stride_t(1)), - StridedRectangleSide(num_points_t(2), stride_t(2)), + StridedRectangleSide(num_points_t(1), stride_t{3}), + StridedRectangleSide(num_points_t(2), stride_t{1}), + StridedRectangleSide(num_points_t(2), stride_t{2}), }}; gpu_id_t start(1); MachineView mv{device_id_t{start}, rect}; @@ -85,8 +85,8 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_last_device_id") { SUBCASE("2D MachineView") { StridedRectangle rect{{ - StridedRectangleSide(num_points_t(2), stride_t(3)), - StridedRectangleSide(num_points_t(2), stride_t(2)), + StridedRectangleSide(num_points_t(2), stride_t{3}), + StridedRectangleSide(num_points_t(2), stride_t{2}), }}; gpu_id_t start(0); MachineView mv{device_id_t{start}, rect}; @@ -98,9 +98,9 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("3D MachineView") { StridedRectangle rect{{ - StridedRectangleSide(num_points_t(1), stride_t(3)), - StridedRectangleSide(num_points_t(2), stride_t(1)), - StridedRectangleSide(num_points_t(2), stride_t(2)), + StridedRectangleSide(num_points_t(1), 
stride_t{3}), + StridedRectangleSide(num_points_t(2), stride_t{1}), + StridedRectangleSide(num_points_t(2), stride_t{2}), }}; gpu_id_t start(1); MachineView mv{device_id_t{start}, rect}; @@ -117,16 +117,18 @@ TEST_SUITE(FF_TEST_SUITE) { device_id_t start_gpu{gpu_id_t{1}}; MachineView gpu_mv{start_gpu, rect}; - SUBCASE("make_1d_machine_view(gpu_id_t start, gpu_id_t stop, int stride)") { - MachineView result = - make_1d_machine_view(start_gpu, device_id_t{gpu_id_t(1 + 7 * 5)}, 5); + SUBCASE("make_1d_machine_view(gpu_id_t start, gpu_id_t stop, stride_t " + "stride)") { + MachineView result = make_1d_machine_view( + start_gpu, device_id_t{gpu_id_t(1 + 7 * 5)}, stride_t{5}); MachineView correct = gpu_mv; CHECK(result == correct); } - SUBCASE("make_1d_machine_view(gpu_id_t start, num_points_t num_points, int " - "stride)") { - MachineView result = make_1d_machine_view(start_gpu, num_points_t{7}, 5); + SUBCASE("make_1d_machine_view(gpu_id_t start, num_points_t num_points, " + "stride_t stride)") { + MachineView result = + make_1d_machine_view(start_gpu, num_points_t{7}, stride_t{5}); MachineView correct = gpu_mv; CHECK(result == correct); } @@ -138,15 +140,17 @@ TEST_SUITE(FF_TEST_SUITE) { device_id_t start_cpu{cpu_id_t{2}}; MachineView cpu_mv{start_cpu, rect}; - SUBCASE("make_1d_machine_view(cpu_id_t start, cpu_id_t stop, int stride)") { - MachineView result = - make_1d_machine_view(start_cpu, device_id_t{cpu_id_t(2 + 11 * 4)}, 4); + SUBCASE("make_1d_machine_view(cpu_id_t start, cpu_id_t stop, stride_t " + "stride)") { + MachineView result = make_1d_machine_view( + start_cpu, device_id_t{cpu_id_t(2 + 11 * 4)}, stride_t{4}); MachineView correct = cpu_mv; CHECK(result == correct); } - SUBCASE("make_1d_machine_view(cpu_id_t start, num_points_t num_points, int " - "stride)") { - MachineView result = make_1d_machine_view(start_cpu, num_points_t{11}, 4); + SUBCASE("make_1d_machine_view(cpu_id_t start, num_points_t num_points, " + "stride_t stride)") { + MachineView result = + 
make_1d_machine_view(start_cpu, num_points_t{11}, stride_t{4}); MachineView correct = cpu_mv; CHECK(result == correct); } diff --git a/lib/utils/include/utils/containers/any_of.h b/lib/utils/include/utils/containers/any_of.h new file mode 100644 index 0000000000..1504e2aa84 --- /dev/null +++ b/lib/utils/include/utils/containers/any_of.h @@ -0,0 +1,18 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ANY_OF_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ANY_OF_H + +namespace FlexFlow { + +template +bool any_of(C const &c, F const &f) { + for (auto const &v : c) { + if (f(v)) { + return true; + } + } + return false; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/cartesian_product.h b/lib/utils/include/utils/containers/cartesian_product.h index dd143bc302..1232dd2cbd 100644 --- a/lib/utils/include/utils/containers/cartesian_product.h +++ b/lib/utils/include/utils/containers/cartesian_product.h @@ -13,9 +13,8 @@ template auto cartesian_product(Container const &containers) { using ValueType = typename Container::value_type::value_type; using VectorType = std::vector; - using SetType = std::unordered_multiset; auto ordered = as_vector(containers); - SetType result; + std::unordered_multiset result; std::function recurse = [&](VectorType ¤t, std::size_t depth) { diff --git a/lib/utils/src/utils/containers/any_of.h b/lib/utils/src/utils/containers/any_of.h new file mode 100644 index 0000000000..c6784c346b --- /dev/null +++ b/lib/utils/src/utils/containers/any_of.h @@ -0,0 +1 @@ +#include "utils/containers/any_of.h" From 775cb90c9764da6f2ce6e32eaa84eaf3b8abdd8b Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Sat, 10 Aug 2024 14:25:50 -0700 Subject: [PATCH 09/34] added get_allowed_machine_views --- .../include/compiler/machine_mapping.h | 15 +- lib/compiler/src/machine_mapping.cc | 213 +++++++----------- lib/compiler/test/src/machine_mapping.cc | 103 +++++++++ lib/pcg/include/pcg/strided_rectangle.h | 13 +- 
lib/pcg/src/pcg/machine_view.cc | 16 +- lib/pcg/src/pcg/strided_rectangle.cc | 27 ++- lib/pcg/test/src/pcg/strided_rectangle.cc | 4 +- 7 files changed, 226 insertions(+), 165 deletions(-) create mode 100644 lib/compiler/test/src/machine_mapping.cc diff --git a/lib/compiler/include/compiler/machine_mapping.h b/lib/compiler/include/compiler/machine_mapping.h index 5d17cbb373..ee53f527ea 100644 --- a/lib/compiler/include/compiler/machine_mapping.h +++ b/lib/compiler/include/compiler/machine_mapping.h @@ -53,17 +53,10 @@ OptimalCostResult optimal_cost( MachineSpecification const &resources, OptimalCostCache &cached_subgraph_costs); -} // namespace FlexFlow - -// namespace std { -// -// template <> -// struct hash> { -// size_t operator()( -// std::unordered_map const &g) -// const; -// }; +std::unordered_set + get_allowed_machine_views(MachineSpecification const &machinespec, + ParallelTensorShape const &shape); -// }; // namespace std +} // namespace FlexFlow #endif diff --git a/lib/compiler/src/machine_mapping.cc b/lib/compiler/src/machine_mapping.cc index a8f7bfdf71..0f117c5225 100644 --- a/lib/compiler/src/machine_mapping.cc +++ b/lib/compiler/src/machine_mapping.cc @@ -14,7 +14,10 @@ #include "utils/containers/get_only.h" #include "utils/containers/keys.h" #include "utils/containers/product.h" +#include "utils/containers/range.h" #include "utils/containers/replicate.h" +#include "utils/containers/transform.h" +#include "utils/containers/without_order.h" #include "utils/containers/zip.h" #include "utils/exception.h" #include "utils/graph/graph_split.dtg.h" @@ -24,6 +27,7 @@ #include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" #include "utils/graph/serial_parallel/serial_parallel_decomposition.h" #include "utils/graph/serial_parallel/serial_parallel_splits.h" +# namespace FlexFlow { @@ -362,140 +366,93 @@ OptimalCostResult optimal_cost( return searcher.optimal_cost(subpcg, resources, sp_decomposition); } -// bool 
is_valid_machine_view(MachineSpecification const &machinespec, -// MachineView const &mv) { -// auto get_all_bipartition_products = [&](std::vector elements) { -// std::unordered_multiset> mappings = -// transform(cartesian_product(replicate(elements.size(), -// std::vector{0,1}))); std::pair products; for (const auto -// &mapping : mappings) { -// int prod1 = product(transform(zip(elements, mapping), [&](auto elem) -// {return (elem.second ? 1 : elem.first);})); int prod2 = -// product(transform(zip(elements, mapping), [&](auto elem) {return -// (!elem.second ? 1 : elem.first);})); products.push_back({prod1 -// ,prod2}); assert(prod1*prod2 == product(elements)); -// } -// return products; -// } -// // assert(contains({DeviceType::GPU, DeviceType::CPU}, -// get_device_type(mv)); int num_devices_per_node = ((get_device_type(mv) == -// DeviceType::GPU) ? machinespec.num_gpus_per_node : -// machinespec.num_cpus_per_node); int num_devices = machinespec.num_nodes * -// num_devices_per_node; if (num_devices >= -// get_raw_id(get_last_device_id(mv))) {return false;} if -// (!any_of(get_all_bipartition_products(as_vector(get_num_devices_per_dim(mv))), -// [&](auto pair) {return (pair.first <= machinespec.num_nodes) && -// (pair.second <= num_devices_per_node);})) { -// return false; -// } -// return true; -// } +bool is_valid_machine_view(MachineView const &mv, + MachineSpecification const &machinespec) { + int num_devices_per_node = ((get_device_type(mv) == DeviceType::GPU) + ? 
machinespec.num_gpus_per_node + : machinespec.num_cpus_per_node); + int num_devices = machinespec.num_nodes * num_devices_per_node; + return (num_devices > get_raw_id(get_last_device_id(mv))); +} -// bool is_valid_machine_view(MachineView const &mv, -// ParallelTensorShape const &shape) { -// std::unordered_set unordered_mv_degrees = -// without_order(get_point_dims(mv)); -// std::unordered_set unordered_tensor_degrees = -// without_order(ff_ordered_shard_degrees(shape)) + -// {get_sum_degree(shape)} + {get_discard_copy_degree(shape)}; // filter -// for the 1s (no parallelism) -// return unordered_mv_dims == unordered_tensor_dims; -// } +std::vector get_tensor_parallel_degrees(ParallelTensorShape const &shape) { + std::vector degrees = as_vector(ff_ordered_shard_degrees(shape)); + degrees.push_back(get_sum_degree(shape)); + degrees.push_back(get_discard_copy_degree(shape)); + return degrees; +} -// // WARNING: some machine_views returned are invalid, get -// allowed_machine_views -// // for valid ones. 
- -// //TODO: add support for both CPU and GPU -// static std::unordered_set -// get_all_candidate_machine_views(MachineSpecification const &machinespec, -// ParallelTensorShape const &shape) { - -// auto all_possible_strides = -// [](std::vector tensor_dims, -// size_t num_total_devices, -// size_t num_devices_used_by_tensor) { -// size_t room_for_stride = num_total_devices / -// num_devices_used_by_tensor; std::unordered_multiset> -// strides = cartesian_product(replicate(range(1, room_for_stride + 1)), -// tensor_dims.size()); -// return strides; -// // return filter(strides, (std::vector const &stride) {return -// product((elem-1 for elem in x)) <= room_for_stride); -// }; - -// size_t num_total_devices = machinespec.num_nodes * -// machinespec.num_gpus_per_node; std::unordered_set -// machine_views; std::vector tensor_dims; size_t -// num_devices_used_by_tensor = product(tensor_dims); for (std::vector -// stride : -// all_possible_strides(tensor_dims, num_total_devices, -// num_devices_used_by_tensor)) { -// for (int start_id = 0 ; -// start_id <= num_total_devices - num_devices_used_by_tensor + 1; -// start_id++) { -// std::vector sides = -// transform(zip(tensor_dims, stride)); -// MachineView mv = {device_id_t(gpu_id_t(start_id)), -// StridedRectangle{sides}}; machine_views.insert(mv); -// } -// } -// return machine_views; -// } +bool is_valid_machine_view(MachineView const &mv, + ParallelTensorShape const &shape) { + std::vector mv_degrees = + transform(get_num_devices_per_dim(mv), + [](num_points_t degree) { return degree.unwrapped; }); + std::vector tensor_degrees = get_tensor_parallel_degrees(shape); + tensor_degrees = + filter(tensor_degrees, [](int degree) { return degree != 1; }); + return without_order(mv_degrees) == without_order(tensor_degrees); +} -// // static std::unordered_set -// // get_all_start_invariant_machine_views( -// // MachineSpecification const &machinespec, -// // ParallelTensorShape const &shape) { -// // NOT_IMPLEMENTED(); -// // 
} +// TODO(@pietro): add support for both CPU and GPU +static std::unordered_set + get_candidate_machine_views(MachineSpecification const &machinespec, + ParallelTensorShape const &shape) { + + auto candidate_strides = [](std::vector tensor_dims, + int total_devices) { + int max_stride_upper_bound = + (total_devices + 1) / + product(transform(tensor_dims, [](int degree) { return degree - 1; })); + std::unordered_multiset> strides = cartesian_product( + replicate(tensor_dims.size(), range(1, max_stride_upper_bound + 1))); + return strides; + }; -// auto get_all_machine_views_to_tensor_dim_bijections(MachineView const &mv, -// ParallelTensorShape const &shape) { -// NOT_IMPLEMENTED(); -// } + std::vector tensor_dims = filter(get_tensor_parallel_degrees(shape), + [](int degree) { return degree != 1; }); + std::unordered_set machine_views; + int total_devices = machinespec.num_nodes * machinespec.num_gpus_per_node; + for (std::vector stride : + candidate_strides(tensor_dims, total_devices)) { + for (int start_id = 0; start_id < total_devices; start_id++) { + std::vector sides = + transform(zip(tensor_dims, stride), [&](auto const &pair) { + return StridedRectangleSide(num_points_t(pair.first), + stride_t(pair.second)); + }); + MachineView mv = + MachineView{device_id_t(gpu_id_t(start_id)), StridedRectangle{sides}}; + machine_views.insert(mv); + } + } + return machine_views; +} -// std::unordered_set -// get_allowed_machine_views(MachineSpecification const &machinespec, -// ParallelTensorShape const &shape) { -// std::unordered_set operator_views = -// get_all_candidate_machine_views(machinespec, shape); -// operator_views = filter(operator_views, [&](MachineView const &view) { -// return is_valid_machine_view(view, shape); -// }); -// operator_views = filter(operator_views, [&](MachineView const &view) { -// return is_valid_machine_view(view, machinespec); -// }); -// return operator_views; -// } +std::unordered_set + get_allowed_machine_views(MachineSpecification 
const &machinespec, + ParallelTensorShape const &shape) { + + std::unordered_set views = + get_candidate_machine_views(machinespec, shape); + views = filter(views, [&](MachineView const &view) { + return is_valid_machine_view(view, shape); + }); + views = filter(views, [&](MachineView const &view) { + return is_valid_machine_view(view, machinespec); + }); + return views; +} -// // Ask the output shapes -// // Get the PCG - -// std::vector SearchHelper::get_valid_machine_views( -// Op const *op, MachineResource const &resource) const { -// std::vector const cached_op_views; -// std::vector valid_views; -// for (size_t i = 0; i < this->model->all_valid_views.size(); i++) { -// bool valid = true; -// for (int j = 0; j < op->numOutputs; j++) { -// if (!op->outputs[j]->is_valid_machine_view( -// this->model->all_valid_views[i])) { -// valid = false; -// break; -// } -// } -// if (valid) { -// cached_op_views.push_back(this->model->all_valid_views[i]); -// } -// } - -// for (size_t i = 0; i < cached_op_views->size(); i++) { -// if (resource.is_valid_machine_view(view)) { -// valid_views.push_back(view); -// } -// } -// return valid_views; +// static std::unordered_set +// get_all_start_invariant_machine_views( +// MachineSpecification const &machinespec, +// ParallelTensorShape const &shape) { +// NOT_IMPLEMENTED(); // } +auto get_all_machine_views_to_tensor_dim_bijections( + MachineView const &mv, ParallelTensorShape const &shape) { + NOT_IMPLEMENTED(); +} + } // namespace FlexFlow diff --git a/lib/compiler/test/src/machine_mapping.cc b/lib/compiler/test/src/machine_mapping.cc new file mode 100644 index 0000000000..1565d80421 --- /dev/null +++ b/lib/compiler/test/src/machine_mapping.cc @@ -0,0 +1,103 @@ +#include "compiler/machine_mapping.h" +#include "doctest/doctest.h" +#include "pcg/machine_specification.dtg.h" +#include "test_generator.h" +#include "utils/containers/extend.h" +#include "utils/containers/is_subseteq_of.h" +#include "utils/containers/set_difference.h" 
+ +TEST_SUITE(FF_TEST_SUITE) { + + TEST_CASE("get_allowed_machine_view") { + SUBCASE("no parallelism") {} + + SUBCASE("1 degree of parallelism") { + MachineSpecification ms = MachineSpecification(5, 1, 1, 0, 0); + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + std::unordered_set correct = { + make_1d_machine_view(gpu_id_t(0), gpu_id_t(3), stride_t(1)), + make_1d_machine_view(gpu_id_t(1), gpu_id_t(4), stride_t(1)), + make_1d_machine_view(gpu_id_t(2), gpu_id_t(5), stride_t(1)), + make_1d_machine_view(gpu_id_t(0), gpu_id_t(6), stride_t(2))}; + std::unordered_set result = + get_allowed_machine_views(ms, shape); + CHECK(correct == result); + } + + SUBCASE("2 degrees of parallelism") { + MachineSpecification ms = MachineSpecification(18, 1, 1, 0, 0); + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{2}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + auto make_2d_views = [&](int num_starts, int stride1, int stride2) { + std::unordered_set views; + for (int i = 0; i < num_starts; i++) { + StridedRectangle rect = StridedRectangle{ + {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, + StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; + MachineView mv = MachineView{device_id_t(gpu_id_t(i)), rect}; + views.insert(mv); + } + return views; + }; + std::unordered_set correct; + extend(correct, + make_2d_views(/*num_starts*/ 13, /*stride1*/ 1, /*stride2*/ 1)); + extend(correct, + make_2d_views(/*num_starts*/ 8, /*stride1*/ 2, /*stride2*/ 1)); + extend(correct, + make_2d_views(/*num_starts*/ 9, /*stride1*/ 1, /*stride2*/ 2)); + extend(correct, + make_2d_views(/*num_starts*/ 3, /*stride1*/ 3, /*stride2*/ 1)); + extend(correct, + make_2d_views(/*num_starts*/ 5, /*stride1*/ 1, 
/*stride2*/ 3)); + extend(correct, + make_2d_views(/*num_starts*/ 1, /*stride1*/ 1, /*stride2*/ 4)); + + std::unordered_set result = + get_allowed_machine_views(ms, shape); + CHECK(result == correct); + } + } + + // TEST_CASE("MachineMapping::combine") { + // RC_SUBCASE([](MachineMapping const &m0, MachineMapping const &m1) { + // RC_PRE(MachineMapping::nodes_are_disjoint(m0, m1)); + + // MachineMapping comb = MachineMapping::combine(m0, m1); + + // RC_ASSERT(comb.machine_views.size() == + // m0.machine_views.size() + m1.machine_views.size()); + // RC_ASSERT(is_submap(comb.machine_views, m0.machine_views)); + // RC_ASSERT(is_submap(comb.machine_views, m1.machine_views)); + // }); + // } + + // TEST_CASE("OptimalCostResult::infinity") { + // RC_SUBCASE([](OptimalCostResult const &c) { + // RC_ASSERT(c.runtime <= OptimalCostResult::infinity().runtime); + // }); + // } +} diff --git a/lib/pcg/include/pcg/strided_rectangle.h b/lib/pcg/include/pcg/strided_rectangle.h index 030bec423a..44de42395f 100644 --- a/lib/pcg/include/pcg/strided_rectangle.h +++ b/lib/pcg/include/pcg/strided_rectangle.h @@ -7,19 +7,22 @@ #include "pcg/side_size_t.dtg.h" #include "pcg/strided_rectangle_side.dtg.h" +// TODO(@pietro) add get_sides() + namespace FlexFlow { struct StridedRectangle { + +private: + std::vector _sides; + +public: StridedRectangle() = delete; explicit StridedRectangle(std::vector const &sides); bool operator==(StridedRectangle const &) const; bool operator!=(StridedRectangle const &) const; - - std::vector const &sides; - -private: - std::vector _sides; + std::vector get_sides() const; }; std::string format_as(StridedRectangle const &); std::ostream &operator<<(std::ostream &, StridedRectangle const &); diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index ac319a6b0e..3631cfee6e 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -20,7 +20,7 @@ static device_id_t get_device_id(MachineView const &mv, 
DeviceCoordinates const &point) { assert(point.coords.size() == get_num_dims(mv.rect)); std::vector coefficients = - scanl(sorted(mv.rect.sides), + scanl(mv.rect.get_sides(), 1, [](size_t const &result, StridedRectangleSide const &side) { return result * get_side_size(side).unwrapped; @@ -37,7 +37,7 @@ static device_id_t get_device_id(MachineView const &mv, std::unordered_multiset get_device_ids(MachineView const &mv) { std::vector> ranges = - transform(sorted(mv.rect.sides), [](StridedRectangleSide const &side) { + transform(mv.rect.get_sides(), [](StridedRectangleSide const &side) { return range(0, get_side_size(side).unwrapped, side.stride.unwrapped); }); std::unordered_multiset devices_as_points = @@ -51,10 +51,10 @@ std::unordered_multiset get_device_ids(MachineView const &mv) { } device_id_t get_last_device_id(MachineView const &mv) { - DeviceCoordinates last_device = DeviceCoordinates( - transform(sorted(mv.rect.sides), [](StridedRectangleSide const &s) { - return s.stride.unwrapped; - })); + // DeviceCoordinates last_device = DeviceCoordinates( + // transform(mv.rect.get_sides(), [](StridedRectangleSide const &s) { + // return s.stride.unwrapped; + // })); return maximum(get_device_ids(mv)); } @@ -63,13 +63,13 @@ size_t num_dims(MachineView const &mv) { } std::vector get_num_devices_per_dim(MachineView const &mv) { - return transform(mv.rect.sides, [](StridedRectangleSide const &side) { + return transform(mv.rect.get_sides(), [](StridedRectangleSide const &side) { return side.num_points; }); } std::vector get_side_size_per_dim(MachineView const &mv) { - return transform(mv.rect.sides, get_side_size); + return transform(mv.rect.get_sides(), get_side_size); } size_t num_devices(MachineView const &mv) { diff --git a/lib/pcg/src/pcg/strided_rectangle.cc b/lib/pcg/src/pcg/strided_rectangle.cc index 46a7caa92f..94c0e372c6 100644 --- a/lib/pcg/src/pcg/strided_rectangle.cc +++ b/lib/pcg/src/pcg/strided_rectangle.cc @@ -15,7 +15,7 @@ namespace FlexFlow { 
StridedRectangle::StridedRectangle( std::vector<::FlexFlow::StridedRectangleSide> const &sides) - : _sides(sorted(sides)), sides(_sides) {} + : _sides(sorted(sides)) {} bool StridedRectangle::operator==(StridedRectangle const &other) const { return std::tie(this->_sides) == std::tie(other._sides); @@ -24,10 +24,14 @@ bool StridedRectangle::operator!=(StridedRectangle const &other) const { return std::tie(this->_sides) != std::tie(other._sides); } +std::vector StridedRectangle::get_sides() const { + return _sides; +} + std::string format_as(StridedRectangle const &x) { std::ostringstream oss; oss << ""; return oss.str(); } @@ -37,20 +41,21 @@ std::ostream &operator<<(std::ostream &s, StridedRectangle const &x) { } size_t get_num_dims(StridedRectangle const &rect) { - return rect.sides.size(); + return rect.get_sides().size(); } num_points_t get_num_points(StridedRectangle const &rect) { return num_points_t{ - product(transform(rect.sides, [](StridedRectangleSide const &side) { + product(transform(rect.get_sides(), [](StridedRectangleSide const &side) { return side.num_points.unwrapped; }))}; } size_t get_size(StridedRectangle const &rect) { - return product(transform(rect.sides, [](StridedRectangleSide const &side) { - return get_side_size(side).unwrapped; - })); + return product( + transform(rect.get_sides(), [](StridedRectangleSide const &side) { + return get_side_size(side).unwrapped; + })); } } // namespace FlexFlow @@ -59,9 +64,9 @@ namespace std { size_t hash::operator()( ::FlexFlow::StridedRectangle const &x) const { size_t result = 0; - result ^= - std::hash>{}(x.sides) + - 0x9e3779b9 + (result << 6) + (result >> 2); + result ^= std::hash>{}( + x.get_sides()) + + 0x9e3779b9 + (result << 6) + (result >> 2); return result; } } // namespace std @@ -76,7 +81,7 @@ ::FlexFlow::StridedRectangle void adl_serializer<::FlexFlow::StridedRectangle>::to_json( json &j, ::FlexFlow::StridedRectangle const &v) { j["__type"] = "StridedRectangle"; - j["sides"] = v.sides; + 
j["sides"] = v.get_sides(); } } // namespace nlohmann diff --git a/lib/pcg/test/src/pcg/strided_rectangle.cc b/lib/pcg/test/src/pcg/strided_rectangle.cc index 51fa13c2b9..c5e3e70ce7 100644 --- a/lib/pcg/test/src/pcg/strided_rectangle.cc +++ b/lib/pcg/test/src/pcg/strided_rectangle.cc @@ -14,8 +14,8 @@ TEST_SUITE(FF_TEST_SUITE) { StridedRectangle r0 = StridedRectangle{{s0, s1}}; StridedRectangle r1 = StridedRectangle{{s1, s0}}; CHECK(r0 == r1); - CHECK(r1.sides == std::vector{s0, s1}); - CHECK(r1.sides != std::vector{s1, s0}); + CHECK(r1.get_sides() == std::vector{s0, s1}); + CHECK(r1.get_sides() != std::vector{s1, s0}); } SUBCASE("helper functions") { From c0c1c001f28c01df227656bd181fbb8800dd9dc1 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Sat, 10 Aug 2024 14:26:54 -0700 Subject: [PATCH 10/34] formatting --- lib/compiler/src/machine_mapping.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/compiler/src/machine_mapping.cc b/lib/compiler/src/machine_mapping.cc index 0f117c5225..9f32d5c540 100644 --- a/lib/compiler/src/machine_mapping.cc +++ b/lib/compiler/src/machine_mapping.cc @@ -27,7 +27,6 @@ #include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" #include "utils/graph/serial_parallel/serial_parallel_decomposition.h" #include "utils/graph/serial_parallel/serial_parallel_splits.h" -# namespace FlexFlow { @@ -398,8 +397,7 @@ static std::unordered_set get_candidate_machine_views(MachineSpecification const &machinespec, ParallelTensorShape const &shape) { - auto candidate_strides = [](std::vector tensor_dims, - int total_devices) { + auto candidate_strides = [](std::vector tensor_dims, int total_devices) { int max_stride_upper_bound = (total_devices + 1) / product(transform(tensor_dims, [](int degree) { return degree - 1; })); From fdd556ede9580e03155109fab0c0d7b1cf4747fa Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Sat, 10 Aug 2024 14:46:42 -0700 Subject: [PATCH 11/34] minor fix --- 
lib/pcg/include/pcg/machine_view.struct.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pcg/include/pcg/machine_view.struct.toml b/lib/pcg/include/pcg/machine_view.struct.toml index 07418f7825..a3b1e55207 100644 --- a/lib/pcg/include/pcg/machine_view.struct.toml +++ b/lib/pcg/include/pcg/machine_view.struct.toml @@ -11,7 +11,7 @@ features = [ includes = [ "pcg/device_id_t.dtg.h", - "pcg/strided_rectangle.dtg.h", + "pcg/strided_rectangle.h", ] [[fields]] From 075f4de7c8b27cca91035d3342e51389aff13175 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Sat, 10 Aug 2024 16:04:05 -0700 Subject: [PATCH 12/34] Added StartInvariantMachineView --- .../include/compiler/machine_mapping.h | 7 ++ lib/compiler/src/machine_mapping.cc | 13 ++-- lib/compiler/test/src/machine_mapping.cc | 67 ++++++++++++++++++- lib/pcg/include/pcg/machine_view.h | 5 +- lib/pcg/include/pcg/machine_view.struct.toml | 2 +- .../pcg/start_invariant_machine_view.h | 17 +++++ .../start_invariant_machine_view.struct.toml | 18 +++++ lib/pcg/include/pcg/strided_rectangle.h | 6 +- lib/pcg/src/pcg/machine_view.cc | 11 ++- .../src/pcg/start_invariant_machine_view.cc | 16 +++++ lib/pcg/src/pcg/strided_rectangle.cc | 16 +++++ lib/pcg/test/src/pcg/machine_view.cc | 12 ++-- .../src/pcg/start_invariant_machine_view.cc | 20 ++++++ 13 files changed, 183 insertions(+), 27 deletions(-) create mode 100644 lib/pcg/include/pcg/start_invariant_machine_view.h create mode 100644 lib/pcg/include/pcg/start_invariant_machine_view.struct.toml create mode 100644 lib/pcg/src/pcg/start_invariant_machine_view.cc create mode 100644 lib/pcg/test/src/pcg/start_invariant_machine_view.cc diff --git a/lib/compiler/include/compiler/machine_mapping.h b/lib/compiler/include/compiler/machine_mapping.h index ee53f527ea..90d547e409 100644 --- a/lib/compiler/include/compiler/machine_mapping.h +++ b/lib/compiler/include/compiler/machine_mapping.h @@ -7,10 +7,12 @@ #include "pcg/machine_specification.dtg.h" #include 
"pcg/machine_specification.h" #include "pcg/machine_view.h" +#include "pcg/start_invariant_machine_view.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "substitutions/sub_parallel_computation_graph.h" #include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" + namespace FlexFlow { MachineMapping combine(MachineMapping const &, MachineMapping const &); @@ -57,6 +59,11 @@ std::unordered_set get_allowed_machine_views(MachineSpecification const &machinespec, ParallelTensorShape const &shape); +std::unordered_set + get_allowed_start_invariant_machine_views( + MachineSpecification const &machinespec, + ParallelTensorShape const &shape); + } // namespace FlexFlow #endif diff --git a/lib/compiler/src/machine_mapping.cc b/lib/compiler/src/machine_mapping.cc index 9f32d5c540..109b98753d 100644 --- a/lib/compiler/src/machine_mapping.cc +++ b/lib/compiler/src/machine_mapping.cc @@ -27,6 +27,7 @@ #include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" #include "utils/graph/serial_parallel/serial_parallel_decomposition.h" #include "utils/graph/serial_parallel/serial_parallel_splits.h" +#include "pcg/start_invariant_machine_view.h" namespace FlexFlow { @@ -441,12 +442,12 @@ std::unordered_set return views; } -// static std::unordered_set -// get_all_start_invariant_machine_views( -// MachineSpecification const &machinespec, -// ParallelTensorShape const &shape) { -// NOT_IMPLEMENTED(); -// } +std::unordered_set + get_allowed_start_invariant_machine_views( + MachineSpecification const &machinespec, + ParallelTensorShape const &shape) { + return transform(get_allowed_machine_views(machinespec, shape), to_start_invariant); +} auto get_all_machine_views_to_tensor_dim_bijections( MachineView const &mv, ParallelTensorShape const &shape) { diff --git a/lib/compiler/test/src/machine_mapping.cc b/lib/compiler/test/src/machine_mapping.cc index 1565d80421..fb975c88a4 100644 --- a/lib/compiler/test/src/machine_mapping.cc 
+++ b/lib/compiler/test/src/machine_mapping.cc @@ -3,13 +3,10 @@ #include "pcg/machine_specification.dtg.h" #include "test_generator.h" #include "utils/containers/extend.h" -#include "utils/containers/is_subseteq_of.h" -#include "utils/containers/set_difference.h" TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_allowed_machine_view") { - SUBCASE("no parallelism") {} SUBCASE("1 degree of parallelism") { MachineSpecification ms = MachineSpecification(5, 1, 1, 0, 0); @@ -80,6 +77,70 @@ TEST_SUITE(FF_TEST_SUITE) { get_allowed_machine_views(ms, shape); CHECK(result == correct); } + + } + + TEST_CASE("get_allowed_start_invariant_machine_views") { + + SUBCASE("1 degree of parallelism") { + MachineSpecification ms = MachineSpecification(5, 1, 1, 0, 0); + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + std::unordered_set correct = { + make_1d_start_invariant_machine_view(num_points_t(3), stride_t(1)), + make_1d_start_invariant_machine_view(num_points_t(3), stride_t(2)) + }; + std::unordered_set result = + get_allowed_start_invariant_machine_views(ms, shape); + CHECK(correct == result); + } + + SUBCASE("2 degrees of parallelism") { + MachineSpecification ms = MachineSpecification(18, 1, 1, 0, 0); + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{2}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + auto make_2d_view = [&](int stride1, int stride2) { + StridedRectangle rect = StridedRectangle{ + {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, + StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; + return StartInvariantMachineView{rect}; + }; + std::unordered_set correct = { + make_2d_view(/*stride1*/ 1, /*stride2*/ 1), + make_2d_view(/*stride1*/ 2, /*stride2*/ 1), + 
make_2d_view(/*stride1*/ 1, /*stride2*/ 2), + make_2d_view(/*stride1*/ 3, /*stride2*/ 1), + make_2d_view(/*stride1*/ 1, /*stride2*/ 3), + make_2d_view(/*stride1*/ 1, /*stride2*/ 4) + }; + + std::unordered_set result = + get_allowed_start_invariant_machine_views(ms, shape); + CHECK(result == correct); + } + } // TEST_CASE("MachineMapping::combine") { diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 0bf80e691a..a03f7fd109 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -1,12 +1,9 @@ #ifndef _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_H #define _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_H -#include "pcg/cpu_id_t.dtg.h" #include "pcg/device_coordinates.dtg.h" #include "pcg/device_id.h" -#include "pcg/device_id_t.dtg.h" #include "pcg/device_type.dtg.h" -#include "pcg/gpu_id_t.dtg.h" #include "pcg/machine_view.dtg.h" #include "pcg/num_points_t.dtg.h" #include "pcg/side_size_t.dtg.h" @@ -15,7 +12,7 @@ namespace FlexFlow { -std::unordered_multiset get_device_ids(MachineView const &mv); +std::unordered_set get_device_ids(MachineView const &mv); device_id_t get_last_device_id(MachineView const &mv); size_t num_dims(MachineView const &mv); diff --git a/lib/pcg/include/pcg/machine_view.struct.toml b/lib/pcg/include/pcg/machine_view.struct.toml index a3b1e55207..eb933ed9b7 100644 --- a/lib/pcg/include/pcg/machine_view.struct.toml +++ b/lib/pcg/include/pcg/machine_view.struct.toml @@ -2,7 +2,7 @@ namespace = "FlexFlow" name = "MachineView" features = [ "eq", - # "ord", + "ord", "hash", "json", # "rapidcheck", diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.h b/lib/pcg/include/pcg/start_invariant_machine_view.h new file mode 100644 index 0000000000..2297a38f2b --- /dev/null +++ b/lib/pcg/include/pcg/start_invariant_machine_view.h @@ -0,0 +1,17 @@ +#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_START_INVARIANT_MACHINE_VIEW_H +#define _FLEXFLOW_PCG_INCLUDE_PCG_START_INVARIANT_MACHINE_VIEW_H + +#include 
"pcg/machine_view.dtg.h" +#include "pcg/start_invariant_machine_view.dtg.h" + +namespace FlexFlow { + +MachineView to_start_dependent(StartInvariantMachineView const &mv, + device_id_t const &start_id); +StartInvariantMachineView to_start_invariant(MachineView const &mv); + +StartInvariantMachineView make_1d_start_invariant_machine_view(num_points_t num_points, stride_t stride); + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml b/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml new file mode 100644 index 0000000000..d6ad89a14e --- /dev/null +++ b/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml @@ -0,0 +1,18 @@ +namespace = "FlexFlow" +name = "StartInvariantMachineView" +features = [ + "eq", + "ord", + "hash", + "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "pcg/strided_rectangle.h", +] + +[[fields]] +name = "rect" +type = "::FlexFlow::StridedRectangle" diff --git a/lib/pcg/include/pcg/strided_rectangle.h b/lib/pcg/include/pcg/strided_rectangle.h index 44de42395f..9099307506 100644 --- a/lib/pcg/include/pcg/strided_rectangle.h +++ b/lib/pcg/include/pcg/strided_rectangle.h @@ -7,7 +7,6 @@ #include "pcg/side_size_t.dtg.h" #include "pcg/strided_rectangle_side.dtg.h" -// TODO(@pietro) add get_sides() namespace FlexFlow { @@ -22,6 +21,11 @@ struct StridedRectangle { bool operator==(StridedRectangle const &) const; bool operator!=(StridedRectangle const &) const; + bool operator<(StridedRectangle const &) const; + bool operator>(StridedRectangle const &) const; + bool operator<=(StridedRectangle const &) const; + bool operator>=(StridedRectangle const &) const; + std::vector get_sides() const; }; std::string format_as(StridedRectangle const &); diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 3631cfee6e..ee0f71fed1 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -11,6 +11,7 @@ #include 
"utils/containers/reversed.h" #include "utils/containers/scanl.h" #include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" #include "utils/containers/zip.h" #include "utils/hash/vector.h" @@ -35,15 +36,15 @@ static device_id_t get_device_id(MachineView const &mv, : device_id_t(gpu_id_t(raw_id))); } -std::unordered_multiset get_device_ids(MachineView const &mv) { +std::unordered_set get_device_ids(MachineView const &mv) { std::vector> ranges = transform(mv.rect.get_sides(), [](StridedRectangleSide const &side) { return range(0, get_side_size(side).unwrapped, side.stride.unwrapped); }); - std::unordered_multiset devices_as_points = + std::unordered_set devices_as_points = unordered_set_of( transform(cartesian_product(ranges), - [](auto const &point) { return DeviceCoordinates(point); }); - std::unordered_multiset ids = + [](auto const &point) { return DeviceCoordinates(point); })); + std::unordered_set ids = transform(devices_as_points, [&](DeviceCoordinates const &dc) { return get_device_id(mv, dc); }); @@ -116,8 +117,6 @@ MachineView return make_1d_machine_view(unwrap_gpu(start), unwrap_gpu(stop), stride); } -// TODO(@pietro) change from int to stride_t, makes more sense - static StridedRectangle make_1d_rect(int start, num_points_t num_points, stride_t stride) { return make_1d_rect( diff --git a/lib/pcg/src/pcg/start_invariant_machine_view.cc b/lib/pcg/src/pcg/start_invariant_machine_view.cc new file mode 100644 index 0000000000..a97e9553bf --- /dev/null +++ b/lib/pcg/src/pcg/start_invariant_machine_view.cc @@ -0,0 +1,16 @@ +#include "pcg/start_invariant_machine_view.h" +#include "pcg/strided_rectangle.h" + +namespace FlexFlow { + +MachineView to_start_dependent(StartInvariantMachineView const &start_invariant_mv, device_id_t const &start_id) { + return MachineView{start_id, start_invariant_mv.rect}; +} +StartInvariantMachineView to_start_invariant(MachineView const &mv) { + return StartInvariantMachineView{mv.rect}; +} + 
+StartInvariantMachineView make_1d_start_invariant_machine_view(num_points_t num_points, stride_t stride) { + return StartInvariantMachineView{StridedRectangle{{StridedRectangleSide{num_points, stride}}}}; +} +} //namespace FlexFlow \ No newline at end of file diff --git a/lib/pcg/src/pcg/strided_rectangle.cc b/lib/pcg/src/pcg/strided_rectangle.cc index 94c0e372c6..bebfd05c44 100644 --- a/lib/pcg/src/pcg/strided_rectangle.cc +++ b/lib/pcg/src/pcg/strided_rectangle.cc @@ -24,6 +24,22 @@ bool StridedRectangle::operator!=(StridedRectangle const &other) const { return std::tie(this->_sides) != std::tie(other._sides); } +bool StridedRectangle::operator<(StridedRectangle const &other) const { + return std::tie(this->_sides) < std::tie(other._sides); +} + +bool StridedRectangle::operator>(StridedRectangle const &other) const { + return std::tie(this->_sides) > std::tie(other._sides); +} + +bool StridedRectangle::operator<=(StridedRectangle const &other) const { + return std::tie(this->_sides) <= std::tie(other._sides); +} + +bool StridedRectangle::operator>=(StridedRectangle const &other) const { + return std::tie(this->_sides) >= std::tie(other._sides); +} + std::vector StridedRectangle::get_sides() const { return _sides; } diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index a9175aaad4..44f02e7a40 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -4,8 +4,8 @@ #include "test/utils/doctest.h" #include "utils/containers/transform.h" -std::unordered_multiset - make_gpu_device_ids(std::unordered_multiset ids) { +std::unordered_set + make_gpu_device_ids(std::unordered_set ids) { return transform(ids, [](int id) { return device_id_t(gpu_id_t(id)); }); } @@ -58,9 +58,9 @@ TEST_SUITE(FF_TEST_SUITE) { gpu_id_t start(0); MachineView mv{device_id_t{start}, rect}; SUBCASE("get_device_ids") { - std::unordered_multiset expected = + std::unordered_set expected = make_gpu_device_ids({0, 2, 12, 14}); 
- std::unordered_multiset result = get_device_ids(mv); + std::unordered_set result = get_device_ids(mv); CHECK(expected == result); } } @@ -74,9 +74,9 @@ TEST_SUITE(FF_TEST_SUITE) { MachineView mv{device_id_t{start}, rect}; SUBCASE("get_device_ids") { - std::unordered_multiset expected = + std::unordered_set expected = make_gpu_device_ids({1, 4, 13, 16}); - std::unordered_multiset result = get_device_ids(mv); + std::unordered_set result = get_device_ids(mv); CHECK(expected == result); } } diff --git a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc new file mode 100644 index 0000000000..b9434ed898 --- /dev/null +++ b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc @@ -0,0 +1,20 @@ +#include "pcg/machine_view.h" +#include "pcg/start_invariant_machine_view.h" +#include "test/utils/doctest.h" + +TEST_SUITE(FF_TEST_SUITE) { + + TEST_CASE("StartInvariantMachineView") { + + StridedRectangle rect{{ + StridedRectangleSide(num_points_t(2), stride_t{3}), + StridedRectangleSide(num_points_t(2), stride_t{2}), + }}; + device_id_t start = device_id_t(gpu_id_t(5)); + MachineView input = MachineView{start, rect}; + + MachineView result = to_start_dependent(to_start_invariant(input), start); + MachineView correct = input; + CHECK(correct == input); + } +} From 878954fdb6535a5e07ca65086febb7a6f4a145b0 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Sat, 10 Aug 2024 16:06:03 -0700 Subject: [PATCH 13/34] formatting --- .../include/compiler/machine_mapping.h | 3 +- lib/compiler/src/machine_mapping.cc | 5 ++-- lib/compiler/test/src/machine_mapping.cc | 28 ++++++++----------- .../pcg/start_invariant_machine_view.h | 4 ++- lib/pcg/include/pcg/strided_rectangle.h | 1 - .../src/pcg/start_invariant_machine_view.cc | 13 ++++++--- .../src/pcg/start_invariant_machine_view.cc | 22 +++++++-------- 7 files changed, 39 insertions(+), 37 deletions(-) diff --git a/lib/compiler/include/compiler/machine_mapping.h 
b/lib/compiler/include/compiler/machine_mapping.h index 90d547e409..6c3aab9ac0 100644 --- a/lib/compiler/include/compiler/machine_mapping.h +++ b/lib/compiler/include/compiler/machine_mapping.h @@ -7,12 +7,11 @@ #include "pcg/machine_specification.dtg.h" #include "pcg/machine_specification.h" #include "pcg/machine_view.h" -#include "pcg/start_invariant_machine_view.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "pcg/start_invariant_machine_view.h" #include "substitutions/sub_parallel_computation_graph.h" #include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" - namespace FlexFlow { MachineMapping combine(MachineMapping const &, MachineMapping const &); diff --git a/lib/compiler/src/machine_mapping.cc b/lib/compiler/src/machine_mapping.cc index 109b98753d..8d3bcc3142 100644 --- a/lib/compiler/src/machine_mapping.cc +++ b/lib/compiler/src/machine_mapping.cc @@ -6,6 +6,7 @@ #include "pcg/machine_view.dtg.h" #include "pcg/machine_view.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "pcg/start_invariant_machine_view.h" #include "utils/containers.h" #include "utils/containers/are_disjoint.h" #include "utils/containers/as_vector.h" @@ -27,7 +28,6 @@ #include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" #include "utils/graph/serial_parallel/serial_parallel_decomposition.h" #include "utils/graph/serial_parallel/serial_parallel_splits.h" -#include "pcg/start_invariant_machine_view.h" namespace FlexFlow { @@ -446,7 +446,8 @@ std::unordered_set get_allowed_start_invariant_machine_views( MachineSpecification const &machinespec, ParallelTensorShape const &shape) { - return transform(get_allowed_machine_views(machinespec, shape), to_start_invariant); + return transform(get_allowed_machine_views(machinespec, shape), + to_start_invariant); } auto get_all_machine_views_to_tensor_dim_bijections( diff --git a/lib/compiler/test/src/machine_mapping.cc 
b/lib/compiler/test/src/machine_mapping.cc index fb975c88a4..43c4a6aaaa 100644 --- a/lib/compiler/test/src/machine_mapping.cc +++ b/lib/compiler/test/src/machine_mapping.cc @@ -77,7 +77,6 @@ TEST_SUITE(FF_TEST_SUITE) { get_allowed_machine_views(ms, shape); CHECK(result == correct); } - } TEST_CASE("get_allowed_start_invariant_machine_views") { @@ -98,9 +97,8 @@ TEST_SUITE(FF_TEST_SUITE) { }; std::unordered_set correct = { - make_1d_start_invariant_machine_view(num_points_t(3), stride_t(1)), - make_1d_start_invariant_machine_view(num_points_t(3), stride_t(2)) - }; + make_1d_start_invariant_machine_view(num_points_t(3), stride_t(1)), + make_1d_start_invariant_machine_view(num_points_t(3), stride_t(2))}; std::unordered_set result = get_allowed_start_invariant_machine_views(ms, shape); CHECK(correct == result); @@ -122,25 +120,23 @@ TEST_SUITE(FF_TEST_SUITE) { }; auto make_2d_view = [&](int stride1, int stride2) { - StridedRectangle rect = StridedRectangle{ - {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, - StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; - return StartInvariantMachineView{rect}; + StridedRectangle rect = StridedRectangle{ + {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, + StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; + return StartInvariantMachineView{rect}; }; std::unordered_set correct = { - make_2d_view(/*stride1*/ 1, /*stride2*/ 1), - make_2d_view(/*stride1*/ 2, /*stride2*/ 1), - make_2d_view(/*stride1*/ 1, /*stride2*/ 2), - make_2d_view(/*stride1*/ 3, /*stride2*/ 1), - make_2d_view(/*stride1*/ 1, /*stride2*/ 3), - make_2d_view(/*stride1*/ 1, /*stride2*/ 4) - }; + make_2d_view(/*stride1*/ 1, /*stride2*/ 1), + make_2d_view(/*stride1*/ 2, /*stride2*/ 1), + make_2d_view(/*stride1*/ 1, /*stride2*/ 2), + make_2d_view(/*stride1*/ 3, /*stride2*/ 1), + make_2d_view(/*stride1*/ 1, /*stride2*/ 3), + make_2d_view(/*stride1*/ 1, /*stride2*/ 4)}; std::unordered_set result = 
get_allowed_start_invariant_machine_views(ms, shape); CHECK(result == correct); } - } // TEST_CASE("MachineMapping::combine") { diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.h b/lib/pcg/include/pcg/start_invariant_machine_view.h index 2297a38f2b..165e6ab5cc 100644 --- a/lib/pcg/include/pcg/start_invariant_machine_view.h +++ b/lib/pcg/include/pcg/start_invariant_machine_view.h @@ -10,7 +10,9 @@ MachineView to_start_dependent(StartInvariantMachineView const &mv, device_id_t const &start_id); StartInvariantMachineView to_start_invariant(MachineView const &mv); -StartInvariantMachineView make_1d_start_invariant_machine_view(num_points_t num_points, stride_t stride); +StartInvariantMachineView + make_1d_start_invariant_machine_view(num_points_t num_points, + stride_t stride); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/strided_rectangle.h b/lib/pcg/include/pcg/strided_rectangle.h index 9099307506..6b7a92fc90 100644 --- a/lib/pcg/include/pcg/strided_rectangle.h +++ b/lib/pcg/include/pcg/strided_rectangle.h @@ -7,7 +7,6 @@ #include "pcg/side_size_t.dtg.h" #include "pcg/strided_rectangle_side.dtg.h" - namespace FlexFlow { struct StridedRectangle { diff --git a/lib/pcg/src/pcg/start_invariant_machine_view.cc b/lib/pcg/src/pcg/start_invariant_machine_view.cc index a97e9553bf..d0fdd84810 100644 --- a/lib/pcg/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/src/pcg/start_invariant_machine_view.cc @@ -3,14 +3,19 @@ namespace FlexFlow { -MachineView to_start_dependent(StartInvariantMachineView const &start_invariant_mv, device_id_t const &start_id) { +MachineView + to_start_dependent(StartInvariantMachineView const &start_invariant_mv, + device_id_t const &start_id) { return MachineView{start_id, start_invariant_mv.rect}; } StartInvariantMachineView to_start_invariant(MachineView const &mv) { return StartInvariantMachineView{mv.rect}; } -StartInvariantMachineView make_1d_start_invariant_machine_view(num_points_t num_points, stride_t stride) { - 
return StartInvariantMachineView{StridedRectangle{{StridedRectangleSide{num_points, stride}}}}; +StartInvariantMachineView + make_1d_start_invariant_machine_view(num_points_t num_points, + stride_t stride) { + return StartInvariantMachineView{ + StridedRectangle{{StridedRectangleSide{num_points, stride}}}}; } -} //namespace FlexFlow \ No newline at end of file +} // namespace FlexFlow diff --git a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc index b9434ed898..be637b1e0e 100644 --- a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc @@ -1,20 +1,20 @@ -#include "pcg/machine_view.h" #include "pcg/start_invariant_machine_view.h" +#include "pcg/machine_view.h" #include "test/utils/doctest.h" TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("StartInvariantMachineView") { - StridedRectangle rect{{ - StridedRectangleSide(num_points_t(2), stride_t{3}), - StridedRectangleSide(num_points_t(2), stride_t{2}), - }}; - device_id_t start = device_id_t(gpu_id_t(5)); - MachineView input = MachineView{start, rect}; + StridedRectangle rect{{ + StridedRectangleSide(num_points_t(2), stride_t{3}), + StridedRectangleSide(num_points_t(2), stride_t{2}), + }}; + device_id_t start = device_id_t(gpu_id_t(5)); + MachineView input = MachineView{start, rect}; - MachineView result = to_start_dependent(to_start_invariant(input), start); - MachineView correct = input; - CHECK(correct == input); - } + MachineView result = to_start_dependent(to_start_invariant(input), start); + MachineView correct = input; + CHECK(correct == input); + } } From 340f441c29eb06efb312cc52d57022d229f9f29e Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Mon, 12 Aug 2024 22:16:25 -0700 Subject: [PATCH 14/34] Containers fix --- lib/utils/include/utils/bidict/bidict.h | 3 ++ .../utils/containers/cartesian_product.h | 20 +++++------ lib/utils/include/utils/containers/filter.h | 8 +++++ 
.../include/utils/containers/permutations.h | 29 ++++++++++++++++ .../include/utils/containers/replicate.h | 8 ++--- .../utils/containers/{any_of.h => any_of.cc} | 0 .../src/utils/containers/permutations.cc | 1 + .../src/utils/containers/cartesian_product.cc | 15 ++++---- lib/utils/test/src/utils/containers/filter.cc | 9 +++++ .../test/src/utils/containers/permutations.cc | 34 +++++++++++++++++++ .../test/src/utils/containers/replicate.cc | 9 +++-- 11 files changed, 108 insertions(+), 28 deletions(-) create mode 100644 lib/utils/include/utils/containers/permutations.h rename lib/utils/src/utils/containers/{any_of.h => any_of.cc} (100%) create mode 100644 lib/utils/src/utils/containers/permutations.cc create mode 100644 lib/utils/test/src/utils/containers/permutations.cc diff --git a/lib/utils/include/utils/bidict/bidict.h b/lib/utils/include/utils/bidict/bidict.h index eaecb6e405..ff17697d31 100644 --- a/lib/utils/include/utils/bidict/bidict.h +++ b/lib/utils/include/utils/bidict/bidict.h @@ -22,6 +22,9 @@ struct bidict { } } + bidict(std::vector> init) + : bidict(init.begin(), init.end()) {} + bool contains_l(L const &l) const { return fwd_map.find(l) != fwd_map.end(); } diff --git a/lib/utils/include/utils/containers/cartesian_product.h b/lib/utils/include/utils/containers/cartesian_product.h index 1232dd2cbd..c4ad07aa8e 100644 --- a/lib/utils/include/utils/containers/cartesian_product.h +++ b/lib/utils/include/utils/containers/cartesian_product.h @@ -9,28 +9,24 @@ namespace FlexFlow { -template -auto cartesian_product(Container const &containers) { - using ValueType = typename Container::value_type::value_type; - using VectorType = std::vector; - auto ordered = as_vector(containers); - std::unordered_multiset result; - - std::function recurse = [&](VectorType ¤t, - std::size_t depth) { - if (depth == ordered.size()) { +template > +auto cartesian_product(std::vector const &containers) { + std::unordered_multiset result; + + std::function recurse = [&](V ¤t, 
size_t depth) { + if (depth == containers.size()) { result.insert(current); return; } - for (const auto &item : ordered[depth]) { + for (const auto &item : containers.at(depth)) { current.push_back(item); recurse(current, depth + 1); current.pop_back(); } }; - VectorType current; + V current; recurse(current, 0); return result; diff --git a/lib/utils/include/utils/containers/filter.h b/lib/utils/include/utils/containers/filter.h index fb8c703d2a..f65b50fdbc 100644 --- a/lib/utils/include/utils/containers/filter.h +++ b/lib/utils/include/utils/containers/filter.h @@ -44,6 +44,14 @@ std::map filter(std::map const &m, F const &f) { return result; } +template +std::unordered_multiset filter(std::unordered_multiset const &m, + F const &f) { + std::unordered_multiset result; + std::copy_if(m.cbegin(), m.cend(), std::inserter(result, result.begin()), f); + return result; +} + } // namespace FlexFlow #endif diff --git a/lib/utils/include/utils/containers/permutations.h b/lib/utils/include/utils/containers/permutations.h new file mode 100644 index 0000000000..803bd22df0 --- /dev/null +++ b/lib/utils/include/utils/containers/permutations.h @@ -0,0 +1,29 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_PERMUTATIONS_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_PERMUTATIONS_H + +#include "utils/containers/sorted.h" +#include "utils/hash/vector.h" +#include +#include +#include + +namespace FlexFlow { + +template > +auto permutations(C const &container) { + std::unordered_set result; + + V elements = sorted(container); + + result.insert(elements); + + while (std::next_permutation(elements.begin(), elements.end())) { + result.insert(elements); + } + + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/replicate.h b/lib/utils/include/utils/containers/replicate.h index 8a8fca532e..0bed081ad5 100644 --- a/lib/utils/include/utils/containers/replicate.h +++ b/lib/utils/include/utils/containers/replicate.h @@ -1,15 
+1,15 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_REPLICATE_H -#include +#include namespace FlexFlow { template -std::unordered_multiset replicate(std::size_t n, T const &element) { - std::unordered_multiset result; +std::vector replicate(std::size_t n, T const &element) { + std::vector result; for (std::size_t i = 0; i < n; ++i) { - result.insert(element); + result.push_back(element); } return result; } diff --git a/lib/utils/src/utils/containers/any_of.h b/lib/utils/src/utils/containers/any_of.cc similarity index 100% rename from lib/utils/src/utils/containers/any_of.h rename to lib/utils/src/utils/containers/any_of.cc diff --git a/lib/utils/src/utils/containers/permutations.cc b/lib/utils/src/utils/containers/permutations.cc new file mode 100644 index 0000000000..5876f84440 --- /dev/null +++ b/lib/utils/src/utils/containers/permutations.cc @@ -0,0 +1 @@ +#include "utils/containers/permutations.h" diff --git a/lib/utils/test/src/utils/containers/cartesian_product.cc b/lib/utils/test/src/utils/containers/cartesian_product.cc index 7809f0c513..9e00794b1c 100644 --- a/lib/utils/test/src/utils/containers/cartesian_product.cc +++ b/lib/utils/test/src/utils/containers/cartesian_product.cc @@ -7,13 +7,6 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("cartesian_product") { - SUBCASE("empty") { - std::vector> containers = {}; - std::unordered_multiset> result = - cartesian_product(containers); - std::unordered_multiset> correct = {{}}; - CHECK(result == correct); - } SUBCASE("single container, one element") { std::vector> containers = {{1}}; @@ -56,5 +49,13 @@ TEST_SUITE(FF_TEST_SUITE) { {1, 2}, {1, 3}, {1, 3}, {1, 2}}; CHECK(result == correct); } + + SUBCASE("1 empty container, 1 non-empty container") { + std::vector> containers = {{}, {2, 3}}; + std::unordered_multiset> result = + cartesian_product(containers); + std::unordered_multiset> correct = {}; + CHECK(result == 
correct); + } } } diff --git a/lib/utils/test/src/utils/containers/filter.cc b/lib/utils/test/src/utils/containers/filter.cc index 5971022c63..aaafcc9063 100644 --- a/lib/utils/test/src/utils/containers/filter.cc +++ b/lib/utils/test/src/utils/containers/filter.cc @@ -90,4 +90,13 @@ TEST_SUITE(FF_TEST_SUITE) { }; CHECK(result == correct); } + + TEST_CASE("filter(std::unordered_multiset, F)") { + std::unordered_multiset input = {1, 1, 2, 2, 2, 3, 4, 5, 6, 7, 8, 8}; + auto predicate = [](int x) { return x % 2 == 0; }; + + std::unordered_multiset result = filter(input, predicate); + std::unordered_multiset correct = {2, 2, 2, 4, 6, 8, 8}; + CHECK(result == correct); + } } diff --git a/lib/utils/test/src/utils/containers/permutations.cc b/lib/utils/test/src/utils/containers/permutations.cc new file mode 100644 index 0000000000..3cf35579fe --- /dev/null +++ b/lib/utils/test/src/utils/containers/permutations.cc @@ -0,0 +1,34 @@ +#include "utils/containers/permutations.h" +#include "utils/hash/unordered_set.h" +#include +#include +#include + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("permutations") { + SUBCASE("size=1") { + std::vector vec = {1}; + auto result = permutations(vec); + std::unordered_set> correct = {{1}}; + CHECK(result == correct); + } + + SUBCASE("size=3") { + std::vector vec = {1, 2, 3}; + auto result = permutations(vec); + std::unordered_set> correct = { + {1, 2, 3}, {1, 3, 2}, {2, 1, 3}, {2, 3, 1}, {3, 1, 2}, {3, 2, 1}}; + CHECK(result == correct); + } + + SUBCASE("elements repeated") { + std::vector vec = {1, 2, 2}; + auto result = permutations(vec); + std::unordered_set> correct = { + {1, 2, 2}, {2, 1, 2}, {2, 2, 1}}; + CHECK(result == correct); + } + } +} diff --git a/lib/utils/test/src/utils/containers/replicate.cc b/lib/utils/test/src/utils/containers/replicate.cc index b992a26487..5a02fcdcaa 100644 --- a/lib/utils/test/src/utils/containers/replicate.cc +++ b/lib/utils/test/src/utils/containers/replicate.cc @@ -9,15 
+9,14 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("replicate") { SUBCASE("ints") { int x = 42; - std::unordered_multiset result = replicate(5, x); - std::unordered_multiset correct = {42, 42, 42, 42, 42}; + std::vector result = replicate(5, x); + std::vector correct = {42, 42, 42, 42, 42}; CHECK(result == correct); } SUBCASE("unordered_set") { std::unordered_set x = {1.0, 1.5}; - std::unordered_multiset> result = - replicate(3, x); - std::unordered_multiset> correct = { + std::vector> result = replicate(3, x); + std::vector> correct = { {1.0, 1.5}, {1.0, 1.5}, {1.0, 1.5}}; CHECK(result == correct); } From 1e8fa900c9da19a819cec9c5f7782f6920786a9d Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 13 Aug 2024 11:49:32 -0700 Subject: [PATCH 15/34] Implemented tensor to machine view injection --- .../include/compiler/machine_mapping.h | 15 +- ...nsor_to_machine_view_injection.struct.toml | 18 ++ lib/compiler/src/machine_mapping.cc | 194 +++++++++++++----- lib/compiler/test/src/machine_mapping.cc | 44 +++- lib/op-attrs/include/op-attrs/parallel_dim.h | 1 + .../parallel_tensor_dim_idx.variant.toml | 21 ++ lib/op-attrs/src/op-attrs/parallel_dim.cc | 14 ++ .../pcg/device_coordinates.struct.toml | 7 +- lib/pcg/include/pcg/machine_specification.h | 12 +- lib/pcg/include/pcg/machine_view.h | 4 +- .../pcg/machine_view_dim_idx.struct.toml | 14 ++ .../pcg/start_invariant_machine_view.h | 8 +- lib/pcg/include/pcg/strided_rectangle.h | 9 +- lib/pcg/src/pcg/device_id.cc | 16 +- lib/pcg/src/pcg/machine_specification.cc | 24 +++ lib/pcg/src/pcg/machine_view.cc | 10 +- .../src/pcg/start_invariant_machine_view.cc | 9 +- lib/pcg/src/pcg/strided_rectangle.cc | 36 ++-- lib/pcg/test/src/pcg/machine_specification.cc | 25 +++ lib/pcg/test/src/pcg/machine_view.cc | 3 - .../src/pcg/start_invariant_machine_view.cc | 3 +- lib/pcg/test/src/pcg/strided_rectangle.cc | 4 - 22 files changed, 382 insertions(+), 109 deletions(-) create mode 100644 
lib/compiler/include/compiler/tensor_to_machine_view_injection.struct.toml create mode 100644 lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx.variant.toml create mode 100644 lib/op-attrs/src/op-attrs/parallel_dim.cc create mode 100644 lib/pcg/include/pcg/machine_view_dim_idx.struct.toml create mode 100644 lib/pcg/src/pcg/machine_specification.cc create mode 100644 lib/pcg/test/src/pcg/machine_specification.cc diff --git a/lib/compiler/include/compiler/machine_mapping.h b/lib/compiler/include/compiler/machine_mapping.h index 6c3aab9ac0..2f07e48100 100644 --- a/lib/compiler/include/compiler/machine_mapping.h +++ b/lib/compiler/include/compiler/machine_mapping.h @@ -3,6 +3,7 @@ #include "compiler/machine_mapping.dtg.h" #include "compiler/optimal_cost_state.dtg.h" +#include "compiler/tensor_to_machine_view_injection.dtg.h" #include "cost_estimate.h" #include "pcg/machine_specification.dtg.h" #include "pcg/machine_specification.h" @@ -55,13 +56,19 @@ OptimalCostResult optimal_cost( OptimalCostCache &cached_subgraph_costs); std::unordered_set - get_allowed_machine_views(MachineSpecification const &machinespec, - ParallelTensorShape const &shape); + get_allowed_machine_views(MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType device_type = DeviceType::GPU); std::unordered_set get_allowed_start_invariant_machine_views( - MachineSpecification const &machinespec, - ParallelTensorShape const &shape); + MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType device_type = DeviceType::GPU); + +std::unordered_set + get_all_tensor_to_machine_view_injections(MachineView const &mv, + ParallelTensorShape const &shape); } // namespace FlexFlow diff --git a/lib/compiler/include/compiler/tensor_to_machine_view_injection.struct.toml b/lib/compiler/include/compiler/tensor_to_machine_view_injection.struct.toml new file mode 100644 index 0000000000..deb65defcb --- /dev/null +++ 
b/lib/compiler/include/compiler/tensor_to_machine_view_injection.struct.toml @@ -0,0 +1,18 @@ +namespace = "FlexFlow" +name = "TensorToMachineViewInjection" +features = [ + "eq", + "hash", + "fmt", +] + +includes = [ + "pcg/machine_view_dim_idx.dtg.h", + "op-attrs/parallel_tensor_dim_idx.dtg.h", + "utils/bidict/bidict.h", + "utils/hash/unordered_map.h" +] + +[[fields]] +name = "raw_bidict" +type = "::FlexFlow::bidict<::FlexFlow::machine_view_dim_idx, ::FlexFlow::parallel_tensor_dim_idx>" diff --git a/lib/compiler/src/machine_mapping.cc b/lib/compiler/src/machine_mapping.cc index 8d3bcc3142..76e6ff1882 100644 --- a/lib/compiler/src/machine_mapping.cc +++ b/lib/compiler/src/machine_mapping.cc @@ -1,19 +1,25 @@ #include "compiler/machine_mapping.h" #include "compiler/cost_estimate.h" #include "compiler/graph_utils.h" +#include "compiler/tensor_to_machine_view_injection.dtg.h" +#include "op-attrs/parallel_tensor_dim_idx.dtg.h" #include "pcg/machine_specification.dtg.h" #include "pcg/machine_specification.h" #include "pcg/machine_view.dtg.h" #include "pcg/machine_view.h" +#include "pcg/machine_view_dim_idx.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "pcg/start_invariant_machine_view.h" #include "utils/containers.h" +#include "utils/containers/all_of.h" #include "utils/containers/are_disjoint.h" #include "utils/containers/as_vector.h" #include "utils/containers/cartesian_product.h" #include "utils/containers/contains_key.h" +#include "utils/containers/filter.h" #include "utils/containers/get_only.h" #include "utils/containers/keys.h" +#include "utils/containers/permutations.h" #include "utils/containers/product.h" #include "utils/containers/range.h" #include "utils/containers/replicate.h" @@ -28,6 +34,7 @@ #include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" #include "utils/graph/serial_parallel/serial_parallel_decomposition.h" #include "utils/graph/serial_parallel/serial_parallel_splits.h" +#include 
"utils/overload.h" namespace FlexFlow { @@ -366,93 +373,170 @@ OptimalCostResult optimal_cost( return searcher.optimal_cost(subpcg, resources, sp_decomposition); } -bool is_valid_machine_view(MachineView const &mv, - MachineSpecification const &machinespec) { - int num_devices_per_node = ((get_device_type(mv) == DeviceType::GPU) - ? machinespec.num_gpus_per_node - : machinespec.num_cpus_per_node); - int num_devices = machinespec.num_nodes * num_devices_per_node; - return (num_devices > get_raw_id(get_last_device_id(mv))); +static std::unordered_multiset + get_unordered_tensor_parallel_degrees(ParallelTensorShape const &shape) { + std::unordered_multiset degrees = + without_order(ff_ordered_shard_degrees(shape)); + degrees.insert(get_sum_degree(shape)); + degrees.insert(get_discard_copy_degree(shape)); + // filtering non-parallel dims + degrees = filter(degrees, [](int degree) { return degree != 1; }); + return degrees; } -std::vector get_tensor_parallel_degrees(ParallelTensorShape const &shape) { - std::vector degrees = as_vector(ff_ordered_shard_degrees(shape)); - degrees.push_back(get_sum_degree(shape)); - degrees.push_back(get_discard_copy_degree(shape)); - return degrees; +bool is_valid_machine_view(MachineView const &mv, + MachineSpecification const &machine_spec) { + + int num_devices = get_num_devices(machine_spec, get_device_type(mv)); + return (num_devices > get_raw_id(get_last_device_id(mv))); } bool is_valid_machine_view(MachineView const &mv, ParallelTensorShape const &shape) { + std::vector mv_degrees = transform(get_num_devices_per_dim(mv), [](num_points_t degree) { return degree.unwrapped; }); - std::vector tensor_degrees = get_tensor_parallel_degrees(shape); - tensor_degrees = - filter(tensor_degrees, [](int degree) { return degree != 1; }); - return without_order(mv_degrees) == without_order(tensor_degrees); + std::unordered_multiset unordered_tensor_degrees = + get_unordered_tensor_parallel_degrees(shape); + + return without_order(mv_degrees) == 
unordered_tensor_degrees; } -// TODO(@pietro): add support for both CPU and GPU static std::unordered_set - get_candidate_machine_views(MachineSpecification const &machinespec, - ParallelTensorShape const &shape) { - - auto candidate_strides = [](std::vector tensor_dims, int total_devices) { - int max_stride_upper_bound = - (total_devices + 1) / + get_candidate_machine_views(MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType const &device_type) { + + auto candidate_strides = + [](std::vector const &tensor_dims, + int total_devices) -> std::unordered_multiset> { + int min_num_devices_with_full_stride_volume = product(transform(tensor_dims, [](int degree) { return degree - 1; })); - std::unordered_multiset> strides = cartesian_product( - replicate(tensor_dims.size(), range(1, max_stride_upper_bound + 1))); + int max_stride_upper_bound = + std::ceil(total_devices / min_num_devices_with_full_stride_volume); + std::vector single_stride_range = + transform(range(1, max_stride_upper_bound + 1), + [](int stride) { return stride_t(stride); }); + std::unordered_multiset> strides = + cartesian_product(replicate(tensor_dims.size(), single_stride_range)); return strides; }; - std::vector tensor_dims = filter(get_tensor_parallel_degrees(shape), - [](int degree) { return degree != 1; }); + auto get_strided_rectangle = [](std::vector const &strides, + std::vector const &num_points_per_dim) { + std::vector sides = + transform(zip(num_points_per_dim, strides), [&](auto const &p) { + return StridedRectangleSide(num_points_t(p.first), + stride_t(p.second)); + }); + return StridedRectangle{sides}; + }; + + std::unordered_multiset tensor_dims = + get_unordered_tensor_parallel_degrees(shape); + int total_devices = get_num_devices(machine_spec, device_type); + std::unordered_set machine_views; - int total_devices = machinespec.num_nodes * machinespec.num_gpus_per_node; - for (std::vector stride : - candidate_strides(tensor_dims, total_devices)) { - 
for (int start_id = 0; start_id < total_devices; start_id++) { - std::vector sides = - transform(zip(tensor_dims, stride), [&](auto const &pair) { - return StridedRectangleSide(num_points_t(pair.first), - stride_t(pair.second)); - }); - MachineView mv = - MachineView{device_id_t(gpu_id_t(start_id)), StridedRectangle{sides}}; - machine_views.insert(mv); + for (std::vector const &strides : + candidate_strides(sorted(tensor_dims), total_devices)) { + StridedRectangle rect = get_strided_rectangle(strides, sorted(tensor_dims)); + auto start_inv_mv = StartInvariantMachineView{rect}; + for (int start_id : range(total_devices)) { + device_id_t start_device = device_id_from_index(start_id, device_type); + machine_views.insert( + machine_view_from_start_invariant(start_inv_mv, start_device)); } } return machine_views; } std::unordered_set - get_allowed_machine_views(MachineSpecification const &machinespec, - ParallelTensorShape const &shape) { + get_allowed_machine_views(MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType device_type) { std::unordered_set views = - get_candidate_machine_views(machinespec, shape); - views = filter(views, [&](MachineView const &view) { - return is_valid_machine_view(view, shape); - }); - views = filter(views, [&](MachineView const &view) { - return is_valid_machine_view(view, machinespec); + get_candidate_machine_views(machine_spec, shape, device_type); + return filter(views, [&](MachineView const &view) { + return is_valid_machine_view(view, shape) && + is_valid_machine_view(view, machine_spec); }); - return views; } std::unordered_set get_allowed_start_invariant_machine_views( - MachineSpecification const &machinespec, - ParallelTensorShape const &shape) { - return transform(get_allowed_machine_views(machinespec, shape), - to_start_invariant); + MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType device_type) { + return 
transform(get_allowed_machine_views(machine_spec, shape, device_type), + start_invariant_from_machine_view); +} + +ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, + parallel_tensor_dim_idx idx) { + return idx.visit( + overload{[&](ff_dim_t shard_dim) { + return ParallelDim{shape.dims.shard_dims.at(shard_dim)}; + }, + [&](ReplicaType replica_type) { + ReplicaParallelDimSet replicas = shape.dims.replica_dims; + int degree = (ReplicaType::SUM == replica_type + ? replicas.sum_degree.value + : replicas.discard_copy_degree.value); + return ParallelDim{ReplicaParallelDim{degree, replica_type}}; + }}); +} + +std::unordered_set + get_parallel_tensor_indices(ParallelTensorShape const &shape) { + std::unordered_set indices; + extend(indices, transform(range(num_shard_dims(shape)), [](int idx) { + return parallel_tensor_dim_idx(ff_dim_t(idx)); + })); + indices.insert(parallel_tensor_dim_idx(ReplicaType::SUM)); + indices.insert(parallel_tensor_dim_idx(ReplicaType::DISCARD_COPY)); + return indices; +} + +std::unordered_set + get_machine_view_indices(MachineView const &mv) { + return transform(unordered_set_of(range(num_dims(mv))), + [](int idx) { return machine_view_dim_idx(idx); }); } -auto get_all_machine_views_to_tensor_dim_bijections( - MachineView const &mv, ParallelTensorShape const &shape) { - NOT_IMPLEMENTED(); +bool is_valid_injection(TensorToMachineViewInjection const &injection, + MachineView const &mv, + ParallelTensorShape const &shape) { + return all_of(injection.raw_bidict, [&](auto const pair) { + int mv_degree = get_side_at_idx(mv, pair.first).num_points.unwrapped; + int tensor_degree = get_degree(get_parallel_dim_at_idx(shape, pair.second)); + return (tensor_degree == mv_degree); + }); +} + +std::unordered_set + get_all_tensor_to_machine_view_injections( + MachineView const &mv, ParallelTensorShape const &shape) { + assert(is_valid_machine_view(mv, shape)); + std::unordered_set mv_indices = + get_machine_view_indices(mv); + 
std::unordered_set shape_indices = + get_parallel_tensor_indices(shape); + shape_indices = filter(shape_indices, [&](auto const idx) { + return get_degree(get_parallel_dim_at_idx(shape, idx)) != 1; + }); + + std::unordered_set injections; + for (std::vector const &p : + permutations(shape_indices)) { + TensorToMachineViewInjection injection = + TensorToMachineViewInjection(bidict(zip(sorted(mv_indices), p))); + if (is_valid_injection(injection, mv, shape)) { + injections.insert(injection); + } + } + return injections; } } // namespace FlexFlow diff --git a/lib/compiler/test/src/machine_mapping.cc b/lib/compiler/test/src/machine_mapping.cc index 43c4a6aaaa..729ea972c6 100644 --- a/lib/compiler/test/src/machine_mapping.cc +++ b/lib/compiler/test/src/machine_mapping.cc @@ -6,10 +6,10 @@ TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_allowed_machine_view") { + TEST_CASE("get_allowed_machine_views") { SUBCASE("1 degree of parallelism") { - MachineSpecification ms = MachineSpecification(5, 1, 1, 0, 0); + MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; ParallelTensorShape shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -34,7 +34,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("2 degrees of parallelism") { - MachineSpecification ms = MachineSpecification(18, 1, 1, 0, 0); + MachineSpecification ms = MachineSpecification{18, 1, 1, 0, 0}; ParallelTensorShape shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -82,7 +82,7 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_allowed_start_invariant_machine_views") { SUBCASE("1 degree of parallelism") { - MachineSpecification ms = MachineSpecification(5, 1, 1, 0, 0); + MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; ParallelTensorShape shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -139,6 +139,42 @@ TEST_SUITE(FF_TEST_SUITE) { } } + TEST_CASE("get_all_tensor_to_machine_view_injections") { + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + 
FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{2}, + DiscardCopyDegree{2}, + }, + }, + DataType::FLOAT, + }; + MachineView view = + MachineView{device_id_from_index(0, DeviceType::GPU), + StridedRectangle{ + {StridedRectangleSide{num_points_t(2), stride_t(1)}, + StridedRectangleSide{num_points_t(2), stride_t(4)}, + StridedRectangleSide{num_points_t(3), stride_t(1)}}, + }}; + bidict b1 = { + {machine_view_dim_idx(2), ff_dim_t(0)}, + {machine_view_dim_idx(1), ReplicaType::SUM}, + {machine_view_dim_idx(0), ReplicaType::DISCARD_COPY}}; + + bidict b2 = { + {machine_view_dim_idx(2), ff_dim_t(0)}, + {machine_view_dim_idx(0), ReplicaType::SUM}, + {machine_view_dim_idx(1), ReplicaType::DISCARD_COPY}}; + std::unordered_set correct = { + TensorToMachineViewInjection{b1}, TensorToMachineViewInjection{b2}}; + std::unordered_set result = + get_all_tensor_to_machine_view_injections(view, shape); + CHECK(correct == result); + } + // TEST_CASE("MachineMapping::combine") { // RC_SUBCASE([](MachineMapping const &m0, MachineMapping const &m1) { // RC_PRE(MachineMapping::nodes_are_disjoint(m0, m1)); diff --git a/lib/op-attrs/include/op-attrs/parallel_dim.h b/lib/op-attrs/include/op-attrs/parallel_dim.h index 5397ad7c68..a12951dec9 100644 --- a/lib/op-attrs/include/op-attrs/parallel_dim.h +++ b/lib/op-attrs/include/op-attrs/parallel_dim.h @@ -11,6 +11,7 @@ bool is_replica_dim(ParallelDim const &); ParallelDim with_size_set_to(ParallelDim const &, size_t); ParallelDim with_degree_set_to(ParallelDim const &, int); ParallelDim with_is_replica_set_to(ParallelDim const &, bool); +int get_degree(ParallelDim const &); } // namespace FlexFlow diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx.variant.toml b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx.variant.toml new file mode 100644 index 0000000000..8dc906e378 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx.variant.toml @@ -0,0 +1,21 @@ +namespace = 
"FlexFlow" +name = "parallel_tensor_dim_idx" +features = [ + "eq", + "ord", + "hash", + "json", + "fmt", +] +explicit_constructors = false + +includes = [ + "op-attrs/ff_dim.dtg.h", + "op-attrs/replica_type.dtg.h", +] + +[[values]] +type = "::FlexFlow::ff_dim_t" + +[[values]] +type = "::FlexFlow::ReplicaType" diff --git a/lib/op-attrs/src/op-attrs/parallel_dim.cc b/lib/op-attrs/src/op-attrs/parallel_dim.cc new file mode 100644 index 0000000000..26ba2b3fa1 --- /dev/null +++ b/lib/op-attrs/src/op-attrs/parallel_dim.cc @@ -0,0 +1,14 @@ +#include "op-attrs/parallel_dim.h" +#include "utils/overload.h" + +namespace FlexFlow { + +int get_degree(ParallelDim const &dim) { + return dim.visit(overload{ + [](ShardParallelDim const &shard_dim) { return shard_dim.degree; }, + [](ReplicaParallelDim const &replica_dim) { + return replica_dim.degree; + }}); +} + +} // namespace FlexFlow diff --git a/lib/pcg/include/pcg/device_coordinates.struct.toml b/lib/pcg/include/pcg/device_coordinates.struct.toml index a19d324270..19373cca6e 100644 --- a/lib/pcg/include/pcg/device_coordinates.struct.toml +++ b/lib/pcg/include/pcg/device_coordinates.struct.toml @@ -10,9 +10,12 @@ features = [ ] includes = [ - "op-attrs/dim_ordered.h", + "", + "utils/hash/vector.h", + "utils/fmt/vector.h", + ] [[fields]] name = "coords" -type = "::FlexFlow::FFOrdered" +type = "std::vector" diff --git a/lib/pcg/include/pcg/machine_specification.h b/lib/pcg/include/pcg/machine_specification.h index f66723b0ff..22f9c12744 100644 --- a/lib/pcg/include/pcg/machine_specification.h +++ b/lib/pcg/include/pcg/machine_specification.h @@ -1,6 +1,16 @@ #ifndef _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_SPECIFICATION_H #define _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_SPECIFICATION_H -namespace FlexFlow {} // namespace FlexFlow +#include "pcg/device_type.dtg.h" +#include "pcg/machine_specification.dtg.h" + +namespace FlexFlow { + +int get_num_gpus(MachineSpecification const &ms); +int get_num_cpus(MachineSpecification const &ms); +int 
get_num_devices(MachineSpecification const &ms, + DeviceType const &device_type); + +} // namespace FlexFlow #endif diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index a03f7fd109..75c7e3ea63 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -5,6 +5,7 @@ #include "pcg/device_id.h" #include "pcg/device_type.dtg.h" #include "pcg/machine_view.dtg.h" +#include "pcg/machine_view_dim_idx.dtg.h" #include "pcg/num_points_t.dtg.h" #include "pcg/side_size_t.dtg.h" #include @@ -14,10 +15,11 @@ namespace FlexFlow { std::unordered_set get_device_ids(MachineView const &mv); device_id_t get_last_device_id(MachineView const &mv); +StridedRectangleSide get_side_at_idx(MachineView const &mv, + machine_view_dim_idx const &idx); size_t num_dims(MachineView const &mv); size_t num_devices(MachineView const &mv); -size_t get_size(MachineView const &mv); std::vector get_num_devices_per_dim(MachineView const &mv); std::vector get_side_size_per_dim(MachineView const &mv); diff --git a/lib/pcg/include/pcg/machine_view_dim_idx.struct.toml b/lib/pcg/include/pcg/machine_view_dim_idx.struct.toml new file mode 100644 index 0000000000..fec0b17bae --- /dev/null +++ b/lib/pcg/include/pcg/machine_view_dim_idx.struct.toml @@ -0,0 +1,14 @@ +namespace = "FlexFlow" +name = "machine_view_dim_idx" +features = [ + "eq", + "ord", + "hash", + "json", + "rapidcheck", + "fmt", +] + +[[fields]] +name = "unwrapped" +type = "int" diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.h b/lib/pcg/include/pcg/start_invariant_machine_view.h index 165e6ab5cc..b560dd095a 100644 --- a/lib/pcg/include/pcg/start_invariant_machine_view.h +++ b/lib/pcg/include/pcg/start_invariant_machine_view.h @@ -6,9 +6,11 @@ namespace FlexFlow { -MachineView to_start_dependent(StartInvariantMachineView const &mv, - device_id_t const &start_id); -StartInvariantMachineView to_start_invariant(MachineView const &mv); +MachineView + 
machine_view_from_start_invariant(StartInvariantMachineView const &mv, + device_id_t const &start_id); +StartInvariantMachineView + start_invariant_from_machine_view(MachineView const &mv); StartInvariantMachineView make_1d_start_invariant_machine_view(num_points_t num_points, diff --git a/lib/pcg/include/pcg/strided_rectangle.h b/lib/pcg/include/pcg/strided_rectangle.h index 6b7a92fc90..a416a96c59 100644 --- a/lib/pcg/include/pcg/strided_rectangle.h +++ b/lib/pcg/include/pcg/strided_rectangle.h @@ -12,7 +12,9 @@ namespace FlexFlow { struct StridedRectangle { private: - std::vector _sides; + std::vector sides; + std::tuple const &> tie() const; + friend struct std::hash; public: StridedRectangle() = delete; @@ -25,7 +27,8 @@ struct StridedRectangle { bool operator<=(StridedRectangle const &) const; bool operator>=(StridedRectangle const &) const; - std::vector get_sides() const; + StridedRectangleSide const &at(int idx) const; + std::vector const &get_sides() const; }; std::string format_as(StridedRectangle const &); std::ostream &operator<<(std::ostream &, StridedRectangle const &); @@ -34,8 +37,6 @@ size_t get_num_dims(StridedRectangle const &rect); num_points_t get_num_points(StridedRectangle const &rect); -size_t get_size(StridedRectangle const &rect); - } // namespace FlexFlow namespace std { diff --git a/lib/pcg/src/pcg/device_id.cc b/lib/pcg/src/pcg/device_id.cc index 590d5545ed..f027b73a5d 100644 --- a/lib/pcg/src/pcg/device_id.cc +++ b/lib/pcg/src/pcg/device_id.cc @@ -33,14 +33,20 @@ int get_raw_id(device_id_t device_id) { return unwrap_cpu(device_id).cpu_index; default: throw mk_runtime_error( - fmt::format("Unsupported DeviceType {} for device_id_t {}", - get_device_type(device_id), - device_id)); + fmt::format("Unsupported DeviceType {}", get_device_type(device_id))); } } -device_id_t device_id_from_index(int, DeviceType) { - NOT_IMPLEMENTED(); +device_id_t device_id_from_index(int idx, DeviceType device_type) { + switch (device_type) { + case 
DeviceType::GPU: + return device_id_t(gpu_id_t(idx)); + case DeviceType::CPU: + return device_id_t(cpu_id_t(idx)); + default: + throw mk_runtime_error( + fmt::format("Unsupported DeviceType {}", device_type)); + } } } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/machine_specification.cc b/lib/pcg/src/pcg/machine_specification.cc new file mode 100644 index 0000000000..aa4a047d8b --- /dev/null +++ b/lib/pcg/src/pcg/machine_specification.cc @@ -0,0 +1,24 @@ +#include "pcg/machine_specification.h" +#include "pcg/device_id.h" +#include "utils/exception.h" + +namespace FlexFlow { + +int get_num_gpus(MachineSpecification const &ms) { + return ms.num_nodes * ms.num_gpus_per_node; +} +int get_num_cpus(MachineSpecification const &ms) { + return ms.num_nodes * ms.num_cpus_per_node; +} +int get_num_devices(MachineSpecification const &ms, + DeviceType const &device_type) { + switch (device_type) { + case DeviceType::GPU: + return get_num_gpus(ms); + case DeviceType::CPU: + return get_num_cpus(ms); + default: + throw mk_runtime_error("Unknown DeviceType {}", device_type); + } +} +} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index ee0f71fed1..905a71d0ab 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,6 +1,7 @@ #include "pcg/machine_view.h" #include "pcg/device_coordinates.dtg.h" #include "pcg/device_id.h" +#include "pcg/machine_view_dim_idx.dtg.h" #include "pcg/strided_rectangle.h" #include "pcg/strided_rectangle_side.h" #include "utils/containers.h" @@ -77,14 +78,15 @@ size_t num_devices(MachineView const &mv) { return get_num_points(mv.rect).unwrapped; } -size_t get_size(MachineView const &mv) { - return get_size(mv.rect); -} - DeviceType get_device_type(MachineView const &mv) { return get_device_type(mv.start); } +StridedRectangleSide get_side_at_idx(MachineView const &mv, + machine_view_dim_idx const &idx) { + return mv.rect.at(idx.unwrapped); +} + static StridedRectangle 
make_1d_rect(int start, int stop, stride_t stride) { assert(stop > start); assert(stride > stride_t(0)); diff --git a/lib/pcg/src/pcg/start_invariant_machine_view.cc b/lib/pcg/src/pcg/start_invariant_machine_view.cc index d0fdd84810..c481279042 100644 --- a/lib/pcg/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/src/pcg/start_invariant_machine_view.cc @@ -3,12 +3,13 @@ namespace FlexFlow { -MachineView - to_start_dependent(StartInvariantMachineView const &start_invariant_mv, - device_id_t const &start_id) { +MachineView machine_view_from_start_invariant( + StartInvariantMachineView const &start_invariant_mv, + device_id_t const &start_id) { return MachineView{start_id, start_invariant_mv.rect}; } -StartInvariantMachineView to_start_invariant(MachineView const &mv) { +StartInvariantMachineView + start_invariant_from_machine_view(MachineView const &mv) { return StartInvariantMachineView{mv.rect}; } diff --git a/lib/pcg/src/pcg/strided_rectangle.cc b/lib/pcg/src/pcg/strided_rectangle.cc index bebfd05c44..277175a93d 100644 --- a/lib/pcg/src/pcg/strided_rectangle.cc +++ b/lib/pcg/src/pcg/strided_rectangle.cc @@ -9,39 +9,51 @@ #include "utils/containers/sorted.h" #include "utils/containers/transform.h" #include "utils/fmt/vector.h" +#include "utils/hash-utils.h" +#include "utils/hash/tuple.h" #include "utils/hash/vector.h" namespace FlexFlow { StridedRectangle::StridedRectangle( std::vector<::FlexFlow::StridedRectangleSide> const &sides) - : _sides(sorted(sides)) {} + : sides(sorted(sides)) {} + +std::tuple const &> + StridedRectangle::tie() const { + return std::tie(sides); +} bool StridedRectangle::operator==(StridedRectangle const &other) const { - return std::tie(this->_sides) == std::tie(other._sides); + return this->tie() == other.tie(); } + bool StridedRectangle::operator!=(StridedRectangle const &other) const { - return std::tie(this->_sides) != std::tie(other._sides); + return this->tie() != other.tie(); } bool StridedRectangle::operator<(StridedRectangle 
const &other) const { - return std::tie(this->_sides) < std::tie(other._sides); + return this->tie() < other.tie(); } bool StridedRectangle::operator>(StridedRectangle const &other) const { - return std::tie(this->_sides) > std::tie(other._sides); + return this->tie() > other.tie(); } bool StridedRectangle::operator<=(StridedRectangle const &other) const { - return std::tie(this->_sides) <= std::tie(other._sides); + return this->tie() <= other.tie(); } bool StridedRectangle::operator>=(StridedRectangle const &other) const { - return std::tie(this->_sides) >= std::tie(other._sides); + return this->tie() >= other.tie(); +} + +std::vector const &StridedRectangle::get_sides() const { + return sides; } -std::vector StridedRectangle::get_sides() const { - return _sides; +StridedRectangleSide const &StridedRectangle::at(int idx) const { + return this->sides.at(idx); } std::string format_as(StridedRectangle const &x) { @@ -79,11 +91,7 @@ size_t get_size(StridedRectangle const &rect) { namespace std { size_t hash::operator()( ::FlexFlow::StridedRectangle const &x) const { - size_t result = 0; - result ^= std::hash>{}( - x.get_sides()) + - 0x9e3779b9 + (result << 6) + (result >> 2); - return result; + return get_std_hash(x.tie()); } } // namespace std diff --git a/lib/pcg/test/src/pcg/machine_specification.cc b/lib/pcg/test/src/pcg/machine_specification.cc new file mode 100644 index 0000000000..8f6e1dc24b --- /dev/null +++ b/lib/pcg/test/src/pcg/machine_specification.cc @@ -0,0 +1,25 @@ +#include "pcg/machine_specification.h" +#include "test/utils/doctest.h" + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + + TEST_CASE("MachineSpecification") { + + MachineSpecification ms = MachineSpecification{4, 16, 8, 100.0f, 200.0f}; + + SUBCASE("get_num_gpus") { + CHECK(get_num_gpus(ms) == 4 * 8); + } + + SUBCASE("get_num_cpus") { + CHECK(get_num_cpus(ms) == 4 * 16); + } + + SUBCASE("get_num_devices") { + CHECK(get_num_devices(ms, DeviceType::GPU) == 4 * 8); + 
CHECK(get_num_devices(ms, DeviceType::CPU) == 16 * 4); + } + } +} diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index 44f02e7a40..d759f302cd 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -26,9 +26,6 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("num_devices") { CHECK(num_devices(mv) == 7 * 10 * 1); } - SUBCASE("get_size") { - CHECK(get_size(mv) == (7 * 5) * (10 * 2) * (1 * 4)); - } SUBCASE("get_side_size_per_dim") { std::vector expected = { diff --git a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc index be637b1e0e..57cb419669 100644 --- a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc @@ -13,7 +13,8 @@ TEST_SUITE(FF_TEST_SUITE) { device_id_t start = device_id_t(gpu_id_t(5)); MachineView input = MachineView{start, rect}; - MachineView result = to_start_dependent(to_start_invariant(input), start); + MachineView result = machine_view_from_start_invariant( + start_invariant_from_machine_view(input), start); MachineView correct = input; CHECK(correct == input); } diff --git a/lib/pcg/test/src/pcg/strided_rectangle.cc b/lib/pcg/test/src/pcg/strided_rectangle.cc index c5e3e70ce7..b1b8e13757 100644 --- a/lib/pcg/test/src/pcg/strided_rectangle.cc +++ b/lib/pcg/test/src/pcg/strided_rectangle.cc @@ -30,10 +30,6 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("get_num_points") { CHECK(get_num_points(rect) == num_points_t{7 * 8 * 10}); } - - SUBCASE("get_size") { - CHECK(get_size(rect) == size_t{(7 * 5) * (10 * 2) * (8 * 1)}); - } } } } From 6e6adaad14ba841f13afe8ea39cf1799cd2f6f77 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 13 Aug 2024 12:33:13 -0700 Subject: [PATCH 16/34] small refactor --- .../include/compiler/allowed_machine_views.h | 29 +++ .../include/compiler/machine_mapping.h | 19 -- .../src/compiler/allowed_machine_views.cc | 191 ++++++++++++++++++ 
lib/compiler/src/machine_mapping.cc | 181 ----------------- .../test/src/allowed_machine_ views.cc | 180 +++++++++++++++++ lib/compiler/test/src/machine_mapping.cc | 171 ---------------- 6 files changed, 400 insertions(+), 371 deletions(-) create mode 100644 lib/compiler/include/compiler/allowed_machine_views.h create mode 100644 lib/compiler/src/compiler/allowed_machine_views.cc create mode 100644 lib/compiler/test/src/allowed_machine_ views.cc diff --git a/lib/compiler/include/compiler/allowed_machine_views.h b/lib/compiler/include/compiler/allowed_machine_views.h new file mode 100644 index 0000000000..bf401b70f7 --- /dev/null +++ b/lib/compiler/include/compiler/allowed_machine_views.h @@ -0,0 +1,29 @@ +#ifndef _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H +#define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H + +#include "compiler/tensor_to_machine_view_injection.dtg.h" +#include "pcg/machine_specification.h" +#include "pcg/machine_view.h" +#include "pcg/start_invariant_machine_view.dtg.h" +#include "op-attrs/parallel_tensor_shape.dtg.h" + +namespace FlexFlow { + +std::unordered_set + get_allowed_machine_views(MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType device_type = DeviceType::GPU); + +std::unordered_set + get_allowed_start_invariant_machine_views( + MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType device_type = DeviceType::GPU); + +std::unordered_set + get_all_tensor_to_machine_view_injections(MachineView const &mv, + ParallelTensorShape const &shape); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/machine_mapping.h b/lib/compiler/include/compiler/machine_mapping.h index 2f07e48100..9ee57f9895 100644 --- a/lib/compiler/include/compiler/machine_mapping.h +++ b/lib/compiler/include/compiler/machine_mapping.h @@ -3,13 +3,9 @@ #include "compiler/machine_mapping.dtg.h" #include "compiler/optimal_cost_state.dtg.h" -#include 
"compiler/tensor_to_machine_view_injection.dtg.h" #include "cost_estimate.h" -#include "pcg/machine_specification.dtg.h" -#include "pcg/machine_specification.h" #include "pcg/machine_view.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" -#include "pcg/start_invariant_machine_view.h" #include "substitutions/sub_parallel_computation_graph.h" #include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" @@ -55,21 +51,6 @@ OptimalCostResult optimal_cost( MachineSpecification const &resources, OptimalCostCache &cached_subgraph_costs); -std::unordered_set - get_allowed_machine_views(MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType device_type = DeviceType::GPU); - -std::unordered_set - get_allowed_start_invariant_machine_views( - MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType device_type = DeviceType::GPU); - -std::unordered_set - get_all_tensor_to_machine_view_injections(MachineView const &mv, - ParallelTensorShape const &shape); - } // namespace FlexFlow #endif diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc new file mode 100644 index 0000000000..ace2852071 --- /dev/null +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -0,0 +1,191 @@ +#include "compiler/allowed_machine_views.h" +#include "op-attrs/parallel_tensor_dims.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "pcg/machine_specification.h" +#include "pcg/machine_view.h" +#include "pcg/start_invariant_machine_view.h" +#include "utils/containers/all_of.h" +#include "utils/containers/cartesian_product.h" +#include "utils/containers/extend.h" +#include "utils/containers/filter.h" +#include "utils/containers/permutations.h" +#include "utils/containers/product.h" +#include "utils/containers/range.h" +#include "utils/containers/replicate.h" +#include "utils/containers/sorted.h" +#include 
"utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" +#include "utils/containers/without_order.h" +#include "utils/containers/zip.h" +#include "utils/graph/serial_parallel/serial_parallel_decomposition.h" +#include "utils/overload.h" + +namespace FlexFlow { + +static std::unordered_multiset + get_unordered_tensor_parallel_degrees(ParallelTensorShape const &shape) { + std::unordered_multiset degrees = + without_order(ff_ordered_shard_degrees(shape)); + degrees.insert(get_sum_degree(shape)); + degrees.insert(get_discard_copy_degree(shape)); + // filtering non-parallel dims + degrees = filter(degrees, [](int degree) { return degree != 1; }); + return degrees; +} + +bool is_valid_machine_view(MachineView const &mv, + MachineSpecification const &machine_spec) { + + int num_devices = get_num_devices(machine_spec, get_device_type(mv)); + return (num_devices > get_raw_id(get_last_device_id(mv))); +} + +bool is_valid_machine_view(MachineView const &mv, + ParallelTensorShape const &shape) { + + std::vector mv_degrees = + transform(get_num_devices_per_dim(mv), + [](num_points_t degree) { return degree.unwrapped; }); + std::unordered_multiset unordered_tensor_degrees = + get_unordered_tensor_parallel_degrees(shape); + + return without_order(mv_degrees) == unordered_tensor_degrees; +} + +static std::unordered_set + get_candidate_machine_views(MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType const &device_type) { + + auto candidate_strides = + [](std::vector const &tensor_dims, + int total_devices) -> std::unordered_multiset> { + int min_num_devices_with_full_stride_volume = + product(transform(tensor_dims, [](int degree) { return degree - 1; })); + int max_stride_upper_bound = + std::ceil(total_devices / min_num_devices_with_full_stride_volume); + std::vector single_stride_range = + transform(range(1, max_stride_upper_bound + 1), + [](int stride) { return stride_t(stride); }); + std::unordered_multiset> strides 
= + cartesian_product(replicate(tensor_dims.size(), single_stride_range)); + return strides; + }; + + auto get_strided_rectangle = [](std::vector const &strides, + std::vector const &num_points_per_dim) { + std::vector sides = + transform(zip(num_points_per_dim, strides), [&](auto const &p) { + return StridedRectangleSide(num_points_t(p.first), + stride_t(p.second)); + }); + return StridedRectangle{sides}; + }; + + std::unordered_multiset tensor_dims = + get_unordered_tensor_parallel_degrees(shape); + int total_devices = get_num_devices(machine_spec, device_type); + + std::unordered_set machine_views; + for (std::vector const &strides : + candidate_strides(sorted(tensor_dims), total_devices)) { + StridedRectangle rect = get_strided_rectangle(strides, sorted(tensor_dims)); + auto start_inv_mv = StartInvariantMachineView{rect}; + for (int start_id : range(total_devices)) { + device_id_t start_device = device_id_from_index(start_id, device_type); + machine_views.insert( + machine_view_from_start_invariant(start_inv_mv, start_device)); + } + } + return machine_views; +} + +std::unordered_set + get_allowed_machine_views(MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType device_type) { + + std::unordered_set views = + get_candidate_machine_views(machine_spec, shape, device_type); + return filter(views, [&](MachineView const &view) { + return is_valid_machine_view(view, shape) && + is_valid_machine_view(view, machine_spec); + }); +} + +std::unordered_set + get_allowed_start_invariant_machine_views( + MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType device_type) { + return transform(get_allowed_machine_views(machine_spec, shape, device_type), + start_invariant_from_machine_view); +} + +ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, + parallel_tensor_dim_idx idx) { + return idx.visit( + overload{[&](ff_dim_t shard_dim) { + return 
ParallelDim{shape.dims.shard_dims.at(shard_dim)}; + }, + [&](ReplicaType replica_type) { + ReplicaParallelDimSet replicas = shape.dims.replica_dims; + int degree = (ReplicaType::SUM == replica_type + ? replicas.sum_degree.value + : replicas.discard_copy_degree.value); + return ParallelDim{ReplicaParallelDim{degree, replica_type}}; + }}); +} + +std::unordered_set + get_parallel_tensor_indices(ParallelTensorShape const &shape) { + std::unordered_set indices; + extend(indices, transform(range(num_shard_dims(shape)), [](int idx) { + return parallel_tensor_dim_idx(ff_dim_t(idx)); + })); + indices.insert(parallel_tensor_dim_idx(ReplicaType::SUM)); + indices.insert(parallel_tensor_dim_idx(ReplicaType::DISCARD_COPY)); + return indices; +} + +std::unordered_set + get_machine_view_indices(MachineView const &mv) { + return transform(unordered_set_of(range(num_dims(mv))), + [](int idx) { return machine_view_dim_idx(idx); }); +} + +bool is_valid_injection(TensorToMachineViewInjection const &injection, + MachineView const &mv, + ParallelTensorShape const &shape) { + return all_of(injection.raw_bidict, [&](auto const pair) { + int mv_degree = get_side_at_idx(mv, pair.first).num_points.unwrapped; + int tensor_degree = get_degree(get_parallel_dim_at_idx(shape, pair.second)); + return (tensor_degree == mv_degree); + }); +} + +std::unordered_set + get_all_tensor_to_machine_view_injections( + MachineView const &mv, ParallelTensorShape const &shape) { + assert(is_valid_machine_view(mv, shape)); + std::unordered_set mv_indices = + get_machine_view_indices(mv); + std::unordered_set shape_indices = + get_parallel_tensor_indices(shape); + shape_indices = filter(shape_indices, [&](auto const idx) { + return get_degree(get_parallel_dim_at_idx(shape, idx)) != 1; + }); + + std::unordered_set injections; + for (std::vector const &p : + permutations(shape_indices)) { + TensorToMachineViewInjection injection = + TensorToMachineViewInjection(bidict(zip(sorted(mv_indices), p))); + if 
(is_valid_injection(injection, mv, shape)) { + injections.insert(injection); + } + } + return injections; +} + +} // namespace FlexFlow diff --git a/lib/compiler/src/machine_mapping.cc b/lib/compiler/src/machine_mapping.cc index 76e6ff1882..12eacb2a30 100644 --- a/lib/compiler/src/machine_mapping.cc +++ b/lib/compiler/src/machine_mapping.cc @@ -1,31 +1,17 @@ #include "compiler/machine_mapping.h" #include "compiler/cost_estimate.h" #include "compiler/graph_utils.h" -#include "compiler/tensor_to_machine_view_injection.dtg.h" -#include "op-attrs/parallel_tensor_dim_idx.dtg.h" #include "pcg/machine_specification.dtg.h" #include "pcg/machine_specification.h" #include "pcg/machine_view.dtg.h" #include "pcg/machine_view.h" -#include "pcg/machine_view_dim_idx.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" -#include "pcg/start_invariant_machine_view.h" #include "utils/containers.h" -#include "utils/containers/all_of.h" #include "utils/containers/are_disjoint.h" #include "utils/containers/as_vector.h" -#include "utils/containers/cartesian_product.h" #include "utils/containers/contains_key.h" -#include "utils/containers/filter.h" #include "utils/containers/get_only.h" #include "utils/containers/keys.h" -#include "utils/containers/permutations.h" -#include "utils/containers/product.h" -#include "utils/containers/range.h" -#include "utils/containers/replicate.h" -#include "utils/containers/transform.h" -#include "utils/containers/without_order.h" -#include "utils/containers/zip.h" #include "utils/exception.h" #include "utils/graph/graph_split.dtg.h" #include "utils/graph/node/algorithms.h" @@ -34,7 +20,6 @@ #include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" #include "utils/graph/serial_parallel/serial_parallel_decomposition.h" #include "utils/graph/serial_parallel/serial_parallel_splits.h" -#include "utils/overload.h" namespace FlexFlow { @@ -373,170 +358,4 @@ OptimalCostResult optimal_cost( return 
searcher.optimal_cost(subpcg, resources, sp_decomposition); } -static std::unordered_multiset - get_unordered_tensor_parallel_degrees(ParallelTensorShape const &shape) { - std::unordered_multiset degrees = - without_order(ff_ordered_shard_degrees(shape)); - degrees.insert(get_sum_degree(shape)); - degrees.insert(get_discard_copy_degree(shape)); - // filtering non-parallel dims - degrees = filter(degrees, [](int degree) { return degree != 1; }); - return degrees; -} - -bool is_valid_machine_view(MachineView const &mv, - MachineSpecification const &machine_spec) { - - int num_devices = get_num_devices(machine_spec, get_device_type(mv)); - return (num_devices > get_raw_id(get_last_device_id(mv))); -} - -bool is_valid_machine_view(MachineView const &mv, - ParallelTensorShape const &shape) { - - std::vector mv_degrees = - transform(get_num_devices_per_dim(mv), - [](num_points_t degree) { return degree.unwrapped; }); - std::unordered_multiset unordered_tensor_degrees = - get_unordered_tensor_parallel_degrees(shape); - - return without_order(mv_degrees) == unordered_tensor_degrees; -} - -static std::unordered_set - get_candidate_machine_views(MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType const &device_type) { - - auto candidate_strides = - [](std::vector const &tensor_dims, - int total_devices) -> std::unordered_multiset> { - int min_num_devices_with_full_stride_volume = - product(transform(tensor_dims, [](int degree) { return degree - 1; })); - int max_stride_upper_bound = - std::ceil(total_devices / min_num_devices_with_full_stride_volume); - std::vector single_stride_range = - transform(range(1, max_stride_upper_bound + 1), - [](int stride) { return stride_t(stride); }); - std::unordered_multiset> strides = - cartesian_product(replicate(tensor_dims.size(), single_stride_range)); - return strides; - }; - - auto get_strided_rectangle = [](std::vector const &strides, - std::vector const &num_points_per_dim) { - std::vector 
sides = - transform(zip(num_points_per_dim, strides), [&](auto const &p) { - return StridedRectangleSide(num_points_t(p.first), - stride_t(p.second)); - }); - return StridedRectangle{sides}; - }; - - std::unordered_multiset tensor_dims = - get_unordered_tensor_parallel_degrees(shape); - int total_devices = get_num_devices(machine_spec, device_type); - - std::unordered_set machine_views; - for (std::vector const &strides : - candidate_strides(sorted(tensor_dims), total_devices)) { - StridedRectangle rect = get_strided_rectangle(strides, sorted(tensor_dims)); - auto start_inv_mv = StartInvariantMachineView{rect}; - for (int start_id : range(total_devices)) { - device_id_t start_device = device_id_from_index(start_id, device_type); - machine_views.insert( - machine_view_from_start_invariant(start_inv_mv, start_device)); - } - } - return machine_views; -} - -std::unordered_set - get_allowed_machine_views(MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType device_type) { - - std::unordered_set views = - get_candidate_machine_views(machine_spec, shape, device_type); - return filter(views, [&](MachineView const &view) { - return is_valid_machine_view(view, shape) && - is_valid_machine_view(view, machine_spec); - }); -} - -std::unordered_set - get_allowed_start_invariant_machine_views( - MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType device_type) { - return transform(get_allowed_machine_views(machine_spec, shape, device_type), - start_invariant_from_machine_view); -} - -ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, - parallel_tensor_dim_idx idx) { - return idx.visit( - overload{[&](ff_dim_t shard_dim) { - return ParallelDim{shape.dims.shard_dims.at(shard_dim)}; - }, - [&](ReplicaType replica_type) { - ReplicaParallelDimSet replicas = shape.dims.replica_dims; - int degree = (ReplicaType::SUM == replica_type - ? 
replicas.sum_degree.value - : replicas.discard_copy_degree.value); - return ParallelDim{ReplicaParallelDim{degree, replica_type}}; - }}); -} - -std::unordered_set - get_parallel_tensor_indices(ParallelTensorShape const &shape) { - std::unordered_set indices; - extend(indices, transform(range(num_shard_dims(shape)), [](int idx) { - return parallel_tensor_dim_idx(ff_dim_t(idx)); - })); - indices.insert(parallel_tensor_dim_idx(ReplicaType::SUM)); - indices.insert(parallel_tensor_dim_idx(ReplicaType::DISCARD_COPY)); - return indices; -} - -std::unordered_set - get_machine_view_indices(MachineView const &mv) { - return transform(unordered_set_of(range(num_dims(mv))), - [](int idx) { return machine_view_dim_idx(idx); }); -} - -bool is_valid_injection(TensorToMachineViewInjection const &injection, - MachineView const &mv, - ParallelTensorShape const &shape) { - return all_of(injection.raw_bidict, [&](auto const pair) { - int mv_degree = get_side_at_idx(mv, pair.first).num_points.unwrapped; - int tensor_degree = get_degree(get_parallel_dim_at_idx(shape, pair.second)); - return (tensor_degree == mv_degree); - }); -} - -std::unordered_set - get_all_tensor_to_machine_view_injections( - MachineView const &mv, ParallelTensorShape const &shape) { - assert(is_valid_machine_view(mv, shape)); - std::unordered_set mv_indices = - get_machine_view_indices(mv); - std::unordered_set shape_indices = - get_parallel_tensor_indices(shape); - shape_indices = filter(shape_indices, [&](auto const idx) { - return get_degree(get_parallel_dim_at_idx(shape, idx)) != 1; - }); - - std::unordered_set injections; - for (std::vector const &p : - permutations(shape_indices)) { - TensorToMachineViewInjection injection = - TensorToMachineViewInjection(bidict(zip(sorted(mv_indices), p))); - if (is_valid_injection(injection, mv, shape)) { - injections.insert(injection); - } - } - return injections; -} - } // namespace FlexFlow diff --git a/lib/compiler/test/src/allowed_machine_ views.cc 
b/lib/compiler/test/src/allowed_machine_ views.cc new file mode 100644 index 0000000000..9187c25dd8 --- /dev/null +++ b/lib/compiler/test/src/allowed_machine_ views.cc @@ -0,0 +1,180 @@ +#include "compiler/allowed_machine_views.h" +#include "doctest/doctest.h" +#include "pcg/machine_specification.dtg.h" +#include "pcg/machine_view.h" +#include "pcg/start_invariant_machine_view.h" +#include "utils/containers/extend.h" + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_allowed_machine_views") { + + SUBCASE("1 degree of parallelism") { + MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + std::unordered_set correct = { + make_1d_machine_view(gpu_id_t(0), gpu_id_t(3), stride_t(1)), + make_1d_machine_view(gpu_id_t(1), gpu_id_t(4), stride_t(1)), + make_1d_machine_view(gpu_id_t(2), gpu_id_t(5), stride_t(1)), + make_1d_machine_view(gpu_id_t(0), gpu_id_t(6), stride_t(2))}; + std::unordered_set result = + get_allowed_machine_views(ms, shape); + CHECK(correct == result); + } + + SUBCASE("2 degrees of parallelism") { + MachineSpecification ms = MachineSpecification{18, 1, 1, 0, 0}; + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{2}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + auto make_2d_views = [&](int num_starts, int stride1, int stride2) { + std::unordered_set views; + for (int i = 0; i < num_starts; i++) { + StridedRectangle rect = StridedRectangle{ + {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, + StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; + MachineView mv = MachineView{device_id_t(gpu_id_t(i)), rect}; + views.insert(mv); + } + return views; + }; + 
std::unordered_set correct; + extend(correct, + make_2d_views(/*num_starts*/ 13, /*stride1*/ 1, /*stride2*/ 1)); + extend(correct, + make_2d_views(/*num_starts*/ 8, /*stride1*/ 2, /*stride2*/ 1)); + extend(correct, + make_2d_views(/*num_starts*/ 9, /*stride1*/ 1, /*stride2*/ 2)); + extend(correct, + make_2d_views(/*num_starts*/ 3, /*stride1*/ 3, /*stride2*/ 1)); + extend(correct, + make_2d_views(/*num_starts*/ 5, /*stride1*/ 1, /*stride2*/ 3)); + extend(correct, + make_2d_views(/*num_starts*/ 1, /*stride1*/ 1, /*stride2*/ 4)); + + std::unordered_set result = + get_allowed_machine_views(ms, shape); + CHECK(result == correct); + } + } + + TEST_CASE("get_allowed_start_invariant_machine_views") { + + SUBCASE("1 degree of parallelism") { + MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + std::unordered_set correct = { + make_1d_start_invariant_machine_view(num_points_t(3), stride_t(1)), + make_1d_start_invariant_machine_view(num_points_t(3), stride_t(2))}; + std::unordered_set result = + get_allowed_start_invariant_machine_views(ms, shape); + CHECK(correct == result); + } + + SUBCASE("2 degrees of parallelism") { + MachineSpecification ms = MachineSpecification(18, 1, 1, 0, 0); + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{2}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + auto make_2d_view = [&](int stride1, int stride2) { + StridedRectangle rect = StridedRectangle{ + {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, + StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; + return StartInvariantMachineView{rect}; + }; + std::unordered_set correct = { + make_2d_view(/*stride1*/ 1, /*stride2*/ 1), + 
make_2d_view(/*stride1*/ 2, /*stride2*/ 1), + make_2d_view(/*stride1*/ 1, /*stride2*/ 2), + make_2d_view(/*stride1*/ 3, /*stride2*/ 1), + make_2d_view(/*stride1*/ 1, /*stride2*/ 3), + make_2d_view(/*stride1*/ 1, /*stride2*/ 4)}; + + std::unordered_set result = + get_allowed_start_invariant_machine_views(ms, shape); + CHECK(result == correct); + } + } + + TEST_CASE("get_all_tensor_to_machine_view_injections") { + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{2}, + DiscardCopyDegree{2}, + }, + }, + DataType::FLOAT, + }; + MachineView view = + MachineView{device_id_from_index(0, DeviceType::GPU), + StridedRectangle{ + {StridedRectangleSide{num_points_t(2), stride_t(1)}, + StridedRectangleSide{num_points_t(2), stride_t(4)}, + StridedRectangleSide{num_points_t(3), stride_t(1)}}, + }}; + bidict b1 = { + {machine_view_dim_idx(2), ff_dim_t(0)}, + {machine_view_dim_idx(1), ReplicaType::SUM}, + {machine_view_dim_idx(0), ReplicaType::DISCARD_COPY}}; + + bidict b2 = { + {machine_view_dim_idx(2), ff_dim_t(0)}, + {machine_view_dim_idx(0), ReplicaType::SUM}, + {machine_view_dim_idx(1), ReplicaType::DISCARD_COPY}}; + + std::unordered_set correct = { + TensorToMachineViewInjection{b1}, TensorToMachineViewInjection{b2}}; + std::unordered_set result = + get_all_tensor_to_machine_view_injections(view, shape); + CHECK(correct == result); + } +} diff --git a/lib/compiler/test/src/machine_mapping.cc b/lib/compiler/test/src/machine_mapping.cc index 729ea972c6..201704f66c 100644 --- a/lib/compiler/test/src/machine_mapping.cc +++ b/lib/compiler/test/src/machine_mapping.cc @@ -1,180 +1,9 @@ #include "compiler/machine_mapping.h" #include "doctest/doctest.h" -#include "pcg/machine_specification.dtg.h" #include "test_generator.h" -#include "utils/containers/extend.h" TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_allowed_machine_views") { - - SUBCASE("1 degree of parallelism") { - 
MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }; - - std::unordered_set correct = { - make_1d_machine_view(gpu_id_t(0), gpu_id_t(3), stride_t(1)), - make_1d_machine_view(gpu_id_t(1), gpu_id_t(4), stride_t(1)), - make_1d_machine_view(gpu_id_t(2), gpu_id_t(5), stride_t(1)), - make_1d_machine_view(gpu_id_t(0), gpu_id_t(6), stride_t(2))}; - std::unordered_set result = - get_allowed_machine_views(ms, shape); - CHECK(correct == result); - } - - SUBCASE("2 degrees of parallelism") { - MachineSpecification ms = MachineSpecification{18, 1, 1, 0, 0}; - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{2}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }; - - auto make_2d_views = [&](int num_starts, int stride1, int stride2) { - std::unordered_set views; - for (int i = 0; i < num_starts; i++) { - StridedRectangle rect = StridedRectangle{ - {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, - StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; - MachineView mv = MachineView{device_id_t(gpu_id_t(i)), rect}; - views.insert(mv); - } - return views; - }; - std::unordered_set correct; - extend(correct, - make_2d_views(/*num_starts*/ 13, /*stride1*/ 1, /*stride2*/ 1)); - extend(correct, - make_2d_views(/*num_starts*/ 8, /*stride1*/ 2, /*stride2*/ 1)); - extend(correct, - make_2d_views(/*num_starts*/ 9, /*stride1*/ 1, /*stride2*/ 2)); - extend(correct, - make_2d_views(/*num_starts*/ 3, /*stride1*/ 3, /*stride2*/ 1)); - extend(correct, - make_2d_views(/*num_starts*/ 5, /*stride1*/ 1, /*stride2*/ 3)); - extend(correct, - make_2d_views(/*num_starts*/ 1, /*stride1*/ 1, /*stride2*/ 4)); - - std::unordered_set result = - 
get_allowed_machine_views(ms, shape); - CHECK(result == correct); - } - } - - TEST_CASE("get_allowed_start_invariant_machine_views") { - - SUBCASE("1 degree of parallelism") { - MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }; - - std::unordered_set correct = { - make_1d_start_invariant_machine_view(num_points_t(3), stride_t(1)), - make_1d_start_invariant_machine_view(num_points_t(3), stride_t(2))}; - std::unordered_set result = - get_allowed_start_invariant_machine_views(ms, shape); - CHECK(correct == result); - } - - SUBCASE("2 degrees of parallelism") { - MachineSpecification ms = MachineSpecification(18, 1, 1, 0, 0); - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{2}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }; - - auto make_2d_view = [&](int stride1, int stride2) { - StridedRectangle rect = StridedRectangle{ - {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, - StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; - return StartInvariantMachineView{rect}; - }; - std::unordered_set correct = { - make_2d_view(/*stride1*/ 1, /*stride2*/ 1), - make_2d_view(/*stride1*/ 2, /*stride2*/ 1), - make_2d_view(/*stride1*/ 1, /*stride2*/ 2), - make_2d_view(/*stride1*/ 3, /*stride2*/ 1), - make_2d_view(/*stride1*/ 1, /*stride2*/ 3), - make_2d_view(/*stride1*/ 1, /*stride2*/ 4)}; - - std::unordered_set result = - get_allowed_start_invariant_machine_views(ms, shape); - CHECK(result == correct); - } - } - - TEST_CASE("get_all_tensor_to_machine_view_injections") { - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - 
SumDegree{2}, - DiscardCopyDegree{2}, - }, - }, - DataType::FLOAT, - }; - MachineView view = - MachineView{device_id_from_index(0, DeviceType::GPU), - StridedRectangle{ - {StridedRectangleSide{num_points_t(2), stride_t(1)}, - StridedRectangleSide{num_points_t(2), stride_t(4)}, - StridedRectangleSide{num_points_t(3), stride_t(1)}}, - }}; - bidict b1 = { - {machine_view_dim_idx(2), ff_dim_t(0)}, - {machine_view_dim_idx(1), ReplicaType::SUM}, - {machine_view_dim_idx(0), ReplicaType::DISCARD_COPY}}; - - bidict b2 = { - {machine_view_dim_idx(2), ff_dim_t(0)}, - {machine_view_dim_idx(0), ReplicaType::SUM}, - {machine_view_dim_idx(1), ReplicaType::DISCARD_COPY}}; - std::unordered_set correct = { - TensorToMachineViewInjection{b1}, TensorToMachineViewInjection{b2}}; - std::unordered_set result = - get_all_tensor_to_machine_view_injections(view, shape); - CHECK(correct == result); - } - // TEST_CASE("MachineMapping::combine") { // RC_SUBCASE([](MachineMapping const &m0, MachineMapping const &m1) { // RC_PRE(MachineMapping::nodes_are_disjoint(m0, m1)); From 4b8600da25d64ae2c0bee61e5ab682f6b4c32fc4 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 13 Aug 2024 12:47:19 -0700 Subject: [PATCH 17/34] formatting --- lib/compiler/include/compiler/allowed_machine_views.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compiler/include/compiler/allowed_machine_views.h b/lib/compiler/include/compiler/allowed_machine_views.h index bf401b70f7..f57323c8a1 100644 --- a/lib/compiler/include/compiler/allowed_machine_views.h +++ b/lib/compiler/include/compiler/allowed_machine_views.h @@ -2,10 +2,10 @@ #define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H #include "compiler/tensor_to_machine_view_injection.dtg.h" +#include "op-attrs/parallel_tensor_shape.dtg.h" #include "pcg/machine_specification.h" #include "pcg/machine_view.h" #include "pcg/start_invariant_machine_view.dtg.h" -#include "op-attrs/parallel_tensor_shape.dtg.h" namespace FlexFlow { From 
c9532d169ae6bfe0aba773a23dd97cf98e2caf5b Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 27 Aug 2024 17:56:59 -0700 Subject: [PATCH 18/34] Cleaning Up --- .../include/compiler/allowed_machine_views.h | 12 +- .../compiler/machine_view_to_tensor_mapping.h | 22 +++ ...machine_view_to_tensor_mapping.struct.toml | 17 ++ ...nsor_to_machine_view_injection.struct.toml | 18 -- .../src/compiler/allowed_machine_views.cc | 170 +++++++----------- .../machine_view_to_tensor_mapping.cc | 47 +++++ lib/compiler/src/graph_utils.cc | 1 - .../test/src/allowed_machine_ views.cc | 87 +++------ .../src/machine_view_to_tensor_mapping.cc | 53 ++++++ .../op-attrs/parallel_tensor_dim_idx_t.h | 17 ++ ...=> parallel_tensor_dim_idx_t.variant.toml} | 3 +- .../src/op-attrs/parallel_tensor_dim_idx_t.cc | 36 ++++ .../pcg/device_coordinates.struct.toml | 6 +- lib/pcg/include/pcg/machine_view.h | 6 +- lib/pcg/include/pcg/machine_view_dim_idx_t.h | 14 ++ ...oml => machine_view_dim_idx_t.struct.toml} | 2 +- .../pcg/multi_dimensional_stride.struct.toml | 25 +++ lib/pcg/include/pcg/strided_rectangle.h | 9 +- lib/pcg/include/pcg/strided_rectangle_side.h | 2 + lib/pcg/src/pcg/device_id.cc | 7 +- lib/pcg/src/pcg/machine_view.cc | 47 +++-- lib/pcg/src/pcg/machine_view_dim_idx_t.cc | 14 ++ lib/pcg/src/pcg/strided_rectangle.cc | 15 +- lib/pcg/src/pcg/strided_rectangle_side.cc | 5 + lib/pcg/test/src/pcg/machine_view.cc | 107 +++++++---- .../src/pcg/start_invariant_machine_view.cc | 44 ++++- lib/utils/include/utils/bidict/bidict.h | 3 +- .../utils/containers/cartesian_product.h | 38 ++-- lib/utils/include/utils/containers/filter.h | 8 +- .../utils/containers/get_all_permutations.h | 96 ++++++++++ .../include/utils/containers/permutations.h | 29 --- lib/utils/include/utils/containers/range.h | 23 +-- .../include/utils/containers/replicate.h | 4 +- .../utils/containers/unordered_multiset_of.h | 15 ++ .../include/utils/containers/without_order.h | 15 -- .../utils/containers/get_all_permutations.cc | 1 
+ .../src/utils/containers/permutations.cc | 1 - lib/utils/src/utils/containers/range.cc | 25 +++ .../utils/containers/unordered_multiset_of.cc | 1 + .../src/utils/containers/without_order.cc | 1 - lib/utils/test/src/test_containers.cc | 2 +- .../test/src/utils/containers/permutations.cc | 34 ---- ...hout_order.cc => unordered_multiset_of.cc} | 6 +- 43 files changed, 686 insertions(+), 402 deletions(-) create mode 100644 lib/compiler/include/compiler/machine_view_to_tensor_mapping.h create mode 100644 lib/compiler/include/compiler/machine_view_to_tensor_mapping.struct.toml delete mode 100644 lib/compiler/include/compiler/tensor_to_machine_view_injection.struct.toml create mode 100644 lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc create mode 100644 lib/compiler/test/src/machine_view_to_tensor_mapping.cc create mode 100644 lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h rename lib/op-attrs/include/op-attrs/{parallel_tensor_dim_idx.variant.toml => parallel_tensor_dim_idx_t.variant.toml} (79%) create mode 100644 lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc create mode 100644 lib/pcg/include/pcg/machine_view_dim_idx_t.h rename lib/pcg/include/pcg/{machine_view_dim_idx.struct.toml => machine_view_dim_idx_t.struct.toml} (81%) create mode 100644 lib/pcg/include/pcg/multi_dimensional_stride.struct.toml create mode 100644 lib/pcg/src/pcg/machine_view_dim_idx_t.cc create mode 100644 lib/utils/include/utils/containers/get_all_permutations.h delete mode 100644 lib/utils/include/utils/containers/permutations.h create mode 100644 lib/utils/include/utils/containers/unordered_multiset_of.h delete mode 100644 lib/utils/include/utils/containers/without_order.h create mode 100644 lib/utils/src/utils/containers/get_all_permutations.cc delete mode 100644 lib/utils/src/utils/containers/permutations.cc create mode 100644 lib/utils/src/utils/containers/unordered_multiset_of.cc delete mode 100644 lib/utils/src/utils/containers/without_order.cc delete mode 
100644 lib/utils/test/src/utils/containers/permutations.cc rename lib/utils/test/src/utils/containers/{without_order.cc => unordered_multiset_of.cc} (60%) diff --git a/lib/compiler/include/compiler/allowed_machine_views.h b/lib/compiler/include/compiler/allowed_machine_views.h index f57323c8a1..1029ed5fd8 100644 --- a/lib/compiler/include/compiler/allowed_machine_views.h +++ b/lib/compiler/include/compiler/allowed_machine_views.h @@ -1,7 +1,7 @@ #ifndef _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H #define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H -#include "compiler/tensor_to_machine_view_injection.dtg.h" +#include "compiler/machine_view_to_tensor_mapping.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "pcg/machine_specification.h" #include "pcg/machine_view.h" @@ -9,6 +9,12 @@ namespace FlexFlow { +bool is_valid_machine_view(MachineView const &mv, + MachineSpecification const &machine_spec); + +bool is_valid_machine_view(MachineView const &mv, + ParallelTensorShape const &shape); + std::unordered_set get_allowed_machine_views(MachineSpecification const &machine_spec, ParallelTensorShape const &shape, @@ -20,10 +26,6 @@ std::unordered_set ParallelTensorShape const &shape, DeviceType device_type = DeviceType::GPU); -std::unordered_set - get_all_tensor_to_machine_view_injections(MachineView const &mv, - ParallelTensorShape const &shape); - } // namespace FlexFlow #endif diff --git a/lib/compiler/include/compiler/machine_view_to_tensor_mapping.h b/lib/compiler/include/compiler/machine_view_to_tensor_mapping.h new file mode 100644 index 0000000000..2244620304 --- /dev/null +++ b/lib/compiler/include/compiler/machine_view_to_tensor_mapping.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_COMPILER_MACHINE_VIEW_TO_TENSOR_MAPPING_H +#define _FLEXFLOW_COMPILER_MACHINE_VIEW_TO_TENSOR_MAPPING_H + +#include "compiler/machine_view_to_tensor_mapping.dtg.h" +#include "op-attrs/parallel_tensor_shape.dtg.h" +#include "pcg/machine_view.h" + +#include + +namespace FlexFlow { + 
+bool is_valid_mapping(MachineViewToTensorMapping const &mapping, + MachineView const &mv, + ParallelTensorShape const &shape); + +std::unordered_set + get_all_machine_view_to_tensor_mappings(MachineView const &mv, + ParallelTensorShape const &shape); + +} // namespace FlexFlow + +#endif diff --git a/lib/compiler/include/compiler/machine_view_to_tensor_mapping.struct.toml b/lib/compiler/include/compiler/machine_view_to_tensor_mapping.struct.toml new file mode 100644 index 0000000000..b3f154188b --- /dev/null +++ b/lib/compiler/include/compiler/machine_view_to_tensor_mapping.struct.toml @@ -0,0 +1,17 @@ +namespace = "FlexFlow" +name = "MachineViewToTensorMapping" +features = [ + "eq", + "hash", + "fmt", +] + +includes = [ + "pcg/machine_view_dim_idx_t.dtg.h", + "op-attrs/parallel_tensor_dim_idx_t.dtg.h", + "utils/bidict/bidict.h", +] + +[[fields]] +name = "raw_bidict" +type = "::FlexFlow::bidict<::FlexFlow::machine_view_dim_idx_t, ::FlexFlow::parallel_tensor_dim_idx_t>" diff --git a/lib/compiler/include/compiler/tensor_to_machine_view_injection.struct.toml b/lib/compiler/include/compiler/tensor_to_machine_view_injection.struct.toml deleted file mode 100644 index deb65defcb..0000000000 --- a/lib/compiler/include/compiler/tensor_to_machine_view_injection.struct.toml +++ /dev/null @@ -1,18 +0,0 @@ -namespace = "FlexFlow" -name = "TensorToMachineViewInjection" -features = [ - "eq", - "hash", - "fmt", -] - -includes = [ - "pcg/machine_view_dim_idx.dtg.h", - "op-attrs/parallel_tensor_dim_idx.dtg.h", - "utils/bidict/bidict.h", - "utils/hash/unordered_map.h" -] - -[[fields]] -name = "raw_bidict" -type = "::FlexFlow::bidict<::FlexFlow::machine_view_dim_idx, ::FlexFlow::parallel_tensor_dim_idx>" diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index ace2852071..bf762e3b65 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -1,102 +1,136 
@@ #include "compiler/allowed_machine_views.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" #include "op-attrs/parallel_tensor_dims.h" #include "op-attrs/parallel_tensor_shape.h" #include "pcg/machine_specification.h" #include "pcg/machine_view.h" +#include "pcg/machine_view_dim_idx_t.h" +#include "pcg/multi_dimensional_stride.dtg.h" #include "pcg/start_invariant_machine_view.h" #include "utils/containers/all_of.h" #include "utils/containers/cartesian_product.h" #include "utils/containers/extend.h" #include "utils/containers/filter.h" -#include "utils/containers/permutations.h" +#include "utils/containers/get_all_permutations.h" #include "utils/containers/product.h" #include "utils/containers/range.h" #include "utils/containers/replicate.h" #include "utils/containers/sorted.h" #include "utils/containers/transform.h" +#include "utils/containers/unordered_multiset_of.h" #include "utils/containers/unordered_set_of.h" -#include "utils/containers/without_order.h" #include "utils/containers/zip.h" #include "utils/graph/serial_parallel/serial_parallel_decomposition.h" #include "utils/overload.h" namespace FlexFlow { -static std::unordered_multiset - get_unordered_tensor_parallel_degrees(ParallelTensorShape const &shape) { - std::unordered_multiset degrees = - without_order(ff_ordered_shard_degrees(shape)); - degrees.insert(get_sum_degree(shape)); - degrees.insert(get_discard_copy_degree(shape)); +static std::unordered_multiset + get_num_devices_per_parallel_dim(ParallelTensorShape const &shape) { + std::unordered_multiset raw_device_nums = + unordered_multiset_of(ff_ordered_shard_degrees(shape)); + raw_device_nums.insert(get_sum_degree(shape)); + raw_device_nums.insert(get_discard_copy_degree(shape)); // filtering non-parallel dims - degrees = filter(degrees, [](int degree) { return degree != 1; }); - return degrees; + raw_device_nums = + filter(raw_device_nums, [](int num_devices) { return num_devices != 1; }); + + return transform(raw_device_nums, + [&](int 
num_devices) { return num_points_t{num_devices}; }); } bool is_valid_machine_view(MachineView const &mv, MachineSpecification const &machine_spec) { int num_devices = get_num_devices(machine_spec, get_device_type(mv)); - return (num_devices > get_raw_id(get_last_device_id(mv))); + return (num_devices > get_raw_id(get_maximum_device_id(mv))); } bool is_valid_machine_view(MachineView const &mv, ParallelTensorShape const &shape) { - std::vector mv_degrees = - transform(get_num_devices_per_dim(mv), - [](num_points_t degree) { return degree.unwrapped; }); - std::unordered_multiset unordered_tensor_degrees = - get_unordered_tensor_parallel_degrees(shape); + std::vector mv_num_devices = get_num_devices_per_dim(mv); + std::unordered_multiset tensor_num_devices = + get_num_devices_per_parallel_dim(shape); - return without_order(mv_degrees) == unordered_tensor_degrees; + return unordered_multiset_of(mv_num_devices) == tensor_num_devices; } +/* Generates a set of candidate `MachineView`s. + * The returned set includes all valid machine views, and might contain invalid + * ones. This function should never be used externally (see + * `get_allowed_machine_views` instead). There is no guarantee that a non-empty + * returned set contains a valid machine view (i.e. it's possible for all + * `MachineView`s to be invalid). + */ static std::unordered_set get_candidate_machine_views(MachineSpecification const &machine_spec, ParallelTensorShape const &shape, DeviceType const &device_type) { + // Explanation for `candidate_strides`: + // + // Naively, we could think that, given, for example, a (2,3) stride, it would + // result in 3*2=6 device-slots occupied for every actual device, and so + // we could say `max_stride_product = + // num_total_devices / num_devices_used_by_tensor` (where + // num_devices_used_by_tensor is the product of the parallel dims) and thus + // that the max stride across any dimension is `max_stride_product`. 
+ // + // This, however, doesn't quite work: consider, for example, a 2D MachineView + // with 2x2 devices, and stride 2 across each dimension, and suppose there are + // 9 total devices. While the "volume" of the MachineView is technically 4x4, + // it can really fit into a 3x3 (since part of the "external layer" of the 4x4 + // is not actually occupied by any of the 4 devices) and thus we could fit it + // with the existing devices. To address this, we thus compute not the number + // of total devices used by the tensor, but the total number of "inner" + // devices, essentially the ones such that they have associated with them a + // full stride "volume". So we find the max stride for these using the + // previous naive procedure (which works since they all have full stride + // volume) and we know that if a given stride is too large for them then + // surely it'll be too large for the full set of devices, which essentially + // contains them. (Note that we are overestimating `max_stride_upper_bound` + // by a huge margin). 
auto candidate_strides = - [](std::vector const &tensor_dims, - int total_devices) -> std::unordered_multiset> { + [](std::vector const &tensor_dims, + int total_devices) -> std::unordered_multiset { int min_num_devices_with_full_stride_volume = - product(transform(tensor_dims, [](int degree) { return degree - 1; })); + product(transform(tensor_dims, [](num_points_t const &num_devices) { + return num_devices.unwrapped - 1; + })); int max_stride_upper_bound = std::ceil(total_devices / min_num_devices_with_full_stride_volume); + std::vector single_stride_range = transform(range(1, max_stride_upper_bound + 1), [](int stride) { return stride_t(stride); }); - std::unordered_multiset> strides = + std::unordered_multiset> raw_stride_vectors = cartesian_product(replicate(tensor_dims.size(), single_stride_range)); - return strides; - }; - - auto get_strided_rectangle = [](std::vector const &strides, - std::vector const &num_points_per_dim) { - std::vector sides = - transform(zip(num_points_per_dim, strides), [&](auto const &p) { - return StridedRectangleSide(num_points_t(p.first), - stride_t(p.second)); + std::unordered_multiset strides = + transform(raw_stride_vectors, [](auto const &stride_vec) { + return MultiDimensionalStride{stride_vec}; }); - return StridedRectangle{sides}; + return strides; }; - std::unordered_multiset tensor_dims = - get_unordered_tensor_parallel_degrees(shape); + std::unordered_multiset tensor_dims = + get_num_devices_per_parallel_dim(shape); int total_devices = get_num_devices(machine_spec, device_type); std::unordered_set machine_views; - for (std::vector const &strides : + + for (MultiDimensionalStride const &strides : candidate_strides(sorted(tensor_dims), total_devices)) { StridedRectangle rect = get_strided_rectangle(strides, sorted(tensor_dims)); - auto start_inv_mv = StartInvariantMachineView{rect}; + StartInvariantMachineView start_inv_mv = StartInvariantMachineView{rect}; + for (int start_id : range(total_devices)) { device_id_t 
start_device = device_id_from_index(start_id, device_type); machine_views.insert( machine_view_from_start_invariant(start_inv_mv, start_device)); } } + return machine_views; } @@ -122,70 +156,4 @@ std::unordered_set start_invariant_from_machine_view); } -ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, - parallel_tensor_dim_idx idx) { - return idx.visit( - overload{[&](ff_dim_t shard_dim) { - return ParallelDim{shape.dims.shard_dims.at(shard_dim)}; - }, - [&](ReplicaType replica_type) { - ReplicaParallelDimSet replicas = shape.dims.replica_dims; - int degree = (ReplicaType::SUM == replica_type - ? replicas.sum_degree.value - : replicas.discard_copy_degree.value); - return ParallelDim{ReplicaParallelDim{degree, replica_type}}; - }}); -} - -std::unordered_set - get_parallel_tensor_indices(ParallelTensorShape const &shape) { - std::unordered_set indices; - extend(indices, transform(range(num_shard_dims(shape)), [](int idx) { - return parallel_tensor_dim_idx(ff_dim_t(idx)); - })); - indices.insert(parallel_tensor_dim_idx(ReplicaType::SUM)); - indices.insert(parallel_tensor_dim_idx(ReplicaType::DISCARD_COPY)); - return indices; -} - -std::unordered_set - get_machine_view_indices(MachineView const &mv) { - return transform(unordered_set_of(range(num_dims(mv))), - [](int idx) { return machine_view_dim_idx(idx); }); -} - -bool is_valid_injection(TensorToMachineViewInjection const &injection, - MachineView const &mv, - ParallelTensorShape const &shape) { - return all_of(injection.raw_bidict, [&](auto const pair) { - int mv_degree = get_side_at_idx(mv, pair.first).num_points.unwrapped; - int tensor_degree = get_degree(get_parallel_dim_at_idx(shape, pair.second)); - return (tensor_degree == mv_degree); - }); -} - -std::unordered_set - get_all_tensor_to_machine_view_injections( - MachineView const &mv, ParallelTensorShape const &shape) { - assert(is_valid_machine_view(mv, shape)); - std::unordered_set mv_indices = - get_machine_view_indices(mv); - 
std::unordered_set shape_indices = - get_parallel_tensor_indices(shape); - shape_indices = filter(shape_indices, [&](auto const idx) { - return get_degree(get_parallel_dim_at_idx(shape, idx)) != 1; - }); - - std::unordered_set injections; - for (std::vector const &p : - permutations(shape_indices)) { - TensorToMachineViewInjection injection = - TensorToMachineViewInjection(bidict(zip(sorted(mv_indices), p))); - if (is_valid_injection(injection, mv, shape)) { - injections.insert(injection); - } - } - return injections; -} - } // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc b/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc new file mode 100644 index 0000000000..2436689f2c --- /dev/null +++ b/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc @@ -0,0 +1,47 @@ +#include "compiler/machine_view_to_tensor_mapping.h" +#include "compiler/allowed_machine_views.h" +#include "op-attrs/parallel_tensor_dim_idx_t.h" +#include "pcg/machine_view_dim_idx_t.h" +#include "utils/containers/all_of.h" +#include "utils/containers/filter.h" +#include "utils/containers/get_all_permutations.h" +#include "utils/containers/sorted.h" +#include "utils/containers/zip.h" + +namespace FlexFlow { + +std::unordered_set + get_all_machine_view_to_tensor_mappings(MachineView const &mv, + ParallelTensorShape const &shape) { + assert(is_valid_machine_view(mv, shape)); + std::vector machine_view_dim_ordering = + sorted(get_machine_view_indices(mv)); + std::unordered_set shape_indices = + get_parallel_tensor_indices(shape); + shape_indices = + filter(shape_indices, [&](parallel_tensor_dim_idx_t const &idx) { + return get_degree(get_parallel_dim_at_idx(shape, idx)) != 1; + }); + + std::unordered_set result; + for (std::vector const &tensor_dim_orderings : + get_all_permutations(shape_indices)) { + MachineViewToTensorMapping mapping = MachineViewToTensorMapping( + bidict(zip(machine_view_dim_ordering, tensor_dim_orderings))); + if 
(is_valid_mapping(mapping, mv, shape)) { + result.insert(mapping); + } + } + return result; +} + +bool is_valid_mapping(MachineViewToTensorMapping const &mapping, + MachineView const &mv, + ParallelTensorShape const &shape) { + return all_of(mapping.raw_bidict, [&](auto const pair) { + int mv_degree = get_side_at_idx(mv, pair.first).num_points.unwrapped; + int tensor_degree = get_degree(get_parallel_dim_at_idx(shape, pair.second)); + return (tensor_degree == mv_degree); + }); +} +} // namespace FlexFlow diff --git a/lib/compiler/src/graph_utils.cc b/lib/compiler/src/graph_utils.cc index 08db219a21..3c20e51b90 100644 --- a/lib/compiler/src/graph_utils.cc +++ b/lib/compiler/src/graph_utils.cc @@ -3,7 +3,6 @@ #include "pcg/parallel_computation_graph/parallel_computation_graph.dtg.h" #include "pcg/parallel_computation_graph/parallel_computation_graph.h" #include "substitutions/sub_parallel_computation_graph.dtg.h" -#include "utils/containers/without_order.h" #include "utils/graph/serial_parallel/serial_parallel_decomposition.dtg.h" namespace FlexFlow { diff --git a/lib/compiler/test/src/allowed_machine_ views.cc b/lib/compiler/test/src/allowed_machine_ views.cc index 9187c25dd8..dd7a1c8367 100644 --- a/lib/compiler/test/src/allowed_machine_ views.cc +++ b/lib/compiler/test/src/allowed_machine_ views.cc @@ -4,13 +4,18 @@ #include "pcg/machine_view.h" #include "pcg/start_invariant_machine_view.h" #include "utils/containers/extend.h" +#include "utils/containers/range.h" +#include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_allowed_machine_views") { SUBCASE("1 degree of parallelism") { + MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; ParallelTensorShape shape = ParallelTensorShape{ ParallelTensorDims{ @@ -32,11 +37,13 @@ TEST_SUITE(FF_TEST_SUITE) { make_1d_machine_view(gpu_id_t(0), gpu_id_t(6), stride_t(2))}; std::unordered_set result = 
get_allowed_machine_views(ms, shape); + CHECK(correct == result); } SUBCASE("2 degrees of parallelism") { - MachineSpecification ms = MachineSpecification{18, 1, 1, 0, 0}; + + MachineSpecification ms = MachineSpecification{11, 1, 1, 0, 0}; ParallelTensorShape shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -51,32 +58,27 @@ TEST_SUITE(FF_TEST_SUITE) { }; auto make_2d_views = [&](int num_starts, int stride1, int stride2) { - std::unordered_set views; - for (int i = 0; i < num_starts; i++) { - StridedRectangle rect = StridedRectangle{ - {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, - StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; - MachineView mv = MachineView{device_id_t(gpu_id_t(i)), rect}; - views.insert(mv); - } - return views; + return unordered_set_of(transform(range(num_starts), [&](int start) { + return MachineView{ + device_id_t{gpu_id_t{start}}, + StridedRectangle{ + {StridedRectangleSide{num_points_t{2}, stride_t{stride1}}, + StridedRectangleSide{num_points_t{3}, stride_t{stride2}}}}, + }; + })); }; + std::unordered_set correct; extend(correct, - make_2d_views(/*num_starts*/ 13, /*stride1*/ 1, /*stride2*/ 1)); + make_2d_views(/*num_starts*/ 6, /*stride1*/ 1, /*stride2*/ 1)); extend(correct, - make_2d_views(/*num_starts*/ 8, /*stride1*/ 2, /*stride2*/ 1)); + make_2d_views(/*num_starts*/ 1, /*stride1*/ 2, /*stride2*/ 1)); extend(correct, - make_2d_views(/*num_starts*/ 9, /*stride1*/ 1, /*stride2*/ 2)); - extend(correct, - make_2d_views(/*num_starts*/ 3, /*stride1*/ 3, /*stride2*/ 1)); - extend(correct, - make_2d_views(/*num_starts*/ 5, /*stride1*/ 1, /*stride2*/ 3)); - extend(correct, - make_2d_views(/*num_starts*/ 1, /*stride1*/ 1, /*stride2*/ 4)); + make_2d_views(/*num_starts*/ 2, /*stride1*/ 1, /*stride2*/ 2)); std::unordered_set result = get_allowed_machine_views(ms, shape); + CHECK(result == correct); } } @@ -84,6 +86,7 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_allowed_start_invariant_machine_views") { 
SUBCASE("1 degree of parallelism") { + MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; ParallelTensorShape shape = ParallelTensorShape{ ParallelTensorDims{ @@ -103,11 +106,13 @@ TEST_SUITE(FF_TEST_SUITE) { make_1d_start_invariant_machine_view(num_points_t(3), stride_t(2))}; std::unordered_set result = get_allowed_start_invariant_machine_views(ms, shape); + CHECK(correct == result); } SUBCASE("2 degrees of parallelism") { - MachineSpecification ms = MachineSpecification(18, 1, 1, 0, 0); + + MachineSpecification ms = MachineSpecification(15, 1, 1, 0, 0); ParallelTensorShape shape = ParallelTensorShape{ ParallelTensorDims{ FFOrdered{ @@ -127,54 +132,18 @@ TEST_SUITE(FF_TEST_SUITE) { StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; return StartInvariantMachineView{rect}; }; + std::unordered_set correct = { make_2d_view(/*stride1*/ 1, /*stride2*/ 1), make_2d_view(/*stride1*/ 2, /*stride2*/ 1), make_2d_view(/*stride1*/ 1, /*stride2*/ 2), - make_2d_view(/*stride1*/ 3, /*stride2*/ 1), make_2d_view(/*stride1*/ 1, /*stride2*/ 3), - make_2d_view(/*stride1*/ 1, /*stride2*/ 4)}; + }; std::unordered_set result = get_allowed_start_invariant_machine_views(ms, shape); + CHECK(result == correct); } } - - TEST_CASE("get_all_tensor_to_machine_view_injections") { - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{2}, - DiscardCopyDegree{2}, - }, - }, - DataType::FLOAT, - }; - MachineView view = - MachineView{device_id_from_index(0, DeviceType::GPU), - StridedRectangle{ - {StridedRectangleSide{num_points_t(2), stride_t(1)}, - StridedRectangleSide{num_points_t(2), stride_t(4)}, - StridedRectangleSide{num_points_t(3), stride_t(1)}}, - }}; - bidict b1 = { - {machine_view_dim_idx(2), ff_dim_t(0)}, - {machine_view_dim_idx(1), ReplicaType::SUM}, - {machine_view_dim_idx(0), ReplicaType::DISCARD_COPY}}; - - bidict b2 = { - {machine_view_dim_idx(2), ff_dim_t(0)}, - 
{machine_view_dim_idx(0), ReplicaType::SUM}, - {machine_view_dim_idx(1), ReplicaType::DISCARD_COPY}}; - - std::unordered_set correct = { - TensorToMachineViewInjection{b1}, TensorToMachineViewInjection{b2}}; - std::unordered_set result = - get_all_tensor_to_machine_view_injections(view, shape); - CHECK(correct == result); - } } diff --git a/lib/compiler/test/src/machine_view_to_tensor_mapping.cc b/lib/compiler/test/src/machine_view_to_tensor_mapping.cc new file mode 100644 index 0000000000..f6a0484b9b --- /dev/null +++ b/lib/compiler/test/src/machine_view_to_tensor_mapping.cc @@ -0,0 +1,53 @@ +#include "compiler/machine_view_to_tensor_mapping.h" +#include "doctest/doctest.h" +#include "pcg/machine_view.h" +#include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("get_all_machine_view_to_tensor_mappings") { + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{2}, + DiscardCopyDegree{2}, + }, + }, + DataType::FLOAT, + }; + MachineView view = MachineView{ + device_id_from_index(0, DeviceType::GPU), + StridedRectangle{{ + StridedRectangleSide{num_points_t(2), stride_t(1)}, + StridedRectangleSide{num_points_t(2), stride_t(4)}, + StridedRectangleSide{num_points_t(3), stride_t(1)}, + }}, + }; + + bidict b1 = { + {machine_view_dim_idx_t(2), parallel_tensor_dim_idx_t{ff_dim_t(0)}}, + {machine_view_dim_idx_t(1), + parallel_tensor_dim_idx_t{ReplicaType::SUM}}, + {machine_view_dim_idx_t(0), + parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}}}; + + bidict b2 = { + {machine_view_dim_idx_t(2), parallel_tensor_dim_idx_t{ff_dim_t(0)}}, + {machine_view_dim_idx_t(0), + parallel_tensor_dim_idx_t{ReplicaType::SUM}}, + {machine_view_dim_idx_t(1), + parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}}}; + + std::unordered_set correct = { + MachineViewToTensorMapping{b1}, 
MachineViewToTensorMapping{b2}}; + std::unordered_set result = + get_all_machine_view_to_tensor_mappings(view, shape); + + CHECK(correct == result); + } +} diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h new file mode 100644 index 0000000000..ca9f214b29 --- /dev/null +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h @@ -0,0 +1,17 @@ +#ifndef _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIM_IDX_H +#define _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIM_IDX_H + +#include "op-attrs/parallel_tensor_dim_idx_t.dtg.h" +#include "op-attrs/parallel_tensor_shape.h" + +namespace FlexFlow { + +ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, + parallel_tensor_dim_idx_t idx); + +std::unordered_set + get_parallel_tensor_indices(ParallelTensorShape const &shape); + +} // namespace FlexFlow + +#endif diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx.variant.toml b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.variant.toml similarity index 79% rename from lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx.variant.toml rename to lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.variant.toml index 8dc906e378..9396cbcbe8 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx.variant.toml +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.variant.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "parallel_tensor_dim_idx" +name = "parallel_tensor_dim_idx_t" features = [ "eq", "ord", @@ -7,7 +7,6 @@ features = [ "json", "fmt", ] -explicit_constructors = false includes = [ "op-attrs/ff_dim.dtg.h", diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc new file mode 100644 index 0000000000..1487d7f49f --- /dev/null +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc @@ -0,0 +1,36 @@ + +#include 
"op-attrs/parallel_tensor_dim_idx_t.h" +#include "utils/containers/extend.h" +#include "utils/containers/range.h" +#include "utils/containers/transform.h" +#include "utils/overload.h" + +namespace FlexFlow { + +ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, + parallel_tensor_dim_idx_t idx) { + return idx.visit( + overload{[&](ff_dim_t shard_dim) { + return ParallelDim{shape.dims.shard_dims.at(shard_dim)}; + }, + [&](ReplicaType replica_type) { + ReplicaParallelDimSet replicas = shape.dims.replica_dims; + int degree = (ReplicaType::SUM == replica_type + ? replicas.sum_degree.value + : replicas.discard_copy_degree.value); + return ParallelDim{ReplicaParallelDim{degree, replica_type}}; + }}); +} + +std::unordered_set + get_parallel_tensor_indices(ParallelTensorShape const &shape) { + std::unordered_set indices; + extend(indices, transform(range(num_shard_dims(shape)), [](int idx) { + return parallel_tensor_dim_idx_t(ff_dim_t(idx)); + })); + indices.insert(parallel_tensor_dim_idx_t(ReplicaType::SUM)); + indices.insert(parallel_tensor_dim_idx_t(ReplicaType::DISCARD_COPY)); + return indices; +} + +} // namespace FlexFlow diff --git a/lib/pcg/include/pcg/device_coordinates.struct.toml b/lib/pcg/include/pcg/device_coordinates.struct.toml index 19373cca6e..c97729e1c7 100644 --- a/lib/pcg/include/pcg/device_coordinates.struct.toml +++ b/lib/pcg/include/pcg/device_coordinates.struct.toml @@ -11,11 +11,13 @@ features = [ includes = [ "", +] + +src_includes = [ "utils/hash/vector.h", "utils/fmt/vector.h", - ] [[fields]] -name = "coords" +name = "raw_coords" type = "std::vector" diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 75c7e3ea63..0e59d4acb2 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -5,7 +5,7 @@ #include "pcg/device_id.h" #include "pcg/device_type.dtg.h" #include "pcg/machine_view.dtg.h" -#include "pcg/machine_view_dim_idx.dtg.h" +#include 
"pcg/machine_view_dim_idx_t.dtg.h" #include "pcg/num_points_t.dtg.h" #include "pcg/side_size_t.dtg.h" #include @@ -14,9 +14,9 @@ namespace FlexFlow { std::unordered_set get_device_ids(MachineView const &mv); -device_id_t get_last_device_id(MachineView const &mv); +device_id_t get_maximum_device_id(MachineView const &mv); StridedRectangleSide get_side_at_idx(MachineView const &mv, - machine_view_dim_idx const &idx); + machine_view_dim_idx_t const &idx); size_t num_dims(MachineView const &mv); size_t num_devices(MachineView const &mv); diff --git a/lib/pcg/include/pcg/machine_view_dim_idx_t.h b/lib/pcg/include/pcg/machine_view_dim_idx_t.h new file mode 100644 index 0000000000..c247b2ed5d --- /dev/null +++ b/lib/pcg/include/pcg/machine_view_dim_idx_t.h @@ -0,0 +1,14 @@ +#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_DIM_IDX_H +#define _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_DIM_IDX_H + +#include "pcg/machine_view.dtg.h" +#include "pcg/machine_view_dim_idx_t.dtg.h" + +namespace FlexFlow { + +std::unordered_set + get_machine_view_indices(MachineView const &mv); + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/machine_view_dim_idx.struct.toml b/lib/pcg/include/pcg/machine_view_dim_idx_t.struct.toml similarity index 81% rename from lib/pcg/include/pcg/machine_view_dim_idx.struct.toml rename to lib/pcg/include/pcg/machine_view_dim_idx_t.struct.toml index fec0b17bae..79989d9b7b 100644 --- a/lib/pcg/include/pcg/machine_view_dim_idx.struct.toml +++ b/lib/pcg/include/pcg/machine_view_dim_idx_t.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "machine_view_dim_idx" +name = "machine_view_dim_idx_t" features = [ "eq", "ord", diff --git a/lib/pcg/include/pcg/multi_dimensional_stride.struct.toml b/lib/pcg/include/pcg/multi_dimensional_stride.struct.toml new file mode 100644 index 0000000000..9fa5a77f77 --- /dev/null +++ b/lib/pcg/include/pcg/multi_dimensional_stride.struct.toml @@ -0,0 +1,25 @@ +namespace = "FlexFlow" +name = "MultiDimensionalStride" 
+features = [ + "eq", + "ord", + "hash", + "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "", + "pcg/stride_t.dtg.h", +] + +src_includes = [ + "utils/hash/vector.h", + "utils/fmt/vector.h" + +] + +[[fields]] +name = "raw_strides" +type = "std::vector<::FlexFlow::stride_t>" diff --git a/lib/pcg/include/pcg/strided_rectangle.h b/lib/pcg/include/pcg/strided_rectangle.h index a416a96c59..6dab48b9b2 100644 --- a/lib/pcg/include/pcg/strided_rectangle.h +++ b/lib/pcg/include/pcg/strided_rectangle.h @@ -3,6 +3,7 @@ #include "op-attrs/ff_dim.dtg.h" #include "pcg/device_id_t.dtg.h" +#include "pcg/multi_dimensional_stride.dtg.h" #include "pcg/num_points_t.dtg.h" #include "pcg/side_size_t.dtg.h" #include "pcg/strided_rectangle_side.dtg.h" @@ -12,7 +13,6 @@ namespace FlexFlow { struct StridedRectangle { private: - std::vector sides; std::tuple const &> tie() const; friend struct std::hash; @@ -29,6 +29,9 @@ struct StridedRectangle { StridedRectangleSide const &at(int idx) const; std::vector const &get_sides() const; + +private: + std::vector sides; }; std::string format_as(StridedRectangle const &); std::ostream &operator<<(std::ostream &, StridedRectangle const &); @@ -37,6 +40,10 @@ size_t get_num_dims(StridedRectangle const &rect); num_points_t get_num_points(StridedRectangle const &rect); +StridedRectangle + get_strided_rectangle(MultiDimensionalStride const &strides, + std::vector const &num_points_per_dim); + } // namespace FlexFlow namespace std { diff --git a/lib/pcg/include/pcg/strided_rectangle_side.h b/lib/pcg/include/pcg/strided_rectangle_side.h index fb18b6d44e..8e0bb5784a 100644 --- a/lib/pcg/include/pcg/strided_rectangle_side.h +++ b/lib/pcg/include/pcg/strided_rectangle_side.h @@ -12,6 +12,8 @@ StridedRectangleSide strided_side_from_size_and_stride(side_size_t, side_size_t get_side_size(StridedRectangleSide const &); +std::vector get_points(StridedRectangleSide const &); + } // namespace FlexFlow #endif diff --git a/lib/pcg/src/pcg/device_id.cc 
b/lib/pcg/src/pcg/device_id.cc index f027b73a5d..a8cfe1f82f 100644 --- a/lib/pcg/src/pcg/device_id.cc +++ b/lib/pcg/src/pcg/device_id.cc @@ -32,17 +32,16 @@ int get_raw_id(device_id_t device_id) { case DeviceType::CPU: return unwrap_cpu(device_id).cpu_index; default: - throw mk_runtime_error( - fmt::format("Unsupported DeviceType {}", get_device_type(device_id))); + throw mk_runtime_error(fmt::format("Unsupported device {}", device_id)); } } device_id_t device_id_from_index(int idx, DeviceType device_type) { switch (device_type) { case DeviceType::GPU: - return device_id_t(gpu_id_t(idx)); + return device_id_t{gpu_id_t{idx}}; case DeviceType::CPU: - return device_id_t(cpu_id_t(idx)); + return device_id_t{cpu_id_t{idx}}; default: throw mk_runtime_error( fmt::format("Unsupported DeviceType {}", device_type)); diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 905a71d0ab..3b08c1885a 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,7 +1,7 @@ #include "pcg/machine_view.h" #include "pcg/device_coordinates.dtg.h" #include "pcg/device_id.h" -#include "pcg/machine_view_dim_idx.dtg.h" +#include "pcg/machine_view_dim_idx_t.dtg.h" #include "pcg/strided_rectangle.h" #include "pcg/strided_rectangle_side.h" #include "utils/containers.h" @@ -20,43 +20,40 @@ namespace FlexFlow { static device_id_t get_device_id(MachineView const &mv, DeviceCoordinates const &point) { - assert(point.coords.size() == get_num_dims(mv.rect)); + assert(point.raw_coords.size() == get_num_dims(mv.rect)); std::vector coefficients = scanl(mv.rect.get_sides(), 1, [](size_t const &result, StridedRectangleSide const &side) { return result * get_side_size(side).unwrapped; }); - size_t raw_id = - sum(transform(zip(coefficients, as_vector(point.coords)), - [](auto const pair) { return pair.first * pair.second; })) + - get_raw_id(mv.start); - - return ((get_device_type(mv) == DeviceType::CPU) - ? 
device_id_t(cpu_id_t(raw_id)) - : device_id_t(gpu_id_t(raw_id))); + size_t coord_offset = + sum(transform(zip(coefficients, as_vector(point.raw_coords)), + [](auto const pair) { return pair.first * pair.second; })); + size_t raw_id = get_raw_id(mv.start) + coord_offset; + return device_id_from_index(raw_id, get_device_type(mv)); } std::unordered_set get_device_ids(MachineView const &mv) { - std::vector> ranges = - transform(mv.rect.get_sides(), [](StridedRectangleSide const &side) { - return range(0, get_side_size(side).unwrapped, side.stride.unwrapped); + + std::vector> coordinate_ranges = + transform(mv.rect.get_sides(), get_points); + + std::unordered_set> raw_coordinates = + unordered_set_of(cartesian_product(coordinate_ranges)); + std::unordered_set device_coordinates = + transform(raw_coordinates, [](std::vector const &point) { + return DeviceCoordinates(point); }); - std::unordered_set devices_as_points = unordered_set_of( - transform(cartesian_product(ranges), - [](auto const &point) { return DeviceCoordinates(point); })); - std::unordered_set ids = - transform(devices_as_points, [&](DeviceCoordinates const &dc) { + + std::unordered_set device_ids = + transform(device_coordinates, [&](DeviceCoordinates const &dc) { return get_device_id(mv, dc); }); - return ids; + return device_ids; } -device_id_t get_last_device_id(MachineView const &mv) { - // DeviceCoordinates last_device = DeviceCoordinates( - // transform(mv.rect.get_sides(), [](StridedRectangleSide const &s) { - // return s.stride.unwrapped; - // })); +device_id_t get_maximum_device_id(MachineView const &mv) { return maximum(get_device_ids(mv)); } @@ -83,7 +80,7 @@ DeviceType get_device_type(MachineView const &mv) { } StridedRectangleSide get_side_at_idx(MachineView const &mv, - machine_view_dim_idx const &idx) { + machine_view_dim_idx_t const &idx) { return mv.rect.at(idx.unwrapped); } diff --git a/lib/pcg/src/pcg/machine_view_dim_idx_t.cc b/lib/pcg/src/pcg/machine_view_dim_idx_t.cc new file mode 
100644 index 0000000000..c7ed2ad108 --- /dev/null +++ b/lib/pcg/src/pcg/machine_view_dim_idx_t.cc @@ -0,0 +1,14 @@ +#include "pcg/machine_view_dim_idx_t.h" +#include "pcg/machine_view.h" +#include "utils/containers/range.h" +#include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" + +namespace FlexFlow { + +std::unordered_set + get_machine_view_indices(MachineView const &mv) { + return transform(unordered_set_of(range(num_dims(mv))), + [](int idx) { return machine_view_dim_idx_t{idx}; }); +} +} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/strided_rectangle.cc b/lib/pcg/src/pcg/strided_rectangle.cc index 277175a93d..cc656d5047 100644 --- a/lib/pcg/src/pcg/strided_rectangle.cc +++ b/lib/pcg/src/pcg/strided_rectangle.cc @@ -8,6 +8,7 @@ #include "utils/containers/product.h" #include "utils/containers/sorted.h" #include "utils/containers/transform.h" +#include "utils/containers/zip.h" #include "utils/fmt/vector.h" #include "utils/hash-utils.h" #include "utils/hash/tuple.h" @@ -21,7 +22,7 @@ StridedRectangle::StridedRectangle( std::tuple const &> StridedRectangle::tie() const { - return std::tie(sides); + return std::tie(this->sides); } bool StridedRectangle::operator==(StridedRectangle const &other) const { @@ -49,7 +50,7 @@ bool StridedRectangle::operator>=(StridedRectangle const &other) const { } std::vector const &StridedRectangle::get_sides() const { - return sides; + return this->sides; } StridedRectangleSide const &StridedRectangle::at(int idx) const { @@ -86,6 +87,16 @@ size_t get_size(StridedRectangle const &rect) { })); } +StridedRectangle + get_strided_rectangle(MultiDimensionalStride const &strides, + std::vector const &num_points_per_dim) { + std::vector sides = transform( + zip(num_points_per_dim, strides.raw_strides), [&](auto const &p) { + return StridedRectangleSide(num_points_t(p.first), stride_t(p.second)); + }); + return StridedRectangle{sides}; +}; + } // namespace FlexFlow namespace std { diff --git 
a/lib/pcg/src/pcg/strided_rectangle_side.cc b/lib/pcg/src/pcg/strided_rectangle_side.cc index fcfc0e854a..0ac5752c36 100644 --- a/lib/pcg/src/pcg/strided_rectangle_side.cc +++ b/lib/pcg/src/pcg/strided_rectangle_side.cc @@ -1,4 +1,5 @@ #include "pcg/strided_rectangle_side.h" +#include "utils/containers/range.h" #include "utils/exception.h" namespace FlexFlow { @@ -14,4 +15,8 @@ side_size_t get_side_size(StridedRectangleSide const &s) { return side_size_t{s.num_points.unwrapped * s.stride.unwrapped}; } +std::vector get_points(StridedRectangleSide const &s) { + return range(0, get_side_size(s).unwrapped, s.stride.unwrapped); +} + } // namespace FlexFlow diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index d759f302cd..a2de62b383 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -14,11 +14,11 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("MachineView - utility functions") { - StridedRectangle rect{{StridedRectangleSide(num_points_t(7), stride_t{5}), - StridedRectangleSide(num_points_t(10), stride_t{2}), - StridedRectangleSide(num_points_t(1), stride_t{4})}}; - gpu_id_t start(1); - MachineView mv{device_id_t{start}, rect}; + MachineView mv = MachineView{ + device_id_t{gpu_id_t{1}}, + StridedRectangle{{StridedRectangleSide(num_points_t{7}, stride_t{5}), + StridedRectangleSide(num_points_t{10}, stride_t{2}), + StridedRectangleSide(num_points_t{1}, stride_t{4})}}}; SUBCASE("num_dims") { CHECK(num_dims(mv) == 3); @@ -48,12 +48,23 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_device_ids") { SUBCASE("2D MachineView") { - StridedRectangle rect{{ - StridedRectangleSide(num_points_t(2), stride_t{3}), - StridedRectangleSide(num_points_t(2), stride_t{2}), - }}; - gpu_id_t start(0); - MachineView mv{device_id_t{start}, rect}; + // 2D MachineView describes a 4 x 6 area. 
+ // The devices are at coordinates (0,0), (0, 3), (2, 0), (2, 3) + // Thus we have as device ids: + // 0 = 0*1 + 0*4 + // 12 = 0*1 + 3*4 + // 2 = 2*1 + 0*4 + // 14 = 2*1 + 3*4 + // The coefficients are obtained by doing + //`scanl(area_coefficients, 1,product) = {1,4}` + // and ignoring the last term. + + MachineView mv = + MachineView{device_id_t{gpu_id_t{0}}, + StridedRectangle{{ + StridedRectangleSide(num_points_t(2), stride_t{3}), + StridedRectangleSide(num_points_t(2), stride_t{2}), + }}}; SUBCASE("get_device_ids") { std::unordered_set expected = make_gpu_device_ids({0, 2, 12, 14}); @@ -62,13 +73,24 @@ TEST_SUITE(FF_TEST_SUITE) { } } SUBCASE("3D MachineView") { - StridedRectangle rect{{ - StridedRectangleSide(num_points_t(1), stride_t{3}), - StridedRectangleSide(num_points_t(2), stride_t{1}), - StridedRectangleSide(num_points_t(2), stride_t{2}), - }}; - gpu_id_t start(1); - MachineView mv{device_id_t{start}, rect}; + // 3D MachineView describes a 3 x 2 x 4 area, and 1*2*2=4 devices. + // (Pre offset) the devices are at coordinates (0, 0, 0), (0, 0, 2), (0, + // 1, 0), (0, 1, 2) Thus (pre offset) we have as device ids: + // 0 = 0*1 + 0*3 + 0*(2*3) + // 12 = 0*1 + 0*3 + 2*(2*3) + // 3 = 0*1 + 1*3 + 0*(2*3) + // 15 = 0*1 + 1*3 + 2*(2*3) + // Where the coefficients are obtained by doing `scanl(area_coefficients, + // 1, product) = {1,3,6}` and ignoring the last term. We do, however, have + // 1 as a starting device, meaning all device-ids are offset by 1.
We thus + // have 1, 13, 4, 16 as device-ids + MachineView mv = + MachineView{device_id_t{gpu_id_t{1}}, + StridedRectangle{{ + StridedRectangleSide(num_points_t(1), stride_t{3}), + StridedRectangleSide(num_points_t(2), stride_t{1}), + StridedRectangleSide(num_points_t(2), stride_t{2}), + }}}; SUBCASE("get_device_ids") { std::unordered_set expected = @@ -79,17 +101,18 @@ TEST_SUITE(FF_TEST_SUITE) { } } - TEST_CASE("get_last_device_id") { + TEST_CASE("get_maximum_device_id") { SUBCASE("2D MachineView") { - StridedRectangle rect{{ - StridedRectangleSide(num_points_t(2), stride_t{3}), - StridedRectangleSide(num_points_t(2), stride_t{2}), - }}; - gpu_id_t start(0); - MachineView mv{device_id_t{start}, rect}; - SUBCASE("get_last_device_id") { - CHECK(get_last_device_id(mv) == device_id_t(gpu_id_t(14))); + MachineView mv = + MachineView{device_id_t{gpu_id_t{0}}, + StridedRectangle{{ + StridedRectangleSide(num_points_t(2), stride_t{3}), + StridedRectangleSide(num_points_t(2), stride_t{2}), + }}}; + + SUBCASE("get_maximum_device_id") { + CHECK(get_maximum_device_id(mv) == device_id_t(gpu_id_t(14))); } } @@ -99,20 +122,25 @@ TEST_SUITE(FF_TEST_SUITE) { StridedRectangleSide(num_points_t(2), stride_t{1}), StridedRectangleSide(num_points_t(2), stride_t{2}), }}; - gpu_id_t start(1); - MachineView mv{device_id_t{start}, rect}; - - SUBCASE("get_last_device_id") { - CHECK(get_last_device_id(mv) == device_id_t(gpu_id_t(16))); + MachineView mv{device_id_t{gpu_id_t{1}}, + StridedRectangle{{ + StridedRectangleSide(num_points_t(1), stride_t{3}), + StridedRectangleSide(num_points_t(2), stride_t{1}), + StridedRectangleSide(num_points_t(2), stride_t{2}), + }}}; + + SUBCASE("get_maximum_device_id") { + CHECK(get_maximum_device_id(mv) == device_id_t(gpu_id_t(16))); } } } TEST_CASE("make_1d_machine_view - GPU") { - StridedRectangle rect{{StridedRectangleSide{num_points_t{7}, stride_t{5}}}}; - device_id_t start_gpu{gpu_id_t{1}}; - MachineView gpu_mv{start_gpu, rect}; + device_id_t start_gpu = 
device_id_t{gpu_id_t{1}}; + MachineView gpu_mv = MachineView{ + start_gpu, + StridedRectangle{{StridedRectangleSide{num_points_t{7}, stride_t{5}}}}}; SUBCASE("make_1d_machine_view(gpu_id_t start, gpu_id_t stop, stride_t " "stride)") { @@ -132,10 +160,11 @@ TEST_SUITE(FF_TEST_SUITE) { } TEST_CASE("make_1d_machine_view - CPU") { - StridedRectangle rect{ - {StridedRectangleSide{num_points_t{11}, stride_t{4}}}}; - device_id_t start_cpu{cpu_id_t{2}}; - MachineView cpu_mv{start_cpu, rect}; + device_id_t start_cpu = device_id_t{cpu_id_t{2}}; + MachineView cpu_mv = + MachineView{start_cpu, + StridedRectangle{ + {StridedRectangleSide{num_points_t{11}, stride_t{4}}}}}; SUBCASE("make_1d_machine_view(cpu_id_t start, cpu_id_t stop, stride_t " "stride)") { diff --git a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc index 57cb419669..437c7213cb 100644 --- a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc @@ -6,16 +6,42 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("StartInvariantMachineView") { - StridedRectangle rect{{ - StridedRectangleSide(num_points_t(2), stride_t{3}), - StridedRectangleSide(num_points_t(2), stride_t{2}), + device_id_t start = device_id_t{gpu_id_t{0}}; + StridedRectangle rect = StridedRectangle{{ + StridedRectangleSide(num_points_t{2}, stride_t{3}), + StridedRectangleSide(num_points_t{2}, stride_t{2}), }}; - device_id_t start = device_id_t(gpu_id_t(5)); - MachineView input = MachineView{start, rect}; - MachineView result = machine_view_from_start_invariant( - start_invariant_from_machine_view(input), start); - MachineView correct = input; - CHECK(correct == input); + SUBCASE("To StartInvariantMachineView") { + + MachineView input = MachineView{start, rect}; + + StartInvariantMachineView correct = StartInvariantMachineView{rect}; + StartInvariantMachineView result = + start_invariant_from_machine_view(input); + CHECK(correct == result); + } + 
+ SUBCASE("From StartInvariantMachineView") { + + StartInvariantMachineView input = StartInvariantMachineView{rect}; + MachineView correct = MachineView{start, rect}; + MachineView result = machine_view_from_start_invariant(input, start); + CHECK(correct == result); + } + + SUBCASE("To and From") { + MachineView correct = MachineView{start, rect}; + MachineView result = machine_view_from_start_invariant( + start_invariant_from_machine_view(correct), start); + CHECK(correct == result); + } + + SUBCASE("From and To") { + StartInvariantMachineView correct = StartInvariantMachineView{rect}; + StartInvariantMachineView result = start_invariant_from_machine_view( + machine_view_from_start_invariant(correct, start)); + CHECK(correct == result); + } } } diff --git a/lib/utils/include/utils/bidict/bidict.h b/lib/utils/include/utils/bidict/bidict.h index ff17697d31..4122346880 100644 --- a/lib/utils/include/utils/bidict/bidict.h +++ b/lib/utils/include/utils/bidict/bidict.h @@ -2,6 +2,7 @@ #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_BIDICT_BIDICT_H #include "utils/fmt/unordered_map.h" +#include "utils/hash/unordered_map.h" #include #include #include @@ -22,7 +23,7 @@ struct bidict { } } - bidict(std::vector> init) + explicit bidict(std::vector> init) : bidict(init.begin(), init.end()) {} bool contains_l(L const &l) const { diff --git a/lib/utils/include/utils/containers/cartesian_product.h b/lib/utils/include/utils/containers/cartesian_product.h index c4ad07aa8e..1ed87a99c9 100644 --- a/lib/utils/include/utils/containers/cartesian_product.h +++ b/lib/utils/include/utils/containers/cartesian_product.h @@ -9,24 +9,26 @@ namespace FlexFlow { -template > -auto cartesian_product(std::vector const &containers) { - std::unordered_multiset result; - - std::function recurse = [&](V ¤t, size_t depth) { - if (depth == containers.size()) { - result.insert(current); - return; - } - - for (const auto &item : containers.at(depth)) { - current.push_back(item); - recurse(current, depth + 1); - 
current.pop_back(); - } - }; - - V current; +template +std::unordered_multiset> + cartesian_product(std::vector const &containers) { + std::unordered_multiset> result; + + std::function &, size_t)> recurse = + [&](std::vector ¤t, size_t depth) { + if (depth == containers.size()) { + result.insert(current); + return; + } + + for (E const &item : containers.at(depth)) { + current.push_back(item); + recurse(current, depth + 1); + current.pop_back(); + } + }; + + std::vector current; recurse(current, 0); return result; diff --git a/lib/utils/include/utils/containers/filter.h b/lib/utils/include/utils/containers/filter.h index f65b50fdbc..07f25dc348 100644 --- a/lib/utils/include/utils/containers/filter.h +++ b/lib/utils/include/utils/containers/filter.h @@ -44,10 +44,10 @@ std::map filter(std::map const &m, F const &f) { return result; } -template -std::unordered_multiset filter(std::unordered_multiset const &m, - F const &f) { - std::unordered_multiset result; +template +std::unordered_multiset filter(std::unordered_multiset const &m, + F const &f) { + std::unordered_multiset result; std::copy_if(m.cbegin(), m.cend(), std::inserter(result, result.begin()), f); return result; } diff --git a/lib/utils/include/utils/containers/get_all_permutations.h b/lib/utils/include/utils/containers/get_all_permutations.h new file mode 100644 index 0000000000..2b29618621 --- /dev/null +++ b/lib/utils/include/utils/containers/get_all_permutations.h @@ -0,0 +1,96 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GET_ALL_PERMUTATIONS_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GET_ALL_PERMUTATIONS_H + +#include "utils/containers/sorted.h" +#include +#include +#include + +namespace FlexFlow { + +template +struct permutations_container { +public: + template + permutations_container(It start, It end) : current(start, end) { + std::sort(this->current.begin(), this->current.end()); + } + + struct iterator { + public: + using difference_type = long; + using value_type = 
std::vector; + using pointer = std::vector const *; + using reference = std::vector const &; + using iterator_category = std::input_iterator_tag; + + public: + explicit iterator(permutations_container const &c, bool done) + : c(c), done(done) {} + + iterator &operator++() { + assert(!this->done); + + this->done = !std::next_permutation(this->c.current.begin(), + this->c.current.end()); + return *this; + } + + iterator operator++(int) { + iterator retval = *this; + ++(*this); + return retval; + } + + bool operator==(iterator other) const { + return &this->c == &other.c && this->done == other.done; + } + + bool operator!=(iterator other) const { + return &this->c != &other.c || this->done != other.done; + } + + reference operator*() const { + return this->c.current; + } + + private: + permutations_container const &c; + bool done; + }; + + using const_iterator = iterator; + using value_type = typename iterator::value_type; + using difference_type = typename iterator::difference_type; + using pointer = typename iterator::pointer; + using reference = typename iterator::reference; + using const_reference = typename iterator::reference; + + iterator begin() const { + return iterator(*this, false); + } + + iterator end() const { + return iterator(*this, true); + } + + const_iterator cbegin() const { + return iterator(*this, false); + } + + const_iterator cend() const { + return iterator(*this, true); + } + +private: + mutable std::vector current; +}; + +template +permutations_container get_all_permutations(C const &c) { + return permutations_container(c.cbegin(), c.cend()); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/permutations.h b/lib/utils/include/utils/containers/permutations.h deleted file mode 100644 index 803bd22df0..0000000000 --- a/lib/utils/include/utils/containers/permutations.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_PERMUTATIONS_H -#define 
_FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_PERMUTATIONS_H - -#include "utils/containers/sorted.h" -#include "utils/hash/vector.h" -#include -#include -#include - -namespace FlexFlow { - -template > -auto permutations(C const &container) { - std::unordered_set result; - - V elements = sorted(container); - - result.insert(elements); - - while (std::next_permutation(elements.begin(), elements.end())) { - result.insert(elements); - } - - return result; -} - -} // namespace FlexFlow - -#endif diff --git a/lib/utils/include/utils/containers/range.h b/lib/utils/include/utils/containers/range.h index ca6352be25..ff6b9f44ee 100644 --- a/lib/utils/include/utils/containers/range.h +++ b/lib/utils/include/utils/containers/range.h @@ -1,31 +1,12 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_RANGE_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_RANGE_H -#include -#include #include namespace FlexFlow { -std::vector range(int start, int end, int step = 1) { - assert(step != 0); - - std::vector result; - if (step > 0) { - for (int i = start; i < end; i += step) { - result.push_back(i); - } - } else { - for (int i = start; i > end; i += step) { - result.push_back(i); - } - } - return result; -} - -std::vector range(int end) { - return range(0, end); -} +std::vector range(int start, int end, int step = 1); +std::vector range(int end); } // namespace FlexFlow diff --git a/lib/utils/include/utils/containers/replicate.h b/lib/utils/include/utils/containers/replicate.h index 0bed081ad5..46d31c04a4 100644 --- a/lib/utils/include/utils/containers/replicate.h +++ b/lib/utils/include/utils/containers/replicate.h @@ -6,9 +6,9 @@ namespace FlexFlow { template -std::vector replicate(std::size_t n, T const &element) { +std::vector replicate(int n, T const &element) { std::vector result; - for (std::size_t i = 0; i < n; ++i) { + for (int i = 0; i < n; ++i) { result.push_back(element); } return result; diff --git a/lib/utils/include/utils/containers/unordered_multiset_of.h 
b/lib/utils/include/utils/containers/unordered_multiset_of.h new file mode 100644 index 0000000000..ca25f4d15d --- /dev/null +++ b/lib/utils/include/utils/containers/unordered_multiset_of.h @@ -0,0 +1,15 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_UNORDERED_MULTISET_OF_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_UNORDERED_MULTISET_OF_H + +#include + +namespace FlexFlow { + +template +std::unordered_multiset unordered_multiset_of(C const &c) { + return {c.cbegin(), c.cend()}; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/without_order.h b/lib/utils/include/utils/containers/without_order.h deleted file mode 100644 index 7199b2bd4a..0000000000 --- a/lib/utils/include/utils/containers/without_order.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_WITHOUT_ORDER_H -#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_WITHOUT_ORDER_H - -#include - -namespace FlexFlow { - -template -std::unordered_multiset without_order(C const &c) { - return {c.cbegin(), c.cend()}; -} - -} // namespace FlexFlow - -#endif diff --git a/lib/utils/src/utils/containers/get_all_permutations.cc b/lib/utils/src/utils/containers/get_all_permutations.cc new file mode 100644 index 0000000000..0fa4e16f08 --- /dev/null +++ b/lib/utils/src/utils/containers/get_all_permutations.cc @@ -0,0 +1 @@ +#include "utils/containers/get_all_permutations.h" diff --git a/lib/utils/src/utils/containers/permutations.cc b/lib/utils/src/utils/containers/permutations.cc deleted file mode 100644 index 5876f84440..0000000000 --- a/lib/utils/src/utils/containers/permutations.cc +++ /dev/null @@ -1 +0,0 @@ -#include "utils/containers/permutations.h" diff --git a/lib/utils/src/utils/containers/range.cc b/lib/utils/src/utils/containers/range.cc index 8612f334b7..d3ebd1063b 100644 --- a/lib/utils/src/utils/containers/range.cc +++ b/lib/utils/src/utils/containers/range.cc @@ -1 +1,26 @@ #include "utils/containers/range.h" 
+#include + +namespace FlexFlow { + +std::vector range(int start, int end, int step) { + assert(step != 0); + + std::vector result; + if (step > 0) { + for (int i = start; i < end; i += step) { + result.push_back(i); + } + } else { + for (int i = start; i > end; i += step) { + result.push_back(i); + } + } + return result; +} + +std::vector range(int end) { + return range(0, end); +} + +} // namespace FlexFlow diff --git a/lib/utils/src/utils/containers/unordered_multiset_of.cc b/lib/utils/src/utils/containers/unordered_multiset_of.cc new file mode 100644 index 0000000000..5add043c76 --- /dev/null +++ b/lib/utils/src/utils/containers/unordered_multiset_of.cc @@ -0,0 +1 @@ +#include "utils/containers/unordered_multiset_of.h" diff --git a/lib/utils/src/utils/containers/without_order.cc b/lib/utils/src/utils/containers/without_order.cc deleted file mode 100644 index 3ef44b8044..0000000000 --- a/lib/utils/src/utils/containers/without_order.cc +++ /dev/null @@ -1 +0,0 @@ -#include "utils/containers/without_order.h" diff --git a/lib/utils/test/src/test_containers.cc b/lib/utils/test/src/test_containers.cc index af7792dc6d..3449c9d12d 100644 --- a/lib/utils/test/src/test_containers.cc +++ b/lib/utils/test/src/test_containers.cc @@ -119,7 +119,7 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(result == expected); } - TEST_CASE("without_order") { + TEST_CASE("unordered_multiset_of") { std::vector v = {1, 4, 6, 4, 6}; std::unordered_set expected = {1, 4, 6}; CHECK(unordered_set_of(v) == expected); diff --git a/lib/utils/test/src/utils/containers/permutations.cc b/lib/utils/test/src/utils/containers/permutations.cc deleted file mode 100644 index 3cf35579fe..0000000000 --- a/lib/utils/test/src/utils/containers/permutations.cc +++ /dev/null @@ -1,34 +0,0 @@ -#include "utils/containers/permutations.h" -#include "utils/hash/unordered_set.h" -#include -#include -#include - -using namespace FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("permutations") { - SUBCASE("size=1") { - 
std::vector vec = {1}; - auto result = permutations(vec); - std::unordered_set> correct = {{1}}; - CHECK(result == correct); - } - - SUBCASE("size=3") { - std::vector vec = {1, 2, 3}; - auto result = permutations(vec); - std::unordered_set> correct = { - {1, 2, 3}, {1, 3, 2}, {2, 1, 3}, {2, 3, 1}, {3, 1, 2}, {3, 2, 1}}; - CHECK(result == correct); - } - - SUBCASE("elements repeated") { - std::vector vec = {1, 2, 2}; - auto result = permutations(vec); - std::unordered_set> correct = { - {1, 2, 2}, {2, 1, 2}, {2, 2, 1}}; - CHECK(result == correct); - } - } -} diff --git a/lib/utils/test/src/utils/containers/without_order.cc b/lib/utils/test/src/utils/containers/unordered_multiset_of.cc similarity index 60% rename from lib/utils/test/src/utils/containers/without_order.cc rename to lib/utils/test/src/utils/containers/unordered_multiset_of.cc index a2cf453837..064798de08 100644 --- a/lib/utils/test/src/utils/containers/without_order.cc +++ b/lib/utils/test/src/utils/containers/unordered_multiset_of.cc @@ -1,13 +1,13 @@ -#include "utils/containers/without_order.h" +#include "utils/containers/unordered_multiset_of.h" #include #include using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("without_order") { + TEST_CASE("unordered_multiset_of") { std::vector input = {1, 2, 3, 3, 2, 3}; - std::unordered_multiset result = without_order(input); + std::unordered_multiset result = unordered_multiset_of(input); std::unordered_multiset correct = {1, 2, 3, 3, 2, 3}; CHECK(result == correct); } From 7d078a1ab58c2785fe5e9711fc10e1feba122418 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 27 Aug 2024 18:07:34 -0700 Subject: [PATCH 19/34] Formatting fix --- lib/pcg/test/src/pcg/machine_view.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index a2de62b383..c3f0a29f44 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc 
@@ -55,8 +55,8 @@ TEST_SUITE(FF_TEST_SUITE) { // 12 = 0*1 + 3*4 // 2 = 2*1 + 0*4 // 14 = 2*1 + 3*4 - // The coefficients are obtained by doing - //`scanl(area_coefficients, 1,product) = {1,4}` + // The coefficients are obtained by doing + //`scanl(area_coefficients, 1,product) = {1,4}` // and ignoring the last term. MachineView mv = From 40ab5ef7af7ebe3c052850c1f62f3373125e2aa7 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Mon, 9 Sep 2024 14:56:54 -0700 Subject: [PATCH 20/34] new machine-view interface --- .../src/compiler/allowed_machine_views.cc | 56 ++-- .../test/src/allowed_machine_ views.cc | 149 ---------- .../test/src/allowed_machine_views.cc | 151 ++++++++++ .../src/machine_view_to_tensor_mapping.cc | 16 +- .../test/src/test_local_cost_estimator.cc | 2 +- lib/pcg/include/pcg/machine_specification.h | 2 + .../machine_specification_dimension.enum.toml | 14 + lib/pcg/include/pcg/machine_view.h | 53 ++-- lib/pcg/include/pcg/machine_view.struct.toml | 9 +- .../pcg/machine_view_projection.struct.toml | 22 ++ .../pcg/start_invariant_machine_view.h | 7 +- .../start_invariant_machine_view.struct.toml | 5 + lib/pcg/src/pcg/machine_specification.cc | 12 + lib/pcg/src/pcg/machine_view.cc | 190 ++++++------- .../src/pcg/start_invariant_machine_view.cc | 15 +- lib/pcg/test/src/pcg/machine_view.cc | 261 ++++++++++++------ .../src/pcg/start_invariant_machine_view.cc | 19 +- 17 files changed, 566 insertions(+), 417 deletions(-) delete mode 100644 lib/compiler/test/src/allowed_machine_ views.cc create mode 100644 lib/compiler/test/src/allowed_machine_views.cc create mode 100644 lib/pcg/include/pcg/machine_specification_dimension.enum.toml create mode 100644 lib/pcg/include/pcg/machine_view_projection.struct.toml diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index bf762e3b65..928abc485a 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ 
b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -41,9 +41,7 @@ static std::unordered_multiset bool is_valid_machine_view(MachineView const &mv, MachineSpecification const &machine_spec) { - - int num_devices = get_num_devices(machine_spec, get_device_type(mv)); - return (num_devices > get_raw_id(get_maximum_device_id(mv))); + return false; // TODO: fix } bool is_valid_machine_view(MachineView const &mv, @@ -57,9 +55,11 @@ bool is_valid_machine_view(MachineView const &mv, } /* Generates a set of candidate `MachineView`s. - * The returned set includes all valid machine views, and might contain invalid + * The returned set includes all valid machine views, and might contain + invalid * ones. This function should never be used externally (see - * `get_allowed_machine_views` instead). There is no guarantee that a non-empty + * `get_allowed_machine_views` instead). There is no guarantee that a + non-empty * returned set contains a valid machine view (i.e. its possible for all * `MachineView`s to be invalid) */ @@ -68,29 +68,6 @@ static std::unordered_set ParallelTensorShape const &shape, DeviceType const &device_type) { - // Explanation for `candidate_strides`: - // - // Naively, we could think that, given, for example, a (2,3) stride, it would - // result in 3*2=6 tiles device-slots occupied for every actual device, and so - // we could say `max_stride_product = - // num_total_devicesnum_devices_used_by_tensor` (where - // num_devices_used_by_tensor is the product of the parallel dims) and thus - // that the max stride across any dimension is `max_stride_product`. - // - // This however, doesn't quite work: consider, for example, a 2D MachineView - // with 2x2 devices, and stride 2 across each dimension, and suppose there are - // 9 total device. 
While the "volume" of the MachineView is technically 4x4, - // it can really fit into a 3x3 (since part of the "external layer" of the 4x4 - // is not actually occupied by any of the 4 devices) and thus we could fit it - // with the existing devices. To address this, we thus compute not the number - // of total devices used by the tensor, but, the total number of "inner" - // devices, essentially the ones such that they have associated with them a - // full stride "volume". So we find the max stride for these using the - // previous naive procedure (which works since they all have full stride - // volume) and we know that if a given stride is too large for them then - // surely it'll be too large for the full set of devices, which essentially - // contains them. (Note that we are overestimating `max_stride_upper_bound` - // by a huge margin). auto candidate_strides = [](std::vector const &tensor_dims, int total_devices) -> std::unordered_multiset { @@ -113,6 +90,21 @@ static std::unordered_set return strides; }; + auto candidate_starts = [](std::vector ordered_tensor_dims) { + std::vector> coordinate_ranges = + transform(ordered_tensor_dims, [&](num_points_t const &num_points) { + return range(num_points.unwrapped); + }); + + std::unordered_set> raw_coordinates = + unordered_set_of(cartesian_product(coordinate_ranges)); + std::unordered_set device_coordinates = + transform(raw_coordinates, [](std::vector const &point) { + return DeviceCoordinates(point); + }); + return device_coordinates; + }; + std::unordered_multiset tensor_dims = get_num_devices_per_parallel_dim(shape); int total_devices = get_num_devices(machine_spec, device_type); @@ -122,12 +114,12 @@ static std::unordered_set for (MultiDimensionalStride const &strides : candidate_strides(sorted(tensor_dims), total_devices)) { StridedRectangle rect = get_strided_rectangle(strides, sorted(tensor_dims)); - StartInvariantMachineView start_inv_mv = StartInvariantMachineView{rect}; + StartInvariantMachineView 
start_inv_mv = + StartInvariantMachineView{rect, device_type}; - for (int start_id : range(total_devices)) { - device_id_t start_device = device_id_from_index(start_id, device_type); + for (DeviceCoordinates start : candidate_starts(sorted(tensor_dims))) { machine_views.insert( - machine_view_from_start_invariant(start_inv_mv, start_device)); + machine_view_from_start_invariant(start_inv_mv, start)); } } diff --git a/lib/compiler/test/src/allowed_machine_ views.cc b/lib/compiler/test/src/allowed_machine_ views.cc deleted file mode 100644 index dd7a1c8367..0000000000 --- a/lib/compiler/test/src/allowed_machine_ views.cc +++ /dev/null @@ -1,149 +0,0 @@ -#include "compiler/allowed_machine_views.h" -#include "doctest/doctest.h" -#include "pcg/machine_specification.dtg.h" -#include "pcg/machine_view.h" -#include "pcg/start_invariant_machine_view.h" -#include "utils/containers/extend.h" -#include "utils/containers/range.h" -#include "utils/containers/transform.h" -#include "utils/containers/unordered_set_of.h" - -using namespace FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - - TEST_CASE("get_allowed_machine_views") { - - SUBCASE("1 degree of parallelism") { - - MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }; - - std::unordered_set correct = { - make_1d_machine_view(gpu_id_t(0), gpu_id_t(3), stride_t(1)), - make_1d_machine_view(gpu_id_t(1), gpu_id_t(4), stride_t(1)), - make_1d_machine_view(gpu_id_t(2), gpu_id_t(5), stride_t(1)), - make_1d_machine_view(gpu_id_t(0), gpu_id_t(6), stride_t(2))}; - std::unordered_set result = - get_allowed_machine_views(ms, shape); - - CHECK(correct == result); - } - - SUBCASE("2 degrees of parallelism") { - - MachineSpecification ms = MachineSpecification{11, 1, 1, 0, 0}; - ParallelTensorShape shape = 
ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{2}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }; - - auto make_2d_views = [&](int num_starts, int stride1, int stride2) { - return unordered_set_of(transform(range(num_starts), [&](int start) { - return MachineView{ - device_id_t{gpu_id_t{start}}, - StridedRectangle{ - {StridedRectangleSide{num_points_t{2}, stride_t{stride1}}, - StridedRectangleSide{num_points_t{3}, stride_t{stride2}}}}, - }; - })); - }; - - std::unordered_set correct; - extend(correct, - make_2d_views(/*num_starts*/ 6, /*stride1*/ 1, /*stride2*/ 1)); - extend(correct, - make_2d_views(/*num_starts*/ 1, /*stride1*/ 2, /*stride2*/ 1)); - extend(correct, - make_2d_views(/*num_starts*/ 2, /*stride1*/ 1, /*stride2*/ 2)); - - std::unordered_set result = - get_allowed_machine_views(ms, shape); - - CHECK(result == correct); - } - } - - TEST_CASE("get_allowed_start_invariant_machine_views") { - - SUBCASE("1 degree of parallelism") { - - MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }; - - std::unordered_set correct = { - make_1d_start_invariant_machine_view(num_points_t(3), stride_t(1)), - make_1d_start_invariant_machine_view(num_points_t(3), stride_t(2))}; - std::unordered_set result = - get_allowed_start_invariant_machine_views(ms, shape); - - CHECK(correct == result); - } - - SUBCASE("2 degrees of parallelism") { - - MachineSpecification ms = MachineSpecification(15, 1, 1, 0, 0); - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{2}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }; - - auto make_2d_view = [&](int stride1, int 
stride2) { - StridedRectangle rect = StridedRectangle{ - {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, - StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; - return StartInvariantMachineView{rect}; - }; - - std::unordered_set correct = { - make_2d_view(/*stride1*/ 1, /*stride2*/ 1), - make_2d_view(/*stride1*/ 2, /*stride2*/ 1), - make_2d_view(/*stride1*/ 1, /*stride2*/ 2), - make_2d_view(/*stride1*/ 1, /*stride2*/ 3), - }; - - std::unordered_set result = - get_allowed_start_invariant_machine_views(ms, shape); - - CHECK(result == correct); - } - } -} diff --git a/lib/compiler/test/src/allowed_machine_views.cc b/lib/compiler/test/src/allowed_machine_views.cc new file mode 100644 index 0000000000..1cee33ffbb --- /dev/null +++ b/lib/compiler/test/src/allowed_machine_views.cc @@ -0,0 +1,151 @@ +// #include "compiler/allowed_machine_views.h" +// #include "doctest/doctest.h" +// #include "pcg/machine_specification.dtg.h" +// #include "pcg/machine_view.h" +// #include "pcg/start_invariant_machine_view.h" +// #include "utils/containers/extend.h" +// #include "utils/containers/range.h" +// #include "utils/containers/transform.h" +// #include "utils/containers/unordered_set_of.h" + +// using namespace FlexFlow; + +// TEST_SUITE(FF_TEST_SUITE) { + +// TEST_CASE("get_allowed_machine_views") { + +// SUBCASE("1 degree of parallelism") { + +// MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; +// ParallelTensorShape shape = ParallelTensorShape{ +// ParallelTensorDims{ +// FFOrdered{ +// ShardParallelDim{10, 3}, +// }, +// ReplicaParallelDimSet{ +// SumDegree{1}, +// DiscardCopyDegree{1}, +// }, +// }, +// DataType::FLOAT, +// }; + +// std::unordered_set correct = { +// make_1d_machine_view(0, 3, stride_t(1)), +// make_1d_machine_view(1, 4, stride_t(1)), +// make_1d_machine_view(2, 5, stride_t(1)), +// make_1d_machine_view(0, 6, stride_t(2))}; +// std::unordered_set result = +// get_allowed_machine_views(ms, shape); + +// CHECK(correct == result); 
+// } + +// SUBCASE("2 degrees of parallelism") { + +// MachineSpecification ms = MachineSpecification{11, 1, 1, 0, 0}; +// ParallelTensorShape shape = ParallelTensorShape{ +// ParallelTensorDims{ +// FFOrdered{ +// ShardParallelDim{10, 3}, +// }, +// ReplicaParallelDimSet{ +// SumDegree{2}, +// DiscardCopyDegree{1}, +// }, +// }, +// DataType::FLOAT, +// }; + +// auto make_2d_views = [&](int num_starts, int stride1, int stride2) { +// return unordered_set_of(transform(range(num_starts), [&](int start) { +// return MachineView{ +// device_id_t{gpu_id_t{start}}, +// StridedRectangle{ +// {StridedRectangleSide{num_points_t{2}, stride_t{stride1}}, +// StridedRectangleSide{num_points_t{3}, +// stride_t{stride2}}}}, +// }; +// })); +// }; + +// std::unordered_set correct; +// extend(correct, +// make_2d_views(/*num_starts*/ 6, /*stride1*/ 1, /*stride2*/ 1)); +// extend(correct, +// make_2d_views(/*num_starts*/ 1, /*stride1*/ 2, /*stride2*/ 1)); +// extend(correct, +// make_2d_views(/*num_starts*/ 2, /*stride1*/ 1, /*stride2*/ 2)); + +// std::unordered_set result = +// get_allowed_machine_views(ms, shape); + +// CHECK(result == correct); +// } +// } + +// TEST_CASE("get_allowed_start_invariant_machine_views") { + +// SUBCASE("1 degree of parallelism") { + +// MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; +// ParallelTensorShape shape = ParallelTensorShape{ +// ParallelTensorDims{ +// FFOrdered{ +// ShardParallelDim{10, 3}, +// }, +// ReplicaParallelDimSet{ +// SumDegree{1}, +// DiscardCopyDegree{1}, +// }, +// }, +// DataType::FLOAT, +// }; + +// std::unordered_set correct = { +// make_1d_start_invariant_machine_view(num_points_t(3), stride_t(1)), +// make_1d_start_invariant_machine_view(num_points_t(3), +// stride_t(2))}; +// std::unordered_set result = +// get_allowed_start_invariant_machine_views(ms, shape); + +// CHECK(correct == result); +// } + +// SUBCASE("2 degrees of parallelism") { + +// MachineSpecification ms = MachineSpecification(15, 1, 1, 
0, 0); +// ParallelTensorShape shape = ParallelTensorShape{ +// ParallelTensorDims{ +// FFOrdered{ +// ShardParallelDim{10, 3}, +// }, +// ReplicaParallelDimSet{ +// SumDegree{2}, +// DiscardCopyDegree{1}, +// }, +// }, +// DataType::FLOAT, +// }; + +// auto make_2d_view = [&](int stride1, int stride2) { +// StridedRectangle rect = StridedRectangle{ +// {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, +// StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; +// return StartInvariantMachineView{rect}; +// }; + +// std::unordered_set correct = { +// make_2d_view(/*stride1*/ 1, /*stride2*/ 1), +// make_2d_view(/*stride1*/ 2, /*stride2*/ 1), +// make_2d_view(/*stride1*/ 1, /*stride2*/ 2), +// make_2d_view(/*stride1*/ 1, /*stride2*/ 3), +// }; + +// std::unordered_set result = +// get_allowed_start_invariant_machine_views(ms, shape); + +// CHECK(result == correct); +// } +// } +// } diff --git a/lib/compiler/test/src/machine_view_to_tensor_mapping.cc b/lib/compiler/test/src/machine_view_to_tensor_mapping.cc index f6a0484b9b..dc798dcc5c 100644 --- a/lib/compiler/test/src/machine_view_to_tensor_mapping.cc +++ b/lib/compiler/test/src/machine_view_to_tensor_mapping.cc @@ -20,14 +20,14 @@ TEST_SUITE(FF_TEST_SUITE) { }, DataType::FLOAT, }; - MachineView view = MachineView{ - device_id_from_index(0, DeviceType::GPU), - StridedRectangle{{ - StridedRectangleSide{num_points_t(2), stride_t(1)}, - StridedRectangleSide{num_points_t(2), stride_t(4)}, - StridedRectangleSide{num_points_t(3), stride_t(1)}, - }}, - }; + MachineView view = + MachineView{DeviceCoordinates{{0, 0, 0}}, + StridedRectangle{{ + StridedRectangleSide{num_points_t(2), stride_t(1)}, + StridedRectangleSide{num_points_t(2), stride_t(4)}, + StridedRectangleSide{num_points_t(3), stride_t(1)}, + }}, + DeviceType::GPU}; bidict b1 = { {machine_view_dim_idx_t(2), parallel_tensor_dim_idx_t{ff_dim_t(0)}}, diff --git a/lib/local-execution/test/src/test_local_cost_estimator.cc 
b/lib/local-execution/test/src/test_local_cost_estimator.cc index 2bd0acc222..0e72a3b02a 100644 --- a/lib/local-execution/test/src/test_local_cost_estimator.cc +++ b/lib/local-execution/test/src/test_local_cost_estimator.cc @@ -66,7 +66,7 @@ TEST_SUITE(FF_CUDA_TEST_SUITE) { inputs_shape, inputs_shape, inputs_shape}, std::vector{weight_attrs}, std::vector{output_attrs}, - make_1d_machine_view(gpu_id_t{0}, gpu_id_t{1})); + make_1d_machine_view(0, 1)); CHECK(result.total_elapsed_time > 0); CHECK(result.total_mem_usage > 0); diff --git a/lib/pcg/include/pcg/machine_specification.h b/lib/pcg/include/pcg/machine_specification.h index 22f9c12744..0e42864824 100644 --- a/lib/pcg/include/pcg/machine_specification.h +++ b/lib/pcg/include/pcg/machine_specification.h @@ -10,6 +10,8 @@ int get_num_gpus(MachineSpecification const &ms); int get_num_cpus(MachineSpecification const &ms); int get_num_devices(MachineSpecification const &ms, DeviceType const &device_type); +int get_num_devices_per_node(MachineSpecification const &ms, + DeviceType const &device_type); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/machine_specification_dimension.enum.toml b/lib/pcg/include/pcg/machine_specification_dimension.enum.toml new file mode 100644 index 0000000000..01ad4cd5d8 --- /dev/null +++ b/lib/pcg/include/pcg/machine_specification_dimension.enum.toml @@ -0,0 +1,14 @@ +namespace = "FlexFlow" +name = "MachineSpecificationDimension" +features = [ + "hash", + "json", + "fmt", + "rapidcheck", +] + +[[values]] +name = "INTER" + +[[values]] +name = "INTRA" diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 0e59d4acb2..97712fbdee 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -4,8 +4,10 @@ #include "pcg/device_coordinates.dtg.h" #include "pcg/device_id.h" #include "pcg/device_type.dtg.h" +#include "pcg/machine_specification.dtg.h" #include "pcg/machine_view.dtg.h" #include 
"pcg/machine_view_dim_idx_t.dtg.h" +#include "pcg/machine_view_projection.dtg.h" #include "pcg/num_points_t.dtg.h" #include "pcg/side_size_t.dtg.h" #include @@ -13,47 +15,40 @@ namespace FlexFlow { -std::unordered_set get_device_ids(MachineView const &mv); -device_id_t get_maximum_device_id(MachineView const &mv); +std::unordered_set + get_devices_coordinates(MachineView const &mv); +DeviceCoordinates get_maximum_device_coordinates(MachineView const &mv); StridedRectangleSide get_side_at_idx(MachineView const &mv, machine_view_dim_idx_t const &idx); +device_id_t get_device_id(MachineView const &mv, + DeviceCoordinates const &coordinates, + MachineSpecification const &ms, + MachineViewProjection const &projection); +std::unordered_set + get_device_ids(MachineView const &mv, + MachineSpecification const &ms, + MachineViewProjection const &projection); + size_t num_dims(MachineView const &mv); size_t num_devices(MachineView const &mv); std::vector get_num_devices_per_dim(MachineView const &mv); std::vector get_side_size_per_dim(MachineView const &mv); -DeviceType get_device_type(MachineView const &mv); - -MachineView make_1d_machine_view(gpu_id_t start, - gpu_id_t stop, - stride_t stride = stride_t{1}); -MachineView make_1d_machine_view(cpu_id_t start, - cpu_id_t stop, - stride_t stride = stride_t{1}); -MachineView make_1d_machine_view(device_id_t start, - device_id_t stop, - stride_t stride = stride_t{1}); +MachineView make_1d_machine_view(int start, + int stop, + stride_t stride = stride_t{1}, + DeviceType device_type = DeviceType::GPU); -MachineView make_1d_machine_view(cpu_id_t start, - num_points_t num_points, - stride_t stride = stride_t{1}); -MachineView make_1d_machine_view(gpu_id_t start, +MachineView make_1d_machine_view(int start, num_points_t num_points, - stride_t stride = stride_t{1}); -MachineView make_1d_machine_view(device_id_t start, - num_points_t num_points, - stride_t stride = stride_t{1}); + stride_t stride = stride_t{1}, + DeviceType device_type 
= DeviceType::GPU); -MachineView make_1d_machine_view(cpu_id_t start, - side_size_t interval_size, - stride_t stride = stride_t{1}); -MachineView make_1d_machine_view(gpu_id_t start, - side_size_t interval_size, - stride_t stride = stride_t{1}); -MachineView make_1d_machine_view(device_id_t start, +MachineView make_1d_machine_view(int start, side_size_t interval_size, - stride_t stride = stride_t{1}); + stride_t stride = stride_t{1}, + DeviceType device_type = DeviceType::GPU); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/machine_view.struct.toml b/lib/pcg/include/pcg/machine_view.struct.toml index eb933ed9b7..d7f21a7004 100644 --- a/lib/pcg/include/pcg/machine_view.struct.toml +++ b/lib/pcg/include/pcg/machine_view.struct.toml @@ -10,14 +10,19 @@ features = [ ] includes = [ - "pcg/device_id_t.dtg.h", "pcg/strided_rectangle.h", + "pcg/device_coordinates.dtg.h", + "pcg/device_type.dtg.h", ] [[fields]] name = "start" -type = "::FlexFlow::device_id_t" +type = "::FlexFlow::DeviceCoordinates" [[fields]] name = "rect" type = "::FlexFlow::StridedRectangle" + +[[fields]] +name = "device_type" +type = "::FlexFlow::DeviceType" diff --git a/lib/pcg/include/pcg/machine_view_projection.struct.toml b/lib/pcg/include/pcg/machine_view_projection.struct.toml new file mode 100644 index 0000000000..9836a39ad5 --- /dev/null +++ b/lib/pcg/include/pcg/machine_view_projection.struct.toml @@ -0,0 +1,22 @@ +namespace = "FlexFlow" +name = "MachineViewProjection" +features = [ + "eq", + # "ord", + "hash", + # "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "pcg/machine_view.dtg.h", + "pcg/machine_view_dim_idx_t.dtg.h", + "pcg/machine_specification_dimension.dtg.h", + "utils/hash/unordered_map.h", + "utils/fmt/unordered_map.h", +] + +[[fields]] +name = "raw_projection" +type = "std::unordered_map<::FlexFlow::machine_view_dim_idx_t, ::FlexFlow::MachineSpecificationDimension>" diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.h 
b/lib/pcg/include/pcg/start_invariant_machine_view.h index b560dd095a..28478cc1a6 100644 --- a/lib/pcg/include/pcg/start_invariant_machine_view.h +++ b/lib/pcg/include/pcg/start_invariant_machine_view.h @@ -8,13 +8,12 @@ namespace FlexFlow { MachineView machine_view_from_start_invariant(StartInvariantMachineView const &mv, - device_id_t const &start_id); + DeviceCoordinates const &start_id); StartInvariantMachineView start_invariant_from_machine_view(MachineView const &mv); -StartInvariantMachineView - make_1d_start_invariant_machine_view(num_points_t num_points, - stride_t stride); +StartInvariantMachineView make_1d_start_invariant_machine_view( + num_points_t num_points, stride_t stride, DeviceType device_type); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml b/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml index d6ad89a14e..ea65af4591 100644 --- a/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml +++ b/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml @@ -11,8 +11,13 @@ features = [ includes = [ "pcg/strided_rectangle.h", + "pcg/device_type.dtg.h", ] [[fields]] name = "rect" type = "::FlexFlow::StridedRectangle" + +[[fields]] +name = "device_type" +type = "::FlexFlow::DeviceType" diff --git a/lib/pcg/src/pcg/machine_specification.cc b/lib/pcg/src/pcg/machine_specification.cc index aa4a047d8b..e7e5f4568f 100644 --- a/lib/pcg/src/pcg/machine_specification.cc +++ b/lib/pcg/src/pcg/machine_specification.cc @@ -21,4 +21,16 @@ int get_num_devices(MachineSpecification const &ms, throw mk_runtime_error("Unknown DeviceType {}", device_type); } } + +int get_num_devices_per_node(MachineSpecification const &ms, + DeviceType const &device_type) { + switch (device_type) { + case DeviceType::GPU: + return ms.num_gpus_per_node; + case DeviceType::CPU: + return ms.num_cpus_per_node; + default: + throw mk_runtime_error("Unknown DeviceType {}", device_type); + } +} } // namespace FlexFlow diff 
--git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 3b08c1885a..9ad33b55c2 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,12 +1,17 @@ #include "pcg/machine_view.h" #include "pcg/device_coordinates.dtg.h" #include "pcg/device_id.h" +#include "pcg/machine_specification.h" #include "pcg/machine_view_dim_idx_t.dtg.h" +#include "pcg/machine_view_projection.dtg.h" #include "pcg/strided_rectangle.h" #include "pcg/strided_rectangle_side.h" #include "utils/containers.h" #include "utils/containers/as_vector.h" #include "utils/containers/cartesian_product.h" +#include "utils/containers/contains.h" +#include "utils/containers/filter_values.h" +#include "utils/containers/keys.h" #include "utils/containers/product.h" #include "utils/containers/range.h" #include "utils/containers/reversed.h" @@ -18,26 +23,13 @@ namespace FlexFlow { -static device_id_t get_device_id(MachineView const &mv, - DeviceCoordinates const &point) { - assert(point.raw_coords.size() == get_num_dims(mv.rect)); - std::vector coefficients = - scanl(mv.rect.get_sides(), - 1, - [](size_t const &result, StridedRectangleSide const &side) { - return result * get_side_size(side).unwrapped; - }); - size_t coord_offset = - sum(transform(zip(coefficients, as_vector(point.raw_coords)), - [](auto const pair) { return pair.first * pair.second; })); - size_t raw_id = get_raw_id(mv.start) + coord_offset; - return device_id_from_index(raw_id, get_device_type(mv)); -} - -std::unordered_set get_device_ids(MachineView const &mv) { +std::unordered_set + get_devices_coordinates(MachineView const &mv) { std::vector> coordinate_ranges = - transform(mv.rect.get_sides(), get_points); + transform(mv.rect.get_sides(), [&](StridedRectangleSide const &side) { + return range(side.num_points.unwrapped); + }); std::unordered_set> raw_coordinates = unordered_set_of(cartesian_product(coordinate_ranges)); @@ -45,16 +37,77 @@ std::unordered_set get_device_ids(MachineView 
const &mv) { transform(raw_coordinates, [](std::vector const &point) { return DeviceCoordinates(point); }); + return device_coordinates; +} - std::unordered_set device_ids = - transform(device_coordinates, [&](DeviceCoordinates const &dc) { - return get_device_id(mv, dc); - }); - return device_ids; +DeviceCoordinates get_maximum_device_coordinates(MachineView const &mv) { + return maximum(get_devices_coordinates(mv)); } -device_id_t get_maximum_device_id(MachineView const &mv) { - return maximum(get_device_ids(mv)); +device_id_t get_device_id(MachineView const &mv, + DeviceCoordinates const &coordinates, + MachineSpecification const &ms, + MachineViewProjection const &projection) { + auto inter_projection = filter_values( + projection.raw_projection, [](MachineSpecificationDimension const &dim) { + return dim == MachineSpecificationDimension::INTER; + }); + auto intra_projection = filter_values( + projection.raw_projection, [](MachineSpecificationDimension const &dim) { + return dim == MachineSpecificationDimension::INTRA; + }); + + DeviceCoordinates transformed_coordinates = DeviceCoordinates{transform( + zip(coordinates.raw_coords, mv.rect.get_sides()), [&](auto const &pair) { + return pair.first * pair.second.stride.unwrapped; + })}; + transformed_coordinates = DeviceCoordinates{ + transform(zip(transformed_coordinates.raw_coords, mv.start.raw_coords), + [&](auto const &pair) { return pair.first + pair.second; })}; + auto get_coordinate = [&](auto const &sub_projection) { + std::vector relevant_dimensions = + sorted(keys(sub_projection)); + std::vector relevant_side_sizes = + transform(relevant_dimensions, [&](auto const &idx) { + return get_side_size(get_side_at_idx(mv, idx)); + }); + std::vector coefficients = + scanl(relevant_side_sizes, + 1, + [](size_t const &result, side_size_t const &side_size) { + return result * side_size.unwrapped; + }); + std::vector filtered_coord; + for (int i = 0; i < transformed_coordinates.raw_coords.size(); ++i) { + if 
(contains(relevant_dimensions, machine_view_dim_idx_t{i})) { + filtered_coord.push_back(transformed_coordinates.raw_coords[i]); + } + } + return sum( + transform(zip(coefficients, filtered_coord), + [](auto const pair) { return pair.first * pair.second; })); + }; + int inter_coordinate = get_coordinate(inter_projection); + int intra_coordinate = get_coordinate(intra_projection); + if (inter_coordinate >= ms.num_nodes || + intra_coordinate >= get_num_devices_per_node(ms, mv.device_type)) { + throw mk_runtime_error( + fmt::format("DeviceCoordinates{} is out of bound", coordinates)); + } + int idx = inter_coordinate * get_num_devices_per_node(ms, mv.device_type) + + intra_coordinate; + return device_id_from_index(idx, mv.device_type); +} + +std::unordered_set + get_device_ids(MachineView const &mv, + MachineSpecification const &ms, + MachineViewProjection const &projection) { + std::unordered_set devices_ids; + for (DeviceCoordinates const &coordinates : get_devices_coordinates(mv)) { + devices_ids.insert(get_device_id(mv, coordinates, ms, projection)); + } + return devices_ids; } size_t num_dims(MachineView const &mv) { @@ -75,10 +128,6 @@ size_t num_devices(MachineView const &mv) { return get_num_points(mv.rect).unwrapped; } -DeviceType get_device_type(MachineView const &mv) { - return get_device_type(mv.start); -} - StridedRectangleSide get_side_at_idx(MachineView const &mv, machine_view_dim_idx_t const &idx) { return mv.rect.at(idx.unwrapped); @@ -94,26 +143,13 @@ static StridedRectangle make_1d_rect(int start, int stop, stride_t stride) { return rect; } -MachineView - make_1d_machine_view(gpu_id_t start, gpu_id_t stop, stride_t stride) { - StridedRectangle rect = make_1d_rect(start.gpu_index, stop.gpu_index, stride); - return MachineView{device_id_t{start}, rect}; -} - -MachineView - make_1d_machine_view(cpu_id_t start, cpu_id_t stop, stride_t stride) { - StridedRectangle rect = make_1d_rect(start.cpu_index, stop.cpu_index, stride); - return 
MachineView{device_id_t{start}, rect}; -} - -MachineView - make_1d_machine_view(device_id_t start, device_id_t stop, stride_t stride) { - assert(get_device_type(start) == get_device_type(stop)); - if (get_device_type(start) == DeviceType::CPU) { - return make_1d_machine_view(unwrap_cpu(start), unwrap_cpu(stop), stride); - } - assert(get_device_type(start) == DeviceType::GPU); - return make_1d_machine_view(unwrap_gpu(start), unwrap_gpu(stop), stride); +MachineView make_1d_machine_view(int start, + int stop, + stride_t stride, + DeviceType device_type) { + StridedRectangle rect = make_1d_rect(start, stop, stride); + DeviceCoordinates start_coordinate = DeviceCoordinates{{start}}; + return MachineView{start_coordinate, rect, device_type}; } static StridedRectangle @@ -122,29 +158,13 @@ static StridedRectangle start, start + num_points.unwrapped * stride.unwrapped, stride); } -MachineView make_1d_machine_view(cpu_id_t start, - num_points_t num_points, - stride_t stride) { - StridedRectangle rect = make_1d_rect(start.cpu_index, num_points, stride); - return MachineView{device_id_t{start}, rect}; -} - -MachineView make_1d_machine_view(gpu_id_t start, +MachineView make_1d_machine_view(int start, num_points_t num_points, - stride_t stride) { - StridedRectangle rect = make_1d_rect(start.gpu_index, num_points, stride); - return MachineView{device_id_t{start}, rect}; -} - -MachineView make_1d_machine_view(device_id_t start, - num_points_t num_points, - stride_t stride) { - if (get_device_type(start) == DeviceType::CPU) { - return make_1d_machine_view(unwrap_cpu(start), num_points, stride); - } else { - assert(get_device_type(start) == DeviceType::GPU); - return make_1d_machine_view(unwrap_gpu(start), num_points, stride); - } + stride_t stride, + DeviceType device_type) { + StridedRectangle rect = make_1d_rect(start, num_points, stride); + DeviceCoordinates start_coordinate = DeviceCoordinates{{start}}; + return MachineView{start_coordinate, rect, device_type}; } static 
StridedRectangle @@ -152,29 +172,13 @@ static StridedRectangle return make_1d_rect(start, start + interval_size.unwrapped, stride); } -MachineView make_1d_machine_view(cpu_id_t start, +MachineView make_1d_machine_view(int start, side_size_t interval_size, - stride_t stride) { - StridedRectangle rect = make_1d_rect(start.cpu_index, interval_size, stride); - return MachineView{device_id_t{start}, rect}; -} - -MachineView make_1d_machine_view(gpu_id_t start, - side_size_t interval_size, - stride_t stride) { - StridedRectangle rect = make_1d_rect(start.gpu_index, interval_size, stride); - return MachineView{device_id_t{start}, rect}; -} -MachineView make_1d_machine_view(device_id_t start, - side_size_t interval_size, - stride_t stride) { - - if (get_device_type(start) == DeviceType::CPU) { - return make_1d_machine_view(unwrap_cpu(start), interval_size, stride); - } else { - assert(get_device_type(start) == DeviceType::GPU); - return make_1d_machine_view(unwrap_gpu(start), interval_size, stride); - } + stride_t stride, + DeviceType device_type) { + StridedRectangle rect = make_1d_rect(start, interval_size, stride); + DeviceCoordinates start_coordinate = DeviceCoordinates{{start}}; + return MachineView{start_coordinate, rect, device_type}; } } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/start_invariant_machine_view.cc b/lib/pcg/src/pcg/start_invariant_machine_view.cc index c481279042..687ed4a3a2 100644 --- a/lib/pcg/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/src/pcg/start_invariant_machine_view.cc @@ -5,18 +5,19 @@ namespace FlexFlow { MachineView machine_view_from_start_invariant( StartInvariantMachineView const &start_invariant_mv, - device_id_t const &start_id) { - return MachineView{start_id, start_invariant_mv.rect}; + DeviceCoordinates const &start) { + return MachineView{ + start, start_invariant_mv.rect, start_invariant_mv.device_type}; } StartInvariantMachineView start_invariant_from_machine_view(MachineView const &mv) { - return 
StartInvariantMachineView{mv.rect}; + return StartInvariantMachineView{mv.rect, mv.device_type}; } -StartInvariantMachineView - make_1d_start_invariant_machine_view(num_points_t num_points, - stride_t stride) { +StartInvariantMachineView make_1d_start_invariant_machine_view( + num_points_t num_points, stride_t stride, DeviceType device_type) { return StartInvariantMachineView{ - StridedRectangle{{StridedRectangleSide{num_points, stride}}}}; + StridedRectangle{{StridedRectangleSide{num_points, stride}}}, + device_type}; } } // namespace FlexFlow diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index c3f0a29f44..1df8ac6e89 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -4,21 +4,17 @@ #include "test/utils/doctest.h" #include "utils/containers/transform.h" -std::unordered_set - make_gpu_device_ids(std::unordered_set ids) { - return transform(ids, [](int id) { return device_id_t(gpu_id_t(id)); }); -} - using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("MachineView - utility functions") { MachineView mv = MachineView{ - device_id_t{gpu_id_t{1}}, + DeviceCoordinates{{0, 0, 0}}, StridedRectangle{{StridedRectangleSide(num_points_t{7}, stride_t{5}), StridedRectangleSide(num_points_t{10}, stride_t{2}), - StridedRectangleSide(num_points_t{1}, stride_t{4})}}}; + StridedRectangleSide(num_points_t{1}, stride_t{4})}}, + DeviceType::GPU}; SUBCASE("num_dims") { CHECK(num_dims(mv) == 3); @@ -39,36 +35,27 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector result = get_num_devices_per_dim(mv); CHECK(expected == result); } - - SUBCASE("get_device_type") { - CHECK(get_device_type(mv) == DeviceType::GPU); - } } - TEST_CASE("get_device_ids") { + TEST_CASE("get_devices_coordinates") { SUBCASE("2D MachineView") { - // 2D MachineView describes a 4 x 6 area. 
- // The devices are at coordinates (0,0), (0, 3), (2, 0), (2, 3) - // Thus we have as device ids: - // 0 = 0*1 + 0*4 - // 12 = 0*1 + 3*4 - // 2 = 2*1 + 0*4 - // 14 = 2*1 + 3*4 - // The coefficients are obtained by doing - //`scanl(area_coefficients, 1,product) = {1,4}` - // and ignoring the last term. MachineView mv = - MachineView{device_id_t{gpu_id_t{0}}, + MachineView{DeviceCoordinates{{0, 0}}, StridedRectangle{{ StridedRectangleSide(num_points_t(2), stride_t{3}), StridedRectangleSide(num_points_t(2), stride_t{2}), - }}}; - SUBCASE("get_device_ids") { - std::unordered_set expected = - make_gpu_device_ids({0, 2, 12, 14}); - std::unordered_set result = get_device_ids(mv); + }}, + DeviceType::GPU}; + SUBCASE("get_devices_coordinates") { + std::unordered_set expected = { + {DeviceCoordinates{{0, 0}}, + DeviceCoordinates{{0, 1}}, + DeviceCoordinates{{1, 0}}, + DeviceCoordinates{{1, 1}}}}; + std::unordered_set result = + get_devices_coordinates(mv); CHECK(expected == result); } } @@ -84,101 +71,205 @@ TEST_SUITE(FF_TEST_SUITE) { // 1, product) = {1,3,6}` and ignoring the last term. We do, however, have // 1 as a starting device, meaning all device-id are offset by 1. 
We thus // have 1, 13, 4, 16 as device-ids + MachineView mv = - MachineView{device_id_t{gpu_id_t{1}}, + MachineView{DeviceCoordinates{{0, 1, 2}}, StridedRectangle{{ StridedRectangleSide(num_points_t(1), stride_t{3}), StridedRectangleSide(num_points_t(2), stride_t{1}), StridedRectangleSide(num_points_t(2), stride_t{2}), - }}}; + }}, + DeviceType::GPU}; - SUBCASE("get_device_ids") { - std::unordered_set expected = - make_gpu_device_ids({1, 4, 13, 16}); - std::unordered_set result = get_device_ids(mv); + SUBCASE("get_devices_coordinates") { + std::unordered_set expected = { + {DeviceCoordinates{{0, 0, 0}}, + DeviceCoordinates{{0, 0, 1}}, + DeviceCoordinates{{0, 1, 0}}, + DeviceCoordinates{{0, 1, 1}}}}; + std::unordered_set result = + get_devices_coordinates(mv); CHECK(expected == result); } } } - TEST_CASE("get_maximum_device_id") { + TEST_CASE("get_maximum_device_coordinates") { SUBCASE("2D MachineView") { MachineView mv = - MachineView{device_id_t{gpu_id_t{0}}, + MachineView{DeviceCoordinates{{0, 0}}, StridedRectangle{{ StridedRectangleSide(num_points_t(2), stride_t{3}), StridedRectangleSide(num_points_t(2), stride_t{2}), - }}}; + }}, + DeviceType::GPU}; - SUBCASE("get_maximum_device_id") { - CHECK(get_maximum_device_id(mv) == device_id_t(gpu_id_t(14))); + SUBCASE("get_maximum_device_coordinates") { + CHECK(get_maximum_device_coordinates(mv) == DeviceCoordinates{{1, 1}}); } } SUBCASE("3D MachineView") { - StridedRectangle rect{{ - StridedRectangleSide(num_points_t(1), stride_t{3}), - StridedRectangleSide(num_points_t(2), stride_t{1}), - StridedRectangleSide(num_points_t(2), stride_t{2}), - }}; - MachineView mv{device_id_t{gpu_id_t{1}}, - StridedRectangle{{ - StridedRectangleSide(num_points_t(1), stride_t{3}), - StridedRectangleSide(num_points_t(2), stride_t{1}), - StridedRectangleSide(num_points_t(2), stride_t{2}), - }}}; - - SUBCASE("get_maximum_device_id") { - CHECK(get_maximum_device_id(mv) == device_id_t(gpu_id_t(16))); + + MachineView mv = + 
MachineView{DeviceCoordinates{{0, 1, 2}}, + StridedRectangle{{ + StridedRectangleSide(num_points_t(1), stride_t{3}), + StridedRectangleSide(num_points_t(2), stride_t{1}), + StridedRectangleSide(num_points_t(2), stride_t{2}), + }}, + DeviceType::GPU}; + + SUBCASE("get_maximum_device_coordinates") { + CHECK(get_maximum_device_coordinates(mv) == + DeviceCoordinates{{0, 1, 1}}); } } } - TEST_CASE("make_1d_machine_view - GPU") { + TEST_CASE("make_1d_machine_view") { - device_id_t start_gpu = device_id_t{gpu_id_t{1}}; - MachineView gpu_mv = MachineView{ - start_gpu, - StridedRectangle{{StridedRectangleSide{num_points_t{7}, stride_t{5}}}}}; + DeviceCoordinates start = DeviceCoordinates{{1}}; + MachineView mv = MachineView{ + start, + StridedRectangle{{StridedRectangleSide{num_points_t{7}, stride_t{5}}}}, + DeviceType::GPU}; - SUBCASE("make_1d_machine_view(gpu_id_t start, gpu_id_t stop, stride_t " - "stride)") { - MachineView result = make_1d_machine_view( - start_gpu, device_id_t{gpu_id_t(1 + 7 * 5)}, stride_t{5}); - MachineView correct = gpu_mv; + SUBCASE("make_1d_machine_view(int start, int stop, stride_t " + "stride,DeviceType device_type)") { + MachineView result = + make_1d_machine_view(1, 1 + 7 * 5, stride_t{5}, DeviceType::GPU); + MachineView correct = mv; CHECK(result == correct); } SUBCASE("make_1d_machine_view(gpu_id_t start, num_points_t num_points, " - "stride_t stride)") { - MachineView result = - make_1d_machine_view(start_gpu, num_points_t{7}, stride_t{5}); - MachineView correct = gpu_mv; + "stride_t stride,DeviceType device_type)") { + MachineView result = make_1d_machine_view( + 1, num_points_t{7}, stride_t{5}, DeviceType::GPU); + MachineView correct = mv; CHECK(result == correct); } - } - TEST_CASE("make_1d_machine_view - CPU") { - device_id_t start_cpu = device_id_t{cpu_id_t{2}}; - MachineView cpu_mv = - MachineView{start_cpu, - StridedRectangle{ - {StridedRectangleSide{num_points_t{11}, stride_t{4}}}}}; - - SUBCASE("make_1d_machine_view(cpu_id_t start, 
cpu_id_t stop, stride_t " - "stride)") { + SUBCASE("make_1d_machine_view(gpu_id_t start, side_size_t side_size, " + "stride_t stride,DeviceType device_type)") { MachineView result = make_1d_machine_view( - start_cpu, device_id_t{cpu_id_t(2 + 11 * 4)}, stride_t{4}); - MachineView correct = cpu_mv; + 1, side_size_t{7 * 5}, stride_t{5}, DeviceType::GPU); + MachineView correct = mv; CHECK(result == correct); } - SUBCASE("make_1d_machine_view(cpu_id_t start, num_points_t num_points, " - "stride_t stride)") { - MachineView result = - make_1d_machine_view(start_cpu, num_points_t{11}, stride_t{4}); - MachineView correct = cpu_mv; - CHECK(result == correct); + } + + TEST_CASE("get_device_id") { + SUBCASE("1D case") { + MachineView mv = + make_1d_machine_view(1, num_points_t{3}, stride_t{2}); // 1 3 5 + MachineSpecification ms = MachineSpecification{ + 1, 0, 6, 0, 0}; // Single node with 6 GPUs (0,1,2,3,4,5) + MachineViewProjection projection = MachineViewProjection{ + {{machine_view_dim_idx_t{0}, MachineSpecificationDimension::INTRA}}}; + + SUBCASE("Device 0") { + DeviceCoordinates device = DeviceCoordinates{{0}}; + device_id_t correct = device_id_from_index(1, DeviceType::GPU); + device_id_t result = get_device_id(mv, device, ms, projection); + CHECK(correct == result); + } + + SUBCASE("Device 1") { + DeviceCoordinates device = DeviceCoordinates{{1}}; + device_id_t correct = device_id_from_index(3, DeviceType::GPU); + device_id_t result = get_device_id(mv, device, ms, projection); + CHECK(correct == result); + } + + SUBCASE("Device 2") { + DeviceCoordinates device = DeviceCoordinates{{2}}; + device_id_t correct = device_id_from_index(5, DeviceType::GPU); + device_id_t result = get_device_id(mv, device, ms, projection); + CHECK(correct == result); + } + } + SUBCASE("2D case") { + MachineView mv = + MachineView{DeviceCoordinates{{1, 2}}, + StridedRectangle{ + {StridedRectangleSide(num_points_t(2), stride_t{1}), + StridedRectangleSide(num_points_t(2), stride_t{2})}}, + 
DeviceType::GPU}; + MachineSpecification ms = + MachineSpecification{3, 0, 5, 0, 0}; // 3 nodes with 5 GPUs each + MachineViewProjection projection = MachineViewProjection{ + {{machine_view_dim_idx_t{0}, MachineSpecificationDimension::INTER}, + {machine_view_dim_idx_t{1}, MachineSpecificationDimension::INTRA}}}; + + SUBCASE("Device (0,0)") { + DeviceCoordinates device = DeviceCoordinates{{0, 0}}; + device_id_t correct = device_id_from_index(7, DeviceType::GPU); + device_id_t result = get_device_id(mv, device, ms, projection); + CHECK(correct == result); + } + + SUBCASE("Device (0,1)") { + DeviceCoordinates device = DeviceCoordinates{{0, 1}}; + device_id_t correct = device_id_from_index(9, DeviceType::GPU); + device_id_t result = get_device_id(mv, device, ms, projection); + CHECK(correct == result); + } + SUBCASE("Device (1,0)") { + DeviceCoordinates device = DeviceCoordinates{{1, 0}}; + device_id_t correct = device_id_from_index(12, DeviceType::GPU); + device_id_t result = get_device_id(mv, device, ms, projection); + CHECK(correct == result); + } + SUBCASE("Device (1,1)") { + DeviceCoordinates device = DeviceCoordinates{{1, 1}}; + device_id_t correct = device_id_from_index(14, DeviceType::GPU); + device_id_t result = get_device_id(mv, device, ms, projection); + CHECK(correct == result); + } + } + + SUBCASE("3D case") { + MachineView mv = + MachineView{DeviceCoordinates{{0, 2, 0}}, + StridedRectangle{ + {StridedRectangleSide(num_points_t(2), stride_t{1}), + StridedRectangleSide(num_points_t(2), stride_t{2}), + StridedRectangleSide(num_points_t(2), stride_t{1})}}, + DeviceType::GPU}; + MachineSpecification ms = + MachineSpecification{2, 0, 8, 0, 0}; // 2 nodes with 8 GPUs each + MachineViewProjection projection = MachineViewProjection{ + {{machine_view_dim_idx_t{0}, MachineSpecificationDimension::INTER}, + {machine_view_dim_idx_t{1}, MachineSpecificationDimension::INTRA}, + {machine_view_dim_idx_t{2}, MachineSpecificationDimension::INTRA}}}; + + SUBCASE("Device
(0,0,1)") { + DeviceCoordinates device = DeviceCoordinates{{0, 1, 0}}; + device_id_t correct = device_id_from_index(3, DeviceType::GPU); + device_id_t result = get_device_id(mv, device, ms, projection); + CHECK(correct == result); + } + + SUBCASE("Device (1, 1, 0)") { + DeviceCoordinates device = DeviceCoordinates{{1, 0, 1}}; + device_id_t correct = device_id_from_index(14, DeviceType::GPU); + device_id_t result = get_device_id(mv, device, ms, projection); + CHECK(correct == result); + } + SUBCASE("All devices") { + std::unordered_set result = + get_device_ids(mv, ms, projection); + std::unordered_set devices = {2, 3, 10, 11, 6, 7, 14, 15}; + std::unordered_set correct = + transform(devices, [&](int idx) { + return device_id_from_index(idx, DeviceType::GPU); + }); + + CHECK(result == correct); + } } } } diff --git a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc index 437c7213cb..7f8b97badc 100644 --- a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc @@ -6,17 +6,20 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("StartInvariantMachineView") { - device_id_t start = device_id_t{gpu_id_t{0}}; + DeviceCoordinates start = DeviceCoordinates{{0}}; StridedRectangle rect = StridedRectangle{{ StridedRectangleSide(num_points_t{2}, stride_t{3}), StridedRectangleSide(num_points_t{2}, stride_t{2}), }}; + DeviceType device_type = DeviceType::GPU; + SUBCASE("To StartInvariantMachineView") { - MachineView input = MachineView{start, rect}; + MachineView input = MachineView{start, rect, device_type}; - StartInvariantMachineView correct = StartInvariantMachineView{rect}; + StartInvariantMachineView correct = + StartInvariantMachineView{rect, device_type}; StartInvariantMachineView result = start_invariant_from_machine_view(input); CHECK(correct == result); @@ -24,21 +27,23 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("From StartInvariantMachineView") { - 
StartInvariantMachineView input = StartInvariantMachineView{rect}; - MachineView correct = MachineView{start, rect}; + StartInvariantMachineView input = + StartInvariantMachineView{rect, device_type}; + MachineView correct = MachineView{start, rect, device_type}; MachineView result = machine_view_from_start_invariant(input, start); CHECK(correct == result); } SUBCASE("To and From") { - MachineView correct = MachineView{start, rect}; + MachineView correct = MachineView{start, rect, device_type}; MachineView result = machine_view_from_start_invariant( start_invariant_from_machine_view(correct), start); CHECK(correct == result); } SUBCASE("From and To") { - StartInvariantMachineView correct = StartInvariantMachineView{rect}; + StartInvariantMachineView correct = + StartInvariantMachineView{rect, device_type}; StartInvariantMachineView result = start_invariant_from_machine_view( machine_view_from_start_invariant(correct, start)); CHECK(correct == result); From 3cbc6be883056b69d41992151e465bb12920e398 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Fri, 13 Sep 2024 13:47:36 -0700 Subject: [PATCH 21/34] update to allowed machine views --- .../include/compiler/allowed_machine_views.h | 16 +- .../src/compiler/allowed_machine_views.cc | 111 +++-- .../machine_view_to_tensor_mapping.cc | 16 +- .../test/src/allowed_machine_views.cc | 390 +++++++++++------- .../src/machine_view_to_tensor_mapping.cc | 101 +++-- .../op-attrs/parallel_tensor_dim_idx_t.h | 3 +- .../src/op-attrs/parallel_tensor_dim_idx_t.cc | 3 +- lib/pcg/include/pcg/machine_specification.h | 8 + ...hine_specification_coordinates.struct.toml | 26 ++ lib/pcg/include/pcg/machine_view.h | 15 +- lib/pcg/include/pcg/machine_view.struct.toml | 4 +- ...l => machine_view_coordinates.struct.toml} | 2 +- lib/pcg/include/pcg/machine_view_dim_idx_t.h | 2 +- .../pcg/start_invariant_machine_view.h | 2 +- lib/pcg/src/pcg/machine_specification.cc | 17 +- lib/pcg/src/pcg/machine_view.cc | 62 +-- 
lib/pcg/src/pcg/machine_view_dim_idx_t.cc | 5 +- .../src/pcg/start_invariant_machine_view.cc | 2 +- lib/pcg/src/pcg/strided_rectangle.cc | 2 +- lib/pcg/test/src/pcg/machine_view.cc | 65 +-- .../src/pcg/start_invariant_machine_view.cc | 2 +- lib/pcg/test/src/pcg/strided_rectangle.cc | 1 + .../utils/containers/get_all_permutations.h | 112 +++++ .../containers/map_from_keys_and_values.h | 29 ++ .../include/utils/containers/transform.h | 12 + .../src/utils/containers/cartesian_product.cc | 2 + lib/utils/test/src/utils/containers/filter.cc | 1 + .../utils/containers/get_all_permutations.cc | 73 ++++ lib/utils/test/src/utils/containers/range.cc | 1 + .../test/src/utils/containers/replicate.cc | 2 + lib/utils/test/src/utils/containers/scanl.cc | 2 + 31 files changed, 773 insertions(+), 316 deletions(-) create mode 100644 lib/pcg/include/pcg/machine_specification_coordinates.struct.toml rename lib/pcg/include/pcg/{device_coordinates.struct.toml => machine_view_coordinates.struct.toml} (89%) create mode 100644 lib/utils/include/utils/containers/map_from_keys_and_values.h diff --git a/lib/compiler/include/compiler/allowed_machine_views.h b/lib/compiler/include/compiler/allowed_machine_views.h index 1029ed5fd8..5c78d2601c 100644 --- a/lib/compiler/include/compiler/allowed_machine_views.h +++ b/lib/compiler/include/compiler/allowed_machine_views.h @@ -5,6 +5,7 @@ #include "op-attrs/parallel_tensor_shape.dtg.h" #include "pcg/machine_specification.h" #include "pcg/machine_view.h" +#include "pcg/machine_view_projection.dtg.h" #include "pcg/start_invariant_machine_view.dtg.h" namespace FlexFlow { @@ -15,17 +16,16 @@ bool is_valid_machine_view(MachineView const &mv, bool is_valid_machine_view(MachineView const &mv, ParallelTensorShape const &shape); -std::unordered_set - get_allowed_machine_views(MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType device_type = DeviceType::GPU); - -std::unordered_set - 
get_allowed_start_invariant_machine_views( +std::unordered_set> + get_allowed_partial_machine_view_mappings( MachineSpecification const &machine_spec, ParallelTensorShape const &shape, DeviceType device_type = DeviceType::GPU); - +std::unordered_set> + get_allowed_partial_start_invariant_machine_view_mappings( + MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType device_type); } // namespace FlexFlow #endif diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index 928abc485a..f7d4d9ae73 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -12,6 +12,7 @@ #include "utils/containers/extend.h" #include "utils/containers/filter.h" #include "utils/containers/get_all_permutations.h" +#include "utils/containers/map_from_keys_and_values.h" #include "utils/containers/product.h" #include "utils/containers/range.h" #include "utils/containers/replicate.h" @@ -39,9 +40,13 @@ static std::unordered_multiset [&](int num_devices) { return num_points_t{num_devices}; }); } -bool is_valid_machine_view(MachineView const &mv, - MachineSpecification const &machine_spec) { - return false; // TODO: fix +bool is_valid_partial_machine_view_mapping(MachineView const &mv, + MachineSpecification const &ms, + MachineViewProjection const &proj) { + MachineSpecificationCoordinates maximum_device_coords = + get_machine_specification_coordinates( + mv, get_maximum_device_coordinates(mv), ms, proj); + return is_valid_machine_specification_coordinates(ms, maximum_device_coords); } bool is_valid_machine_view(MachineView const &mv, @@ -54,19 +59,20 @@ bool is_valid_machine_view(MachineView const &mv, return unordered_multiset_of(mv_num_devices) == tensor_num_devices; } -/* Generates a set of candidate `MachineView`s. +/* Generates a set of candidate `MachineView`s and their associate + `MachineViewProjection`. 
* The returned set includes all valid machine views, and might contain - invalid - * ones. This function should never be used externally (see - * `get_allowed_machine_views` instead). There is no guarantee that a - non-empty - * returned set contains a valid machine view (i.e. its possible for all + invalid ones. This function should never be used externally (see + * `get_allowed_partial_machine_view_mappings` instead). There is no guarantee + that a non-empty returned set contains a valid machine view (i.e. its possible + for all * `MachineView`s to be invalid) */ -static std::unordered_set - get_candidate_machine_views(MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType const &device_type) { +static std::unordered_set> + get_candidate_partial_machine_view_mappings( + MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType const &device_type) { auto candidate_strides = [](std::vector const &tensor_dims, @@ -98,54 +104,81 @@ static std::unordered_set std::unordered_set> raw_coordinates = unordered_set_of(cartesian_product(coordinate_ranges)); - std::unordered_set device_coordinates = + std::unordered_set machine_view_coordinates = transform(raw_coordinates, [](std::vector const &point) { - return DeviceCoordinates(point); + return MachineViewCoordinates(point); }); - return device_coordinates; + return machine_view_coordinates; + }; + + auto candidate_projections = [](MachineView const &mv) { + std::unordered_set result; + std::unordered_set options = { + MachineSpecificationDimension::INTER, + MachineSpecificationDimension::INTRA}; + for (std::vector const &proj_vec : + get_all_permutations_with_repetition(options, num_dims(mv))) { + + result.insert(MachineViewProjection{ + map_from_keys_and_values(get_machine_view_indices(mv), proj_vec)}); + } + return result; }; std::unordered_multiset tensor_dims = get_num_devices_per_parallel_dim(shape); int total_devices = get_num_devices(machine_spec, 
device_type); - std::unordered_set machine_views; + std::unordered_set> + machine_views; for (MultiDimensionalStride const &strides : candidate_strides(sorted(tensor_dims), total_devices)) { StridedRectangle rect = get_strided_rectangle(strides, sorted(tensor_dims)); - StartInvariantMachineView start_inv_mv = - StartInvariantMachineView{rect, device_type}; - - for (DeviceCoordinates start : candidate_starts(sorted(tensor_dims))) { - machine_views.insert( - machine_view_from_start_invariant(start_inv_mv, start)); + auto start_inv_mv = StartInvariantMachineView{rect, device_type}; + for (MachineViewCoordinates start : candidate_starts(sorted(tensor_dims))) { + MachineView mv = machine_view_from_start_invariant(start_inv_mv, start); + for (MachineViewProjection const &proj : candidate_projections(mv)) { + machine_views.insert({mv, proj}); + } } } - return machine_views; } -std::unordered_set - get_allowed_machine_views(MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType device_type) { - - std::unordered_set views = - get_candidate_machine_views(machine_spec, shape, device_type); - return filter(views, [&](MachineView const &view) { - return is_valid_machine_view(view, shape) && - is_valid_machine_view(view, machine_spec); - }); +std::unordered_set> + get_allowed_partial_machine_view_mappings( + MachineSpecification const &machine_spec, + ParallelTensorShape const &shape, + DeviceType device_type) { + + std::unordered_set> views = + get_candidate_partial_machine_view_mappings( + machine_spec, shape, device_type); + return filter(views, + [&](std::pair const &pair) { + auto &[mv, projection] = pair; + return is_valid_machine_view(mv, shape) && + is_valid_partial_machine_view_mapping( + mv, machine_spec, projection); + }); } -std::unordered_set - get_allowed_start_invariant_machine_views( +std::unordered_set> + get_allowed_partial_start_invariant_machine_view_mappings( MachineSpecification const &machine_spec, ParallelTensorShape 
const &shape, DeviceType device_type) { - return transform(get_allowed_machine_views(machine_spec, shape, device_type), - start_invariant_from_machine_view); + + std::unordered_set> views = + get_allowed_partial_machine_view_mappings( + machine_spec, shape, device_type); + + return transform( + views, [](std::pair const &p) { + auto &[view, proj] = p; + return std::pair{start_invariant_from_machine_view(view), proj}; + }); } } // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc b/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc index 2436689f2c..87fd00743e 100644 --- a/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc +++ b/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc @@ -1,21 +1,26 @@ #include "compiler/machine_view_to_tensor_mapping.h" #include "compiler/allowed_machine_views.h" +#include "op-attrs/parallel_dim.h" #include "op-attrs/parallel_tensor_dim_idx_t.h" #include "pcg/machine_view_dim_idx_t.h" +#include "utils/bidict/algorithms/bidict_from_pairs.h" #include "utils/containers/all_of.h" #include "utils/containers/filter.h" #include "utils/containers/get_all_permutations.h" #include "utils/containers/sorted.h" #include "utils/containers/zip.h" - +#include "utils/exception.h" namespace FlexFlow { std::unordered_set get_all_machine_view_to_tensor_mappings(MachineView const &mv, ParallelTensorShape const &shape) { - assert(is_valid_machine_view(mv, shape)); + if (!is_valid_machine_view(mv, shape)) { + throw mk_runtime_error( + "Invalid MachineView {} for given ParallelTensorShape {}", mv, shape); + } std::vector machine_view_dim_ordering = - sorted(get_machine_view_indices(mv)); + get_machine_view_indices(mv); std::unordered_set shape_indices = get_parallel_tensor_indices(shape); shape_indices = @@ -26,8 +31,9 @@ std::unordered_set std::unordered_set result; for (std::vector const &tensor_dim_orderings : get_all_permutations(shape_indices)) { - MachineViewToTensorMapping mapping = 
MachineViewToTensorMapping( - bidict(zip(machine_view_dim_ordering, tensor_dim_orderings))); + MachineViewToTensorMapping mapping = + MachineViewToTensorMapping(bidict_from_pairs( + zip(machine_view_dim_ordering, tensor_dim_orderings))); if (is_valid_mapping(mapping, mv, shape)) { result.insert(mapping); } diff --git a/lib/compiler/test/src/allowed_machine_views.cc b/lib/compiler/test/src/allowed_machine_views.cc index 1cee33ffbb..6a422d2f20 100644 --- a/lib/compiler/test/src/allowed_machine_views.cc +++ b/lib/compiler/test/src/allowed_machine_views.cc @@ -1,151 +1,239 @@ -// #include "compiler/allowed_machine_views.h" -// #include "doctest/doctest.h" -// #include "pcg/machine_specification.dtg.h" -// #include "pcg/machine_view.h" -// #include "pcg/start_invariant_machine_view.h" -// #include "utils/containers/extend.h" -// #include "utils/containers/range.h" -// #include "utils/containers/transform.h" -// #include "utils/containers/unordered_set_of.h" - -// using namespace FlexFlow; - -// TEST_SUITE(FF_TEST_SUITE) { - -// TEST_CASE("get_allowed_machine_views") { - -// SUBCASE("1 degree of parallelism") { - -// MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; -// ParallelTensorShape shape = ParallelTensorShape{ -// ParallelTensorDims{ -// FFOrdered{ -// ShardParallelDim{10, 3}, -// }, -// ReplicaParallelDimSet{ -// SumDegree{1}, -// DiscardCopyDegree{1}, -// }, -// }, -// DataType::FLOAT, -// }; - -// std::unordered_set correct = { -// make_1d_machine_view(0, 3, stride_t(1)), -// make_1d_machine_view(1, 4, stride_t(1)), -// make_1d_machine_view(2, 5, stride_t(1)), -// make_1d_machine_view(0, 6, stride_t(2))}; -// std::unordered_set result = -// get_allowed_machine_views(ms, shape); - -// CHECK(correct == result); -// } - -// SUBCASE("2 degrees of parallelism") { - -// MachineSpecification ms = MachineSpecification{11, 1, 1, 0, 0}; -// ParallelTensorShape shape = ParallelTensorShape{ -// ParallelTensorDims{ -// FFOrdered{ -// ShardParallelDim{10, 3}, 
-// }, -// ReplicaParallelDimSet{ -// SumDegree{2}, -// DiscardCopyDegree{1}, -// }, -// }, -// DataType::FLOAT, -// }; - -// auto make_2d_views = [&](int num_starts, int stride1, int stride2) { -// return unordered_set_of(transform(range(num_starts), [&](int start) { -// return MachineView{ -// device_id_t{gpu_id_t{start}}, -// StridedRectangle{ -// {StridedRectangleSide{num_points_t{2}, stride_t{stride1}}, -// StridedRectangleSide{num_points_t{3}, -// stride_t{stride2}}}}, -// }; -// })); -// }; - -// std::unordered_set correct; -// extend(correct, -// make_2d_views(/*num_starts*/ 6, /*stride1*/ 1, /*stride2*/ 1)); -// extend(correct, -// make_2d_views(/*num_starts*/ 1, /*stride1*/ 2, /*stride2*/ 1)); -// extend(correct, -// make_2d_views(/*num_starts*/ 2, /*stride1*/ 1, /*stride2*/ 2)); - -// std::unordered_set result = -// get_allowed_machine_views(ms, shape); - -// CHECK(result == correct); -// } -// } - -// TEST_CASE("get_allowed_start_invariant_machine_views") { - -// SUBCASE("1 degree of parallelism") { - -// MachineSpecification ms = MachineSpecification{5, 1, 1, 0, 0}; -// ParallelTensorShape shape = ParallelTensorShape{ -// ParallelTensorDims{ -// FFOrdered{ -// ShardParallelDim{10, 3}, -// }, -// ReplicaParallelDimSet{ -// SumDegree{1}, -// DiscardCopyDegree{1}, -// }, -// }, -// DataType::FLOAT, -// }; - -// std::unordered_set correct = { -// make_1d_start_invariant_machine_view(num_points_t(3), stride_t(1)), -// make_1d_start_invariant_machine_view(num_points_t(3), -// stride_t(2))}; -// std::unordered_set result = -// get_allowed_start_invariant_machine_views(ms, shape); - -// CHECK(correct == result); -// } - -// SUBCASE("2 degrees of parallelism") { - -// MachineSpecification ms = MachineSpecification(15, 1, 1, 0, 0); -// ParallelTensorShape shape = ParallelTensorShape{ -// ParallelTensorDims{ -// FFOrdered{ -// ShardParallelDim{10, 3}, -// }, -// ReplicaParallelDimSet{ -// SumDegree{2}, -// DiscardCopyDegree{1}, -// }, -// }, -// DataType::FLOAT, 
-// }; - -// auto make_2d_view = [&](int stride1, int stride2) { -// StridedRectangle rect = StridedRectangle{ -// {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, -// StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; -// return StartInvariantMachineView{rect}; -// }; - -// std::unordered_set correct = { -// make_2d_view(/*stride1*/ 1, /*stride2*/ 1), -// make_2d_view(/*stride1*/ 2, /*stride2*/ 1), -// make_2d_view(/*stride1*/ 1, /*stride2*/ 2), -// make_2d_view(/*stride1*/ 1, /*stride2*/ 3), -// }; - -// std::unordered_set result = -// get_allowed_start_invariant_machine_views(ms, shape); - -// CHECK(result == correct); -// } -// } -// } +#include "compiler/allowed_machine_views.h" +#include "doctest/doctest.h" +#include "pcg/machine_specification.dtg.h" +#include "pcg/machine_view.h" +#include "pcg/start_invariant_machine_view.h" +#include "utils/containers/extend.h" +#include "utils/containers/range.h" +#include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" +#include "utils/containers/zip.h" +#include "utils/fmt/unordered_set.h" + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + + TEST_CASE("get_allowed_partial_machine_view_mappings") { + + SUBCASE("1 degree of parallelism") { + + MachineSpecification ms = MachineSpecification{1, 5, 5, 0, 0}; + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + std::vector correct_mv = { + make_1d_machine_view(0, 3, stride_t(1)), + make_1d_machine_view(1, 4, stride_t(1)), + make_1d_machine_view(2, 5, stride_t(1)), + make_1d_machine_view(0, 6, stride_t(2))}; + + std::vector correct_proj = { + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA}}}, + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA}}}, + 
MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA}}}, + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA}}}, + }; + + std::unordered_set> + correct = unordered_set_of(zip(correct_mv, correct_proj)); + + std::unordered_set> result = + get_allowed_partial_machine_view_mappings(ms, shape); + + CHECK(correct == result); + } + + SUBCASE("2 degrees of parallelism") { + + MachineSpecification ms = MachineSpecification{3, 3, 3, 0, 0}; + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{2}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + auto make_2d_views = + [&](int start_x, int start_y, int stride1, int stride2) { + return MachineView{ + MachineViewCoordinates{{start_x, start_y}}, + StridedRectangle{ + {StridedRectangleSide{num_points_t{2}, stride_t{stride1}}, + StridedRectangleSide{num_points_t{3}, stride_t{stride2}}}}, + DeviceType::GPU}; + }; + + std::vector correct_mv = { + make_2d_views(0, 0, /*stride1*/ 1, /*stride2*/ 1), + make_2d_views(1, 0, /*stride1*/ 1, /*stride2*/ 1), + make_2d_views(0, 0, /*stride1*/ 2, /*stride2*/ 1), + + make_2d_views(0, 0, /*stride1*/ 1, /*stride2*/ 1), + make_2d_views(1, 0, /*stride1*/ 1, /*stride2*/ 1), + make_2d_views(0, 0, /*stride1*/ 2, /*stride2*/ 1), + }; + + std::vector correct_proj = { + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTER}}}, + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTER}}}, + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTER}}}, + + MachineViewProjection{{{machine_view_dim_idx_t{0}, + 
MachineSpecificationDimension::INTER}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTRA}}}, + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTER}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTRA}}}, + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTER}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTRA}}}, + }; + + std::unordered_set> + correct = unordered_set_of(zip(correct_mv, correct_proj)); + + std::unordered_set> result = + get_allowed_partial_machine_view_mappings(ms, shape, DeviceType::GPU); + + CHECK(correct == result); + } + } + + TEST_CASE("get_allowed_partial_start_invariant_machine_view_mappings") { + + SUBCASE("1 degree of parallelism") { + + MachineSpecification ms = MachineSpecification{1, 5, 5, 0, 0}; + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + std::vector correct_mv = { + make_1d_start_invariant_machine_view( + num_points_t(3), stride_t(1), DeviceType::GPU), + make_1d_start_invariant_machine_view( + num_points_t(3), stride_t(2), DeviceType::GPU)}; + + std::vector correct_proj = { + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA}}}, + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA}}}, + }; + + std::unordered_set< + std::pair> + correct = unordered_set_of(zip(correct_mv, correct_proj)); + + std::unordered_set< + std::pair> + result = get_allowed_partial_start_invariant_machine_view_mappings( + ms, shape, DeviceType::GPU); + + CHECK(correct == result); + } + + SUBCASE("2 degrees of parallelism") { + + MachineSpecification ms = MachineSpecification(3, 3, 3, 0, 0); + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + 
ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{2}, + DiscardCopyDegree{1}, + }, + }, + DataType::FLOAT, + }; + + auto make_2d_view = [&](int stride1, int stride2) { + StridedRectangle rect = StridedRectangle{ + {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, + StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; + return StartInvariantMachineView{rect, DeviceType::GPU}; + }; + + std::vector correct_mv = { + make_2d_view(/*stride1*/ 1, /*stride2*/ 1), + make_2d_view(/*stride1*/ 2, /*stride2*/ 1), + make_2d_view(/*stride1*/ 1, /*stride2*/ 1), + make_2d_view(/*stride1*/ 2, /*stride2*/ 1), + }; + + std::vector correct_proj = { + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTER}}}, + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTER}}}, + + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTER}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTRA}}}, + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTER}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTRA}}}, + }; + std::unordered_set< + std::pair> + correct = unordered_set_of(zip(correct_mv, correct_proj)); + + std::unordered_set< + std::pair> + result = get_allowed_partial_start_invariant_machine_view_mappings( + ms, shape, DeviceType::GPU); + + CHECK(result == correct); + } + } +} diff --git a/lib/compiler/test/src/machine_view_to_tensor_mapping.cc b/lib/compiler/test/src/machine_view_to_tensor_mapping.cc index dc798dcc5c..2f43bfb177 100644 --- a/lib/compiler/test/src/machine_view_to_tensor_mapping.cc +++ b/lib/compiler/test/src/machine_view_to_tensor_mapping.cc @@ -3,51 +3,78 @@ #include "pcg/machine_view.h" #include "utils/containers/transform.h" 
#include "utils/containers/unordered_set_of.h" +#include "utils/fmt/unordered_map.h" +#include "utils/fmt/unordered_set.h" using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_all_machine_view_to_tensor_mappings") { - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{2}, - DiscardCopyDegree{2}, - }, - }, - DataType::FLOAT, - }; - MachineView view = - MachineView{DeviceCoordinates{{0, 0, 0}}, - StridedRectangle{{ - StridedRectangleSide{num_points_t(2), stride_t(1)}, - StridedRectangleSide{num_points_t(2), stride_t(4)}, - StridedRectangleSide{num_points_t(3), stride_t(1)}, - }}, - DeviceType::GPU}; + SUBCASE("no possible mappings") { + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{3, 1}, + }, + ReplicaParallelDimSet{ + SumDegree{1}, + DiscardCopyDegree{2}, + }, + }, + DataType::FLOAT, + }; + MachineView view = + MachineView{MachineViewCoordinates{{0, 0, 0}}, + StridedRectangle{{ + StridedRectangleSide{num_points_t(2), stride_t(1)}, + StridedRectangleSide{num_points_t(2), stride_t(4)}, + }}, + DeviceType::GPU}; + CHECK_THROWS_AS(get_all_machine_view_to_tensor_mappings(view, shape), + std::runtime_error); + } + SUBCASE("multiple possible mappings") { + ParallelTensorShape shape = ParallelTensorShape{ + ParallelTensorDims{ + FFOrdered{ + ShardParallelDim{10, 3}, + }, + ReplicaParallelDimSet{ + SumDegree{2}, + DiscardCopyDegree{2}, + }, + }, + DataType::FLOAT, + }; + MachineView view = + MachineView{MachineViewCoordinates{{0, 0, 0}}, + StridedRectangle{{ + StridedRectangleSide{num_points_t(2), stride_t(1)}, + StridedRectangleSide{num_points_t(2), stride_t(4)}, + StridedRectangleSide{num_points_t(3), stride_t(1)}, + }}, + DeviceType::GPU}; - bidict b1 = { - {machine_view_dim_idx_t(2), parallel_tensor_dim_idx_t{ff_dim_t(0)}}, - {machine_view_dim_idx_t(1), - 
parallel_tensor_dim_idx_t{ReplicaType::SUM}}, - {machine_view_dim_idx_t(0), - parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}}}; + bidict b1 = { + {machine_view_dim_idx_t(2), parallel_tensor_dim_idx_t{ff_dim_t(0)}}, + {machine_view_dim_idx_t(1), + parallel_tensor_dim_idx_t{ReplicaType::SUM}}, + {machine_view_dim_idx_t(0), + parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}}}; - bidict b2 = { - {machine_view_dim_idx_t(2), parallel_tensor_dim_idx_t{ff_dim_t(0)}}, - {machine_view_dim_idx_t(0), - parallel_tensor_dim_idx_t{ReplicaType::SUM}}, - {machine_view_dim_idx_t(1), - parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}}}; + bidict b2 = { + {machine_view_dim_idx_t(2), parallel_tensor_dim_idx_t{ff_dim_t(0)}}, + {machine_view_dim_idx_t(0), + parallel_tensor_dim_idx_t{ReplicaType::SUM}}, + {machine_view_dim_idx_t(1), + parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}}}; - std::unordered_set correct = { - MachineViewToTensorMapping{b1}, MachineViewToTensorMapping{b2}}; - std::unordered_set result = - get_all_machine_view_to_tensor_mappings(view, shape); + std::unordered_set correct = { + MachineViewToTensorMapping{b1}, MachineViewToTensorMapping{b2}}; + std::unordered_set result = + get_all_machine_view_to_tensor_mappings(view, shape); - CHECK(correct == result); + CHECK(correct == result); + } } } diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h index ca9f214b29..9cdc9e4839 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h @@ -1,8 +1,9 @@ #ifndef _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIM_IDX_H #define _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIM_IDX_H +#include "op-attrs/parallel_dim.dtg.h" #include "op-attrs/parallel_tensor_dim_idx_t.dtg.h" -#include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/parallel_tensor_shape.dtg.h" namespace FlexFlow { diff --git 
a/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc index 1487d7f49f..b5e52f5677 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc @@ -1,5 +1,6 @@ #include "op-attrs/parallel_tensor_dim_idx_t.h" +#include "op-attrs/parallel_tensor_dims.h" #include "utils/containers/extend.h" #include "utils/containers/range.h" #include "utils/containers/transform.h" @@ -25,7 +26,7 @@ ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, std::unordered_set get_parallel_tensor_indices(ParallelTensorShape const &shape) { std::unordered_set indices; - extend(indices, transform(range(num_shard_dims(shape)), [](int idx) { + extend(indices, transform(range(num_shard_dims(shape.dims)), [](int idx) { return parallel_tensor_dim_idx_t(ff_dim_t(idx)); })); indices.insert(parallel_tensor_dim_idx_t(ReplicaType::SUM)); diff --git a/lib/pcg/include/pcg/machine_specification.h b/lib/pcg/include/pcg/machine_specification.h index 0e42864824..fa27f87207 100644 --- a/lib/pcg/include/pcg/machine_specification.h +++ b/lib/pcg/include/pcg/machine_specification.h @@ -1,8 +1,10 @@ #ifndef _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_SPECIFICATION_H #define _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_SPECIFICATION_H +#include "pcg/device_id_t.dtg.h" #include "pcg/device_type.dtg.h" #include "pcg/machine_specification.dtg.h" +#include "pcg/machine_specification_coordinates.dtg.h" namespace FlexFlow { @@ -13,6 +15,12 @@ int get_num_devices(MachineSpecification const &ms, int get_num_devices_per_node(MachineSpecification const &ms, DeviceType const &device_type); +bool is_valid_machine_specification_coordinates( + MachineSpecification const &ms, + MachineSpecificationCoordinates const &coords); + +device_id_t get_device_id(MachineSpecification const &ms, + MachineSpecificationCoordinates const &coords); } // namespace FlexFlow #endif diff --git 
a/lib/pcg/include/pcg/machine_specification_coordinates.struct.toml b/lib/pcg/include/pcg/machine_specification_coordinates.struct.toml new file mode 100644 index 0000000000..c60cf913fb --- /dev/null +++ b/lib/pcg/include/pcg/machine_specification_coordinates.struct.toml @@ -0,0 +1,26 @@ +namespace = "FlexFlow" +name = "MachineSpecificationCoordinates" +features = [ + "eq", + "ord", + "hash", + "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "pcg/device_type.dtg.h", +] + +[[fields]] +name = "inter" +type = "int" + +[[fields]] +name = "intra" +type = "int" + +[[fields]] +name = "device_type" +type = "::FlexFlow::DeviceType" diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 97712fbdee..822452eef0 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -1,11 +1,12 @@ #ifndef _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_H #define _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_H -#include "pcg/device_coordinates.dtg.h" #include "pcg/device_id.h" #include "pcg/device_type.dtg.h" #include "pcg/machine_specification.dtg.h" +#include "pcg/machine_specification_coordinates.dtg.h" #include "pcg/machine_view.dtg.h" +#include "pcg/machine_view_coordinates.dtg.h" #include "pcg/machine_view_dim_idx_t.dtg.h" #include "pcg/machine_view_projection.dtg.h" #include "pcg/num_points_t.dtg.h" @@ -15,14 +16,20 @@ namespace FlexFlow { -std::unordered_set +std::unordered_set get_devices_coordinates(MachineView const &mv); -DeviceCoordinates get_maximum_device_coordinates(MachineView const &mv); +MachineViewCoordinates get_maximum_device_coordinates(MachineView const &mv); + +MachineSpecificationCoordinates get_machine_specification_coordinates( + MachineView const &mv, + MachineViewCoordinates const &coordinates, + MachineSpecification const &ms, + MachineViewProjection const &projection); StridedRectangleSide get_side_at_idx(MachineView const &mv, machine_view_dim_idx_t const &idx); device_id_t get_device_id(MachineView 
const &mv, - DeviceCoordinates const &coordinates, + MachineViewCoordinates const &coordinates, MachineSpecification const &ms, MachineViewProjection const &projection); std::unordered_set diff --git a/lib/pcg/include/pcg/machine_view.struct.toml b/lib/pcg/include/pcg/machine_view.struct.toml index d7f21a7004..bbca071cdb 100644 --- a/lib/pcg/include/pcg/machine_view.struct.toml +++ b/lib/pcg/include/pcg/machine_view.struct.toml @@ -11,13 +11,13 @@ features = [ includes = [ "pcg/strided_rectangle.h", - "pcg/device_coordinates.dtg.h", + "pcg/machine_view_coordinates.dtg.h", "pcg/device_type.dtg.h", ] [[fields]] name = "start" -type = "::FlexFlow::DeviceCoordinates" +type = "::FlexFlow::MachineViewCoordinates" [[fields]] name = "rect" diff --git a/lib/pcg/include/pcg/device_coordinates.struct.toml b/lib/pcg/include/pcg/machine_view_coordinates.struct.toml similarity index 89% rename from lib/pcg/include/pcg/device_coordinates.struct.toml rename to lib/pcg/include/pcg/machine_view_coordinates.struct.toml index c97729e1c7..7055d4f165 100644 --- a/lib/pcg/include/pcg/device_coordinates.struct.toml +++ b/lib/pcg/include/pcg/machine_view_coordinates.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "DeviceCoordinates" +name = "MachineViewCoordinates" features = [ "eq", "ord", diff --git a/lib/pcg/include/pcg/machine_view_dim_idx_t.h b/lib/pcg/include/pcg/machine_view_dim_idx_t.h index c247b2ed5d..0332240a14 100644 --- a/lib/pcg/include/pcg/machine_view_dim_idx_t.h +++ b/lib/pcg/include/pcg/machine_view_dim_idx_t.h @@ -6,7 +6,7 @@ namespace FlexFlow { -std::unordered_set +std::vector get_machine_view_indices(MachineView const &mv); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.h b/lib/pcg/include/pcg/start_invariant_machine_view.h index 28478cc1a6..b19db69c40 100644 --- a/lib/pcg/include/pcg/start_invariant_machine_view.h +++ b/lib/pcg/include/pcg/start_invariant_machine_view.h @@ -8,7 +8,7 @@ namespace FlexFlow { MachineView 
machine_view_from_start_invariant(StartInvariantMachineView const &mv, - DeviceCoordinates const &start_id); + MachineViewCoordinates const &start_id); StartInvariantMachineView start_invariant_from_machine_view(MachineView const &mv); diff --git a/lib/pcg/src/pcg/machine_specification.cc b/lib/pcg/src/pcg/machine_specification.cc index e7e5f4568f..951eb884d3 100644 --- a/lib/pcg/src/pcg/machine_specification.cc +++ b/lib/pcg/src/pcg/machine_specification.cc @@ -1,7 +1,6 @@ #include "pcg/machine_specification.h" #include "pcg/device_id.h" #include "utils/exception.h" - namespace FlexFlow { int get_num_gpus(MachineSpecification const &ms) { @@ -33,4 +32,20 @@ int get_num_devices_per_node(MachineSpecification const &ms, throw mk_runtime_error("Unknown DeviceType {}", device_type); } } +bool is_valid_machine_specification_coordinates( + MachineSpecification const &ms, + MachineSpecificationCoordinates const &coords) { + return (coords.inter < ms.num_nodes) && + (coords.intra < get_num_devices_per_node(ms, coords.device_type)); +} + +device_id_t get_device_id(MachineSpecification const &ms, + MachineSpecificationCoordinates const &coords) { + assert(is_valid_machine_specification_coordinates(ms, coords)); + int raw_idx = + coords.inter * get_num_devices_per_node(ms, coords.device_type) + + coords.intra; + return device_id_from_index(raw_idx, coords.device_type); +} + } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 9ad33b55c2..aad7e763b4 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,7 +1,7 @@ #include "pcg/machine_view.h" -#include "pcg/device_coordinates.dtg.h" #include "pcg/device_id.h" #include "pcg/machine_specification.h" +#include "pcg/machine_view_coordinates.dtg.h" #include "pcg/machine_view_dim_idx_t.dtg.h" #include "pcg/machine_view_projection.dtg.h" #include "pcg/strided_rectangle.h" @@ -23,7 +23,7 @@ namespace FlexFlow { -std::unordered_set 
+std::unordered_set get_devices_coordinates(MachineView const &mv) { std::vector> coordinate_ranges = @@ -33,21 +33,23 @@ std::unordered_set std::unordered_set> raw_coordinates = unordered_set_of(cartesian_product(coordinate_ranges)); - std::unordered_set device_coordinates = + std::unordered_set machine_view_coordinates = transform(raw_coordinates, [](std::vector const &point) { - return DeviceCoordinates(point); + return MachineViewCoordinates(point); }); - return device_coordinates; + return machine_view_coordinates; } -DeviceCoordinates get_maximum_device_coordinates(MachineView const &mv) { +MachineViewCoordinates get_maximum_device_coordinates(MachineView const &mv) { return maximum(get_devices_coordinates(mv)); } -device_id_t get_device_id(MachineView const &mv, - DeviceCoordinates const &coordinates, - MachineSpecification const &ms, - MachineViewProjection const &projection) { +MachineSpecificationCoordinates get_machine_specification_coordinates( + MachineView const &mv, + MachineViewCoordinates const &coordinates, + MachineSpecification const &ms, + MachineViewProjection const &projection) { + auto inter_projection = filter_values( projection.raw_projection, [](MachineSpecificationDimension const &dim) { return dim == MachineSpecificationDimension::INTER; @@ -57,13 +59,15 @@ device_id_t get_device_id(MachineView const &mv, return dim == MachineSpecificationDimension::INTRA; }); - DeviceCoordinates transformed_coordinates = DeviceCoordinates{transform( - zip(coordinates.raw_coords, mv.rect.get_sides()), [&](auto const &pair) { - return pair.first * pair.second.stride.unwrapped; - })}; - transformed_coordinates = DeviceCoordinates{ + MachineViewCoordinates transformed_coordinates = MachineViewCoordinates{ + transform(zip(coordinates.raw_coords, mv.rect.get_sides()), + [&](auto const &pair) { + return pair.first * pair.second.stride.unwrapped; + })}; + transformed_coordinates = MachineViewCoordinates{ transform(zip(transformed_coordinates.raw_coords, 
mv.start.raw_coords), [&](auto const &pair) { return pair.first + pair.second; })}; + auto get_coordinate = [&](auto const &sub_projection) { std::vector relevant_dimensions = sorted(keys(sub_projection)); @@ -89,14 +93,17 @@ device_id_t get_device_id(MachineView const &mv, }; int inter_coordinate = get_coordinate(inter_projection); int intra_coordinate = get_coordinate(intra_projection); - if (inter_coordinate >= ms.num_nodes || - intra_coordinate >= get_num_devices_per_node(ms, mv.device_type)) { - throw mk_runtime_error( - fmt::format("DeviceCoordinates{} is out of bound", coordinates)); - } - int idx = inter_coordinate * get_num_devices_per_node(ms, mv.device_type) + - intra_coordinate; - return device_id_from_index(idx, mv.device_type); + return MachineSpecificationCoordinates{ + inter_coordinate, intra_coordinate, mv.device_type}; +} + +device_id_t get_device_id(MachineView const &mv, + MachineViewCoordinates const &coordinates, + MachineSpecification const &ms, + MachineViewProjection const &projection) { + MachineSpecificationCoordinates coords = + get_machine_specification_coordinates(mv, coordinates, ms, projection); + return get_device_id(ms, coords); } std::unordered_set @@ -104,7 +111,8 @@ std::unordered_set MachineSpecification const &ms, MachineViewProjection const &projection) { std::unordered_set devices_ids; - for (DeviceCoordinates const &coordinates : get_devices_coordinates(mv)) { + for (MachineViewCoordinates const &coordinates : + get_devices_coordinates(mv)) { devices_ids.insert(get_device_id(mv, coordinates, ms, projection)); } return devices_ids; @@ -148,7 +156,7 @@ MachineView make_1d_machine_view(int start, stride_t stride, DeviceType device_type) { StridedRectangle rect = make_1d_rect(start, stop, stride); - DeviceCoordinates start_coordinate = DeviceCoordinates{{start}}; + MachineViewCoordinates start_coordinate = MachineViewCoordinates{{start}}; return MachineView{start_coordinate, rect, device_type}; } @@ -163,7 +171,7 @@ MachineView 
make_1d_machine_view(int start, stride_t stride, DeviceType device_type) { StridedRectangle rect = make_1d_rect(start, num_points, stride); - DeviceCoordinates start_coordinate = DeviceCoordinates{{start}}; + MachineViewCoordinates start_coordinate = MachineViewCoordinates{{start}}; return MachineView{start_coordinate, rect, device_type}; } @@ -177,7 +185,7 @@ MachineView make_1d_machine_view(int start, stride_t stride, DeviceType device_type) { StridedRectangle rect = make_1d_rect(start, interval_size, stride); - DeviceCoordinates start_coordinate = DeviceCoordinates{{start}}; + MachineViewCoordinates start_coordinate = MachineViewCoordinates{{start}}; return MachineView{start_coordinate, rect, device_type}; } diff --git a/lib/pcg/src/pcg/machine_view_dim_idx_t.cc b/lib/pcg/src/pcg/machine_view_dim_idx_t.cc index c7ed2ad108..7494531366 100644 --- a/lib/pcg/src/pcg/machine_view_dim_idx_t.cc +++ b/lib/pcg/src/pcg/machine_view_dim_idx_t.cc @@ -2,13 +2,12 @@ #include "pcg/machine_view.h" #include "utils/containers/range.h" #include "utils/containers/transform.h" -#include "utils/containers/unordered_set_of.h" namespace FlexFlow { -std::unordered_set +std::vector get_machine_view_indices(MachineView const &mv) { - return transform(unordered_set_of(range(num_dims(mv))), + return transform(range(num_dims(mv)), [](int idx) { return machine_view_dim_idx_t{idx}; }); } } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/start_invariant_machine_view.cc b/lib/pcg/src/pcg/start_invariant_machine_view.cc index 687ed4a3a2..981cb240e2 100644 --- a/lib/pcg/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/src/pcg/start_invariant_machine_view.cc @@ -5,7 +5,7 @@ namespace FlexFlow { MachineView machine_view_from_start_invariant( StartInvariantMachineView const &start_invariant_mv, - DeviceCoordinates const &start) { + MachineViewCoordinates const &start) { return MachineView{ start, start_invariant_mv.rect, start_invariant_mv.device_type}; } diff --git 
a/lib/pcg/src/pcg/strided_rectangle.cc b/lib/pcg/src/pcg/strided_rectangle.cc index cc656d5047..55f269c440 100644 --- a/lib/pcg/src/pcg/strided_rectangle.cc +++ b/lib/pcg/src/pcg/strided_rectangle.cc @@ -1,7 +1,7 @@ #include "pcg/strided_rectangle.h" #include "op-attrs/dim_ordered/transform.h" -#include "pcg/device_coordinates.dtg.h" #include "pcg/device_id_t.dtg.h" +#include "pcg/machine_view_coordinates.dtg.h" #include "pcg/strided_rectangle_side.dtg.h" #include "pcg/strided_rectangle_side.h" #include "utils/containers/as_vector.h" diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index 1df8ac6e89..de6459c111 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -3,6 +3,8 @@ #include "pcg/strided_rectangle_side.h" #include "test/utils/doctest.h" #include "utils/containers/transform.h" +#include "utils/fmt/unordered_set.h" +#include "utils/fmt/vector.h" using namespace FlexFlow; @@ -10,7 +12,7 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("MachineView - utility functions") { MachineView mv = MachineView{ - DeviceCoordinates{{0, 0, 0}}, + MachineViewCoordinates{{0, 0, 0}}, StridedRectangle{{StridedRectangleSide(num_points_t{7}, stride_t{5}), StridedRectangleSide(num_points_t{10}, stride_t{2}), StridedRectangleSide(num_points_t{1}, stride_t{4})}}, @@ -42,19 +44,19 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("2D MachineView") { MachineView mv = - MachineView{DeviceCoordinates{{0, 0}}, + MachineView{MachineViewCoordinates{{0, 0}}, StridedRectangle{{ StridedRectangleSide(num_points_t(2), stride_t{3}), StridedRectangleSide(num_points_t(2), stride_t{2}), }}, DeviceType::GPU}; SUBCASE("get_devices_coordinates") { - std::unordered_set expected = { - {DeviceCoordinates{{0, 0}}, - DeviceCoordinates{{0, 1}}, - DeviceCoordinates{{1, 0}}, - DeviceCoordinates{{1, 1}}}}; - std::unordered_set result = + std::unordered_set expected = { + {MachineViewCoordinates{{0, 0}}, + MachineViewCoordinates{{0, 1}}, + 
MachineViewCoordinates{{1, 0}}, + MachineViewCoordinates{{1, 1}}}}; + std::unordered_set result = get_devices_coordinates(mv); CHECK(expected == result); } @@ -73,7 +75,7 @@ TEST_SUITE(FF_TEST_SUITE) { // have 1, 13, 4, 16 as device-ids MachineView mv = - MachineView{DeviceCoordinates{{0, 1, 2}}, + MachineView{MachineViewCoordinates{{0, 1, 2}}, StridedRectangle{{ StridedRectangleSide(num_points_t(1), stride_t{3}), StridedRectangleSide(num_points_t(2), stride_t{1}), @@ -82,12 +84,12 @@ TEST_SUITE(FF_TEST_SUITE) { DeviceType::GPU}; SUBCASE("get_devices_coordinates") { - std::unordered_set expected = { - {DeviceCoordinates{{0, 0, 0}}, - DeviceCoordinates{{0, 0, 1}}, - DeviceCoordinates{{0, 1, 0}}, - DeviceCoordinates{{0, 1, 1}}}}; - std::unordered_set result = + std::unordered_set expected = { + {MachineViewCoordinates{{0, 0, 0}}, + MachineViewCoordinates{{0, 0, 1}}, + MachineViewCoordinates{{0, 1, 0}}, + MachineViewCoordinates{{0, 1, 1}}}}; + std::unordered_set result = get_devices_coordinates(mv); CHECK(expected == result); } @@ -98,7 +100,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("2D MachineView") { MachineView mv = - MachineView{DeviceCoordinates{{0, 0}}, + MachineView{MachineViewCoordinates{{0, 0}}, StridedRectangle{{ StridedRectangleSide(num_points_t(2), stride_t{3}), StridedRectangleSide(num_points_t(2), stride_t{2}), @@ -106,14 +108,15 @@ TEST_SUITE(FF_TEST_SUITE) { DeviceType::GPU}; SUBCASE("get_maximum_device_coordinates") { - CHECK(get_maximum_device_coordinates(mv) == DeviceCoordinates{{1, 1}}); + CHECK(get_maximum_device_coordinates(mv) == + MachineViewCoordinates{{1, 1}}); } } SUBCASE("3D MachineView") { MachineView mv = - MachineView{DeviceCoordinates{{0, 1, 2}}, + MachineView{MachineViewCoordinates{{0, 1, 2}}, StridedRectangle{{ StridedRectangleSide(num_points_t(1), stride_t{3}), StridedRectangleSide(num_points_t(2), stride_t{1}), @@ -123,14 +126,14 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("get_maximum_device_coordinates") { 
CHECK(get_maximum_device_coordinates(mv) == - DeviceCoordinates{{0, 1, 1}}); + MachineViewCoordinates{{0, 1, 1}}); } } } TEST_CASE("make_1d_machine_view") { - DeviceCoordinates start = DeviceCoordinates{{1}}; + MachineViewCoordinates start = MachineViewCoordinates{{1}}; MachineView mv = MachineView{ start, StridedRectangle{{StridedRectangleSide{num_points_t{7}, stride_t{5}}}}, @@ -171,21 +174,21 @@ TEST_SUITE(FF_TEST_SUITE) { {{machine_view_dim_idx_t{0}, MachineSpecificationDimension::INTRA}}}; SUBCASE("Device 0") { - DeviceCoordinates device = DeviceCoordinates{{0}}; + MachineViewCoordinates device = MachineViewCoordinates{{0}}; device_id_t correct = device_id_from_index(1, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device 1") { - DeviceCoordinates device = DeviceCoordinates{{1}}; + MachineViewCoordinates device = MachineViewCoordinates{{1}}; device_id_t correct = device_id_from_index(3, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device 2") { - DeviceCoordinates device = DeviceCoordinates{{2}}; + MachineViewCoordinates device = MachineViewCoordinates{{2}}; device_id_t correct = device_id_from_index(5, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); @@ -193,7 +196,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("2D case") { MachineView mv = - MachineView{DeviceCoordinates{{1, 2}}, + MachineView{MachineViewCoordinates{{1, 2}}, StridedRectangle{ {StridedRectangleSide(num_points_t(2), stride_t{1}), StridedRectangleSide(num_points_t(2), stride_t{2})}}, @@ -205,26 +208,26 @@ TEST_SUITE(FF_TEST_SUITE) { {machine_view_dim_idx_t{1}, MachineSpecificationDimension::INTRA}}}; SUBCASE("Device (0,0)") { - DeviceCoordinates device = DeviceCoordinates{{0, 0}}; + MachineViewCoordinates device = MachineViewCoordinates{{0, 0}}; device_id_t correct = device_id_from_index(7, 
DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device (0,1)") { - DeviceCoordinates device = DeviceCoordinates{{0, 1}}; + MachineViewCoordinates device = MachineViewCoordinates{{0, 1}}; device_id_t correct = device_id_from_index(9, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device (1,0)") { - DeviceCoordinates device = DeviceCoordinates{{1, 0}}; + MachineViewCoordinates device = MachineViewCoordinates{{1, 0}}; device_id_t correct = device_id_from_index(12, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device (1,1)") { - DeviceCoordinates device = DeviceCoordinates{{1, 1}}; + MachineViewCoordinates device = MachineViewCoordinates{{1, 1}}; device_id_t correct = device_id_from_index(14, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); @@ -233,7 +236,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("3D case") { MachineView mv = - MachineView{DeviceCoordinates{{0, 2, 0}}, + MachineView{MachineViewCoordinates{{0, 2, 0}}, StridedRectangle{ {StridedRectangleSide(num_points_t(2), stride_t{1}), StridedRectangleSide(num_points_t(2), stride_t{2}), @@ -247,14 +250,14 @@ TEST_SUITE(FF_TEST_SUITE) { {machine_view_dim_idx_t{2}, MachineSpecificationDimension::INTRA}}}; SUBCASE("Device (0,0,1)") { - DeviceCoordinates device = DeviceCoordinates{{0, 1, 0}}; + MachineViewCoordinates device = MachineViewCoordinates{{0, 1, 0}}; device_id_t correct = device_id_from_index(3, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device (1, 1, 0)") { - DeviceCoordinates device = DeviceCoordinates{{1, 0, 1}}; + MachineViewCoordinates device = MachineViewCoordinates{{1, 0, 1}}; device_id_t correct = device_id_from_index(14, DeviceType::GPU); device_id_t result = 
get_device_id(mv, device, ms, projection); CHECK(correct == result); diff --git a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc index 7f8b97badc..44f498a9e6 100644 --- a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc @@ -6,7 +6,7 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("StartInvariantMachineView") { - DeviceCoordinates start = DeviceCoordinates{{0}}; + MachineViewCoordinates start = MachineViewCoordinates{{0}}; StridedRectangle rect = StridedRectangle{{ StridedRectangleSide(num_points_t{2}, stride_t{3}), StridedRectangleSide(num_points_t{2}, stride_t{2}), diff --git a/lib/pcg/test/src/pcg/strided_rectangle.cc b/lib/pcg/test/src/pcg/strided_rectangle.cc index b1b8e13757..89ffe82668 100644 --- a/lib/pcg/test/src/pcg/strided_rectangle.cc +++ b/lib/pcg/test/src/pcg/strided_rectangle.cc @@ -1,6 +1,7 @@ #include "pcg/strided_rectangle.h" #include "pcg/strided_rectangle_side.h" #include "test/utils/doctest.h" +#include "utils/fmt/vector.h" using namespace FlexFlow; diff --git a/lib/utils/include/utils/containers/get_all_permutations.h b/lib/utils/include/utils/containers/get_all_permutations.h index b7e797dad2..ee23e94533 100644 --- a/lib/utils/include/utils/containers/get_all_permutations.h +++ b/lib/utils/include/utils/containers/get_all_permutations.h @@ -101,6 +101,118 @@ permutations_container get_all_permutations(C const &c) { return permutations_container(c.cbegin(), c.cend()); } +template +struct permutations_with_repetition_container { +public: + template + permutations_with_repetition_container(It start, It end, size_t n) + : elements(start, end), n(n) { + if (elements.empty() || n == 0) { + done = true; + } else { + indices.assign(n, 0); + done = false; + } + } + + struct iterator { + public: + using difference_type = long; + using value_type = std::vector; + using pointer = std::vector const *; + using reference = std::vector const &; 
+ using iterator_category = std::input_iterator_tag; + + public: + iterator(permutations_with_repetition_container const &c, bool end_iter) + : c(c), indices(c.indices), done(end_iter || c.done) { + if (end_iter || c.done) { + done = true; + } + } + + iterator &operator++() { + assert(!done); + + // Essentially counting in base `c.elements.size()` + for (int i = c.n - 1; i >= 0; --i) { + if (indices[i] + 1 < c.elements.size()) { + indices[i]++; + break; + } else { + indices[i] = 0; + if (i == 0) { + done = true; + } + } + } + return *this; + } + + iterator operator++(int) { + iterator retval = *this; + ++(*this); + return retval; + } + + bool operator==(iterator const &other) const { + return done == other.done && indices == other.indices; + } + + bool operator!=(iterator const &other) const { + return !(*this == other); + } + + value_type operator*() const { + std::vector result(c.n); + for (size_t i = 0; i < c.n; ++i) { + result[i] = c.elements[indices[i]]; + } + return result; + } + + private: + permutations_with_repetition_container const &c; + std::vector indices; + bool done; + }; + + using const_iterator = iterator; + using value_type = typename iterator::value_type; + using difference_type = typename iterator::difference_type; + using pointer = typename iterator::pointer; + using reference = typename iterator::reference; + using const_reference = typename iterator::reference; + + iterator begin() const { + return iterator(*this, false); + } + + iterator end() const { + return iterator(*this, true); + } + + const_iterator cbegin() const { + return iterator(*this, false); + } + + const_iterator cend() const { + return iterator(*this, true); + } + +private: + std::vector elements; + size_t n; + std::vector indices; + bool done; +}; + +template +permutations_with_repetition_container + get_all_permutations_with_repetition(C const &c, size_t n) { + return permutations_with_repetition_container(c.cbegin(), c.cend(), n); +} + } // namespace FlexFlow #endif diff 
--git a/lib/utils/include/utils/containers/map_from_keys_and_values.h b/lib/utils/include/utils/containers/map_from_keys_and_values.h new file mode 100644 index 0000000000..499965dc5e --- /dev/null +++ b/lib/utils/include/utils/containers/map_from_keys_and_values.h @@ -0,0 +1,29 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAP_FROM_KEYS_AND_VALUES_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_MAP_FROM_KEYS_AND_VALUES_H + +#include "utils/containers/zip.h" +#include "utils/exception.h" +#include + +namespace FlexFlow { + +template +std::unordered_map + map_from_keys_and_values(std::vector const &keys, + std::vector const &values) { + if (keys.size() != values.size()) { + throw mk_runtime_error(fmt::format( + "recieved keys (of size {}) not matching values (of size {})", + keys.size(), + values.size())); + } + std::unordered_map result; + for (auto const &[k, v] : zip(keys, values)) { + result.insert({k, v}); + } + return result; +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/transform.h b/lib/utils/include/utils/containers/transform.h index ec3d5f5612..02dadb5352 100644 --- a/lib/utils/include/utils/containers/transform.h +++ b/lib/utils/include/utils/containers/transform.h @@ -33,6 +33,18 @@ std::unordered_set transform(std::unordered_set const &v, F const &f) { return result; } +template ()(std::declval()))> +std::unordered_multiset transform(std::unordered_multiset const &v, + F const &f) { + std::unordered_multiset result; + for (auto const &e : v) { + result.insert(f(e)); + } + return result; +} + template ()(std::declval()))> diff --git a/lib/utils/test/src/utils/containers/cartesian_product.cc b/lib/utils/test/src/utils/containers/cartesian_product.cc index 9e00794b1c..fecb36786f 100644 --- a/lib/utils/test/src/utils/containers/cartesian_product.cc +++ b/lib/utils/test/src/utils/containers/cartesian_product.cc @@ -1,4 +1,6 @@ #include "utils/containers/cartesian_product.h" +#include 
"utils/fmt/unordered_multiset.h" +#include "utils/fmt/vector.h" #include #include #include diff --git a/lib/utils/test/src/utils/containers/filter.cc b/lib/utils/test/src/utils/containers/filter.cc index 2c4eea3ff9..a2fa5fb6c1 100644 --- a/lib/utils/test/src/utils/containers/filter.cc +++ b/lib/utils/test/src/utils/containers/filter.cc @@ -3,6 +3,7 @@ #include "utils/fmt/map.h" #include "utils/fmt/set.h" #include "utils/fmt/unordered_map.h" +#include "utils/fmt/unordered_multiset.h" #include "utils/fmt/unordered_set.h" #include "utils/fmt/vector.h" diff --git a/lib/utils/test/src/utils/containers/get_all_permutations.cc b/lib/utils/test/src/utils/containers/get_all_permutations.cc index 5f22266809..fb075e738d 100644 --- a/lib/utils/test/src/utils/containers/get_all_permutations.cc +++ b/lib/utils/test/src/utils/containers/get_all_permutations.cc @@ -51,4 +51,77 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(result == correct); } } + + TEST_CASE("get_all_permutations_with_repetition") { + SUBCASE("container size = 3, n = 1") { + std::vector input = {1, 2, 3}; + + std::unordered_multiset> result = + unordered_multiset_of(get_all_permutations_with_repetition(input, 1)); + std::unordered_multiset> correct = { + {1}, + {2}, + {3}, + }; + + CHECK(result == correct); + } + + SUBCASE("container size 3, n = 2") { + std::vector input = {1, 2, 3}; + + std::unordered_multiset> result = + unordered_multiset_of(get_all_permutations_with_repetition(input, 2)); + std::unordered_multiset> correct = { + {1, 1}, + {1, 2}, + {1, 3}, + {2, 1}, + {2, 2}, + {2, 3}, + {3, 1}, + {3, 2}, + {3, 3}, + }; + + CHECK(result == correct); + } + + SUBCASE("container size 2, n = 3") { + std::vector input = {1, 2}; + + std::unordered_multiset> result = + unordered_multiset_of(get_all_permutations_with_repetition(input, 3)); + std::unordered_multiset> correct = { + {1, 1, 1}, + {1, 1, 2}, + {1, 2, 1}, + {1, 2, 2}, + {2, 1, 1}, + {2, 1, 2}, + {2, 2, 1}, + {2, 2, 2}, + }; + + CHECK(result == correct); + } + + 
SUBCASE("duplicate elements") { + std::vector input = {1, 2, 2}; + + std::unordered_multiset> result = + unordered_multiset_of(get_all_permutations_with_repetition(input, 2)); + std::unordered_multiset> correct = {{1, 1}, + {1, 2}, + {1, 2}, + {2, 1}, + {2, 1}, + {2, 2}, + {2, 2}, + {2, 2}, + {2, 2}}; + + CHECK(result == correct); + } + } } diff --git a/lib/utils/test/src/utils/containers/range.cc b/lib/utils/test/src/utils/containers/range.cc index 15244278de..e9decd43bc 100644 --- a/lib/utils/test/src/utils/containers/range.cc +++ b/lib/utils/test/src/utils/containers/range.cc @@ -1,4 +1,5 @@ #include "utils/containers/range.h" +#include "utils/fmt/vector.h" #include "utils/hash/unordered_set.h" #include #include diff --git a/lib/utils/test/src/utils/containers/replicate.cc b/lib/utils/test/src/utils/containers/replicate.cc index 5a02fcdcaa..6d0ee10f68 100644 --- a/lib/utils/test/src/utils/containers/replicate.cc +++ b/lib/utils/test/src/utils/containers/replicate.cc @@ -1,4 +1,6 @@ #include "utils/containers/replicate.h" +#include "utils/fmt/unordered_set.h" +#include "utils/fmt/vector.h" #include "utils/hash/unordered_set.h" #include #include diff --git a/lib/utils/test/src/utils/containers/scanl.cc b/lib/utils/test/src/utils/containers/scanl.cc index d8c31bfb92..00e63663a9 100644 --- a/lib/utils/test/src/utils/containers/scanl.cc +++ b/lib/utils/test/src/utils/containers/scanl.cc @@ -1,7 +1,9 @@ #include "utils/containers/scanl.h" +#include "utils/fmt/vector.h" #include #include #include + using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { From 7eff4f526bbb5393592fd3f02186d38cd7f5cf71 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Fri, 13 Sep 2024 14:35:37 -0700 Subject: [PATCH 22/34] PR review fixes --- .../src/compiler/allowed_machine_views.cc | 14 +-- .../machine_view_to_tensor_mapping.cc | 2 +- .../test/src/allowed_machine_views.cc | 62 ++++----- .../src/machine_view_to_tensor_mapping.cc | 42 ++++--- .../test/src/test_local_cost_estimator.cc | 
2 +- .../op-attrs/parallel_tensor_dim_idx_t.h | 2 +- .../src/op-attrs/parallel_tensor_dim_idx_t.cc | 2 +- lib/pcg/include/pcg/machine_specification.h | 6 +- ...hine_specification_coordinate.struct.toml} | 2 +- .../machine_specification_dimension.enum.toml | 4 +- lib/pcg/include/pcg/machine_view.h | 32 ++--- lib/pcg/include/pcg/machine_view.struct.toml | 4 +- ...ml => machine_view_coordinate.struct.toml} | 4 +- .../pcg/machine_view_projection.struct.toml | 10 +- .../pcg/start_invariant_machine_view.h | 2 +- lib/pcg/include/pcg/strided_rectangle.h | 6 + lib/pcg/src/pcg/machine_specification.cc | 17 ++- lib/pcg/src/pcg/machine_view.cc | 91 +++++++------- .../src/pcg/start_invariant_machine_view.cc | 2 +- lib/pcg/src/pcg/strided_rectangle.cc | 4 +- lib/pcg/test/src/pcg/machine_view.cc | 119 +++++++++--------- .../src/pcg/start_invariant_machine_view.cc | 41 +++--- lib/pcg/test/src/pcg/strided_rectangle.cc | 9 +- .../include/utils/containers/replicate.h | 6 +- .../src/utils/containers/cartesian_product.cc | 8 ++ 25 files changed, 253 insertions(+), 240 deletions(-) rename lib/pcg/include/pcg/{machine_specification_coordinates.struct.toml => machine_specification_coordinate.struct.toml} (87%) rename lib/pcg/include/pcg/{machine_view_coordinates.struct.toml => machine_view_coordinate.struct.toml} (82%) diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index f7d4d9ae73..4bc8210f4b 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -43,7 +43,7 @@ static std::unordered_multiset bool is_valid_partial_machine_view_mapping(MachineView const &mv, MachineSpecification const &ms, MachineViewProjection const &proj) { - MachineSpecificationCoordinates maximum_device_coords = + MachineSpecificationCoordinate maximum_device_coords = get_machine_specification_coordinates( mv, get_maximum_device_coordinates(mv), ms, proj); return 
is_valid_machine_specification_coordinates(ms, maximum_device_coords); @@ -104,18 +104,18 @@ static std::unordered_set> std::unordered_set> raw_coordinates = unordered_set_of(cartesian_product(coordinate_ranges)); - std::unordered_set machine_view_coordinates = + std::unordered_set machine_view_coordinate = transform(raw_coordinates, [](std::vector const &point) { - return MachineViewCoordinates(point); + return MachineViewCoordinate(point); }); - return machine_view_coordinates; + return machine_view_coordinate; }; auto candidate_projections = [](MachineView const &mv) { std::unordered_set result; std::unordered_set options = { - MachineSpecificationDimension::INTER, - MachineSpecificationDimension::INTRA}; + MachineSpecificationDimension::INTER_NODE, + MachineSpecificationDimension::INTRA_NODE}; for (std::vector const &proj_vec : get_all_permutations_with_repetition(options, num_dims(mv))) { @@ -136,7 +136,7 @@ static std::unordered_set> candidate_strides(sorted(tensor_dims), total_devices)) { StridedRectangle rect = get_strided_rectangle(strides, sorted(tensor_dims)); auto start_inv_mv = StartInvariantMachineView{rect, device_type}; - for (MachineViewCoordinates start : candidate_starts(sorted(tensor_dims))) { + for (MachineViewCoordinate start : candidate_starts(sorted(tensor_dims))) { MachineView mv = machine_view_from_start_invariant(start_inv_mv, start); for (MachineViewProjection const &proj : candidate_projections(mv)) { machine_views.insert({mv, proj}); diff --git a/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc b/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc index 87fd00743e..e1ab335f09 100644 --- a/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc +++ b/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc @@ -22,7 +22,7 @@ std::unordered_set std::vector machine_view_dim_ordering = get_machine_view_indices(mv); std::unordered_set shape_indices = - get_parallel_tensor_indices(shape); + 
get_parallel_tensor_dim_indices(shape); shape_indices = filter(shape_indices, [&](parallel_tensor_dim_idx_t const &idx) { return get_degree(get_parallel_dim_at_idx(shape, idx)) != 1; diff --git a/lib/compiler/test/src/allowed_machine_views.cc b/lib/compiler/test/src/allowed_machine_views.cc index 6a422d2f20..7a4184d86c 100644 --- a/lib/compiler/test/src/allowed_machine_views.cc +++ b/lib/compiler/test/src/allowed_machine_views.cc @@ -33,20 +33,20 @@ TEST_SUITE(FF_TEST_SUITE) { }; std::vector correct_mv = { - make_1d_machine_view(0, 3, stride_t(1)), - make_1d_machine_view(1, 4, stride_t(1)), - make_1d_machine_view(2, 5, stride_t(1)), - make_1d_machine_view(0, 6, stride_t(2))}; + make_1d_machine_view(DeviceType::GPU, 0, 3, stride_t(1)), + make_1d_machine_view(DeviceType::GPU, 1, 4, stride_t(1)), + make_1d_machine_view(DeviceType::GPU, 2, 5, stride_t(1)), + make_1d_machine_view(DeviceType::GPU, 0, 6, stride_t(2))}; std::vector correct_proj = { MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA}}}, + MachineSpecificationDimension::INTRA_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA}}}, + MachineSpecificationDimension::INTRA_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA}}}, + MachineSpecificationDimension::INTRA_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA}}}, + MachineSpecificationDimension::INTRA_NODE}}}, }; std::unordered_set> @@ -77,7 +77,7 @@ TEST_SUITE(FF_TEST_SUITE) { auto make_2d_views = [&](int start_x, int start_y, int stride1, int stride2) { return MachineView{ - MachineViewCoordinates{{start_x, start_y}}, + MachineViewCoordinate{{start_x, start_y}}, StridedRectangle{ {StridedRectangleSide{num_points_t{2}, stride_t{stride1}}, StridedRectangleSide{num_points_t{3}, stride_t{stride2}}}}, @@ -96,30 +96,30 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector correct_proj = { 
MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA}, + MachineSpecificationDimension::INTRA_NODE}, {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTER}}}, + MachineSpecificationDimension::INTER_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA}, + MachineSpecificationDimension::INTRA_NODE}, {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTER}}}, + MachineSpecificationDimension::INTER_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA}, + MachineSpecificationDimension::INTRA_NODE}, {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTER}}}, + MachineSpecificationDimension::INTER_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER}, + MachineSpecificationDimension::INTER_NODE}, {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTRA}}}, + MachineSpecificationDimension::INTRA_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER}, + MachineSpecificationDimension::INTER_NODE}, {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTRA}}}, + MachineSpecificationDimension::INTRA_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER}, + MachineSpecificationDimension::INTER_NODE}, {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTRA}}}, + MachineSpecificationDimension::INTRA_NODE}}}, }; std::unordered_set> @@ -158,9 +158,9 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector correct_proj = { MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA}}}, + MachineSpecificationDimension::INTRA_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA}}}, + MachineSpecificationDimension::INTRA_NODE}}}, }; std::unordered_set< @@ -207,22 +207,22 @@ TEST_SUITE(FF_TEST_SUITE) { 
std::vector correct_proj = { MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA}, + MachineSpecificationDimension::INTRA_NODE}, {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTER}}}, + MachineSpecificationDimension::INTER_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA}, + MachineSpecificationDimension::INTRA_NODE}, {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTER}}}, + MachineSpecificationDimension::INTER_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER}, + MachineSpecificationDimension::INTER_NODE}, {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTRA}}}, + MachineSpecificationDimension::INTRA_NODE}}}, MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER}, + MachineSpecificationDimension::INTER_NODE}, {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTRA}}}, + MachineSpecificationDimension::INTRA_NODE}}}, }; std::unordered_set< std::pair> diff --git a/lib/compiler/test/src/machine_view_to_tensor_mapping.cc b/lib/compiler/test/src/machine_view_to_tensor_mapping.cc index 2f43bfb177..65d87979b2 100644 --- a/lib/compiler/test/src/machine_view_to_tensor_mapping.cc +++ b/lib/compiler/test/src/machine_view_to_tensor_mapping.cc @@ -24,10 +24,10 @@ TEST_SUITE(FF_TEST_SUITE) { DataType::FLOAT, }; MachineView view = - MachineView{MachineViewCoordinates{{0, 0, 0}}, + MachineView{MachineViewCoordinate{{0, 0, 0}}, StridedRectangle{{ - StridedRectangleSide{num_points_t(2), stride_t(1)}, - StridedRectangleSide{num_points_t(2), stride_t(4)}, + StridedRectangleSide{num_points_t{2}, stride_t{1}}, + StridedRectangleSide{num_points_t{2}, stride_t{4}}, }}, DeviceType::GPU}; CHECK_THROWS_AS(get_all_machine_view_to_tensor_mappings(view, shape), @@ -47,27 +47,35 @@ TEST_SUITE(FF_TEST_SUITE) { DataType::FLOAT, }; MachineView view = - 
MachineView{MachineViewCoordinates{{0, 0, 0}}, + MachineView{MachineViewCoordinate{{0, 0, 0}}, StridedRectangle{{ - StridedRectangleSide{num_points_t(2), stride_t(1)}, - StridedRectangleSide{num_points_t(2), stride_t(4)}, - StridedRectangleSide{num_points_t(3), stride_t(1)}, + StridedRectangleSide{num_points_t{2}, stride_t{1}}, + StridedRectangleSide{num_points_t{2}, stride_t{4}}, + StridedRectangleSide{num_points_t{3}, stride_t{1}}, }}, DeviceType::GPU}; + machine_view_dim_idx_t mv_dim_0 = machine_view_dim_idx_t{0}; + machine_view_dim_idx_t mv_dim_1 = machine_view_dim_idx_t{1}; + machine_view_dim_idx_t mv_dim_2 = machine_view_dim_idx_t{2}; + parallel_tensor_dim_idx_t pt_dim_0 = + parallel_tensor_dim_idx_t{ff_dim_t{0}}; + parallel_tensor_dim_idx_t pt_dim_sum = + parallel_tensor_dim_idx_t{ReplicaType::SUM}; + parallel_tensor_dim_idx_t pt_dim_eq = + parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}; + bidict b1 = { - {machine_view_dim_idx_t(2), parallel_tensor_dim_idx_t{ff_dim_t(0)}}, - {machine_view_dim_idx_t(1), - parallel_tensor_dim_idx_t{ReplicaType::SUM}}, - {machine_view_dim_idx_t(0), - parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}}}; + {mv_dim_2, pt_dim_0}, + {mv_dim_1, pt_dim_sum}, + {mv_dim_0, pt_dim_eq}, + }; bidict b2 = { - {machine_view_dim_idx_t(2), parallel_tensor_dim_idx_t{ff_dim_t(0)}}, - {machine_view_dim_idx_t(0), - parallel_tensor_dim_idx_t{ReplicaType::SUM}}, - {machine_view_dim_idx_t(1), - parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}}}; + {mv_dim_2, pt_dim_0}, + {mv_dim_0, pt_dim_sum}, + {mv_dim_1, pt_dim_eq}, + }; std::unordered_set correct = { MachineViewToTensorMapping{b1}, MachineViewToTensorMapping{b2}}; diff --git a/lib/local-execution/test/src/test_local_cost_estimator.cc b/lib/local-execution/test/src/test_local_cost_estimator.cc index 0e72a3b02a..db8c2c66c8 100644 --- a/lib/local-execution/test/src/test_local_cost_estimator.cc +++ b/lib/local-execution/test/src/test_local_cost_estimator.cc @@ -66,7 +66,7 @@ 
TEST_SUITE(FF_CUDA_TEST_SUITE) { inputs_shape, inputs_shape, inputs_shape}, std::vector{weight_attrs}, std::vector{output_attrs}, - make_1d_machine_view(0, 1)); + make_1d_machine_view(DeviceType::GPU, 0, 1)); CHECK(result.total_elapsed_time > 0); CHECK(result.total_mem_usage > 0); diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h index 9cdc9e4839..754d477569 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h @@ -11,7 +11,7 @@ ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, parallel_tensor_dim_idx_t idx); std::unordered_set - get_parallel_tensor_indices(ParallelTensorShape const &shape); + get_parallel_tensor_dim_indices(ParallelTensorShape const &shape); } // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc index b5e52f5677..68772078e2 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc @@ -24,7 +24,7 @@ ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, } std::unordered_set - get_parallel_tensor_indices(ParallelTensorShape const &shape) { + get_parallel_tensor_dim_indices(ParallelTensorShape const &shape) { std::unordered_set indices; extend(indices, transform(range(num_shard_dims(shape.dims)), [](int idx) { return parallel_tensor_dim_idx_t(ff_dim_t(idx)); diff --git a/lib/pcg/include/pcg/machine_specification.h b/lib/pcg/include/pcg/machine_specification.h index fa27f87207..70aadd6ac5 100644 --- a/lib/pcg/include/pcg/machine_specification.h +++ b/lib/pcg/include/pcg/machine_specification.h @@ -4,7 +4,7 @@ #include "pcg/device_id_t.dtg.h" #include "pcg/device_type.dtg.h" #include "pcg/machine_specification.dtg.h" -#include "pcg/machine_specification_coordinates.dtg.h" +#include 
"pcg/machine_specification_coordinate.dtg.h" namespace FlexFlow { @@ -17,10 +17,10 @@ int get_num_devices_per_node(MachineSpecification const &ms, bool is_valid_machine_specification_coordinates( MachineSpecification const &ms, - MachineSpecificationCoordinates const &coords); + MachineSpecificationCoordinate const &coord); device_id_t get_device_id(MachineSpecification const &ms, - MachineSpecificationCoordinates const &coords); + MachineSpecificationCoordinate const &coord); } // namespace FlexFlow #endif diff --git a/lib/pcg/include/pcg/machine_specification_coordinates.struct.toml b/lib/pcg/include/pcg/machine_specification_coordinate.struct.toml similarity index 87% rename from lib/pcg/include/pcg/machine_specification_coordinates.struct.toml rename to lib/pcg/include/pcg/machine_specification_coordinate.struct.toml index c60cf913fb..f7fdfb18a7 100644 --- a/lib/pcg/include/pcg/machine_specification_coordinates.struct.toml +++ b/lib/pcg/include/pcg/machine_specification_coordinate.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "MachineSpecificationCoordinates" +name = "MachineSpecificationCoordinate" features = [ "eq", "ord", diff --git a/lib/pcg/include/pcg/machine_specification_dimension.enum.toml b/lib/pcg/include/pcg/machine_specification_dimension.enum.toml index 01ad4cd5d8..837b4306da 100644 --- a/lib/pcg/include/pcg/machine_specification_dimension.enum.toml +++ b/lib/pcg/include/pcg/machine_specification_dimension.enum.toml @@ -8,7 +8,7 @@ features = [ ] [[values]] -name = "INTER" +name = "INTER_NODE" [[values]] -name = "INTRA" +name = "INTRA_NODE" diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 822452eef0..2857b00c48 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -4,9 +4,9 @@ #include "pcg/device_id.h" #include "pcg/device_type.dtg.h" #include "pcg/machine_specification.dtg.h" -#include "pcg/machine_specification_coordinates.dtg.h" +#include 
"pcg/machine_specification_coordinate.dtg.h" #include "pcg/machine_view.dtg.h" -#include "pcg/machine_view_coordinates.dtg.h" +#include "pcg/machine_view_coordinate.dtg.h" #include "pcg/machine_view_dim_idx_t.dtg.h" #include "pcg/machine_view_projection.dtg.h" #include "pcg/num_points_t.dtg.h" @@ -16,20 +16,20 @@ namespace FlexFlow { -std::unordered_set +std::unordered_set get_devices_coordinates(MachineView const &mv); -MachineViewCoordinates get_maximum_device_coordinates(MachineView const &mv); +MachineViewCoordinate get_maximum_device_coordinates(MachineView const &mv); -MachineSpecificationCoordinates get_machine_specification_coordinates( +MachineSpecificationCoordinate get_machine_specification_coordinates( MachineView const &mv, - MachineViewCoordinates const &coordinates, + MachineViewCoordinate const &coordinates, MachineSpecification const &ms, MachineViewProjection const &projection); StridedRectangleSide get_side_at_idx(MachineView const &mv, machine_view_dim_idx_t const &idx); device_id_t get_device_id(MachineView const &mv, - MachineViewCoordinates const &coordinates, + MachineViewCoordinate const &coordinates, MachineSpecification const &ms, MachineViewProjection const &projection); std::unordered_set @@ -42,20 +42,20 @@ size_t num_devices(MachineView const &mv); std::vector get_num_devices_per_dim(MachineView const &mv); std::vector get_side_size_per_dim(MachineView const &mv); -MachineView make_1d_machine_view(int start, +MachineView make_1d_machine_view(DeviceType device_type, + int start, int stop, - stride_t stride = stride_t{1}, - DeviceType device_type = DeviceType::GPU); + stride_t stride = stride_t{1}); -MachineView make_1d_machine_view(int start, +MachineView make_1d_machine_view(DeviceType device_type, + int start, num_points_t num_points, - stride_t stride = stride_t{1}, - DeviceType device_type = DeviceType::GPU); + stride_t stride = stride_t{1}); -MachineView make_1d_machine_view(int start, +MachineView make_1d_machine_view(DeviceType 
device_type, + int start, side_size_t interval_size, - stride_t stride = stride_t{1}, - DeviceType device_type = DeviceType::GPU); + stride_t stride = stride_t{1}); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/machine_view.struct.toml b/lib/pcg/include/pcg/machine_view.struct.toml index bbca071cdb..2479861ebe 100644 --- a/lib/pcg/include/pcg/machine_view.struct.toml +++ b/lib/pcg/include/pcg/machine_view.struct.toml @@ -11,13 +11,13 @@ features = [ includes = [ "pcg/strided_rectangle.h", - "pcg/machine_view_coordinates.dtg.h", + "pcg/machine_view_coordinate.dtg.h", "pcg/device_type.dtg.h", ] [[fields]] name = "start" -type = "::FlexFlow::MachineViewCoordinates" +type = "::FlexFlow::MachineViewCoordinate" [[fields]] name = "rect" diff --git a/lib/pcg/include/pcg/machine_view_coordinates.struct.toml b/lib/pcg/include/pcg/machine_view_coordinate.struct.toml similarity index 82% rename from lib/pcg/include/pcg/machine_view_coordinates.struct.toml rename to lib/pcg/include/pcg/machine_view_coordinate.struct.toml index 7055d4f165..de4d21d08a 100644 --- a/lib/pcg/include/pcg/machine_view_coordinates.struct.toml +++ b/lib/pcg/include/pcg/machine_view_coordinate.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "MachineViewCoordinates" +name = "MachineViewCoordinate" features = [ "eq", "ord", @@ -19,5 +19,5 @@ src_includes = [ ] [[fields]] -name = "raw_coords" +name = "raw_coord" type = "std::vector" diff --git a/lib/pcg/include/pcg/machine_view_projection.struct.toml b/lib/pcg/include/pcg/machine_view_projection.struct.toml index 9836a39ad5..7c7db8c7ac 100644 --- a/lib/pcg/include/pcg/machine_view_projection.struct.toml +++ b/lib/pcg/include/pcg/machine_view_projection.struct.toml @@ -2,9 +2,8 @@ namespace = "FlexFlow" name = "MachineViewProjection" features = [ "eq", - # "ord", "hash", - # "json", + "json", # "rapidcheck", "fmt", ] @@ -13,10 +12,13 @@ includes = [ "pcg/machine_view.dtg.h", "pcg/machine_view_dim_idx_t.dtg.h", 
"pcg/machine_specification_dimension.dtg.h", +] + +src_includes = [ "utils/hash/unordered_map.h", - "utils/fmt/unordered_map.h", + "utils/fmt/unordered_map.h" ] [[fields]] -name = "raw_projection" +name = "machine_view_dim_to_machine_spec_dim" type = "std::unordered_map<::FlexFlow::machine_view_dim_idx_t, ::FlexFlow::MachineSpecificationDimension>" diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.h b/lib/pcg/include/pcg/start_invariant_machine_view.h index b19db69c40..6d5fa5bd3e 100644 --- a/lib/pcg/include/pcg/start_invariant_machine_view.h +++ b/lib/pcg/include/pcg/start_invariant_machine_view.h @@ -8,7 +8,7 @@ namespace FlexFlow { MachineView machine_view_from_start_invariant(StartInvariantMachineView const &mv, - MachineViewCoordinates const &start_id); + MachineViewCoordinate const &start_id); StartInvariantMachineView start_invariant_from_machine_view(MachineView const &mv); diff --git a/lib/pcg/include/pcg/strided_rectangle.h b/lib/pcg/include/pcg/strided_rectangle.h index 6dab48b9b2..781a91e358 100644 --- a/lib/pcg/include/pcg/strided_rectangle.h +++ b/lib/pcg/include/pcg/strided_rectangle.h @@ -10,6 +10,12 @@ namespace FlexFlow { +/** + * @brief Represents a multi-dimensional rectangle with strided sides. + * + * @note This struct maintains its sides in a canonical order, which is sorted + * in ascending order (enforced at construction). 
+ */ struct StridedRectangle { private: diff --git a/lib/pcg/src/pcg/machine_specification.cc b/lib/pcg/src/pcg/machine_specification.cc index 951eb884d3..af1f8b7aea 100644 --- a/lib/pcg/src/pcg/machine_specification.cc +++ b/lib/pcg/src/pcg/machine_specification.cc @@ -34,18 +34,17 @@ int get_num_devices_per_node(MachineSpecification const &ms, } bool is_valid_machine_specification_coordinates( MachineSpecification const &ms, - MachineSpecificationCoordinates const &coords) { - return (coords.inter < ms.num_nodes) && - (coords.intra < get_num_devices_per_node(ms, coords.device_type)); + MachineSpecificationCoordinate const &coord) { + return (coord.inter < ms.num_nodes) && + (coord.intra < get_num_devices_per_node(ms, coord.device_type)); } device_id_t get_device_id(MachineSpecification const &ms, - MachineSpecificationCoordinates const &coords) { - assert(is_valid_machine_specification_coordinates(ms, coords)); - int raw_idx = - coords.inter * get_num_devices_per_node(ms, coords.device_type) + - coords.intra; - return device_id_from_index(raw_idx, coords.device_type); + MachineSpecificationCoordinate const &coord) { + assert(is_valid_machine_specification_coordinates(ms, coord)); + int raw_idx = coord.inter * get_num_devices_per_node(ms, coord.device_type) + + coord.intra; + return device_id_from_index(raw_idx, coord.device_type); } } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index aad7e763b4..81ffc294ae 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,7 +1,7 @@ #include "pcg/machine_view.h" #include "pcg/device_id.h" #include "pcg/machine_specification.h" -#include "pcg/machine_view_coordinates.dtg.h" +#include "pcg/machine_view_coordinate.dtg.h" #include "pcg/machine_view_dim_idx_t.dtg.h" #include "pcg/machine_view_projection.dtg.h" #include "pcg/strided_rectangle.h" @@ -23,7 +23,7 @@ namespace FlexFlow { -std::unordered_set +std::unordered_set 
get_devices_coordinates(MachineView const &mv) { std::vector> coordinate_ranges = @@ -33,39 +33,41 @@ std::unordered_set std::unordered_set> raw_coordinates = unordered_set_of(cartesian_product(coordinate_ranges)); - std::unordered_set machine_view_coordinates = + std::unordered_set machine_view_coordinate = transform(raw_coordinates, [](std::vector const &point) { - return MachineViewCoordinates(point); + return MachineViewCoordinate{point}; }); - return machine_view_coordinates; + return machine_view_coordinate; } -MachineViewCoordinates get_maximum_device_coordinates(MachineView const &mv) { +MachineViewCoordinate get_maximum_device_coordinates(MachineView const &mv) { return maximum(get_devices_coordinates(mv)); } -MachineSpecificationCoordinates get_machine_specification_coordinates( +MachineSpecificationCoordinate get_machine_specification_coordinates( MachineView const &mv, - MachineViewCoordinates const &coordinates, + MachineViewCoordinate const &coordinates, MachineSpecification const &ms, MachineViewProjection const &projection) { - auto inter_projection = filter_values( - projection.raw_projection, [](MachineSpecificationDimension const &dim) { - return dim == MachineSpecificationDimension::INTER; - }); - auto intra_projection = filter_values( - projection.raw_projection, [](MachineSpecificationDimension const &dim) { - return dim == MachineSpecificationDimension::INTRA; - }); - - MachineViewCoordinates transformed_coordinates = MachineViewCoordinates{ - transform(zip(coordinates.raw_coords, mv.rect.get_sides()), + auto inter_projection = + filter_values(projection.machine_view_dim_to_machine_spec_dim, + [](MachineSpecificationDimension const &dim) { + return dim == MachineSpecificationDimension::INTER_NODE; + }); + auto intra_projection = + filter_values(projection.machine_view_dim_to_machine_spec_dim, + [](MachineSpecificationDimension const &dim) { + return dim == MachineSpecificationDimension::INTRA_NODE; + }); + + MachineViewCoordinate 
transformed_coordinates = MachineViewCoordinate{ + transform(zip(coordinates.raw_coord, mv.rect.get_sides()), [&](auto const &pair) { return pair.first * pair.second.stride.unwrapped; })}; - transformed_coordinates = MachineViewCoordinates{ - transform(zip(transformed_coordinates.raw_coords, mv.start.raw_coords), + transformed_coordinates = MachineViewCoordinate{ + transform(zip(transformed_coordinates.raw_coord, mv.start.raw_coord), [&](auto const &pair) { return pair.first + pair.second; })}; auto get_coordinate = [&](auto const &sub_projection) { @@ -82,9 +84,9 @@ MachineSpecificationCoordinates get_machine_specification_coordinates( return result * side_size.unwrapped; }); std::vector filtered_coord; - for (int i = 0; i < transformed_coordinates.raw_coords.size(); ++i) { + for (int i = 0; i < transformed_coordinates.raw_coord.size(); ++i) { if (contains(relevant_dimensions, machine_view_dim_idx_t{i})) { - filtered_coord.push_back(transformed_coordinates.raw_coords[i]); + filtered_coord.push_back(transformed_coordinates.raw_coord[i]); } } return sum( @@ -93,29 +95,28 @@ MachineSpecificationCoordinates get_machine_specification_coordinates( }; int inter_coordinate = get_coordinate(inter_projection); int intra_coordinate = get_coordinate(intra_projection); - return MachineSpecificationCoordinates{ + return MachineSpecificationCoordinate{ inter_coordinate, intra_coordinate, mv.device_type}; } device_id_t get_device_id(MachineView const &mv, - MachineViewCoordinates const &coordinates, + MachineViewCoordinate const &coordinates, MachineSpecification const &ms, MachineViewProjection const &projection) { - MachineSpecificationCoordinates coords = + MachineSpecificationCoordinate coord = get_machine_specification_coordinates(mv, coordinates, ms, projection); - return get_device_id(ms, coords); + return get_device_id(ms, coord); } std::unordered_set get_device_ids(MachineView const &mv, MachineSpecification const &ms, MachineViewProjection const &projection) { - 
std::unordered_set devices_ids; - for (MachineViewCoordinates const &coordinates : - get_devices_coordinates(mv)) { - devices_ids.insert(get_device_id(mv, coordinates, ms, projection)); - } - return devices_ids; + + return transform(get_devices_coordinates(mv), + [&](MachineViewCoordinate const &c) { + return get_device_id(mv, c, ms, projection); + }); } size_t num_dims(MachineView const &mv) { @@ -151,12 +152,12 @@ static StridedRectangle make_1d_rect(int start, int stop, stride_t stride) { return rect; } -MachineView make_1d_machine_view(int start, +MachineView make_1d_machine_view(DeviceType device_type, + int start, int stop, - stride_t stride, - DeviceType device_type) { + stride_t stride) { StridedRectangle rect = make_1d_rect(start, stop, stride); - MachineViewCoordinates start_coordinate = MachineViewCoordinates{{start}}; + MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; return MachineView{start_coordinate, rect, device_type}; } @@ -166,12 +167,12 @@ static StridedRectangle start, start + num_points.unwrapped * stride.unwrapped, stride); } -MachineView make_1d_machine_view(int start, +MachineView make_1d_machine_view(DeviceType device_type, + int start, num_points_t num_points, - stride_t stride, - DeviceType device_type) { + stride_t stride) { StridedRectangle rect = make_1d_rect(start, num_points, stride); - MachineViewCoordinates start_coordinate = MachineViewCoordinates{{start}}; + MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; return MachineView{start_coordinate, rect, device_type}; } @@ -180,12 +181,12 @@ static StridedRectangle return make_1d_rect(start, start + interval_size.unwrapped, stride); } -MachineView make_1d_machine_view(int start, +MachineView make_1d_machine_view(DeviceType device_type, + int start, side_size_t interval_size, - stride_t stride, - DeviceType device_type) { + stride_t stride) { StridedRectangle rect = make_1d_rect(start, interval_size, stride); - MachineViewCoordinates 
start_coordinate = MachineViewCoordinates{{start}}; + MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; return MachineView{start_coordinate, rect, device_type}; } diff --git a/lib/pcg/src/pcg/start_invariant_machine_view.cc b/lib/pcg/src/pcg/start_invariant_machine_view.cc index 981cb240e2..8f0a4d0052 100644 --- a/lib/pcg/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/src/pcg/start_invariant_machine_view.cc @@ -5,7 +5,7 @@ namespace FlexFlow { MachineView machine_view_from_start_invariant( StartInvariantMachineView const &start_invariant_mv, - MachineViewCoordinates const &start) { + MachineViewCoordinate const &start) { return MachineView{ start, start_invariant_mv.rect, start_invariant_mv.device_type}; } diff --git a/lib/pcg/src/pcg/strided_rectangle.cc b/lib/pcg/src/pcg/strided_rectangle.cc index 55f269c440..677fc91977 100644 --- a/lib/pcg/src/pcg/strided_rectangle.cc +++ b/lib/pcg/src/pcg/strided_rectangle.cc @@ -1,7 +1,7 @@ #include "pcg/strided_rectangle.h" #include "op-attrs/dim_ordered/transform.h" #include "pcg/device_id_t.dtg.h" -#include "pcg/machine_view_coordinates.dtg.h" +#include "pcg/machine_view_coordinate.dtg.h" #include "pcg/strided_rectangle_side.dtg.h" #include "pcg/strided_rectangle_side.h" #include "utils/containers/as_vector.h" @@ -16,6 +16,8 @@ namespace FlexFlow { +// StridedRectangle has a canonical ordering to its sides, which are sorted in +// ascending order at construction StridedRectangle::StridedRectangle( std::vector<::FlexFlow::StridedRectangleSide> const &sides) : sides(sorted(sides)) {} diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index de6459c111..f9156cdebd 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -12,7 +12,7 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("MachineView - utility functions") { MachineView mv = MachineView{ - MachineViewCoordinates{{0, 0, 0}}, + MachineViewCoordinate{{0, 0, 0}}, 
StridedRectangle{{StridedRectangleSide(num_points_t{7}, stride_t{5}), StridedRectangleSide(num_points_t{10}, stride_t{2}), StridedRectangleSide(num_points_t{1}, stride_t{4})}}, @@ -26,16 +26,16 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("get_side_size_per_dim") { - std::vector expected = { + std::vector correct = { side_size_t(1 * 4), side_size_t(7 * 5), side_size_t(10 * 2)}; std::vector result = get_side_size_per_dim(mv); - CHECK(expected == result); + CHECK(correct == result); } SUBCASE("get_num_devices_per_dim") { - std::vector expected = { + std::vector correct = { num_points_t(1), num_points_t(7), num_points_t(10)}; std::vector result = get_num_devices_per_dim(mv); - CHECK(expected == result); + CHECK(correct == result); } } @@ -44,38 +44,27 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("2D MachineView") { MachineView mv = - MachineView{MachineViewCoordinates{{0, 0}}, + MachineView{MachineViewCoordinate{{0, 0}}, StridedRectangle{{ StridedRectangleSide(num_points_t(2), stride_t{3}), StridedRectangleSide(num_points_t(2), stride_t{2}), }}, DeviceType::GPU}; SUBCASE("get_devices_coordinates") { - std::unordered_set expected = { - {MachineViewCoordinates{{0, 0}}, - MachineViewCoordinates{{0, 1}}, - MachineViewCoordinates{{1, 0}}, - MachineViewCoordinates{{1, 1}}}}; - std::unordered_set result = + std::unordered_set correct = { + {MachineViewCoordinate{{0, 0}}, + MachineViewCoordinate{{0, 1}}, + MachineViewCoordinate{{1, 0}}, + MachineViewCoordinate{{1, 1}}}}; + std::unordered_set result = get_devices_coordinates(mv); - CHECK(expected == result); + CHECK(correct == result); } } SUBCASE("3D MachineView") { - // 3D MachineView describes a 3 x 2 x 4 area, and 1*2*2=4 devices. 
- // (Pre offset) the devices are at coordinates (0, 0, 0), (0, 0, 2), (0, - // 1, 0), (0, 1, 2) Thus (pre offset) we have as device ids: - // 0 = 0*1 + 0*3 + 0*(2*3) - // 12 = 0*1 + 0*3 + 2*(2*3) - // 3 = 0*1 + 1*3 + 0*(2*3) - // 15 = 0*1 + 1*3 + 1*(2*3) - // Where the coefficients are obtained by doing `scanl(area_coefficients, - // 1, product) = {1,3,6}` and ignoring the last term. We do, however, have - // 1 as a starting device, meaning all device-id are offset by 1. We thus - // have 1, 13, 4, 16 as device-ids MachineView mv = - MachineView{MachineViewCoordinates{{0, 1, 2}}, + MachineView{MachineViewCoordinate{{0, 1, 2}}, StridedRectangle{{ StridedRectangleSide(num_points_t(1), stride_t{3}), StridedRectangleSide(num_points_t(2), stride_t{1}), @@ -84,14 +73,14 @@ TEST_SUITE(FF_TEST_SUITE) { DeviceType::GPU}; SUBCASE("get_devices_coordinates") { - std::unordered_set expected = { - {MachineViewCoordinates{{0, 0, 0}}, - MachineViewCoordinates{{0, 0, 1}}, - MachineViewCoordinates{{0, 1, 0}}, - MachineViewCoordinates{{0, 1, 1}}}}; - std::unordered_set result = + std::unordered_set correct = { + {MachineViewCoordinate{{0, 0, 0}}, + MachineViewCoordinate{{0, 0, 1}}, + MachineViewCoordinate{{0, 1, 0}}, + MachineViewCoordinate{{0, 1, 1}}}}; + std::unordered_set result = get_devices_coordinates(mv); - CHECK(expected == result); + CHECK(correct == result); } } } @@ -100,7 +89,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("2D MachineView") { MachineView mv = - MachineView{MachineViewCoordinates{{0, 0}}, + MachineView{MachineViewCoordinate{{0, 0}}, StridedRectangle{{ StridedRectangleSide(num_points_t(2), stride_t{3}), StridedRectangleSide(num_points_t(2), stride_t{2}), @@ -109,14 +98,14 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("get_maximum_device_coordinates") { CHECK(get_maximum_device_coordinates(mv) == - MachineViewCoordinates{{1, 1}}); + MachineViewCoordinate{{1, 1}}); } } SUBCASE("3D MachineView") { MachineView mv = - MachineView{MachineViewCoordinates{{0, 1, 2}}, + 
MachineView{MachineViewCoordinate{{0, 1, 2}}, StridedRectangle{{ StridedRectangleSide(num_points_t(1), stride_t{3}), StridedRectangleSide(num_points_t(2), stride_t{1}), @@ -126,14 +115,14 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("get_maximum_device_coordinates") { CHECK(get_maximum_device_coordinates(mv) == - MachineViewCoordinates{{0, 1, 1}}); + MachineViewCoordinate{{0, 1, 1}}); } } } TEST_CASE("make_1d_machine_view") { - MachineViewCoordinates start = MachineViewCoordinates{{1}}; + MachineViewCoordinate start = MachineViewCoordinate{{1}}; MachineView mv = MachineView{ start, StridedRectangle{{StridedRectangleSide{num_points_t{7}, stride_t{5}}}}, @@ -142,7 +131,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("make_1d_machine_view(int start, int stop, stride_t " "stride,DeviceType device_type)") { MachineView result = - make_1d_machine_view(1, 1 + 7 * 5, stride_t{5}, DeviceType::GPU); + make_1d_machine_view(DeviceType::GPU, 1, 1 + 7 * 5, stride_t{5}); MachineView correct = mv; CHECK(result == correct); } @@ -150,7 +139,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("make_1d_machine_view(gpu_id_t start, num_points_t num_points, " "stride_t stride,DeviceType device_type)") { MachineView result = make_1d_machine_view( - 1, num_points_t{7}, stride_t{5}, DeviceType::GPU); + DeviceType::GPU, 1, num_points_t{7}, stride_t{5}); MachineView correct = mv; CHECK(result == correct); } @@ -158,7 +147,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("make_1d_machine_view(gpu_id_t start, side_size_t side_size, " "stride_t stride,DeviceType device_type)") { MachineView result = make_1d_machine_view( - 1, side_size_t{7 * 5}, stride_t{5}, DeviceType::GPU); + DeviceType::GPU, 1, side_size_t{7 * 5}, stride_t{5}); MachineView correct = mv; CHECK(result == correct); } @@ -166,29 +155,30 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_device_id") { SUBCASE("1D case") { - MachineView mv = - make_1d_machine_view(1, num_points_t{3}, stride_t{2}); // 1 3 5 + MachineView mv = make_1d_machine_view( + DeviceType::GPU, 
1, num_points_t{3}, stride_t{2}); // 1 3 5 MachineSpecification ms = MachineSpecification{ 1, 0, 6, 0, 0}; // Single node with 6 GPUs (0,1,2,3,4,5) - MachineViewProjection projection = MachineViewProjection{ - {{machine_view_dim_idx_t{0}, MachineSpecificationDimension::INTRA}}}; + MachineViewProjection projection = + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTRA_NODE}}}; SUBCASE("Device 0") { - MachineViewCoordinates device = MachineViewCoordinates{{0}}; + MachineViewCoordinate device = MachineViewCoordinate{{0}}; device_id_t correct = device_id_from_index(1, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device 1") { - MachineViewCoordinates device = MachineViewCoordinates{{1}}; + MachineViewCoordinate device = MachineViewCoordinate{{1}}; device_id_t correct = device_id_from_index(3, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device 2") { - MachineViewCoordinates device = MachineViewCoordinates{{2}}; + MachineViewCoordinate device = MachineViewCoordinate{{2}}; device_id_t correct = device_id_from_index(5, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); @@ -196,38 +186,40 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("2D case") { MachineView mv = - MachineView{MachineViewCoordinates{{1, 2}}, + MachineView{MachineViewCoordinate{{1, 2}}, StridedRectangle{ {StridedRectangleSide(num_points_t(2), stride_t{1}), StridedRectangleSide(num_points_t(2), stride_t{2})}}, DeviceType::GPU}; MachineSpecification ms = MachineSpecification{3, 0, 5, 0, 0}; // 3 nodes with 5 GPUs each - MachineViewProjection projection = MachineViewProjection{ - {{machine_view_dim_idx_t{0}, MachineSpecificationDimension::INTER}, - {machine_view_dim_idx_t{1}, MachineSpecificationDimension::INTRA}}}; + MachineViewProjection projection = + 
MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTER_NODE}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTRA_NODE}}}; SUBCASE("Device (0,0)") { - MachineViewCoordinates device = MachineViewCoordinates{{0, 0}}; + MachineViewCoordinate device = MachineViewCoordinate{{0, 0}}; device_id_t correct = device_id_from_index(7, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device (0,1)") { - MachineViewCoordinates device = MachineViewCoordinates{{0, 1}}; + MachineViewCoordinate device = MachineViewCoordinate{{0, 1}}; device_id_t correct = device_id_from_index(9, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device (1,0)") { - MachineViewCoordinates device = MachineViewCoordinates{{1, 0}}; + MachineViewCoordinate device = MachineViewCoordinate{{1, 0}}; device_id_t correct = device_id_from_index(12, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device (1,1)") { - MachineViewCoordinates device = MachineViewCoordinates{{1, 1}}; + MachineViewCoordinate device = MachineViewCoordinate{{1, 1}}; device_id_t correct = device_id_from_index(14, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); @@ -236,7 +228,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("3D case") { MachineView mv = - MachineView{MachineViewCoordinates{{0, 2, 0}}, + MachineView{MachineViewCoordinate{{0, 2, 0}}, StridedRectangle{ {StridedRectangleSide(num_points_t(2), stride_t{1}), StridedRectangleSide(num_points_t(2), stride_t{2}), @@ -244,20 +236,23 @@ TEST_SUITE(FF_TEST_SUITE) { DeviceType::GPU}; MachineSpecification ms = MachineSpecification{2, 0, 8, 0, 0}; // 3 nodes with 5 GPUs each - MachineViewProjection projection = MachineViewProjection{ - {{machine_view_dim_idx_t{0}, 
MachineSpecificationDimension::INTER}, - {machine_view_dim_idx_t{1}, MachineSpecificationDimension::INTRA}, - {machine_view_dim_idx_t{2}, MachineSpecificationDimension::INTRA}}}; + MachineViewProjection projection = + MachineViewProjection{{{machine_view_dim_idx_t{0}, + MachineSpecificationDimension::INTER_NODE}, + {machine_view_dim_idx_t{1}, + MachineSpecificationDimension::INTRA_NODE}, + {machine_view_dim_idx_t{2}, + MachineSpecificationDimension::INTRA_NODE}}}; SUBCASE("Device (0,0,1)") { - MachineViewCoordinates device = MachineViewCoordinates{{0, 1, 0}}; + MachineViewCoordinate device = MachineViewCoordinate{{0, 1, 0}}; device_id_t correct = device_id_from_index(3, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); } SUBCASE("Device (1, 1, 0)") { - MachineViewCoordinates device = MachineViewCoordinates{{1, 0, 1}}; + MachineViewCoordinate device = MachineViewCoordinate{{1, 0, 1}}; device_id_t correct = device_id_from_index(14, DeviceType::GPU); device_id_t result = get_device_id(mv, device, ms, projection); CHECK(correct == result); diff --git a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc index 44f498a9e6..b8743bd41b 100644 --- a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc @@ -6,7 +6,7 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("StartInvariantMachineView") { - MachineViewCoordinates start = MachineViewCoordinates{{0}}; + MachineViewCoordinate start = MachineViewCoordinate{{0}}; StridedRectangle rect = StridedRectangle{{ StridedRectangleSide(num_points_t{2}, stride_t{3}), StridedRectangleSide(num_points_t{2}, stride_t{2}), @@ -14,7 +14,7 @@ TEST_SUITE(FF_TEST_SUITE) { DeviceType device_type = DeviceType::GPU; - SUBCASE("To StartInvariantMachineView") { + SUBCASE("start_invariant_from_machine_view") { MachineView input = MachineView{start, rect, device_type}; @@ -25,28 +25,21 @@ 
TEST_SUITE(FF_TEST_SUITE) { CHECK(correct == result); } - SUBCASE("From StartInvariantMachineView") { - - StartInvariantMachineView input = - StartInvariantMachineView{rect, device_type}; - MachineView correct = MachineView{start, rect, device_type}; - MachineView result = machine_view_from_start_invariant(input, start); - CHECK(correct == result); - } - - SUBCASE("To and From") { - MachineView correct = MachineView{start, rect, device_type}; - MachineView result = machine_view_from_start_invariant( - start_invariant_from_machine_view(correct), start); - CHECK(correct == result); - } - - SUBCASE("From and To") { - StartInvariantMachineView correct = - StartInvariantMachineView{rect, device_type}; - StartInvariantMachineView result = start_invariant_from_machine_view( - machine_view_from_start_invariant(correct, start)); - CHECK(correct == result); + SUBCASE("conversion is invertible") { + SUBCASE("MachineView -> StrideInvariant -> MachineView") { + MachineView correct = MachineView{start, rect, device_type}; + MachineView result = machine_view_from_start_invariant( + start_invariant_from_machine_view(correct), start); + CHECK(correct == result); + } + + SUBCASE("StrideInvariant -> MachineView -> StrideInvariant") { + StartInvariantMachineView correct = + StartInvariantMachineView{rect, device_type}; + StartInvariantMachineView result = start_invariant_from_machine_view( + machine_view_from_start_invariant(correct, start)); + CHECK(correct == result); + } } } } diff --git a/lib/pcg/test/src/pcg/strided_rectangle.cc b/lib/pcg/test/src/pcg/strided_rectangle.cc index 89ffe82668..81710fcd0a 100644 --- a/lib/pcg/test/src/pcg/strided_rectangle.cc +++ b/lib/pcg/test/src/pcg/strided_rectangle.cc @@ -14,9 +14,12 @@ TEST_SUITE(FF_TEST_SUITE) { StridedRectangle r0 = StridedRectangle{{s0, s1}}; StridedRectangle r1 = StridedRectangle{{s1, s0}}; - CHECK(r0 == r1); - CHECK(r1.get_sides() == std::vector{s0, s1}); - CHECK(r1.get_sides() != std::vector{s1, s0}); + SUBCASE("has 
canonical order") { + CHECK(r0 == r1); + } + SUBCASE("canonical ordering is sorting") { + CHECK(r1.get_sides() == std::vector{s0, s1}); + } } SUBCASE("helper functions") { diff --git a/lib/utils/include/utils/containers/replicate.h b/lib/utils/include/utils/containers/replicate.h index 46d31c04a4..aa3d0a7e35 100644 --- a/lib/utils/include/utils/containers/replicate.h +++ b/lib/utils/include/utils/containers/replicate.h @@ -7,11 +7,7 @@ namespace FlexFlow { template std::vector replicate(int n, T const &element) { - std::vector result; - for (int i = 0; i < n; ++i) { - result.push_back(element); - } - return result; + return std::vector(n, element); } } // namespace FlexFlow diff --git a/lib/utils/test/src/utils/containers/cartesian_product.cc b/lib/utils/test/src/utils/containers/cartesian_product.cc index fecb36786f..a89e8e24b7 100644 --- a/lib/utils/test/src/utils/containers/cartesian_product.cc +++ b/lib/utils/test/src/utils/containers/cartesian_product.cc @@ -10,6 +10,14 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("cartesian_product") { + SUBCASE("empty") { + std::vector> containers = {}; + std::unordered_multiset> result = + cartesian_product(containers); + std::unordered_multiset> correct = {{}}; + CHECK(result == correct); + } + SUBCASE("single container, one element") { std::vector> containers = {{1}}; std::unordered_multiset> result = From 21ca265c7a9409f181d33b19e55b033a56cf9f4c Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Sat, 14 Sep 2024 18:49:04 -0700 Subject: [PATCH 23/34] update to machine view and getting allowed machine view to match new interface --- .../include/compiler/allowed_machine_views.h | 29 +- .../compiler/machine_view_to_tensor_mapping.h | 22 -- .../src/compiler/allowed_machine_views.cc | 153 +++------ .../machine_view_to_tensor_mapping.cc | 53 --- .../test/src/allowed_machine_views.cc | 246 +++----------- .../src/machine_view_to_tensor_mapping.cc | 88 ----- .../include/local-execution/cost_estimate.h | 
2 +- .../src/local_cost_estimator.cc | 1 + .../test/src/test_local_cost_estimator.cc | 132 ++++---- ...l => machine_space_coordinate.struct.toml} | 2 +- lib/pcg/include/pcg/machine_specification.h | 9 +- lib/pcg/include/pcg/machine_view.h | 67 +--- lib/pcg/include/pcg/machine_view.struct.toml | 26 +- .../pcg/start_invariant_machine_view.h | 20 -- .../start_invariant_machine_view.struct.toml | 23 -- lib/pcg/include/pcg/strided_rectangle.h | 77 ----- lib/pcg/include/pcg/strided_rectangle_side.h | 19 -- .../pcg/strided_rectangle_side.struct.toml | 23 -- ...toml => task_space_coordinate.struct.toml} | 2 +- lib/pcg/include/pcg/task_space_operator.h | 22 ++ .../pcg/task_space_operator.struct.toml | 24 ++ lib/pcg/src/pcg/delete.c | 145 ++++++++ lib/pcg/src/pcg/machine_specification.cc | 9 +- lib/pcg/src/pcg/machine_view.cc | 244 ++++---------- lib/pcg/src/pcg/machine_view_dim_idx_t.cc | 13 - .../src/pcg/start_invariant_machine_view.cc | 23 -- lib/pcg/src/pcg/strided_rectangle.cc | 131 -------- lib/pcg/src/pcg/strided_rectangle_side.cc | 22 -- lib/pcg/src/pcg/task_space_operator.cc | 43 +++ lib/pcg/test/src/pcg/machine_view.cc | 310 ++++++------------ .../src/pcg/start_invariant_machine_view.cc | 45 --- lib/pcg/test/src/pcg/strided_rectangle.cc | 39 --- .../test/src/pcg/strided_rectangle_side.cc | 19 -- lib/pcg/test/src/pcg/task_space_operator.cc | 62 ++++ lib/utils/include/utils/containers/zip.h | 12 + 35 files changed, 678 insertions(+), 1479 deletions(-) delete mode 100644 lib/compiler/include/compiler/machine_view_to_tensor_mapping.h delete mode 100644 lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc delete mode 100644 lib/compiler/test/src/machine_view_to_tensor_mapping.cc rename lib/pcg/include/pcg/{machine_specification_coordinate.struct.toml => machine_space_coordinate.struct.toml} (87%) delete mode 100644 lib/pcg/include/pcg/start_invariant_machine_view.h delete mode 100644 lib/pcg/include/pcg/start_invariant_machine_view.struct.toml delete mode 100644 
lib/pcg/include/pcg/strided_rectangle.h delete mode 100644 lib/pcg/include/pcg/strided_rectangle_side.h delete mode 100644 lib/pcg/include/pcg/strided_rectangle_side.struct.toml rename lib/pcg/include/pcg/{machine_view_coordinate.struct.toml => task_space_coordinate.struct.toml} (89%) create mode 100644 lib/pcg/include/pcg/task_space_operator.h create mode 100644 lib/pcg/include/pcg/task_space_operator.struct.toml create mode 100644 lib/pcg/src/pcg/delete.c delete mode 100644 lib/pcg/src/pcg/machine_view_dim_idx_t.cc delete mode 100644 lib/pcg/src/pcg/start_invariant_machine_view.cc delete mode 100644 lib/pcg/src/pcg/strided_rectangle.cc delete mode 100644 lib/pcg/src/pcg/strided_rectangle_side.cc create mode 100644 lib/pcg/src/pcg/task_space_operator.cc delete mode 100644 lib/pcg/test/src/pcg/start_invariant_machine_view.cc delete mode 100644 lib/pcg/test/src/pcg/strided_rectangle.cc delete mode 100644 lib/pcg/test/src/pcg/strided_rectangle_side.cc create mode 100644 lib/pcg/test/src/pcg/task_space_operator.cc diff --git a/lib/compiler/include/compiler/allowed_machine_views.h b/lib/compiler/include/compiler/allowed_machine_views.h index 5c78d2601c..c409a8bf75 100644 --- a/lib/compiler/include/compiler/allowed_machine_views.h +++ b/lib/compiler/include/compiler/allowed_machine_views.h @@ -1,31 +1,24 @@ #ifndef _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H #define _FLEXFLOW_COMPILER_ALLOWED_MACHINE_VIEWS_H -#include "compiler/machine_view_to_tensor_mapping.dtg.h" -#include "op-attrs/parallel_tensor_shape.dtg.h" -#include "pcg/machine_specification.h" -#include "pcg/machine_view.h" -#include "pcg/machine_view_projection.dtg.h" -#include "pcg/start_invariant_machine_view.dtg.h" +#include "pcg/machine_specification.dtg.h" +#include "pcg/machine_view.dtg.h" +#include "pcg/task_space_operator.dtg.h" namespace FlexFlow { bool is_valid_machine_view(MachineView const &mv, - MachineSpecification const &machine_spec); + TaskSpaceOperator const &task, + MachineSpecification const 
&ms); bool is_valid_machine_view(MachineView const &mv, - ParallelTensorShape const &shape); + TaskSpaceOperator const &task); + +std::unordered_set + get_allowed_machine_views(MachineSpecification const &machine_spec, + TaskSpaceOperator const &task, + DeviceType device_type); -std::unordered_set> - get_allowed_partial_machine_view_mappings( - MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType device_type = DeviceType::GPU); -std::unordered_set> - get_allowed_partial_start_invariant_machine_view_mappings( - MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType device_type); } // namespace FlexFlow #endif diff --git a/lib/compiler/include/compiler/machine_view_to_tensor_mapping.h b/lib/compiler/include/compiler/machine_view_to_tensor_mapping.h deleted file mode 100644 index 2244620304..0000000000 --- a/lib/compiler/include/compiler/machine_view_to_tensor_mapping.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _FLEXFLOW_COMPILER_MACHINE_VIEW_TO_TENSOR_MAPPING_H -#define _FLEXFLOW_COMPILER_MACHINE_VIEW_TO_TENSOR_MAPPING_H - -#include "compiler/machine_view_to_tensor_mapping.dtg.h" -#include "op-attrs/parallel_tensor_shape.dtg.h" -#include "pcg/machine_view.h" - -#include - -namespace FlexFlow { - -bool is_valid_mapping(MachineViewToTensorMapping const &mapping, - MachineView const &mv, - ParallelTensorShape const &shape); - -std::unordered_set - get_all_machine_view_to_tensor_mappings(MachineView const &mv, - ParallelTensorShape const &shape); - -} // namespace FlexFlow - -#endif diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index 4bc8210f4b..499103485c 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -1,12 +1,8 @@ #include "compiler/allowed_machine_views.h" -#include "op-attrs/parallel_tensor_dim_idx_t.h" -#include "op-attrs/parallel_tensor_dims.h" 
-#include "op-attrs/parallel_tensor_shape.h" #include "pcg/machine_specification.h" #include "pcg/machine_view.h" -#include "pcg/machine_view_dim_idx_t.h" #include "pcg/multi_dimensional_stride.dtg.h" -#include "pcg/start_invariant_machine_view.h" +#include "pcg/task_space_operator.h" #include "utils/containers/all_of.h" #include "utils/containers/cartesian_product.h" #include "utils/containers/extend.h" @@ -21,58 +17,30 @@ #include "utils/containers/unordered_multiset_of.h" #include "utils/containers/unordered_set_of.h" #include "utils/containers/zip.h" -#include "utils/graph/serial_parallel/serial_parallel_decomposition.h" #include "utils/overload.h" namespace FlexFlow { -static std::unordered_multiset - get_num_devices_per_parallel_dim(ParallelTensorShape const &shape) { - std::unordered_multiset raw_device_nums = - unordered_multiset_of(ff_ordered_shard_degrees(shape)); - raw_device_nums.insert(get_sum_degree(shape)); - raw_device_nums.insert(get_discard_copy_degree(shape)); - // filtering non-parallel dims - raw_device_nums = - filter(raw_device_nums, [](int num_devices) { return num_devices != 1; }); - - return transform(raw_device_nums, - [&](int num_devices) { return num_points_t{num_devices}; }); -} - -bool is_valid_partial_machine_view_mapping(MachineView const &mv, - MachineSpecification const &ms, - MachineViewProjection const &proj) { - MachineSpecificationCoordinate maximum_device_coords = - get_machine_specification_coordinates( - mv, get_maximum_device_coordinates(mv), ms, proj); - return is_valid_machine_specification_coordinates(ms, maximum_device_coords); -} - bool is_valid_machine_view(MachineView const &mv, - ParallelTensorShape const &shape) { - - std::vector mv_num_devices = get_num_devices_per_dim(mv); - std::unordered_multiset tensor_num_devices = - get_num_devices_per_parallel_dim(shape); - - return unordered_multiset_of(mv_num_devices) == tensor_num_devices; + TaskSpaceOperator const &task, + MachineSpecification const &ms) { + 
MachineSpaceCoordinate maximum_device_coords = get_machine_space_coordinate( + task, mv, get_maximum_fragment_coordinate(task), ms); + return is_valid_machine_space_coordinates(ms, maximum_device_coords); } -/* Generates a set of candidate `MachineView`s and their associate - `MachineViewProjection`. +/* Generates a set of candidate `MachineView`s * The returned set includes all valid machine views, and might contain invalid ones. This function should never be used externally (see - * `get_allowed_partial_machine_view_mappings` instead). There is no guarantee - that a non-empty returned set contains a valid machine view (i.e. its possible - for all + * `get_allowed_partial_machine_view_mappings` instead). There is no + guarantee that a non-empty returned set contains a valid machine view (i.e. + its possible for all * `MachineView`s to be invalid) */ -static std::unordered_set> - get_candidate_partial_machine_view_mappings( - MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType const &device_type) { +static std::unordered_set + get_candidate_machine_views(MachineSpecification const &machine_spec, + TaskSpaceOperator const &task, + DeviceType const &device_type) { auto candidate_strides = [](std::vector const &tensor_dims, @@ -96,89 +64,52 @@ static std::unordered_set> return strides; }; - auto candidate_starts = [](std::vector ordered_tensor_dims) { - std::vector> coordinate_ranges = - transform(ordered_tensor_dims, [&](num_points_t const &num_points) { - return range(num_points.unwrapped); - }); - - std::unordered_set> raw_coordinates = - unordered_set_of(cartesian_product(coordinate_ranges)); - std::unordered_set machine_view_coordinate = - transform(raw_coordinates, [](std::vector const &point) { - return MachineViewCoordinate(point); - }); - return machine_view_coordinate; + auto candidate_starts = [](MachineSpecification const &ms, + DeviceType const &device_type) { + std::unordered_set result; + for (int i : 
range(ms.num_nodes)) { + for (int j : range(get_num_devices_per_node(ms, device_type))) { + result.insert(MachineSpaceCoordinate{i, j, device_type}); + } + } + return result; }; - auto candidate_projections = [](MachineView const &mv) { - std::unordered_set result; + auto candidate_projections = [](TaskSpaceOperator const &task) { std::unordered_set options = { MachineSpecificationDimension::INTER_NODE, MachineSpecificationDimension::INTRA_NODE}; - for (std::vector const &proj_vec : - get_all_permutations_with_repetition(options, num_dims(mv))) { - - result.insert(MachineViewProjection{ - map_from_keys_and_values(get_machine_view_indices(mv), proj_vec)}); - } - return result; + return get_all_permutations_with_repetition(options, num_dims(task)); }; - std::unordered_multiset tensor_dims = - get_num_devices_per_parallel_dim(shape); + std::vector tensor_dims = task.degrees; int total_devices = get_num_devices(machine_spec, device_type); - std::unordered_set> - machine_views; + std::unordered_set machine_views; for (MultiDimensionalStride const &strides : - candidate_strides(sorted(tensor_dims), total_devices)) { - StridedRectangle rect = get_strided_rectangle(strides, sorted(tensor_dims)); - auto start_inv_mv = StartInvariantMachineView{rect, device_type}; - for (MachineViewCoordinate start : candidate_starts(sorted(tensor_dims))) { - MachineView mv = machine_view_from_start_invariant(start_inv_mv, start); - for (MachineViewProjection const &proj : candidate_projections(mv)) { - machine_views.insert({mv, proj}); + candidate_strides(tensor_dims, total_devices)) { + for (MachineSpaceCoordinate start : + candidate_starts(machine_spec, device_type)) { + for (std::vector const &proj : + candidate_projections(task)) { + machine_views.insert(MachineView{strides.raw_strides, proj, start}); } } } return machine_views; } -std::unordered_set> - get_allowed_partial_machine_view_mappings( - MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType 
device_type) { - - std::unordered_set> views = - get_candidate_partial_machine_view_mappings( - machine_spec, shape, device_type); - return filter(views, - [&](std::pair const &pair) { - auto &[mv, projection] = pair; - return is_valid_machine_view(mv, shape) && - is_valid_partial_machine_view_mapping( - mv, machine_spec, projection); - }); -} - -std::unordered_set> - get_allowed_partial_start_invariant_machine_view_mappings( - MachineSpecification const &machine_spec, - ParallelTensorShape const &shape, - DeviceType device_type) { - - std::unordered_set> views = - get_allowed_partial_machine_view_mappings( - machine_spec, shape, device_type); +std::unordered_set + get_allowed_machine_views(MachineSpecification const &machine_spec, + TaskSpaceOperator const &task, + DeviceType device_type) { - return transform( - views, [](std::pair const &p) { - auto &[view, proj] = p; - return std::pair{start_invariant_from_machine_view(view), proj}; - }); + std::unordered_set views = + get_candidate_machine_views(machine_spec, task, device_type); + return filter(views, [&](MachineView const &mv) { + return is_valid_machine_view(mv, task, machine_spec); + }); } } // namespace FlexFlow diff --git a/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc b/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc deleted file mode 100644 index e1ab335f09..0000000000 --- a/lib/compiler/src/compiler/machine_view_to_tensor_mapping.cc +++ /dev/null @@ -1,53 +0,0 @@ -#include "compiler/machine_view_to_tensor_mapping.h" -#include "compiler/allowed_machine_views.h" -#include "op-attrs/parallel_dim.h" -#include "op-attrs/parallel_tensor_dim_idx_t.h" -#include "pcg/machine_view_dim_idx_t.h" -#include "utils/bidict/algorithms/bidict_from_pairs.h" -#include "utils/containers/all_of.h" -#include "utils/containers/filter.h" -#include "utils/containers/get_all_permutations.h" -#include "utils/containers/sorted.h" -#include "utils/containers/zip.h" -#include "utils/exception.h" -namespace 
FlexFlow { - -std::unordered_set - get_all_machine_view_to_tensor_mappings(MachineView const &mv, - ParallelTensorShape const &shape) { - if (!is_valid_machine_view(mv, shape)) { - throw mk_runtime_error( - "Invalid MachineView {} for given ParallelTensorShape {}", mv, shape); - } - std::vector machine_view_dim_ordering = - get_machine_view_indices(mv); - std::unordered_set shape_indices = - get_parallel_tensor_dim_indices(shape); - shape_indices = - filter(shape_indices, [&](parallel_tensor_dim_idx_t const &idx) { - return get_degree(get_parallel_dim_at_idx(shape, idx)) != 1; - }); - - std::unordered_set result; - for (std::vector const &tensor_dim_orderings : - get_all_permutations(shape_indices)) { - MachineViewToTensorMapping mapping = - MachineViewToTensorMapping(bidict_from_pairs( - zip(machine_view_dim_ordering, tensor_dim_orderings))); - if (is_valid_mapping(mapping, mv, shape)) { - result.insert(mapping); - } - } - return result; -} - -bool is_valid_mapping(MachineViewToTensorMapping const &mapping, - MachineView const &mv, - ParallelTensorShape const &shape) { - return all_of(mapping.raw_bidict, [&](auto const pair) { - int mv_degree = get_side_at_idx(mv, pair.first).num_points.unwrapped; - int tensor_degree = get_degree(get_parallel_dim_at_idx(shape, pair.second)); - return (tensor_degree == mv_degree); - }); -} -} // namespace FlexFlow diff --git a/lib/compiler/test/src/allowed_machine_views.cc b/lib/compiler/test/src/allowed_machine_views.cc index 7a4184d86c..3c22606254 100644 --- a/lib/compiler/test/src/allowed_machine_views.cc +++ b/lib/compiler/test/src/allowed_machine_views.cc @@ -1,8 +1,5 @@ #include "compiler/allowed_machine_views.h" #include "doctest/doctest.h" -#include "pcg/machine_specification.dtg.h" -#include "pcg/machine_view.h" -#include "pcg/start_invariant_machine_view.h" #include "utils/containers/extend.h" #include "utils/containers/range.h" #include "utils/containers/transform.h" @@ -14,46 +11,31 @@ using namespace FlexFlow; 
TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_allowed_partial_machine_view_mappings") { + TEST_CASE("get_allowed_machine_views") { SUBCASE("1 degree of parallelism") { MachineSpecification ms = MachineSpecification{1, 5, 5, 0, 0}; - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, + TaskSpaceOperator task = TaskSpaceOperator{{num_points_t{3}}}; + + std::unordered_set correct = { + MachineView{{{stride_t{1}}}, + {{MachineSpecificationDimension::INTRA_NODE}}, + MachineSpaceCoordinate{0, 0, DeviceType::GPU}}, + + MachineView{{{stride_t{1}}}, + {{MachineSpecificationDimension::INTRA_NODE}}, + MachineSpaceCoordinate{0, 1, DeviceType::GPU}}, + MachineView{{{stride_t{1}}}, + {{MachineSpecificationDimension::INTRA_NODE}}, + MachineSpaceCoordinate{0, 2, DeviceType::GPU}}, + MachineView{{{stride_t{2}}}, + {{MachineSpecificationDimension::INTRA_NODE}}, + MachineSpaceCoordinate{0, 0, DeviceType::GPU}}, }; - std::vector correct_mv = { - make_1d_machine_view(DeviceType::GPU, 0, 3, stride_t(1)), - make_1d_machine_view(DeviceType::GPU, 1, 4, stride_t(1)), - make_1d_machine_view(DeviceType::GPU, 2, 5, stride_t(1)), - make_1d_machine_view(DeviceType::GPU, 0, 6, stride_t(2))}; - - std::vector correct_proj = { - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA_NODE}}}, - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA_NODE}}}, - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA_NODE}}}, - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA_NODE}}}, - }; - - std::unordered_set> - correct = unordered_set_of(zip(correct_mv, correct_proj)); - - std::unordered_set> result = - get_allowed_partial_machine_view_mappings(ms, shape); + std::unordered_set result = + 
get_allowed_machine_views(ms, task, DeviceType::GPU); CHECK(correct == result); } @@ -61,179 +43,37 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("2 degrees of parallelism") { MachineSpecification ms = MachineSpecification{3, 3, 3, 0, 0}; - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{2}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }; - - auto make_2d_views = - [&](int start_x, int start_y, int stride1, int stride2) { - return MachineView{ - MachineViewCoordinate{{start_x, start_y}}, - StridedRectangle{ - {StridedRectangleSide{num_points_t{2}, stride_t{stride1}}, - StridedRectangleSide{num_points_t{3}, stride_t{stride2}}}}, - DeviceType::GPU}; - }; - - std::vector correct_mv = { - make_2d_views(0, 0, /*stride1*/ 1, /*stride2*/ 1), - make_2d_views(1, 0, /*stride1*/ 1, /*stride2*/ 1), - make_2d_views(0, 0, /*stride1*/ 2, /*stride2*/ 1), - - make_2d_views(0, 0, /*stride1*/ 1, /*stride2*/ 1), - make_2d_views(1, 0, /*stride1*/ 1, /*stride2*/ 1), - make_2d_views(0, 0, /*stride1*/ 2, /*stride2*/ 1), - }; - - std::vector correct_proj = { - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA_NODE}, - {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTER_NODE}}}, - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA_NODE}, - {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTER_NODE}}}, - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA_NODE}, - {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTER_NODE}}}, - - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER_NODE}, - {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTRA_NODE}}}, - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER_NODE}, - {machine_view_dim_idx_t{1}, - 
MachineSpecificationDimension::INTRA_NODE}}}, - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER_NODE}, - {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTRA_NODE}}}, + TaskSpaceOperator task = + TaskSpaceOperator{{num_points_t{2}, num_points_t{3}}}; + + auto make_2d_views = [&](int start_x, + int start_y, + int stride1, + int stride2, + MachineSpecificationDimension m1, + MachineSpecificationDimension m2) { + return MachineView{ + {stride_t{stride1}, stride_t{stride2}}, + {m1, m2}, + MachineSpaceCoordinate{start_x, start_y, DeviceType::GPU}}; }; - std::unordered_set> - correct = unordered_set_of(zip(correct_mv, correct_proj)); - - std::unordered_set> result = - get_allowed_partial_machine_view_mappings(ms, shape, DeviceType::GPU); - - CHECK(correct == result); - } - } - - TEST_CASE("get_allowed_partial_start_invariant_machine_view_mappings") { - - SUBCASE("1 degree of parallelism") { - - MachineSpecification ms = MachineSpecification{1, 5, 5, 0, 0}; - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }; + auto intra = MachineSpecificationDimension::INTRA_NODE; + auto inter = MachineSpecificationDimension::INTER_NODE; + std::unordered_set correct = { + make_2d_views(0, 0, /*stride1*/ 1, /*stride2*/ 1, inter, intra), + make_2d_views(1, 0, /*stride1*/ 1, /*stride2*/ 1, inter, intra), + make_2d_views(0, 0, /*stride1*/ 2, /*stride2*/ 1, inter, intra), - std::vector correct_mv = { - make_1d_start_invariant_machine_view( - num_points_t(3), stride_t(1), DeviceType::GPU), - make_1d_start_invariant_machine_view( - num_points_t(3), stride_t(2), DeviceType::GPU)}; - - std::vector correct_proj = { - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA_NODE}}}, - MachineViewProjection{{{machine_view_dim_idx_t{0}, - 
MachineSpecificationDimension::INTRA_NODE}}}, + make_2d_views(0, 0, /*stride1*/ 1, /*stride2*/ 1, intra, inter), + make_2d_views(0, 1, /*stride1*/ 1, /*stride2*/ 1, intra, inter), + make_2d_views(0, 0, /*stride1*/ 2, /*stride2*/ 1, intra, inter), }; - std::unordered_set< - std::pair> - correct = unordered_set_of(zip(correct_mv, correct_proj)); - - std::unordered_set< - std::pair> - result = get_allowed_partial_start_invariant_machine_view_mappings( - ms, shape, DeviceType::GPU); + std::unordered_set result = + get_allowed_machine_views(ms, task, DeviceType::GPU); CHECK(correct == result); } - - SUBCASE("2 degrees of parallelism") { - - MachineSpecification ms = MachineSpecification(3, 3, 3, 0, 0); - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{2}, - DiscardCopyDegree{1}, - }, - }, - DataType::FLOAT, - }; - - auto make_2d_view = [&](int stride1, int stride2) { - StridedRectangle rect = StridedRectangle{ - {StridedRectangleSide{num_points_t(2), stride_t(stride1)}, - StridedRectangleSide{num_points_t(3), stride_t(stride2)}}}; - return StartInvariantMachineView{rect, DeviceType::GPU}; - }; - - std::vector correct_mv = { - make_2d_view(/*stride1*/ 1, /*stride2*/ 1), - make_2d_view(/*stride1*/ 2, /*stride2*/ 1), - make_2d_view(/*stride1*/ 1, /*stride2*/ 1), - make_2d_view(/*stride1*/ 2, /*stride2*/ 1), - }; - - std::vector correct_proj = { - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA_NODE}, - {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTER_NODE}}}, - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA_NODE}, - {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTER_NODE}}}, - - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER_NODE}, - {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTRA_NODE}}}, - 
MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER_NODE}, - {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTRA_NODE}}}, - }; - std::unordered_set< - std::pair> - correct = unordered_set_of(zip(correct_mv, correct_proj)); - - std::unordered_set< - std::pair> - result = get_allowed_partial_start_invariant_machine_view_mappings( - ms, shape, DeviceType::GPU); - - CHECK(result == correct); - } } } diff --git a/lib/compiler/test/src/machine_view_to_tensor_mapping.cc b/lib/compiler/test/src/machine_view_to_tensor_mapping.cc deleted file mode 100644 index 65d87979b2..0000000000 --- a/lib/compiler/test/src/machine_view_to_tensor_mapping.cc +++ /dev/null @@ -1,88 +0,0 @@ -#include "compiler/machine_view_to_tensor_mapping.h" -#include "doctest/doctest.h" -#include "pcg/machine_view.h" -#include "utils/containers/transform.h" -#include "utils/containers/unordered_set_of.h" -#include "utils/fmt/unordered_map.h" -#include "utils/fmt/unordered_set.h" - -using namespace FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_all_machine_view_to_tensor_mappings") { - SUBCASE("no possible mappings") { - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{3, 1}, - }, - ReplicaParallelDimSet{ - SumDegree{1}, - DiscardCopyDegree{2}, - }, - }, - DataType::FLOAT, - }; - MachineView view = - MachineView{MachineViewCoordinate{{0, 0, 0}}, - StridedRectangle{{ - StridedRectangleSide{num_points_t{2}, stride_t{1}}, - StridedRectangleSide{num_points_t{2}, stride_t{4}}, - }}, - DeviceType::GPU}; - CHECK_THROWS_AS(get_all_machine_view_to_tensor_mappings(view, shape), - std::runtime_error); - } - SUBCASE("multiple possible mappings") { - ParallelTensorShape shape = ParallelTensorShape{ - ParallelTensorDims{ - FFOrdered{ - ShardParallelDim{10, 3}, - }, - ReplicaParallelDimSet{ - SumDegree{2}, - DiscardCopyDegree{2}, - }, - }, - DataType::FLOAT, - }; - MachineView view = - 
MachineView{MachineViewCoordinate{{0, 0, 0}}, - StridedRectangle{{ - StridedRectangleSide{num_points_t{2}, stride_t{1}}, - StridedRectangleSide{num_points_t{2}, stride_t{4}}, - StridedRectangleSide{num_points_t{3}, stride_t{1}}, - }}, - DeviceType::GPU}; - - machine_view_dim_idx_t mv_dim_0 = machine_view_dim_idx_t{0}; - machine_view_dim_idx_t mv_dim_1 = machine_view_dim_idx_t{1}; - machine_view_dim_idx_t mv_dim_2 = machine_view_dim_idx_t{2}; - parallel_tensor_dim_idx_t pt_dim_0 = - parallel_tensor_dim_idx_t{ff_dim_t{0}}; - parallel_tensor_dim_idx_t pt_dim_sum = - parallel_tensor_dim_idx_t{ReplicaType::SUM}; - parallel_tensor_dim_idx_t pt_dim_eq = - parallel_tensor_dim_idx_t{ReplicaType::DISCARD_COPY}; - - bidict b1 = { - {mv_dim_2, pt_dim_0}, - {mv_dim_1, pt_dim_sum}, - {mv_dim_0, pt_dim_eq}, - }; - - bidict b2 = { - {mv_dim_2, pt_dim_0}, - {mv_dim_0, pt_dim_sum}, - {mv_dim_1, pt_dim_eq}, - }; - - std::unordered_set correct = { - MachineViewToTensorMapping{b1}, MachineViewToTensorMapping{b2}}; - std::unordered_set result = - get_all_machine_view_to_tensor_mappings(view, shape); - - CHECK(correct == result); - } - } -} diff --git a/lib/local-execution/include/local-execution/cost_estimate.h b/lib/local-execution/include/local-execution/cost_estimate.h index 33954827bd..cc6eef877b 100644 --- a/lib/local-execution/include/local-execution/cost_estimate.h +++ b/lib/local-execution/include/local-execution/cost_estimate.h @@ -6,9 +6,9 @@ #include "local-execution/local_training_backing.h" #include "op-attrs/operator_attrs.h" #include "op-attrs/parallel_tensor_shape.h" +#include "pcg/machine_view.dtg.h" #include "pcg/machine_view.h" #include "pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h" - namespace FlexFlow { struct ICostEstimator { diff --git a/lib/local-execution/src/local_cost_estimator.cc b/lib/local-execution/src/local_cost_estimator.cc index d4e0467cbf..92aa57d820 100644 --- a/lib/local-execution/src/local_cost_estimator.cc +++ 
b/lib/local-execution/src/local_cost_estimator.cc @@ -5,6 +5,7 @@ #include "op-attrs/computation_graph_op_attrs.h" #include "op-attrs/pcg_operator_attrs.h" #include "pcg/computation_graph_builder.h" +#include "pcg/machine_view.dtg.h" #include "pcg/parallel_tensor_attrs.h" #include "utils/containers/transform.h" diff --git a/lib/local-execution/test/src/test_local_cost_estimator.cc b/lib/local-execution/test/src/test_local_cost_estimator.cc index db8c2c66c8..bbdee38bdc 100644 --- a/lib/local-execution/test/src/test_local_cost_estimator.cc +++ b/lib/local-execution/test/src/test_local_cost_estimator.cc @@ -1,77 +1,79 @@ -#include "doctest/doctest.h" -#include "kernels/local_cuda_allocator.h" -#include "kernels/managed_per_device_ff_handle.h" -#include "local-execution/local_cost_estimator.h" -#include "pcg/computation_graph_builder.h" -#include "test_utils.h" +// #include "doctest/doctest.h" +// #include "kernels/local_cuda_allocator.h" +// #include "kernels/managed_per_device_ff_handle.h" +// #include "local-execution/local_cost_estimator.h" +// #include "pcg/computation_graph_builder.h" +// #include "test_utils.h" -namespace FlexFlow { +// namespace FlexFlow { -TEST_SUITE(FF_CUDA_TEST_SUITE) { - TEST_CASE("Local Cost Estimator") { - // local backing initialization - ManagedPerDeviceFFHandle managed_handle{}; +// TEST_SUITE(FF_CUDA_TEST_SUITE) { +// TEST_CASE("Local Cost Estimator") { +// // local backing initialization +// ManagedPerDeviceFFHandle managed_handle{}; - RuntimeArgConfig runtime_arg_config = RuntimeArgConfig{ - DeviceSpecific::create(managed_handle.raw_handle()), - EnableProfiling::YES, - ProfilingSettings{/*warmup_iters=*/0, - /*measure_iters=*/1}}; +// RuntimeArgConfig runtime_arg_config = RuntimeArgConfig{ +// DeviceSpecific::create(managed_handle.raw_handle()), +// EnableProfiling::YES, +// ProfilingSettings{/*warmup_iters=*/0, +// /*measure_iters=*/1}}; - LocalCostEstimator cost_estimator = LocalCostEstimator{runtime_arg_config}; +// 
LocalCostEstimator cost_estimator = +// LocalCostEstimator{runtime_arg_config}; - SUBCASE("Estimate cost -- Attention Op") { - int embed_dim = 32; - int num_heads = 10; - MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{ - /*embed_dim=*/embed_dim, - /*num_heads=*/num_heads, - /*kdim=*/embed_dim, - /*vdim=*/embed_dim, - /*dropout=*/0.0, - /*bias=*/true, - /*add_bias_kv=*/false, - /*add_zero_attn=*/false, - }; +// SUBCASE("Estimate cost -- Attention Op") { +// int embed_dim = 32; +// int num_heads = 10; +// MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{ +// /*embed_dim=*/embed_dim, +// /*num_heads=*/num_heads, +// /*kdim=*/embed_dim, +// /*vdim=*/embed_dim, +// /*dropout=*/0.0, +// /*bias=*/true, +// /*add_bias_kv=*/false, +// /*add_zero_attn=*/false, +// }; - size_t batch_size = 40; - size_t seq_len = 48; - size_t feature_size = 36; +// size_t batch_size = 40; +// size_t seq_len = 48; +// size_t feature_size = 36; - DataType dtype = DataType::FLOAT; - ParallelTensorShape inputs_shape = lift_to_parallel(TensorShape{ - TensorDims{FFOrdered{batch_size, seq_len, feature_size}}, - DataType::FLOAT, - }); +// DataType dtype = DataType::FLOAT; +// ParallelTensorShape inputs_shape = lift_to_parallel(TensorShape{ +// TensorDims{FFOrdered{batch_size, seq_len, feature_size}}, +// DataType::FLOAT, +// }); - ParallelTensorShape weights_shape = throw_if_unexpected( - get_weights_shape(attrs, inputs_shape, inputs_shape, inputs_shape)); - ParallelTensorAttrs weight_attrs = - ParallelTensorAttrs{weights_shape, - /*sync_type=*/std::nullopt, - /*initializer=*/std::nullopt, - CreateGrad::YES}; +// ParallelTensorShape weights_shape = throw_if_unexpected( +// get_weights_shape(attrs, inputs_shape, inputs_shape, +// inputs_shape)); +// ParallelTensorAttrs weight_attrs = +// ParallelTensorAttrs{weights_shape, +// /*sync_type=*/std::nullopt, +// /*initializer=*/std::nullopt, +// CreateGrad::YES}; - ParallelTensorShape output_shape = throw_if_unexpected( - 
get_output_shape(attrs, inputs_shape, inputs_shape, inputs_shape)); - ParallelTensorAttrs output_attrs = - ParallelTensorAttrs{output_shape, - /*sync_type=*/std::nullopt, - /*initializer=*/std::nullopt, - CreateGrad::YES}; +// ParallelTensorShape output_shape = throw_if_unexpected( +// get_output_shape(attrs, inputs_shape, inputs_shape, inputs_shape)); +// ParallelTensorAttrs output_attrs = +// ParallelTensorAttrs{output_shape, +// /*sync_type=*/std::nullopt, +// /*initializer=*/std::nullopt, +// CreateGrad::YES}; - CostDetails result = cost_estimator.estimate_cost( - PCGOperatorAttrs{attrs}, - std::vector{ - inputs_shape, inputs_shape, inputs_shape}, - std::vector{weight_attrs}, - std::vector{output_attrs}, - make_1d_machine_view(DeviceType::GPU, 0, 1)); +// CostDetails result = cost_estimator.estimate_cost( +// PCGOperatorAttrs{attrs}, +// std::vector{ +// inputs_shape, inputs_shape, inputs_shape}, +// std::vector{weight_attrs}, +// std::vector{output_attrs}, +// make_1d_machine_view(DeviceType::GPU, 0, 1)); - CHECK(result.total_elapsed_time > 0); - CHECK(result.total_mem_usage > 0); - } - } -} +// CHECK(result.total_elapsed_time > 0); +// CHECK(result.total_mem_usage > 0); +// } +// } +// } -} // namespace FlexFlow +// } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/machine_specification_coordinate.struct.toml b/lib/pcg/include/pcg/machine_space_coordinate.struct.toml similarity index 87% rename from lib/pcg/include/pcg/machine_specification_coordinate.struct.toml rename to lib/pcg/include/pcg/machine_space_coordinate.struct.toml index f7fdfb18a7..45602776d2 100644 --- a/lib/pcg/include/pcg/machine_specification_coordinate.struct.toml +++ b/lib/pcg/include/pcg/machine_space_coordinate.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "MachineSpecificationCoordinate" +name = "MachineSpaceCoordinate" features = [ "eq", "ord", diff --git a/lib/pcg/include/pcg/machine_specification.h b/lib/pcg/include/pcg/machine_specification.h index 
70aadd6ac5..7bd087998c 100644 --- a/lib/pcg/include/pcg/machine_specification.h +++ b/lib/pcg/include/pcg/machine_specification.h @@ -3,8 +3,8 @@ #include "pcg/device_id_t.dtg.h" #include "pcg/device_type.dtg.h" +#include "pcg/machine_space_coordinate.dtg.h" #include "pcg/machine_specification.dtg.h" -#include "pcg/machine_specification_coordinate.dtg.h" namespace FlexFlow { @@ -15,12 +15,11 @@ int get_num_devices(MachineSpecification const &ms, int get_num_devices_per_node(MachineSpecification const &ms, DeviceType const &device_type); -bool is_valid_machine_specification_coordinates( - MachineSpecification const &ms, - MachineSpecificationCoordinate const &coord); +bool is_valid_machine_space_coordinates(MachineSpecification const &ms, + MachineSpaceCoordinate const &coord); device_id_t get_device_id(MachineSpecification const &ms, - MachineSpecificationCoordinate const &coord); + MachineSpaceCoordinate const &coord); } // namespace FlexFlow #endif diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 2857b00c48..401788923d 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -1,61 +1,30 @@ #ifndef _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_H #define _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_H -#include "pcg/device_id.h" -#include "pcg/device_type.dtg.h" -#include "pcg/machine_specification.dtg.h" -#include "pcg/machine_specification_coordinate.dtg.h" -#include "pcg/machine_view.dtg.h" -#include "pcg/machine_view_coordinate.dtg.h" -#include "pcg/machine_view_dim_idx_t.dtg.h" -#include "pcg/machine_view_projection.dtg.h" -#include "pcg/num_points_t.dtg.h" -#include "pcg/side_size_t.dtg.h" +#include "machine_specification.dtg.h" +#include "machine_view.dtg.h" +#include "pcg/device_id_t.dtg.h" +#include "pcg/task_space_operator.dtg.h" +#include "task_space_coordinate.dtg.h" #include -#include +#include namespace FlexFlow { -std::unordered_set - get_devices_coordinates(MachineView const &mv); 
-MachineViewCoordinate get_maximum_device_coordinates(MachineView const &mv); - -MachineSpecificationCoordinate get_machine_specification_coordinates( - MachineView const &mv, - MachineViewCoordinate const &coordinates, - MachineSpecification const &ms, - MachineViewProjection const &projection); -StridedRectangleSide get_side_at_idx(MachineView const &mv, - machine_view_dim_idx_t const &idx); - -device_id_t get_device_id(MachineView const &mv, - MachineViewCoordinate const &coordinates, - MachineSpecification const &ms, - MachineViewProjection const &projection); -std::unordered_set - get_device_ids(MachineView const &mv, - MachineSpecification const &ms, - MachineViewProjection const &projection); +MachineSpaceCoordinate + get_machine_space_coordinate(TaskSpaceOperator const &task, + MachineView const &mv, + TaskSpaceCoordinate const &coordinates, + MachineSpecification const &ms); + +std::unordered_set + get_machine_space_coordinates(TaskSpaceOperator const &task, + MachineView const &mv, + MachineSpecification const &ms); size_t num_dims(MachineView const &mv); -size_t num_devices(MachineView const &mv); -std::vector get_num_devices_per_dim(MachineView const &mv); -std::vector get_side_size_per_dim(MachineView const &mv); - -MachineView make_1d_machine_view(DeviceType device_type, - int start, - int stop, - stride_t stride = stride_t{1}); - -MachineView make_1d_machine_view(DeviceType device_type, - int start, - num_points_t num_points, - stride_t stride = stride_t{1}); - -MachineView make_1d_machine_view(DeviceType device_type, - int start, - side_size_t interval_size, - stride_t stride = stride_t{1}); + +DeviceType get_device_type(MachineView const &mv); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/machine_view.struct.toml b/lib/pcg/include/pcg/machine_view.struct.toml index 2479861ebe..c6c8741e03 100644 --- a/lib/pcg/include/pcg/machine_view.struct.toml +++ b/lib/pcg/include/pcg/machine_view.struct.toml @@ -9,20 +9,26 @@ features = [ "fmt", ] 
-includes = [ - "pcg/strided_rectangle.h", - "pcg/machine_view_coordinate.dtg.h", - "pcg/device_type.dtg.h", +includes = [ + "pcg/stride_t.dtg.h", + "pcg/machine_specification_dimension.dtg.h", + "pcg/machine_space_coordinate.dtg.h", +] + +src_includes = [ + "utils/fmt/vector.h", + "utils/hash/vector.h" ] [[fields]] -name = "start" -type = "::FlexFlow::MachineViewCoordinate" +name = "strides" +type = "std::vector<::FlexFlow::stride_t>" [[fields]] -name = "rect" -type = "::FlexFlow::StridedRectangle" +name = "projection" +type = "std::vector<::FlexFlow::MachineSpecificationDimension>" [[fields]] -name = "device_type" -type = "::FlexFlow::DeviceType" +name = "start" +type = "::FlexFlow::MachineSpaceCoordinate" + diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.h b/lib/pcg/include/pcg/start_invariant_machine_view.h deleted file mode 100644 index 6d5fa5bd3e..0000000000 --- a/lib/pcg/include/pcg/start_invariant_machine_view.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_START_INVARIANT_MACHINE_VIEW_H -#define _FLEXFLOW_PCG_INCLUDE_PCG_START_INVARIANT_MACHINE_VIEW_H - -#include "pcg/machine_view.dtg.h" -#include "pcg/start_invariant_machine_view.dtg.h" - -namespace FlexFlow { - -MachineView - machine_view_from_start_invariant(StartInvariantMachineView const &mv, - MachineViewCoordinate const &start_id); -StartInvariantMachineView - start_invariant_from_machine_view(MachineView const &mv); - -StartInvariantMachineView make_1d_start_invariant_machine_view( - num_points_t num_points, stride_t stride, DeviceType device_type); - -} // namespace FlexFlow - -#endif diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml b/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml deleted file mode 100644 index ea65af4591..0000000000 --- a/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml +++ /dev/null @@ -1,23 +0,0 @@ -namespace = "FlexFlow" -name = "StartInvariantMachineView" -features = [ - "eq", - "ord", - 
"hash", - "json", - # "rapidcheck", - "fmt", -] - -includes = [ - "pcg/strided_rectangle.h", - "pcg/device_type.dtg.h", -] - -[[fields]] -name = "rect" -type = "::FlexFlow::StridedRectangle" - -[[fields]] -name = "device_type" -type = "::FlexFlow::DeviceType" diff --git a/lib/pcg/include/pcg/strided_rectangle.h b/lib/pcg/include/pcg/strided_rectangle.h deleted file mode 100644 index 781a91e358..0000000000 --- a/lib/pcg/include/pcg/strided_rectangle.h +++ /dev/null @@ -1,77 +0,0 @@ -#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_STRIDED_RECTANGLE_H -#define _FLEXFLOW_PCG_INCLUDE_PCG_STRIDED_RECTANGLE_H - -#include "op-attrs/ff_dim.dtg.h" -#include "pcg/device_id_t.dtg.h" -#include "pcg/multi_dimensional_stride.dtg.h" -#include "pcg/num_points_t.dtg.h" -#include "pcg/side_size_t.dtg.h" -#include "pcg/strided_rectangle_side.dtg.h" - -namespace FlexFlow { - -/** - * @brief Represents a multi-dimensional rectangle with strided sides. - * - * @note This struct maintains its sides in a canonical order, which is sorted - * in ascending order (enforced at construction). 
- */ -struct StridedRectangle { - -private: - std::tuple const &> tie() const; - friend struct std::hash; - -public: - StridedRectangle() = delete; - explicit StridedRectangle(std::vector const &sides); - - bool operator==(StridedRectangle const &) const; - bool operator!=(StridedRectangle const &) const; - bool operator<(StridedRectangle const &) const; - bool operator>(StridedRectangle const &) const; - bool operator<=(StridedRectangle const &) const; - bool operator>=(StridedRectangle const &) const; - - StridedRectangleSide const &at(int idx) const; - std::vector const &get_sides() const; - -private: - std::vector sides; -}; -std::string format_as(StridedRectangle const &); -std::ostream &operator<<(std::ostream &, StridedRectangle const &); - -size_t get_num_dims(StridedRectangle const &rect); - -num_points_t get_num_points(StridedRectangle const &rect); - -StridedRectangle - get_strided_rectangle(MultiDimensionalStride const &strides, - std::vector const &num_points_per_dim); - -} // namespace FlexFlow - -namespace std { -template <> -struct hash<::FlexFlow::StridedRectangle> { - size_t operator()(::FlexFlow::StridedRectangle const &) const; -}; -} // namespace std - -namespace nlohmann { -template <> -struct adl_serializer<::FlexFlow::StridedRectangle> { - static ::FlexFlow::StridedRectangle from_json(json const &); - static void to_json(json &, ::FlexFlow::StridedRectangle const &); -}; -} // namespace nlohmann - -namespace rc { -template <> -struct Arbitrary<::FlexFlow::StridedRectangle> { - static Gen<::FlexFlow::StridedRectangle> arbitrary(); -}; -} // namespace rc - -#endif diff --git a/lib/pcg/include/pcg/strided_rectangle_side.h b/lib/pcg/include/pcg/strided_rectangle_side.h deleted file mode 100644 index 8e0bb5784a..0000000000 --- a/lib/pcg/include/pcg/strided_rectangle_side.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _FLEXFLOW_LIB_PCG_INCLUDE_PCG_STRIDED_RECTANGLE_SIDE_H -#define _FLEXFLOW_LIB_PCG_INCLUDE_PCG_STRIDED_RECTANGLE_SIDE_H - -#include 
"pcg/side_size_t.dtg.h" -#include "pcg/stride_t.dtg.h" -#include "pcg/strided_rectangle_side.dtg.h" - -namespace FlexFlow { - -StridedRectangleSide strided_side_from_size_and_stride(side_size_t, - stride_t stride); - -side_size_t get_side_size(StridedRectangleSide const &); - -std::vector get_points(StridedRectangleSide const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/pcg/include/pcg/strided_rectangle_side.struct.toml b/lib/pcg/include/pcg/strided_rectangle_side.struct.toml deleted file mode 100644 index 3481ebcf16..0000000000 --- a/lib/pcg/include/pcg/strided_rectangle_side.struct.toml +++ /dev/null @@ -1,23 +0,0 @@ -namespace = "FlexFlow" -name = "StridedRectangleSide" -features = [ - "eq", - "ord", - "hash", - "json", - "rapidcheck", - "fmt", -] - -includes = [ - "pcg/num_points_t.dtg.h", - "pcg/stride_t.dtg.h", -] - -[[fields]] -name = "num_points" -type = "::FlexFlow::num_points_t" - -[[fields]] -name = "stride" -type = "::FlexFlow::stride_t" diff --git a/lib/pcg/include/pcg/machine_view_coordinate.struct.toml b/lib/pcg/include/pcg/task_space_coordinate.struct.toml similarity index 89% rename from lib/pcg/include/pcg/machine_view_coordinate.struct.toml rename to lib/pcg/include/pcg/task_space_coordinate.struct.toml index de4d21d08a..65aea167cb 100644 --- a/lib/pcg/include/pcg/machine_view_coordinate.struct.toml +++ b/lib/pcg/include/pcg/task_space_coordinate.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "MachineViewCoordinate" +name = "TaskSpaceCoordinate" features = [ "eq", "ord", diff --git a/lib/pcg/include/pcg/task_space_operator.h b/lib/pcg/include/pcg/task_space_operator.h new file mode 100644 index 0000000000..067e8085bf --- /dev/null +++ b/lib/pcg/include/pcg/task_space_operator.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_PCG_INCLUDE_TASK_SPACE_OPERATOR_H +#define _FLEXFLOW_PCG_INCLUDE_TASK_SPACE_OPERATOR_H + +#include "pcg/task_space_coordinate.dtg.h" +#include "pcg/task_space_operator.dtg.h" +#include +#include + +namespace 
FlexFlow { + +std::unordered_set + get_fragment_coordinates(TaskSpaceOperator const &task); + +TaskSpaceCoordinate + get_maximum_fragment_coordinate(TaskSpaceOperator const &task); + +size_t num_dims(TaskSpaceOperator const &task); +size_t num_fragments(TaskSpaceOperator const &task); + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/task_space_operator.struct.toml b/lib/pcg/include/pcg/task_space_operator.struct.toml new file mode 100644 index 0000000000..5e9d496275 --- /dev/null +++ b/lib/pcg/include/pcg/task_space_operator.struct.toml @@ -0,0 +1,24 @@ +namespace = "FlexFlow" +name = "TaskSpaceOperator" +features = [ + "eq", + "ord", + "hash", + "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "", + "pcg/num_points_t.dtg.h" +] + +src_includes = [ + "utils/fmt/vector.h", + "utils/hash/vector.h" +] + +[[fields]] +name = "degrees" +type = "std::vector<::FlexFlow::num_points_t>" diff --git a/lib/pcg/src/pcg/delete.c b/lib/pcg/src/pcg/delete.c new file mode 100644 index 0000000000..62b358ca87 --- /dev/null +++ b/lib/pcg/src/pcg/delete.c @@ -0,0 +1,145 @@ +MachineSpaceCoordinate + get_machine_space_coordinates(MachineView const &mv, + MachineViewCoordinate const &coordinates, + MachineSpecification const &ms, + MachineViewProjection const &projection) { + + auto inter_projection = + filter_values(projection.machine_view_dim_to_machine_spec_dim, + [](MachineSpecificationDimension const &dim) { + return dim == MachineSpecificationDimension::INTER_NODE; + }); + auto intra_projection = + filter_values(projection.machine_view_dim_to_machine_spec_dim, + [](MachineSpecificationDimension const &dim) { + return dim == MachineSpecificationDimension::INTRA_NODE; + }); + + MachineViewCoordinate transformed_coordinates = MachineViewCoordinate{ + transform(zip(coordinates.raw_coord, mv.rect.get_sides()), + [&](auto const &pair) { + return pair.first * pair.second.stride.unwrapped; + })}; + transformed_coordinates = MachineViewCoordinate{ + 
transform(zip(transformed_coordinates.raw_coord, mv.start.raw_coord), + [&](auto const &pair) { return pair.first + pair.second; })}; + + auto get_coordinate = [&](auto const &sub_projection) { + std::vector relevant_dimensions = + sorted(keys(sub_projection)); + std::vector relevant_side_sizes = + transform(relevant_dimensions, [&](auto const &idx) { + return get_side_size(get_side_at_idx(mv, idx)); + }); + std::vector coefficients = + scanl(relevant_side_sizes, + 1, + [](size_t const &result, side_size_t const &side_size) { + return result * side_size.unwrapped; + }); + std::vector filtered_coord; + for (int i = 0; i < transformed_coordinates.raw_coord.size(); ++i) { + if (contains(relevant_dimensions, machine_view_dim_idx_t{i})) { + filtered_coord.push_back(transformed_coordinates.raw_coord[i]); + } + } + return sum( + transform(zip(coefficients, filtered_coord), + [](auto const pair) { return pair.first * pair.second; })); + }; + int inter_coordinate = get_coordinate(inter_projection); + int intra_coordinate = get_coordinate(intra_projection); + return MachineSpaceCoordinate{ + inter_coordinate, intra_coordinate, mv.device_type}; +} + +device_id_t get_device_id(MachineView const &mv, + MachineViewCoordinate const &coordinates, + MachineSpecification const &ms, + MachineViewProjection const &projection) { + MachineSpaceCoordinate coord = + get_machine_space_coordinates(mv, coordinates, ms, projection); + return get_device_id(ms, coord); +} + +std::unordered_set + get_device_ids(MachineView const &mv, + MachineSpecification const &ms, + MachineViewProjection const &projection) { + + return transform(get_devices_coordinates(mv), + [&](MachineViewCoordinate const &c) { + return get_device_id(mv, c, ms, projection); + }); +} + +size_t num_dims(MachineView const &mv) { + return get_num_dims(mv.rect); +} + +std::vector get_num_devices_per_dim(MachineView const &mv) { + return transform(mv.rect.get_sides(), [](StridedRectangleSide const &side) { + return 
side.num_points; + }); +} + +std::vector get_side_size_per_dim(MachineView const &mv) { + return transform(mv.rect.get_sides(), get_side_size); +} + +size_t num_devices(MachineView const &mv) { + return get_num_points(mv.rect).unwrapped; +} + +StridedRectangleSide get_side_at_idx(MachineView const &mv, + machine_view_dim_idx_t const &idx) { + return mv.rect.at(idx.unwrapped); +} + +static StridedRectangle make_1d_rect(int start, int stop, stride_t stride) { + assert(stop > start); + assert(stride > stride_t(0)); + StridedRectangleSide side = + strided_side_from_size_and_stride(side_size_t{stop - start}, stride); + StridedRectangle rect = + StridedRectangle{std::vector{side}}; + return rect; +} + +MachineView make_1d_machine_view(DeviceType device_type, + int start, + int stop, + stride_t stride) { + StridedRectangle rect = make_1d_rect(start, stop, stride); + MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; + return MachineView{start_coordinate, rect, device_type}; +} + +static StridedRectangle + make_1d_rect(int start, num_points_t num_points, stride_t stride) { + return make_1d_rect( + start, start + num_points.unwrapped * stride.unwrapped, stride); +} + +MachineView make_1d_machine_view(DeviceType device_type, + int start, + num_points_t num_points, + stride_t stride) { + StridedRectangle rect = make_1d_rect(start, num_points, stride); + MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; + return MachineView{start_coordinate, rect, device_type}; +} + +static StridedRectangle + make_1d_rect(int start, side_size_t interval_size, stride_t stride) { + return make_1d_rect(start, start + interval_size.unwrapped, stride); +} + +MachineView make_1d_machine_view(DeviceType device_type, + int start, + side_size_t interval_size, + stride_t stride) { + StridedRectangle rect = make_1d_rect(start, interval_size, stride); + MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; + return 
MachineView{start_coordinate, rect, device_type}; +} diff --git a/lib/pcg/src/pcg/machine_specification.cc b/lib/pcg/src/pcg/machine_specification.cc index af1f8b7aea..bc9d803d0e 100644 --- a/lib/pcg/src/pcg/machine_specification.cc +++ b/lib/pcg/src/pcg/machine_specification.cc @@ -32,16 +32,15 @@ int get_num_devices_per_node(MachineSpecification const &ms, throw mk_runtime_error("Unknown DeviceType {}", device_type); } } -bool is_valid_machine_specification_coordinates( - MachineSpecification const &ms, - MachineSpecificationCoordinate const &coord) { +bool is_valid_machine_space_coordinates(MachineSpecification const &ms, + MachineSpaceCoordinate const &coord) { return (coord.inter < ms.num_nodes) && (coord.intra < get_num_devices_per_node(ms, coord.device_type)); } device_id_t get_device_id(MachineSpecification const &ms, - MachineSpecificationCoordinate const &coord) { - assert(is_valid_machine_specification_coordinates(ms, coord)); + MachineSpaceCoordinate const &coord) { + assert(is_valid_machine_space_coordinates(ms, coord)); int raw_idx = coord.inter * get_num_devices_per_node(ms, coord.device_type) + coord.intra; return device_id_from_index(raw_idx, coord.device_type); diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 81ffc294ae..27d959f557 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,193 +1,87 @@ #include "pcg/machine_view.h" -#include "pcg/device_id.h" -#include "pcg/machine_specification.h" -#include "pcg/machine_view_coordinate.dtg.h" -#include "pcg/machine_view_dim_idx_t.dtg.h" -#include "pcg/machine_view_projection.dtg.h" -#include "pcg/strided_rectangle.h" -#include "pcg/strided_rectangle_side.h" +#include "pcg/task_space_operator.h" #include "utils/containers.h" -#include "utils/containers/as_vector.h" -#include "utils/containers/cartesian_product.h" #include "utils/containers/contains.h" -#include "utils/containers/filter_values.h" -#include "utils/containers/keys.h" 
-#include "utils/containers/product.h" -#include "utils/containers/range.h" -#include "utils/containers/reversed.h" #include "utils/containers/scanl.h" #include "utils/containers/transform.h" -#include "utils/containers/unordered_set_of.h" #include "utils/containers/zip.h" -#include "utils/hash/vector.h" namespace FlexFlow { -std::unordered_set - get_devices_coordinates(MachineView const &mv) { - - std::vector> coordinate_ranges = - transform(mv.rect.get_sides(), [&](StridedRectangleSide const &side) { - return range(side.num_points.unwrapped); - }); - - std::unordered_set> raw_coordinates = - unordered_set_of(cartesian_product(coordinate_ranges)); - std::unordered_set machine_view_coordinate = - transform(raw_coordinates, [](std::vector const &point) { - return MachineViewCoordinate{point}; - }); - return machine_view_coordinate; -} - -MachineViewCoordinate get_maximum_device_coordinates(MachineView const &mv) { - return maximum(get_devices_coordinates(mv)); -} - -MachineSpecificationCoordinate get_machine_specification_coordinates( - MachineView const &mv, - MachineViewCoordinate const &coordinates, - MachineSpecification const &ms, - MachineViewProjection const &projection) { - - auto inter_projection = - filter_values(projection.machine_view_dim_to_machine_spec_dim, - [](MachineSpecificationDimension const &dim) { - return dim == MachineSpecificationDimension::INTER_NODE; - }); - auto intra_projection = - filter_values(projection.machine_view_dim_to_machine_spec_dim, - [](MachineSpecificationDimension const &dim) { - return dim == MachineSpecificationDimension::INTRA_NODE; - }); - - MachineViewCoordinate transformed_coordinates = MachineViewCoordinate{ - transform(zip(coordinates.raw_coord, mv.rect.get_sides()), - [&](auto const &pair) { - return pair.first * pair.second.stride.unwrapped; - })}; - transformed_coordinates = MachineViewCoordinate{ - transform(zip(transformed_coordinates.raw_coord, mv.start.raw_coord), - [&](auto const &pair) { return pair.first + 
pair.second; })}; - - auto get_coordinate = [&](auto const &sub_projection) { - std::vector relevant_dimensions = - sorted(keys(sub_projection)); - std::vector relevant_side_sizes = - transform(relevant_dimensions, [&](auto const &idx) { - return get_side_size(get_side_at_idx(mv, idx)); - }); - std::vector coefficients = - scanl(relevant_side_sizes, - 1, - [](size_t const &result, side_size_t const &side_size) { - return result * side_size.unwrapped; - }); - std::vector filtered_coord; - for (int i = 0; i < transformed_coordinates.raw_coord.size(); ++i) { - if (contains(relevant_dimensions, machine_view_dim_idx_t{i})) { - filtered_coord.push_back(transformed_coordinates.raw_coord[i]); - } +MachineSpaceCoordinate + get_machine_space_coordinate(TaskSpaceOperator const &task, + MachineView const &mv, + TaskSpaceCoordinate const &coord, + MachineSpecification const &ms) { + + std::vector inter_projection; + std::vector intra_projection; + for (size_t i = 0; i < num_dims(mv); ++i) { + if (mv.projection[i] == MachineSpecificationDimension::INTER_NODE) { + inter_projection.push_back(i); + } else if (mv.projection[i] == MachineSpecificationDimension::INTRA_NODE) { + intra_projection.push_back(i); } - return sum( - transform(zip(coefficients, filtered_coord), - [](auto const pair) { return pair.first * pair.second; })); - }; - int inter_coordinate = get_coordinate(inter_projection); - int intra_coordinate = get_coordinate(intra_projection); - return MachineSpecificationCoordinate{ - inter_coordinate, intra_coordinate, mv.device_type}; -} - -device_id_t get_device_id(MachineView const &mv, - MachineViewCoordinate const &coordinates, - MachineSpecification const &ms, - MachineViewProjection const &projection) { - MachineSpecificationCoordinate coord = - get_machine_specification_coordinates(mv, coordinates, ms, projection); - return get_device_id(ms, coord); -} - -std::unordered_set - get_device_ids(MachineView const &mv, - MachineSpecification const &ms, - 
MachineViewProjection const &projection) { - - return transform(get_devices_coordinates(mv), - [&](MachineViewCoordinate const &c) { - return get_device_id(mv, c, ms, projection); + } + + std::vector inter_sizes; + std::vector intra_sizes; + std::vector inter_coord_points; + std::vector intra_coord_points; + std::vector inter_strides; + std::vector intra_strides; + + for (size_t i = 0; i < num_dims(mv); ++i) { + int dim_size = task.degrees.at(i).unwrapped * mv.strides.at(i).unwrapped; + if (contains(inter_projection, i)) { + inter_sizes.push_back(dim_size); + inter_coord_points.push_back(coord.raw_coord.at(i)); + inter_strides.push_back(mv.strides.at(i).unwrapped); + } + if (contains(intra_projection, i)) { + intra_sizes.push_back(dim_size); + intra_coord_points.push_back(coord.raw_coord.at(i)); + intra_strides.push_back(mv.strides.at(i).unwrapped); + } + } + + std::vector inter_coeffs = scanl(inter_sizes, 1, std::multiplies()); + std::vector intra_coeffs = scanl(intra_sizes, 1, std::multiplies()); + + int inter = + mv.start.inter + + sum(transform(zip(inter_coeffs, inter_coord_points, inter_strides), + [](auto const &tuple) { + return std::get<0>(tuple) * std::get<1>(tuple) * + std::get<2>(tuple); + })); + int intra = + mv.start.intra + + sum(transform(zip(intra_coeffs, intra_coord_points, intra_strides), + [](auto const &tuple) { + return std::get<0>(tuple) * std::get<1>(tuple) * + std::get<2>(tuple); + })); + + return MachineSpaceCoordinate{inter, intra, get_device_type(mv)}; +} + +std::unordered_set + get_machine_space_coordinates(TaskSpaceOperator const &task, + MachineView const &mv, + MachineSpecification const &ms) { + + return transform(get_fragment_coordinates(task), + [&](TaskSpaceCoordinate const &c) { + return get_machine_space_coordinate(task, mv, c, ms); }); } size_t num_dims(MachineView const &mv) { - return get_num_dims(mv.rect); -} - -std::vector get_num_devices_per_dim(MachineView const &mv) { - return transform(mv.rect.get_sides(), 
[](StridedRectangleSide const &side) { - return side.num_points; - }); + return mv.strides.size(); } -std::vector get_side_size_per_dim(MachineView const &mv) { - return transform(mv.rect.get_sides(), get_side_size); +DeviceType get_device_type(MachineView const &mv) { + return mv.start.device_type; } - -size_t num_devices(MachineView const &mv) { - return get_num_points(mv.rect).unwrapped; -} - -StridedRectangleSide get_side_at_idx(MachineView const &mv, - machine_view_dim_idx_t const &idx) { - return mv.rect.at(idx.unwrapped); -} - -static StridedRectangle make_1d_rect(int start, int stop, stride_t stride) { - assert(stop > start); - assert(stride > stride_t(0)); - StridedRectangleSide side = - strided_side_from_size_and_stride(side_size_t{stop - start}, stride); - StridedRectangle rect = - StridedRectangle{std::vector{side}}; - return rect; -} - -MachineView make_1d_machine_view(DeviceType device_type, - int start, - int stop, - stride_t stride) { - StridedRectangle rect = make_1d_rect(start, stop, stride); - MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; - return MachineView{start_coordinate, rect, device_type}; -} - -static StridedRectangle - make_1d_rect(int start, num_points_t num_points, stride_t stride) { - return make_1d_rect( - start, start + num_points.unwrapped * stride.unwrapped, stride); -} - -MachineView make_1d_machine_view(DeviceType device_type, - int start, - num_points_t num_points, - stride_t stride) { - StridedRectangle rect = make_1d_rect(start, num_points, stride); - MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; - return MachineView{start_coordinate, rect, device_type}; -} - -static StridedRectangle - make_1d_rect(int start, side_size_t interval_size, stride_t stride) { - return make_1d_rect(start, start + interval_size.unwrapped, stride); -} - -MachineView make_1d_machine_view(DeviceType device_type, - int start, - side_size_t interval_size, - stride_t stride) { - StridedRectangle rect = 
make_1d_rect(start, interval_size, stride); - MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; - return MachineView{start_coordinate, rect, device_type}; -} - } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/machine_view_dim_idx_t.cc b/lib/pcg/src/pcg/machine_view_dim_idx_t.cc deleted file mode 100644 index 7494531366..0000000000 --- a/lib/pcg/src/pcg/machine_view_dim_idx_t.cc +++ /dev/null @@ -1,13 +0,0 @@ -#include "pcg/machine_view_dim_idx_t.h" -#include "pcg/machine_view.h" -#include "utils/containers/range.h" -#include "utils/containers/transform.h" - -namespace FlexFlow { - -std::vector - get_machine_view_indices(MachineView const &mv) { - return transform(range(num_dims(mv)), - [](int idx) { return machine_view_dim_idx_t{idx}; }); -} -} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/start_invariant_machine_view.cc b/lib/pcg/src/pcg/start_invariant_machine_view.cc deleted file mode 100644 index 8f0a4d0052..0000000000 --- a/lib/pcg/src/pcg/start_invariant_machine_view.cc +++ /dev/null @@ -1,23 +0,0 @@ -#include "pcg/start_invariant_machine_view.h" -#include "pcg/strided_rectangle.h" - -namespace FlexFlow { - -MachineView machine_view_from_start_invariant( - StartInvariantMachineView const &start_invariant_mv, - MachineViewCoordinate const &start) { - return MachineView{ - start, start_invariant_mv.rect, start_invariant_mv.device_type}; -} -StartInvariantMachineView - start_invariant_from_machine_view(MachineView const &mv) { - return StartInvariantMachineView{mv.rect, mv.device_type}; -} - -StartInvariantMachineView make_1d_start_invariant_machine_view( - num_points_t num_points, stride_t stride, DeviceType device_type) { - return StartInvariantMachineView{ - StridedRectangle{{StridedRectangleSide{num_points, stride}}}, - device_type}; -} -} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/strided_rectangle.cc b/lib/pcg/src/pcg/strided_rectangle.cc deleted file mode 100644 index 677fc91977..0000000000 --- 
a/lib/pcg/src/pcg/strided_rectangle.cc +++ /dev/null @@ -1,131 +0,0 @@ -#include "pcg/strided_rectangle.h" -#include "op-attrs/dim_ordered/transform.h" -#include "pcg/device_id_t.dtg.h" -#include "pcg/machine_view_coordinate.dtg.h" -#include "pcg/strided_rectangle_side.dtg.h" -#include "pcg/strided_rectangle_side.h" -#include "utils/containers/as_vector.h" -#include "utils/containers/product.h" -#include "utils/containers/sorted.h" -#include "utils/containers/transform.h" -#include "utils/containers/zip.h" -#include "utils/fmt/vector.h" -#include "utils/hash-utils.h" -#include "utils/hash/tuple.h" -#include "utils/hash/vector.h" - -namespace FlexFlow { - -// StridedRectangle has a canonical ordering to its sides, which are sorted in -// ascending order at construction -StridedRectangle::StridedRectangle( - std::vector<::FlexFlow::StridedRectangleSide> const &sides) - : sides(sorted(sides)) {} - -std::tuple const &> - StridedRectangle::tie() const { - return std::tie(this->sides); -} - -bool StridedRectangle::operator==(StridedRectangle const &other) const { - return this->tie() == other.tie(); -} - -bool StridedRectangle::operator!=(StridedRectangle const &other) const { - return this->tie() != other.tie(); -} - -bool StridedRectangle::operator<(StridedRectangle const &other) const { - return this->tie() < other.tie(); -} - -bool StridedRectangle::operator>(StridedRectangle const &other) const { - return this->tie() > other.tie(); -} - -bool StridedRectangle::operator<=(StridedRectangle const &other) const { - return this->tie() <= other.tie(); -} - -bool StridedRectangle::operator>=(StridedRectangle const &other) const { - return this->tie() >= other.tie(); -} - -std::vector const &StridedRectangle::get_sides() const { - return this->sides; -} - -StridedRectangleSide const &StridedRectangle::at(int idx) const { - return this->sides.at(idx); -} - -std::string format_as(StridedRectangle const &x) { - std::ostringstream oss; - oss << ""; - return oss.str(); -} - 
-std::ostream &operator<<(std::ostream &s, StridedRectangle const &x) { - return s << fmt::to_string(x); -} - -size_t get_num_dims(StridedRectangle const &rect) { - return rect.get_sides().size(); -} - -num_points_t get_num_points(StridedRectangle const &rect) { - return num_points_t{ - product(transform(rect.get_sides(), [](StridedRectangleSide const &side) { - return side.num_points.unwrapped; - }))}; -} - -size_t get_size(StridedRectangle const &rect) { - return product( - transform(rect.get_sides(), [](StridedRectangleSide const &side) { - return get_side_size(side).unwrapped; - })); -} - -StridedRectangle - get_strided_rectangle(MultiDimensionalStride const &strides, - std::vector const &num_points_per_dim) { - std::vector sides = transform( - zip(num_points_per_dim, strides.raw_strides), [&](auto const &p) { - return StridedRectangleSide(num_points_t(p.first), stride_t(p.second)); - }); - return StridedRectangle{sides}; -}; - -} // namespace FlexFlow - -namespace std { -size_t hash::operator()( - ::FlexFlow::StridedRectangle const &x) const { - return get_std_hash(x.tie()); -} -} // namespace std - -namespace nlohmann { -::FlexFlow::StridedRectangle - adl_serializer<::FlexFlow::StridedRectangle>::from_json(json const &j) { - return ::FlexFlow::StridedRectangle{ - j.at("sides") - .template get>()}; -} -void adl_serializer<::FlexFlow::StridedRectangle>::to_json( - json &j, ::FlexFlow::StridedRectangle const &v) { - j["__type"] = "StridedRectangle"; - j["sides"] = v.get_sides(); -} -} // namespace nlohmann - -namespace rc { -Gen<::FlexFlow::StridedRectangle> - Arbitrary<::FlexFlow::StridedRectangle>::arbitrary() { - return gen::construct<::FlexFlow::StridedRectangle>( - gen::arbitrary>()); -} -} // namespace rc diff --git a/lib/pcg/src/pcg/strided_rectangle_side.cc b/lib/pcg/src/pcg/strided_rectangle_side.cc deleted file mode 100644 index 0ac5752c36..0000000000 --- a/lib/pcg/src/pcg/strided_rectangle_side.cc +++ /dev/null @@ -1,22 +0,0 @@ -#include 
"pcg/strided_rectangle_side.h" -#include "utils/containers/range.h" -#include "utils/exception.h" - -namespace FlexFlow { - -StridedRectangleSide strided_side_from_size_and_stride(side_size_t side_size, - stride_t stride) { - assert((side_size.unwrapped % stride.unwrapped) == 0); - return StridedRectangleSide{ - num_points_t{side_size.unwrapped / stride.unwrapped}, stride}; -} - -side_size_t get_side_size(StridedRectangleSide const &s) { - return side_size_t{s.num_points.unwrapped * s.stride.unwrapped}; -} - -std::vector get_points(StridedRectangleSide const &s) { - return range(0, get_side_size(s).unwrapped, s.stride.unwrapped); -} - -} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/task_space_operator.cc b/lib/pcg/src/pcg/task_space_operator.cc new file mode 100644 index 0000000000..69638f7705 --- /dev/null +++ b/lib/pcg/src/pcg/task_space_operator.cc @@ -0,0 +1,43 @@ +#include "pcg/task_space_operator.h" +#include "utils/containers.h" +#include "utils/containers/as_vector.h" +#include "utils/containers/cartesian_product.h" +#include "utils/containers/product.h" +#include "utils/containers/range.h" +#include "utils/containers/transform.h" +#include "utils/containers/unordered_set_of.h" + +namespace FlexFlow { + +std::unordered_set + get_fragment_coordinates(TaskSpaceOperator const &task) { + + std::vector> coordinate_ranges = + transform(task.degrees, [&](num_points_t const &num_points) { + return range(num_points.unwrapped); + }); + + std::unordered_set> raw_coordinates = + unordered_set_of(cartesian_product(coordinate_ranges)); + std::unordered_set task_space_coordinates = + transform(raw_coordinates, [](std::vector const &point) { + return TaskSpaceCoordinate{point}; + }); + return task_space_coordinates; +} + +TaskSpaceCoordinate + get_maximum_fragment_coordinate(TaskSpaceOperator const &task) { + return maximum(get_fragment_coordinates(task)); +} + +size_t num_dims(TaskSpaceOperator const &task) { + return task.degrees.size(); +} +size_t 
num_fragments(TaskSpaceOperator const &task) { + return product(transform(task.degrees, [&](num_points_t const &num_points) { + return num_points.unwrapped; + })); +} + +} // namespace FlexFlow diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index f9156cdebd..1deeee48cf 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -1,6 +1,4 @@ #include "pcg/machine_view.h" -#include "pcg/strided_rectangle.h" -#include "pcg/strided_rectangle_side.h" #include "test/utils/doctest.h" #include "utils/containers/transform.h" #include "utils/fmt/unordered_set.h" @@ -11,263 +9,139 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("MachineView - utility functions") { - MachineView mv = MachineView{ - MachineViewCoordinate{{0, 0, 0}}, - StridedRectangle{{StridedRectangleSide(num_points_t{7}, stride_t{5}), - StridedRectangleSide(num_points_t{10}, stride_t{2}), - StridedRectangleSide(num_points_t{1}, stride_t{4})}}, - DeviceType::GPU}; + MachineView mv = MachineView{{stride_t{2}, stride_t{2}}, + {MachineSpecificationDimension::INTER_NODE, + MachineSpecificationDimension::INTER_NODE}, + MachineSpaceCoordinate{0, 0, DeviceType::GPU}}; SUBCASE("num_dims") { - CHECK(num_dims(mv) == 3); - } - SUBCASE("num_devices") { - CHECK(num_devices(mv) == 7 * 10 * 1); - } - - SUBCASE("get_side_size_per_dim") { - std::vector correct = { - side_size_t(1 * 4), side_size_t(7 * 5), side_size_t(10 * 2)}; - std::vector result = get_side_size_per_dim(mv); - CHECK(correct == result); + CHECK(num_dims(mv) == 2); } - SUBCASE("get_num_devices_per_dim") { - std::vector correct = { - num_points_t(1), num_points_t(7), num_points_t(10)}; - std::vector result = get_num_devices_per_dim(mv); - CHECK(correct == result); + SUBCASE("get_device_type") { + CHECK(get_device_type(mv) == DeviceType::GPU); } } - TEST_CASE("get_devices_coordinates") { - - SUBCASE("2D MachineView") { - - MachineView mv = - 
MachineView{MachineViewCoordinate{{0, 0}}, - StridedRectangle{{ - StridedRectangleSide(num_points_t(2), stride_t{3}), - StridedRectangleSide(num_points_t(2), stride_t{2}), - }}, - DeviceType::GPU}; - SUBCASE("get_devices_coordinates") { - std::unordered_set correct = { - {MachineViewCoordinate{{0, 0}}, - MachineViewCoordinate{{0, 1}}, - MachineViewCoordinate{{1, 0}}, - MachineViewCoordinate{{1, 1}}}}; - std::unordered_set result = - get_devices_coordinates(mv); - CHECK(correct == result); - } - } - SUBCASE("3D MachineView") { - - MachineView mv = - MachineView{MachineViewCoordinate{{0, 1, 2}}, - StridedRectangle{{ - StridedRectangleSide(num_points_t(1), stride_t{3}), - StridedRectangleSide(num_points_t(2), stride_t{1}), - StridedRectangleSide(num_points_t(2), stride_t{2}), - }}, - DeviceType::GPU}; - - SUBCASE("get_devices_coordinates") { - std::unordered_set correct = { - {MachineViewCoordinate{{0, 0, 0}}, - MachineViewCoordinate{{0, 0, 1}}, - MachineViewCoordinate{{0, 1, 0}}, - MachineViewCoordinate{{0, 1, 1}}}}; - std::unordered_set result = - get_devices_coordinates(mv); - CHECK(correct == result); - } - } - } - - TEST_CASE("get_maximum_device_coordinates") { - SUBCASE("2D MachineView") { - - MachineView mv = - MachineView{MachineViewCoordinate{{0, 0}}, - StridedRectangle{{ - StridedRectangleSide(num_points_t(2), stride_t{3}), - StridedRectangleSide(num_points_t(2), stride_t{2}), - }}, - DeviceType::GPU}; - - SUBCASE("get_maximum_device_coordinates") { - CHECK(get_maximum_device_coordinates(mv) == - MachineViewCoordinate{{1, 1}}); - } - } - - SUBCASE("3D MachineView") { + TEST_CASE("get_machine_space_coordinate") { + SUBCASE("1D case") { + TaskSpaceOperator task = TaskSpaceOperator{{num_points_t{3}}}; MachineView mv = - MachineView{MachineViewCoordinate{{0, 1, 2}}, - StridedRectangle{{ - StridedRectangleSide(num_points_t(1), stride_t{3}), - StridedRectangleSide(num_points_t(2), stride_t{1}), - StridedRectangleSide(num_points_t(2), stride_t{2}), - }}, - 
DeviceType::GPU}; + MachineView{{{stride_t{2}}}, + {{MachineSpecificationDimension::INTRA_NODE}}, + MachineSpaceCoordinate{0, 1, DeviceType::GPU}}; - SUBCASE("get_maximum_device_coordinates") { - CHECK(get_maximum_device_coordinates(mv) == - MachineViewCoordinate{{0, 1, 1}}); - } - } - } - - TEST_CASE("make_1d_machine_view") { - - MachineViewCoordinate start = MachineViewCoordinate{{1}}; - MachineView mv = MachineView{ - start, - StridedRectangle{{StridedRectangleSide{num_points_t{7}, stride_t{5}}}}, - DeviceType::GPU}; - - SUBCASE("make_1d_machine_view(int start, int stop, stride_t " - "stride,DeviceType device_type)") { - MachineView result = - make_1d_machine_view(DeviceType::GPU, 1, 1 + 7 * 5, stride_t{5}); - MachineView correct = mv; - CHECK(result == correct); - } - - SUBCASE("make_1d_machine_view(gpu_id_t start, num_points_t num_points, " - "stride_t stride,DeviceType device_type)") { - MachineView result = make_1d_machine_view( - DeviceType::GPU, 1, num_points_t{7}, stride_t{5}); - MachineView correct = mv; - CHECK(result == correct); - } - - SUBCASE("make_1d_machine_view(gpu_id_t start, side_size_t side_size, " - "stride_t stride,DeviceType device_type)") { - MachineView result = make_1d_machine_view( - DeviceType::GPU, 1, side_size_t{7 * 5}, stride_t{5}); - MachineView correct = mv; - CHECK(result == correct); - } - } - - TEST_CASE("get_device_id") { - SUBCASE("1D case") { - MachineView mv = make_1d_machine_view( - DeviceType::GPU, 1, num_points_t{3}, stride_t{2}); // 1 3 5 MachineSpecification ms = MachineSpecification{ - 1, 0, 6, 0, 0}; // Single node with 6 GPUs (0,1,2,3,4,5) - MachineViewProjection projection = - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTRA_NODE}}}; - - SUBCASE("Device 0") { - MachineViewCoordinate device = MachineViewCoordinate{{0}}; - device_id_t correct = device_id_from_index(1, DeviceType::GPU); - device_id_t result = get_device_id(mv, device, ms, projection); + 1, 6, 6, 0, 0}; // 
Single node with 6 GPUs (0,1,2,3,4,5) + + SUBCASE("Fragment 0") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0}}; + MachineSpaceCoordinate correct = + MachineSpaceCoordinate{0, 1, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms); CHECK(correct == result); } - SUBCASE("Device 1") { - MachineViewCoordinate device = MachineViewCoordinate{{1}}; - device_id_t correct = device_id_from_index(3, DeviceType::GPU); - device_id_t result = get_device_id(mv, device, ms, projection); + SUBCASE("Fragment 1") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1}}; + MachineSpaceCoordinate correct = + MachineSpaceCoordinate{0, 3, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms); CHECK(correct == result); } - SUBCASE("Device 2") { - MachineViewCoordinate device = MachineViewCoordinate{{2}}; - device_id_t correct = device_id_from_index(5, DeviceType::GPU); - device_id_t result = get_device_id(mv, device, ms, projection); + SUBCASE("Fragment 2") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{2}}; + MachineSpaceCoordinate correct = + MachineSpaceCoordinate{0, 5, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms); CHECK(correct == result); } } SUBCASE("2D case") { + + TaskSpaceOperator task = + TaskSpaceOperator{{num_points_t{2}, num_points_t{2}}}; MachineView mv = - MachineView{MachineViewCoordinate{{1, 2}}, - StridedRectangle{ - {StridedRectangleSide(num_points_t(2), stride_t{1}), - StridedRectangleSide(num_points_t(2), stride_t{2})}}, - DeviceType::GPU}; - MachineSpecification ms = - MachineSpecification{3, 0, 5, 0, 0}; // 3 nodes with 5 GPUs each - MachineViewProjection projection = - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER_NODE}, - {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTRA_NODE}}}; + MachineView{{{stride_t{1}, stride_t{2}}}, + 
{{MachineSpecificationDimension::INTER_NODE, + MachineSpecificationDimension::INTRA_NODE}}, + MachineSpaceCoordinate{1, 2, DeviceType::GPU}}; - SUBCASE("Device (0,0)") { - MachineViewCoordinate device = MachineViewCoordinate{{0, 0}}; - device_id_t correct = device_id_from_index(7, DeviceType::GPU); - device_id_t result = get_device_id(mv, device, ms, projection); + MachineSpecification ms = + MachineSpecification{3, 5, 5, 0, 0}; // 3 Nodes, 5 GPUs each + + SUBCASE("Fragment (0,0)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 0}}; + MachineSpaceCoordinate correct = + MachineSpaceCoordinate{1, 2, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms); CHECK(correct == result); } - SUBCASE("Device (0,1)") { - MachineViewCoordinate device = MachineViewCoordinate{{0, 1}}; - device_id_t correct = device_id_from_index(9, DeviceType::GPU); - device_id_t result = get_device_id(mv, device, ms, projection); + SUBCASE("Fragment (0,1)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1}}; + MachineSpaceCoordinate correct = + MachineSpaceCoordinate{1, 4, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms); CHECK(correct == result); } - SUBCASE("Device (1,0)") { - MachineViewCoordinate device = MachineViewCoordinate{{1, 0}}; - device_id_t correct = device_id_from_index(12, DeviceType::GPU); - device_id_t result = get_device_id(mv, device, ms, projection); + SUBCASE("Fragment (1,0)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0}}; + MachineSpaceCoordinate correct = + MachineSpaceCoordinate{2, 2, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms); CHECK(correct == result); } - SUBCASE("Device (1,1)") { - MachineViewCoordinate device = MachineViewCoordinate{{1, 1}}; - device_id_t correct = device_id_from_index(14, DeviceType::GPU); - device_id_t result = get_device_id(mv, device, ms, projection); + 
+ SUBCASE("Fragment (1,1)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1}}; + MachineSpaceCoordinate correct = + MachineSpaceCoordinate{2, 4, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms); CHECK(correct == result); } } SUBCASE("3D case") { + TaskSpaceOperator task = TaskSpaceOperator{ + {num_points_t{2}, num_points_t{2}, num_points_t{2}}}; MachineView mv = - MachineView{MachineViewCoordinate{{0, 2, 0}}, - StridedRectangle{ - {StridedRectangleSide(num_points_t(2), stride_t{1}), - StridedRectangleSide(num_points_t(2), stride_t{2}), - StridedRectangleSide(num_points_t(2), stride_t{1})}}, - DeviceType::GPU}; - MachineSpecification ms = - MachineSpecification{2, 0, 8, 0, 0}; // 3 nodes with 5 GPUs each - MachineViewProjection projection = - MachineViewProjection{{{machine_view_dim_idx_t{0}, - MachineSpecificationDimension::INTER_NODE}, - {machine_view_dim_idx_t{1}, - MachineSpecificationDimension::INTRA_NODE}, - {machine_view_dim_idx_t{2}, - MachineSpecificationDimension::INTRA_NODE}}}; + MachineView{{{stride_t{1}, stride_t{2}, stride_t{1}}}, + {{MachineSpecificationDimension::INTER_NODE, + MachineSpecificationDimension::INTRA_NODE, + MachineSpecificationDimension::INTRA_NODE}}, + MachineSpaceCoordinate{0, 1, DeviceType::GPU}}; - SUBCASE("Device (0,0,1)") { - MachineViewCoordinate device = MachineViewCoordinate{{0, 1, 0}}; - device_id_t correct = device_id_from_index(3, DeviceType::GPU); - device_id_t result = get_device_id(mv, device, ms, projection); + MachineSpecification ms = + MachineSpecification{2, 8, 8, 0, 0}; // 2 Nodes, 8 GPUs each + + SUBCASE("Fragment (0,0,1)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1, 0}}; + MachineSpaceCoordinate correct = + MachineSpaceCoordinate(0, 3, DeviceType::GPU); + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms); + ; CHECK(correct == result); } - SUBCASE("Device (1, 1, 0)") { - MachineViewCoordinate device = 
MachineViewCoordinate{{1, 0, 1}}; - device_id_t correct = device_id_from_index(14, DeviceType::GPU); - device_id_t result = get_device_id(mv, device, ms, projection); + SUBCASE("Fragment (1, 1, 0)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0, 1}}; + MachineSpaceCoordinate correct = + MachineSpaceCoordinate{1, 5, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms); + ; CHECK(correct == result); } - SUBCASE("All devices") { - std::unordered_set result = - get_device_ids(mv, ms, projection); - std::unordered_set devices = {2, 3, 10, 11, 6, 7, 14, 15}; - std::unordered_set correct = - transform(devices, [&](int idx) { - return device_id_from_index(idx, DeviceType::GPU); - }); - - CHECK(result == correct); - } } } } diff --git a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc deleted file mode 100644 index b8743bd41b..0000000000 --- a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc +++ /dev/null @@ -1,45 +0,0 @@ -#include "pcg/start_invariant_machine_view.h" -#include "pcg/machine_view.h" -#include "test/utils/doctest.h" - -TEST_SUITE(FF_TEST_SUITE) { - - TEST_CASE("StartInvariantMachineView") { - - MachineViewCoordinate start = MachineViewCoordinate{{0}}; - StridedRectangle rect = StridedRectangle{{ - StridedRectangleSide(num_points_t{2}, stride_t{3}), - StridedRectangleSide(num_points_t{2}, stride_t{2}), - }}; - - DeviceType device_type = DeviceType::GPU; - - SUBCASE("start_invariant_from_machine_view") { - - MachineView input = MachineView{start, rect, device_type}; - - StartInvariantMachineView correct = - StartInvariantMachineView{rect, device_type}; - StartInvariantMachineView result = - start_invariant_from_machine_view(input); - CHECK(correct == result); - } - - SUBCASE("conversion is invertible") { - SUBCASE("MachineView -> StrideInvariant -> MachineView") { - MachineView correct = MachineView{start, rect, device_type}; - 
MachineView result = machine_view_from_start_invariant( - start_invariant_from_machine_view(correct), start); - CHECK(correct == result); - } - - SUBCASE("StrideInvariant -> MachineView -> StrideInvariant") { - StartInvariantMachineView correct = - StartInvariantMachineView{rect, device_type}; - StartInvariantMachineView result = start_invariant_from_machine_view( - machine_view_from_start_invariant(correct, start)); - CHECK(correct == result); - } - } - } -} diff --git a/lib/pcg/test/src/pcg/strided_rectangle.cc b/lib/pcg/test/src/pcg/strided_rectangle.cc deleted file mode 100644 index 81710fcd0a..0000000000 --- a/lib/pcg/test/src/pcg/strided_rectangle.cc +++ /dev/null @@ -1,39 +0,0 @@ -#include "pcg/strided_rectangle.h" -#include "pcg/strided_rectangle_side.h" -#include "test/utils/doctest.h" -#include "utils/fmt/vector.h" - -using namespace FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - - TEST_CASE("StridedRectangle") { - SUBCASE("constructor sorts the StridedRectangleSides") { - StridedRectangleSide s0{num_points_t{7}, stride_t{5}}; - StridedRectangleSide s1{num_points_t{10}, stride_t{2}}; - - StridedRectangle r0 = StridedRectangle{{s0, s1}}; - StridedRectangle r1 = StridedRectangle{{s1, s0}}; - SUBCASE("has canonical order") { - CHECK(r0 == r1); - } - SUBCASE("canonical ordering is sorting") { - CHECK(r1.get_sides() == std::vector{s0, s1}); - } - } - - SUBCASE("helper functions") { - StridedRectangleSide s0{num_points_t{7}, stride_t{5}}; - StridedRectangleSide s1{num_points_t{10}, stride_t{2}}; - StridedRectangleSide s2{num_points_t{8}, stride_t{1}}; - StridedRectangle rect{{s0, s1, s2}}; - - SUBCASE("get_num_dims") { - CHECK(get_num_dims(rect) == 3); - } - SUBCASE("get_num_points") { - CHECK(get_num_points(rect) == num_points_t{7 * 8 * 10}); - } - } - } -} diff --git a/lib/pcg/test/src/pcg/strided_rectangle_side.cc b/lib/pcg/test/src/pcg/strided_rectangle_side.cc deleted file mode 100644 index e45cc576e9..0000000000 --- 
a/lib/pcg/test/src/pcg/strided_rectangle_side.cc +++ /dev/null @@ -1,19 +0,0 @@ -#include "pcg/strided_rectangle_side.h" -#include "pcg/strided_rectangle.h" -#include "test/utils/doctest.h" - -using namespace FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_side_size(StridedRectangleSide)") { - StridedRectangleSide side{num_points_t{7}, stride_t{5}}; - - CHECK(get_side_size(side) == side_size_t{7 * 5}); - } - TEST_CASE("strided_side_from_size_and_stride") { - StridedRectangleSide correct{num_points_t{10}, stride_t{3}}; - StridedRectangleSide result = - strided_side_from_size_and_stride(side_size_t{10 * 3}, stride_t{3}); - CHECK(result == correct); - } -} diff --git a/lib/pcg/test/src/pcg/task_space_operator.cc b/lib/pcg/test/src/pcg/task_space_operator.cc new file mode 100644 index 0000000000..5e1a7f7acf --- /dev/null +++ b/lib/pcg/test/src/pcg/task_space_operator.cc @@ -0,0 +1,62 @@ +#include "pcg/task_space_operator.h" +#include "utils/fmt/unordered_set.h" +#include + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("task_space_operator functions") { + SUBCASE("get_fragment_coordinates") { + + SUBCASE("2D Task") { + + TaskSpaceOperator task = + TaskSpaceOperator{{num_points_t{2}, num_points_t{2}}}; + + std::unordered_set correct = { + {TaskSpaceCoordinate{{0, 0}}, + TaskSpaceCoordinate{{0, 1}}, + TaskSpaceCoordinate{{1, 0}}, + TaskSpaceCoordinate{{1, 1}}}}; + std::unordered_set result = + get_fragment_coordinates(task); + CHECK(correct == result); + } + SUBCASE("3D Task") { + + TaskSpaceOperator task = TaskSpaceOperator{ + {num_points_t{1}, num_points_t{2}, num_points_t{2}}}; + + std::unordered_set correct = { + {TaskSpaceCoordinate{{0, 0, 0}}, + TaskSpaceCoordinate{{0, 0, 1}}, + TaskSpaceCoordinate{{0, 1, 0}}, + TaskSpaceCoordinate{{0, 1, 1}}}}; + std::unordered_set result = + get_fragment_coordinates(task); + CHECK(correct == result); + } + } + SUBCASE("get_maximum_fragment_coordinate") { + + SUBCASE("2D Task") { + + 
TaskSpaceOperator task = + TaskSpaceOperator{{num_points_t{3}, num_points_t{2}}}; + + TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2, 1}}; + TaskSpaceCoordinate result = get_maximum_fragment_coordinate(task); + CHECK(correct == result); + } + SUBCASE("3D Task") { + + TaskSpaceOperator task = TaskSpaceOperator{ + {num_points_t{3}, num_points_t{2}, num_points_t{4}}}; + + TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2, 1, 3}}; + TaskSpaceCoordinate result = get_maximum_fragment_coordinate(task); + CHECK(correct == result); + } + } + } +} diff --git a/lib/utils/include/utils/containers/zip.h b/lib/utils/include/utils/containers/zip.h index 94182577ee..0f6dbed1d3 100644 --- a/lib/utils/include/utils/containers/zip.h +++ b/lib/utils/include/utils/containers/zip.h @@ -1,6 +1,7 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ZIP_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_ZIP_H +#include #include #include @@ -16,6 +17,17 @@ std::vector> zip(std::vector const &l, return result; } +template +std::vector> zip(std::vector const &a, + std::vector const &b, + std::vector const &c) { + std::vector> result; + for (int i = 0; i < std::min({a.size(), b.size(), c.size()}); i++) { + result.push_back(std::make_tuple(a.at(i), b.at(i), c.at(i))); + } + return result; +} + } // namespace FlexFlow #endif From 0385e01fc72fa6854196088b4ac974f952b836ce Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Fri, 4 Oct 2024 15:08:49 -0700 Subject: [PATCH 24/34] formatting --- lib/pcg/src/pcg/machine_view.cc | 2 +- lib/pcg/src/pcg/task_space_operator.cc | 2 +- lib/pcg/test/src/pcg/machine_view.cc | 2 +- lib/utils/test/src/utils/containers/cartesian_product.cc | 2 +- lib/utils/test/src/utils/containers/filter.cc | 2 +- lib/utils/test/src/utils/containers/range.cc | 2 +- lib/utils/test/src/utils/containers/replicate.cc | 2 +- lib/utils/test/src/utils/containers/scanl.cc | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git 
a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 8f36830dc1..d4d3d72482 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,8 +1,8 @@ #include "pcg/machine_view.h" #include "pcg/task_space_operator.h" -#include "utils/containers/sum.h" #include "utils/containers/contains.h" #include "utils/containers/scanl.h" +#include "utils/containers/sum.h" #include "utils/containers/transform.h" #include "utils/containers/zip.h" diff --git a/lib/pcg/src/pcg/task_space_operator.cc b/lib/pcg/src/pcg/task_space_operator.cc index 5738793378..130232a131 100644 --- a/lib/pcg/src/pcg/task_space_operator.cc +++ b/lib/pcg/src/pcg/task_space_operator.cc @@ -1,6 +1,6 @@ #include "pcg/task_space_operator.h" -#include "utils/containers/maximum.h" #include "utils/containers/cartesian_product.h" +#include "utils/containers/maximum.h" #include "utils/containers/product.h" #include "utils/containers/range.h" #include "utils/containers/transform.h" diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index 57a4622f82..df5288a2ec 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -1,8 +1,8 @@ #include "pcg/machine_view.h" -#include #include "utils/containers/transform.h" #include "utils/fmt/unordered_set.h" #include "utils/fmt/vector.h" +#include using namespace FlexFlow; diff --git a/lib/utils/test/src/utils/containers/cartesian_product.cc b/lib/utils/test/src/utils/containers/cartesian_product.cc index 283ec5d6b0..7fdfcb847e 100644 --- a/lib/utils/test/src/utils/containers/cartesian_product.cc +++ b/lib/utils/test/src/utils/containers/cartesian_product.cc @@ -1,7 +1,7 @@ #include "utils/containers/cartesian_product.h" +#include "test/utils/doctest/fmt/unordered_multiset.h" #include "utils/fmt/unordered_multiset.h" #include "utils/fmt/vector.h" -#include "test/utils/doctest/fmt/unordered_multiset.h" #include #include #include diff --git 
a/lib/utils/test/src/utils/containers/filter.cc b/lib/utils/test/src/utils/containers/filter.cc index f3c193355a..9462d30024 100644 --- a/lib/utils/test/src/utils/containers/filter.cc +++ b/lib/utils/test/src/utils/containers/filter.cc @@ -2,8 +2,8 @@ #include "test/utils/doctest/fmt/map.h" #include "test/utils/doctest/fmt/set.h" #include "test/utils/doctest/fmt/unordered_map.h" -#include "test/utils/doctest/fmt/unordered_set.h" #include "test/utils/doctest/fmt/unordered_multiset.h" +#include "test/utils/doctest/fmt/unordered_set.h" #include "test/utils/doctest/fmt/vector.h" #include "test/utils/rapidcheck.h" diff --git a/lib/utils/test/src/utils/containers/range.cc b/lib/utils/test/src/utils/containers/range.cc index bc0f1b2b27..e186f80761 100644 --- a/lib/utils/test/src/utils/containers/range.cc +++ b/lib/utils/test/src/utils/containers/range.cc @@ -1,10 +1,10 @@ #include "utils/containers/range.h" +#include "test/utils/doctest/fmt/vector.h" #include "utils/fmt/vector.h" #include "utils/hash/unordered_set.h" #include #include #include -#include "test/utils/doctest/fmt/vector.h" using namespace FlexFlow; diff --git a/lib/utils/test/src/utils/containers/replicate.cc b/lib/utils/test/src/utils/containers/replicate.cc index 9cfe05e968..5d4c9a1bba 100644 --- a/lib/utils/test/src/utils/containers/replicate.cc +++ b/lib/utils/test/src/utils/containers/replicate.cc @@ -1,10 +1,10 @@ #include "utils/containers/replicate.h" +#include "test/utils/doctest/fmt/vector.h" #include "utils/fmt/unordered_set.h" #include "utils/fmt/vector.h" #include "utils/hash/unordered_set.h" #include #include -#include "test/utils/doctest/fmt/vector.h" using namespace FlexFlow; diff --git a/lib/utils/test/src/utils/containers/scanl.cc b/lib/utils/test/src/utils/containers/scanl.cc index 1e0690b3bf..675f276df3 100644 --- a/lib/utils/test/src/utils/containers/scanl.cc +++ b/lib/utils/test/src/utils/containers/scanl.cc @@ -1,9 +1,9 @@ #include "utils/containers/scanl.h" +#include 
"test/utils/doctest/fmt/vector.h" #include "utils/fmt/vector.h" #include #include #include -#include "test/utils/doctest/fmt/vector.h" using namespace FlexFlow; From b438b492159ff0ed1915b310ae5aa6236a895404 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Fri, 4 Oct 2024 15:11:20 -0700 Subject: [PATCH 25/34] minor fix --- .../test/src/test_local_cost_estimator.cc | 109 +++++++++--------- 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/lib/local-execution/test/src/test_local_cost_estimator.cc b/lib/local-execution/test/src/test_local_cost_estimator.cc index 9d7f8987c5..f8490b4a67 100644 --- a/lib/local-execution/test/src/test_local_cost_estimator.cc +++ b/lib/local-execution/test/src/test_local_cost_estimator.cc @@ -11,69 +11,70 @@ using namespace ::FlexFlow; TEST_SUITE(FF_CUDA_TEST_SUITE) { TEST_CASE("Local Cost Estimator") { -// // local backing initialization -// ManagedPerDeviceFFHandle managed_handle{}; + // // local backing initialization + // ManagedPerDeviceFFHandle managed_handle{}; -// RuntimeArgConfig runtime_arg_config = RuntimeArgConfig{ -// DeviceSpecific::create(managed_handle.raw_handle()), -// EnableProfiling::YES, -// ProfilingSettings{/*warmup_iters=*/0, -// /*measure_iters=*/1}}; + // RuntimeArgConfig runtime_arg_config = RuntimeArgConfig{ + // DeviceSpecific::create(managed_handle.raw_handle()), + // EnableProfiling::YES, + // ProfilingSettings{/*warmup_iters=*/0, + // /*measure_iters=*/1}}; -// LocalCostEstimator cost_estimator = -// LocalCostEstimator{runtime_arg_config}; + // LocalCostEstimator cost_estimator = + // LocalCostEstimator{runtime_arg_config}; -// SUBCASE("Estimate cost -- Attention Op") { -// int embed_dim = 32; -// int num_heads = 10; -// MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{ -// /*embed_dim=*/embed_dim, -// /*num_heads=*/num_heads, -// /*kdim=*/embed_dim, -// /*vdim=*/embed_dim, -// /*dropout=*/0.0, -// /*bias=*/true, -// /*add_bias_kv=*/false, -// /*add_zero_attn=*/false, -// }; + // 
SUBCASE("Estimate cost -- Attention Op") { + // int embed_dim = 32; + // int num_heads = 10; + // MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{ + // /*embed_dim=*/embed_dim, + // /*num_heads=*/num_heads, + // /*kdim=*/embed_dim, + // /*vdim=*/embed_dim, + // /*dropout=*/0.0, + // /*bias=*/true, + // /*add_bias_kv=*/false, + // /*add_zero_attn=*/false, + // }; -// size_t batch_size = 40; -// size_t seq_len = 48; -// size_t feature_size = 36; + // size_t batch_size = 40; + // size_t seq_len = 48; + // size_t feature_size = 36; -// DataType dtype = DataType::FLOAT; -// ParallelTensorShape inputs_shape = lift_to_parallel(TensorShape{ -// TensorDims{FFOrdered{batch_size, seq_len, feature_size}}, -// DataType::FLOAT, -// }); + // DataType dtype = DataType::FLOAT; + // ParallelTensorShape inputs_shape = lift_to_parallel(TensorShape{ + // TensorDims{FFOrdered{batch_size, seq_len, + // feature_size}}, DataType::FLOAT, + // }); -// ParallelTensorShape weights_shape = throw_if_unexpected( -// get_weights_shape(attrs, inputs_shape, inputs_shape, -// inputs_shape)); -// ParallelTensorAttrs weight_attrs = -// ParallelTensorAttrs{weights_shape, -// /*sync_type=*/std::nullopt, -// /*initializer=*/std::nullopt, -// CreateGrad::YES}; + // ParallelTensorShape weights_shape = throw_if_unexpected( + // get_weights_shape(attrs, inputs_shape, inputs_shape, + // inputs_shape)); + // ParallelTensorAttrs weight_attrs = + // ParallelTensorAttrs{weights_shape, + // /*sync_type=*/std::nullopt, + // /*initializer=*/std::nullopt, + // CreateGrad::YES}; -// ParallelTensorShape output_shape = throw_if_unexpected( -// get_output_shape(attrs, inputs_shape, inputs_shape, inputs_shape)); -// ParallelTensorAttrs output_attrs = -// ParallelTensorAttrs{output_shape, -// /*sync_type=*/std::nullopt, -// /*initializer=*/std::nullopt, -// CreateGrad::YES}; + // ParallelTensorShape output_shape = throw_if_unexpected( + // get_output_shape(attrs, inputs_shape, inputs_shape, + // inputs_shape)); + // 
ParallelTensorAttrs output_attrs = + // ParallelTensorAttrs{output_shape, + // /*sync_type=*/std::nullopt, + // /*initializer=*/std::nullopt, + // CreateGrad::YES}; -// CostDetails result = cost_estimator.estimate_cost( -// PCGOperatorAttrs{attrs}, -// std::vector{ -// inputs_shape, inputs_shape, inputs_shape}, -// std::vector{weight_attrs}, -// std::vector{output_attrs}, -// make_1d_machine_view(DeviceType::GPU, 0, 1)); + // CostDetails result = cost_estimator.estimate_cost( + // PCGOperatorAttrs{attrs}, + // std::vector{ + // inputs_shape, inputs_shape, inputs_shape}, + // std::vector{weight_attrs}, + // std::vector{output_attrs}, + // make_1d_machine_view(DeviceType::GPU, 0, 1)); - // CHECK(result.total_elapsed_time > 0); - // CHECK(result.total_mem_usage > 0); + // CHECK(result.total_elapsed_time > 0); + // CHECK(result.total_mem_usage > 0); // } } } From 52b7a26d07472efe2627b4bebf1baec56b205b5e Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Fri, 4 Oct 2024 18:25:56 -0700 Subject: [PATCH 26/34] PR fixes --- .../include/compiler/allowed_machine_views.h | 9 +- ...machine_view_to_tensor_mapping.struct.toml | 17 -- .../src/compiler/allowed_machine_views.cc | 37 +++-- .../test/src/allowed_machine_views.cc | 5 +- .../test/src/test_local_cost_estimator.cc | 137 ++++++++--------- .../op-attrs/parallel_tensor_dim_idx_t.h | 18 --- .../include/op-attrs/parallel_tensor_shape.h | 8 + .../src/op-attrs/parallel_tensor_dim_idx_t.cc | 37 ----- .../src/op-attrs/parallel_tensor_shape.cc | 29 ++++ .../pcg/machine_space_coordinate.struct.toml | 4 +- lib/pcg/include/pcg/machine_specification.h | 4 +- lib/pcg/include/pcg/machine_view.h | 6 +- lib/pcg/include/pcg/machine_view_dim_idx_t.h | 14 -- .../pcg/machine_view_dim_idx_t.struct.toml | 14 -- .../pcg/machine_view_projection.struct.toml | 24 --- lib/pcg/include/pcg/operator_task_space.h | 22 +++ ...t.toml => operator_task_space.struct.toml} | 5 +- lib/pcg/include/pcg/side_size_t.struct.toml | 14 -- 
lib/pcg/include/pcg/task_space_operator.h | 22 --- lib/pcg/src/pcg/delete.c | 145 ------------------ lib/pcg/src/pcg/machine_specification.cc | 18 ++- lib/pcg/src/pcg/machine_view.cc | 101 ++++++------ ...ace_operator.cc => operator_task_space.cc} | 22 ++- lib/pcg/test/src/pcg/machine_specification.cc | 17 ++ lib/pcg/test/src/pcg/machine_view.cc | 29 ++-- ...ace_operator.cc => operator_task_space.cc} | 28 ++-- .../utils/containers/get_all_permutations.h | 112 -------------- .../get_all_permutations_with_repetition.h | 131 ++++++++++++++++ .../include/utils/containers/transform.h | 18 +-- .../include/utils/fmt/unordered_multiset.h | 1 - .../utils/containers/get_all_permutations.cc | 73 --------- .../get_all_permutations_with_repetition.cc | 76 +++++++++ lib/utils/test/src/utils/containers/range.cc | 3 +- 33 files changed, 492 insertions(+), 708 deletions(-) delete mode 100644 lib/compiler/include/compiler/machine_view_to_tensor_mapping.struct.toml delete mode 100644 lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h delete mode 100644 lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc delete mode 100644 lib/pcg/include/pcg/machine_view_dim_idx_t.h delete mode 100644 lib/pcg/include/pcg/machine_view_dim_idx_t.struct.toml delete mode 100644 lib/pcg/include/pcg/machine_view_projection.struct.toml create mode 100644 lib/pcg/include/pcg/operator_task_space.h rename lib/pcg/include/pcg/{task_space_operator.struct.toml => operator_task_space.struct.toml} (69%) delete mode 100644 lib/pcg/include/pcg/side_size_t.struct.toml delete mode 100644 lib/pcg/include/pcg/task_space_operator.h delete mode 100644 lib/pcg/src/pcg/delete.c rename lib/pcg/src/pcg/{task_space_operator.cc => operator_task_space.cc} (54%) rename lib/pcg/test/src/pcg/{task_space_operator.cc => operator_task_space.cc} (57%) create mode 100644 lib/utils/include/utils/containers/get_all_permutations_with_repetition.h create mode 100644 
lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc diff --git a/lib/compiler/include/compiler/allowed_machine_views.h b/lib/compiler/include/compiler/allowed_machine_views.h index c409a8bf75..9bb73fd1a9 100644 --- a/lib/compiler/include/compiler/allowed_machine_views.h +++ b/lib/compiler/include/compiler/allowed_machine_views.h @@ -3,20 +3,17 @@ #include "pcg/machine_specification.dtg.h" #include "pcg/machine_view.dtg.h" -#include "pcg/task_space_operator.dtg.h" +#include "pcg/operator_task_space.dtg.h" namespace FlexFlow { bool is_valid_machine_view(MachineView const &mv, - TaskSpaceOperator const &task, + OperatorTaskSpace const &task, MachineSpecification const &ms); -bool is_valid_machine_view(MachineView const &mv, - TaskSpaceOperator const &task); - std::unordered_set get_allowed_machine_views(MachineSpecification const &machine_spec, - TaskSpaceOperator const &task, + OperatorTaskSpace const &task, DeviceType device_type); } // namespace FlexFlow diff --git a/lib/compiler/include/compiler/machine_view_to_tensor_mapping.struct.toml b/lib/compiler/include/compiler/machine_view_to_tensor_mapping.struct.toml deleted file mode 100644 index b3f154188b..0000000000 --- a/lib/compiler/include/compiler/machine_view_to_tensor_mapping.struct.toml +++ /dev/null @@ -1,17 +0,0 @@ -namespace = "FlexFlow" -name = "MachineViewToTensorMapping" -features = [ - "eq", - "hash", - "fmt", -] - -includes = [ - "pcg/machine_view_dim_idx_t.dtg.h", - "op-attrs/parallel_tensor_dim_idx_t.dtg.h", - "utils/bidict/bidict.h", -] - -[[fields]] -name = "raw_bidict" -type = "::FlexFlow::bidict<::FlexFlow::machine_view_dim_idx_t, ::FlexFlow::parallel_tensor_dim_idx_t>" diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index 499103485c..ce2fd477b7 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -2,12 +2,12 @@ #include 
"pcg/machine_specification.h" #include "pcg/machine_view.h" #include "pcg/multi_dimensional_stride.dtg.h" -#include "pcg/task_space_operator.h" +#include "pcg/operator_task_space.h" #include "utils/containers/all_of.h" #include "utils/containers/cartesian_product.h" #include "utils/containers/extend.h" #include "utils/containers/filter.h" -#include "utils/containers/get_all_permutations.h" +#include "utils/containers/get_all_permutations_with_repetition.h" #include "utils/containers/map_from_keys_and_values.h" #include "utils/containers/product.h" #include "utils/containers/range.h" @@ -22,11 +22,11 @@ namespace FlexFlow { bool is_valid_machine_view(MachineView const &mv, - TaskSpaceOperator const &task, + OperatorTaskSpace const &task, MachineSpecification const &ms) { MachineSpaceCoordinate maximum_device_coords = get_machine_space_coordinate( - task, mv, get_maximum_fragment_coordinate(task), ms); - return is_valid_machine_space_coordinates(ms, maximum_device_coords); + task, mv, get_task_space_maximum_coordinate(task), ms); + return is_valid_machine_space_coordinate(ms, maximum_device_coords); } /* Generates a set of candidate `MachineView`s @@ -39,18 +39,21 @@ bool is_valid_machine_view(MachineView const &mv, */ static std::unordered_set get_candidate_machine_views(MachineSpecification const &machine_spec, - TaskSpaceOperator const &task, + OperatorTaskSpace const &task, DeviceType const &device_type) { - auto candidate_strides = - [](std::vector const &tensor_dims, - int total_devices) -> std::unordered_multiset { - int min_num_devices_with_full_stride_volume = - product(transform(tensor_dims, [](num_points_t const &num_devices) { - return num_devices.unwrapped - 1; - })); + auto get_max_stride_upper_bound = [](std::vector const &tensor_dims, + int total_devices) -> int { + int min_num_devices_with_full_stride_volume = product(transform( + tensor_dims, [](int const &num_devices) { return num_devices - 1; })); + return std::ceil(total_devices / 
min_num_devices_with_full_stride_volume); + }; + + auto candidate_strides = [&](std::vector const &tensor_dims, + int total_devices) + -> std::unordered_multiset { int max_stride_upper_bound = - std::ceil(total_devices / min_num_devices_with_full_stride_volume); + get_max_stride_upper_bound(tensor_dims, total_devices); std::vector single_stride_range = transform(range(1, max_stride_upper_bound + 1), @@ -75,14 +78,14 @@ static std::unordered_set return result; }; - auto candidate_projections = [](TaskSpaceOperator const &task) { + auto candidate_projections = [](OperatorTaskSpace const &task) { std::unordered_set options = { MachineSpecificationDimension::INTER_NODE, MachineSpecificationDimension::INTRA_NODE}; return get_all_permutations_with_repetition(options, num_dims(task)); }; - std::vector tensor_dims = task.degrees; + std::vector tensor_dims = task.degrees; int total_devices = get_num_devices(machine_spec, device_type); std::unordered_set machine_views; @@ -102,7 +105,7 @@ static std::unordered_set std::unordered_set get_allowed_machine_views(MachineSpecification const &machine_spec, - TaskSpaceOperator const &task, + OperatorTaskSpace const &task, DeviceType device_type) { std::unordered_set views = diff --git a/lib/compiler/test/src/allowed_machine_views.cc b/lib/compiler/test/src/allowed_machine_views.cc index 3c22606254..bbcdd9136e 100644 --- a/lib/compiler/test/src/allowed_machine_views.cc +++ b/lib/compiler/test/src/allowed_machine_views.cc @@ -16,7 +16,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("1 degree of parallelism") { MachineSpecification ms = MachineSpecification{1, 5, 5, 0, 0}; - TaskSpaceOperator task = TaskSpaceOperator{{num_points_t{3}}}; + OperatorTaskSpace task = OperatorTaskSpace{{3}}; std::unordered_set correct = { MachineView{{{stride_t{1}}}, @@ -43,8 +43,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("2 degrees of parallelism") { MachineSpecification ms = MachineSpecification{3, 3, 3, 0, 0}; - TaskSpaceOperator task = - 
TaskSpaceOperator{{num_points_t{2}, num_points_t{3}}}; + OperatorTaskSpace task = OperatorTaskSpace{{2, 3}}; auto make_2d_views = [&](int start_x, int start_y, diff --git a/lib/local-execution/test/src/test_local_cost_estimator.cc b/lib/local-execution/test/src/test_local_cost_estimator.cc index f8490b4a67..da3af6e3ad 100644 --- a/lib/local-execution/test/src/test_local_cost_estimator.cc +++ b/lib/local-execution/test/src/test_local_cost_estimator.cc @@ -1,80 +1,79 @@ -#include "doctest/doctest.h" -#include "kernels/local_cuda_allocator.h" -#include "kernels/managed_per_device_ff_handle.h" -#include "local-execution/local_cost_estimator.h" -#include "op-attrs/ops/attention.h" -#include "op-attrs/parallel_tensor_shape.h" -#include "pcg/computation_graph_builder.h" -#include "test_utils.h" +// #include "doctest/doctest.h" +// #include "kernels/local_cuda_allocator.h" +// #include "kernels/managed_per_device_ff_handle.h" +// #include "local-execution/local_cost_estimator.h" +// #include "op-attrs/ops/attention.h" +// #include "op-attrs/parallel_tensor_shape.h" +// #include "pcg/computation_graph_builder.h" +// #include "test_utils.h" -using namespace ::FlexFlow; +// using namespace ::FlexFlow; -TEST_SUITE(FF_CUDA_TEST_SUITE) { - TEST_CASE("Local Cost Estimator") { - // // local backing initialization - // ManagedPerDeviceFFHandle managed_handle{}; +// TEST_SUITE(FF_CUDA_TEST_SUITE) { +// TEST_CASE("Local Cost Estimator") { +// // local backing initialization +// ManagedPerDeviceFFHandle managed_handle{}; - // RuntimeArgConfig runtime_arg_config = RuntimeArgConfig{ - // DeviceSpecific::create(managed_handle.raw_handle()), - // EnableProfiling::YES, - // ProfilingSettings{/*warmup_iters=*/0, - // /*measure_iters=*/1}}; +// RuntimeArgConfig runtime_arg_config = RuntimeArgConfig{ +// DeviceSpecific::create(managed_handle.raw_handle()), +// EnableProfiling::YES, +// ProfilingSettings{/*warmup_iters=*/0, +// /*measure_iters=*/1}}; - // LocalCostEstimator cost_estimator = - 
// LocalCostEstimator{runtime_arg_config}; +// LocalCostEstimator cost_estimator = +// LocalCostEstimator{runtime_arg_config}; - // SUBCASE("Estimate cost -- Attention Op") { - // int embed_dim = 32; - // int num_heads = 10; - // MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{ - // /*embed_dim=*/embed_dim, - // /*num_heads=*/num_heads, - // /*kdim=*/embed_dim, - // /*vdim=*/embed_dim, - // /*dropout=*/0.0, - // /*bias=*/true, - // /*add_bias_kv=*/false, - // /*add_zero_attn=*/false, - // }; +// SUBCASE("Estimate cost -- Attention Op") { +// int embed_dim = 32; +// int num_heads = 10; +// MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{ +// /*embed_dim=*/embed_dim, +// /*num_heads=*/num_heads, +// /*kdim=*/embed_dim, +// /*vdim=*/embed_dim, +// /*dropout=*/0.0, +// /*bias=*/true, +// /*add_bias_kv=*/false, +// /*add_zero_attn=*/false, +// }; - // size_t batch_size = 40; - // size_t seq_len = 48; - // size_t feature_size = 36; +// size_t batch_size = 40; +// size_t seq_len = 48; +// size_t feature_size = 36; - // DataType dtype = DataType::FLOAT; - // ParallelTensorShape inputs_shape = lift_to_parallel(TensorShape{ - // TensorDims{FFOrdered{batch_size, seq_len, - // feature_size}}, DataType::FLOAT, - // }); +// DataType dtype = DataType::FLOAT; +// ParallelTensorShape inputs_shape = lift_to_parallel(TensorShape{ +// TensorDims{FFOrdered{batch_size, seq_len, feature_size}}, +// DataType::FLOAT, +// }); - // ParallelTensorShape weights_shape = throw_if_unexpected( - // get_weights_shape(attrs, inputs_shape, inputs_shape, - // inputs_shape)); - // ParallelTensorAttrs weight_attrs = - // ParallelTensorAttrs{weights_shape, - // /*sync_type=*/std::nullopt, - // /*initializer=*/std::nullopt, - // CreateGrad::YES}; +// ParallelTensorShape weights_shape = throw_if_unexpected( +// get_weights_shape(attrs, inputs_shape, inputs_shape, +// inputs_shape)); +// ParallelTensorAttrs weight_attrs = +// ParallelTensorAttrs{weights_shape, +// /*sync_type=*/std::nullopt, 
+// /*initializer=*/std::nullopt, +// CreateGrad::YES}; - // ParallelTensorShape output_shape = throw_if_unexpected( - // get_output_shape(attrs, inputs_shape, inputs_shape, - // inputs_shape)); - // ParallelTensorAttrs output_attrs = - // ParallelTensorAttrs{output_shape, - // /*sync_type=*/std::nullopt, - // /*initializer=*/std::nullopt, - // CreateGrad::YES}; +// ParallelTensorShape output_shape = throw_if_unexpected( +// get_output_shape(attrs, inputs_shape, inputs_shape, inputs_shape)); +// ParallelTensorAttrs output_attrs = +// ParallelTensorAttrs{output_shape, +// /*sync_type=*/std::nullopt, +// /*initializer=*/std::nullopt, +// CreateGrad::YES}; - // CostDetails result = cost_estimator.estimate_cost( - // PCGOperatorAttrs{attrs}, - // std::vector{ - // inputs_shape, inputs_shape, inputs_shape}, - // std::vector{weight_attrs}, - // std::vector{output_attrs}, - // make_1d_machine_view(DeviceType::GPU, 0, 1)); +// CostDetails result = cost_estimator.estimate_cost( +// PCGOperatorAttrs{attrs}, +// std::vector{ +// inputs_shape, inputs_shape, inputs_shape}, +// std::vector{weight_attrs}, +// std::vector{output_attrs}, +// make_1d_machine_view(gpu_id_t{0}, gpu_id_t{1})); - // CHECK(result.total_elapsed_time > 0); - // CHECK(result.total_mem_usage > 0); - // } - } -} +// CHECK(result.total_elapsed_time > 0); +// CHECK(result.total_mem_usage > 0); +// } +// } +// } diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h b/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h deleted file mode 100644 index 754d477569..0000000000 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_dim_idx_t.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIM_IDX_H -#define _FLEXFLOW_OP_ATTRS_INCLUDE_OP_ATTRS_PARALLEL_TENSOR_DIM_IDX_H - -#include "op-attrs/parallel_dim.dtg.h" -#include "op-attrs/parallel_tensor_dim_idx_t.dtg.h" -#include "op-attrs/parallel_tensor_shape.dtg.h" - -namespace FlexFlow { - -ParallelDim 
get_parallel_dim_at_idx(ParallelTensorShape const &shape, - parallel_tensor_dim_idx_t idx); - -std::unordered_set - get_parallel_tensor_dim_indices(ParallelTensorShape const &shape); - -} // namespace FlexFlow - -#endif diff --git a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h index a03151160b..0759dc746e 100644 --- a/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h +++ b/lib/op-attrs/include/op-attrs/parallel_tensor_shape.h @@ -1,7 +1,9 @@ #ifndef _OP_META_PARALLEL_TENSOR_SHAPE_H #define _OP_META_PARALLEL_TENSOR_SHAPE_H +#include "op-attrs/parallel_dim.h" #include "op-attrs/parallel_tensor_dim_degrees.dtg.h" +#include "op-attrs/parallel_tensor_dim_idx_t.dtg.h" #include "op-attrs/parallel_tensor_shape.dtg.h" #include "op-attrs/replica_parallel_dim.dtg.h" #include "op-attrs/tensor_shape.h" @@ -50,6 +52,12 @@ std::vector TensorShape get_reduced_shape(ParallelTensorShape const &); +ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, + parallel_tensor_dim_idx_t idx); + +std::unordered_set + get_parallel_tensor_dim_indices(ParallelTensorShape const &shape); + } // namespace FlexFlow #endif diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc deleted file mode 100644 index 68772078e2..0000000000 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_dim_idx_t.cc +++ /dev/null @@ -1,37 +0,0 @@ - -#include "op-attrs/parallel_tensor_dim_idx_t.h" -#include "op-attrs/parallel_tensor_dims.h" -#include "utils/containers/extend.h" -#include "utils/containers/range.h" -#include "utils/containers/transform.h" -#include "utils/overload.h" - -namespace FlexFlow { - -ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, - parallel_tensor_dim_idx_t idx) { - return idx.visit( - overload{[&](ff_dim_t shard_dim) { - return ParallelDim{shape.dims.shard_dims.at(shard_dim)}; - }, - [&](ReplicaType replica_type) { - 
ReplicaParallelDimSet replicas = shape.dims.replica_dims; - int degree = (ReplicaType::SUM == replica_type - ? replicas.sum_degree.value - : replicas.discard_copy_degree.value); - return ParallelDim{ReplicaParallelDim{degree, replica_type}}; - }}); -} - -std::unordered_set - get_parallel_tensor_dim_indices(ParallelTensorShape const &shape) { - std::unordered_set indices; - extend(indices, transform(range(num_shard_dims(shape.dims)), [](int idx) { - return parallel_tensor_dim_idx_t(ff_dim_t(idx)); - })); - indices.insert(parallel_tensor_dim_idx_t(ReplicaType::SUM)); - indices.insert(parallel_tensor_dim_idx_t(ReplicaType::DISCARD_COPY)); - return indices; -} - -} // namespace FlexFlow diff --git a/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc b/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc index 0663795db5..dcc567e0ca 100644 --- a/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc +++ b/lib/op-attrs/src/op-attrs/parallel_tensor_shape.cc @@ -1,9 +1,12 @@ #include "op-attrs/parallel_tensor_shape.h" #include "op-attrs/parallel_tensor_dims.h" #include "op-attrs/tensor_dims.h" +#include "utils/containers/extend.h" #include "utils/containers/product.h" +#include "utils/containers/range.h" #include "utils/containers/transform.h" #include "utils/hash-utils.h" +#include "utils/overload.h" namespace FlexFlow { @@ -116,4 +119,30 @@ TensorShape get_reduced_shape(ParallelTensorShape const &s) { }; } +ParallelDim get_parallel_dim_at_idx(ParallelTensorShape const &shape, + parallel_tensor_dim_idx_t idx) { + return idx.visit( + overload{[&](ff_dim_t shard_dim) { + return ParallelDim{shape.dims.shard_dims.at(shard_dim)}; + }, + [&](ReplicaType replica_type) { + ReplicaParallelDimSet replicas = shape.dims.replica_dims; + int degree = (ReplicaType::SUM == replica_type + ? 
replicas.sum_degree.value + : replicas.discard_copy_degree.value); + return ParallelDim{ReplicaParallelDim{degree, replica_type}}; + }}); +} + +std::unordered_set + get_parallel_tensor_dim_indices(ParallelTensorShape const &shape) { + std::unordered_set indices; + extend(indices, transform(range(num_shard_dims(shape.dims)), [](int idx) { + return parallel_tensor_dim_idx_t(ff_dim_t(idx)); + })); + indices.insert(parallel_tensor_dim_idx_t(ReplicaType::SUM)); + indices.insert(parallel_tensor_dim_idx_t(ReplicaType::DISCARD_COPY)); + return indices; +} + } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/machine_space_coordinate.struct.toml b/lib/pcg/include/pcg/machine_space_coordinate.struct.toml index 45602776d2..9b197a74c9 100644 --- a/lib/pcg/include/pcg/machine_space_coordinate.struct.toml +++ b/lib/pcg/include/pcg/machine_space_coordinate.struct.toml @@ -14,11 +14,11 @@ includes = [ ] [[fields]] -name = "inter" +name = "node_idx" type = "int" [[fields]] -name = "intra" +name = "device_idx" type = "int" [[fields]] diff --git a/lib/pcg/include/pcg/machine_specification.h b/lib/pcg/include/pcg/machine_specification.h index 7bd087998c..6ffa9900c2 100644 --- a/lib/pcg/include/pcg/machine_specification.h +++ b/lib/pcg/include/pcg/machine_specification.h @@ -15,8 +15,8 @@ int get_num_devices(MachineSpecification const &ms, int get_num_devices_per_node(MachineSpecification const &ms, DeviceType const &device_type); -bool is_valid_machine_space_coordinates(MachineSpecification const &ms, - MachineSpaceCoordinate const &coord); +bool is_valid_machine_space_coordinate(MachineSpecification const &ms, + MachineSpaceCoordinate const &coord); device_id_t get_device_id(MachineSpecification const &ms, MachineSpaceCoordinate const &coord); diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 401788923d..89c1465c13 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -4,7 +4,7 @@ #include 
"machine_specification.dtg.h" #include "machine_view.dtg.h" #include "pcg/device_id_t.dtg.h" -#include "pcg/task_space_operator.dtg.h" +#include "pcg/operator_task_space.dtg.h" #include "task_space_coordinate.dtg.h" #include #include @@ -12,13 +12,13 @@ namespace FlexFlow { MachineSpaceCoordinate - get_machine_space_coordinate(TaskSpaceOperator const &task, + get_machine_space_coordinate(OperatorTaskSpace const &task, MachineView const &mv, TaskSpaceCoordinate const &coordinates, MachineSpecification const &ms); std::unordered_set - get_machine_space_coordinates(TaskSpaceOperator const &task, + get_machine_space_coordinates(OperatorTaskSpace const &task, MachineView const &mv, MachineSpecification const &ms); diff --git a/lib/pcg/include/pcg/machine_view_dim_idx_t.h b/lib/pcg/include/pcg/machine_view_dim_idx_t.h deleted file mode 100644 index 0332240a14..0000000000 --- a/lib/pcg/include/pcg/machine_view_dim_idx_t.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_DIM_IDX_H -#define _FLEXFLOW_PCG_INCLUDE_PCG_MACHINE_VIEW_DIM_IDX_H - -#include "pcg/machine_view.dtg.h" -#include "pcg/machine_view_dim_idx_t.dtg.h" - -namespace FlexFlow { - -std::vector - get_machine_view_indices(MachineView const &mv); - -} // namespace FlexFlow - -#endif diff --git a/lib/pcg/include/pcg/machine_view_dim_idx_t.struct.toml b/lib/pcg/include/pcg/machine_view_dim_idx_t.struct.toml deleted file mode 100644 index 79989d9b7b..0000000000 --- a/lib/pcg/include/pcg/machine_view_dim_idx_t.struct.toml +++ /dev/null @@ -1,14 +0,0 @@ -namespace = "FlexFlow" -name = "machine_view_dim_idx_t" -features = [ - "eq", - "ord", - "hash", - "json", - "rapidcheck", - "fmt", -] - -[[fields]] -name = "unwrapped" -type = "int" diff --git a/lib/pcg/include/pcg/machine_view_projection.struct.toml b/lib/pcg/include/pcg/machine_view_projection.struct.toml deleted file mode 100644 index 7c7db8c7ac..0000000000 --- a/lib/pcg/include/pcg/machine_view_projection.struct.toml +++ /dev/null @@ 
-1,24 +0,0 @@ -namespace = "FlexFlow" -name = "MachineViewProjection" -features = [ - "eq", - "hash", - "json", - # "rapidcheck", - "fmt", -] - -includes = [ - "pcg/machine_view.dtg.h", - "pcg/machine_view_dim_idx_t.dtg.h", - "pcg/machine_specification_dimension.dtg.h", -] - -src_includes = [ - "utils/hash/unordered_map.h", - "utils/fmt/unordered_map.h" -] - -[[fields]] -name = "machine_view_dim_to_machine_spec_dim" -type = "std::unordered_map<::FlexFlow::machine_view_dim_idx_t, ::FlexFlow::MachineSpecificationDimension>" diff --git a/lib/pcg/include/pcg/operator_task_space.h b/lib/pcg/include/pcg/operator_task_space.h new file mode 100644 index 0000000000..98dac7e92b --- /dev/null +++ b/lib/pcg/include/pcg/operator_task_space.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_PCG_INCLUDE_operator_task_space_H +#define _FLEXFLOW_PCG_INCLUDE_operator_task_space_H + +#include "pcg/operator_task_space.dtg.h" +#include "pcg/task_space_coordinate.dtg.h" +#include +#include + +namespace FlexFlow { + +std::unordered_set + get_task_space_coordinates(OperatorTaskSpace const &task); + +TaskSpaceCoordinate + get_task_space_maximum_coordinate(OperatorTaskSpace const &task); + +size_t num_dims(OperatorTaskSpace const &task); +size_t num_tasks(OperatorTaskSpace const &task); + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/task_space_operator.struct.toml b/lib/pcg/include/pcg/operator_task_space.struct.toml similarity index 69% rename from lib/pcg/include/pcg/task_space_operator.struct.toml rename to lib/pcg/include/pcg/operator_task_space.struct.toml index 5e9d496275..3ab8b83173 100644 --- a/lib/pcg/include/pcg/task_space_operator.struct.toml +++ b/lib/pcg/include/pcg/operator_task_space.struct.toml @@ -1,5 +1,5 @@ namespace = "FlexFlow" -name = "TaskSpaceOperator" +name = "OperatorTaskSpace" features = [ "eq", "ord", @@ -11,7 +11,6 @@ features = [ includes = [ "", - "pcg/num_points_t.dtg.h" ] src_includes = [ @@ -21,4 +20,4 @@ src_includes = [ [[fields]] name = 
"degrees" -type = "std::vector<::FlexFlow::num_points_t>" +type = "std::vector" diff --git a/lib/pcg/include/pcg/side_size_t.struct.toml b/lib/pcg/include/pcg/side_size_t.struct.toml deleted file mode 100644 index dbaad4fedb..0000000000 --- a/lib/pcg/include/pcg/side_size_t.struct.toml +++ /dev/null @@ -1,14 +0,0 @@ -namespace = "FlexFlow" -name = "side_size_t" -features = [ - "eq", - "ord", - "hash", - "json", - "rapidcheck", - "fmt", -] - -[[fields]] -name = "unwrapped" -type = "int" diff --git a/lib/pcg/include/pcg/task_space_operator.h b/lib/pcg/include/pcg/task_space_operator.h deleted file mode 100644 index 067e8085bf..0000000000 --- a/lib/pcg/include/pcg/task_space_operator.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef _FLEXFLOW_PCG_INCLUDE_TASK_SPACE_OPERATOR_H -#define _FLEXFLOW_PCG_INCLUDE_TASK_SPACE_OPERATOR_H - -#include "pcg/task_space_coordinate.dtg.h" -#include "pcg/task_space_operator.dtg.h" -#include -#include - -namespace FlexFlow { - -std::unordered_set - get_fragment_coordinates(TaskSpaceOperator const &task); - -TaskSpaceCoordinate - get_maximum_fragment_coordinate(TaskSpaceOperator const &task); - -size_t num_dims(TaskSpaceOperator const &task); -size_t num_fragments(TaskSpaceOperator const &task); - -} // namespace FlexFlow - -#endif diff --git a/lib/pcg/src/pcg/delete.c b/lib/pcg/src/pcg/delete.c deleted file mode 100644 index 62b358ca87..0000000000 --- a/lib/pcg/src/pcg/delete.c +++ /dev/null @@ -1,145 +0,0 @@ -MachineSpaceCoordinate - get_machine_space_coordinates(MachineView const &mv, - MachineViewCoordinate const &coordinates, - MachineSpecification const &ms, - MachineViewProjection const &projection) { - - auto inter_projection = - filter_values(projection.machine_view_dim_to_machine_spec_dim, - [](MachineSpecificationDimension const &dim) { - return dim == MachineSpecificationDimension::INTER_NODE; - }); - auto intra_projection = - filter_values(projection.machine_view_dim_to_machine_spec_dim, - [](MachineSpecificationDimension const 
&dim) { - return dim == MachineSpecificationDimension::INTRA_NODE; - }); - - MachineViewCoordinate transformed_coordinates = MachineViewCoordinate{ - transform(zip(coordinates.raw_coord, mv.rect.get_sides()), - [&](auto const &pair) { - return pair.first * pair.second.stride.unwrapped; - })}; - transformed_coordinates = MachineViewCoordinate{ - transform(zip(transformed_coordinates.raw_coord, mv.start.raw_coord), - [&](auto const &pair) { return pair.first + pair.second; })}; - - auto get_coordinate = [&](auto const &sub_projection) { - std::vector relevant_dimensions = - sorted(keys(sub_projection)); - std::vector relevant_side_sizes = - transform(relevant_dimensions, [&](auto const &idx) { - return get_side_size(get_side_at_idx(mv, idx)); - }); - std::vector coefficients = - scanl(relevant_side_sizes, - 1, - [](size_t const &result, side_size_t const &side_size) { - return result * side_size.unwrapped; - }); - std::vector filtered_coord; - for (int i = 0; i < transformed_coordinates.raw_coord.size(); ++i) { - if (contains(relevant_dimensions, machine_view_dim_idx_t{i})) { - filtered_coord.push_back(transformed_coordinates.raw_coord[i]); - } - } - return sum( - transform(zip(coefficients, filtered_coord), - [](auto const pair) { return pair.first * pair.second; })); - }; - int inter_coordinate = get_coordinate(inter_projection); - int intra_coordinate = get_coordinate(intra_projection); - return MachineSpaceCoordinate{ - inter_coordinate, intra_coordinate, mv.device_type}; -} - -device_id_t get_device_id(MachineView const &mv, - MachineViewCoordinate const &coordinates, - MachineSpecification const &ms, - MachineViewProjection const &projection) { - MachineSpaceCoordinate coord = - get_machine_space_coordinates(mv, coordinates, ms, projection); - return get_device_id(ms, coord); -} - -std::unordered_set - get_device_ids(MachineView const &mv, - MachineSpecification const &ms, - MachineViewProjection const &projection) { - - return 
transform(get_devices_coordinates(mv), - [&](MachineViewCoordinate const &c) { - return get_device_id(mv, c, ms, projection); - }); -} - -size_t num_dims(MachineView const &mv) { - return get_num_dims(mv.rect); -} - -std::vector get_num_devices_per_dim(MachineView const &mv) { - return transform(mv.rect.get_sides(), [](StridedRectangleSide const &side) { - return side.num_points; - }); -} - -std::vector get_side_size_per_dim(MachineView const &mv) { - return transform(mv.rect.get_sides(), get_side_size); -} - -size_t num_devices(MachineView const &mv) { - return get_num_points(mv.rect).unwrapped; -} - -StridedRectangleSide get_side_at_idx(MachineView const &mv, - machine_view_dim_idx_t const &idx) { - return mv.rect.at(idx.unwrapped); -} - -static StridedRectangle make_1d_rect(int start, int stop, stride_t stride) { - assert(stop > start); - assert(stride > stride_t(0)); - StridedRectangleSide side = - strided_side_from_size_and_stride(side_size_t{stop - start}, stride); - StridedRectangle rect = - StridedRectangle{std::vector{side}}; - return rect; -} - -MachineView make_1d_machine_view(DeviceType device_type, - int start, - int stop, - stride_t stride) { - StridedRectangle rect = make_1d_rect(start, stop, stride); - MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; - return MachineView{start_coordinate, rect, device_type}; -} - -static StridedRectangle - make_1d_rect(int start, num_points_t num_points, stride_t stride) { - return make_1d_rect( - start, start + num_points.unwrapped * stride.unwrapped, stride); -} - -MachineView make_1d_machine_view(DeviceType device_type, - int start, - num_points_t num_points, - stride_t stride) { - StridedRectangle rect = make_1d_rect(start, num_points, stride); - MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; - return MachineView{start_coordinate, rect, device_type}; -} - -static StridedRectangle - make_1d_rect(int start, side_size_t interval_size, stride_t stride) { - return 
make_1d_rect(start, start + interval_size.unwrapped, stride); -} - -MachineView make_1d_machine_view(DeviceType device_type, - int start, - side_size_t interval_size, - stride_t stride) { - StridedRectangle rect = make_1d_rect(start, interval_size, stride); - MachineViewCoordinate start_coordinate = MachineViewCoordinate{{start}}; - return MachineView{start_coordinate, rect, device_type}; -} diff --git a/lib/pcg/src/pcg/machine_specification.cc b/lib/pcg/src/pcg/machine_specification.cc index bc9d803d0e..0df402ac3c 100644 --- a/lib/pcg/src/pcg/machine_specification.cc +++ b/lib/pcg/src/pcg/machine_specification.cc @@ -32,17 +32,21 @@ int get_num_devices_per_node(MachineSpecification const &ms, throw mk_runtime_error("Unknown DeviceType {}", device_type); } } -bool is_valid_machine_space_coordinates(MachineSpecification const &ms, - MachineSpaceCoordinate const &coord) { - return (coord.inter < ms.num_nodes) && - (coord.intra < get_num_devices_per_node(ms, coord.device_type)); +bool is_valid_machine_space_coordinate(MachineSpecification const &ms, + MachineSpaceCoordinate const &coord) { + return (coord.node_idx < ms.num_nodes) && + (coord.device_idx < get_num_devices_per_node(ms, coord.device_type)); } device_id_t get_device_id(MachineSpecification const &ms, MachineSpaceCoordinate const &coord) { - assert(is_valid_machine_space_coordinates(ms, coord)); - int raw_idx = coord.inter * get_num_devices_per_node(ms, coord.device_type) + - coord.intra; + if (!is_valid_machine_space_coordinate(ms, coord)) { + throw mk_runtime_error(fmt::format( + "Invalid coordinate {} for machine specification {}", ms, coord)); + } + int raw_idx = + coord.node_idx * get_num_devices_per_node(ms, coord.device_type) + + coord.device_idx; return device_id_from_index(raw_idx, coord.device_type); } diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index d4d3d72482..4b284567f9 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,5 
+1,5 @@ #include "pcg/machine_view.h" -#include "pcg/task_space_operator.h" +#include "pcg/operator_task_space.h" #include "utils/containers/contains.h" #include "utils/containers/scanl.h" #include "utils/containers/sum.h" @@ -8,70 +8,69 @@ namespace FlexFlow { -MachineSpaceCoordinate - get_machine_space_coordinate(TaskSpaceOperator const &task, - MachineView const &mv, - TaskSpaceCoordinate const &coord, - MachineSpecification const &ms) { +static std::vector + get_projection_indices(MachineView const &mv, + MachineSpecificationDimension dimension) { - std::vector inter_projection; - std::vector intra_projection; - for (size_t i = 0; i < num_dims(mv); ++i) { - if (mv.projection[i] == MachineSpecificationDimension::INTER_NODE) { - inter_projection.push_back(i); - } else if (mv.projection[i] == MachineSpecificationDimension::INTRA_NODE) { - intra_projection.push_back(i); + std::vector projection_indices; + for (size_t i = 0; i < mv.projection.size(); ++i) { + if (mv.projection[i] == dimension) { + projection_indices.push_back(i); } } + return projection_indices; +} - std::vector inter_sizes; - std::vector intra_sizes; - std::vector inter_coord_points; - std::vector intra_coord_points; - std::vector inter_strides; - std::vector intra_strides; +static int compute_index(int start_idx, + std::vector const &projection_indices, + OperatorTaskSpace const &task, + MachineView const &mv, + TaskSpaceCoordinate const &coord) { - for (size_t i = 0; i < num_dims(mv); ++i) { - int dim_size = task.degrees.at(i).unwrapped * mv.strides.at(i).unwrapped; - if (contains(inter_projection, i)) { - inter_sizes.push_back(dim_size); - inter_coord_points.push_back(coord.raw_coord.at(i)); - inter_strides.push_back(mv.strides.at(i).unwrapped); - } - if (contains(intra_projection, i)) { - intra_sizes.push_back(dim_size); - intra_coord_points.push_back(coord.raw_coord.at(i)); - intra_strides.push_back(mv.strides.at(i).unwrapped); - } - } + std::vector sizes; + std::vector coord_points; + 
std::vector strides; - std::vector inter_coeffs = scanl(inter_sizes, 1, std::multiplies()); - std::vector intra_coeffs = scanl(intra_sizes, 1, std::multiplies()); + for (int i : projection_indices) { + int dim_size = task.degrees[i] * mv.strides[i].unwrapped; + sizes.push_back(dim_size); + coord_points.push_back(coord.raw_coord[i]); + strides.push_back(mv.strides[i].unwrapped); + } - int inter = - mv.start.inter + - sum(transform(zip(inter_coeffs, inter_coord_points, inter_strides), - [](auto const &tuple) { - return std::get<0>(tuple) * std::get<1>(tuple) * - std::get<2>(tuple); - })); - int intra = - mv.start.intra + - sum(transform(zip(intra_coeffs, intra_coord_points, intra_strides), - [](auto const &tuple) { - return std::get<0>(tuple) * std::get<1>(tuple) * - std::get<2>(tuple); - })); + std::vector coeffs = scanl(sizes, 1, std::multiplies()); - return MachineSpaceCoordinate{inter, intra, get_device_type(mv)}; + int index = start_idx; + for (auto [coeff, coord_point, stride] : zip(coeffs, coord_points, strides)) { + index += coeff * coord_point * stride; + } + return index; } +MachineSpaceCoordinate + get_machine_space_coordinate(OperatorTaskSpace const &task, + MachineView const &mv, + TaskSpaceCoordinate const &coord, + MachineSpecification const &ms) { + + std::vector inter_projection_indices = + get_projection_indices(mv, MachineSpecificationDimension::INTER_NODE); + std::vector intra_projection_indices = + get_projection_indices(mv, MachineSpecificationDimension::INTRA_NODE); + + int node_idx = compute_index( + mv.start.node_idx, inter_projection_indices, task, mv, coord); + int device_idx = compute_index( + mv.start.device_idx, intra_projection_indices, task, mv, coord); + + return MachineSpaceCoordinate{node_idx, device_idx, get_device_type(mv)}; +} std::unordered_set - get_machine_space_coordinates(TaskSpaceOperator const &task, + get_machine_space_coordinates(OperatorTaskSpace const &task, MachineView const &mv, MachineSpecification const &ms) { - 
return transform(get_fragment_coordinates(task), + return transform(get_task_space_coordinates(task), [&](TaskSpaceCoordinate const &c) { return get_machine_space_coordinate(task, mv, c, ms); }); diff --git a/lib/pcg/src/pcg/task_space_operator.cc b/lib/pcg/src/pcg/operator_task_space.cc similarity index 54% rename from lib/pcg/src/pcg/task_space_operator.cc rename to lib/pcg/src/pcg/operator_task_space.cc index 130232a131..02522ae411 100644 --- a/lib/pcg/src/pcg/task_space_operator.cc +++ b/lib/pcg/src/pcg/operator_task_space.cc @@ -1,4 +1,4 @@ -#include "pcg/task_space_operator.h" +#include "pcg/operator_task_space.h" #include "utils/containers/cartesian_product.h" #include "utils/containers/maximum.h" #include "utils/containers/product.h" @@ -9,12 +9,10 @@ namespace FlexFlow { std::unordered_set - get_fragment_coordinates(TaskSpaceOperator const &task) { + get_task_space_coordinates(OperatorTaskSpace const &task) { - std::vector> coordinate_ranges = - transform(task.degrees, [&](num_points_t const &num_points) { - return range(num_points.unwrapped); - }); + std::vector> coordinate_ranges = transform( + task.degrees, [&](int const &num_points) { return range(num_points); }); std::unordered_set> raw_coordinates = unordered_set_of(cartesian_product(coordinate_ranges)); @@ -26,17 +24,15 @@ std::unordered_set } TaskSpaceCoordinate - get_maximum_fragment_coordinate(TaskSpaceOperator const &task) { - return maximum(get_fragment_coordinates(task)).value(); + get_task_space_maximum_coordinate(OperatorTaskSpace const &task) { + return maximum(get_task_space_coordinates(task)).value(); } -size_t num_dims(TaskSpaceOperator const &task) { +size_t num_dims(OperatorTaskSpace const &task) { return task.degrees.size(); } -size_t num_fragments(TaskSpaceOperator const &task) { - return product(transform(task.degrees, [&](num_points_t const &num_points) { - return num_points.unwrapped; - })); +size_t num_tasks(OperatorTaskSpace const &task) { + return product(task.degrees); } } // 
namespace FlexFlow diff --git a/lib/pcg/test/src/pcg/machine_specification.cc b/lib/pcg/test/src/pcg/machine_specification.cc index 8802a51622..9a86c62701 100644 --- a/lib/pcg/test/src/pcg/machine_specification.cc +++ b/lib/pcg/test/src/pcg/machine_specification.cc @@ -1,4 +1,5 @@ #include "pcg/machine_specification.h" +#include "pcg/device_id.h" #include using namespace FlexFlow; @@ -21,5 +22,21 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(get_num_devices(ms, DeviceType::GPU) == 4 * 8); CHECK(get_num_devices(ms, DeviceType::CPU) == 16 * 4); } + + SUBCASE("get_device_id") { + SUBCASE("valid MachineSpaceCoordinate") { + MachineSpaceCoordinate coord = + MachineSpaceCoordinate{2, 12, DeviceType::CPU}; + device_id_t correct = + device_id_from_index(2 * 16 + 12, DeviceType::CPU); + device_id_t result = get_device_id(ms, coord); + CHECK(correct == result); + } + SUBCASE("invalid MachineSpaceCoordinate") { + MachineSpaceCoordinate coord = + MachineSpaceCoordinate{2, 18, DeviceType::CPU}; + CHECK_THROWS(get_device_id(ms, coord)); + } + } } } diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index df5288a2ec..d4031e15b0 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -25,14 +25,17 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_machine_space_coordinate") { SUBCASE("1D case") { - TaskSpaceOperator task = TaskSpaceOperator{{num_points_t{3}}}; + OperatorTaskSpace task = OperatorTaskSpace{{3}}; MachineView mv = MachineView{{{stride_t{2}}}, {{MachineSpecificationDimension::INTRA_NODE}}, MachineSpaceCoordinate{0, 1, DeviceType::GPU}}; - MachineSpecification ms = MachineSpecification{ - 1, 6, 6, 0, 0}; // Single node with 6 GPUs (0,1,2,3,4,5) + MachineSpecification ms = MachineSpecification{/*num_nodes*/ 1, + /*num_cpus_per_node*/ 6, + /*num_gpus_per_node*/ 6, + 0, + 0}; SUBCASE("Fragment 0") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0}}; @@ -63,16 +66,18 @@ TEST_SUITE(FF_TEST_SUITE) { } 
SUBCASE("2D case") { - TaskSpaceOperator task = - TaskSpaceOperator{{num_points_t{2}, num_points_t{2}}}; + OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; MachineView mv = MachineView{{{stride_t{1}, stride_t{2}}}, {{MachineSpecificationDimension::INTER_NODE, MachineSpecificationDimension::INTRA_NODE}}, MachineSpaceCoordinate{1, 2, DeviceType::GPU}}; - MachineSpecification ms = - MachineSpecification{3, 5, 5, 0, 0}; // 3 Nodes, 5 GPUs each + MachineSpecification ms = MachineSpecification{/*num_nodes*/ 3, + /*num_cpus_per_node*/ 5, + /*num_gpus_per_node*/ 5, + 0, + 0}; SUBCASE("Fragment (0,0)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 0}}; @@ -111,8 +116,7 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("3D case") { - TaskSpaceOperator task = TaskSpaceOperator{ - {num_points_t{2}, num_points_t{2}, num_points_t{2}}}; + OperatorTaskSpace task = OperatorTaskSpace{{2, 2, 2}}; MachineView mv = MachineView{{{stride_t{1}, stride_t{2}, stride_t{1}}}, {{MachineSpecificationDimension::INTER_NODE, @@ -120,8 +124,11 @@ TEST_SUITE(FF_TEST_SUITE) { MachineSpecificationDimension::INTRA_NODE}}, MachineSpaceCoordinate{0, 1, DeviceType::GPU}}; - MachineSpecification ms = - MachineSpecification{2, 8, 8, 0, 0}; // 2 Nodes, 8 GPUs each + MachineSpecification ms = MachineSpecification{/*num_nodes*/ 2, + /*num_cpus_per_node*/ 8, + /*num_gpus_per_node*/ 8, + 0, + 0}; SUBCASE("Fragment (0,0,1)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1, 0}}; diff --git a/lib/pcg/test/src/pcg/task_space_operator.cc b/lib/pcg/test/src/pcg/operator_task_space.cc similarity index 57% rename from lib/pcg/test/src/pcg/task_space_operator.cc rename to lib/pcg/test/src/pcg/operator_task_space.cc index 5e1a7f7acf..33ab5665d6 100644 --- a/lib/pcg/test/src/pcg/task_space_operator.cc +++ b/lib/pcg/test/src/pcg/operator_task_space.cc @@ -1,17 +1,16 @@ -#include "pcg/task_space_operator.h" +#include "pcg/operator_task_space.h" #include "utils/fmt/unordered_set.h" #include using namespace FlexFlow; 
TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("task_space_operator functions") { - SUBCASE("get_fragment_coordinates") { + TEST_CASE("operator_task_space functions") { + SUBCASE("get_task_space_coordinates") { SUBCASE("2D Task") { - TaskSpaceOperator task = - TaskSpaceOperator{{num_points_t{2}, num_points_t{2}}}; + OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; std::unordered_set correct = { {TaskSpaceCoordinate{{0, 0}}, @@ -19,13 +18,12 @@ TEST_SUITE(FF_TEST_SUITE) { TaskSpaceCoordinate{{1, 0}}, TaskSpaceCoordinate{{1, 1}}}}; std::unordered_set result = - get_fragment_coordinates(task); + get_task_space_coordinates(task); CHECK(correct == result); } SUBCASE("3D Task") { - TaskSpaceOperator task = TaskSpaceOperator{ - {num_points_t{1}, num_points_t{2}, num_points_t{2}}}; + OperatorTaskSpace task = OperatorTaskSpace{{1, 2, 2}}; std::unordered_set correct = { {TaskSpaceCoordinate{{0, 0, 0}}, @@ -33,28 +31,26 @@ TEST_SUITE(FF_TEST_SUITE) { TaskSpaceCoordinate{{0, 1, 0}}, TaskSpaceCoordinate{{0, 1, 1}}}}; std::unordered_set result = - get_fragment_coordinates(task); + get_task_space_coordinates(task); CHECK(correct == result); } } - SUBCASE("get_maximum_fragment_coordinate") { + SUBCASE("get_task_space_maximum_coordinate") { SUBCASE("2D Task") { - TaskSpaceOperator task = - TaskSpaceOperator{{num_points_t{3}, num_points_t{2}}}; + OperatorTaskSpace task = OperatorTaskSpace{{3, 2}}; TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2, 1}}; - TaskSpaceCoordinate result = get_maximum_fragment_coordinate(task); + TaskSpaceCoordinate result = get_task_space_maximum_coordinate(task); CHECK(correct == result); } SUBCASE("3D Task") { - TaskSpaceOperator task = TaskSpaceOperator{ - {num_points_t{3}, num_points_t{2}, num_points_t{4}}}; + OperatorTaskSpace task = OperatorTaskSpace{{3, 2, 4}}; TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2, 1, 3}}; - TaskSpaceCoordinate result = get_maximum_fragment_coordinate(task); + TaskSpaceCoordinate result = 
get_task_space_maximum_coordinate(task); CHECK(correct == result); } } diff --git a/lib/utils/include/utils/containers/get_all_permutations.h b/lib/utils/include/utils/containers/get_all_permutations.h index ee23e94533..b7e797dad2 100644 --- a/lib/utils/include/utils/containers/get_all_permutations.h +++ b/lib/utils/include/utils/containers/get_all_permutations.h @@ -101,118 +101,6 @@ permutations_container get_all_permutations(C const &c) { return permutations_container(c.cbegin(), c.cend()); } -template -struct permutations_with_repetition_container { -public: - template - permutations_with_repetition_container(It start, It end, size_t n) - : elements(start, end), n(n) { - if (elements.empty() || n == 0) { - done = true; - } else { - indices.assign(n, 0); - done = false; - } - } - - struct iterator { - public: - using difference_type = long; - using value_type = std::vector; - using pointer = std::vector const *; - using reference = std::vector const &; - using iterator_category = std::input_iterator_tag; - - public: - iterator(permutations_with_repetition_container const &c, bool end_iter) - : c(c), indices(c.indices), done(end_iter || c.done) { - if (end_iter || c.done) { - done = true; - } - } - - iterator &operator++() { - assert(!done); - - // Essentially counting in base `c.elements.size()` - for (int i = c.n - 1; i >= 0; --i) { - if (indices[i] + 1 < c.elements.size()) { - indices[i]++; - break; - } else { - indices[i] = 0; - if (i == 0) { - done = true; - } - } - } - return *this; - } - - iterator operator++(int) { - iterator retval = *this; - ++(*this); - return retval; - } - - bool operator==(iterator const &other) const { - return done == other.done && indices == other.indices; - } - - bool operator!=(iterator const &other) const { - return !(*this == other); - } - - value_type operator*() const { - std::vector result(c.n); - for (size_t i = 0; i < c.n; ++i) { - result[i] = c.elements[indices[i]]; - } - return result; - } - - private: - 
permutations_with_repetition_container const &c; - std::vector indices; - bool done; - }; - - using const_iterator = iterator; - using value_type = typename iterator::value_type; - using difference_type = typename iterator::difference_type; - using pointer = typename iterator::pointer; - using reference = typename iterator::reference; - using const_reference = typename iterator::reference; - - iterator begin() const { - return iterator(*this, false); - } - - iterator end() const { - return iterator(*this, true); - } - - const_iterator cbegin() const { - return iterator(*this, false); - } - - const_iterator cend() const { - return iterator(*this, true); - } - -private: - std::vector elements; - size_t n; - std::vector indices; - bool done; -}; - -template -permutations_with_repetition_container - get_all_permutations_with_repetition(C const &c, size_t n) { - return permutations_with_repetition_container(c.cbegin(), c.cend(), n); -} - } // namespace FlexFlow #endif diff --git a/lib/utils/include/utils/containers/get_all_permutations_with_repetition.h b/lib/utils/include/utils/containers/get_all_permutations_with_repetition.h new file mode 100644 index 0000000000..ffdd2142c8 --- /dev/null +++ b/lib/utils/include/utils/containers/get_all_permutations_with_repetition.h @@ -0,0 +1,131 @@ +#ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GET_ALL_PERMUTATIONS_WITH_REPETITION_H +#define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GET_ALL_PERMUTATIONS_WITH_REPETITION_H + +#include "utils/containers/sorted.h" +#include +#include +#include + +namespace FlexFlow { + +/** + * @brief For a given container `c` and integer `n`, return all possible vectors + *of size `n` that only contain (possibly duplicated) elements of `c`. 
+ * @details + *https://en.wikipedia.org/wiki/Permutation#Permutations_with_repetition + **/ +template +struct permutations_with_repetition_container { +public: + template + permutations_with_repetition_container(It start, It end, size_t n) + : elements(start, end), n(n) { + if (elements.empty() || n == 0) { + done = true; + } else { + indices.assign(n, 0); + done = false; + } + } + + struct iterator { + public: + using difference_type = long; + using value_type = std::vector; + using pointer = std::vector const *; + using reference = std::vector const &; + using iterator_category = std::input_iterator_tag; + + public: + iterator(permutations_with_repetition_container const &c, bool end_iter) + : c(c), indices(c.indices), done(end_iter || c.done) { + if (end_iter || c.done) { + done = true; + } + } + + iterator &operator++() { + assert(!done); + + // Essentially counting in base `c.elements.size()` + for (int i = c.n - 1; i >= 0; --i) { + if (indices[i] + 1 < c.elements.size()) { + indices[i]++; + break; + } else { + indices[i] = 0; + if (i == 0) { + done = true; + } + } + } + return *this; + } + + iterator operator++(int) { + iterator retval = *this; + ++(*this); + return retval; + } + + bool operator==(iterator const &other) const { + return done == other.done && indices == other.indices; + } + + bool operator!=(iterator const &other) const { + return !(*this == other); + } + + value_type operator*() const { + std::vector result(c.n); + for (size_t i = 0; i < c.n; ++i) { + result[i] = c.elements[indices[i]]; + } + return result; + } + + private: + permutations_with_repetition_container const &c; + std::vector indices; + bool done; + }; + + using const_iterator = iterator; + using value_type = typename iterator::value_type; + using difference_type = typename iterator::difference_type; + using pointer = typename iterator::pointer; + using reference = typename iterator::reference; + using const_reference = typename iterator::reference; + + iterator begin() const { + 
return iterator(*this, false); + } + + iterator end() const { + return iterator(*this, true); + } + + const_iterator cbegin() const { + return iterator(*this, false); + } + + const_iterator cend() const { + return iterator(*this, true); + } + +private: + std::vector elements; + size_t n; + std::vector indices; + bool done; +}; + +template +permutations_with_repetition_container + get_all_permutations_with_repetition(C const &c, size_t n) { + return permutations_with_repetition_container(c.cbegin(), c.cend(), n); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/utils/include/utils/containers/transform.h b/lib/utils/include/utils/containers/transform.h index 02dadb5352..36b99da490 100644 --- a/lib/utils/include/utils/containers/transform.h +++ b/lib/utils/include/utils/containers/transform.h @@ -22,35 +22,29 @@ auto transform(req const &c, F const &f) return transform(static_cast(c), f); } -template ()(std::declval()))> +template > std::unordered_set transform(std::unordered_set const &v, F const &f) { std::unordered_set result; - for (auto const &e : v) { + for (In const &e : v) { result.insert(f(e)); } return result; } -template ()(std::declval()))> +template > std::unordered_multiset transform(std::unordered_multiset const &v, F const &f) { std::unordered_multiset result; - for (auto const &e : v) { + for (In const &e : v) { result.insert(f(e)); } return result; } -template ()(std::declval()))> +template > std::set transform(std::set const &v, F const &f) { std::set result; - for (auto const &e : v) { + for (In const &e : v) { result.insert(f(e)); } return result; diff --git a/lib/utils/include/utils/fmt/unordered_multiset.h b/lib/utils/include/utils/fmt/unordered_multiset.h index deb03a04d4..09dd3c5eab 100644 --- a/lib/utils/include/utils/fmt/unordered_multiset.h +++ b/lib/utils/include/utils/fmt/unordered_multiset.h @@ -23,7 +23,6 @@ struct formatter< ::FlexFlow::join_strings(m.cbegin(), m.cend(), ", ", [](T const &t) { return fmt::to_string(t); }); - // 
} return formatter::format("{" + result + "}", ctx); } }; diff --git a/lib/utils/test/src/utils/containers/get_all_permutations.cc b/lib/utils/test/src/utils/containers/get_all_permutations.cc index 0245c3e211..cc5edb4075 100644 --- a/lib/utils/test/src/utils/containers/get_all_permutations.cc +++ b/lib/utils/test/src/utils/containers/get_all_permutations.cc @@ -50,77 +50,4 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(result == correct); } } - - TEST_CASE("get_all_permutations_with_repetition") { - SUBCASE("container size = 3, n = 1") { - std::vector input = {1, 2, 3}; - - std::unordered_multiset> result = - unordered_multiset_of(get_all_permutations_with_repetition(input, 1)); - std::unordered_multiset> correct = { - {1}, - {2}, - {3}, - }; - - CHECK(result == correct); - } - - SUBCASE("container size 3, n = 2") { - std::vector input = {1, 2, 3}; - - std::unordered_multiset> result = - unordered_multiset_of(get_all_permutations_with_repetition(input, 2)); - std::unordered_multiset> correct = { - {1, 1}, - {1, 2}, - {1, 3}, - {2, 1}, - {2, 2}, - {2, 3}, - {3, 1}, - {3, 2}, - {3, 3}, - }; - - CHECK(result == correct); - } - - SUBCASE("container size 2, n = 3") { - std::vector input = {1, 2}; - - std::unordered_multiset> result = - unordered_multiset_of(get_all_permutations_with_repetition(input, 3)); - std::unordered_multiset> correct = { - {1, 1, 1}, - {1, 1, 2}, - {1, 2, 1}, - {1, 2, 2}, - {2, 1, 1}, - {2, 1, 2}, - {2, 2, 1}, - {2, 2, 2}, - }; - - CHECK(result == correct); - } - - SUBCASE("duplicate elements") { - std::vector input = {1, 2, 2}; - - std::unordered_multiset> result = - unordered_multiset_of(get_all_permutations_with_repetition(input, 2)); - std::unordered_multiset> correct = {{1, 1}, - {1, 2}, - {1, 2}, - {2, 1}, - {2, 1}, - {2, 2}, - {2, 2}, - {2, 2}, - {2, 2}}; - - CHECK(result == correct); - } - } } diff --git a/lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc 
b/lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc new file mode 100644 index 0000000000..828aaae6ff --- /dev/null +++ b/lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc @@ -0,0 +1,76 @@ +#include "utils/containers/get_all_permutations_with_repetition.h" +#include "test/utils/doctest/fmt/unordered_multiset.h" +#include "test/utils/doctest/fmt/vector.h" +#include "utils/containers/unordered_multiset_of.h" +#include "utils/hash/vector.h" +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + + TEST_CASE("get_all_permutations_with_repetition") { + SUBCASE("output vector has only one element") { + std::vector input = {1, 2, 3}; + + std::unordered_multiset> result = + unordered_multiset_of(get_all_permutations_with_repetition(input, 1)); + std::unordered_multiset> correct = { + {1}, + {2}, + {3}, + }; + + CHECK(result == correct); + } + + SUBCASE("input vector has only one element") { + std::vector input = {1}; + + std::unordered_multiset> result = + unordered_multiset_of(get_all_permutations_with_repetition(input, 2)); + std::unordered_multiset> correct = { + {1, 1}, + }; + + CHECK(result == correct); + } + + SUBCASE("input, output vectors have more than 1 element") { + std::vector input = {1, 2}; + + std::unordered_multiset> result = + unordered_multiset_of(get_all_permutations_with_repetition(input, 3)); + std::unordered_multiset> correct = { + {1, 1, 1}, + {1, 1, 2}, + {1, 2, 1}, + {1, 2, 2}, + {2, 1, 1}, + {2, 1, 2}, + {2, 2, 1}, + {2, 2, 2}, + }; + + CHECK(result == correct); + } + + SUBCASE("duplicate elements") { + std::vector input = {1, 2, 2}; + + std::unordered_multiset> result = + unordered_multiset_of(get_all_permutations_with_repetition(input, 2)); + std::unordered_multiset> correct = {{1, 1}, + {1, 2}, + {1, 2}, + {2, 1}, + {2, 1}, + {2, 2}, + {2, 2}, + {2, 2}, + {2, 2}}; + + CHECK(result == correct); + } + } +} diff --git a/lib/utils/test/src/utils/containers/range.cc 
b/lib/utils/test/src/utils/containers/range.cc index e186f80761..f115855323 100644 --- a/lib/utils/test/src/utils/containers/range.cc +++ b/lib/utils/test/src/utils/containers/range.cc @@ -1,7 +1,6 @@ #include "utils/containers/range.h" +#include "test/utils/doctest/fmt/unordered_set.h" #include "test/utils/doctest/fmt/vector.h" -#include "utils/fmt/vector.h" -#include "utils/hash/unordered_set.h" #include #include #include From 3a5fbf416a8691289150e181fd5b116ac2a767cd Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Sat, 5 Oct 2024 15:57:54 -0700 Subject: [PATCH 27/34] PR fixes --- .../src/compiler/allowed_machine_views.cc | 9 +- .../test/src/allowed_machine_views.cc | 77 ++-- lib/pcg/include/pcg/machine_view.h | 3 +- lib/pcg/include/pcg/machine_view.struct.toml | 10 +- lib/pcg/include/pcg/operator_task_space.h | 4 +- lib/pcg/src/pcg/machine_view.cc | 19 +- lib/pcg/test/src/pcg/machine_specification.cc | 24 +- lib/pcg/test/src/pcg/machine_view.cc | 397 +++++++++++++----- lib/pcg/test/src/pcg/operator_task_space.cc | 92 ++-- .../get_all_permutations_with_repetition.h | 129 ++---- .../src/utils/containers/cartesian_product.cc | 3 +- .../get_all_permutations_with_repetition.cc | 9 +- .../test/src/utils/containers/replicate.cc | 4 +- lib/utils/test/src/utils/containers/scanl.cc | 20 +- 14 files changed, 488 insertions(+), 312 deletions(-) diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index ce2fd477b7..18e80193c5 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -24,9 +24,10 @@ namespace FlexFlow { bool is_valid_machine_view(MachineView const &mv, OperatorTaskSpace const &task, MachineSpecification const &ms) { - MachineSpaceCoordinate maximum_device_coords = get_machine_space_coordinate( - task, mv, get_task_space_maximum_coordinate(task), ms); - return is_valid_machine_space_coordinate(ms, maximum_device_coords); + 
std::optional maximum_device_coords = + get_machine_space_coordinate( + task, mv, get_task_space_maximum_coordinate(task), ms); + return maximum_device_coords.has_value(); } /* Generates a set of candidate `MachineView`s @@ -96,7 +97,7 @@ static std::unordered_set candidate_starts(machine_spec, device_type)) { for (std::vector const &proj : candidate_projections(task)) { - machine_views.insert(MachineView{strides.raw_strides, proj, start}); + machine_views.insert(MachineView{start, strides.raw_strides, proj}); } } } diff --git a/lib/compiler/test/src/allowed_machine_views.cc b/lib/compiler/test/src/allowed_machine_views.cc index bbcdd9136e..5235121845 100644 --- a/lib/compiler/test/src/allowed_machine_views.cc +++ b/lib/compiler/test/src/allowed_machine_views.cc @@ -14,24 +14,46 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_allowed_machine_views") { SUBCASE("1 degree of parallelism") { + MachineSpecification ms = MachineSpecification{ + /*num_nodes=*/1, + /*num_cpus_per_node=*/5, + /*num_gpus_per_node=*/5, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0, + }; - MachineSpecification ms = MachineSpecification{1, 5, 5, 0, 0}; OperatorTaskSpace task = OperatorTaskSpace{{3}}; std::unordered_set correct = { - MachineView{{{stride_t{1}}}, - {{MachineSpecificationDimension::INTRA_NODE}}, - MachineSpaceCoordinate{0, 0, DeviceType::GPU}}, - - MachineView{{{stride_t{1}}}, - {{MachineSpecificationDimension::INTRA_NODE}}, - MachineSpaceCoordinate{0, 1, DeviceType::GPU}}, - MachineView{{{stride_t{1}}}, - {{MachineSpecificationDimension::INTRA_NODE}}, - MachineSpaceCoordinate{0, 2, DeviceType::GPU}}, - MachineView{{{stride_t{2}}}, - {{MachineSpecificationDimension::INTRA_NODE}}, - MachineSpaceCoordinate{0, 0, DeviceType::GPU}}, + MachineView{ + MachineSpaceCoordinate{/*node_idx=*/0, + /*device_idx=*/0, + DeviceType::GPU}, + {{stride_t{1}}}, + {{MachineSpecificationDimension::INTRA_NODE}}, + }, + + MachineView{ + MachineSpaceCoordinate{/*node_idx=*/0, + 
/*device_idx=*/1, + DeviceType::GPU}, + {{stride_t{1}}}, + {{MachineSpecificationDimension::INTRA_NODE}}, + }, + MachineView{ + MachineSpaceCoordinate{/*node_idx=*/0, + /*device_idx=*/2, + DeviceType::GPU}, + {{stride_t{1}}}, + {{MachineSpecificationDimension::INTRA_NODE}}, + }, + MachineView{ + MachineSpaceCoordinate{/*node_idx=*/0, + /*device_idx=*/0, + DeviceType::GPU}, + {{stride_t{2}}}, + {{MachineSpecificationDimension::INTRA_NODE}}, + }, }; std::unordered_set result = @@ -45,28 +67,29 @@ TEST_SUITE(FF_TEST_SUITE) { MachineSpecification ms = MachineSpecification{3, 3, 3, 0, 0}; OperatorTaskSpace task = OperatorTaskSpace{{2, 3}}; - auto make_2d_views = [&](int start_x, - int start_y, - int stride1, - int stride2, - MachineSpecificationDimension m1, - MachineSpecificationDimension m2) { + auto make_2d_view = [&](int start_x, + int start_y, + int stride1, + int stride2, + MachineSpecificationDimension m1, + MachineSpecificationDimension m2) { return MachineView{ + MachineSpaceCoordinate{start_x, start_y, DeviceType::GPU}, {stride_t{stride1}, stride_t{stride2}}, {m1, m2}, - MachineSpaceCoordinate{start_x, start_y, DeviceType::GPU}}; + }; }; auto intra = MachineSpecificationDimension::INTRA_NODE; auto inter = MachineSpecificationDimension::INTER_NODE; std::unordered_set correct = { - make_2d_views(0, 0, /*stride1*/ 1, /*stride2*/ 1, inter, intra), - make_2d_views(1, 0, /*stride1*/ 1, /*stride2*/ 1, inter, intra), - make_2d_views(0, 0, /*stride1*/ 2, /*stride2*/ 1, inter, intra), + make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, inter, intra), + make_2d_view(1, 0, /*stride1=*/1, /*stride2=*/1, inter, intra), + make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, inter, intra), - make_2d_views(0, 0, /*stride1*/ 1, /*stride2*/ 1, intra, inter), - make_2d_views(0, 1, /*stride1*/ 1, /*stride2*/ 1, intra, inter), - make_2d_views(0, 0, /*stride1*/ 2, /*stride2*/ 1, intra, inter), + make_2d_view(0, 0, /*stride1=*/1, /*stride2=*/1, intra, inter), + make_2d_view(0, 1, 
/*stride1=*/1, /*stride2=*/1, intra, inter), + make_2d_view(0, 0, /*stride1=*/2, /*stride2=*/1, intra, inter), }; std::unordered_set result = diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 89c1465c13..55bed705b2 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -7,11 +7,12 @@ #include "pcg/operator_task_space.dtg.h" #include "task_space_coordinate.dtg.h" #include +#include #include namespace FlexFlow { -MachineSpaceCoordinate +std::optional get_machine_space_coordinate(OperatorTaskSpace const &task, MachineView const &mv, TaskSpaceCoordinate const &coordinates, diff --git a/lib/pcg/include/pcg/machine_view.struct.toml b/lib/pcg/include/pcg/machine_view.struct.toml index c6c8741e03..583b1baa9c 100644 --- a/lib/pcg/include/pcg/machine_view.struct.toml +++ b/lib/pcg/include/pcg/machine_view.struct.toml @@ -20,6 +20,11 @@ src_includes = [ "utils/hash/vector.h" ] + +[[fields]] +name = "start" +type = "::FlexFlow::MachineSpaceCoordinate" + [[fields]] name = "strides" type = "std::vector<::FlexFlow::stride_t>" @@ -27,8 +32,3 @@ type = "std::vector<::FlexFlow::stride_t>" [[fields]] name = "projection" type = "std::vector<::FlexFlow::MachineSpecificationDimension>" - -[[fields]] -name = "start" -type = "::FlexFlow::MachineSpaceCoordinate" - diff --git a/lib/pcg/include/pcg/operator_task_space.h b/lib/pcg/include/pcg/operator_task_space.h index 98dac7e92b..61cab4eff1 100644 --- a/lib/pcg/include/pcg/operator_task_space.h +++ b/lib/pcg/include/pcg/operator_task_space.h @@ -1,5 +1,5 @@ -#ifndef _FLEXFLOW_PCG_INCLUDE_operator_task_space_H -#define _FLEXFLOW_PCG_INCLUDE_operator_task_space_H +#ifndef _FLEXFLOW_PCG_INCLUDE_OPERATOR_TASK_SPACE_H +#define _FLEXFLOW_PCG_INCLUDE_OPERATOR_TASK_SPACE_H #include "pcg/operator_task_space.dtg.h" #include "pcg/task_space_coordinate.dtg.h" diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 4b284567f9..5e7ba1eb93 100644 --- 
a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -1,4 +1,5 @@ #include "pcg/machine_view.h" +#include "pcg/machine_specification.h" #include "pcg/operator_task_space.h" #include "utils/containers/contains.h" #include "utils/containers/scanl.h" @@ -47,7 +48,7 @@ static int compute_index(int start_idx, return index; } -MachineSpaceCoordinate +std::optional get_machine_space_coordinate(OperatorTaskSpace const &task, MachineView const &mv, TaskSpaceCoordinate const &coord, @@ -62,18 +63,22 @@ MachineSpaceCoordinate mv.start.node_idx, inter_projection_indices, task, mv, coord); int device_idx = compute_index( mv.start.device_idx, intra_projection_indices, task, mv, coord); - - return MachineSpaceCoordinate{node_idx, device_idx, get_device_type(mv)}; + MachineSpaceCoordinate ms_coord = + MachineSpaceCoordinate{node_idx, device_idx, get_device_type(mv)}; + if (!is_valid_machine_space_coordinate(ms, ms_coord)) { + return std::nullopt; + } + return ms_coord; } std::unordered_set get_machine_space_coordinates(OperatorTaskSpace const &task, MachineView const &mv, MachineSpecification const &ms) { - return transform(get_task_space_coordinates(task), - [&](TaskSpaceCoordinate const &c) { - return get_machine_space_coordinate(task, mv, c, ms); - }); + return transform( + get_task_space_coordinates(task), [&](TaskSpaceCoordinate const &c) { + return get_machine_space_coordinate(task, mv, c, ms).value(); + }); } size_t num_dims(MachineView const &mv) { diff --git a/lib/pcg/test/src/pcg/machine_specification.cc b/lib/pcg/test/src/pcg/machine_specification.cc index 9a86c62701..8acfc8e3ba 100644 --- a/lib/pcg/test/src/pcg/machine_specification.cc +++ b/lib/pcg/test/src/pcg/machine_specification.cc @@ -8,7 +8,13 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("MachineSpecification") { - MachineSpecification ms = MachineSpecification{4, 16, 8, 100.0f, 200.0f}; + MachineSpecification ms = MachineSpecification{ + /*num_nodes=*/4, + /*num_cpus_per_node=*/16, + 
/*num_gpus_per_node=*/8, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0, + }; SUBCASE("get_num_gpus") { CHECK(get_num_gpus(ms) == 4 * 8); @@ -25,16 +31,22 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("get_device_id") { SUBCASE("valid MachineSpaceCoordinate") { - MachineSpaceCoordinate coord = - MachineSpaceCoordinate{2, 12, DeviceType::CPU}; + MachineSpaceCoordinate coord = MachineSpaceCoordinate{ + /*node_idx=*/2, + /*device_idx=*/12, + DeviceType::CPU, + }; device_id_t correct = device_id_from_index(2 * 16 + 12, DeviceType::CPU); device_id_t result = get_device_id(ms, coord); CHECK(correct == result); } - SUBCASE("invalid MachineSpaceCoordinate") { - MachineSpaceCoordinate coord = - MachineSpaceCoordinate{2, 18, DeviceType::CPU}; + SUBCASE("MachineSpaceCoordinate for given machine spec") { + MachineSpaceCoordinate coord = MachineSpaceCoordinate{ + /*node_idx=*/2, + /*device_idx=*/18, + DeviceType::CPU, + }; CHECK_THROWS(get_device_id(ms, coord)); } } diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index d4031e15b0..c7b7a67b09 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -1,4 +1,5 @@ #include "pcg/machine_view.h" +#include "test/utils/doctest/fmt/optional.h" #include "utils/containers/transform.h" #include "utils/fmt/unordered_set.h" #include "utils/fmt/vector.h" @@ -9,10 +10,12 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("MachineView - utility functions") { - MachineView mv = MachineView{{stride_t{2}, stride_t{2}}, - {MachineSpecificationDimension::INTER_NODE, - MachineSpecificationDimension::INTER_NODE}, - MachineSpaceCoordinate{0, 0, DeviceType::GPU}}; + MachineView mv = + MachineView{MachineSpaceCoordinate{ + /*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU}, + {stride_t{2}, stride_t{2}}, + {MachineSpecificationDimension::INTER_NODE, + MachineSpecificationDimension::INTER_NODE}}; SUBCASE("num_dims") { CHECK(num_dims(mv) == 2); @@ -25,129 
+28,323 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("get_machine_space_coordinate") { SUBCASE("1D case") { + // This operator has shape (3,), and thus 3 tasks + // The (only) dimension is projected on the INTER (device) dimension with + // a stride of 2. + // The start of the projection defined by MachineView starts at + // MachineSpaceCoordinate (0,1), and the machine space has 1 node and 6 + // devices per node. + + /** + * The tasks will thus be distributed like this: + * +-------+-------+-------+-------+-------+-------+ + * | | (0,) | | (1,) | | (2,) | + * +-------+-------+-------+-------+-------+-------+ + * Where the (x,) are the `TaskSpaceCoordinate`s, and the underlying grid + * is the machine space. + * + */ OperatorTaskSpace task = OperatorTaskSpace{{3}}; MachineView mv = - MachineView{{{stride_t{2}}}, - {{MachineSpecificationDimension::INTRA_NODE}}, - MachineSpaceCoordinate{0, 1, DeviceType::GPU}}; + MachineView{MachineSpaceCoordinate{ + /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}, + {{stride_t{2}}}, + {{MachineSpecificationDimension::INTRA_NODE}}}; - MachineSpecification ms = MachineSpecification{/*num_nodes*/ 1, - /*num_cpus_per_node*/ 6, - /*num_gpus_per_node*/ 6, - 0, - 0}; + MachineSpecification ms = MachineSpecification{ + /*num_nodes=*/1, + /*num_cpus_per_node=*/6, + /*num_gpus_per_node=*/6, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0, + }; - SUBCASE("Fragment 0") { + SUBCASE("Task with TaskSpaceCoordinate = (0,)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0}}; - MachineSpaceCoordinate correct = - MachineSpaceCoordinate{0, 1, DeviceType::GPU}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/1, + DeviceType::GPU, + }; MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms); + get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } - SUBCASE("Fragment 1") { + SUBCASE("Task with TaskSpaceCoordinate = (1,)") { 
TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1}}; - MachineSpaceCoordinate correct = - MachineSpaceCoordinate{0, 3, DeviceType::GPU}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/3, + DeviceType::GPU, + }; MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms); + get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } - SUBCASE("Fragment 2") { + SUBCASE("Task with TaskSpaceCoordinate = (2,)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{2}}; - MachineSpaceCoordinate correct = - MachineSpaceCoordinate{0, 5, DeviceType::GPU}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/5, + DeviceType::GPU, + }; MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms); + get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } - } - SUBCASE("2D case") { - OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; - MachineView mv = - MachineView{{{stride_t{1}, stride_t{2}}}, - {{MachineSpecificationDimension::INTER_NODE, - MachineSpecificationDimension::INTRA_NODE}}, - MachineSpaceCoordinate{1, 2, DeviceType::GPU}}; - - MachineSpecification ms = MachineSpecification{/*num_nodes*/ 3, - /*num_cpus_per_node*/ 5, - /*num_gpus_per_node*/ 5, - 0, - 0}; - - SUBCASE("Fragment (0,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 0}}; - MachineSpaceCoordinate correct = - MachineSpaceCoordinate{1, 2, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms); - CHECK(correct == result); - } + SUBCASE("TaskSpaceCoordinate is out of bounds") { - SUBCASE("Fragment (0,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1}}; - MachineSpaceCoordinate correct = - MachineSpaceCoordinate{1, 4, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms); - CHECK(correct == result); 
- } - SUBCASE("Fragment (1,0)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0}}; - MachineSpaceCoordinate correct = - MachineSpaceCoordinate{2, 2, DeviceType::GPU}; - MachineSpaceCoordinate result = + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{4}}; + + std::optional result = get_machine_space_coordinate(task, mv, coord, ms); - CHECK(correct == result); + std::optional correct = std::nullopt; + CHECK(result == correct); } - SUBCASE("Fragment (1,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1}}; - MachineSpaceCoordinate correct = - MachineSpaceCoordinate{2, 4, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms); - CHECK(correct == result); + SUBCASE("2D case - projection on different dimensions") { + // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks. + // The first dimension is projected onto the INTER (node) dimension with + // stride 1, while the second dimension is projected onto the INTRA + // (device) dimension with stride 2. + // The start of the projection defined + // by MachineView is at MachineSpaceCoordinates (1, 2), and the machine + // space has 3 nodes and 5 devices per node. + + /** + * The tasks will thus be distributed like this: + * +-------+-------+-------+-------+-------+ + * | | | | | | + * +-------+-------+-------+-------+-------+ + * | | | (0,0) | | (0,1) | + * +-------+-------+-------+-------+-------+ + * | | | (1,0) | | (1,1) | + * +-------+-------+-------+-------+-------+ + * Where the (x,y) are the `TaskSpaceCoordinate`s, and the underlying + * grid is the machine space. 
+ */ + + OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; + MachineView mv = MachineView{ + MachineSpaceCoordinate{ + /*node_idx=*/1, + /*device_idx=*/2, + DeviceType::GPU, + }, + {{stride_t{1}, stride_t{2}}}, + {{MachineSpecificationDimension::INTER_NODE, + MachineSpecificationDimension::INTRA_NODE}}, + }; + + MachineSpecification ms = + MachineSpecification{/*num_nodes=*/3, + /*num_cpus_per_node=*/5, + /*num_gpus_per_node=*/5, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0}; + + SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 0}}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1, + /*device_idx=*/2, + DeviceType::GPU, + }; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms).value(); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1}}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1, + /*device_idx=*/4, + DeviceType::GPU, + }; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms).value(); + CHECK(correct == result); + } + SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0}}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/2, + /*device_idx=*/2, + DeviceType::GPU, + }; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms).value(); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1}}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/2, + /*device_idx=*/4, + DeviceType::GPU, + }; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms).value(); + CHECK(correct == result); + } } - } - SUBCASE("3D case") { - OperatorTaskSpace task = 
OperatorTaskSpace{{2, 2, 2}}; - MachineView mv = - MachineView{{{stride_t{1}, stride_t{2}, stride_t{1}}}, - {{MachineSpecificationDimension::INTER_NODE, - MachineSpecificationDimension::INTRA_NODE, - MachineSpecificationDimension::INTRA_NODE}}, - MachineSpaceCoordinate{0, 1, DeviceType::GPU}}; - - MachineSpecification ms = MachineSpecification{/*num_nodes*/ 2, - /*num_cpus_per_node*/ 8, - /*num_gpus_per_node*/ 8, - 0, - 0}; - - SUBCASE("Fragment (0,0,1)") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1, 0}}; - MachineSpaceCoordinate correct = - MachineSpaceCoordinate(0, 3, DeviceType::GPU); - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms); - ; - CHECK(correct == result); + SUBCASE("2D case - projection on same dimension") { + // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks. + // Both dimensions are projected on the INTRA (device) dimension, with + // strides 1 and 2 respectively. The start of the projection defined by + // MachineView is at MachineSpaceCoordinates (1, 0), and the machine + // space has 2 nodes and 6 devices per node. + + /** + * +-------+-------+-------+-------+-------+-------+ + * | | | | | | | + * +-------+-------+-------+-------+-------+-------+ + * | (0,0) | (1,0) | | | (0,1) | (1,1) | + * +-------+-------+-------+-------+-------+-------+ + + * Where the (x,y) are the `TaskSpaceCoordinate`s, and the underlying + * grid is the machine space. 
+ */ + + OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; + MachineView mv = + MachineView{MachineSpaceCoordinate{ + /*node_idx=*/1, + /*device_idx=*/0, + DeviceType::GPU, + }, + {{stride_t{1}, stride_t{2}}}, + {{MachineSpecificationDimension::INTRA_NODE, + MachineSpecificationDimension::INTRA_NODE}}}; + + MachineSpecification ms = + MachineSpecification{/*num_nodes=*/2, + /*num_cpus_per_node=*/6, + /*num_gpus_per_node=*/6, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0}; + + SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 0}}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1, + /*device_idx=*/0, + DeviceType::GPU, + }; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms).value(); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1}}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1, + /*device_idx=*/4, + DeviceType::GPU, + }; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms).value(); + CHECK(correct == result); + } + SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0}}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1, + /*device_idx=*/1, + DeviceType::GPU, + }; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms).value(); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1}}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1, + /*device_idx=*/5, + DeviceType::GPU, + }; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms).value(); + CHECK(correct == result); + } } - SUBCASE("Fragment (1, 1, 0)") { - TaskSpaceCoordinate 
coord = TaskSpaceCoordinate{{1, 0, 1}}; - MachineSpaceCoordinate correct = - MachineSpaceCoordinate{1, 5, DeviceType::GPU}; - MachineSpaceCoordinate result = - get_machine_space_coordinate(task, mv, coord, ms); - ; - CHECK(correct == result); + SUBCASE("3D case") { + + // This operator has shape (2, 2, 2), and thus 2 * 2 * 2 = 8 tasks. + // - The first dimension is projected onto the INTER (node) dimension + // with stride 1, + // - The second dimension is projected onto the INTRA (device) dimension + // with stride 2. + // - The third dimension is projected onto the INTRA (device) dimension + // with stride 1. + // The start of the projection defined by MachineView is at + // MachineSpaceCoordinates (0, 1), and the machine space has 2 nodes and + // 8 devices per node. + + /** + * The tasks will thus be distributed like this: + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | |(0,0,0)| |(0,0,1)| |(0,1,0)| |(0,1,1)| + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * | |(1,0,0)| |(1,0,1)| |(1,1,0)| |(1,1,1)| + * +-------+-------+-------+-------+-------+-------+-------+-------+ + * Where the (x,y,z) are the `TaskSpaceCoordinate`s, and the underlying + * grid is the machine space. 
+ */ + + OperatorTaskSpace task = OperatorTaskSpace{{2, 2, 2}}; + MachineView mv = MachineView{ + MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/1, + DeviceType::GPU, + }, + {{stride_t{1}, stride_t{2}, stride_t{1}}}, + {{MachineSpecificationDimension::INTER_NODE, + MachineSpecificationDimension::INTRA_NODE, + MachineSpecificationDimension::INTRA_NODE}}, + }; + MachineSpecification ms = MachineSpecification{ + /*num_nodes=*/2, + /*num_cpus_per_node=*/8, + /*num_gpus_per_node=*/8, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0, + }; + + SUBCASE("Task with TaskSpaceCoordinate = (0,0,1)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1, 0}}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/3, + DeviceType::GPU, + }; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms).value(); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (1, 1, 0)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0, 1}}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1, + /*device_idx=*/5, + DeviceType::GPU, + }; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms).value(); + CHECK(correct == result); + } } } } diff --git a/lib/pcg/test/src/pcg/operator_task_space.cc b/lib/pcg/test/src/pcg/operator_task_space.cc index 33ab5665d6..13198d9456 100644 --- a/lib/pcg/test/src/pcg/operator_task_space.cc +++ b/lib/pcg/test/src/pcg/operator_task_space.cc @@ -5,54 +5,62 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("operator_task_space functions") { - SUBCASE("get_task_space_coordinates") { - - SUBCASE("2D Task") { - - OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; - - std::unordered_set correct = { - {TaskSpaceCoordinate{{0, 0}}, - TaskSpaceCoordinate{{0, 1}}, - TaskSpaceCoordinate{{1, 0}}, - TaskSpaceCoordinate{{1, 1}}}}; - std::unordered_set result = - 
get_task_space_coordinates(task); - CHECK(correct == result); - } - SUBCASE("3D Task") { - - OperatorTaskSpace task = OperatorTaskSpace{{1, 2, 2}}; - - std::unordered_set correct = { - {TaskSpaceCoordinate{{0, 0, 0}}, - TaskSpaceCoordinate{{0, 0, 1}}, - TaskSpaceCoordinate{{0, 1, 0}}, - TaskSpaceCoordinate{{0, 1, 1}}}}; - std::unordered_set result = - get_task_space_coordinates(task); - CHECK(correct == result); - } + TEST_CASE("get_task_space_coordinates") { + + SUBCASE("OperatorTaskSpace has 0 dimensions") { + OperatorTaskSpace task = OperatorTaskSpace{{}}; + + std::unordered_set correct = { + TaskSpaceCoordinate{{}}}; + std::unordered_set result = + get_task_space_coordinates(task); + CHECK(correct == result); } - SUBCASE("get_task_space_maximum_coordinate") { + SUBCASE("OperatorTaskSpace has 2 dimensions") { - SUBCASE("2D Task") { + OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; - OperatorTaskSpace task = OperatorTaskSpace{{3, 2}}; + std::unordered_set correct = {{ + TaskSpaceCoordinate{{0, 0}}, + TaskSpaceCoordinate{{0, 1}}, + TaskSpaceCoordinate{{1, 0}}, + TaskSpaceCoordinate{{1, 1}}, + }}; + std::unordered_set result = + get_task_space_coordinates(task); + CHECK(correct == result); + } + SUBCASE("OperatorTaskSpace has 3 dimensions") { - TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2, 1}}; - TaskSpaceCoordinate result = get_task_space_maximum_coordinate(task); - CHECK(correct == result); - } - SUBCASE("3D Task") { + OperatorTaskSpace task = OperatorTaskSpace{{1, 2, 2}}; + + std::unordered_set correct = {{ + TaskSpaceCoordinate{{0, 0, 0}}, + TaskSpaceCoordinate{{0, 0, 1}}, + TaskSpaceCoordinate{{0, 1, 0}}, + TaskSpaceCoordinate{{0, 1, 1}}, + }}; + std::unordered_set result = + get_task_space_coordinates(task); + CHECK(correct == result); + } + } + TEST_CASE("get_task_space_maximum_coordinate") { + SUBCASE("OperatorTaskSpace has 2 dimensions") { + + OperatorTaskSpace task = OperatorTaskSpace{{3, 2}}; + + TaskSpaceCoordinate correct = 
TaskSpaceCoordinate{{2, 1}}; + TaskSpaceCoordinate result = get_task_space_maximum_coordinate(task); + CHECK(correct == result); + } + SUBCASE("OperatorTaskSpace has 3 dimensions") { - OperatorTaskSpace task = OperatorTaskSpace{{3, 2, 4}}; + OperatorTaskSpace task = OperatorTaskSpace{{3, 2, 4}}; - TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2, 1, 3}}; - TaskSpaceCoordinate result = get_task_space_maximum_coordinate(task); - CHECK(correct == result); - } + TaskSpaceCoordinate correct = TaskSpaceCoordinate{{2, 1, 3}}; + TaskSpaceCoordinate result = get_task_space_maximum_coordinate(task); + CHECK(correct == result); } } } diff --git a/lib/utils/include/utils/containers/get_all_permutations_with_repetition.h b/lib/utils/include/utils/containers/get_all_permutations_with_repetition.h index ffdd2142c8..ccdde0131a 100644 --- a/lib/utils/include/utils/containers/get_all_permutations_with_repetition.h +++ b/lib/utils/include/utils/containers/get_all_permutations_with_repetition.h @@ -1,129 +1,48 @@ #ifndef _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GET_ALL_PERMUTATIONS_WITH_REPETITION_H #define _FLEXFLOW_LIB_UTILS_INCLUDE_UTILS_CONTAINERS_GET_ALL_PERMUTATIONS_WITH_REPETITION_H -#include "utils/containers/sorted.h" -#include -#include +#include #include namespace FlexFlow { /** * @brief For a given container `c` and integer `n`, return all possible vectors - *of size `n` that only contain (possibly duplicated) elements of `c`. + * of size `n` that only contain (possibly duplicated) elements of `c`. 
* @details - *https://en.wikipedia.org/wiki/Permutation#Permutations_with_repetition + * https://en.wikipedia.org/wiki/Permutation#Permutations_with_repetition **/ -template -struct permutations_with_repetition_container { -public: - template - permutations_with_repetition_container(It start, It end, size_t n) - : elements(start, end), n(n) { - if (elements.empty() || n == 0) { - done = true; - } else { - indices.assign(n, 0); - done = false; - } - } - - struct iterator { - public: - using difference_type = long; - using value_type = std::vector; - using pointer = std::vector const *; - using reference = std::vector const &; - using iterator_category = std::input_iterator_tag; - - public: - iterator(permutations_with_repetition_container const &c, bool end_iter) - : c(c), indices(c.indices), done(end_iter || c.done) { - if (end_iter || c.done) { - done = true; - } - } - - iterator &operator++() { - assert(!done); +template +std::unordered_multiset> + get_all_permutations_with_repetition(C const &container, int n) { + std::unordered_multiset> result; - // Essentially counting in base `c.elements.size()` - for (int i = c.n - 1; i >= 0; --i) { - if (indices[i] + 1 < c.elements.size()) { - indices[i]++; - break; - } else { - indices[i] = 0; - if (i == 0) { - done = true; - } - } - } - return *this; - } + if (container.empty() || n == 0) { + return result; + } - iterator operator++(int) { - iterator retval = *this; - ++(*this); - return retval; - } + std::vector elements(std::begin(container), std::end(container)); + std::vector indices(n, 0); - bool operator==(iterator const &other) const { - return done == other.done && indices == other.indices; + while (true) { + std::vector perm(n); + for (int i = 0; i < n; ++i) { + perm[i] = elements[indices[i]]; } + result.insert(perm); - bool operator!=(iterator const &other) const { - return !(*this == other); + int i = n - 1; + while (i != -1 && ++indices[i] == elements.size()) { + indices[i] = 0; + --i; } - value_type 
operator*() const { - std::vector result(c.n); - for (size_t i = 0; i < c.n; ++i) { - result[i] = c.elements[indices[i]]; - } - return result; + if (i == -1) { + break; } - - private: - permutations_with_repetition_container const &c; - std::vector indices; - bool done; - }; - - using const_iterator = iterator; - using value_type = typename iterator::value_type; - using difference_type = typename iterator::difference_type; - using pointer = typename iterator::pointer; - using reference = typename iterator::reference; - using const_reference = typename iterator::reference; - - iterator begin() const { - return iterator(*this, false); - } - - iterator end() const { - return iterator(*this, true); - } - - const_iterator cbegin() const { - return iterator(*this, false); - } - - const_iterator cend() const { - return iterator(*this, true); } -private: - std::vector elements; - size_t n; - std::vector indices; - bool done; -}; - -template -permutations_with_repetition_container - get_all_permutations_with_repetition(C const &c, size_t n) { - return permutations_with_repetition_container(c.cbegin(), c.cend(), n); + return result; } } // namespace FlexFlow diff --git a/lib/utils/test/src/utils/containers/cartesian_product.cc b/lib/utils/test/src/utils/containers/cartesian_product.cc index 7fdfcb847e..773d94c8d0 100644 --- a/lib/utils/test/src/utils/containers/cartesian_product.cc +++ b/lib/utils/test/src/utils/containers/cartesian_product.cc @@ -1,7 +1,6 @@ #include "utils/containers/cartesian_product.h" #include "test/utils/doctest/fmt/unordered_multiset.h" -#include "utils/fmt/unordered_multiset.h" -#include "utils/fmt/vector.h" +#include "test/utils/doctest/fmt/vector.h" #include #include #include diff --git a/lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc b/lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc index 828aaae6ff..f25bcf65b1 100644 --- 
a/lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc +++ b/lib/utils/test/src/utils/containers/get_all_permutations_with_repetition.cc @@ -1,7 +1,6 @@ #include "utils/containers/get_all_permutations_with_repetition.h" #include "test/utils/doctest/fmt/unordered_multiset.h" #include "test/utils/doctest/fmt/vector.h" -#include "utils/containers/unordered_multiset_of.h" #include "utils/hash/vector.h" #include @@ -14,7 +13,7 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector input = {1, 2, 3}; std::unordered_multiset> result = - unordered_multiset_of(get_all_permutations_with_repetition(input, 1)); + get_all_permutations_with_repetition(input, 1); std::unordered_multiset> correct = { {1}, {2}, @@ -28,7 +27,7 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector input = {1}; std::unordered_multiset> result = - unordered_multiset_of(get_all_permutations_with_repetition(input, 2)); + get_all_permutations_with_repetition(input, 2); std::unordered_multiset> correct = { {1, 1}, }; @@ -40,7 +39,7 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector input = {1, 2}; std::unordered_multiset> result = - unordered_multiset_of(get_all_permutations_with_repetition(input, 3)); + get_all_permutations_with_repetition(input, 3); std::unordered_multiset> correct = { {1, 1, 1}, {1, 1, 2}, @@ -59,7 +58,7 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector input = {1, 2, 2}; std::unordered_multiset> result = - unordered_multiset_of(get_all_permutations_with_repetition(input, 2)); + get_all_permutations_with_repetition(input, 2); std::unordered_multiset> correct = {{1, 1}, {1, 2}, {1, 2}, diff --git a/lib/utils/test/src/utils/containers/replicate.cc b/lib/utils/test/src/utils/containers/replicate.cc index 5d4c9a1bba..1c7845642e 100644 --- a/lib/utils/test/src/utils/containers/replicate.cc +++ b/lib/utils/test/src/utils/containers/replicate.cc @@ -1,8 +1,6 @@ #include "utils/containers/replicate.h" +#include "test/utils/doctest/fmt/unordered_set.h" #include "test/utils/doctest/fmt/vector.h" -#include 
"utils/fmt/unordered_set.h" -#include "utils/fmt/vector.h" -#include "utils/hash/unordered_set.h" #include #include diff --git a/lib/utils/test/src/utils/containers/scanl.cc b/lib/utils/test/src/utils/containers/scanl.cc index 675f276df3..d6da0ac0a1 100644 --- a/lib/utils/test/src/utils/containers/scanl.cc +++ b/lib/utils/test/src/utils/containers/scanl.cc @@ -1,12 +1,10 @@ #include "utils/containers/scanl.h" #include "test/utils/doctest/fmt/vector.h" -#include "utils/fmt/vector.h" #include #include #include using namespace FlexFlow; - TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("scanl") { @@ -26,7 +24,7 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(result == correct); } - SUBCASE("heterogenous types") { + SUBCASE("heterogeneous types") { std::vector input = {1, 2, 3, 4}; auto op = [](std::string const &a, int b) { return a + std::to_string(b); @@ -35,6 +33,14 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector correct = {"", "1", "12", "123", "1234"}; CHECK(result == correct); } + + SUBCASE("empty input") { + std::vector input = {}; + std::vector result = + scanl(input, 0, [](int a, int b) { return a + b; }); + std::vector correct = {0}; + CHECK(result == correct); + } } TEST_CASE("scanl1") { @@ -53,5 +59,13 @@ TEST_SUITE(FF_TEST_SUITE) { std::vector correct = {1, 3, 16, 33}; CHECK(result == correct); } + + SUBCASE("empty input") { + std::vector input = {}; + std::vector result = + scanl1(input, [](int a, int b) { return a + b; }); + std::vector correct = {}; + CHECK(result == correct); + } } } From c1e1c8c9d286058330605b09d821f310cda9a2e7 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Sat, 5 Oct 2024 16:27:29 -0700 Subject: [PATCH 28/34] machineview interface change --- .../src/compiler/allowed_machine_views.cc | 6 +- .../test/src/allowed_machine_views.cc | 40 ++-- lib/pcg/include/pcg/machine_view.h | 13 +- lib/pcg/include/pcg/machine_view.struct.toml | 13 +- .../pcg/machine_view_dimension.struct.toml | 24 ++ lib/pcg/src/pcg/machine_view.cc | 38 +++- 
lib/pcg/test/src/pcg/machine_view.cc | 210 +++++++----------- 7 files changed, 168 insertions(+), 176 deletions(-) create mode 100644 lib/pcg/include/pcg/machine_view_dimension.struct.toml diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index 18e80193c5..1f5ca4e114 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -97,7 +97,11 @@ static std::unordered_set candidate_starts(machine_spec, device_type)) { for (std::vector const &proj : candidate_projections(task)) { - machine_views.insert(MachineView{start, strides.raw_strides, proj}); + std::vector dimensions = + transform(zip(strides.raw_strides, proj), [&](auto const &p) { + return MachineViewDimension{p.first, p.second}; + }); + machine_views.insert(MachineView{start, dimensions}); } } } diff --git a/lib/compiler/test/src/allowed_machine_views.cc b/lib/compiler/test/src/allowed_machine_views.cc index 5235121845..a337ca5e69 100644 --- a/lib/compiler/test/src/allowed_machine_views.cc +++ b/lib/compiler/test/src/allowed_machine_views.cc @@ -26,33 +26,29 @@ TEST_SUITE(FF_TEST_SUITE) { std::unordered_set correct = { MachineView{ - MachineSpaceCoordinate{/*node_idx=*/0, - /*device_idx=*/0, - DeviceType::GPU}, - {{stride_t{1}}}, - {{MachineSpecificationDimension::INTRA_NODE}}, + MachineSpaceCoordinate{ + /*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU}, + {MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTRA_NODE}}, }, MachineView{ - MachineSpaceCoordinate{/*node_idx=*/0, - /*device_idx=*/1, - DeviceType::GPU}, - {{stride_t{1}}}, - {{MachineSpecificationDimension::INTRA_NODE}}, + MachineSpaceCoordinate{ + /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}, + {MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTRA_NODE}}, }, MachineView{ - MachineSpaceCoordinate{/*node_idx=*/0, - /*device_idx=*/2, - DeviceType::GPU}, - {{stride_t{1}}}, - 
{{MachineSpecificationDimension::INTRA_NODE}}, + MachineSpaceCoordinate{ + /*node_idx=*/0, /*device_idx=*/2, DeviceType::GPU}, + {MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTRA_NODE}}, }, MachineView{ - MachineSpaceCoordinate{/*node_idx=*/0, - /*device_idx=*/0, - DeviceType::GPU}, - {{stride_t{2}}}, - {{MachineSpecificationDimension::INTRA_NODE}}, + MachineSpaceCoordinate{ + /*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU}, + {MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTRA_NODE}}, }, }; @@ -75,8 +71,8 @@ TEST_SUITE(FF_TEST_SUITE) { MachineSpecificationDimension m2) { return MachineView{ MachineSpaceCoordinate{start_x, start_y, DeviceType::GPU}, - {stride_t{stride1}, stride_t{stride2}}, - {m1, m2}, + {MachineViewDimension{stride_t{stride1}, m1}, + MachineViewDimension{stride_t{stride2}, m2}}, }; }; diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 55bed705b2..2f53ad42d7 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -12,6 +12,15 @@ namespace FlexFlow { +size_t num_dims(MachineView const &mv); + +DeviceType get_device_type(MachineView const &mv); + +std::vector get_strides(MachineView const &mv); + +std::vector + get_projections(MachineView const &mv); + std::optional get_machine_space_coordinate(OperatorTaskSpace const &task, MachineView const &mv, @@ -23,10 +32,6 @@ std::unordered_set MachineView const &mv, MachineSpecification const &ms); -size_t num_dims(MachineView const &mv); - -DeviceType get_device_type(MachineView const &mv); - } // namespace FlexFlow #endif diff --git a/lib/pcg/include/pcg/machine_view.struct.toml b/lib/pcg/include/pcg/machine_view.struct.toml index 583b1baa9c..e4de69eafc 100644 --- a/lib/pcg/include/pcg/machine_view.struct.toml +++ b/lib/pcg/include/pcg/machine_view.struct.toml @@ -10,9 +10,8 @@ features = [ ] includes = [ - "pcg/stride_t.dtg.h", - "pcg/machine_specification_dimension.dtg.h", - 
"pcg/machine_space_coordinate.dtg.h", + "pcg/machine_view_dimension.dtg.h", + "pcg/machine_space_coordinate.dtg.h" ] src_includes = [ @@ -26,9 +25,5 @@ name = "start" type = "::FlexFlow::MachineSpaceCoordinate" [[fields]] -name = "strides" -type = "std::vector<::FlexFlow::stride_t>" - -[[fields]] -name = "projection" -type = "std::vector<::FlexFlow::MachineSpecificationDimension>" +name = "dimensions" +type = "std::vector<::FlexFlow::MachineViewDimension>" diff --git a/lib/pcg/include/pcg/machine_view_dimension.struct.toml b/lib/pcg/include/pcg/machine_view_dimension.struct.toml new file mode 100644 index 0000000000..03b0ac51e4 --- /dev/null +++ b/lib/pcg/include/pcg/machine_view_dimension.struct.toml @@ -0,0 +1,24 @@ +namespace = "FlexFlow" +name = "MachineViewDimension" +features = [ + "eq", + "ord", + "hash", + "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "pcg/machine_specification_dimension.dtg.h", + "pcg/stride_t.dtg.h", +] + + +[[fields]] +name = "stride" +type = "::FlexFlow::stride_t" + +[[fields]] +name = "projection" +type = "::FlexFlow::MachineSpecificationDimension" diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 5e7ba1eb93..10b44bdab2 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -9,13 +9,34 @@ namespace FlexFlow { +size_t num_dims(MachineView const &mv) { + return get_strides(mv).size(); +} + +DeviceType get_device_type(MachineView const &mv) { + return mv.start.device_type; +} + +std::vector get_strides(MachineView const &mv) { + return transform(mv.dimensions, + [](MachineViewDimension const &dim) { return dim.stride; }); +} + +std::vector + get_projections(MachineView const &mv) { + return transform(mv.dimensions, [](MachineViewDimension const &dim) { + return dim.projection; + }); +} + static std::vector get_projection_indices(MachineView const &mv, MachineSpecificationDimension dimension) { std::vector projection_indices; - for (size_t i = 0; i < 
mv.projection.size(); ++i) { - if (mv.projection[i] == dimension) { + std::vector projections = get_projections(mv); + for (size_t i = 0; i < projections.size(); ++i) { + if (projections[i] == dimension) { projection_indices.push_back(i); } } @@ -32,11 +53,13 @@ static int compute_index(int start_idx, std::vector coord_points; std::vector strides; + std::vector projections = get_projections(mv); + std::vector mv_strides = get_strides(mv); for (int i : projection_indices) { - int dim_size = task.degrees[i] * mv.strides[i].unwrapped; + int dim_size = task.degrees[i] * mv_strides[i].unwrapped; sizes.push_back(dim_size); coord_points.push_back(coord.raw_coord[i]); - strides.push_back(mv.strides[i].unwrapped); + strides.push_back(mv_strides[i].unwrapped); } std::vector coeffs = scanl(sizes, 1, std::multiplies()); @@ -81,11 +104,4 @@ std::unordered_set }); } -size_t num_dims(MachineView const &mv) { - return mv.strides.size(); -} - -DeviceType get_device_type(MachineView const &mv) { - return mv.start.device_type; -} } // namespace FlexFlow diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index c7b7a67b09..37e709848b 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -8,14 +8,14 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("MachineView - utility functions") { - MachineView mv = - MachineView{MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU}, - {stride_t{2}, stride_t{2}}, - {MachineSpecificationDimension::INTER_NODE, - MachineSpecificationDimension::INTER_NODE}}; + MachineView mv = MachineView{ + MachineSpaceCoordinate{ + /*node_idx=*/0, /*device_idx=*/0, DeviceType::GPU}, + {MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTER_NODE}, + MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTER_NODE}}}; SUBCASE("num_dims") { CHECK(num_dims(mv) == 2); @@ -28,12 +28,11 @@ TEST_SUITE(FF_TEST_SUITE) { 
TEST_CASE("get_machine_space_coordinate") { SUBCASE("1D case") { - // This operator has shape (3,), and thus 3 tasks + // This operator has shape (3,), and thus 3 tasks. // The (only) dimension is projected on the INTER (device) dimension with - // a stride of 2. - // The start of the projection defined by MachineView starts at - // MachineSpaceCoordinate (0,1), and the machine space has 1 node and 6 - // devices per node. + // a stride of 2. The start of the projection defined by MachineView + // starts at MachineSpaceCoordinate (0,1), and the machine space has 1 + // node and 6 devices per node. /** * The tasks will thus be distributed like this: @@ -42,30 +41,24 @@ TEST_SUITE(FF_TEST_SUITE) { * +-------+-------+-------+-------+-------+-------+ * Where the (x,) are the `TaskSpaceCoordinate`s, and the underlying grid * is the machine space. - * */ OperatorTaskSpace task = OperatorTaskSpace{{3}}; - MachineView mv = - MachineView{MachineSpaceCoordinate{ - /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}, - {{stride_t{2}}}, - {{MachineSpecificationDimension::INTRA_NODE}}}; - - MachineSpecification ms = MachineSpecification{ - /*num_nodes=*/1, - /*num_cpus_per_node=*/6, - /*num_gpus_per_node=*/6, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0, - }; + MachineView mv = MachineView{ + MachineSpaceCoordinate{ + /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}, + {MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTRA_NODE}}}; + MachineSpecification ms = + MachineSpecification{/*num_nodes=*/1, + /*num_cpus_per_node=*/6, + /*num_gpus_per_node=*/6, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0}; SUBCASE("Task with TaskSpaceCoordinate = (0,)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/1, - DeviceType::GPU, - }; + /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}; MachineSpaceCoordinate result = 
get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); @@ -74,10 +67,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("Task with TaskSpaceCoordinate = (1,)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/3, - DeviceType::GPU, - }; + /*node_idx=*/0, /*device_idx=*/3, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); @@ -86,19 +76,14 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("Task with TaskSpaceCoordinate = (2,)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{2}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/5, - DeviceType::GPU, - }; + /*node_idx=*/0, /*device_idx=*/5, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } SUBCASE("TaskSpaceCoordinate is out of bounds") { - TaskSpaceCoordinate coord = TaskSpaceCoordinate{{4}}; - std::optional result = get_machine_space_coordinate(task, mv, coord, ms); std::optional correct = std::nullopt; @@ -109,8 +94,7 @@ TEST_SUITE(FF_TEST_SUITE) { // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks. // The first dimension is projected onto the INTER (node) dimension with // stride 1, while the second dimension is projected onto the INTRA - // (device) dimension with stride 2. - // The start of the projection defined + // (device) dimension with stride 2. The start of the projection defined // by MachineView is at MachineSpaceCoordinates (1, 2), and the machine // space has 3 nodes and 5 devices per node. 
@@ -130,15 +114,11 @@ TEST_SUITE(FF_TEST_SUITE) { OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; MachineView mv = MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/1, - /*device_idx=*/2, - DeviceType::GPU, - }, - {{stride_t{1}, stride_t{2}}}, - {{MachineSpecificationDimension::INTER_NODE, - MachineSpecificationDimension::INTRA_NODE}}, - }; - + /*node_idx=*/1, /*device_idx=*/2, DeviceType::GPU}, + {MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTER_NODE}, + MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTRA_NODE}}}; MachineSpecification ms = MachineSpecification{/*num_nodes=*/3, /*num_cpus_per_node=*/5, @@ -149,10 +129,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 0}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, - /*device_idx=*/2, - DeviceType::GPU, - }; + /*node_idx=*/1, /*device_idx=*/2, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); @@ -161,21 +138,16 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, - /*device_idx=*/4, - DeviceType::GPU, - }; + /*node_idx=*/1, /*device_idx=*/4, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } + SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/2, - /*device_idx=*/2, - DeviceType::GPU, - }; + /*node_idx=*/2, /*device_idx=*/2, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); @@ -184,10 +156,7 @@ 
TEST_SUITE(FF_TEST_SUITE) { SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/2, - /*device_idx=*/4, - DeviceType::GPU, - }; + /*node_idx=*/2, /*device_idx=*/4, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); @@ -202,27 +171,21 @@ TEST_SUITE(FF_TEST_SUITE) { // space has 2 nodes and 6 devices per node. /** - * +-------+-------+-------+-------+-------+-------+ - * | | | | | | | * +-------+-------+-------+-------+-------+-------+ * | (0,0) | (1,0) | | | (0,1) | (1,1) | * +-------+-------+-------+-------+-------+-------+ - * Where the (x,y) are the `TaskSpaceCoordinate`s, and the underlying * grid is the machine space. */ OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; - MachineView mv = - MachineView{MachineSpaceCoordinate{ - /*node_idx=*/1, - /*device_idx=*/0, - DeviceType::GPU, - }, - {{stride_t{1}, stride_t{2}}}, - {{MachineSpecificationDimension::INTRA_NODE, - MachineSpecificationDimension::INTRA_NODE}}}; - + MachineView mv = MachineView{ + MachineSpaceCoordinate{ + /*node_idx=*/1, /*device_idx=*/0, DeviceType::GPU}, + {MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTRA_NODE}, + MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTRA_NODE}}}; MachineSpecification ms = MachineSpecification{/*num_nodes=*/2, /*num_cpus_per_node=*/6, @@ -233,10 +196,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 0}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, - /*device_idx=*/0, - DeviceType::GPU, - }; + /*node_idx=*/1, /*device_idx=*/0, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); @@ -245,21 +205,16 @@ 
TEST_SUITE(FF_TEST_SUITE) { SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, - /*device_idx=*/4, - DeviceType::GPU, - }; + /*node_idx=*/1, /*device_idx=*/4, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } + SUBCASE("Task with TaskSpaceCoordinate = (1,0)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, - /*device_idx=*/1, - DeviceType::GPU, - }; + /*node_idx=*/1, /*device_idx=*/1, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); @@ -268,10 +223,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, - /*device_idx=*/5, - DeviceType::GPU, - }; + /*node_idx=*/1, /*device_idx=*/5, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); @@ -279,24 +231,24 @@ TEST_SUITE(FF_TEST_SUITE) { } SUBCASE("3D case") { - // This operator has shape (2, 2, 2), and thus 2 * 2 * 2 = 8 tasks. // - The first dimension is projected onto the INTER (node) dimension // with stride 1, // - The second dimension is projected onto the INTRA (device) dimension - // with stride 2. + // with stride 2, // - The third dimension is projected onto the INTRA (device) dimension - // with stride 1. - // The start of the projection defined by MachineView is at - // MachineSpaceCoordinates (0, 1), and the machine space has 2 nodes and - // 8 devices per node. + // with stride 1. 
The start of the projection defined by MachineView is + // at MachineSpaceCoordinates (0, 1), and the machine space has 2 nodes + // and 8 devices per node. /** * The tasks will thus be distributed like this: * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | |(0,0,0)| |(0,0,1)| |(0,1,0)| |(0,1,1)| + * | | (0,0,0) | | (0,0,1) | | (0,1,0) | | + * (0,1,1) | * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | |(1,0,0)| |(1,0,1)| |(1,1,0)| |(1,1,1)| + * | | (1,0,0) | | (1,0,1) | | (1,1,0) | | + * (1,1,1) | * +-------+-------+-------+-------+-------+-------+-------+-------+ * Where the (x,y,z) are the `TaskSpaceCoordinate`s, and the underlying * grid is the machine space. @@ -305,42 +257,42 @@ TEST_SUITE(FF_TEST_SUITE) { OperatorTaskSpace task = OperatorTaskSpace{{2, 2, 2}}; MachineView mv = MachineView{ MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/1, - DeviceType::GPU, - }, - {{stride_t{1}, stride_t{2}, stride_t{1}}}, - {{MachineSpecificationDimension::INTER_NODE, - MachineSpecificationDimension::INTRA_NODE, - MachineSpecificationDimension::INTRA_NODE}}, - }; - MachineSpecification ms = MachineSpecification{ - /*num_nodes=*/2, - /*num_cpus_per_node=*/8, - /*num_gpus_per_node=*/8, - /*inter_node_bandwidth=*/0, - /*intra_node_bandwidth=*/0, - }; + /*node_idx=*/0, /*device_idx=*/1, DeviceType::GPU}, + {MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTER_NODE}, + MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTRA_NODE}, + MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTRA_NODE}}}; + MachineSpecification ms = + MachineSpecification{/*num_nodes=*/2, + /*num_cpus_per_node=*/8, + /*num_gpus_per_node=*/8, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0}; SUBCASE("Task with TaskSpaceCoordinate = (0,0,1)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1, 0}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/0, - 
/*device_idx=*/3, - DeviceType::GPU, - }; + /*node_idx=*/0, /*device_idx=*/3, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); } - SUBCASE("Task with TaskSpaceCoordinate = (1, 1, 0)") { + SUBCASE("Task with TaskSpaceCoordinate = (1,1,0)") { TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0, 1}}; MachineSpaceCoordinate correct = MachineSpaceCoordinate{ - /*node_idx=*/1, - /*device_idx=*/5, - DeviceType::GPU, - }; + /*node_idx=*/1, /*device_idx=*/5, DeviceType::GPU}; + MachineSpaceCoordinate result = + get_machine_space_coordinate(task, mv, coord, ms).value(); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (1,1,1)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1, 1}}; + MachineSpaceCoordinate correct = MachineSpaceCoordinate{ + /*node_idx=*/1, /*device_idx=*/7, DeviceType::GPU}; MachineSpaceCoordinate result = get_machine_space_coordinate(task, mv, coord, ms).value(); CHECK(correct == result); From 5cc2a2f1fcf7610b9366ee3652d4806ed090f326 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 8 Oct 2024 18:15:00 -0700 Subject: [PATCH 29/34] Minor PR fixes --- lib/compiler/test/src/allowed_machine_views.cc | 15 +++++++++++---- lib/pcg/test/src/pcg/machine_specification.cc | 2 +- lib/pcg/test/src/pcg/machine_view.cc | 6 ++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/compiler/test/src/allowed_machine_views.cc b/lib/compiler/test/src/allowed_machine_views.cc index a337ca5e69..936894ad2d 100644 --- a/lib/compiler/test/src/allowed_machine_views.cc +++ b/lib/compiler/test/src/allowed_machine_views.cc @@ -60,17 +60,24 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("2 degrees of parallelism") { - MachineSpecification ms = MachineSpecification{3, 3, 3, 0, 0}; + MachineSpecification ms = MachineSpecification{ + /*num_nodes=*/3, + /*num_cpus_per_node=*/3, + /*num_gpus_per_node=*/3, + /*inter_node_bandwidth=*/0, + 
/*intra_node_bandwidth=*/0, + }; OperatorTaskSpace task = OperatorTaskSpace{{2, 3}}; - auto make_2d_view = [&](int start_x, - int start_y, + auto make_2d_view = [&](int start_node_idx, + int start_device_idx, int stride1, int stride2, MachineSpecificationDimension m1, MachineSpecificationDimension m2) { return MachineView{ - MachineSpaceCoordinate{start_x, start_y, DeviceType::GPU}, + MachineSpaceCoordinate{ + start_node_idx, start_device_idx, DeviceType::GPU}, {MachineViewDimension{stride_t{stride1}, m1}, MachineViewDimension{stride_t{stride2}, m2}}, }; diff --git a/lib/pcg/test/src/pcg/machine_specification.cc b/lib/pcg/test/src/pcg/machine_specification.cc index 8acfc8e3ba..c183ae0d31 100644 --- a/lib/pcg/test/src/pcg/machine_specification.cc +++ b/lib/pcg/test/src/pcg/machine_specification.cc @@ -41,7 +41,7 @@ TEST_SUITE(FF_TEST_SUITE) { device_id_t result = get_device_id(ms, coord); CHECK(correct == result); } - SUBCASE("MachineSpaceCoordinate for given machine spec") { + SUBCASE("MachineSpaceCoordinate out of bounds for given machine spec") { MachineSpaceCoordinate coord = MachineSpaceCoordinate{ /*node_idx=*/2, /*device_idx=*/18, diff --git a/lib/pcg/test/src/pcg/machine_view.cc b/lib/pcg/test/src/pcg/machine_view.cc index 37e709848b..dcf22d6c00 100644 --- a/lib/pcg/test/src/pcg/machine_view.cc +++ b/lib/pcg/test/src/pcg/machine_view.cc @@ -244,11 +244,9 @@ TEST_SUITE(FF_TEST_SUITE) { /** * The tasks will thus be distributed like this: * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | | (0,0,0) | | (0,0,1) | | (0,1,0) | | - * (0,1,1) | + * | |(0,0,0)| |(0,0,1)| |(0,1,0)| |(0,1,1)| * +-------+-------+-------+-------+-------+-------+-------+-------+ - * | | (1,0,0) | | (1,0,1) | | (1,1,0) | | - * (1,1,1) | + * | |(1,0,0)| |(1,0,1)| |(1,1,0)| |(1,1,1)| * +-------+-------+-------+-------+-------+-------+-------+-------+ * Where the (x,y,z) are the `TaskSpaceCoordinate`s, and the underlying * grid is the machine space. 
From e1cd5a2a53fc8240f20b31e1af52b31b1e6a66f4 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Tue, 8 Oct 2024 22:18:33 -0700 Subject: [PATCH 30/34] .cc machine view fixes + added StartInvariantMachineView --- .../src/compiler/allowed_machine_views.cc | 41 +++--- lib/pcg/include/pcg/machine_view.h | 7 +- .../pcg/start_invariant_machine_view.h | 46 ++++++ .../start_invariant_machine_view.struct.toml | 29 ++++ lib/pcg/src/pcg/machine_view.cc | 138 +++++++++--------- .../src/pcg/start_invariant_machine_view.cc | 79 ++++++++++ .../src/pcg/start_invariant_machine_view.cc | 84 +++++++++++ 7 files changed, 337 insertions(+), 87 deletions(-) create mode 100644 lib/pcg/include/pcg/start_invariant_machine_view.h create mode 100644 lib/pcg/include/pcg/start_invariant_machine_view.struct.toml create mode 100644 lib/pcg/src/pcg/start_invariant_machine_view.cc create mode 100644 lib/pcg/test/src/pcg/start_invariant_machine_view.cc diff --git a/lib/compiler/src/compiler/allowed_machine_views.cc b/lib/compiler/src/compiler/allowed_machine_views.cc index 1f5ca4e114..1c226f79b0 100644 --- a/lib/compiler/src/compiler/allowed_machine_views.cc +++ b/lib/compiler/src/compiler/allowed_machine_views.cc @@ -24,19 +24,19 @@ namespace FlexFlow { bool is_valid_machine_view(MachineView const &mv, OperatorTaskSpace const &task, MachineSpecification const &ms) { - std::optional maximum_device_coords = + std::optional maximum_device_coord = get_machine_space_coordinate( task, mv, get_task_space_maximum_coordinate(task), ms); - return maximum_device_coords.has_value(); + return maximum_device_coord.has_value(); } -/* Generates a set of candidate `MachineView`s - * The returned set includes all valid machine views, and might contain - invalid ones. This function should never be used externally (see - * `get_allowed_partial_machine_view_mappings` instead). There is no - guarantee that a non-empty returned set contains a valid machine view (i.e. 
- its possible for all - * `MachineView`s to be invalid) +/* + * Generates a set of candidate `MachineView`s. + * The returned set includes all valid machine views, and might contain invalid + * ones. This function should not be used externally (see + * `get_allowed_machine_views` instead). There is no guarantee that a non-empty + * returned set contains a valid machine view (i.e. it's possible for all + * the returned `MachineView`s to be invalid) */ static std::unordered_set get_candidate_machine_views(MachineSpecification const &machine_spec, @@ -58,7 +58,7 @@ static std::unordered_set std::vector single_stride_range = transform(range(1, max_stride_upper_bound + 1), - [](int stride) { return stride_t(stride); }); + [](int stride) { return stride_t{stride}; }); std::unordered_multiset> raw_stride_vectors = cartesian_product(replicate(tensor_dims.size(), single_stride_range)); std::unordered_multiset strides = @@ -71,15 +71,16 @@ static std::unordered_set auto candidate_starts = [](MachineSpecification const &ms, DeviceType const &device_type) { std::unordered_set result; - for (int i : range(ms.num_nodes)) { - for (int j : range(get_num_devices_per_node(ms, device_type))) { - result.insert(MachineSpaceCoordinate{i, j, device_type}); + for (int node_idx : range(ms.num_nodes)) { + for (int device_idx : range(get_num_devices_per_node(ms, device_type))) { + result.insert( + MachineSpaceCoordinate{node_idx, device_idx, device_type}); } } return result; }; - auto candidate_projections = [](OperatorTaskSpace const &task) { + auto candidate_dimensions = [](OperatorTaskSpace const &task) { std::unordered_set options = { MachineSpecificationDimension::INTER_NODE, MachineSpecificationDimension::INTRA_NODE}; @@ -95,13 +96,11 @@ static std::unordered_set candidate_strides(tensor_dims, total_devices)) { for (MachineSpaceCoordinate start : candidate_starts(machine_spec, device_type)) { - for (std::vector const &proj : - candidate_projections(task)) { - std::vector dimensions = 
- transform(zip(strides.raw_strides, proj), [&](auto const &p) { - return MachineViewDimension{p.first, p.second}; - }); - machine_views.insert(MachineView{start, dimensions}); + for (std::vector const &dims : + candidate_dimensions(task)) { + machine_views.insert( + machine_view_from_strides_and_machine_spec_dimensions( + start, strides.raw_strides, dims)); } } } diff --git a/lib/pcg/include/pcg/machine_view.h b/lib/pcg/include/pcg/machine_view.h index 2f53ad42d7..293227b7a1 100644 --- a/lib/pcg/include/pcg/machine_view.h +++ b/lib/pcg/include/pcg/machine_view.h @@ -19,7 +19,12 @@ DeviceType get_device_type(MachineView const &mv); std::vector get_strides(MachineView const &mv); std::vector - get_projections(MachineView const &mv); + get_dimensions(MachineView const &mv); + +MachineView machine_view_from_strides_and_machine_spec_dimensions( + MachineSpaceCoordinate const &start, + std::vector const &strides, + std::vector const &dims); std::optional get_machine_space_coordinate(OperatorTaskSpace const &task, diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.h b/lib/pcg/include/pcg/start_invariant_machine_view.h new file mode 100644 index 0000000000..db00c3d57e --- /dev/null +++ b/lib/pcg/include/pcg/start_invariant_machine_view.h @@ -0,0 +1,46 @@ +#ifndef _FLEXFLOW_PCG_INCLUDE_PCG_START_INVARIANT_MACHINE_VIEW_H +#define _FLEXFLOW_PCG_INCLUDE_PCG_START_INVARIANT_MACHINE_VIEW_H + +#include "pcg/machine_specification.dtg.h" +#include "pcg/machine_view.dtg.h" +#include "pcg/operator_task_space.dtg.h" +#include "pcg/start_invariant_machine_view.dtg.h" +#include "pcg/task_space_coordinate.dtg.h" +#include + +namespace FlexFlow { + +MachineView + machine_view_from_start_invariant(StartInvariantMachineView const &mv, + MachineSpaceCoordinate const &start); +StartInvariantMachineView + start_invariant_from_machine_view(MachineView const &mv); + +size_t num_dims(StartInvariantMachineView const &mv); + +DeviceType get_device_type(StartInvariantMachineView const 
&mv); + +std::vector get_strides(StartInvariantMachineView const &mv); + +std::vector + get_dimensions(StartInvariantMachineView const &mv); + +StartInvariantMachineView + start_invariant_machine_view_from_strides_and_machine_spec_dimensions( + std::vector const &strides, + std::vector const &dims); + +std::optional + get_machine_space_coordinate(OperatorTaskSpace const &task, + StartInvariantMachineView const &mv, + TaskSpaceCoordinate const &coordinates, + MachineSpecification const &ms); + +std::unordered_set + get_machine_space_coordinates(OperatorTaskSpace const &task, + StartInvariantMachineView const &mv, + MachineSpecification const &ms); + +} // namespace FlexFlow + +#endif diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml b/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml new file mode 100644 index 0000000000..a1b2b40524 --- /dev/null +++ b/lib/pcg/include/pcg/start_invariant_machine_view.struct.toml @@ -0,0 +1,29 @@ +namespace = "FlexFlow" +name = "StartInvariantMachineView" +features = [ + "eq", + "ord", + "hash", + "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "pcg/machine_view_dimension.dtg.h", + "pcg/device_type.dtg.h" +] + +src_includes = [ + "utils/fmt/vector.h", + "utils/hash/vector.h", +] + +[[fields]] +name = "dimensions" +type = "std::vector<::FlexFlow::MachineViewDimension>" + + +[[fields]] +name = "device_type" +type = "::FlexFlow::DeviceType" diff --git a/lib/pcg/src/pcg/machine_view.cc b/lib/pcg/src/pcg/machine_view.cc index 10b44bdab2..18f6cacb7e 100644 --- a/lib/pcg/src/pcg/machine_view.cc +++ b/lib/pcg/src/pcg/machine_view.cc @@ -2,6 +2,8 @@ #include "pcg/machine_specification.h" #include "pcg/operator_task_space.h" #include "utils/containers/contains.h" +#include "utils/containers/count.h" +#include "utils/containers/filter.h" #include "utils/containers/scanl.h" #include "utils/containers/sum.h" #include "utils/containers/transform.h" @@ -23,84 +25,90 @@ std::vector get_strides(MachineView 
const &mv) { } std::vector - get_projections(MachineView const &mv) { + get_dimensions(MachineView const &mv) { return transform(mv.dimensions, [](MachineViewDimension const &dim) { return dim.projection; }); } -static std::vector - get_projection_indices(MachineView const &mv, - MachineSpecificationDimension dimension) { - - std::vector projection_indices; - std::vector projections = get_projections(mv); - for (size_t i = 0; i < projections.size(); ++i) { - if (projections[i] == dimension) { - projection_indices.push_back(i); - } - } - return projection_indices; -} - -static int compute_index(int start_idx, - std::vector const &projection_indices, - OperatorTaskSpace const &task, - MachineView const &mv, - TaskSpaceCoordinate const &coord) { - - std::vector sizes; - std::vector coord_points; - std::vector strides; - - std::vector projections = get_projections(mv); - std::vector mv_strides = get_strides(mv); - for (int i : projection_indices) { - int dim_size = task.degrees[i] * mv_strides[i].unwrapped; - sizes.push_back(dim_size); - coord_points.push_back(coord.raw_coord[i]); - strides.push_back(mv_strides[i].unwrapped); - } - - std::vector coeffs = scanl(sizes, 1, std::multiplies()); - - int index = start_idx; - for (auto [coeff, coord_point, stride] : zip(coeffs, coord_points, strides)) { - index += coeff * coord_point * stride; - } - return index; +MachineView machine_view_from_strides_and_machine_spec_dimensions( + MachineSpaceCoordinate const &start, + std::vector const &strides, + std::vector const &dims) { + std::vector dimensions = + transform(zip(strides, dims), [&](auto const &p) { + return MachineViewDimension{p.first, p.second}; + }); + return MachineView{start, dimensions}; } -std::optional - get_machine_space_coordinate(OperatorTaskSpace const &task, - MachineView const &mv, - TaskSpaceCoordinate const &coord, - MachineSpecification const &ms) { - - std::vector inter_projection_indices = - get_projection_indices(mv, 
MachineSpecificationDimension::INTER_NODE); - std::vector intra_projection_indices = - get_projection_indices(mv, MachineSpecificationDimension::INTRA_NODE); - - int node_idx = compute_index( - mv.start.node_idx, inter_projection_indices, task, mv, coord); - int device_idx = compute_index( - mv.start.device_idx, intra_projection_indices, task, mv, coord); - MachineSpaceCoordinate ms_coord = - MachineSpaceCoordinate{node_idx, device_idx, get_device_type(mv)}; - if (!is_valid_machine_space_coordinate(ms, ms_coord)) { +std::optional get_machine_space_coordinate( + OperatorTaskSpace const &task, + MachineView const &machine_view, + TaskSpaceCoordinate const &coord, + MachineSpecification const &machine_specification) { + + auto get_dimension_indices_for_dimension = + [&](MachineSpecificationDimension dimension) { + std::vector mv_dimensions = + get_dimensions(machine_view); + return filter(count(mv_dimensions.size()), [&](size_t idx) { + return mv_dimensions.at(idx) == dimension; + }); + }; + + auto compute_index = [&](int start_idx, + std::vector const &dimension_indices) { + std::vector mv_strides = get_strides(machine_view); + + std::vector sizes = transform(dimension_indices, [&](size_t i) { + return task.degrees.at(i) * mv_strides.at(i).unwrapped; + }); + std::vector coord_points = transform( + dimension_indices, [&](size_t i) { return coord.raw_coord.at(i); }); + std::vector strides = transform(dimension_indices, [&](size_t i) { + return mv_strides.at(i).unwrapped; + }); + + std::vector coeffs = scanl(sizes, 1, std::multiplies()); + + int index = start_idx; + for (auto [coeff, coord_point, stride] : + zip(coeffs, coord_points, strides)) { + index += coeff * coord_point * stride; + } + return index; + }; + + std::vector inter_dimension_indices = + get_dimension_indices_for_dimension( + MachineSpecificationDimension::INTER_NODE); + std::vector intra_dimension_indices = + get_dimension_indices_for_dimension( + MachineSpecificationDimension::INTRA_NODE); + + int 
node_idx = + compute_index(machine_view.start.node_idx, inter_dimension_indices); + int device_idx = + compute_index(machine_view.start.device_idx, intra_dimension_indices); + MachineSpaceCoordinate ms_coord = MachineSpaceCoordinate{ + node_idx, device_idx, get_device_type(machine_view)}; + + if (!is_valid_machine_space_coordinate(machine_specification, ms_coord)) { return std::nullopt; } return ms_coord; } -std::unordered_set - get_machine_space_coordinates(OperatorTaskSpace const &task, - MachineView const &mv, - MachineSpecification const &ms) { +std::unordered_set get_machine_space_coordinates( + OperatorTaskSpace const &task, + MachineView const &machine_view, + MachineSpecification const &machine_specification) { return transform( - get_task_space_coordinates(task), [&](TaskSpaceCoordinate const &c) { - return get_machine_space_coordinate(task, mv, c, ms).value(); + get_task_space_coordinates(task), [&](TaskSpaceCoordinate const &coord) { + return get_machine_space_coordinate( + task, machine_view, coord, machine_specification) + .value(); }); } diff --git a/lib/pcg/src/pcg/start_invariant_machine_view.cc b/lib/pcg/src/pcg/start_invariant_machine_view.cc new file mode 100644 index 0000000000..2f3887e734 --- /dev/null +++ b/lib/pcg/src/pcg/start_invariant_machine_view.cc @@ -0,0 +1,79 @@ +#include "pcg/start_invariant_machine_view.h" +#include "pcg/machine_view.h" +#include "pcg/operator_task_space.h" +#include "utils/containers/count.h" +#include "utils/containers/filter.h" +#include "utils/containers/scanl.h" +#include "utils/containers/transform.h" +#include "utils/containers/zip.h" + +namespace FlexFlow { + +MachineView machine_view_from_start_invariant( + StartInvariantMachineView const &start_inv_mv, + MachineSpaceCoordinate const &start) { + return MachineView{start, start_inv_mv.dimensions}; +} + +StartInvariantMachineView + start_invariant_from_machine_view(MachineView const &mv) { + return StartInvariantMachineView{mv.dimensions, 
get_device_type(mv)}; +} + +size_t num_dims(StartInvariantMachineView const &start_inv_mv) { + return start_inv_mv.dimensions.size(); +} + +DeviceType get_device_type(StartInvariantMachineView const &start_inv_mv) { + return start_inv_mv.device_type; +} + +std::vector + get_strides(StartInvariantMachineView const &start_inv_mv) { + return transform(start_inv_mv.dimensions, + [](MachineViewDimension const &dim) { return dim.stride; }); +} + +std::vector + get_dimensions(StartInvariantMachineView const &start_inv_mv) { + return transform( + start_inv_mv.dimensions, + [](MachineViewDimension const &dim) { return dim.projection; }); +} + +StartInvariantMachineView + start_invariant_machine_view_from_strides_and_machine_spec_dimensions( + std::vector const &strides, + std::vector const &dims, + DeviceType device_type) { + std::vector dimensions = + transform(zip(strides, dims), [&](auto const &p) { + return MachineViewDimension{p.first, p.second}; + }); + return StartInvariantMachineView{dimensions, device_type}; +} + +std::optional get_machine_space_coordinate( + OperatorTaskSpace const &task, + StartInvariantMachineView const &start_inv_machine_view, + TaskSpaceCoordinate const &coord, + MachineSpecification const &machine_specification) { + MachineView mv = machine_view_from_start_invariant( + start_inv_machine_view, + MachineSpaceCoordinate{0, 0, get_device_type(start_inv_machine_view)}); + return get_machine_space_coordinate(task, mv, coord, machine_specification); +} + +std::unordered_set get_machine_space_coordinates( + OperatorTaskSpace const &task, + StartInvariantMachineView const &start_inv_machine_view, + MachineSpecification const &machine_specification) { + return transform( + get_task_space_coordinates(task), [&](TaskSpaceCoordinate const &coord) { + return get_machine_space_coordinate( + task, start_inv_machine_view, coord, machine_specification) + .value(); + }); +} + +} // namespace FlexFlow diff --git 
a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc new file mode 100644 index 0000000000..3007b228c1 --- /dev/null +++ b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc @@ -0,0 +1,84 @@ +#include "pcg/start_invariant_machine_view.h" +#include "utils/fmt/vector.h" +#include + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("StartInvariantMachineView - utility functions") { + StartInvariantMachineView simv = StartInvariantMachineView{ + {MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTER_NODE}, + MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTER_NODE}}, + DeviceType::GPU}; + + SUBCASE("num_dims") { + int result = num_dims(simv); + int correct = 2; + CHECK(result == correct); + } + + SUBCASE("get_device_type") { + DeviceType result = get_device_type(simv); + DeviceType correct = DeviceType::GPU; + CHECK(result == correct); + } + + SUBCASE("get_strides") { + std::vector result = get_strides(simv); + std::vector correct = {stride_t{2}, stride_t{2}}; + CHECK(result == correct); + } + + SUBCASE("get_dimensions") { + std::vector result = get_dimensions(simv); + std::vector correct = { + MachineSpecificationDimension::INTER_NODE, + MachineSpecificationDimension::INTER_NODE}; + CHECK(result == correct); + } + } + + TEST_CASE("StartInvariantMachineView - conversions") { + MachineSpaceCoordinate start = + MachineSpaceCoordinate{1, 2, DeviceType::GPU}; + std::vector dimensions = { + MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTER_NODE}, + MachineViewDimension{stride_t{3}, + MachineSpecificationDimension::INTRA_NODE}}; + + MachineView mv = MachineView{start, dimensions}; + StartInvariantMachineView simv = + StartInvariantMachineView{dimensions, DeviceType::GPU}; + + SUBCASE("start_invariant_from_machine_view") { + StartInvariantMachineView result = start_invariant_from_machine_view(mv); + StartInvariantMachineView correct = 
simv; + CHECK(result == correct); + } + + SUBCASE("machine_view_from_start_invariant") { + MachineView result = machine_view_from_start_invariant(simv, start); + MachineView correct = mv; + CHECK(result == correct); + } + + SUBCASE("conversion is invertible") { + SUBCASE("MachineView -> StartInvariant -> MachineView") { + MachineView result = machine_view_from_start_invariant( + start_invariant_from_machine_view(mv), start); + MachineView correct = mv; + CHECK(result == correct); + } + + SUBCASE("StartInvariant -> MachineView -> StartInvariant") { + StartInvariantMachineView result = start_invariant_from_machine_view( + machine_view_from_start_invariant(simv, start)); + StartInvariantMachineView correct = simv; + CHECK(result == correct); + } + } + } +} From 93f9bb4eeb6187a16759baa088291c6892ca168d Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Wed, 9 Oct 2024 15:58:21 -0700 Subject: [PATCH 31/34] minor PR fixes --- lib/pcg/include/pcg/machine_space_offset.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 lib/pcg/include/pcg/machine_space_offset.h diff --git a/lib/pcg/include/pcg/machine_space_offset.h b/lib/pcg/include/pcg/machine_space_offset.h new file mode 100644 index 0000000000..2f702cc518 --- /dev/null +++ b/lib/pcg/include/pcg/machine_space_offset.h @@ -0,0 +1,14 @@ +#ifndef _FLEXFLOW_PCG_INCLUDE_MACHINE_SPACE_OFFSET_H +#define _FLEXFLOW_PCG_INCLUDE_MACHINE_SPACE_OFFSET_H + +#include "pcg/machine_space_coordinate.dtg.h" +#include "pcg/machine_space_offset.dtg.h" + +namespace FlexFlow { + +MachineSpaceOffset get_machine_space_offset_from_coordinate( + MachineSpaceCoordinate const &start, MachineSpaceCoordinate const &coord); + +} // namespace FlexFlow + +#endif From 3c3518ab65d0e8136096d0390c9c480337368c15 Mon Sep 17 00:00:00 2001 From: Pietro Max Marsella Date: Wed, 9 Oct 2024 15:59:37 -0700 Subject: [PATCH 32/34] minor fixes --- .../pcg/machine_space_offset.struct.toml | 26 ++++ .../pcg/start_invariant_machine_view.h | 19 
+-- lib/pcg/src/pcg/machine_space_offset.cc | 25 +++ .../src/pcg/start_invariant_machine_view.cc | 23 ++- .../src/pcg/start_invariant_machine_view.cc | 145 ++++++++++++++++++ 5 files changed, 221 insertions(+), 17 deletions(-) create mode 100644 lib/pcg/include/pcg/machine_space_offset.struct.toml create mode 100644 lib/pcg/src/pcg/machine_space_offset.cc diff --git a/lib/pcg/include/pcg/machine_space_offset.struct.toml b/lib/pcg/include/pcg/machine_space_offset.struct.toml new file mode 100644 index 0000000000..3f6eab38fd --- /dev/null +++ b/lib/pcg/include/pcg/machine_space_offset.struct.toml @@ -0,0 +1,26 @@ +namespace = "FlexFlow" +name = "MachineSpaceOffset" +features = [ + "eq", + "ord", + "hash", + "json", + # "rapidcheck", + "fmt", +] + +includes = [ + "pcg/device_type.dtg.h", +] + +[[fields]] +name = "node_offset" +type = "int" + +[[fields]] +name = "device_offset" +type = "int" + +[[fields]] +name = "device_type" +type = "::FlexFlow::DeviceType" diff --git a/lib/pcg/include/pcg/start_invariant_machine_view.h b/lib/pcg/include/pcg/start_invariant_machine_view.h index db00c3d57e..f5091c69d1 100644 --- a/lib/pcg/include/pcg/start_invariant_machine_view.h +++ b/lib/pcg/include/pcg/start_invariant_machine_view.h @@ -1,6 +1,7 @@ #ifndef _FLEXFLOW_PCG_INCLUDE_PCG_START_INVARIANT_MACHINE_VIEW_H #define _FLEXFLOW_PCG_INCLUDE_PCG_START_INVARIANT_MACHINE_VIEW_H +#include "pcg/machine_space_offset.h" #include "pcg/machine_specification.dtg.h" #include "pcg/machine_view.dtg.h" #include "pcg/operator_task_space.dtg.h" @@ -30,16 +31,16 @@ StartInvariantMachineView std::vector const &strides, std::vector const &dims); -std::optional - get_machine_space_coordinate(OperatorTaskSpace const &task, - StartInvariantMachineView const &mv, - TaskSpaceCoordinate const &coordinates, - MachineSpecification const &ms); +std::optional + get_machine_space_offset(OperatorTaskSpace const &task, + StartInvariantMachineView const &mv, + TaskSpaceCoordinate const &coordinates, + 
MachineSpecification const &ms); -std::unordered_set - get_machine_space_coordinates(OperatorTaskSpace const &task, - StartInvariantMachineView const &mv, - MachineSpecification const &ms); +std::unordered_set + get_machine_space_offsets(OperatorTaskSpace const &task, + StartInvariantMachineView const &mv, + MachineSpecification const &ms); } // namespace FlexFlow diff --git a/lib/pcg/src/pcg/machine_space_offset.cc b/lib/pcg/src/pcg/machine_space_offset.cc new file mode 100644 index 0000000000..9990023f8c --- /dev/null +++ b/lib/pcg/src/pcg/machine_space_offset.cc @@ -0,0 +1,25 @@ +#include "pcg/machine_space_offset.h" +#include "utils/exception.h" + +namespace FlexFlow { +MachineSpaceOffset get_machine_space_offset_from_coordinate( + MachineSpaceCoordinate const &start, MachineSpaceCoordinate const &coord) { + if ((coord.device_idx < start.device_idx) || + (coord.node_idx < start.node_idx)) { + throw mk_runtime_error(fmt::format( + "One of the coordinates of start {} is greater than one of the " + "coordinates of coord {}, are you sure you didn't swap them?", + start, + coord)); + } + if (start.device_type != coord.device_type) { + throw mk_runtime_error( + fmt::format("{} has different DeviceType from {}", start, coord)); + } + + return MachineSpaceOffset{coord.node_idx - start.node_idx, + coord.device_idx - start.device_idx, + coord.device_type}; +} + +} // namespace FlexFlow diff --git a/lib/pcg/src/pcg/start_invariant_machine_view.cc b/lib/pcg/src/pcg/start_invariant_machine_view.cc index 2f3887e734..1fcc3ea12f 100644 --- a/lib/pcg/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/src/pcg/start_invariant_machine_view.cc @@ -1,4 +1,5 @@ #include "pcg/start_invariant_machine_view.h" +#include "pcg/machine_space_offset.h" #include "pcg/machine_view.h" #include "pcg/operator_task_space.h" #include "utils/containers/count.h" @@ -6,7 +7,6 @@ #include "utils/containers/scanl.h" #include "utils/containers/transform.h" #include "utils/containers/zip.h" - namespace 
FlexFlow { MachineView machine_view_from_start_invariant( @@ -53,24 +53,31 @@ StartInvariantMachineView return StartInvariantMachineView{dimensions, device_type}; } -std::optional get_machine_space_coordinate( +std::optional get_machine_space_offset( OperatorTaskSpace const &task, StartInvariantMachineView const &start_inv_machine_view, TaskSpaceCoordinate const &coord, MachineSpecification const &machine_specification) { - MachineView mv = machine_view_from_start_invariant( - start_inv_machine_view, - MachineSpaceCoordinate{0, 0, get_device_type(start_inv_machine_view)}); - return get_machine_space_coordinate(task, mv, coord, machine_specification); + MachineSpaceCoordinate dummy_start = + MachineSpaceCoordinate{0, 0, get_device_type(start_inv_machine_view)}; + MachineView mv = + machine_view_from_start_invariant(start_inv_machine_view, dummy_start); + std::optional ms_coord = + get_machine_space_coordinate(task, mv, coord, machine_specification); + if (ms_coord == std::nullopt) { + return std::nullopt; + } + return get_machine_space_offset_from_coordinate(dummy_start, + ms_coord.value()); } -std::unordered_set get_machine_space_coordinates( +std::unordered_set get_machine_space_offsets( OperatorTaskSpace const &task, StartInvariantMachineView const &start_inv_machine_view, MachineSpecification const &machine_specification) { return transform( get_task_space_coordinates(task), [&](TaskSpaceCoordinate const &coord) { - return get_machine_space_coordinate( + return get_machine_space_offset( task, start_inv_machine_view, coord, machine_specification) .value(); }); diff --git a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc index 3007b228c1..8383754aa2 100644 --- a/lib/pcg/test/src/pcg/start_invariant_machine_view.cc +++ b/lib/pcg/test/src/pcg/start_invariant_machine_view.cc @@ -1,4 +1,5 @@ #include "pcg/start_invariant_machine_view.h" +#include "utils/fmt/unordered_set.h" #include "utils/fmt/vector.h" 
#include @@ -81,4 +82,148 @@ TEST_SUITE(FF_TEST_SUITE) { } } } + + TEST_CASE("StartInvariantMachineView - get_machine_space_offset") { + SUBCASE("1D case") { + // This operator has shape (3,), and thus 3 tasks. + // The (only) dimension is projected on the INTRA (device) dimension with + // a stride of 2. The machine space has 1 node and 6 devices per node. + /** + * The tasks will thus be distributed like this: + * +-------+-------+-------+-------+-------+-------+ + * | (0,) | | (1,) | | (2,) | | + * +-------+-------+-------+-------+-------+-------+ + */ + OperatorTaskSpace task = OperatorTaskSpace{{3}}; + StartInvariantMachineView simv = StartInvariantMachineView{ + {MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTRA_NODE}}, + DeviceType::GPU}; + MachineSpecification ms = + MachineSpecification{/*num_nodes=*/1, + /*num_cpus_per_node=*/6, + /*num_gpus_per_node=*/6, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0}; + + SUBCASE("get_machine_space_offset") { + SUBCASE("Task with TaskSpaceCoordinate = (0,)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0}}; + MachineSpaceOffset correct = + MachineSpaceOffset{0, 0, DeviceType::GPU}; + MachineSpaceOffset result = + get_machine_space_offset(task, simv, coord, ms).value(); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (1,)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1}}; + MachineSpaceOffset correct = + MachineSpaceOffset{0, 2, DeviceType::GPU}; + MachineSpaceOffset result = + get_machine_space_offset(task, simv, coord, ms).value(); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (2,)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{2}}; + MachineSpaceOffset correct = + MachineSpaceOffset{0, 4, DeviceType::GPU}; + MachineSpaceOffset result = + get_machine_space_offset(task, simv, coord, ms).value(); + CHECK(correct == result); + } + } + + SUBCASE("get_machine_space_offsets") { + std::unordered_set correct 
= { + MachineSpaceOffset{0, 0, DeviceType::GPU}, + MachineSpaceOffset{0, 2, DeviceType::GPU}, + MachineSpaceOffset{0, 4, DeviceType::GPU}}; + std::unordered_set result = + get_machine_space_offsets(task, simv, ms); + CHECK(correct == result); + } + } + + SUBCASE("2D case") { + // This operator has shape (2, 2), and thus 2 * 2 = 4 tasks. + // The first dimension is projected onto the INTER (node) dimension with + // stride 1, while the second dimension is projected onto the INTRA + // (device) dimension with stride 2. The machine space has 2 nodes and 4 + // devices per node. + + /** + * The tasks will thus be distributed like this: + * +-------+-------+-------+-------+ + * | (0,0) | | (0,1) | | + * +-------+-------+-------+-------+ + * | (1,0) | | (1,1) | | + * +-------+-------+-------+-------+ + */ + + OperatorTaskSpace task = OperatorTaskSpace{{2, 2}}; + StartInvariantMachineView simv = StartInvariantMachineView{ + {MachineViewDimension{stride_t{1}, + MachineSpecificationDimension::INTER_NODE}, + MachineViewDimension{stride_t{2}, + MachineSpecificationDimension::INTRA_NODE}}, + DeviceType::GPU}; + MachineSpecification ms = + MachineSpecification{/*num_nodes=*/2, + /*num_cpus_per_node=*/4, + /*num_gpus_per_node=*/4, + /*inter_node_bandwidth=*/0, + /*intra_node_bandwidth=*/0}; + + SUBCASE("get_machine_space_offset") { + SUBCASE("Task with TaskSpaceCoordinate = (0,0)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 0}}; + MachineSpaceOffset correct = + MachineSpaceOffset{0, 0, DeviceType::GPU}; + MachineSpaceOffset result = + get_machine_space_offset(task, simv, coord, ms).value(); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (0,1)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{0, 1}}; + MachineSpaceOffset correct = + MachineSpaceOffset{0, 2, DeviceType::GPU}; + MachineSpaceOffset result = + get_machine_space_offset(task, simv, coord, ms).value(); + CHECK(correct == result); + } + + SUBCASE("Task with 
TaskSpaceCoordinate = (1,0)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 0}}; + MachineSpaceOffset correct = + MachineSpaceOffset{1, 0, DeviceType::GPU}; + MachineSpaceOffset result = + get_machine_space_offset(task, simv, coord, ms).value(); + CHECK(correct == result); + } + + SUBCASE("Task with TaskSpaceCoordinate = (1,1)") { + TaskSpaceCoordinate coord = TaskSpaceCoordinate{{1, 1}}; + MachineSpaceOffset correct = + MachineSpaceOffset{1, 2, DeviceType::GPU}; + MachineSpaceOffset result = + get_machine_space_offset(task, simv, coord, ms).value(); + CHECK(correct == result); + } + } + + SUBCASE("get_machine_space_offsets") { + std::unordered_set correct = { + MachineSpaceOffset{0, 0, DeviceType::GPU}, + MachineSpaceOffset{0, 2, DeviceType::GPU}, + MachineSpaceOffset{1, 0, DeviceType::GPU}, + MachineSpaceOffset{1, 2, DeviceType::GPU}}; + std::unordered_set result = + get_machine_space_offsets(task, simv, ms); + CHECK(correct == result); + } + } + } } From 9fc87128db358d4977145fe80653edea16c7427c Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Wed, 9 Oct 2024 16:14:24 -0700 Subject: [PATCH 33/34] Post-merge fixes --- ...lel_layer_guid_oblivious_machine_mapping.h | 1 + .../get_optimal_machine_mapping.cc | 29 +++++- .../get_tensor_set_movement_across_split.cc | 59 ++++++++++- .../machine_mapping/machine_mapping.cc | 80 ++++++++++++--- .../machine_mapping/machine_mapping_result.cc | 98 ++++++++++++++++--- lib/pcg/src/pcg/machine_specification.cc | 4 +- .../utils/containers/get_all_assignments.h | 3 +- .../utils/containers/get_all_assignments.cc | 12 +++ 8 files changed, 252 insertions(+), 34 deletions(-) diff --git a/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h b/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h index accd96af4c..cb3af9c689 100644 --- a/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h +++ 
b/lib/compiler/include/compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.h @@ -2,6 +2,7 @@ #define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_MACHINE_MAPPING_PARALLEL_LAYER_GUID_OBLIVIOUS_MACHINE_MAPPING_H #include "compiler/machine_mapping/parallel_layer_guid_oblivious_machine_mapping.dtg.h" +#include namespace FlexFlow { diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc index 0a874948e4..fb6bcddddb 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc @@ -42,8 +42,33 @@ TEST_SUITE(FF_TEST_SUITE) { }; }; - MachineView mv1 = make_1d_machine_view(gpu_id_t(1), gpu_id_t(2)); - MachineView mv2 = make_1d_machine_view(gpu_id_t(1), gpu_id_t(3)); + MachineView mv1 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + MachineView mv2 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; MachineSpecification full_machine_spec = MachineSpecification{ /*num_nodes=*/2, diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc b/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc index c66d533d0f..05aeaeacfe 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc @@ -64,10 +64,61 @@ TEST_SUITE(FF_TEST_SUITE) { 
ParallelLayerAddedResult relu_2 = add_parallel_layer( pcg, relu_attrs, {get_only(relu_1.outputs)}, {relu_output_attrs}); - MachineView pre_mv1 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{1}); - MachineView pre_mv2 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{2}); - MachineView post_mv1 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{3}); - MachineView post_mv2 = make_1d_machine_view(gpu_id_t{0}, gpu_id_t{4}); + MachineView pre_mv1 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + MachineView pre_mv2 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + MachineView post_mv1 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{3}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + MachineView post_mv2 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{4}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; SUBCASE("single edge across split") { PCGBinarySeriesSplit split = PCGBinarySeriesSplit{ diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc index 6b16a54c1f..c5adba9f22 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc @@ -8,33 +8,85 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { 
TEST_CASE("combine_disjoint_mappings(MachineMapping, MachineMappping)") { - MachineView machine_view_0 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(1)); - MachineView machine_view_1 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(2)); + MachineView machine_view_0 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + MachineView machine_view_1 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + MachineMapping machine_mapping_0 = MachineMapping({ - {parallel_layer_guid_t(Node(0)), machine_view_0}, + {parallel_layer_guid_t{Node{0}}, machine_view_0}, }); MachineMapping machine_mapping_1 = MachineMapping({ - {parallel_layer_guid_t(Node(1)), machine_view_1}, - }); - MachineMapping correct = MachineMapping({ - {parallel_layer_guid_t(Node(0)), machine_view_0}, - {parallel_layer_guid_t(Node(1)), machine_view_1}, + {parallel_layer_guid_t{Node{1}}, machine_view_1}, }); + MachineMapping correct = MachineMapping{{ + {parallel_layer_guid_t{Node{0}}, machine_view_0}, + {parallel_layer_guid_t{Node{1}}, machine_view_1}, + }}; MachineMapping result = combine_disjoint_mappings(machine_mapping_0, machine_mapping_1); CHECK(result == correct); } TEST_CASE("nodes_are_disjoint(MachineMapping, MachineMappping)") { - MachineView machine_view_0 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(1)); - MachineView machine_view_1 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(2)); + MachineView machine_view_0 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{1}, + 
MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + MachineView machine_view_1 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + MachineMapping machine_mapping_0 = MachineMapping({ - {parallel_layer_guid_t(Node(0)), machine_view_0}, + {parallel_layer_guid_t{Node{0}}, machine_view_0}, }); SUBCASE("nodes are disjoint") { MachineMapping machine_mapping_1 = MachineMapping({ - {parallel_layer_guid_t(Node(1)), machine_view_1}, + {parallel_layer_guid_t{Node{1}}, machine_view_1}, }); bool correct = true; @@ -44,8 +96,8 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("nodes are not disjoint") { MachineMapping machine_mapping_1 = MachineMapping({ - {parallel_layer_guid_t(Node(0)), machine_view_0}, - {parallel_layer_guid_t(Node(1)), machine_view_1}, + {parallel_layer_guid_t{Node{0}}, machine_view_0}, + {parallel_layer_guid_t{Node{1}}, machine_view_1}, }); bool correct = false; bool result = nodes_are_disjoint(machine_mapping_0, machine_mapping_1); diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc index 254d6b2784..ac462a95b1 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc @@ -6,8 +6,34 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("series_combine") { - MachineView machine_view_0 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(1)); - MachineView machine_view_1 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(2)); + MachineView machine_view_0 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{1}, + 
MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + MachineView machine_view_1 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + float pre_cost = 2.0; MachineMappingResult pre = MachineMappingResult{ @@ -49,7 +75,7 @@ TEST_SUITE(FF_TEST_SUITE) { float comm_cost = 3.0; - SUBCASE("pre is infeasbile") { + SUBCASE("pre is infeasible") { MachineMappingResult result = series_combine( comm_cost, infeasible, post, ParallelSplitTransformation::LthenR); MachineMappingResult correct = infeasible; @@ -57,7 +83,7 @@ TEST_SUITE(FF_TEST_SUITE) { CHECK(result == correct); } - SUBCASE("post is infeasbile") { + SUBCASE("post is infeasible") { MachineMappingResult result = series_combine( comm_cost, pre, infeasible, ParallelSplitTransformation::LthenR); MachineMappingResult correct = infeasible; @@ -160,8 +186,33 @@ TEST_SUITE(FF_TEST_SUITE) { } TEST_CASE("parallel_combine") { - MachineView machine_view_0 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(1)); - MachineView machine_view_1 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(2)); + MachineView machine_view_0 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + MachineView machine_view_1 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; MachineMappingResult lhs = MachineMappingResult{ FeasibleMachineMappingResult{ @@ -199,14 +250,14 @@ TEST_SUITE(FF_TEST_SUITE) { MachineMappingResult infeasible = infeasible_machine_mapping_result(); - 
SUBCASE("lhs is infeasbile") { + SUBCASE("lhs is infeasible") { MachineMappingResult result = parallel_combine(infeasible, rhs); MachineMappingResult correct = infeasible; CHECK(result == correct); } - SUBCASE("rhs is infeasbile") { + SUBCASE("rhs is infeasible") { MachineMappingResult result = parallel_combine(lhs, infeasible); MachineMappingResult correct = infeasible; @@ -256,8 +307,33 @@ TEST_SUITE(FF_TEST_SUITE) { } TEST_CASE("minimize_runtime") { - MachineView machine_view_0 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(1)); - MachineView machine_view_1 = make_1d_machine_view(gpu_id_t(0), gpu_id_t(2)); + MachineView machine_view_0 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; + + MachineView machine_view_1 = MachineView{ + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/{ + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, + }, + }; MachineMappingResult faster = MachineMappingResult{ FeasibleMachineMappingResult{ @@ -295,7 +371,7 @@ TEST_SUITE(FF_TEST_SUITE) { MachineMappingResult infeasible = infeasible_machine_mapping_result(); - SUBCASE("lhs is infeasbile") { + SUBCASE("lhs is infeasible") { MachineMappingResult result = minimize_runtime(infeasible, slower); MachineMappingResult correct = slower; diff --git a/lib/pcg/src/pcg/machine_specification.cc b/lib/pcg/src/pcg/machine_specification.cc index 0df402ac3c..ca5b8ba047 100644 --- a/lib/pcg/src/pcg/machine_specification.cc +++ b/lib/pcg/src/pcg/machine_specification.cc @@ -17,7 +17,7 @@ int get_num_devices(MachineSpecification const &ms, case DeviceType::CPU: return get_num_cpus(ms); default: - throw mk_runtime_error("Unknown DeviceType {}", device_type); + throw 
mk_runtime_error(fmt::format("Unknown DeviceType {}", device_type)); } } @@ -29,7 +29,7 @@ int get_num_devices_per_node(MachineSpecification const &ms, case DeviceType::CPU: return ms.num_cpus_per_node; default: - throw mk_runtime_error("Unknown DeviceType {}", device_type); + throw mk_runtime_error(fmt::format("Unknown DeviceType {}", device_type)); } } bool is_valid_machine_space_coordinate(MachineSpecification const &ms, diff --git a/lib/utils/include/utils/containers/get_all_assignments.h b/lib/utils/include/utils/containers/get_all_assignments.h index b7b30cbae4..9981948f47 100644 --- a/lib/utils/include/utils/containers/get_all_assignments.h +++ b/lib/utils/include/utils/containers/get_all_assignments.h @@ -5,6 +5,7 @@ #include "utils/containers/keys.h" #include "utils/containers/transform.h" #include "utils/containers/unordered_map_from_pairs.h" +#include "utils/containers/unordered_set_of.h" #include "utils/containers/vector_of.h" #include "utils/containers/zip.h" #include "utils/hash/unordered_map.h" @@ -30,7 +31,7 @@ std::unordered_set> get_all_assignments( ordered_keys, [&](K const &k) { return options_per_key.at(k); }); std::unordered_set> result = transform( - cartesian_product(ordered_value_option_sets), + unordered_set_of(cartesian_product(ordered_value_option_sets)), [&](std::vector const &chosen_values) { return unordered_map_from_pairs(zip(ordered_keys, chosen_values)); }); diff --git a/lib/utils/src/utils/containers/get_all_assignments.cc b/lib/utils/src/utils/containers/get_all_assignments.cc index 3a7cf6377a..8ccf021aa5 100644 --- a/lib/utils/src/utils/containers/get_all_assignments.cc +++ b/lib/utils/src/utils/containers/get_all_assignments.cc @@ -1 +1,13 @@ #include "utils/containers/get_all_assignments.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using K = value_type<0>; +using V = value_type<1>; + +template + std::unordered_set> get_all_assignments( + std::unordered_map> const &); + +} // namespace FlexFlow From 
20f6a75ae5a8a7b2b7b68eada7efaf74f1605c5f Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Wed, 9 Oct 2024 16:15:08 -0700 Subject: [PATCH 34/34] Format --- .../get_optimal_machine_mapping.cc | 42 +++--- .../get_tensor_set_movement_across_split.cc | 84 ++++++------ .../machine_mapping/machine_mapping.cc | 84 ++++++------ .../machine_mapping/machine_mapping_result.cc | 127 +++++++++--------- .../utils/containers/get_all_assignments.cc | 5 +- 5 files changed, 178 insertions(+), 164 deletions(-) diff --git a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc index fb6bcddddb..a0d06fe930 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/get_optimal_machine_mapping.cc @@ -43,31 +43,33 @@ TEST_SUITE(FF_TEST_SUITE) { }; MachineView mv1 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{1}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineView mv2 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{2}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineSpecification full_machine_spec = MachineSpecification{ diff --git 
a/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc b/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc index 05aeaeacfe..e22f715d82 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/get_tensor_set_movement_across_split.cc @@ -65,59 +65,63 @@ TEST_SUITE(FF_TEST_SUITE) { pcg, relu_attrs, {get_only(relu_1.outputs)}, {relu_output_attrs}); MachineView pre_mv1 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{1}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineView pre_mv2 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{2}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineView post_mv1 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{3}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{3}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineView 
post_mv2 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{4}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{4}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; SUBCASE("single edge across split") { diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc index c5adba9f22..221cca3ae1 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping.cc @@ -9,31 +9,33 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("combine_disjoint_mappings(MachineMapping, MachineMappping)") { MachineView machine_view_0 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{1}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineView machine_view_1 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{2}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineMapping machine_mapping_0 = 
MachineMapping({ @@ -53,31 +55,33 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("nodes_are_disjoint(MachineMapping, MachineMappping)") { MachineView machine_view_0 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{1}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineView machine_view_1 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{2}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineMapping machine_mapping_0 = MachineMapping({ diff --git a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc index ac462a95b1..73b921fc98 100644 --- a/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc +++ b/lib/compiler/test/src/compiler/machine_mapping/machine_mapping_result.cc @@ -7,34 +7,35 @@ using namespace FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("series_combine") { MachineView machine_view_0 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{1}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, 
+ }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineView machine_view_1 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{2}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; - float pre_cost = 2.0; MachineMappingResult pre = MachineMappingResult{ FeasibleMachineMappingResult{ @@ -187,31 +188,33 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("parallel_combine") { MachineView machine_view_0 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{1}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineView machine_view_1 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{2}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineMappingResult lhs = MachineMappingResult{ @@ -308,31 +311,33 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("minimize_runtime") { MachineView machine_view_0 = MachineView{ - 
/*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{1}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{1}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineView machine_view_1 = MachineView{ - /*start=*/MachineSpaceCoordinate{ - /*node_idx=*/0, - /*device_idx=*/0, - /*device_type=*/DeviceType::GPU, - }, - /*dimensions=*/{ - MachineViewDimension{ - stride_t{2}, - MachineSpecificationDimension::INTRA_NODE, + /*start=*/MachineSpaceCoordinate{ + /*node_idx=*/0, + /*device_idx=*/0, + /*device_type=*/DeviceType::GPU, + }, + /*dimensions=*/ + { + MachineViewDimension{ + stride_t{2}, + MachineSpecificationDimension::INTRA_NODE, + }, }, - }, }; MachineMappingResult faster = MachineMappingResult{ diff --git a/lib/utils/src/utils/containers/get_all_assignments.cc b/lib/utils/src/utils/containers/get_all_assignments.cc index 8ccf021aa5..f920ba1c1a 100644 --- a/lib/utils/src/utils/containers/get_all_assignments.cc +++ b/lib/utils/src/utils/containers/get_all_assignments.cc @@ -6,8 +6,7 @@ namespace FlexFlow { using K = value_type<0>; using V = value_type<1>; -template - std::unordered_set> get_all_assignments( - std::unordered_map> const &); +template std::unordered_set> + get_all_assignments(std::unordered_map> const &); } // namespace FlexFlow