Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding task-based simulator for PCGs #1365

Open
wants to merge 59 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 48 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
d269b40
compiler build
wmdi Oct 16, 2023
f46fd11
Merge branch 'test-substitution' into test-compiler
wmdi Oct 18, 2023
af67e9e
Merge branch 'test-substitution' into test-compiler
wmdi Nov 8, 2023
c015efb
unity dp works
wmdi Nov 15, 2023
6211b84
format
wmdi Nov 15, 2023
d9f1302
Merge remote-tracking branch 'upstream/repo-refactor' into test-compiler
wmdi Jan 24, 2024
fb58a99
fmt
wmdi Jan 24, 2024
02937e1
fix
wmdi Jan 24, 2024
6402ed0
add substitutions, compiler, and their unit tests to CI
wmdi Jan 25, 2024
0c45f61
disable runtime unit test
wmdi Jan 25, 2024
95fa427
minor fix
wmdi Feb 15, 2024
1f7e2b6
(not compilable) visitable issue for OptimalCostState
wmdi Feb 18, 2024
a9a6402
fix machine mapping hash & refactor dp algorithm
wmdi Feb 27, 2024
d8bbcb8
minor fix
wmdi Feb 27, 2024
09d3152
fix variant issue
wmdi Feb 28, 2024
a150d3a
fmt
wmdi Feb 28, 2024
2eb3fdf
fix
wmdi Mar 11, 2024
7598a92
fmt
wmdi Mar 11, 2024
05c8336
fix
wmdi Mar 14, 2024
71aeddb
Merge remote-tracking branch 'upstream/repo-refactor' into test-compiler
wmdi Mar 14, 2024
9345400
add more unit tests
wmdi Mar 18, 2024
c0015df
fmt
wmdi Mar 18, 2024
6d28697
Merge remote-tracking branch 'origin/repo-refactor' into compiler
lockshaw Mar 22, 2024
102f5fb
Fix post-merge
lockshaw Mar 22, 2024
d6e10bb
Add shell hook for sapling development
lockshaw Mar 23, 2024
95fb4cc
changed from nullopt to std::nullopt
Mar 23, 2024
c091479
fix cast issue
wmdi Mar 23, 2024
57bd35f
Merge branch 'test-compiler' of github.com:wmdi/FlexFlow into test-co…
wmdi Mar 23, 2024
54c604a
Fix spdlog cmake issue
lockshaw Mar 24, 2024
a09e528
Merge remote-tracking branch 'refs/remotes/wmdi/test-compiler' into c…
lockshaw Mar 24, 2024
8b914cf
Re-remove submodules
lockshaw Mar 24, 2024
189f323
minor fix & fmt
wmdi Mar 24, 2024
d2eb505
upd tests name to match ci
wmdi Mar 24, 2024
371324a
Add TEST_SUITE declaration to make tests findable by ctest
lockshaw Mar 26, 2024
da74817
Remove unnecessary nix files, add utils test to ci
lockshaw Mar 26, 2024
0db60db
Fix utils tests name, format
lockshaw Mar 26, 2024
6e520bb
Merge pull request #1229 from wmdi/test-compiler
wmdi Mar 26, 2024
2fd8bfe
initial draft for machine_mapping.cpp
Apr 8, 2024
ffcb8c0
Added get_successor function
Apr 8, 2024
edf7074
Machine Mapping initial draft
Apr 8, 2024
b0cf1b2
Machine Mapping initial draft
Apr 8, 2024
502c75c
Merge branch 'ff-cost-estimator' of github.com:Marsella8/FlexFlow int…
Apr 8, 2024
ae95818
Added parallel_estimate_cost function prototype
Apr 8, 2024
64d28fc
Added test draft
Apr 12, 2024
0e3cc4a
Merge remote-tracking branch 'origin/repo-refactor' into ff-cost-esti…
Apr 12, 2024
9533099
Changes file
Apr 12, 2024
9c03409
Formatting
Apr 12, 2024
2af6003
Saving
Apr 26, 2024
3a9850f
Merge branch 'repo-refactor' of github.com:flexflow/FlexFlow into ff-…
Apr 26, 2024
2793cc6
Merge branch 'repo-refactor' of github.com:flexflow/FlexFlow into ff-…
May 14, 2024
798f492
Moved cost estimator to separate file
May 14, 2024
ffa8158
Working on tests
May 15, 2024
f134e8a
Minor changes
May 15, 2024
a8e8551
Updates to cost_estimator
May 24, 2024
3a8b995
Updates to parallel_cost_estimator + implementations for machineviews…
Jun 3, 2024
cd76042
Tests + fixes for parallel cost estimator
Jun 25, 2024
94e6285
Tests + fixes for parallel_cost_estimator
Jun 27, 2024
b469d88
PR fixes
Jul 15, 2024
44ed29e
Formatting
Jul 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions lib/compiler/include/compiler/machine_mapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ OptimalCostResult
MachineSpecification const &resources,
OptimalCostCache &cached_subgraph_costs);

float parallel_estimate_cost(
SubParallelComputationGraphView const &g,
CostEstimator const &estimator,
MachineMapping const &device_mapping,
std::unordered_map<OpenMultiDiEdge, MachineView> const
&frontier_machine_views);

} // namespace FlexFlow

namespace std {
Expand Down
97 changes: 97 additions & 0 deletions lib/compiler/src/machine_mapping.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#include "utils/exception.h"
#include "utils/graph/serialparallel.h"

#include "utils/deduplicated_priority_queue.h"
#include <algorithm>

namespace FlexFlow {

MachineMapping MachineMapping::combine(MachineMapping const &s1,
Expand Down Expand Up @@ -110,6 +113,100 @@ float estimate_cost(SubParallelComputationGraphView const &g,
return cost;
}

// Computes estimated execution cost for a single node
float node_estimate_cost(Node node,
SubParallelComputationGraphView const &g,
CostEstimator const &estimator,
MachineMapping const &device_mapping) {
std::unordered_set<UpwardOpenMultiDiEdge> incoming_edges =
get_incoming_edges(g, node);
std::vector<ParallelTensorShape> inputs = transform(
as_vector(incoming_edges), [&](UpwardOpenMultiDiEdge const &input_edge) {
return g.at(input_edge).get_shape();
});
float cost = estimator.estimate_cost(
g.at(node).attrs, inputs, device_mapping.machine_views.at(node));
return cost;
}

struct TimedNode { // Node and associated finishing time
Node node;
float endtime;
};

struct TimeComparison {
bool operator()(TimedNode const &lhs, TimedNode const &rhs) const {
return (lhs.endtime < rhs.endtime);
}
};

float parallel_estimate_cost(
SubParallelComputationGraphView const &g,
CostEstimator const &estimator,
MachineMapping const &device_mapping,
std::unordered_map<OpenMultiDiEdge, MachineView> const
&frontier_machine_views) {

std::unordered_set<Node>
frontier; // nodes whose dependencies (previous nodes) have been met, and
// are waiting to be processed.
DeduplicatedPriorityQueue<TimedNode, std::vector<TimedNode>, TimeComparison>
processing; // nodes currently being processed.
std::unordered_set<Node>
processed; // set of nodes that have already been processed
std::unordered_map<int, bool>
occupied; // keeps track of the devices that are currently occupied

// Filling the frontier
for (auto &[edge, _] : frontier_machine_views) {
Node node = get_dst_node(edge);
frontier.insert(node);
}

float current_time = 0;
while (!frontier.empty() || !processing.empty()) {

// Processing new nodes
std::unordered_set<Node> copy(frontier);
for (Node const &node : copy) {
std::vector<int> devices =
device_mapping.machine_views.at(node).machine_ids();
if (std::all_of(devices.begin(), devices.end(), [&occupied](int d) {
return occupied[d] == false
})) {
float cost = node_estimate_cost(node, g, estimator, device_mapping);
processing.push({node, current_time + cost});
for (int d : devices) {
occupied[d] = true;
}
frontier.erase(node);
}
}

// Finish processing one node
TimedNode finished = processing.pop();
std::vector<int> devices =
device_mapping.machine_views.at(finished.node).machine_ids();
for (int d : devices) { // free devices
occupied[d] = false;
}
processed.insert(finished.node);
current_time = finished.endtime;

// Adding candidates to the frontier
for (Node const &successor :
get_successors(g, finished.node)) { // All nodes depending on finished
std::unordered_set<Node> predecessors = get_predecessors(g, successor);
if (std::all_of(predecessors.begin(),
predecessors.end(),
[&processed](int p) { return processed.contains(p) })) {
frontier.insert(successor);
}
}
}
return current_time;
}

void minimize_runtime(OptimalCostResult &m1, OptimalCostResult const &m2) {
minimize(m1, m2, OptimalCostRuntimeCmp{});
}
Expand Down
50 changes: 50 additions & 0 deletions lib/compiler/test/src/test_parallel_cost_estimator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include "compiler/machine_mapping.h"
#include "doctest/doctest.h"
#include "doctest/test_cost_estimator.h#include "compiler/cost_estimate.h"

namespace FlexFlow {

struct TestCostEstimator : public ICostEstimator {
float estimate_cost(PCGOperatorAttrs const &op,
std::vector<ParallelTensorShape> const &inputs,
MachineView const &mv) const override {
return 0.1;
}"
#include "rapidcheck.h"


using namespace FlexFlow;



TEST_SUITE(FF_TEST_SUITE) {
TEST_CASE("parallel_estimate_cost: linear graph") {
//Straight line example
SubParallelComputationGraphView g =
OutputLabelledOpenMultiDiGraphView<Operator, ParallelTensor>::create<
UnorderedOutputLabelledOpenMultiDiGraph<Operator, ParallelTensor>>();
Node n0 = g.add_node(Operator{InputAttrs{}, "n0"});
Node n1 = g.add_node(Operator{InputAttrs{}, "n1"});
Node n2 = g.add_node(Operator{InputAttrs{}, "n2"});

NodePort p0 = g.add_node_port();
NodePort p1 = g.add_node_port();
NodePort p2 = g.add_node_port();

// MultiDiEdge: dst, dstport, src, srcport
MultiDiEdge e0{n1, p1, n0, p0};
MultiDiEdge e1{n2, p2, n1, p1};

g.add_edge(e0);
g.add_edge(e1);
g.add_edge(e2);
g.add_label(e0, 10);
g.add_label(e1, 11);

CostEstimator estimator = CostEstimator::create<TestCostEstimator>();
MachineMapping device_mapping = std::unordered_set<MachineView>{
make_1d_machine_view(gpu_id_t(1), gpu_id_t(2))};
views
parallel_estimate_cost(g, estimator, device_mapping, frontier_machine_views);
}
}
4 changes: 4 additions & 0 deletions lib/utils/include/utils/graph/algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@ std::unordered_set<Node> get_predecessors(DiGraphView const &, Node const &);
std::unordered_map<Node, std::unordered_set<Node>>
get_predecessors(DiGraphView const &, std::unordered_set<Node> const &);

std::unordered_set<Node> get_successors(DiGraphView const &, Node const &);
std::unordered_map<Node, std::unordered_set<Node>>
get_successors(DiGraphView const &, std::unordered_set<Node> const &);

Node get_src_node(MultiDiEdge const &);
Node get_dst_node(MultiDiEdge const &);
Node get_dst_node(InputMultiDiEdge const &);
Expand Down
18 changes: 18 additions & 0 deletions lib/utils/src/graph/algorithms.cc
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,24 @@ std::unordered_set<Node> get_predecessors(DiGraphView const &g, Node const &n) {
return get_predecessors(g, std::unordered_set<Node>{n}).at(n);
}

std::unordered_map<Node, std::unordered_set<Node>>
get_successors(DiGraphView const &g,
std::unordered_set<Node> const &nodes) {

std::unordered_map<Node, std::unordered_set<Node>> successors;
for (Node const &n : nodes) {
successors[n];
}
for (DirectedEdge const &e : get_outgoing_edges(g, nodes)) {
successors.at(e.src).insert(e.dst);
}
return successors;
}

std::unordered_set<Node> get_successors(DiGraphView const &g, Node const &n) {
return get_successors(g, std::unordered_set<Node>{n}).at(n);
}

std::vector<Node> get_unchecked_dfs_ordering(
DiGraphView const &g, std::unordered_set<Node> const &starting_points) {
UncheckedDFSView dfs_view = unchecked_dfs(g, starting_points);
Expand Down
Loading