Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
Chao1Han committed Sep 13, 2024
1 parent c485bd8 commit 2d1ae87
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
9 changes: 7 additions & 2 deletions torch/csrc/distributed/c10d/ProcessGroupXCCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,14 +167,19 @@ bool ProcessGroupXCCL::WorkXCCL::checkTimeout(
return true;
}

// Propagates a captured exception to this work item: it first sets `eptr` as
// the error on `future_`, then passes the same exception to `finish()`.
// Invoked from the catch handler in isCompleted().
//
// @param eptr  The exception to record.
//
// NOTE(review): `future_` is dereferenced without a null check; this assumes
// it has been initialized before any error path runs — confirm.
void ProcessGroupXCCL::WorkXCCL::finishWorkXcclError(
const std::exception_ptr& eptr) {
future_->setError(eptr);
finish(eptr);
}

bool ProcessGroupXCCL::WorkXCCL::isCompleted() {
for (auto& ret : rets) {
bool flag;
try {
TORCH_CHECK(flag = ret.test());
} catch (...) {
future_->setError(std::current_exception());
finish(std::current_exception());
finishWorkXcclError(std::current_exception());
return true;
}
if (!flag) {
Expand Down
2 changes: 2 additions & 0 deletions torch/csrc/distributed/c10d/ProcessGroupXCCL.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ class TORCH_API ProcessGroupXCCL : public Backend {
rets.push_back(std::move(result));
}

// Records `eptr` as the error on `future_` and then calls `finish(eptr)`;
// defined in ProcessGroupXCCL.cpp.
void finishWorkXcclError(const std::exception_ptr& eptr);

// Overrides the base-class isCompleted(). The definition in
// ProcessGroupXCCL.cpp iterates over `rets` and calls test() on each entry.
// NOTE(review): the full completion logic is not visible here — confirm there.
bool isCompleted() override;

bool isSuccess() const override {
Expand Down

0 comments on commit 2d1ae87

Please sign in to comment.