forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathParallelNativeTBB.cpp
110 lines (92 loc) · 2.37 KB
/
ParallelNativeTBB.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#if AT_PARALLEL_NATIVE_TBB
#include <ATen/Parallel.h>
#include <ATen/PTThreadPool.h>
#include <atomic>
#include <mutex>
#include "tbb/tbb.h"
#define TBB_PREVIEW_GLOBAL_CONTROL 1
#include "tbb/global_control.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#ifdef TH_BLAS_MKL
#include <mkl.h>
#endif
namespace at {
namespace {
// Per-thread TBB scheduler handle, constructed with the default intra-op
// thread count. _internal_set_num_threads() terminates and re-initializes it
// on the calling thread when a new thread count is applied.
static thread_local tbb::task_scheduler_init tbb_init_(intraop_default_num_threads());
// User-requested intra-op thread count. -1 is the "not set" sentinel:
// set_num_threads() only succeeds while this still holds -1, and
// init_num_threads() falls back to intraop_default_num_threads() when it is
// negative.
std::atomic<int> num_intraop_threads_{-1};
// Per-thread task group that intraop_launch() / intraop_launch_future() submit
// work to.
static thread_local tbb::task_group tg_;
// Guards replacement of global_thread_limit_ in _internal_set_num_threads().
std::mutex global_thread_mutex_;
// Owns the current tbb::global_control object (max_allowed_parallelism).
// Replaced with a fresh object each time _internal_set_num_threads() runs.
std::shared_ptr<tbb::global_control> global_thread_limit_ = nullptr;
// Applies `nthreads` as the TBB parallelism setting.
//
// Two things are updated:
//   1. the shared tbb::global_control limit (max_allowed_parallelism),
//      swapped in while holding global_thread_mutex_;
//   2. the calling thread's task_scheduler_init, which is terminated if it is
//      currently active and then initialized again with `nthreads`.
//
// `nthreads` must be positive (checked with TORCH_INTERNAL_ASSERT).
void _internal_set_num_threads(int nthreads) {
  TORCH_INTERNAL_ASSERT(nthreads > 0);

  {
    // Build the new limit and publish it under the lock; the previous
    // global_control object is released by the assignment.
    std::lock_guard<std::mutex> guard(global_thread_mutex_);
    auto fresh_limit = std::make_shared<tbb::global_control>(
        tbb::global_control::max_allowed_parallelism, nthreads);
    global_thread_limit_ = std::move(fresh_limit);
  }

  // Restart this thread's scheduler so it picks up the new thread count.
  if (tbb_init_.is_active()) tbb_init_.terminate();
  tbb_init_.initialize(nthreads);
}
}
// Initializes threading for the calling thread.
//
// When built with OpenMP and/or MKL, those runtimes are set to a single
// thread. The TBB thread count is then applied: the value stored in
// num_intraop_threads_ if it is non-negative, otherwise
// intraop_default_num_threads().
void init_num_threads() {
#ifdef _OPENMP
  omp_set_num_threads(1);
#endif

#ifdef TH_BLAS_MKL
  mkl_set_num_threads(1);
#endif

  const int requested = num_intraop_threads_.load();
  // A negative value means set_num_threads() has not stored a count yet.
  const int effective =
      (requested < 0) ? intraop_default_num_threads() : requested;
  _internal_set_num_threads(effective);
}
// Sets the number of intra-op threads.
//
// The count can be set at most once: the call only succeeds while
// num_intraop_threads_ still holds its "unset" sentinel (-1). On success the
// value is stored atomically and applied via _internal_set_num_threads().
//
// Fails through TORCH_CHECK when `nthreads` is not positive, or when a thread
// count has already been stored.
void set_num_threads(int nthreads) {
  TORCH_CHECK(nthreads > 0, "Expected positive number of threads");

  // compare_exchange_strong overwrites `expected_unset` on failure, so it must
  // be a fresh local for every call.
  int expected_unset = -1;
  if (num_intraop_threads_.compare_exchange_strong(expected_unset, nthreads)) {
    _internal_set_num_threads(nthreads);
    return;
  }

  // This function configures intra-op parallelism; the message previously
  // said "interop", which pointed users at the wrong setting.
  TORCH_CHECK(false,
      "Error: cannot set number of intraop threads "
      "after parallel work has started or after set_num_threads call");
}
// Returns the value reported by tbb::this_task_arena::max_concurrency() for
// the calling thread.
int get_num_threads() {
  const int arena_concurrency = tbb::this_task_arena::max_concurrency();
  return arena_concurrency;
}
// Returns the calling thread's index as reported by
// tbb::this_task_arena::current_thread_index().
int get_thread_num() {
  const int thread_index = tbb::this_task_arena::current_thread_index();
  return thread_index;
}
// Reports whether tbb::this_task_arena::current_thread_index() yields
// something other than -1 for the calling thread.
// NOTE(review): -1 presumably corresponds to TBB's "not initialized" index —
// confirm against the TBB version in use.
bool in_parallel_region() {
  const int thread_index = tbb::this_task_arena::current_thread_index();
  const bool has_arena_slot = (thread_index != -1);
  return has_arena_slot;
}
// Runs `func` as an intra-op task.
//
// When get_num_threads() reports at most one thread, `func` is invoked inline
// on the calling thread; otherwise it is handed to the thread-local task
// group tg_.
void intraop_launch(std::function<void()> func) {
  const bool run_inline = !(get_num_threads() > 1);
  if (run_inline) {
    func();
    return;
  }
  tg_.run(func);
}
std::shared_ptr<c10::ivalue::Future> intraop_launch_future(
std::function<void()> func) {
auto future = std::make_shared<c10::ivalue::Future>(NoneType::get());
if (get_num_threads() > 1) {
tg_.run(
[func, future]() {
func();
future->markCompleted();
}
);
} else {
func();
future->markCompleted();
}
return future;
}
} // namespace at
#endif