// infer_engine.h
#pragma once

#include <atomic>
#include <cstdint>
#include <filesystem>
#include <sstream>
#include <string>
#include <utility>

#include <cuda_runtime_api.h>  // cudaDeviceProp, cudaStream_t, cudaEvent_t

#include "NvInfer.h"
// A min/opt/max triple describing one dynamic dimension of a TensorRT
// optimization profile.
struct optimization_axis {
    optimization_axis(int32_t min, int32_t opt, int32_t max) : min(min), opt(opt), max(max) {}
    optimization_axis(int32_t same) : min(same), opt(same), max(same) {}
    optimization_axis() : min(0), opt(0), max(0) {}
    int32_t min, opt, max;
};
static std::ostream &operator<<(std::ostream &os, const optimization_axis &o) {
    os << o.min << ',' << o.opt << ',' << o.max;
    return os;
}
// Build-time configuration for a scaler engine. The dynamic axes and flags
// together determine a unique engine file name, so differently configured
// engines can be cached side by side.
struct ScalerConfig {
    optimization_axis input_width;
    optimization_axis input_height;
    optimization_axis batch;
    int32_t aux_stream;
    bool use_fp16;
    bool use_int8;
    bool force_precision;
    bool external;
    bool low_mem;

    // Derives the engine file name suffix from the configuration.
    [[nodiscard]] std::string engine_name() const {
        std::stringstream ss;
        ss << "_w" << input_width << "_h" << input_height << "_b" << batch << "_a" << aux_stream;
        if (use_fp16) {
            ss << "_fp16";
        }
        if (use_int8) {
            ss << "_int8";
        }
        if (force_precision) {
            ss << "_force_prec";
        }
        if (external) {
            ss << "_ext";
        }
        if (low_mem) {
            ss << "_lm";
        }
        ss << ".engine";
        return ss.str();
    }
};
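
// Illustrative example (not from the original source), derived from the
// serialization above: a config with a fixed 1920x1080 input, a batch range
// of {1, 1, 4}, one aux stream, and fp16 enabled yields
//   "_w1920,1920,1920_h1080,1080,1080_b1,1,4_a1_fp16.engine".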
// Builds a TensorRT engine for a given ScalerConfig and serializes it under
// path_prefix, reusing a timing cache across builds.
class OptimizationContext {
    ScalerConfig config;
    nvinfer1::ILogger &logger;
    std::filesystem::path path_prefix;
    std::filesystem::path path_engine;
    nvinfer1::IBuilder *builder;
    nvinfer1::ITimingCache *cache;
    cudaDeviceProp prop;
    size_t total_memory;

    [[nodiscard]] nvinfer1::IBuilderConfig *prepareConfig() const;
    [[nodiscard]] nvinfer1::INetworkDefinition *createNetwork() const;

public:
    OptimizationContext(ScalerConfig config, nvinfer1::ILogger &logger, std::filesystem::path path_prefix);
    std::string optimize();
    ~OptimizationContext();
};
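
// Minimal offline-build sketch (an assumption about intended use, not taken
// from the original source; it assumes optimize() returns an error message
// that is empty on success, and "TrtLogger" stands for some user-provided
// nvinfer1::ILogger implementation):
//
//   TrtLogger logger;
//   ScalerConfig cfg{};
//   cfg.input_width  = {128, 1280, 1920};
//   cfg.input_height = {128, 720, 1080};
//   cfg.batch        = {1, 1, 1};
//   cfg.use_fp16     = true;
//
//   OptimizationContext opt(cfg, logger, "/path/to/model_dir");
//   std::string err = opt.optimize();
//   if (!err.empty()) { /* report err */ }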
class InferenceSession;

// Deserializes an engine previously built for a ScalerConfig and owns the
// resulting nvinfer1::ICudaEngine.
class InferenceContext {
    nvinfer1::ILogger &logger;
    nvinfer1::IRuntime *runtime;
    std::filesystem::path path_engine;
    nvinfer1::ICudaEngine *engine;

    friend class InferenceSession;

public:
    ScalerConfig config;

    InferenceContext(ScalerConfig config, nvinfer1::ILogger &logger, const std::filesystem::path &path_prefix);
    bool has_file();
    std::string load_engine();
    bool good() {
        return runtime != nullptr && engine != nullptr;
    }
};
// One execution context over a loaded engine: owns the CUDA stream, the
// input-consumed event, and the device buffers used during inference.
class InferenceSession {
    InferenceContext ctx;
    nvinfer1::IExecutionContext *context;
    void *execution_memory;
    int32_t last_batch, last_height, last_width;
    std::atomic<bool> good_;

public:
    cudaStream_t stream;
    cudaEvent_t input_consumed;
    void *input, *output;

    explicit InferenceSession(InferenceContext &ctx);
    ~InferenceSession();

    [[nodiscard]] bool good() const { return good_; }

    std::string init();
    std::string allocation();
    std::string deallocation();
    void config(int32_t batch, int32_t height, int32_t width);
    std::pair<int32_t, int32_t> detect_scale();
    bool inference();
};
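
// End-to-end sketch, continuing the build sketch above (again an assumption
// about intended use, not from the original source; it assumes the
// std::string-returning methods yield an error message that is empty on
// success, and that input/output are device buffers the caller fills and
// reads on `stream`):
//
//   InferenceContext ctx(cfg, logger, "/path/to/model_dir");
//   if (!ctx.has_file()) { /* build it first via OptimizationContext */ }
//   if (!ctx.load_engine().empty() || !ctx.good()) { /* handle error */ }
//
//   InferenceSession session(ctx);
//   if (!session.init().empty() || !session.allocation().empty()) { /* handle error */ }
//   session.config(/*batch=*/1, /*height=*/720, /*width=*/1280);
//   // ... async-copy a frame into session.input on session.stream ...
//   if (session.inference()) {
//       // synchronize on session.stream (and session.input_consumed before
//       // reusing the input buffer), then read session.output
//   }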