c++推理问题 #148

interstellar-space · 2024-06-04T11:45:21Z

C++推理后处理不能画出车道线。可以帮忙看一下问题吗

#include <iostream>
#include <fstream>
#include <NvInfer.h>
#include <memory>
#include <NvOnnxParser.h>
#include <vector>
#include <cuda_runtime_api.h>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudawarping.hpp>
#include <opencv2/core.hpp>
#include <opencv2/cudaarithm.hpp>
#include <algorithm>
#include <numeric>

struct Detection
{
    float background;
    float foreground;
    float start_y;
    float start_x;
    float theta;
    float length;
    float lane_x_coordinates[72];
};

// utilities ----------------------------------------------------------------------------------------------------------
// class to log errors, warnings, and other information during the build and inference phases
class Logger : public nvinfer1::ILogger
{
public:
    void log(Severity severity, const char *msg) noexcept override
    {
        // remove this 'if' if you need more logged info
        if ((severity == Severity::kERROR) || (severity == Severity::kINTERNAL_ERROR))
        {
            std::cout << msg << "\n";
        }
    }
} gLogger;

// destroy TensorRT objects if something goes wrong
struct TRTDestroy
{
    template <class T>
    void operator()(T *obj) const
    {
        if (obj)
        {
            obj->destroy();
        }
    }
};

template <class T>
using TRTUniquePtr = std::unique_ptr<T, TRTDestroy>;

// calculate size of tensor
size_t getSizeByDim(const nvinfer1::Dims &dims)
{
    size_t size = 1;
    for (size_t i = 0; i < dims.nbDims; ++i)
    {
        size *= dims.d[i];
    }
    return size;
}

// preprocessing stage ------------------------------------------------------------------------------------------------
void preprocessImage(cv::cuda::GpuMat &gpu_frame, float *gpu_input, const nvinfer1::Dims &dims)
{
    auto input_width = dims.d[2];
    auto input_height = dims.d[1];
    auto channels = dims.d[0];
    auto input_size = cv::Size(input_width, input_height);
    // resize
    cv::cuda::GpuMat resized;
    cv::cuda::resize(gpu_frame, resized, input_size, 0, 0, cv::INTER_NEAREST);
    // normalize
    cv::cuda::GpuMat flt_image;
    resized.convertTo(flt_image, CV_32FC3, 1.f / 255.f);
    cv::cuda::subtract(flt_image, cv::Scalar(0.485f, 0.456f, 0.406f), flt_image, cv::noArray(), -1);
    cv::cuda::divide(flt_image, cv::Scalar(0.229f, 0.224f, 0.225f), flt_image, 1, -1);
    // to tensor
    std::vector<cv::cuda::GpuMat> chw;
    for (size_t i = 0; i < channels; ++i)
    {
        chw.emplace_back(cv::cuda::GpuMat(input_size, CV_32FC1, gpu_input + i * input_width * input_height));
    }
    cv::cuda::split(flt_image, chw);
}

std::vector<std::vector<float>> softmax(const std::vector<std::vector<float>> &x)
{
    std::vector<std::vector<float>> y(x.size(), std::vector<float>(x[0].size()));

    // Assume that the softmax is performed along the last axis (columns).
    for (size_t i = 0; i < x.size(); ++i)
    {
        float maxVal = *std::max_element(x[i].begin(), x[i].end());

        std::vector<float> expVec(x[i].size());
        float sum = 0.0f;
        for (size_t j = 0; j < x[i].size(); ++j)
        {
            expVec[j] = exp(x[i][j] - maxVal);
            sum += expVec[j];
        }

        for (size_t j = 0; j < y[i].size(); ++j)
        {
            y[i][j] = expVec[j] / sum;
        }
    }

    return y;
}

bool Lane_IOU(const std::vector<float> &parent_box, const std::vector<float> &compared_box, float threshold)
{
    int n_offsets = 72;
    int n_strips = n_offsets - 1;

    int start_a = static_cast<int>(parent_box[2] * n_strips + 0.5);
    int start_b = static_cast<int>(compared_box[2] * n_strips + 0.5);
    int start = std::max(start_a, start_b);
    int end_a = start_a + static_cast<int>(parent_box[4] - 1 + 0.5 - ((parent_box[4] - 1) < 0));
    int end_b = start_b + static_cast<int>(compared_box[4] - 1 + 0.5 - ((compared_box[4] - 1) < 0));
    int end = std::min({end_a, end_b, 71});
    if ((end - start) < 0)
    {
        return false;
    }
    float dist = 0.0f;
    for (int i = 5 + start; i <= 5 + end; ++i)
    {
        if (parent_box[i] < compared_box[i])
        {
            dist += compared_box[i] - parent_box[i];
        }
        else
        {
            dist += parent_box[i] - compared_box[i];
        }
    }
    return dist < (threshold * (end - start + 1));
}

std::vector<int> Lane_nms(const std::vector<std::vector<float>> &proposals, const std::vector<float> &scores, float overlap = 50, int top_k = 4)
{
    std::vector<int> keep_index;
    std::vector<size_t> indices(scores.size());
    std::iota(indices.begin(), indices.end(), 0);

    // Sort indices based on corresponding scores in descending order
    std::sort(indices.begin(), indices.end(), [&scores](int a, int b)
              { return scores[a] > scores[b]; });

    std::vector<int> r_filters(scores.size(), 0);

    for (size_t i = 0; i < indices.size(); ++i)
    {
        size_t index = indices[i];
        if (r_filters[index] == 1) // Ensure we check r_filters for the right index
        {
            continue;
        }

        keep_index.push_back(index);

        if (static_cast<int>(keep_index.size()) > top_k) // We cast size to int to compare with top_k
        {
            break;
        }

        if (i == indices.size() - 1) // If it's the last index, break out of the loop
        {
            break;
        }

        // Iterate over the rest of the proposals from this point on
        for (size_t j = i + 1; j < indices.size(); ++j)
        {
            size_t sub_index = indices[j];
            if (!r_filters[sub_index]) // Check if not already filtered
            {
                if (Lane_IOU(proposals[index], proposals[sub_index], overlap))
                {
                    r_filters[sub_index] = 1;
                }
            }
        }
    }

    // Resize to remove any excess elements in case fewer than top_k were kept
    keep_index.resize(std::min(top_k, static_cast<int>(keep_index.size())));

    return keep_index;
}
std::vector<std::vector<cv::Point2f>> predictions_to_pred(const std::vector<std::vector<float>> &predictions,
                                                          const std::vector<float> &prior_ys,
                                                          int n_strips, int ori_img_w, int ori_img_h, int img_w, int img_h, int cut_height)
{
    std::vector<std::vector<cv::Point2f>> lanes;

    for (const auto &lane : predictions)
    {
        std::vector<float> lane_xs(lane.begin() + 6, lane.end()); // normalized value
        int start = std::min(std::max(0, static_cast<int>(round(lane[2] * n_strips))), n_strips);
        int length = static_cast<int>(round(lane[5]));
        int end = start + length - 1;
        end = std::min(end, static_cast<int>(prior_ys.size()) - 1);

        // Extend prediction until x is outside the image
        std::vector<bool> mask(start, false);
        for (int i = start - 1; i >= 0; --i)
        {
            if (lane_xs[i] < 0.0f || lane_xs[i] > 1.0f)
            {
                mask[i] = true;
            }
            else if (i < start - 1 && mask[i + 1])
            {
                mask[i] = true;
            }
        }

        std::fill(lane_xs.begin() + end + 1, lane_xs.end(), -2.0f);
        for (int i = 0; i < start; ++i)
        {
            if (mask[i])
            {
                lane_xs[i] = -2.0f;
            }
        }

        std::vector<float> lane_ys;
        for (size_t i = 0; i < lane_xs.size(); ++i)
        {
            if (lane_xs[i] >= 0)
            {
                lane_ys.push_back(prior_ys[i]);
            }
        }

        lane_xs.erase(std::remove_if(lane_xs.begin(), lane_xs.end(),
                                     [](float x)
                                     { return x < 0; }),
                      lane_xs.end());

        if (lane_xs.size() <= 1)
        {
            continue;
        }

        std::reverse(lane_xs.begin(), lane_xs.end());
        std::reverse(lane_ys.begin(), lane_ys.end());

        auto scale_x = static_cast<float>(ori_img_w) / img_w;
        auto scale_y = static_cast<float>(ori_img_h) / (img_h - cut_height);

        for (size_t i = 0; i < lane_xs.size(); ++i)
        {
            lane_xs[i] = lane_xs[i] * scale_x;
            lane_ys[i] = (lane_ys[i] * (img_h - cut_height) + cut_height) * scale_y;
        }

        std::vector<cv::Point2f> points;
        for (size_t i = 0; i < lane_xs.size(); ++i)
        {
            points.emplace_back(lane_xs[i], lane_ys[i]);
        }

        std::cout << "lane_xs: ";
        for (const auto &x : lane_xs)
        {
            std::cout << x << " ";
        }
        std::cout << "\nlane_ys: ";
        for (const auto &y : lane_ys)
        {
            std::cout << y << " ";
        }
        std::cout << "\n";

        lanes.push_back(points);
    }

    std::cout << "lanes: " << lanes.size() << "\n";

    return lanes;
}

cv::Mat imshow_lanes(cv::Mat &img, const std::vector<std::vector<cv::Point2f>> &lanes, int width = 4)
{
    std::vector<std::vector<cv::Point>> lanes_xys;

    for (const auto &lane : lanes)
    {
        std::vector<cv::Point> xys;
        for (const auto &point : lane)
        {
            if (point.x <= 0.0f || point.y <= 0.0f)
            {
                continue;
            }
            int x = static_cast<int>(point.x);
            int y = static_cast<int>(point.y);
            xys.emplace_back(x, y);
        }
        if (!xys.empty())
        {
            lanes_xys.push_back(xys);
        }
    }
    std::cout << "lanes_xys: " << lanes_xys.size() << "\n";

    std::sort(lanes_xys.begin(), lanes_xys.end(),
              [](const std::vector<cv::Point> &a, const std::vector<cv::Point> &b)
              { return a[0].x < b[0].x; });

    std::vector<cv::Scalar> COLORS = {cv::Scalar(255, 0, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 0, 255), cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 255)};

    for (size_t idx = 0; idx < lanes_xys.size(); ++idx)
    {
        const auto &xys = lanes_xys[idx];
        for (size_t i = 1; i < xys.size(); ++i)
        {
            cv::line(img, xys[i - 1], xys[i], COLORS[idx % COLORS.size()], width);
        }
    }

    cv::imwrite("test.jpg", img);
    // cv::imshow("Lanes", img);
    // cv::waitKey(0);

    return img;
}

// post-processing stage ----------------------------------------------------------------------------------------------
void postprocessResults(cv::Mat &frame, float *gpu_output, const nvinfer1::Dims &dims, int batch_size)
{
    float conf_threshold = 0.4f;

    // copy results from GPU to CPU
    std::vector<float> cpu_output(getSizeByDim(dims) * batch_size);
    cudaMemcpy(cpu_output.data(), gpu_output, cpu_output.size() * sizeof(float), cudaMemcpyDeviceToHost);
    for (int i = 0; i < cpu_output.size(); ++i)
    {
        std::cout << cpu_output[i] << " ";
        if (!((i + 1) % 78))
        {
            std::cout << "\n\n";
        }
    }
    std::cout << "------------------------\n";
    std::vector<std::vector<float>> detections(cpu_output.size() / 78, std::vector<float>(2));
    for (size_t i = 0; i < cpu_output.size() / 78; ++i)
    {
        detections[i][0] = cpu_output[i * 78];
        detections[i][1] = cpu_output[(i + 1) * 78];
    }
    const auto xyscores = softmax(detections);
    for (const auto &score : xyscores)
    {
        for (const auto &val : score)
        {
            std::cout << val << " ";
        }
        std::cout << "\n";
    }
    std::cout << "------------------------\n";
    std::vector<float> scores;
    std::vector<std::vector<float>> predictions(cpu_output.size() / 78, std::vector<float>(78));
    for (int i = 0; i < xyscores.size(); ++i)
    {
        scores.emplace_back(xyscores[i][1]);
        if (xyscores[i][1] >= conf_threshold)
        {
            std::copy(cpu_output.begin() + i * 78, cpu_output.begin() + (i + 1) * 78, predictions[i].begin());
        }
    }
    std::cout << "predictions: " << predictions.size() << "\n";

    int n_offsets = 72;
    int n_strips = n_offsets - 1;
    int img_w = 800;
    int img_h = 320;
    int ori_img_w = 1280;
    int ori_img_h = 720;
    int cut_height = 160;
    std::vector<std::vector<float>> nms_predictions;
    for (auto &prediction : predictions)
    {
        prediction[5] = std::round(prediction[5] * n_strips);
        std::vector<float> predict(sizeof(Detection) - sizeof(float));
        for (size_t j = 0; j < 4; ++j)
        {
            predict[j] = prediction[j];
        }
        for (size_t j = 5; j < prediction.size(); ++j)
        {
            predict[j - 1] = prediction[j];
        }
        nms_predictions.emplace_back(predict);
    }
    std::cout << "nms_predictions: " << nms_predictions.size() << "\n";

    for (auto &nms_prediction : nms_predictions)
    {
        nms_prediction[4] *= n_strips;
        for (size_t i = 5; i < nms_prediction.size(); ++i)
        {
            nms_prediction[i] *= ori_img_w - 1;
        }
    }

    auto keep = Lane_nms(nms_predictions, scores, 50, 5);
    std::cout << "keep: " << keep.size() << "\n";

    std::vector<float> prior_ys(n_offsets);
    for (int i = 0; i < n_offsets; ++i)
    {
        prior_ys[i] = 1.0f - static_cast<float>(i) / n_strips;
    }

    auto lanes = predictions_to_pred(predictions, prior_ys, n_strips, ori_img_w, ori_img_h, img_w, img_h, cut_height);
    imshow_lanes(frame, lanes);
}

// initialize TensorRT engine and parse ONNX model --------------------------------------------------------------------
void parseOnnxModel(const std::string &model_path, TRTUniquePtr<nvinfer1::ICudaEngine> &engine,
                    TRTUniquePtr<nvinfer1::IExecutionContext> &context)
{
    TRTUniquePtr<nvinfer1::IBuilder> builder{nvinfer1::createInferBuilder(gLogger)};
    TRTUniquePtr<nvinfer1::INetworkDefinition> network{builder->createNetworkV2(1)};
    TRTUniquePtr<nvonnxparser::IParser> parser{nvonnxparser::createParser(*network, gLogger)};
    TRTUniquePtr<nvinfer1::IBuilderConfig> config{builder->createBuilderConfig()};
    // parse ONNX
    if (!parser->parseFromFile(model_path.c_str(), static_cast<int>(nvinfer1::ILogger::Severity::kINFO)))
    {
        std::cerr << "ERROR: could not parse the model.\n";
        return;
    }
    // allow TensorRT to use up to 1GB of GPU memory for tactic selection.
    config->setMaxWorkspaceSize(1ULL << 30);
    // use FP16 mode if possible
    if (builder->platformHasFastFp16())
    {
        std::cout << "fp16\n";
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }
    // we have only one image in batch
    builder->setMaxBatchSize(1);
    // generate TensorRT engine optimized for the target platform
    engine.reset(builder->buildEngineWithConfig(*network, *config));
    context.reset(engine->createExecutionContext());
}

// initialize TensorRT engine from serialized model --------------------------------------------------------------------
void loadTrtEngine(const std::string &engine_path, TRTUniquePtr<nvinfer1::IRuntime> &runtime,
                   TRTUniquePtr<nvinfer1::ICudaEngine> &engine,
                   TRTUniquePtr<nvinfer1::IExecutionContext> &context)
{
    std::ifstream engine_file(engine_path, std::ios::binary);
    if (!engine_file)
    {
        std::cerr << "ERROR: could not open the engine file.\n";
        return;
    }

    // 计算文件大小
    engine_file.seekg(0, engine_file.end);
    size_t file_size = engine_file.tellg();
    engine_file.seekg(0, engine_file.beg);

    // 加载文件内容到内存中
    std::vector<char> trt_model_stream(file_size);
    engine_file.read(trt_model_stream.data(), file_size);
    engine_file.close();

    // 创建runtime
    runtime.reset(nvinfer1::createInferRuntime(gLogger));

    // 反序列化计划文件并创建引擎
    engine.reset(runtime->deserializeCudaEngine(trt_model_stream.data(), file_size, nullptr));

    // 创建上下文
    context.reset(engine->createExecutionContext());
}

// main pipeline ------------------------------------------------------------------------------------------------------
int main(int argc, char *argv[])
{
    if (argc < 3)
    {
        std::cerr << "usage: " << argv[0] << " model.onnx image.jpg\n";
        return -1;
    }
    std::string model_path(argv[1]);
    std::string image_path(argv[2]);
    int batch_size = 1;

    // initialize TensorRT engine and parse ONNX model
    TRTUniquePtr<nvinfer1::IRuntime> runtime{nullptr};
    TRTUniquePtr<nvinfer1::ICudaEngine> engine{nullptr};
    TRTUniquePtr<nvinfer1::IExecutionContext> context{nullptr};
    // parseOnnxModel(model_path, engine, context);
    loadTrtEngine(model_path, runtime, engine, context);

    // get sizes of input and output and allocate memory required for input data and for output data
    std::vector<nvinfer1::Dims> input_dims;               // we expect only one input
    std::vector<nvinfer1::Dims> output_dims;              // and one output
    std::vector<void *> buffers(engine->getNbBindings()); // buffers for input and output data
    for (size_t i = 0; i < engine->getNbBindings(); ++i)
    {
        auto binding_size = getSizeByDim(engine->getBindingDimensions(i)) * batch_size * sizeof(float);
        cudaMalloc(&buffers[i], binding_size);
        if (engine->bindingIsInput(i))
        {
            input_dims.emplace_back(engine->getBindingDimensions(i));
        }
        else
        {
            output_dims.emplace_back(engine->getBindingDimensions(i));
        }
    }
    if (input_dims.empty() || output_dims.empty())
    {
        std::cerr << "Expect at least one input and one output for network\n";
        return -1;
    }

    // read input image
    cv::Mat frame = cv::imread(image_path);
    if (frame.empty())
    {
        std::cerr << "Input image " << image_path << " load failed\n";
        return -1;
    }
    cv::cuda::GpuMat gpu_frame;
    // upload image to GPU
    gpu_frame.upload(frame);

    // preprocess input data
    preprocessImage(gpu_frame, (float *)buffers[0], input_dims[0]);
    // inference
    context->enqueue(batch_size, buffers.data(), 0, nullptr);
    // postprocess results
    postprocessResults(frame, (float *)buffers[1], output_dims[0], batch_size);

    for (void *buf : buffers)
    {
        cudaFree(buf);
    }
    return 0;
}

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

c++推理问题 #148

c++推理问题 #148

interstellar-space commented Jun 4, 2024 •

edited

Loading

c++推理问题 #148

c++推理问题 #148

Comments

interstellar-space commented Jun 4, 2024 • edited Loading

interstellar-space commented Jun 4, 2024 •

edited

Loading