diff --git a/perception/autoware_tensorrt_yolov10/CMakeLists.txt b/perception/autoware_tensorrt_yolov10/CMakeLists.txt
new file mode 100644
index 0000000000000..daaa0c5ed77ba
--- /dev/null
+++ b/perception/autoware_tensorrt_yolov10/CMakeLists.txt
@@ -0,0 +1,175 @@
+cmake_minimum_required(VERSION 3.17)
+project(autoware_tensorrt_yolov10)
+
+find_package(tensorrt_common)
+if(NOT ${tensorrt_common_FOUND})
+  message(WARNING "The tensorrt_common package is not found. Please check its dependencies.")
+  return()
+endif()
+
+find_package(autoware_cmake REQUIRED)
+autoware_package()
+
+find_package(OpenCV REQUIRED)
+
+option(CUDA_VERBOSE "Verbose output of CUDA modules" OFF)
+
+# set flags for CUDA availability
+option(CUDA_AVAIL "CUDA available" OFF)
+find_package(CUDA)
+if(CUDA_FOUND)
+  find_library(CUBLAS_LIBRARIES cublas HINTS
+    ${CUDA_TOOLKIT_ROOT_DIR}/lib64
+    ${CUDA_TOOLKIT_ROOT_DIR}/lib
+  )
+  if(CUDA_VERBOSE)
+    message("CUDA is available!")
+    message("CUDA Libs: ${CUDA_LIBRARIES}")
+    message("CUDA Headers: ${CUDA_INCLUDE_DIRS}")
+  endif()
+  # Note: cublas_device was deprecated in CUDA version 9.2
+  # https://forums.developer.nvidia.com/t/where-can-i-find-libcublas-device-so-or-libcublas-device-a/67251/4
+  # In LibTorch, CUDA_cublas_device_LIBRARY is used.
+  unset(CUDA_cublas_device_LIBRARY CACHE)
+  set(CUDA_AVAIL ON)
+else()
+  message("CUDA NOT FOUND")
+  set(CUDA_AVAIL OFF)
+endif()
+
+# set flags for TensorRT availability
+option(TRT_AVAIL "TensorRT available" OFF)
+# try to find the TensorRT modules
+find_library(NVINFER nvinfer)
+find_library(NVONNXPARSER nvonnxparser)
+if(NVINFER AND NVONNXPARSER)
+  if(CUDA_VERBOSE)
+    message("TensorRT is available!")
+    message("NVINFER: ${NVINFER}")
+    message("NVONNXPARSER: ${NVONNXPARSER}")
+  endif()
+  set(TRT_AVAIL ON)
+else()
+  message("TensorRT is NOT Available")
+  set(TRT_AVAIL OFF)
+endif()
+
+# set flags for CUDNN availability
+option(CUDNN_AVAIL "CUDNN available" OFF)
+# try to find the CUDNN module
+find_library(CUDNN_LIBRARY
+  NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name}
+  PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
+  PATH_SUFFIXES lib lib64 bin
+  DOC "CUDNN library."
+)
+if(CUDNN_LIBRARY)
+  if(CUDA_VERBOSE)
+    message(STATUS "CUDNN is available!")
+    message(STATUS "CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
+  endif()
+  set(CUDNN_AVAIL ON)
+else()
+  message("CUDNN is NOT Available")
+  set(CUDNN_AVAIL OFF)
+endif()
+
+find_package(OpenMP)
+if(OpenMP_FOUND)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+endif()
+
+##########
+# tensorrt_yolov10
+ament_auto_add_library(${PROJECT_NAME} SHARED
+  src/tensorrt_yolov10.cpp
+  src/tensorrt_yolov10_node.cpp
+)
+
+ament_target_dependencies(${PROJECT_NAME}
+  OpenCV
+)
+
+if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL)
+  # Officially, add_library supports .cu file compilation.
+  # However, as of cmake 3.22.1, it seems to fail compilation because compiler flags for
+  # C++ are directly passed to nvcc (they are originally space separated,
+  # but nvcc assumes they are comma separated as the argument of the `-Xcompiler` option).
+  # That is why `cuda_add_library` is used here.
+  # cuda_add_library(${PROJECT_NAME}_gpu_preprocess
+  #   SHARED
+  #   src/preprocess.cu
+  # )
+
+  # target_include_directories(${PROJECT_NAME}_gpu_preprocess PUBLIC
+  #   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+  #   $<INSTALL_INTERFACE:include>
+  # )
+
+  # target_link_libraries(${PROJECT_NAME}
+  #   ${tensorrt_common_LIBRARIES}
+  #   ${PROJECT_NAME}_gpu_preprocess
+  # )
+else()
+  target_link_libraries(${PROJECT_NAME}
+    ${tensorrt_common_LIBRARIES}
+  )
+endif()
+
+target_compile_definitions(${PROJECT_NAME} PRIVATE
+  TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR}
+)
+
+ament_auto_add_library(yolov10_single_image_inference_node SHARED
+  src/yolov10_single_image_inference_node.cpp
+)
+
+ament_target_dependencies(yolov10_single_image_inference_node
+  OpenCV
+)
+
+target_link_libraries(yolov10_single_image_inference_node
+  ${PROJECT_NAME}
+  stdc++fs
+)
+
+target_compile_definitions(yolov10_single_image_inference_node PRIVATE
+  TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR}
+)
+
+rclcpp_components_register_node(yolov10_single_image_inference_node
+  PLUGIN "autoware::tensorrt_yolov10::Yolov10SingleImageInferenceNode"
+  EXECUTABLE yolov10_single_image_inference
+)
+
+ament_auto_add_library(${PROJECT_NAME}_node SHARED
+  src/tensorrt_yolov10_node.cpp
+)
+
+ament_target_dependencies(${PROJECT_NAME}_node
+  OpenCV
+)
+
+target_link_libraries(${PROJECT_NAME}_node
+  ${PROJECT_NAME}
+)
+
+target_compile_definitions(${PROJECT_NAME}_node PRIVATE
+  TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR}
+)
+
+rclcpp_components_register_node(${PROJECT_NAME}_node
+  PLUGIN "autoware::tensorrt_yolov10::TrtYolov10Node"
+  EXECUTABLE ${PROJECT_NAME}_node_exe
+)
+
+if(BUILD_TESTING)
+  find_package(ament_lint_auto REQUIRED)
+  ament_lint_auto_find_test_dependencies()
+endif()
+
+ament_auto_package(INSTALL_TO_SHARE
+  launch
+  config
+)
diff --git a/perception/autoware_tensorrt_yolov10/README.md b/perception/autoware_tensorrt_yolov10/README.md
new file mode 100644
index 0000000000000..aa813d5ad5a04
--- /dev/null
+++ b/perception/autoware_tensorrt_yolov10/README.md
@@ -0,0 +1,136 @@
+# autoware_tensorrt_yolov10
+
+## Purpose
+
+This package detects target objects, e.g., cars, trucks, bicycles, and pedestrians, on an image using the [YOLOV10](https://github.com/THU-MIG/yolov10) model.
+
+## Inputs / Outputs
+
+### Input
+
+| Name       | Type                | Description     |
+| ---------- | ------------------- | --------------- |
+| `in/image` | `sensor_msgs/Image` | The input image |
+
+### Output
+
+| Name          | Type                                               | Description                                        |
+| ------------- | -------------------------------------------------- | -------------------------------------------------- |
+| `out/objects` | `tier4_perception_msgs/DetectedObjectsWithFeature` | The detected objects with 2D bounding boxes        |
+| `out/image`   | `sensor_msgs/Image`                                | The image with 2D bounding boxes for visualization |
+
+## Assumptions / Known limits
+
+The label contained in the detected 2D bounding boxes (i.e., `out/objects`) will be one of the following:
+
+- CAR
+- PEDESTRIAN ("PERSON" will also be categorized as "PEDESTRIAN")
+- BUS
+- TRUCK
+- BICYCLE
+- MOTORCYCLE
+
+If other labels (case insensitive) are contained in the file specified via the `label_file` parameter,
+they are labeled as `UNKNOWN`, while the detected rectangles are still drawn in the visualization result (`out/image`).
+
+## ONNX model
+
+You can download `yolov10m.onnx` from the [releases](https://github.com/THU-MIG/yolov10/releases) page.
+
+## Label file
+
+This file defines the correspondence between class indices (integers output by the YOLOV10 network) and
+class labels (human-readable strings). This package maps class IDs (incremented from 0)
+to labels according to the order in this file, as sketched below.
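+
+For reference, a minimal sketch of how such a file could be loaded (`load_label_map` is a hypothetical helper for illustration; the node implements its own `readLabelFile`):
+
+```cpp
+#include <fstream>
+#include <map>
+#include <string>
+
+// Build a class-ID -> label map from a label file with one label per line.
+// IDs are assigned in file order, starting from 0.
+std::map<int, std::string> load_label_map(const std::string & label_path)
+{
+  std::map<int, std::string> label_map;
+  std::ifstream label_file(label_path);
+  std::string label;
+  int id = 0;
+  while (std::getline(label_file, label)) {
+    label_map.insert({id++, label});
+  }
+  return label_map;
+}
+```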
+
+Currently, this file is the COCO label file, which contains the following labels:
+
+```
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+dining table
+toilet
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+```
+
+## Reference repositories
+
+- 
+
+## Legal Notice
+
+The inference code is licensed under Apache 2.0. The model and training code are licensed under AGPL-3.0. You can check the details at . To inquire about a commercial license for use of the trained model weights, please contact the YOLOv10 authors.
diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/calibrator.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/calibrator.hpp
new file mode 100644
index 0000000000000..a3e1fd0e257fb
--- /dev/null
+++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/calibrator.hpp
@@ -0,0 +1,493 @@
+// Copyright 2023 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */ + +#ifndef AUTOWARE__TENSORRT_YOLOV10__CALIBRATOR_HPP_ +#define AUTOWARE__TENSORRT_YOLOV10__CALIBRATOR_HPP_ + +#include "cuda_utils/cuda_check_error.hpp" +#include "cuda_utils/cuda_unique_ptr.hpp" + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace autoware::tensorrt_yolov10 +{ +class ImageStream +{ +public: + ImageStream( + int batch_size, nvinfer1::Dims input_dims, const std::vector calibration_images) + : batch_size_(batch_size), + calibration_images_(calibration_images), + current_batch_(0), + max_batches_(calibration_images.size() / batch_size_), + input_dims_(input_dims) + { + batch_.resize(batch_size_ * input_dims_.d[1] * input_dims_.d[2] * input_dims_.d[3]); + } + + int getBatchSize() const { return batch_size_; } + + int getMaxBatches() const { return max_batches_; } + + float * getBatch() { return &batch_[0]; } + + nvinfer1::Dims getInputDims() { return input_dims_; } + + /** + * @brief Preprocess in calibration + * @param[in] images input images + * @param[in] input_dims input dimensions + * @param[in] norm normalization (0.0-1.0) + * @return vector preprocessed data + */ + std::vector preprocess( + const std::vector & images, nvinfer1::Dims input_dims, double norm) + { + std::vector input_h_; + const auto batch_size = images.size(); + input_dims.d[0] = batch_size; + const float input_height = static_cast(input_dims.d[2]); + const float input_width = static_cast(input_dims.d[3]); + std::vector dst_images; + std::vector scales_; + scales_.clear(); + for (const auto & image : images) { + cv::Mat dst_image; + const float scale = std::min(input_width / image.cols, input_height / image.rows); + scales_.emplace_back(scale); + const auto scale_size = cv::Size(image.cols * scale, image.rows * scale); + cv::resize(image, dst_image, scale_size, 0, 0, cv::INTER_CUBIC); + const auto bottom = input_height - dst_image.rows; + const auto right = input_width - dst_image.cols; + copyMakeBorder( + dst_image, dst_image, 0, bottom, 0, right, cv::BORDER_CONSTANT, {114, 114, 114}); + dst_images.emplace_back(dst_image); + } + const auto chw_images = + cv::dnn::blobFromImages(dst_images, norm, cv::Size(), cv::Scalar(), false, false, CV_32F); + + const auto data_length = chw_images.total(); + input_h_.reserve(data_length); + const auto flat = chw_images.reshape(1, data_length); + input_h_ = chw_images.isContinuous() ? 
flat : flat.clone(); + return input_h_; + } + + /** + * @brief Decode data in calibration + * @param[in] scale normalization (0.0-1.0) + * @return bool succ or fail + */ + bool next(double scale) + { + if (current_batch_ == max_batches_) { + return false; + } + + for (int i = 0; i < batch_size_; ++i) { + auto image = + cv::imread(calibration_images_[batch_size_ * current_batch_ + i].c_str(), cv::IMREAD_COLOR); + std::cout << current_batch_ << " " << i << " Preprocess " + << calibration_images_[batch_size_ * current_batch_ + i].c_str() << std::endl; + auto input = preprocess({image}, input_dims_, scale); + batch_.insert( + batch_.begin() + i * input_dims_.d[1] * input_dims_.d[2] * input_dims_.d[3], input.begin(), + input.end()); + } + + ++current_batch_; + return true; + } + + /** + * @brief Reset calibration + */ + void reset() { current_batch_ = 0; } + +private: + int batch_size_; + std::vector calibration_images_; + int current_batch_; + int max_batches_; + nvinfer1::Dims input_dims_; + std::vector batch_; +}; + +/**Percentile calibration using legacy calibrator*/ +/** + * @class Int8LegacyCalibrator + * @brief Calibrator for Percentile + * @warning We are confirming bug on Tegra like Xavier and Orin. We recommend use MinMax calibrator + */ +class Int8LegacyCalibrator : public nvinfer1::IInt8LegacyCalibrator +{ +public: + Int8LegacyCalibrator( + ImageStream & stream, const std::string calibration_cache_file, + const std::string histogram_cache_file, double scale = 1.0, bool read_cache = true, + double quantile = 0.999999, double cutoff = 0.999999) + : stream_(stream), + calibration_cache_file_(calibration_cache_file), + histogram_cache_file_(histogram_cache_file), + read_cache_(read_cache) + { + auto d = stream_.getInputDims(); + input_count_ = stream_.getBatchSize() * d.d[1] * d.d[2] * d.d[3]; + CHECK_CUDA_ERROR(cudaMalloc(&device_input_, input_count_ * sizeof(float))); + scale_ = scale; + quantile_ = quantile; + cutoff_ = cutoff; + auto algType = getAlgorithm(); + switch (algType) { + case (nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION): + std::cout << "CalibrationAlgoType : kLEGACY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION_2): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION_2" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kMINMAX_CALIBRATION): + std::cout << "CalibrationAlgoType : kMINMAX_CALIBRATION" << std::endl; + break; + default: + std::cout << "No CalibrationAlgType" << std::endl; + break; + } + } + int getBatchSize() const noexcept override { return stream_.getBatchSize(); } + + virtual ~Int8LegacyCalibrator() { CHECK_CUDA_ERROR(cudaFree(device_input_)); } + + bool getBatch(void * bindings[], const char * names[], int nb_bindings) noexcept override + { + (void)names; + (void)nb_bindings; + + if (!stream_.next(scale_)) { + return false; + } + try { + CHECK_CUDA_ERROR(cudaMemcpy( + device_input_, stream_.getBatch(), input_count_ * sizeof(float), cudaMemcpyHostToDevice)); + } catch (const std::exception & e) { + // Do nothing + } + bindings[0] = device_input_; + return true; + } + + const void * readCalibrationCache(size_t & length) noexcept override + { + calib_cache_.clear(); + std::ifstream input(calibration_cache_file_, std::ios::binary); + input >> std::noskipws; + if (read_cache_ && input.good()) { + std::copy( + std::istream_iterator(input), 
std::istream_iterator(), + std::back_inserter(calib_cache_)); + } + + length = calib_cache_.size(); + if (length) { + std::cout << "Using cached calibration table to build the engine" << std::endl; + } else { + std::cout << "New calibration table will be created to build the engine" << std::endl; + } + return length ? &calib_cache_[0] : nullptr; + } + + void writeCalibrationCache(const void * cache, size_t length) noexcept override + { + std::ofstream output(calibration_cache_file_, std::ios::binary); + output.write(reinterpret_cast(cache), length); + } + + double getQuantile() const noexcept + { + printf("Quantile %f\n", quantile_); + return quantile_; + } + + double getRegressionCutoff(void) const noexcept + { + printf("Cutoff %f\n", cutoff_); + return cutoff_; + } + + const void * readHistogramCache(std::size_t & length) noexcept + { + hist_cache_.clear(); + std::ifstream input(histogram_cache_file_, std::ios::binary); + input >> std::noskipws; + if (read_cache_ && input.good()) { + std::copy( + std::istream_iterator(input), std::istream_iterator(), + std::back_inserter(hist_cache_)); + } + + length = hist_cache_.size(); + if (length) { + std::cout << "Using cached histogram table to build the engine" << std::endl; + } else { + std::cout << "New histogram table will be created to build the engine" << std::endl; + } + return length ? &hist_cache_[0] : nullptr; + } + void writeHistogramCache(void const * ptr, std::size_t length) noexcept + { + std::ofstream output(histogram_cache_file_, std::ios::binary); + output.write(reinterpret_cast(ptr), length); + } + +private: + ImageStream stream_; + const std::string calibration_cache_file_; + const std::string histogram_cache_file_; + bool read_cache_{true}; + size_t input_count_; + void * device_input_{nullptr}; + std::vector calib_cache_; + std::vector hist_cache_; + double scale_; + double quantile_; + double cutoff_; +}; + +/** + * @class Int8LegacyCalibrator + * @brief Calibrator for Percentile + * @warning This calibrator causes crucial accuracy drop for YOLOV10. 
+ */ +class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 +{ +public: + Int8EntropyCalibrator( + ImageStream & stream, const std::string calibration_cache_file, double scale = 1.0, + bool read_cache = true) + : stream_(stream), calibration_cache_file_(calibration_cache_file), read_cache_(read_cache) + { + auto d = stream_.getInputDims(); + input_count_ = stream_.getBatchSize() * d.d[1] * d.d[2] * d.d[3]; + CHECK_CUDA_ERROR(cudaMalloc(&device_input_, input_count_ * sizeof(float))); + scale_ = scale; + auto algType = getAlgorithm(); + switch (algType) { + case (nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION): + std::cout << "CalibrationAlgoType : kLEGACY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION_2): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION_2" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kMINMAX_CALIBRATION): + std::cout << "CalibrationAlgoType : kMINMAX_CALIBRATION" << std::endl; + break; + default: + std::cout << "No CalibrationAlgType" << std::endl; + break; + } + } + int getBatchSize() const noexcept override { return stream_.getBatchSize(); } + + virtual ~Int8EntropyCalibrator() { CHECK_CUDA_ERROR(cudaFree(device_input_)); } + + bool getBatch(void * bindings[], const char * names[], int nb_bindings) noexcept override + { + (void)names; + (void)nb_bindings; + + if (!stream_.next(scale_)) { + return false; + } + try { + CHECK_CUDA_ERROR(cudaMemcpy( + device_input_, stream_.getBatch(), input_count_ * sizeof(float), cudaMemcpyHostToDevice)); + } catch (const std::exception & e) { + // Do nothing + } + bindings[0] = device_input_; + return true; + } + + const void * readCalibrationCache(size_t & length) noexcept override + { + calib_cache_.clear(); + std::ifstream input(calibration_cache_file_, std::ios::binary); + input >> std::noskipws; + if (read_cache_ && input.good()) { + std::copy( + std::istream_iterator(input), std::istream_iterator(), + std::back_inserter(calib_cache_)); + } + + length = calib_cache_.size(); + if (length) { + std::cout << "Using cached calibration table to build the engine" << std::endl; + } else { + std::cout << "New calibration table will be created to build the engine" << std::endl; + } + return length ? 
&calib_cache_[0] : nullptr; + } + + void writeCalibrationCache(const void * cache, size_t length) noexcept override + { + std::ofstream output(calibration_cache_file_, std::ios::binary); + output.write(reinterpret_cast(cache), length); + } + +private: + ImageStream stream_; + const std::string calibration_cache_file_; + bool read_cache_{true}; + size_t input_count_; + void * device_input_{nullptr}; + std::vector calib_cache_; + std::vector hist_cache_; + double scale_; +}; + +/** + * @class Int8MinMaxCalibrator + * @brief Calibrator for MinMax + * @warning We strongly recommend MinMax calibrator for YOLOV10 + */ +class Int8MinMaxCalibrator : public nvinfer1::IInt8MinMaxCalibrator +{ +public: + Int8MinMaxCalibrator( + ImageStream & stream, const std::string calibration_cache_file, double scale = 1.0, + bool read_cache = true) + : stream_(stream), calibration_cache_file_(calibration_cache_file), read_cache_(read_cache) + { + auto d = stream_.getInputDims(); + input_count_ = stream_.getBatchSize() * d.d[1] * d.d[2] * d.d[3]; + CHECK_CUDA_ERROR(cudaMalloc(&device_input_, input_count_ * sizeof(float))); + scale_ = scale; + auto algType = getAlgorithm(); + switch (algType) { + case (nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION): + std::cout << "CalibrationAlgoType : kLEGACY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION_2): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION_2" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kMINMAX_CALIBRATION): + std::cout << "CalibrationAlgoType : kMINMAX_CALIBRATION" << std::endl; + break; + default: + std::cout << "No CalibrationAlgType" << std::endl; + break; + } + } + int getBatchSize() const noexcept override { return stream_.getBatchSize(); } + + virtual ~Int8MinMaxCalibrator() { CHECK_CUDA_ERROR(cudaFree(device_input_)); } + + bool getBatch(void * bindings[], const char * names[], int nb_bindings) noexcept override + { + (void)names; + (void)nb_bindings; + + if (!stream_.next(scale_)) { + return false; + } + try { + CHECK_CUDA_ERROR(cudaMemcpy( + device_input_, stream_.getBatch(), input_count_ * sizeof(float), cudaMemcpyHostToDevice)); + } catch (const std::exception & e) { + // Do nothing + } + bindings[0] = device_input_; + return true; + } + + const void * readCalibrationCache(size_t & length) noexcept override + { + calib_cache_.clear(); + std::ifstream input(calibration_cache_file_, std::ios::binary); + input >> std::noskipws; + if (read_cache_ && input.good()) { + std::copy( + std::istream_iterator(input), std::istream_iterator(), + std::back_inserter(calib_cache_)); + } + + length = calib_cache_.size(); + if (length) { + std::cout << "Using cached calibration table to build the engine" << std::endl; + } else { + std::cout << "New calibration table will be created to build the engine" << std::endl; + } + return length ? 
&calib_cache_[0] : nullptr; + } + + void writeCalibrationCache(const void * cache, size_t length) noexcept override + { + std::ofstream output(calibration_cache_file_, std::ios::binary); + output.write(reinterpret_cast(cache), length); + } + +private: + ImageStream stream_; + const std::string calibration_cache_file_; + bool read_cache_{true}; + size_t input_count_; + void * device_input_{nullptr}; + std::vector calib_cache_; + std::vector hist_cache_; + double scale_; +}; +} // namespace autoware::tensorrt_yolov10 + +#endif // AUTOWARE__TENSORRT_YOLOV10__CALIBRATOR_HPP_ diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/preprocess.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/preprocess.hpp new file mode 100644 index 0000000000000..e1e47edc9d996 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/preprocess.hpp @@ -0,0 +1,202 @@ +// Copyright 2023 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__TENSORRT_YOLOV10__PREPROCESS_HPP_ +#define AUTOWARE__TENSORRT_YOLOV10__PREPROCESS_HPP_ + +#include +#include +#include +#include +#include + +namespace autoware +{ +namespace tensorrt_yolox +{ +struct Roi +{ + int x; + int y; + int w; + int h; +}; + +/** + * @brief Resize a image using bilinear interpolation on gpus + * @param[out] dst Resized image + * @param[in] src image + * @param[in] d_w width for resized image + * @param[in] d_h height for resized image + * @param[in] d_c channel for resized image + * @param[in] s_w width for input image + * @param[in] s_h height for input image + * @param[in] s_c channel for input image + * @param[in] stream cuda stream + */ +extern void resize_bilinear_gpu( + unsigned char * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, + cudaStream_t stream); + +/** + * @brief Letterbox a image on gpus + * @param[out] dst letterbox-ed image + * @param[in] src image + * @param[in] d_w width for letterbox-ing + * @param[in] d_h height for letterbox-ing + * @param[in] d_c channel for letterbox-ing + * @param[in] s_w width for input image + * @param[in] s_h height for input image + * @param[in] s_c channel for input image + * @param[in] stream cuda stream + */ +extern void letterbox_gpu( + unsigned char * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, + cudaStream_t stream); + +/** + * @brief NHWC to NHWC conversion + * @param[out] dst converted image + * @param[in] src image + * @param[in] d_w width for a image + * @param[in] d_h height for a image + * @param[in] d_c channel for a image + * @param[in] stream cuda stream + */ +extern void nchw_to_nhwc_gpu( + unsigned char * dst, unsigned char * src, int d_w, int d_h, int d_c, cudaStream_t stream); + +/** + * @brief Unsigned char to float32 for inference + * @param[out] dst32 converted image + * @param[in] src image + * @param[in] d_w width for a image + * @param[in] d_h height for a image + * @param[in] d_c channel 
for a image + * @param[in] stream cuda stream + */ +extern void to_float_gpu( + float * dst32, unsigned char * src, int d_w, int d_h, int d_c, cudaStream_t stream); + +/** + * @brief Resize and letterbox a image using bilinear interpolation on gpus + * @param[out] dst processed image + * @param[in] src image + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] d_c channel for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] stream cuda stream + */ +extern void resize_bilinear_letterbox_gpu( + unsigned char * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, + cudaStream_t stream); + +/** + * @brief Optimized preprocessing including resize, letterbox, nhwc2nchw, toFloat and normalization + * for YOLOX on gpus + * @param[out] dst processed image + * @param[in] src image + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] d_c channel for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] norm normalization + * @param[in] stream cuda stream + */ +extern void resize_bilinear_letterbox_nhwc_to_nchw32_gpu( + float * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, + float norm, cudaStream_t stream); + +/** + * @brief Optimized preprocessing including resize, letterbox, nhwc2nchw, toFloat and normalization + * with batching for YOLOX on gpus + * @param[out] dst processed image + * @param[in] src image + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] d_c channel for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] batch batch size + * @param[in] norm normalization + * @param[in] stream cuda stream + */ +extern void resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu( + float * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, int batch, + float norm, cudaStream_t stream); + +/** + * @brief Optimized preprocessing including crop, resize, letterbox, nhwc2nchw, toFloat and + * normalization with batching for YOLOX on gpus + * @param[out] dst processed image + * @param[in] src image + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] d_c channel for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] d_roi regions of interest for cropping + * @param[in] batch batch size + * @param[in] norm normalization + * @param[in] stream cuda stream + */ +extern void crop_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu( + float * dst, unsigned char * src, int d_w, int d_h, int d_c, Roi * d_roi, int s_w, int s_h, + int s_c, int batch, float norm, cudaStream_t stream); + +/** + * @brief Optimized multi-scale preprocessing including crop, resize, letterbox, nhwc2nchw, toFloat + * and normalization with batching for YOLOX on gpus + * @param[out] dst processed image + * @param[in] src image + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] d_c channel for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] d_roi regions of interest for cropping + * @param[in] batch batch size + * @param[in] norm normalization + * @param[in] stream cuda stream + */ +extern 
void multi_scale_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu( + float * dst, unsigned char * src, int d_w, int d_h, int d_c, Roi * d_roi, int s_w, int s_h, + int s_c, int batch, float norm, cudaStream_t stream); + +/** + * @brief Argmax on GPU + * @param[out] dst processed image + * @param[in] src probability map + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] batch batch size + * @param[in] stream cuda stream + */ +extern void argmax_gpu( + unsigned char * dst, float * src, int d_w, int d_h, int s_w, int s_h, int s_c, int batch, + cudaStream_t stream); +} // namespace tensorrt_yolox +} // namespace autoware +#endif // AUTOWARE__TENSORRT_YOLOV10__PREPROCESS_HPP_ diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp new file mode 100644 index 0000000000000..6e6de8c6121ab --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp @@ -0,0 +1,143 @@ +// Copyright 2023 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ +#define AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ + +#include +#include +#include +#include + +#include +#include +#include + +namespace autoware::tensorrt_yolov10 +{ +using cuda_utils::CudaUniquePtr; +using cuda_utils::CudaUniquePtrHost; +using cuda_utils::makeCudaStream; +using cuda_utils::StreamUniquePtr; + +struct Object +{ + int32_t x_offset; + int32_t y_offset; + int32_t height; + int32_t width; + float score; + int32_t type; +}; + +using ObjectArray = std::vector; +using ObjectArrays = std::vector; + +class TrtYolov10 +{ +public: + TrtYolov10( + const std::string & model_path, const std::string & precision, const int num_class = 8, + const float score_threshold = 0.8, + const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(), + const bool use_gpu_preprocess = false, const uint8_t gpu_id = 0, + std::string calibration_image_list_file = std::string(), const double norm_factor = 1.0, + [[maybe_unused]] const std::string & cache_dir = "", + const tensorrt_common::BatchConfig & batch_config = {1, 1, 1}, + const size_t max_workspace_size = (1 << 30)); + + ~TrtYolov10(); + + bool setCudaDeviceId(const uint8_t gpu_id); + + /** + * @brief return a flag for gpu initialization + */ + bool isGPUInitialized() const { return is_gpu_initialized_; } + + /** + * @brief run inference including pre-process and post-process + * @param[out] objects results for object detection + * @param[in] images batched images + */ + bool doInference(const std::vector & images, ObjectArrays & objects); + + // /** + // * @brief allocate buffer for preprocess on GPU + // * @param[in] width original image width + // * @param[in] height original image height + // * @warning if we don't allocate buffers using it, "preprocessGpu" allocates buffers at the + // * beginning + // */ + // void initPreprocessBuffer(int width, int height); + + // /** + // * @brief output TensorRT profiles for each layer + // */ + // void printProfiling(void); + + void preProcess(cv::Mat * img, int length, float * factor, std::vector & data); + + void preprocess(const std::vector & images, std::vector & data); + ObjectArray postprocess(float * result, float factor); + + // /** + // * @brief run preprocess on GPU + // * @param[in] images batching images + // */ + // void preprocessGpu(const std::vector & images); + + bool feedforward(const std::vector & images, ObjectArrays & objects); + + std::unique_ptr trt_common_; + + std::vector input_h_; + CudaUniquePtr input_d_; + + size_t out_elem_num_; + size_t out_elem_num_per_batch_; + CudaUniquePtr out_d_; + CudaUniquePtrHost out_h_; + StreamUniquePtr stream_; + + int32_t max_detections_; + std::vector scales_; + + int num_class_; + float score_threshold_; + int batch_size_; + + // GPU id for inference + const uint8_t gpu_id_; + // flag for gpu initialization + bool is_gpu_initialized_; + // host buffer for preprocessing on GPU + CudaUniquePtrHost image_buf_h_; + // device buffer for preprocessing on GPU + CudaUniquePtr image_buf_d_; + + int src_width_; + int src_height_; + + CudaUniquePtr argmax_buf_d_; + double norm_factor_; + int multitask_; + bool use_gpu_preprocess_; + + float factor_ = 1.0; +}; + +} // namespace autoware::tensorrt_yolov10 + +#endif // AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp 
b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp new file mode 100644 index 0000000000000..f869ee117db25 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp @@ -0,0 +1,76 @@ +// Copyright 2022 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_NODE_HPP_ +#define AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_NODE_HPP_ + +#include "autoware/object_recognition_utils/object_recognition_utils.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#if __has_include() +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +namespace autoware::tensorrt_yolov10 +{ +// cspell: ignore Semseg +class TrtYolov10Node : public rclcpp::Node +{ +public: + explicit TrtYolov10Node(const rclcpp::NodeOptions & node_options); + +private: + void onConnect(); + void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg); + + bool readLabelFile(const std::string & label_path); + void replaceLabelMap(); + + std::unique_ptr trt_yolov10_; + + image_transport::Subscriber image_sub_; + + rclcpp::TimerBase::SharedPtr timer_; + std::unique_ptr> stop_watch_ptr_; + std::unique_ptr debug_publisher_; + rclcpp::Publisher::SharedPtr objects_pub_; + image_transport::Publisher image_pub_; + + std::map label_map_; +}; + +} // namespace autoware::tensorrt_yolov10 + +#endif // AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_NODE_HPP_ diff --git a/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml b/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml new file mode 100644 index 0000000000000..dd1b020847437 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml b/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml new file mode 100644 index 0000000000000..ab8a0f8c645e6 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/perception/autoware_tensorrt_yolov10/package.xml b/perception/autoware_tensorrt_yolov10/package.xml new file mode 100644 index 0000000000000..76b161e50cbd6 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/package.xml @@ -0,0 +1,41 @@ + + + autoware_tensorrt_yolov10 + 0.0.1 + tensorrt library implementation for yolov10 + + Su Chang + Su Chang + + Apache License 2.0 + + ament_cmake_auto + autoware_cmake + cudnn_cmake_module + tensorrt_cmake_module + + cudnn_cmake_module + tensorrt_cmake_module + + autoware_object_recognition_utils + autoware_perception_msgs + autoware_tensorrt_common + cuda_utils + cv_bridge + image_transport + libopencv-dev + perception_utils + rclcpp + rclcpp_components + sensor_msgs + 
tier4_perception_msgs + + autoware_image_transport_decompressor + + ament_lint_auto + autoware_lint_common + + + ament_cmake + + diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp new file mode 100644 index 0000000000000..5106edc9cc6e5 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -0,0 +1,397 @@ +// Copyright 2023 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cuda_utils/cuda_check_error.hpp" +#include "cuda_utils/cuda_unique_ptr.hpp" + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PRINT_DEBUG_INFO printf("line:%d\n", __LINE__); + +namespace +{ +static void trimLeft(std::string & s) +{ + s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); +} + +static void trimRight(std::string & s) +{ + s.erase(find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); +} + +std::string trim(std::string & s) +{ + trimLeft(s); + trimRight(s); + return s; +} + +bool fileExists(const std::string & file_name, bool verbose) +{ + if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(file_name))) { + if (verbose) { + std::cout << "File does not exist : " << file_name << std::endl; + } + return false; + } + return true; +} + +std::vector loadListFromTextFile(const std::string & filename) +{ + assert(fileExists(filename, true)); + std::vector list; + + std::ifstream f(filename); + if (!f) { + std::cout << "failed to open " << filename << std::endl; + assert(0); + } + + std::string line; + while (std::getline(f, line)) { + if (line.empty()) { + continue; + } else { + list.push_back(trim(line)); + } + } + + return list; +} + +std::vector loadImageList(const std::string & filename, const std::string & prefix) +{ + std::vector fileList = loadListFromTextFile(filename); + for (auto & file : fileList) { + if (fileExists(file, false)) { + continue; + } else { + std::string prefixed = prefix + file; + if (fileExists(prefixed, false)) + file = prefixed; + else + std::cerr << "WARNING: couldn't find: " << prefixed << " while loading: " << filename + << std::endl; + } + } + return fileList; +} + +} // anonymous namespace + +namespace autoware::tensorrt_yolov10 +{ +TrtYolov10::TrtYolov10( + const std::string & model_path, const std::string & precision, const int num_class, + const float score_threshold, const tensorrt_common::BuildConfig build_config, + const bool use_gpu_preprocess, const uint8_t gpu_id, std::string calibration_image_list_path, + const double norm_factor, [[maybe_unused]] const std::string & cache_dir, + const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size) +: gpu_id_(gpu_id), is_gpu_initialized_(false) +{ + if (!setCudaDeviceId(gpu_id_)) { + return; + } + is_gpu_initialized_ = true; + + num_class_ = num_class; + 
score_threshold_ = score_threshold; + + src_width_ = -1; + src_height_ = -1; + norm_factor_ = norm_factor; + batch_size_ = batch_config[2]; + multitask_ = 0; + stream_ = makeCudaStream(); + + if (precision == "int8") { + if (build_config.clip_value <= 0.0) { + if (calibration_image_list_path.empty()) { + throw std::runtime_error( + "calibration_image_list_path should be passed to generate int8 engine " + "or specify values larger than zero to clip_value."); + } + } else { + // if clip value is larger than zero, calibration file is not needed + calibration_image_list_path = ""; + } + + int max_batch_size = batch_size_; + nvinfer1::Dims input_dims = tensorrt_common::get_input_dims(model_path); + std::vector calibration_images; + if (calibration_image_list_path != "") { + calibration_images = loadImageList(calibration_image_list_path, ""); + } + tensorrt_yolov10::ImageStream stream(max_batch_size, input_dims, calibration_images); + fs::path calibration_table{model_path}; + std::string ext = ""; + if (build_config.calib_type_str == "Entropy") { + ext = "EntropyV2-"; + } else if ( + build_config.calib_type_str == "Legacy" || build_config.calib_type_str == "Percentile") { + ext = "Legacy-"; + } else { + ext = "MinMax-"; + } + + ext += "calibration.table"; + calibration_table.replace_extension(ext); + fs::path histogram_table{model_path}; + ext = "histogram.table"; + histogram_table.replace_extension(ext); + + std::unique_ptr calibrator; + if (build_config.calib_type_str == "Entropy") { + calibrator.reset( + new tensorrt_yolov10::Int8EntropyCalibrator(stream, calibration_table, norm_factor_)); + + } else if ( + build_config.calib_type_str == "Legacy" || build_config.calib_type_str == "Percentile") { + const double quantile = 0.999999; + const double cutoff = 0.999999; + calibrator.reset(new tensorrt_yolov10::Int8LegacyCalibrator( + stream, calibration_table, histogram_table, norm_factor_, true, quantile, cutoff)); + } else { + calibrator.reset( + new tensorrt_yolov10::Int8MinMaxCalibrator(stream, calibration_table, norm_factor_)); + } + trt_common_ = std::make_unique( + model_path, precision, std::move(calibrator), batch_config, max_workspace_size, build_config); + } else { + trt_common_ = std::make_unique( + model_path, precision, nullptr, batch_config, max_workspace_size, build_config); + } + trt_common_->setup(); + + if (!trt_common_->isInitialized()) { + return; + } + num_class_ = num_class; + score_threshold_ = score_threshold; + + // GPU memory allocation + const auto input_dims = trt_common_->getBindingDimensions(0); + const auto input_size = + std::accumulate(input_dims.d + 1, input_dims.d + input_dims.nbDims, 1, std::multiplies()); + + const auto output_dims = trt_common_->getBindingDimensions(1); + input_d_ = cuda_utils::make_unique(batch_config[2] * input_size); + max_detections_ = output_dims.d[1]; + out_elem_num_ = std::accumulate( + output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies()); + out_elem_num_ = out_elem_num_ * batch_config[2]; + out_elem_num_per_batch_ = static_cast(out_elem_num_ / batch_config[2]); + out_d_ = cuda_utils::make_unique(out_elem_num_); + out_h_ = cuda_utils::make_unique_host(out_elem_num_, cudaHostAllocPortable); + + if (use_gpu_preprocess) { + use_gpu_preprocess_ = true; + image_buf_h_ = nullptr; + image_buf_d_ = nullptr; + } else { + use_gpu_preprocess_ = false; + } +} + +TrtYolov10::~TrtYolov10() +{ + if (use_gpu_preprocess_) { + if (image_buf_h_) { + image_buf_h_.reset(); + } + if (image_buf_d_) { + image_buf_d_.reset(); + } + if 
(argmax_buf_d_) {
+      argmax_buf_d_.reset();
+    }
+  }
+}
+
+bool TrtYolov10::setCudaDeviceId(const uint8_t gpu_id)
+{
+  cudaError_t cuda_err = cudaSetDevice(gpu_id);
+  if (cuda_err != cudaSuccess) {
+    return false;
+  } else {
+    return true;
+  }
+}
+
+void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vector<float> & data)
+{
+  // Create a new cv::Mat object for storing the image after conversion
+  cv::Mat mat;
+
+  // Get the dimensions and number of channels of the input image
+  int rh = img->rows;  // Height of the input image
+  int rw = img->cols;  // Width of the input image
+
+  // Convert the input image from BGR to RGB color space
+  cv::cvtColor(*img, mat, cv::COLOR_BGR2RGB);
+
+  // Determine the size of the new square image (largest dimension of the input image)
+  int maxImageLength = rw > rh ? rw : rh;
+
+  cv::Mat maxImage = cv::Mat::zeros(maxImageLength, maxImageLength, CV_8UC3);
+
+  // Note: maxImage is zero-initialized, so this multiplication is a no-op and the
+  // padding area stays black
+  maxImage = maxImage * 255;
+
+  // Define a Region of Interest (ROI) that covers the entire original image
+  cv::Rect roi(0, 0, rw, rh);
+
+  // Copy the original image into the ROI of the new square image
+  mat.copyTo(cv::Mat(maxImage, roi));
+
+  // Create a new cv::Mat object for storing the resized image
+  cv::Mat resizeImg;
+
+  // Resize the square image to the specified dimensions (length x length)
+  cv::resize(maxImage, resizeImg, cv::Size(length, length), 0.0f, 0.0f, cv::INTER_LINEAR);
+
+  // Calculate the scaling factor and store it in the 'factor' variable
+  *factor = (1.0 * maxImageLength) / (1.0 * length);
+
+  // Convert the resized image to floating-point format with values in range [0, 1]
+  resizeImg.convertTo(resizeImg, CV_32FC3, 1 / 255.0);
+
+  // Update the height, width, and number of channels for the resized image
+  rh = resizeImg.rows;
+  rw = resizeImg.cols;
+  int rc = resizeImg.channels();
+
+  // Extract each channel of the resized image and store it in the 'data' vector
+  for (int i = 0; i < rc; ++i) {
+    // Extract the i-th channel and store it in the appropriate part of the 'data' vector
+    cv::extractChannel(resizeImg, cv::Mat(rh, rw, CV_32FC1, data.data() + i * rh * rw), i);
+  }
+}
+
+void TrtYolov10::preprocess(const std::vector<cv::Mat> & images, std::vector<float> & inputData)
+{
+  // TODO: support multiple images; for now, assume inference on a single image only.
+  for (cv::Mat image : images) {
+    int length = 640;  // model input size
+    factor_ = 1.0;
+
+    preProcess(&image, length, &factor_, inputData);
+
+    // // check sum
+    // float sum = std::accumulate(inputData.begin(), inputData.end(), 0.0f);
+    // printf("sum=%.2f\n", sum);
+  }
+}
+
+bool TrtYolov10::doInference(const std::vector<cv::Mat> & images, ObjectArrays & objects)
+{
+  if (!setCudaDeviceId(gpu_id_)) {
+    return false;
+  }
+
+  if (!trt_common_->isInitialized()) {
+    return false;
+  }
+
+  std::vector<float> input_data(640 * 640 * 3);
+  preprocess(images, input_data);
+  CHECK_CUDA_ERROR(cudaMemcpy(
+    input_d_.get(), input_data.data(), 3 * 640 * 640 * sizeof(float), cudaMemcpyHostToDevice));
+
+  return feedforward(images, objects);
+}
+
+bool TrtYolov10::feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects)
+{
+  // PRINT_DEBUG_INFO
+  std::vector<void *> buffers = {input_d_.get(), out_d_.get()};
+
+  trt_common_->enqueueV2(buffers.data(), *stream_, nullptr);
+
+  // printf("out_elem_num_:%d\n", (int)out_elem_num_);
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    out_h_.get(), out_d_.get(), sizeof(float) * out_elem_num_, cudaMemcpyDeviceToHost, *stream_));
+
+  cudaStreamSynchronize(*stream_);
+  objects.clear();
+
+  const auto batch_size = images.size();
+  for (size_t i = 0; i < batch_size; ++i) {
+    // auto image_size = images[i].size();
+    float * batch_prob = out_h_.get() + (i * out_elem_num_per_batch_);
+
+    ObjectArray object_array = postprocess(batch_prob, factor_);
+    objects.emplace_back(object_array);
+  }
+
+  return true;
+}
+
+ObjectArray TrtYolov10::postprocess(float * result, float factor)
+{
+  ObjectArray object_array;
+
+  // yolov10m output: 1 x 300 x 6
+  for (int i = 0; i < max_detections_; i++) {
+    int s = 6 * i;
+
+    float score = result[s + 4];
+    // printf("i:%d,score:%.3f\n", i, (float)result[s + 4]);
+
+    if (score > score_threshold_) {
+      float ltx = result[s + 0];
+      float lty = result[s + 1];
+      float rbx = result[s + 2];
+      float rby = result[s + 3];
+
+      int x = static_cast<int>(ltx * factor);
+      int y = static_cast<int>(lty * factor);
+      int width = static_cast<int>((rbx - ltx) * factor);
+      int height = static_cast<int>((rby - lty) * factor);
+
+      Object object;
+      object.x_offset = x;
+      object.y_offset = y;
+      object.width = width;
+      object.height = height;
+      object.score = score;
+      object.type = static_cast<int>(result[s + 5]);
+      object_array.emplace_back(object);
+    }
+  }
+
+  return object_array;
+}
+
+} // namespace autoware::tensorrt_yolov10
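
The coordinate mapping in `postprocess` undoes the scaling applied in `preProcess` with a single factor. A small standalone sketch with illustrative numbers (not part of this package) to make the arithmetic concrete:

```cpp
#include <algorithm>
#include <cstdio>

// Illustrative round trip of the scale factor used above, assuming a
// 1920x1080 input and the fixed 640x640 network resolution.
int main()
{
  const int src_w = 1920, src_h = 1080;
  const int length = 640;  // network input size

  // preProcess pads the image to a square of the longest side, then resizes:
  const int max_side = std::max(src_w, src_h);                  // 1920
  const float factor = static_cast<float>(max_side) / length;   // 3.0

  // A box predicted at (100, 50)-(200, 150) in 640x640 network coordinates
  // maps back to source coordinates by multiplying with the factor:
  const float ltx = 100.f, lty = 50.f, rbx = 200.f, rby = 150.f;
  std::printf("x=%d y=%d w=%d h=%d\n",
    static_cast<int>(ltx * factor),            // 300
    static_cast<int>(lty * factor),            // 150
    static_cast<int>((rbx - ltx) * factor),    // 300
    static_cast<int>((rby - lty) * factor));   // 300
  return 0;
}
```

In other words, a 1920x1080 source is padded to a 1920x1920 square and resized to 640x640, so `factor` is 3.0 and every network-space coordinate is simply multiplied by 3 on the way back.

diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp
new file mode 100644
index 0000000000000..6d173876b9840
--- /dev/null
+++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp
@@ -0,0 +1,197 @@
+// Copyright 2022 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.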
+ +#include "autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp" + +#include "autoware/object_recognition_utils/object_classification.hpp" +#include "perception_utils/run_length_encoder.hpp" + +#include + +#include +#include +#include +#include +#include + +#define PRINT_DEBUG_INFO printf("line:%d\n", __LINE__); + +namespace autoware::tensorrt_yolov10 +{ +TrtYolov10Node::TrtYolov10Node(const rclcpp::NodeOptions & node_options) +: Node("tensorrt_yolov10", node_options) +{ + { + stop_watch_ptr_ = + std::make_unique>(); + debug_publisher_ = + std::make_unique(this, this->get_name()); + stop_watch_ptr_->tic("cyclic_time"); + stop_watch_ptr_->tic("processing_time"); + } + using std::placeholders::_1; + using std::chrono_literals::operator""ms; + + const std::string model_path = this->declare_parameter("model_path"); + const std::string precision = this->declare_parameter("precision", "fp16"); + const uint8_t gpu_id = this->declare_parameter("gpu_id", 0); + const std::string label_path = this->declare_parameter("label_path"); + + trt_yolov10_ = std::make_unique(model_path, precision); + + if (!readLabelFile(label_path)) { + RCLCPP_ERROR(this->get_logger(), "Could not find label file"); + rclcpp::shutdown(); + } + replaceLabelMap(); + + if (!trt_yolov10_->isGPUInitialized()) { + RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id); + rclcpp::shutdown(); + return; + } + RCLCPP_INFO(this->get_logger(), "GPU %d is selected for the inference!", gpu_id); + + timer_ = + rclcpp::create_timer(this, get_clock(), 100ms, std::bind(&TrtYolov10Node::onConnect, this)); + + objects_pub_ = this->create_publisher( + "~/out/objects", 1); + image_pub_ = image_transport::create_publisher(this, "~/out/image"); +} + +void TrtYolov10Node::onConnect() +{ + using std::placeholders::_1; + if ( + objects_pub_->get_subscription_count() == 0 && + objects_pub_->get_intra_process_subscription_count() == 0 && + image_pub_.getNumSubscribers() == 0) { + // printf("no subscribers who sub from objects_pub_ or image_pub_,shut down image_sub_\n"); + image_sub_.shutdown(); + } else if (!image_sub_) { + image_sub_ = image_transport::create_subscription( + this, "~/in/image", std::bind(&TrtYolov10Node::onImage, this, _1), "raw", + rmw_qos_profile_sensor_data); + } +} + +void TrtYolov10Node::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) +{ + stop_watch_ptr_->toc("processing_time", true); + tier4_perception_msgs::msg::DetectedObjectsWithFeature out_objects; + + cv_bridge::CvImagePtr in_image_ptr; + try { + in_image_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8); + } catch (cv_bridge::Exception & e) { + RCLCPP_ERROR(this->get_logger(), "cv_bridge exception: %s", e.what()); + return; + } + const auto width = in_image_ptr->image.cols; + const auto height = in_image_ptr->image.rows; + + tensorrt_yolov10::ObjectArrays objects; + + if (!trt_yolov10_->doInference({in_image_ptr->image}, objects)) { + RCLCPP_WARN(this->get_logger(), "Fail to inference"); + return; + } + + for (const auto & yolov10_object : objects.at(0)) { + tier4_perception_msgs::msg::DetectedObjectWithFeature object; + object.feature.roi.x_offset = yolov10_object.x_offset; + object.feature.roi.y_offset = yolov10_object.y_offset; + object.feature.roi.width = yolov10_object.width; + object.feature.roi.height = yolov10_object.height; + object.object.existence_probability = yolov10_object.score; + object.object.classification = + object_recognition_utils::toObjectClassifications(label_map_[yolov10_object.type], 
1.0f); + out_objects.feature_objects.push_back(object); + const auto left = std::max(0, static_cast(object.feature.roi.x_offset)); + const auto top = std::max(0, static_cast(object.feature.roi.y_offset)); + const auto right = + std::min(static_cast(object.feature.roi.x_offset + object.feature.roi.width), width); + const auto bottom = + std::min(static_cast(object.feature.roi.y_offset + object.feature.roi.height), height); + cv::rectangle( + in_image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, + 8, 0); + + cv::putText( + in_image_ptr->image, label_map_[yolov10_object.type], cv::Point(left, top), + cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 255), 1, 8, 0); + } + + image_pub_.publish(in_image_ptr->toImageMsg()); + out_objects.header = msg->header; + objects_pub_->publish(out_objects); + + if (debug_publisher_) { + const double processing_time_ms = stop_watch_ptr_->toc("processing_time", true); + const double cyclic_time_ms = stop_watch_ptr_->toc("cyclic_time", true); + const double pipeline_latency_ms = + std::chrono::duration( + std::chrono::nanoseconds( + (this->get_clock()->now() - out_objects.header.stamp).nanoseconds())) + .count(); + debug_publisher_->publish( + "debug/cyclic_time_ms", cyclic_time_ms); + debug_publisher_->publish( + "debug/processing_time_ms", processing_time_ms); + debug_publisher_->publish( + "debug/pipeline_latency_ms", pipeline_latency_ms); + } +} + +bool TrtYolov10Node::readLabelFile(const std::string & label_path) +{ + std::ifstream label_file(label_path); + if (!label_file.is_open()) { + RCLCPP_ERROR(this->get_logger(), "Could not open label file. [%s]", label_path.c_str()); + return false; + } + int label_index{}; + std::string label; + while (getline(label_file, label)) { + std::transform( + label.begin(), label.end(), label.begin(), [](auto c) { return std::toupper(c); }); + label_map_.insert({label_index, label}); + ++label_index; + } + return true; +} + +// we need this because autoware::object_recognition_utils::toLabel(const std::string & class_name) +// limit label type +void TrtYolov10Node::replaceLabelMap() +{ + for (std::size_t i = 0; i < label_map_.size(); ++i) { + auto & label = label_map_[i]; + if (label == "PERSON") { + label = "PEDESTRIAN"; + } else if (label == "MOTORBIKE") { + label = "MOTORCYCLE"; + } else if ( + label != "CAR" && label != "PEDESTRIAN" && label != "BUS" && label != "TRUCK" && + label != "BICYCLE" && label != "MOTORCYCLE") { + label = "UNKNOWN"; + } + } +} + +} // namespace autoware::tensorrt_yolov10 + +#include "rclcpp_components/register_node_macro.hpp" +RCLCPP_COMPONENTS_REGISTER_NODE(autoware::tensorrt_yolov10::TrtYolov10Node) diff --git a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp new file mode 100644 index 0000000000000..da896629bec94 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp @@ -0,0 +1,84 @@ +// Copyright 2022 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#if (defined(_MSC_VER) or (defined(__GNUC__) and (7 <= __GNUC_MAJOR__))) +#include +namespace fs = ::std::filesystem; +#else +#include +namespace fs = ::std::experimental::filesystem; +#endif + +#include +#include + +// using namespace std; + +#define PRINT_DEBUG_INFO printf("line:%d\n", __LINE__); + +namespace autoware::tensorrt_yolov10 +{ +class Yolov10SingleImageInferenceNode : public rclcpp::Node +{ +public: + explicit Yolov10SingleImageInferenceNode(const rclcpp::NodeOptions & node_options) + : Node("yolov10_single_image_inference", node_options) + { + const auto image_path = declare_parameter("image_path", ""); + const auto model_path = declare_parameter("model_path", ""); + const auto precision = declare_parameter("precision", "fp16"); + const auto save_image = declare_parameter("save_image", true); + + printf("image_path:%s\n", image_path.c_str()); + auto p = fs::path(image_path); + const auto ext = p.extension().string(); + p.replace_extension(""); + const auto output_image_path = + declare_parameter("output_image_path", p.string() + "_detect" + ext); + + printf("model_path:%s,output_image_path:%s\n", model_path.c_str(), output_image_path.c_str()); + auto trt_yolov10 = std::make_unique(model_path, precision); + auto image = cv::imread(image_path); + tensorrt_yolov10::ObjectArrays objects; + + trt_yolov10->doInference({image}, objects); + for (const auto & object : objects[0]) { + const auto left = object.x_offset; + const auto top = object.y_offset; + const auto right = std::clamp(left + object.width, 0, image.cols); + const auto bottom = std::clamp(top + object.height, 0, image.rows); + cv::rectangle( + image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, 8, 0); + + std::string cls_id = std::to_string(object.type); + cv::putText( + image, cls_id, cv::Point(left, top), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 255), + 1, 8, 0); + } + if (!save_image) { + cv::imshow("inference image", image); + cv::waitKey(0); + rclcpp::shutdown(); + } + cv::imwrite(output_image_path, image); + rclcpp::shutdown(); + } +}; +} // namespace autoware::tensorrt_yolov10 + +#include "rclcpp_components/register_node_macro.hpp" +RCLCPP_COMPONENTS_REGISTER_NODE(autoware::tensorrt_yolov10::Yolov10SingleImageInferenceNode)
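
To close, a minimal offline usage sketch of the `TrtYolov10` API added by this PR, mirroring what `Yolov10SingleImageInferenceNode` does (paths are placeholders; error handling is omitted; assumes the package is built with TensorRT available):

```cpp
#include <autoware/tensorrt_yolov10/tensorrt_yolov10.hpp>

#include <opencv2/opencv.hpp>

#include <iostream>
#include <memory>

int main()
{
  using autoware::tensorrt_yolov10::ObjectArrays;
  using autoware::tensorrt_yolov10::TrtYolov10;

  // Placeholder paths; a TensorRT engine is built from the ONNX model on first run.
  auto yolov10 = std::make_unique<TrtYolov10>("/path/to/yolov10m.onnx", "fp16");
  cv::Mat image = cv::imread("/path/to/image.jpg");

  ObjectArrays objects;
  if (!yolov10->doInference({image}, objects)) {
    std::cerr << "inference failed" << std::endl;
    return 1;
  }
  // One ObjectArray per input image; each Object carries a box, score, and class ID.
  for (const auto & obj : objects[0]) {
    std::cout << "class " << obj.type << " score " << obj.score << " box [" << obj.x_offset
              << ", " << obj.y_offset << ", " << obj.width << ", " << obj.height << "]"
              << std::endl;
  }
  return 0;
}
```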