Skip to content

Commit

Permalink
Parallelproj tof (#1356)
Browse files Browse the repository at this point in the history
- add TOF functionality to the Parallelproj projector
- require Parallelproj 1.3.4 as older versions had a bug
- [GHA] update parallelproj to 1.7.3

Co-authored-by: robbietuk <[email protected]>
Co-authored-by: Kris Thielemans <[email protected]>
  • Loading branch information
3 people authored Feb 12, 2024
1 parent 7df3780 commit 250da05
Show file tree
Hide file tree
Showing 7 changed files with 228 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ jobs:
python -m pip install pytest
if test "${{matrix.parallelproj}}XX" == "ONXX"; then
git clone --depth 1 --branch v1.3.7 https://github.com/gschramm/parallelproj
git clone --depth 1 --branch v1.7.3 https://github.com/gschramm/parallelproj
mkdir parallelproj/build
cd parallelproj/build
if test "${{matrix.cuda_version}}" == "0"; then
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ endif()

# Parallelproj
if(NOT DISABLE_Parallelproj_PROJECTOR)
find_package(parallelproj 1.0.1 CONFIG)
find_package(parallelproj 1.3.4 CONFIG)
if (parallelproj_FOUND)
set(STIR_WITH_Parallelproj_PROJECTOR ON)
if (parallelproj_built_with_CUDA)
Expand Down
1 change: 1 addition & 0 deletions documentation/release_6.1.htm
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ <h4>Python (and MATLAB)</h4>
<h3>New functionality</h3>
<ul>
<li>
Add TOF capability of the parallelproj projector (see <a href=https://github.com/UCL/STIR/pull/1356>PR #1356</a>)
</li>
</ul>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ class ParallelprojHelper
std::array<float, 3> origin;
std::vector<float> xstart;
std::vector<float> xend;

long long num_image_voxel;
long long num_lors;

float sigma_tof;
float tofcenter_offset;
float tofbin_width;
short num_tof_bins;
};

} // namespace detail
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@
\author Richard Brown
\author Kris Thielemans
\author Nicole Jurjew
*/
/*
Copyright (C) 2019, 2021 University College London
Copyright (C) 2019, 2021, 2024 University College London
This file is part of STIR.
SPDX-License-Identifier: Apache-2.0
Expand All @@ -25,6 +24,7 @@
#include "stir/RelatedViewgrams.h"
#include "stir/recon_buildblock/TrivialDataSymmetriesForBins.h"
#include "stir/ProjDataInfo.h"
#include "stir/TOF_conversions.h"
#include "stir/VoxelsOnCartesianGrid.h"
#include "stir/recon_array_functions.h"
#include "stir/ProjDataInMemory.h"
Expand Down Expand Up @@ -116,6 +116,20 @@ back_project(const ProjData& proj_data, int subset_num, int num_subsets)
}
#endif

static void
TOF_transpose(std::vector<float>& mem_for_PP_back,
const float* STIR_mem,
const shared_ptr<const detail::ParallelprojHelper> _helper,
const long long offset)
{
const auto num_tof_bins = static_cast<unsigned>(_helper->num_tof_bins);
for (unsigned tof_idx = 0; tof_idx < num_tof_bins; ++tof_idx)
for (long long lor_idx = 0; lor_idx < _helper->num_lors; ++lor_idx)
{
mem_for_PP_back[lor_idx * num_tof_bins + tof_idx] = STIR_mem[offset + tof_idx * _helper->num_lors + lor_idx];
}
}

void
BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density) const
{
Expand All @@ -128,18 +142,14 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density

#ifdef parallelproj_built_with_CUDA

long long num_image_voxel = static_cast<long long>(image_vec.size());
long long num_lors = static_cast<long long>(p.get_proj_data_info_sptr()->size_all());

long long num_lors_per_chunk_floor = num_lors / _num_gpu_chunks;
long long remainder = num_lors % _num_gpu_chunks;
long long num_lors_per_chunk_floor = _helper->num_lors / _num_gpu_chunks;
long long remainder = _helper->num_lors % _num_gpu_chunks;

long long num_lors_per_chunk;
long long offset = 0;

// send image to all visible CUDA devices
float** image_on_cuda_devices;
image_on_cuda_devices = copy_float_array_to_all_devices(image_vec.data(), num_image_voxel);
float** image_on_cuda_devices = copy_float_array_to_all_devices(image_vec.data(), _helper->num_image_voxel);

// do (chuck-wise) back projection on the CUDA devices
for (int chunk_num = 0; chunk_num < _num_gpu_chunks; chunk_num++)
Expand All @@ -152,38 +162,96 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density
{
num_lors_per_chunk = num_lors_per_chunk_floor;
}

joseph3d_back_cuda(_helper->xstart.data() + 3 * offset,
_helper->xend.data() + 3 * offset,
image_on_cuda_devices,
_helper->origin.data(),
_helper->voxsize.data(),
p.get_const_data_ptr() + offset,
num_lors_per_chunk,
_helper->imgdim.data(),
/*threadsperblock*/ 64);

if (p.get_proj_data_info_sptr()->is_tof_data())
{
info("running the CUDA version of parallelproj, about to call function joseph3d_back_tof_sino_cuda", 2);

std::vector<float> mem_for_PP_back(num_lors_per_chunk * _helper->num_tof_bins);
const float* STIR_mem = p.get_const_data_ptr();

TOF_transpose(mem_for_PP_back, STIR_mem, _helper, offset);

// info("created object mem_for_PP_img", 2);
joseph3d_back_tof_sino_cuda(_helper->xend.data() + 3 * offset,
_helper->xstart.data() + 3 * offset,
image_on_cuda_devices,
_helper->origin.data(),
_helper->voxsize.data(),
mem_for_PP_back.data(), // p.get_const_data_ptr() + offset* num_tof_bins,
num_lors_per_chunk,
_helper->imgdim.data(),
_helper->tofbin_width,
&_helper->sigma_tof,
&_helper->tofcenter_offset,
4, // float n_sigmas
_helper->num_tof_bins,
0, // unsigned char lor_dependent_sigma_tof
0, // unsigned char lor_dependent_tofcenter_offset
64 // threadsperblock
);
if (chunk_num != _num_gpu_chunks - 1)
p.release_const_data_ptr();
}
else
{
joseph3d_back_cuda(_helper->xstart.data() + 3 * offset,
_helper->xend.data() + 3 * offset,
image_on_cuda_devices,
_helper->origin.data(),
_helper->voxsize.data(),
p.get_const_data_ptr() + offset,
num_lors_per_chunk,
_helper->imgdim.data(),
/*threadsperblock*/ 64);
}
offset += num_lors_per_chunk;
}

// sum backprojected images on the first CUDA device
sum_float_arrays_on_first_device(image_on_cuda_devices, num_image_voxel);
sum_float_arrays_on_first_device(image_on_cuda_devices, _helper->num_image_voxel);

// copy summed image back to host
get_float_array_from_device(image_on_cuda_devices, num_image_voxel, 0, image_vec.data());
get_float_array_from_device(image_on_cuda_devices, _helper->num_image_voxel, 0, image_vec.data());

// free image array from CUDA devices
free_float_array_on_all_devices(image_on_cuda_devices);

#else
joseph3d_back(_helper->xstart.data(),
_helper->xend.data(),
image_vec.data(),
_helper->origin.data(),
_helper->voxsize.data(),
p.get_const_data_ptr(),
static_cast<long long>(p.get_proj_data_info_sptr()->size_all()),
_helper->imgdim.data());
if (this->_proj_data_info_sptr->is_tof_data() == 1)
{
std::vector<float> mem_for_PP_back(_helper->num_lors * _helper->num_tof_bins);
const float* STIR_mem = p.get_const_data_ptr();

TOF_transpose(mem_for_PP_back, STIR_mem, _helper, 0);

joseph3d_back_tof_sino(_helper->xend.data(),
_helper->xstart.data(),
image_vec.data(),
_helper->origin.data(),
_helper->voxsize.data(),
mem_for_PP_back.data(),
_helper->num_lors,
_helper->imgdim.data(),
_helper->tofbin_width,
&_helper->sigma_tof,
&_helper->tofcenter_offset,
4, // float n_sigmas,
_helper->num_tof_bins,
0, // unsigned char lor_dependent_sigma_tof
0 // unsigned char lor_dependent_tofcenter_offset
);
}
else
{
joseph3d_back(_helper->xstart.data(),
_helper->xend.data(),
image_vec.data(),
_helper->origin.data(),
_helper->voxsize.data(),
p.get_const_data_ptr(),
static_cast<long long>(p.get_proj_data_info_sptr()->size_all()),
_helper->imgdim.data());
}
#endif
info("done", 2);

Expand Down
Loading

0 comments on commit 250da05

Please sign in to comment.