From 58b9a2cfddde06d5bc36271cba110eb7f6ea4446 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 6 Jan 2021 23:50:57 +0000 Subject: [PATCH 01/64] tests: separate check & deploy, add pre-commit hooks --- .github/workflows/test.yml | 29 +++++++++++++++-------------- .pre-commit-config.yaml | 20 ++++++++++++++++---- setup.py | 13 ++++++------- 3 files changed, 37 insertions(+), 25 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5cefc0f3..b5ef6bcf 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,5 @@ name: Test -on: -- push +on: [push, pull_request] jobs: check: runs-on: ubuntu-latest @@ -21,9 +20,9 @@ jobs: with: path: ~/.cache/pre-commit key: pre-commit|${{ env.PYSHA }}|${{ hashFiles('.pre-commit-config.yaml') }} - - run: pip install -U pre-commit twine setuptools wheel setuptools_scm[toml] ninst scikit-build - - run: HMUDIR=$HOME python setup.py sdist - - run: twine check dist/* + - name: dependencies + run: | + pip install -U pre-commit - run: pre-commit run -a --show-diff-on-failure test: runs-on: [self-hosted, cuda] @@ -38,7 +37,6 @@ jobs: env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} deploy: - if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') needs: [check, test] name: PyPI Deploy runs-on: ubuntu-latest @@ -47,31 +45,34 @@ jobs: with: fetch-depth: 0 - uses: actions/setup-python@v2 - with: - python-version: '3.x' - run: pip install -U twine setuptools wheel setuptools_scm[toml] ninst scikit-build - - run: HMUDIR=$HOME python setup.py sdist - - run: twine upload dist/* + - run: PATHTOOLS=$HOME/NiftyPET_tools HMUDIR=$HOME python setup.py sdist + - run: twine check dist/* + - if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') + run: twine upload dist/* env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} skip_existing: true - - id: collect_assets + - if: github.event_name == 'push' && 
startsWith(github.event.ref, 'refs/tags') + id: collect_assets name: Collect assets run: | echo "::set-output name=asset_path::$(ls dist/*.tar.gz)" echo "::set-output name=asset_name::$(basename dist/*.tar.gz)" git log --pretty='format:%d%n- %s%n%b---' $(git tag --sort=v:refname | tail -n2 | head -n1)..HEAD > _CHANGES.md - - id: create_release + - if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') + id: create_release uses: actions/create-release@v1 env: GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} with: tag_name: ${{ github.ref }} - release_name: ninst ${{ github.ref }} beta + release_name: nipet ${{ github.ref }} stable body_path: _CHANGES.md draft: true - - uses: actions/upload-release-asset@v1 + - if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') + uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} with: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0507bc73..ccd5cc3c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,18 +2,30 @@ default_language_version: python: python3 repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.3.0 + rev: v3.4.0 hooks: - id: check-added-large-files - id: check-case-conflict - id: check-docstring-first - id: check-executables-have-shebangs - id: check-toml + - id: check-merge-conflict - id: check-yaml + - id: debug-statements - id: end-of-file-fixer - id: mixed-line-ending + - id: sort-simple-yaml - id: trailing-whitespace -- hooks: +- repo: local + hooks: + - id: todo + name: Check TODO + language: pygrep + entry: TODO + types: [text] + exclude: ^(.pre-commit-config.yaml|.github/workflows/test.yml)$ + args: [-i] +- repo: https://github.com/PyCQA/isort + rev: 5.7.0 + hooks: - id: isort - repo: https://github.com/timothycrosley/isort - rev: 5.6.4 diff --git a/setup.py b/setup.py index c12c9c9b..0d882c5b 100644 --- a/setup.py +++ b/setup.py @@ -223,14 +223,12 @@ def check_constants(): 
log.info("hardware mu-maps have been located") build_ver = ".".join(__version__.split('.')[:3]).split(".dev")[0] -cmake_args = [f"-DNIPET_BUILD_VERSION={build_ver}", f"-DPython3_ROOT_DIR={sys.prefix}"] try: nvcc_arches = {"{2:d}{3:d}".format(*i) for i in dinf.gpuinfo()} except Exception as exc: - log.warning("could not detect CUDA architectures:\n%s", exc) -else: - cmake_args.append("-DCMAKE_CUDA_ARCHITECTURES=" + " ".join(sorted(nvcc_arches))) -log.info("cmake_args:%s", cmake_args) + if "sdist" not in sys.argv or any(i in sys.argv for i in ["build", "bdist", "wheel"]): + log.warning("could not detect CUDA architectures:\n%s", exc) + nvcc_arches = [] for i in (Path(__file__).resolve().parent / "_skbuild").rglob("CMakeCache.txt"): i.write_text(re.sub("^//.*$\n^[^#].*pip-build-env.*$", "", i.read_text(), flags=re.M)) setup( @@ -240,5 +238,6 @@ def check_constants(): cmake_source_dir="niftypet", cmake_languages=("C", "CXX", "CUDA"), cmake_minimum_required_version="3.18", - cmake_args=cmake_args, -) + cmake_args=[ + f"-DNIPET_BUILD_VERSION={build_ver}", f"-DPython3_ROOT_DIR={sys.prefix}", + "-DCMAKE_CUDA_ARCHITECTURES=" + " ".join(sorted(nvcc_arches))]) From da17ea07b5adaed16a4f9b28731ab883c98bf216 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 6 Jan 2021 23:54:09 +0000 Subject: [PATCH 02/64] tests: isolate env --- .github/workflows/test.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b5ef6bcf..63b078a9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,17 +25,23 @@ jobs: pip install -U pre-commit - run: pre-commit run -a --show-diff-on-failure test: - runs-on: [self-hosted, cuda] + if: github.event_name != 'pull_request' || github.head_ref != 'devel' + runs-on: [self-hosted, cuda, python] name: Test steps: - uses: actions/checkout@v2 with: fetch-depth: 0 + - name: Run setup-python + run: setup-python -p3.7 - run: pip install -U -e 
.[dev] - run: pytest - run: codecov env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + - name: Post Run setup-python + run: setup-python -p3.7 -Dr + if: ${{ always() }} deploy: needs: [check, test] name: PyPI Deploy From d67ece0b7fb36c63226276522b162f6635b99381 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 7 Jan 2021 02:00:46 +0000 Subject: [PATCH 03/64] format: add clang-format config --- .pre-commit-config.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ccd5cc3c..acfd7e60 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,3 +29,9 @@ repos: rev: 5.7.0 hooks: - id: isort +- repo: https://github.com/doublify/pre-commit-clang-format + rev: master + hooks: + - id: clang-format + files: \.(cc?|cuh?|cxx|cpp|h|hpp|hxx|java|js)$ + args: ['-fallback-style=none', '-style={BasedOnStyle: LLVM, ColumnLimit: 99}'] From b012d4ccbb9ff791c5099813c12a3a483e1c777f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 7 Jan 2021 02:01:14 +0000 Subject: [PATCH 04/64] format: clang-format --- niftypet/nipet/include/auxmath.h | 5 +- niftypet/nipet/include/def.h | 72 +- niftypet/nipet/include/scanner_0.h | 209 ++-- niftypet/nipet/lm/src/hst.cu | 1006 ++++++++--------- niftypet/nipet/lm/src/hst.h | 30 +- niftypet/nipet/lm/src/lm_module.cu | 1330 +++++++++++------------ niftypet/nipet/lm/src/lmaux.cu | 596 +++++----- niftypet/nipet/lm/src/lmaux.h | 16 +- niftypet/nipet/lm/src/lmproc.cu | 452 ++++---- niftypet/nipet/lm/src/lmproc.h | 49 +- niftypet/nipet/lm/src/rnd.cu | 1318 +++++++++++----------- niftypet/nipet/lm/src/rnd.h | 23 +- niftypet/nipet/prj/src/prj_module.cu | 1503 +++++++++++++------------- niftypet/nipet/prj/src/prjb.cu | 810 +++++++------- niftypet/nipet/prj/src/prjb.h | 37 +- niftypet/nipet/prj/src/prjf.cu | 841 +++++++------- niftypet/nipet/prj/src/prjf.h | 34 +- niftypet/nipet/prj/src/recon.cu | 585 
+++++----- niftypet/nipet/prj/src/recon.h | 52 +- niftypet/nipet/prj/src/tprj.cu | 386 ++++--- niftypet/nipet/prj/src/tprj.h | 8 +- niftypet/nipet/sct/src/ray.cu | 274 +++-- niftypet/nipet/sct/src/sct.cu | 1189 ++++++++++---------- niftypet/nipet/sct/src/sct.h | 53 +- niftypet/nipet/sct/src/sct_module.cu | 608 +++++------ niftypet/nipet/sct/src/sctaux.cu | 615 +++++------ niftypet/nipet/sct/src/sctaux.h | 59 +- niftypet/nipet/src/aux_module.cu | 1055 +++++++++--------- niftypet/nipet/src/auxmath.cu | 93 +- niftypet/nipet/src/norm.cu | 392 ++++--- niftypet/nipet/src/norm.h | 42 +- niftypet/nipet/src/scanner_0.cu | 442 ++++---- 32 files changed, 6835 insertions(+), 7349 deletions(-) diff --git a/niftypet/nipet/include/auxmath.h b/niftypet/nipet/include/auxmath.h index b37263b2..15e9594d 100644 --- a/niftypet/nipet/include/auxmath.h +++ b/niftypet/nipet/include/auxmath.h @@ -4,12 +4,9 @@ #ifndef AUXMATH_H #define AUXMATH_H - extern LMprop lmprop; - -void var_online(float * M1, float * M2, float * X, int b, size_t nele); - +void var_online(float *M1, float *M2, float *X, int b, size_t nele); // //sinos out in a structure // struct sctOUT { diff --git a/niftypet/nipet/include/def.h b/niftypet/nipet/include/def.h index 3b6173d8..43c13660 100644 --- a/niftypet/nipet/include/def.h +++ b/niftypet/nipet/include/def.h @@ -5,48 +5,50 @@ #ifndef _DEF_H_ #define _DEF_H_ -//to print extra info while processing the LM dataset (for now it effects only GE Signa processing?) +// to print extra info while processing the LM dataset (for now it effects only GE Signa +// processing?) #define EX_PRINT_INFO 0 -#define MIN( a, b ) ( ((a) < (b)) ? (a) : (b) ) +#define MIN(a, b) (((a) < (b)) ? 
(a) : (b)) #define LOGDEBUG 10 #define LOGINFO 20 #define LOGWARNING 30 - #define RD2MEM 0 // device #define BTHREADS 10 #define NTHREADS 256 -#define TOTHRDS (BTHREADS*NTHREADS) -#define ITIME 1000 //integration time -#define BTPTIME 100 //time period for bootstrapping +#define TOTHRDS (BTHREADS * NTHREADS) +#define ITIME 1000 // integration time +#define BTPTIME 100 // time period for bootstrapping #define MVTIME 1000 -#define VTIME 2 // 2**VTIME = time resolution for PRJ VIEW [s] -#define MXNITAG 5400 //max number of time tags to avoid out of memory errors +#define VTIME 2 // 2**VTIME = time resolution for PRJ VIEW [s] +#define MXNITAG 5400 // max number of time tags to avoid out of memory errors -//maximum threads for device +// maximum threads for device #define MXTHRD 1024 -#define TOT_BINS_S1 354033792 //344*252*4084 +#define TOT_BINS_S1 354033792 // 344*252*4084 -//344*252*837 +// 344*252*837 #define TOT_BINS 72557856 -#define NSTREAMS 32 // # CUDA streams -#define ELECHNK (402653184/NSTREAMS) //Siemens Mmr: (402653184 = 2^28+2^27 => 1.5G), 536870912 -#define ELECHNK_S (268435456/NSTREAMS) //GE Signa: 2^28 = 268435456 int elements to make up 1.6GB when 6bytes per event +#define NSTREAMS 32 // # CUDA streams +#define ELECHNK (402653184 / NSTREAMS) // Siemens Mmr: (402653184 = 2^28+2^27 => 1.5G), 536870912 +#define ELECHNK_S \ + (268435456 / \ + NSTREAMS) // GE Signa: 2^28 = 268435456 int elements to make up 1.6GB when 6bytes per event //=== LM bit fields/masks === // mask for time bits #define mMR_TMSK (0x1fffffff) // check if time tag -#define mMR_TTAG(w) ( (w>>29) == 4 ) +#define mMR_TTAG(w) ((w >> 29) == 4) -//for randoms -#define mxRD 60 //maximum ring difference -#define CFOR 20 //number of iterations for crystals transaxially +// for randoms +#define mxRD 60 // maximum ring difference +#define CFOR 20 // number of iterations for crystals transaxially #define SPAN 11 #define NRINGS 64 @@ -54,15 +56,15 @@ #define nCRSR 448 // number of active crystals 
#define NSBINS 344 #define NSANGLES 252 -#define NSBINANG 86688 //NSBINS*NSANGLES +#define NSBINANG 86688 // NSBINS*NSANGLES #define NSINOS 4084 #define NSINOS11 837 #define SEG0 127 -#define NBUCKTS 224 //purposely too large (should be 224 = 28*8) -#define AW 68516 //number of active bins in 2D sino +#define NBUCKTS 224 // purposely too large (should be 224 = 28*8) +#define AW 68516 // number of active bins in 2D sino #define NLI2R 2074 -//coincidence time window in pico-seconds +// coincidence time window in pico-seconds #define CWND = 5859.38 //====== SIGNA ======= @@ -82,23 +84,22 @@ #define SEG0_S 89 //====== -//number of transaxial blocks per module +// number of transaxial blocks per module #define NBTXM_S 4 -//number of transaxial modules (on the ring) +// number of transaxial modules (on the ring) #define NTXM_S 28 -//crystals per block +// crystals per block #define NCRSBLK_S 4 #define NCRS_S 448 - #define PI 3.1415926535f -#define L21 0.001f // threshold for special case when finding Siddon intersections -#define TA1 0.7885139f // angle threshold 1 for Siddon calculations ~ PI/4 -#define TA2 -0.7822831f // angle threshold 2 for Siddon calculations ~-PI/4 -#define N_TV 1807 //907 // max number of voxels intersections with a ray (t) -#define N_TT 10 // number of constants pre-calculated and saved for proper axial calculations -#define UV_SHFT 9 // shift when representing 2 voxel indx in one float variable +#define L21 0.001f // threshold for special case when finding Siddon intersections +#define TA1 0.7885139f // angle threshold 1 for Siddon calculations ~ PI/4 +#define TA2 -0.7822831f // angle threshold 2 for Siddon calculations ~-PI/4 +#define N_TV 1807 // 907 // max number of voxels intersections with a ray (t) +#define N_TT 10 // number of constants pre-calculated and saved for proper axial calculations +#define UV_SHFT 9 // shift when representing 2 voxel indx in one float variable 
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> //## start ##// constants definitions in synch with Python. DONT MODIFY MANUALLY HERE! @@ -119,9 +120,8 @@ // ring size #define SZ_RING 0.40625f -//crystal angle -#define aLPHA ((2*PI)/nCRS) - +// crystal angle +#define aLPHA ((2 * PI) / nCRS) //============= GE SIGNA stuff ================= // compile/add additional routines for GE Signa; otherwise comment out the definition below @@ -131,6 +131,4 @@ // https://www.hdfgroup.org/HDF5/release/obtainsrc.html#src //============================================== - - #endif // end of _DEF_H_ diff --git a/niftypet/nipet/include/scanner_0.h b/niftypet/nipet/include/scanner_0.h index d1c21c9d..afbe2d95 100644 --- a/niftypet/nipet/include/scanner_0.h +++ b/niftypet/nipet/include/scanner_0.h @@ -1,80 +1,77 @@ -#include #include "def.h" +#include #ifndef SCANNER_0_H #define SCANNER_0_H - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> // SCANNER CONSTANTS //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> struct Cnst { - int BPE; // bytes per single event - int LMOFF; // offset for the LM file (e.g., offsetting for header) + int BPE; // bytes per single event + int LMOFF; // offset for the LM file (e.g., offsetting for header) - int A; //sino angles - int W; //sino bins for any angular index - int aw; //sino bins (active only) + int A; // sino angles + int W; // sino bins for any angular index + int aw; // sino bins (active only) - int NCRS; //number of crystals - int NCRSR; //reduced number of crystals by gaps - int NRNG; //number of axial rings - int D; //number of linear indexes along Michelogram diagonals - int Bt; //number of buckets transaxially + int NCRS; // number of crystals + int NCRSR; // reduced number of crystals by gaps + int NRNG; // number of axial rings + int D; // number of linear indexes along Michelogram diagonals + int Bt; // number of buckets transaxially - 
int B; //number of buckets (total) - int Cbt;//number of crystals in bucket transaxially - int Cba;//number of crystals in bucket axially + int B; // number of buckets (total) + int Cbt; // number of crystals in bucket transaxially + int Cba; // number of crystals in bucket axially - int NSN1; //number of sinos in span-1 - int NSN11;//in span-11 - int NSN64;//with no MRD limit + int NSN1; // number of sinos in span-1 + int NSN11; // in span-11 + int NSN64; // with no MRD limit - char SPN; //span-1 (s=1) or span-11 (s=11, default) or SSRB (s=0) - int NSEG0; + char SPN; // span-1 (s=1) or span-11 (s=11, default) or SSRB (s=0) + int NSEG0; - char RNG_STRT; //range of rings considered in the projector calculations (start and stop, default are 0-64) - char RNG_END; // it only works with span-1 + char RNG_STRT; // range of rings considered in the projector calculations (start and stop, + // default are 0-64) + char RNG_END; // it only works with span-1 - int TGAP; //get the crystal gaps right in the sinogram, period and offset given - int OFFGAP; + int TGAP; // get the crystal gaps right in the sinogram, period and offset given + int OFFGAP; - int NSCRS; //number of scatter crystals used in scatter estimation - int NSRNG; - int MRD; + int NSCRS; // number of scatter crystals used in scatter estimation + int NSRNG; + int MRD; - float ALPHA; //angle subtended by a crystal - float AXR; //axial crystal dim + float ALPHA; // angle subtended by a crystal + float AXR; // axial crystal dim - float COSUPSMX; //cosine of max allowed scatter angle - float COSSTP; //cosine step + float COSUPSMX; // cosine of max allowed scatter angle + float COSSTP; // cosine step - int TOFBINN; - float TOFBINS; - float TOFBIND; - float ITOFBIND; + int TOFBINN; + float TOFBINS; + float TOFBIND; + float ITOFBIND; - char BTP; //0: no bootstrapping, 1: no-parametric, 2: parametric (recommended) - float BTPRT; // ratio of bootstrapped/original events in the target sinogram (1.0 default) + char BTP; // 
0: no bootstrapping, 1: no-parametric, 2: parametric (recommended) + float BTPRT; // ratio of bootstrapped/original events in the target sinogram (1.0 default) - char DEVID; // device (GPU) ID. allows choosing the device on which to perform calculations - char LOG; //different levels of verbose/logging like in Python's logging package + char DEVID; // device (GPU) ID. allows choosing the device on which to perform calculations + char LOG; // different levels of verbose/logging like in Python's logging package + float SIGMA_RM; // resolution modelling sigma + // float RE; //effective ring diameter + // float ICOSSTP; - float SIGMA_RM; // resolution modelling sigma - // float RE; //effective ring diameter - // float ICOSSTP; - - float ETHRLD; + float ETHRLD; }; //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> // LIST MODE DATA PROPERTIES //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> -typedef struct{ +typedef struct { char *fname; size_t *atag; size_t *btag; @@ -84,113 +81,99 @@ typedef struct{ int nchnk; int nitag; int toff; - int lmoff; //offset for starting LM events + int lmoff; // offset for starting LM events int last_ttag; int tstart; int tstop; int tmidd; - int flgs; //write out sinos in span-11 - int span; //choose span (1, 11 or SSRB) - int flgf; //do fan-sums calculations and output by randoms estimation + int flgs; // write out sinos in span-11 + int span; // choose span (1, 11 or SSRB) + int flgf; // do fan-sums calculations and output by randoms estimation - int bpe; //number of bytes per event - int btp; //whether to use bootstrap and if so what kind of bootstrap (0:no, 1:non-parametric, 2:parametric) + int bpe; // number of bytes per event + int btp; // whether to use bootstrap and if so what kind of bootstrap (0:no, 1:non-parametric, + // 2:parametric) - int log; //for logging in list mode processing + 
int log; // for logging in list mode processing -} LMprop; //properties of LM data file and its breaking up into chunks of data. +} LMprop; // properties of LM data file and its breaking up into chunks of data. //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - -#define HANDLE_ERROR(err) (HandleError( err, __FILE__, __LINE__ )) +#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__)) void HandleError(cudaError_t err, const char *file, int line); extern LMprop lmprop; typedef struct { - short *li2s11; - char *NSinos; -}span11LUT; + short *li2s11; + char *NSinos; +} span11LUT; typedef struct { - int *zR; //sum of z indx - int *zM; //total mass for SEG0 -} mMass; //structure for motion centre of Mass + int *zR; // sum of z indx + int *zM; // total mass for SEG0 +} mMass; // structure for motion centre of Mass struct LORcc { - short c0; - short c1; + short c0; + short c1; }; struct LORaw { - short ai; - short wi; + short ai; + short wi; }; -//structure for 2D sino lookup tables (Siemens mMR) +// structure for 2D sino lookup tables (Siemens mMR) struct txLUTs { - LORcc *s2cF; - int *c2sF; - int *cr2s; - LORcc *s2c; - LORcc *s2cr; - LORaw *aw2sn; - int * aw2ali; - short *crsr; - char *msino; - char *cij; - int naw; + LORcc *s2cF; + int *c2sF; + int *cr2s; + LORcc *s2c; + LORcc *s2cr; + LORaw *aw2sn; + int *aw2ali; + short *crsr; + char *msino; + char *cij; + int naw; }; -//structure for axial look up tables (Siemens mMR) +// structure for axial look up tables (Siemens mMR) struct axialLUT { - int * li2rno; // linear indx to ring indx - int * li2sn; // linear michelogram index (along diagonals) to sino index - int * li2nos; // linear indx to no of sinos in span-11 - short * sn1_rno; - short * sn1_sn11; - short * sn1_ssrb; - char *sn1_sn11no; - int Nli2rno[2]; // array sizes - int Nli2sn[2]; - int Nli2nos; + int *li2rno; // linear indx to ring indx + int *li2sn; // linear michelogram index (along diagonals) to sino index + int 
*li2nos; // linear indx to no of sinos in span-11 + short *sn1_rno; + short *sn1_sn11; + short *sn1_ssrb; + char *sn1_sn11no; + int Nli2rno[2]; // array sizes + int Nli2sn[2]; + int Nli2nos; }; -//structure for 2D sino lookup tables (GE Signa) +// structure for 2D sino lookup tables (GE Signa) struct txLUT_S { - int *c2s; + int *c2s; }; - -//structure for axial look up tables (GE Signa) +// structure for axial look up tables (GE Signa) struct axialLUT_S { - short *r2s; + short *r2s; }; - void getMemUse(const Cnst cnt); -//LUT for converstion from span-1 to span-11 +// LUT for converstion from span-1 to span-11 span11LUT span1_span11(const Cnst Cnt); - //------------------------ // mMR gaps //------------------------ -void put_gaps( - float *sino, - float *sng, - int *aw2ali, - int sino_no, - Cnst Cnt - ); - -void remove_gaps( - float *sng, - float *sino, - int snno, - int * aw2ali, - Cnst Cnt); +void put_gaps(float *sino, float *sng, int *aw2ali, int sino_no, Cnst Cnt); + +void remove_gaps(float *sng, float *sino, int snno, int *aw2ali, Cnst Cnt); //------------------------ -#endif //SCANNER_0_H +#endif // SCANNER_0_H diff --git a/niftypet/nipet/lm/src/hst.cu b/niftypet/nipet/lm/src/hst.cu index ee881c1d..5eb40fb9 100644 --- a/niftypet/nipet/lm/src/hst.cu +++ b/niftypet/nipet/lm/src/hst.cu @@ -9,596 +9,536 @@ Copyrights: 2018 #include #include -#include "hst.h" #include "def.h" +#include "hst.h" #include #define nhNSN1 4084 -#define nSEG 11 //number of segments, in span-11 +#define nSEG 11 // number of segments, in span-11 // #define CURAND_ERR(x) do { if((x)!=CURAND_STATUS_SUCCESS) { \ // printf("Error at %s:%d\n",__FILE__,__LINE__);\ // return EXIT_FAILURE;}} while(0) - -//put the info about sino segemnts to constant memory +// put the info about sino segemnts to constant memory __constant__ int c_sinoSeg[nSEG]; __constant__ int c_cumSeg[nSEG]; __constant__ short c_ssrb[nhNSN1]; -//span-1 to span-11 +// span-1 to span-11 __constant__ short c_li2span11[nhNSN1]; - 
- //============== RANDOM NUMBERS FROM CUDA ============================= -__global__ void setup_rand(curandState *state) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - curand_init((unsigned long long)clock(), idx, 0, &state[idx]); +__global__ void setup_rand(curandState *state) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + curand_init((unsigned long long)clock(), idx, 0, &state[idx]); } //===================================================================== -__global__ void hst( - int *lm, - unsigned int *psino, - // unsigned int *dsino, - unsigned int *ssrb, - unsigned int *rdlyd, - unsigned int *rprmt, - mMass mass, - unsigned int *snview, - short2 *sn2crs, - short2 *sn1_rno, - unsigned int *fansums, - unsigned int *bucks, - const int ele4thrd, - const int elm, - const int off, - const int toff, - const int nitag, - const int span, - const int btp, - const float btprt, - const int tstart, - const int tstop, - curandState *state, - curandDiscreteDistribution_t poisson_hst) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - - //> stream index - // int strmi = off / ELECHNK; - - //> index for bootstrap random numbers state - //int idb = (BTHREADS*strmi + blockIdx.x)*blockDim.x + threadIdx.x; - int idb = blockIdx.x*blockDim.x + threadIdx.x; - - //random number generator for bootstrapping when requested - curandState locState = state[idb]; - //weight for number of events, only for parametric bootstrap it can be different than 1. 
- unsigned int Nevnt = 1; - - int i_start, i_stop; - if (idx == (BTHREADS*NTHREADS - 1)) { - i_stop = off + elm; - i_start = off + (BTHREADS*NTHREADS - 1)*ele4thrd; - } - else { - i_stop = off + (idx + 1)*ele4thrd; - i_start = off + idx * ele4thrd; - } - - int word; - bool P; //prompt bit - int val; //bin address or time - int addr = -1; - int si = -1, si11 = -1; //span-1/11 sino index - short si_ssrb = -1; // ssrb sino index - int aw = -1; - int a = -1, w = -1; //angle and projection bin indexes - bool a0, a126; - - int bi; //bootstrap index - - //find the first time tag in this thread patch - int itag; //integration time tag - int itagu; - int i = i_start; - int tag = 0; - while (tag == 0) { - if (((lm[i] >> 29) == -4)) { - tag = 1; - itag = ((lm[i] & 0x1fffffff) - toff) / ITIME; //assuming that the tag is every 1ms - itagu = (val - toff) - itag*ITIME; - } - i++; - if (i >= i_stop) { - printf("wc> couldn't find time tag from this position onwards: %d, \n assuming the last one.\n", i_start); - itag = nitag; - itagu = 0; - break; - } - } - //printf("istart=%d, dt=%d, itag=%d\n", i_start, i_stop-i_start, itag ); - //=================================================================================== - - - for (int i = i_start; i0) { - bi = (int)floorf((i_stop - i_start)*curand_uniform(&locState)); - - //do the random sampling until it is an event - while (lm[i_start + bi] <= 0) { - bi = (int)floorf((i_stop - i_start)*curand_uniform(&locState)); - } - //get the randomly chosen packet - word = lm[i_start + bi]; - } - //otherwise do the normal stuff for non-event packets - } - else if (btp == 2) { - //parametric bootstrap (btp==2) - Nevnt = curand_discrete(&locState, poisson_hst); - }// <----------------------------------------------------------------------------------------- - - //by masking (ignore the first bits) extract the bin address or time - val = word & 0x3fffffff; - - if ((itag >= tstart) && (itag0){ - - if ((Nevnt>0)&&(Nevnt<32)){ - - si = val / NSBINANG; - 
aw = val - si*NSBINANG; - a = aw / NSBINS; - w = aw - a*NSBINS; - - //span-11 sinos - si11 = c_li2span11[si]; - - //SSRB sino [127x252x344] - si_ssrb = c_ssrb[si]; - - //span-1 - if (span == 1) addr = val; - //span-11 - else if (span == 11) addr = si11*NSBINANG + aw; - //SSRB - else if (span == 0) addr = si_ssrb*NSBINANG + aw; - - P = (word >> 30); - - //> prompts - if (P == 1) { - - atomicAdd(rprmt + itag, Nevnt); - - //---SSRB - atomicAdd(ssrb + si_ssrb*NSBINANG + aw, Nevnt); - //--- - - //---sino - atomicAdd(psino + addr, Nevnt); - //--- - - //-- centre of mass - atomicAdd(mass.zR + itag, si_ssrb); - atomicAdd(mass.zM + itag, Nevnt); - //--- - - //---motion projection view - a0 = a == 0; - a126 = a == 126; - if ((a0 || a126) && (itag> VTIME)*SEG0*NSBINS + si_ssrb*NSBINS + w, Nevnt << (a126 * 8)); - } - - } - - //> delayeds - else { - //> use the same UINT32 sinogram for prompts after shifting delayeds - atomicAdd(psino + addr, Nevnt<<16); - - //> delayeds head curve - atomicAdd(rdlyd + itag, Nevnt); - - //+++ fan-sums (for singles estimation) +++ - atomicAdd(fansums + nCRS*sn1_rno[si].x + sn2crs[a + NSANGLES*w].x, Nevnt); - atomicAdd(fansums + nCRS*sn1_rno[si].y + sn2crs[a + NSANGLES*w].y, Nevnt); - //+++ - } - } - } - - else { - - //--time tags - if ((word >> 29) == -4) { - itag = (val - toff) / ITIME; - itagu = (val - toff) - itag*ITIME; - } - //--singles - else if (((word >> 29) == -3) && (itag >= tstart) && (itag> 19); - - //weirdly the bucket index can be larger than NBUCKTS (the size)! so checking for it... 
- if (ibck0) << 30); - - //--get some more info about the time tag (mili seconds) for up to two singles reports per second - if (bucks[ibck + NBUCKTS*itag + NBUCKTS*nitag] == 0) - atomicAdd(bucks + ibck + NBUCKTS*itag + NBUCKTS*nitag, itagu); - else - atomicAdd(bucks + ibck + NBUCKTS*itag + NBUCKTS*nitag, itagu << 10); - } - - } - - } - - } - - }// <--for - - // put back the state for random generator when bootstrapping is requested - // if (btp>0) - state[idb] = locState; - +__global__ void hst(int *lm, unsigned int *psino, + // unsigned int *dsino, + unsigned int *ssrb, unsigned int *rdlyd, unsigned int *rprmt, mMass mass, + unsigned int *snview, short2 *sn2crs, short2 *sn1_rno, unsigned int *fansums, + unsigned int *bucks, const int ele4thrd, const int elm, const int off, + const int toff, const int nitag, const int span, const int btp, + const float btprt, const int tstart, const int tstop, curandState *state, + curandDiscreteDistribution_t poisson_hst) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + //> stream index + // int strmi = off / ELECHNK; + + //> index for bootstrap random numbers state + // int idb = (BTHREADS*strmi + blockIdx.x)*blockDim.x + threadIdx.x; + int idb = blockIdx.x * blockDim.x + threadIdx.x; + + // random number generator for bootstrapping when requested + curandState locState = state[idb]; + // weight for number of events, only for parametric bootstrap it can be different than 1. 
+ unsigned int Nevnt = 1; + + int i_start, i_stop; + if (idx == (BTHREADS * NTHREADS - 1)) { + i_stop = off + elm; + i_start = off + (BTHREADS * NTHREADS - 1) * ele4thrd; + } else { + i_stop = off + (idx + 1) * ele4thrd; + i_start = off + idx * ele4thrd; + } + + int word; + bool P; // prompt bit + int val; // bin address or time + int addr = -1; + int si = -1, si11 = -1; // span-1/11 sino index + short si_ssrb = -1; // ssrb sino index + int aw = -1; + int a = -1, w = -1; // angle and projection bin indexes + bool a0, a126; + + int bi; // bootstrap index + + // find the first time tag in this thread patch + int itag; // integration time tag + int itagu; + int i = i_start; + int tag = 0; + while (tag == 0) { + if (((lm[i] >> 29) == -4)) { + tag = 1; + itag = ((lm[i] & 0x1fffffff) - toff) / ITIME; // assuming that the tag is every 1ms + itagu = (val - toff) - itag * ITIME; + } + i++; + if (i >= i_stop) { + printf("wc> couldn't find time tag from this position onwards: %d, \n assuming the last " + "one.\n", + i_start); + itag = nitag; + itagu = 0; + break; + } + } + // printf("istart=%d, dt=%d, itag=%d\n", i_start, i_stop-i_start, itag ); + //=================================================================================== + + for (int i = i_start; i < i_stop; i++) { + + // read the data packet from global memory + word = lm[i]; + + //--- do the bootstrapping when requested <--------------------------------------------------- + if (btp == 1) { + // this is non-parametric bootstrap (btp==1); + // the parametric bootstrap (btp==2) will perform better (memory access) and may have better + // statistical properties + // for the given position in LM check if an event. if so do the bootstrapping. otherwise + // leave as is. 
+ if (word > 0) { + bi = (int)floorf((i_stop - i_start) * curand_uniform(&locState)); + + // do the random sampling until it is an event + while (lm[i_start + bi] <= 0) { + bi = (int)floorf((i_stop - i_start) * curand_uniform(&locState)); + } + // get the randomly chosen packet + word = lm[i_start + bi]; + } + // otherwise do the normal stuff for non-event packets + } else if (btp == 2) { + // parametric bootstrap (btp==2) + Nevnt = curand_discrete(&locState, poisson_hst); + } // <----------------------------------------------------------------------------------------- + + // by masking (ignore the first bits) extract the bin address or time + val = word & 0x3fffffff; + + if ((itag >= tstart) && (itag < tstop)) { + + if (word > 0) { + + if ((Nevnt > 0) && (Nevnt < 32)) { + + si = val / NSBINANG; + aw = val - si * NSBINANG; + a = aw / NSBINS; + w = aw - a * NSBINS; + + // span-11 sinos + si11 = c_li2span11[si]; + + // SSRB sino [127x252x344] + si_ssrb = c_ssrb[si]; + + // span-1 + if (span == 1) + addr = val; + // span-11 + else if (span == 11) + addr = si11 * NSBINANG + aw; + // SSRB + else if (span == 0) + addr = si_ssrb * NSBINANG + aw; + + P = (word >> 30); + + //> prompts + if (P == 1) { + + atomicAdd(rprmt + itag, Nevnt); + + //---SSRB + atomicAdd(ssrb + si_ssrb * NSBINANG + aw, Nevnt); + //--- + + //---sino + atomicAdd(psino + addr, Nevnt); + //--- + + //-- centre of mass + atomicAdd(mass.zR + itag, si_ssrb); + atomicAdd(mass.zM + itag, Nevnt); + //--- + + //---motion projection view + a0 = a == 0; + a126 = a == 126; + if ((a0 || a126) && (itag < MXNITAG)) { + atomicAdd(snview + (itag >> VTIME) * SEG0 * NSBINS + si_ssrb * NSBINS + w, + Nevnt << (a126 * 8)); + } + + } + + //> delayeds + else { + //> use the same UINT32 sinogram for prompts after shifting delayeds + atomicAdd(psino + addr, Nevnt << 16); + + //> delayeds head curve + atomicAdd(rdlyd + itag, Nevnt); + + //+++ fan-sums (for singles estimation) +++ + atomicAdd(fansums + nCRS * sn1_rno[si].x + 
sn2crs[a + NSANGLES * w].x, Nevnt); + atomicAdd(fansums + nCRS * sn1_rno[si].y + sn2crs[a + NSANGLES * w].y, Nevnt); + //+++ + } + } + } + + else { + + //--time tags + if ((word >> 29) == -4) { + itag = (val - toff) / ITIME; + itagu = (val - toff) - itag * ITIME; + } + //--singles + else if (((word >> 29) == -3) && (itag >= tstart) && (itag < tstop)) { + + // bucket index + unsigned short ibck = ((word & 0x1fffffff) >> 19); + + // weirdly the bucket index can be larger than NBUCKTS (the size)! so checking for it... + if (ibck < NBUCKTS) { + atomicAdd(bucks + ibck + NBUCKTS * itag, (word & 0x0007ffff) << 3); + // how many reads greater than zeros per one sec + // the last two bits are used for the number of reports per second + atomicAdd(bucks + ibck + NBUCKTS * itag + NBUCKTS * nitag, ((word & 0x0007ffff) > 0) + << 30); + + //--get some more info about the time tag (mili seconds) for up to two singles reports + // per second + if (bucks[ibck + NBUCKTS * itag + NBUCKTS * nitag] == 0) + atomicAdd(bucks + ibck + NBUCKTS * itag + NBUCKTS * nitag, itagu); + else + atomicAdd(bucks + ibck + NBUCKTS * itag + NBUCKTS * nitag, itagu << 10); + } + } + } + } + + } // <--for + + // put back the state for random generator when bootstrapping is requested + // if (btp>0) + state[idb] = locState; } - - - - //============================================================================= -char LOG; // logging in CUDA stream callback -char BTP; // switching bootstrap mode (0, 1, 2) -double BTPRT; //rate of bootstrap events (controls the output number of bootstrap events) +char LOG; // logging in CUDA stream callback +char BTP; // switching bootstrap mode (0, 1, 2) +double BTPRT; // rate of bootstrap events (controls the output number of bootstrap events) //> host generator for random Poisson events curandGenerator_t h_rndgen; - //============================================================================= -curandState* setup_curand() { +curandState *setup_curand() { - //Setup RANDOM 
NUMBERS even when bootstrapping was not requested - if (LOG <= LOGINFO) printf("\ni> setting up CUDA pseudorandom number generator... "); - curandState *d_prng_states; + // Setup RANDOM NUMBERS even when bootstrapping was not requested + if (LOG <= LOGINFO) + printf("\ni> setting up CUDA pseudorandom number generator... "); + curandState *d_prng_states; - // cudaMalloc((void **)&d_prng_states, MIN(NSTREAMS, lmprop.nchnk)*BTHREADS*NTHREADS * sizeof(curandStatePhilox4_32_10_t)); - // setup_rand <<< MIN(NSTREAMS, lmprop.nchnk)*BTHREADS, NTHREADS >>>(d_prng_states); + // cudaMalloc((void **)&d_prng_states, MIN(NSTREAMS, lmprop.nchnk)*BTHREADS*NTHREADS * + // sizeof(curandStatePhilox4_32_10_t)); setup_rand <<< MIN(NSTREAMS, lmprop.nchnk)*BTHREADS, + // NTHREADS >>>(d_prng_states); - cudaMalloc((void **)&d_prng_states, BTHREADS*NTHREADS * sizeof(curandState)); - setup_rand <<< BTHREADS, NTHREADS >>>(d_prng_states); + cudaMalloc((void **)&d_prng_states, BTHREADS * NTHREADS * sizeof(curandState)); + setup_rand<<>>(d_prng_states); - if (LOG <= LOGINFO) printf("DONE.\n"); + if (LOG <= LOGINFO) + printf("DONE.\n"); - return d_prng_states; + return d_prng_states; } - - - //============================================================================= //***** general variables used for streams int ichnk; // indicator of how many chunks have been processed in the GPU. int nchnkrd; // indicator of how many chunks have been read from disk. 
-int *lmbuff; // data buffer +int *lmbuff; // data buffer bool dataready[NSTREAMS]; - -FILE* open_lm(){ - FILE* f; - if ((f = fopen(lmprop.fname, "rb")) == NULL) - { - fprintf(stderr, "e> Can't open input file: %s \n", lmprop.fname); - exit(1); - } - return f; +FILE *open_lm() { + FILE *f; + if ((f = fopen(lmprop.fname, "rb")) == NULL) { + fprintf(stderr, "e> Can't open input file: %s \n", lmprop.fname); + exit(1); + } + return f; } +void seek_lm(FILE *f) { -void seek_lm(FILE* f){ - - size_t seek_offset = lmprop.lmoff + (lmprop.bpe*lmprop.atag[nchnkrd]); + size_t seek_offset = lmprop.lmoff + (lmprop.bpe * lmprop.atag[nchnkrd]); - #ifdef __linux__ - fseek(f, seek_offset, SEEK_SET); //<<<<------------------- IMPORTANT!!! - #endif - #ifdef WIN32 - _fseeki64(f, seek_offset, SEEK_SET); //<<<<------------------- IMPORTANT!!! - #endif +#ifdef __linux__ + fseek(f, seek_offset, SEEK_SET); //<<<<------------------- IMPORTANT!!! +#endif +#ifdef WIN32 + _fseeki64(f, seek_offset, SEEK_SET); //<<<<------------------- IMPORTANT!!! 
+#endif - if (LOG <= LOGDEBUG) - printf("ic> fseek adrress: %zd\n", lmprop.lmoff + lmprop.atag[nchnkrd]); + if (LOG <= LOGDEBUG) + printf("ic> fseek adrress: %zd\n", lmprop.lmoff + lmprop.atag[nchnkrd]); } +void get_lm_chunk(FILE *f, int stream_idx) { -void get_lm_chunk(FILE* f, int stream_idx){ + // ele4chnk[i] -> contains the number of elements for chunk i + // atag[i] -> contains the offset for the chunk i - // ele4chnk[i] -> contains the number of elements for chunk i - // atag[i] -> contains the offset for the chunk i + int n = lmprop.ele4chnk[nchnkrd]; - int n = lmprop.ele4chnk[nchnkrd]; + size_t r = fread(&lmbuff[stream_idx * ELECHNK], lmprop.bpe, n, f); + if (r != n) { + printf("ele4chnk = %d, r = %zd\n", n, r); + fputs("Reading error (CUDART callback)\n", stderr); + fclose(f); + exit(3); + } - size_t r = fread(&lmbuff[stream_idx*ELECHNK], lmprop.bpe, n, f); - if (r != n) - { - printf("ele4chnk = %d, r = %zd\n", n, r); - fputs("Reading error (CUDART callback)\n", stderr); - fclose(f); - exit(3); - } + // Increment the number of chunk read + nchnkrd++; - // Increment the number of chunk read - nchnkrd++; + // Set a flag: stream[i] is free now and the new data is ready. + dataready[stream_idx] = true; - // Set a flag: stream[i] is free now and the new data is ready. - dataready[stream_idx] = true; - - if (LOG <= LOGDEBUG) - printf("[%4d / %4d] chunks read\n\n", nchnkrd, lmprop.nchnk); + if (LOG <= LOGDEBUG) + printf("[%4d / %4d] chunks read\n\n", nchnkrd, lmprop.nchnk); } - - - - //================================================================================================ //***** Stream Callback ***** -void CUDART_CB MyCallback(cudaStream_t stream, cudaError_t status, void *data) -{ - int stream_idx = (int)(size_t)data; - - if (LOG <= LOGINFO){ - printf("\r +> stream[%d]: %d chunks of data are DONE. ", stream_idx, ichnk + 1); - } - - ichnk += 1; - if (nchnkrd stream[%d]: %d chunks of data are DONE. 
", stream_idx, ichnk + 1); + } + + ichnk += 1; + if (nchnkrd < lmprop.nchnk) { + FILE *fr = open_lm(); + seek_lm(fr); + get_lm_chunk(fr, stream_idx); + fclose(fr); + } + if (LOG <= LOGDEBUG) + printf("\n"); } - //================================================================================ -void gpu_hst( - unsigned int *d_psino, - // unsigned int *d_dsino, - unsigned int *d_ssrb, - unsigned int *d_rdlyd, - unsigned int *d_rprmt, - mMass d_mass, - unsigned int *d_snview, - unsigned int *d_fansums, - unsigned int *d_bucks, - int tstart, - int tstop, - LORcc *s2cF, - axialLUT axLUT, - const Cnst Cnt) -{ - - LOG = Cnt.LOG; - BTP = Cnt.BTP; - BTPRT = (double)Cnt.BTPRT; - - if (nhNSN1 != Cnt.NSN1) { - printf("e> defined number of sinos for constant memory, nhNSN1 = %d, does not match the one given in the structure of constants %d. please, correct that.\n", nhNSN1, Cnt.NSN1); - exit(1); - } - - // check which device is going to be used - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); - - //--- INITIALISE GPU RANDOM GENERATOR - if (Cnt.BTP>0) { - if (Cnt.LOG <= LOGINFO) { - printf("\nic> using GPU bootstrap mode: %d\n", Cnt.BTP); - printf(" > bootstrap with output ratio of: %f\n", Cnt.BTPRT); - } - } - - curandState *d_prng_states = setup_curand(); - // for parametric bootstrap find the histogram - curandDiscreteDistribution_t poisson_hst; - // normally instead of Cnt.BTPRT I would have 1.0 if expecting the same - // number of resampled events as in the original file (or close to) - if (Cnt.BTP==2) - curandCreatePoissonDistribution(Cnt.BTPRT, &poisson_hst); - //--- - - // single slice rebinning LUT to constant memory - cudaMemcpyToSymbol(c_ssrb, axLUT.sn1_ssrb, Cnt.NSN1 * sizeof(short)); - - //SPAN-1 to SPAN-11 conversion table in GPU constant memory - cudaMemcpyToSymbol(c_li2span11, axLUT.sn1_sn11, Cnt.NSN1 * sizeof(short)); - - short2 *d_sn2crs; - HANDLE_ERROR(cudaMalloc((void**)&d_sn2crs, Cnt.W * Cnt.A * 
sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_sn2crs, s2cF, Cnt.W * Cnt.A * sizeof(short2), cudaMemcpyHostToDevice)); - - short2 *d_sn1_rno; - HANDLE_ERROR(cudaMalloc((void**)&d_sn1_rno, Cnt.NSN1 * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_sn1_rno, axLUT.sn1_rno, Cnt.NSN1 * sizeof(short2), cudaMemcpyHostToDevice)); - - //put the sino segment info into the constant memory - int sinoSeg[nSEG] = { 127,115,115,93,93,71,71,49,49,27,27 }; // sinos in segments - - cudaMemcpyToSymbol(c_sinoSeg, sinoSeg, nSEG * sizeof(int)); - - //cumulative sum of the above segment def - int cumSeg[nSEG]; - cumSeg[0] = 0; - for (int i = 1; i allocate memory for the chunks of list mode file - int *d_lmbuff; - //> host pinned memory - HANDLE_ERROR(cudaMallocHost((void**)&lmbuff, NSTREAMS * ELECHNK * sizeof(int))); - //> device memory - HANDLE_ERROR(cudaMalloc((void**)&d_lmbuff, NSTREAMS * ELECHNK * sizeof(int))); - - - // Get the number of streams to be used - int nstreams = MIN(NSTREAMS, lmprop.nchnk); - - if (Cnt.LOG <= LOGINFO) printf("\ni> creating %d CUDA streams... ", nstreams); - cudaStream_t *stream = new cudaStream_t[nstreams]; - //cudaStream_t stream[nstreams]; - for (int i = 0; i < nstreams; ++i) - HANDLE_ERROR(cudaStreamCreate(&stream[i])); - if (Cnt.LOG <= LOGINFO) printf("DONE.\n"); - - - - // ****** check memory usage - getMemUse(Cnt); - //******* - - //__________________________________________________________________________________________________ - ichnk = 0; // indicator of how many chunks have been processed in the GPU. - nchnkrd = 0; // indicator of how many chunks have been read from disk. 
- - - // LM file read - if (Cnt.LOG <= LOGINFO) printf("\ni> reading the first chunks of LM data from:\n %s ", lmprop.fname); - FILE* fr = open_lm(); - - // Jump the any LM headers - seek_lm(fr); - - for (int i = 0; i < nstreams; i++) { - get_lm_chunk(fr, i); - } - fclose(fr); - - if (Cnt.LOG <= LOGINFO){ - printf("DONE.\n"); - printf("\n+> histogramming the LM data:\n"); - } - - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - //============================================================================ - for (int n = 0; n stream[%d] was free for %d-th chunk.\n", si, n + 1); - break; - } - //else{printf("\n >> stream %d was busy at %d-th chunk. \n", i, n);} - } - } - //****** - dataready[si] = 0; //set a flag: stream[i] is busy now with processing the data. - HANDLE_ERROR(cudaMemcpyAsync(&d_lmbuff[si*ELECHNK], &lmbuff[si*ELECHNK], //lmprop.atag[n] - lmprop.ele4chnk[n] * sizeof(int), cudaMemcpyHostToDevice, stream[si])); - - hst<<>>( - d_lmbuff, - d_psino, - d_ssrb, - d_rdlyd, - d_rprmt, - d_mass, - d_snview, - d_sn2crs, - d_sn1_rno, - d_fansums, - d_bucks, - lmprop.ele4thrd[n], lmprop.ele4chnk[n], - si*ELECHNK, - lmprop.toff, - lmprop.nitag, - lmprop.span, - BTP, BTPRT, - tstart, tstop, - d_prng_states, poisson_hst); - - HANDLE_ERROR(cudaGetLastError()); - if (Cnt.LOG <= LOGDEBUG) printf("chunk[%d], stream[%d], ele4thrd[%d], ele4chnk[%d]\n", n, si, lmprop.ele4thrd[n], lmprop.ele4chnk[n]); - cudaStreamAddCallback(stream[si], MyCallback, (void*)(size_t)si, 0); - - } - //============================================================================ - - cudaDeviceSynchronize(); - - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGDEBUG) printf("+> histogramming DONE in %fs.\n\n", 0.001*elapsedTime); - - - for (int i = 0; i < nstreams; ++i) - { - cudaError_t 
err = cudaStreamSynchronize(stream[i]); - if (Cnt.LOG <= LOGDEBUG) - printf("--> sync CPU with stream[%d/%d], %s\n", i, nstreams, cudaGetErrorName( err )); - HANDLE_ERROR( err ); - } - - //***** close things down ***** - for (int i = 0; i < nstreams; ++i) { - //printf("--> checking stream[%d], %s\n",i, cudaGetErrorName( cudaStreamQuery(stream[i]) )); - HANDLE_ERROR(cudaStreamDestroy(stream[i])); - } - - //______________________________________________________________________________________________________ - - - cudaFreeHost(lmbuff); - cudaFree(d_lmbuff); - cudaFree(d_sn2crs); - cudaFree(d_sn1_rno); - - //destroy the histogram for parametric bootstrap - if (Cnt.BTP==2) - curandDestroyDistribution(poisson_hst); - //***** - - - return; +void gpu_hst(unsigned int *d_psino, + // unsigned int *d_dsino, + unsigned int *d_ssrb, unsigned int *d_rdlyd, unsigned int *d_rprmt, mMass d_mass, + unsigned int *d_snview, unsigned int *d_fansums, unsigned int *d_bucks, int tstart, + int tstop, LORcc *s2cF, axialLUT axLUT, const Cnst Cnt) { + + LOG = Cnt.LOG; + BTP = Cnt.BTP; + BTPRT = (double)Cnt.BTPRT; + + if (nhNSN1 != Cnt.NSN1) { + printf("e> defined number of sinos for constant memory, nhNSN1 = %d, does not match the one " + "given in the structure of constants %d. 
please, correct that.\n", + nhNSN1, Cnt.NSN1); + exit(1); + } + + // check which device is going to be used + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGINFO) + printf("i> using CUDA device #%d\n", dev_id); + + //--- INITIALISE GPU RANDOM GENERATOR + if (Cnt.BTP > 0) { + if (Cnt.LOG <= LOGINFO) { + printf("\nic> using GPU bootstrap mode: %d\n", Cnt.BTP); + printf(" > bootstrap with output ratio of: %f\n", Cnt.BTPRT); + } + } + + curandState *d_prng_states = setup_curand(); + // for parametric bootstrap find the histogram + curandDiscreteDistribution_t poisson_hst; + // normally instead of Cnt.BTPRT I would have 1.0 if expecting the same + // number of resampled events as in the original file (or close to) + if (Cnt.BTP == 2) + curandCreatePoissonDistribution(Cnt.BTPRT, &poisson_hst); + //--- + + // single slice rebinning LUT to constant memory + cudaMemcpyToSymbol(c_ssrb, axLUT.sn1_ssrb, Cnt.NSN1 * sizeof(short)); + + // SPAN-1 to SPAN-11 conversion table in GPU constant memory + cudaMemcpyToSymbol(c_li2span11, axLUT.sn1_sn11, Cnt.NSN1 * sizeof(short)); + + short2 *d_sn2crs; + HANDLE_ERROR(cudaMalloc((void **)&d_sn2crs, Cnt.W * Cnt.A * sizeof(short2))); + HANDLE_ERROR(cudaMemcpy(d_sn2crs, s2cF, Cnt.W * Cnt.A * sizeof(short2), cudaMemcpyHostToDevice)); + + short2 *d_sn1_rno; + HANDLE_ERROR(cudaMalloc((void **)&d_sn1_rno, Cnt.NSN1 * sizeof(short2))); + HANDLE_ERROR( + cudaMemcpy(d_sn1_rno, axLUT.sn1_rno, Cnt.NSN1 * sizeof(short2), cudaMemcpyHostToDevice)); + + // put the sino segment info into the constant memory + int sinoSeg[nSEG] = {127, 115, 115, 93, 93, 71, 71, 49, 49, 27, 27}; // sinos in segments + + cudaMemcpyToSymbol(c_sinoSeg, sinoSeg, nSEG * sizeof(int)); + + // cumulative sum of the above segment def + int cumSeg[nSEG]; + cumSeg[0] = 0; + for (int i = 1; i < nSEG; i++) + cumSeg[i] = cumSeg[i - 1] + sinoSeg[i - 1]; + + cudaMemcpyToSymbol(c_cumSeg, cumSeg, nSEG * sizeof(int)); + + //> allocate memory for the chunks of list mode file + int 
*d_lmbuff; + //> host pinned memory + HANDLE_ERROR(cudaMallocHost((void **)&lmbuff, NSTREAMS * ELECHNK * sizeof(int))); + //> device memory + HANDLE_ERROR(cudaMalloc((void **)&d_lmbuff, NSTREAMS * ELECHNK * sizeof(int))); + + // Get the number of streams to be used + int nstreams = MIN(NSTREAMS, lmprop.nchnk); + + if (Cnt.LOG <= LOGINFO) + printf("\ni> creating %d CUDA streams... ", nstreams); + cudaStream_t *stream = new cudaStream_t[nstreams]; + // cudaStream_t stream[nstreams]; + for (int i = 0; i < nstreams; ++i) + HANDLE_ERROR(cudaStreamCreate(&stream[i])); + if (Cnt.LOG <= LOGINFO) + printf("DONE.\n"); + + // ****** check memory usage + getMemUse(Cnt); + //******* + + //__________________________________________________________________________________________________ + ichnk = 0; // indicator of how many chunks have been processed in the GPU. + nchnkrd = 0; // indicator of how many chunks have been read from disk. + + // LM file read + if (Cnt.LOG <= LOGINFO) + printf("\ni> reading the first chunks of LM data from:\n %s ", lmprop.fname); + FILE *fr = open_lm(); + + // Jump the any LM headers + seek_lm(fr); + + for (int i = 0; i < nstreams; i++) { + get_lm_chunk(fr, i); + } + fclose(fr); + + if (Cnt.LOG <= LOGINFO) { + printf("DONE.\n"); + printf("\n+> histogramming the LM data:\n"); + } + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + + //============================================================================ + for (int n = 0; n < lmprop.nchnk; n++) { // lmprop.nchnk + + //***** launch the next free stream ****** + int si, busy = 1; + while (busy == 1) { + for (int i = 0; i < nstreams; i++) { + if ((cudaStreamQuery(stream[i]) == cudaSuccess) && (dataready[i] == 1)) { + busy = 0; + si = i; + if (Cnt.LOG <= LOGDEBUG) + printf(" i> stream[%d] was free for %d-th chunk.\n", si, n + 1); + break; + } + // else{printf("\n >> stream %d was busy at %d-th chunk. 
\n", i, n);} + } + } + //****** + dataready[si] = 0; // set a flag: stream[i] is busy now with processing the data. + HANDLE_ERROR(cudaMemcpyAsync(&d_lmbuff[si * ELECHNK], &lmbuff[si * ELECHNK], // lmprop.atag[n] + lmprop.ele4chnk[n] * sizeof(int), cudaMemcpyHostToDevice, + stream[si])); + + hst<<>>( + d_lmbuff, d_psino, d_ssrb, d_rdlyd, d_rprmt, d_mass, d_snview, d_sn2crs, d_sn1_rno, + d_fansums, d_bucks, lmprop.ele4thrd[n], lmprop.ele4chnk[n], si * ELECHNK, lmprop.toff, + lmprop.nitag, lmprop.span, BTP, BTPRT, tstart, tstop, d_prng_states, poisson_hst); + + HANDLE_ERROR(cudaGetLastError()); + if (Cnt.LOG <= LOGDEBUG) + printf("chunk[%d], stream[%d], ele4thrd[%d], ele4chnk[%d]\n", n, si, lmprop.ele4thrd[n], + lmprop.ele4chnk[n]); + cudaStreamAddCallback(stream[si], MyCallback, (void *)(size_t)si, 0); + } + //============================================================================ + + cudaDeviceSynchronize(); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGDEBUG) + printf("+> histogramming DONE in %fs.\n\n", 0.001 * elapsedTime); + + for (int i = 0; i < nstreams; ++i) { + cudaError_t err = cudaStreamSynchronize(stream[i]); + if (Cnt.LOG <= LOGDEBUG) + printf("--> sync CPU with stream[%d/%d], %s\n", i, nstreams, cudaGetErrorName(err)); + HANDLE_ERROR(err); + } + + //***** close things down ***** + for (int i = 0; i < nstreams; ++i) { + // printf("--> checking stream[%d], %s\n",i, cudaGetErrorName( cudaStreamQuery(stream[i]) )); + HANDLE_ERROR(cudaStreamDestroy(stream[i])); + } + + //______________________________________________________________________________________________________ + + cudaFreeHost(lmbuff); + cudaFree(d_lmbuff); + cudaFree(d_sn2crs); + cudaFree(d_sn1_rno); + + // destroy the histogram for parametric bootstrap + if (Cnt.BTP == 2) + curandDestroyDistribution(poisson_hst); + //***** + + 
return; } diff --git a/niftypet/nipet/lm/src/hst.h b/niftypet/nipet/lm/src/hst.h index 60e080d0..38bf2b90 100644 --- a/niftypet/nipet/lm/src/hst.h +++ b/niftypet/nipet/lm/src/hst.h @@ -1,34 +1,20 @@ #ifndef HST_H #define HST_H -#include "scanner_0.h" #include "lmaux.h" +#include "scanner_0.h" #include -#include #include - +#include extern LMprop lmprop; -extern int* lm; - -curandState* setup_curand(); - -void gpu_hst( - unsigned int *d_psino, - unsigned int *d_ssrb, - unsigned int *d_rdlyd, - unsigned int *d_rprmt, - mMass d_mass, - unsigned int *d_snview, - unsigned int *d_fansums, - unsigned int *d_bucks, - int tstart, int tstop, - LORcc *s2cF, - axialLUT axLUT, - const Cnst Cnt); - - +extern int *lm; +curandState *setup_curand(); +void gpu_hst(unsigned int *d_psino, unsigned int *d_ssrb, unsigned int *d_rdlyd, + unsigned int *d_rprmt, mMass d_mass, unsigned int *d_snview, unsigned int *d_fansums, + unsigned int *d_bucks, int tstart, int tstop, LORcc *s2cF, axialLUT axLUT, + const Cnst Cnt); #endif diff --git a/niftypet/nipet/lm/src/lm_module.cu b/niftypet/nipet/lm/src/lm_module.cu index 19976612..a9f0f4f5 100644 --- a/niftypet/nipet/lm/src/lm_module.cu +++ b/niftypet/nipet/lm/src/lm_module.cu @@ -7,18 +7,15 @@ author: Pawel Markiewicz Copyrights: 2019 ------------------------------------------------------------------------*/ #define PY_SSIZE_T_CLEAN -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION //NPY_API_VERSION +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION // NPY_API_VERSION -#include -#include -#include #include "def.h" #include "lmproc.h" -#include "scanner_0.h" #include "rnd.h" - - - +#include "scanner_0.h" +#include +#include +#include //=== START PYTHON INIT === @@ -29,733 +26,708 @@ static PyObject *mmr_rand(PyObject *self, PyObject *args); static PyObject *mmr_prand(PyObject *self, PyObject *args); //--- - //> Module Method Table static PyMethodDef mmr_lmproc_methods[] = { - {"lminfo", mmr_lminfo, METH_VARARGS, - "Get the timing info from the 
LM data."}, - {"hist", mmr_hist, METH_VARARGS, - "Process and histogram the LM data using CUDA streams."}, - {"rand", mmr_rand, METH_VARARGS, - "Estimates randoms' 3D sinograms from crystal singles."}, - {"prand", mmr_prand, METH_VARARGS, - "Estimates randoms' 3D sinograms from prompt-derived fan-sums."}, - {NULL, NULL, 0, NULL} // Sentinel + {"lminfo", mmr_lminfo, METH_VARARGS, "Get the timing info from the LM data."}, + {"hist", mmr_hist, METH_VARARGS, "Process and histogram the LM data using CUDA streams."}, + {"rand", mmr_rand, METH_VARARGS, "Estimates randoms' 3D sinograms from crystal singles."}, + {"prand", mmr_prand, METH_VARARGS, + "Estimates randoms' 3D sinograms from prompt-derived fan-sums."}, + {NULL, NULL, 0, NULL} // Sentinel }; //> Module Definition Structure static struct PyModuleDef mmr_lmproc_module = { - PyModuleDef_HEAD_INIT, - "mmr_lmproc", //> name of module - //> module documentation, may be NULL - "This module provides an interface for mMR image generation using GPU routines.", - -1, //> the module keeps state in global variables. - mmr_lmproc_methods -}; + PyModuleDef_HEAD_INIT, + "mmr_lmproc", //> name of module + //> module documentation, may be NULL + "This module provides an interface for mMR image generation using GPU routines.", + -1, //> the module keeps state in global variables. 
+ mmr_lmproc_methods}; //> Initialization function PyMODINIT_FUNC PyInit_mmr_lmproc(void) { - Py_Initialize(); + Py_Initialize(); - //> load NumPy functionality - import_array(); + //> load NumPy functionality + import_array(); - return PyModule_Create(&mmr_lmproc_module); + return PyModule_Create(&mmr_lmproc_module); } //=== END PYTHON INIT === - //============================================================================= - - //============================================================================= // P R O C E S I N G L I S T M O D E D A T A //----------------------------------------------------------------------------- // Siemens mMR static PyObject *mmr_lminfo(PyObject *self, PyObject *args) { - /* Quickly process the list mode file to find the timing information - and number of elements - */ + /* Quickly process the list mode file to find the timing information + and number of elements + */ - // path to LM file - char *flm; + // path to LM file + char *flm; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "s", &flm)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "s", &flm)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - FILE *fr; - size_t r; + FILE *fr; + size_t r; - //open the list-mode file - fr = fopen(flm, "rb"); - if (fr == NULL) { - fprintf(stderr, "Can't open input (list mode) file!\n"); - exit(1); - } + // open the list-mode file + fr = fopen(flm, "rb"); + if (fr == NULL) { + fprintf(stderr, "Can't open input (list mode) file!\n"); + exit(1); + } #ifdef __linux__ - // file size in elements - fseek(fr, 0, SEEK_END); - size_t nbytes = ftell(fr); - size_t ele = nbytes / sizeof(int); - rewind(fr); + // file size in elements + 
fseek(fr, 0, SEEK_END); + size_t nbytes = ftell(fr); + size_t ele = nbytes / sizeof(int); + rewind(fr); #endif #ifdef WIN32 - struct _stati64 bufStat; - _stati64(flm, &bufStat); - size_t nbytes = bufStat.st_size; - size_t ele = nbytes / sizeof(int); + struct _stati64 bufStat; + _stati64(flm, &bufStat); + size_t nbytes = bufStat.st_size; + size_t ele = nbytes / sizeof(int); #endif - unsigned int buff; - // tag times - int tagt1, tagt0; - // address of tag times in LM stream - size_t taga1, taga0; - size_t c = 1; - //-- - int tag = 0; - while (tag == 0) { - r = fread(&buff, sizeof(unsigned int), 1, fr); - if (r != 1) { fputs("Reading error \n", stderr); exit(3); } - - if (mMR_TTAG(buff)) { - tag = 1; - tagt0 = buff & mMR_TMSK; - taga0 = c; - } - c += 1; - } - //printf("i> the first time tag is: %d at positon %lu.\n", tagt0, taga0); - - tag = 0; c = 1; - while (tag == 0) { + unsigned int buff; + // tag times + int tagt1, tagt0; + // address of tag times in LM stream + size_t taga1, taga0; + size_t c = 1; + //-- + int tag = 0; + while (tag == 0) { + r = fread(&buff, sizeof(unsigned int), 1, fr); + if (r != 1) { + fputs("Reading error \n", stderr); + exit(3); + } + + if (mMR_TTAG(buff)) { + tag = 1; + tagt0 = buff & mMR_TMSK; + taga0 = c; + } + c += 1; + } + // printf("i> the first time tag is: %d at positon %lu.\n", tagt0, taga0); + + tag = 0; + c = 1; + while (tag == 0) { #ifdef __linux__ - fseek(fr, -c * sizeof(unsigned int), SEEK_END); + fseek(fr, -c * sizeof(unsigned int), SEEK_END); #endif #ifdef WIN32 - _fseeki64(fr, -c * sizeof(unsigned int), SEEK_END); + _fseeki64(fr, -c * sizeof(unsigned int), SEEK_END); #endif - r = fread(&buff, sizeof(unsigned int), 1, fr); - if (r != 1) { fputs("Reading error \n", stderr); exit(3); } - if (mMR_TTAG(buff)) { - tag = 1; - tagt1 = buff & mMR_TMSK; - taga1 = ele - c; - } - c += 1; - } - //printf("i> the last time tag is: %d at positon %lu.\n", tagt1, taga1); - - - // first/last time tags out - PyObject *tuple_ttag = 
PyTuple_New(2); - PyTuple_SetItem(tuple_ttag, 0, Py_BuildValue("i", tagt0)); - PyTuple_SetItem(tuple_ttag, 1, Py_BuildValue("i", tagt1)); - - // first/last tag address out - PyObject *tuple_atag = PyTuple_New(2); - PyTuple_SetItem(tuple_atag, 0, Py_BuildValue("L", taga0)); - PyTuple_SetItem(tuple_atag, 1, Py_BuildValue("L", taga1)); - - // all together with number of elements - PyObject *tuple_out = PyTuple_New(3); - PyTuple_SetItem(tuple_out, 0, Py_BuildValue("L", ele)); - PyTuple_SetItem(tuple_out, 1, tuple_ttag); - PyTuple_SetItem(tuple_out, 2, tuple_atag); - - - return tuple_out; + r = fread(&buff, sizeof(unsigned int), 1, fr); + if (r != 1) { + fputs("Reading error \n", stderr); + exit(3); + } + if (mMR_TTAG(buff)) { + tag = 1; + tagt1 = buff & mMR_TMSK; + taga1 = ele - c; + } + c += 1; + } + // printf("i> the last time tag is: %d at positon %lu.\n", tagt1, taga1); + + // first/last time tags out + PyObject *tuple_ttag = PyTuple_New(2); + PyTuple_SetItem(tuple_ttag, 0, Py_BuildValue("i", tagt0)); + PyTuple_SetItem(tuple_ttag, 1, Py_BuildValue("i", tagt1)); + + // first/last tag address out + PyObject *tuple_atag = PyTuple_New(2); + PyTuple_SetItem(tuple_atag, 0, Py_BuildValue("L", taga0)); + PyTuple_SetItem(tuple_atag, 1, Py_BuildValue("L", taga1)); + + // all together with number of elements + PyObject *tuple_out = PyTuple_New(3); + PyTuple_SetItem(tuple_out, 0, Py_BuildValue("L", ele)); + PyTuple_SetItem(tuple_out, 1, tuple_ttag); + PyTuple_SetItem(tuple_out, 2, tuple_atag); + + return tuple_out; } - //============================================================================= -static PyObject *mmr_hist(PyObject *self, PyObject *args) -{ - - //preallocated dictionary of output arrays - PyObject * o_dicout=NULL; - - char * flm; - int tstart, tstop; - - //Dictionary of scanner constants - PyObject * o_mmrcnst=NULL; - //axial LUTs - PyObject * o_axLUT=NULL; - PyObject * o_txLUT=NULL; - - //structure of constants - Cnst Cnt; - //structure of axial LUTs for LM 
processing - axialLUT axLUT; - - /* Parse the input tuple */ - if (!PyArg_ParseTuple( - args, "OsiiOOO", - &o_dicout, - &flm, - &tstart, - &tstop, - &o_txLUT, - &o_axLUT, - &o_mmrcnst)) - return NULL; - - - - /* Interpret the input objects as numpy arrays. */ - //the dictionary of constants - PyObject* pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - - PyObject* pd_bpe = PyDict_GetItemString(o_mmrcnst, "BPE"); - Cnt.BPE = (int)PyLong_AsLong(pd_bpe); - - PyObject* pd_lmoff = PyDict_GetItemString(o_mmrcnst, "LMOFF"); - Cnt.LMOFF = (int)PyLong_AsLong(pd_lmoff); - - PyObject* pd_Naw = PyDict_GetItemString(o_mmrcnst, "Naw"); - Cnt.aw = (int)PyLong_AsLong(pd_Naw); - PyObject* pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); - Cnt.A = (int)PyLong_AsLong(pd_A); - PyObject* pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); - Cnt.W = (int)PyLong_AsLong(pd_W); - PyObject* pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); - Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); - PyObject* pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); - Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); - PyObject* pd_NRNG = PyDict_GetItemString(o_mmrcnst, "NRNG"); - Cnt.NRNG = (int)PyLong_AsLong(pd_NRNG); - PyObject* pd_NCRS = PyDict_GetItemString(o_mmrcnst, "NCRS"); - Cnt.NCRS = (int)PyLong_AsLong(pd_NCRS); - PyObject* pd_NCRSR = PyDict_GetItemString(o_mmrcnst, "NCRSR"); - Cnt.NCRSR = (int)PyLong_AsLong(pd_NCRSR); - PyObject* pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); - Cnt.SPN = (int)PyLong_AsLong(pd_span); - PyObject* pd_tgap = PyDict_GetItemString(o_mmrcnst, "TGAP"); - Cnt.TGAP = (int)PyLong_AsLong(pd_tgap); - PyObject* pd_offgap = PyDict_GetItemString(o_mmrcnst, "OFFGAP"); - Cnt.OFFGAP = (int)PyLong_AsLong(pd_offgap); - - PyObject* pd_btp = PyDict_GetItemString(o_mmrcnst, "BTP"); - Cnt.BTP = (char)PyLong_AsLong(pd_btp); - PyObject* pd_btprt = PyDict_GetItemString(o_mmrcnst, "BTPRT"); - Cnt.BTPRT = (float)PyFloat_AsDouble(pd_btprt); - PyObject* 
pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - //axial LUTs: - PyObject* pd_sn1_rno = PyDict_GetItemString(o_axLUT, "sn1_rno"); - PyObject* pd_sn1_sn11 = PyDict_GetItemString(o_axLUT, "sn1_sn11"); - PyObject* pd_sn1_ssrb = PyDict_GetItemString(o_axLUT, "sn1_ssrb"); - - PyArrayObject *p_sn1_rno = NULL; - p_sn1_rno = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_rno, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_sn1_sn11 = NULL; - p_sn1_sn11 = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_sn11, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_sn1_ssrb = NULL; - p_sn1_ssrb = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_ssrb, NPY_INT16, NPY_ARRAY_IN_ARRAY); - - PyObject *pd_s2cF = PyDict_GetItemString(o_txLUT, "s2cF"); - PyArrayObject *p_s2cF = NULL; - p_s2cF = (PyArrayObject *)PyArray_FROM_OTF(pd_s2cF, NPY_INT16, NPY_ARRAY_IN_ARRAY); - - /* If that didn't work, throw an exception. */ - if (p_sn1_rno == NULL || p_sn1_sn11 == NULL || p_sn1_ssrb == NULL || p_s2cF == NULL) { - Py_XDECREF(p_sn1_rno); - Py_XDECREF(p_sn1_sn11); - Py_XDECREF(p_sn1_ssrb); - Py_XDECREF(p_s2cF); - return NULL; - } - - - - axLUT.sn1_rno = (short*)PyArray_DATA(p_sn1_rno); - axLUT.sn1_sn11 = (short*)PyArray_DATA(p_sn1_sn11); - axLUT.sn1_ssrb = (short*)PyArray_DATA(p_sn1_ssrb); - - //sino to crystal LUT from txLUTs - LORcc *s2cF = (LORcc*)PyArray_DATA(p_s2cF); - - //=============== the dictionary of output arrays ================== - //sinograms - PyObject *pd_psn=NULL, *pd_dsn=NULL; - PyArrayObject *p_psn=NULL, *p_dsn=NULL; - - // prompt sinogram - pd_psn = PyDict_GetItemString(o_dicout, "psn"); - p_psn = (PyArrayObject *)PyArray_FROM_OTF(pd_psn, NPY_UINT16, NPY_ARRAY_INOUT_ARRAY2); - - // delayed sinogram - pd_dsn = PyDict_GetItemString(o_dicout, "dsn"); - p_dsn = (PyArrayObject *)PyArray_FROM_OTF(pd_dsn, NPY_UINT16, NPY_ARRAY_INOUT_ARRAY2); - - PyArrayObject *p_phc=NULL, *p_dhc=NULL, *p_ssr=NULL, *p_mss=NULL; - PyArrayObject *p_pvs=NULL, *p_bck=NULL, 
*p_fan=NULL; - - // single slice rebinned (SSRB) prompt sinogram - PyObject *pd_ssr = PyDict_GetItemString(o_dicout, "ssr"); - p_ssr = (PyArrayObject *)PyArray_FROM_OTF(pd_ssr, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); - - // prompt head curve - PyObject *pd_phc = PyDict_GetItemString(o_dicout, "phc"); - p_phc = (PyArrayObject *)PyArray_FROM_OTF(pd_phc, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); - - // delayed head curve - PyObject *pd_dhc = PyDict_GetItemString(o_dicout, "dhc"); - p_dhc = (PyArrayObject *)PyArray_FROM_OTF(pd_dhc, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); - - // centre of mass of axial radiodistribution - PyObject *pd_mss = PyDict_GetItemString(o_dicout, "mss"); - p_mss = (PyArrayObject *)PyArray_FROM_OTF(pd_mss, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - - // projection views (sagittal and coronal) for video - PyObject *pd_pvs = PyDict_GetItemString(o_dicout, "pvs"); - p_pvs = (PyArrayObject *)PyArray_FROM_OTF(pd_pvs, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); - - // single bucket rates over time - PyObject *pd_bck = PyDict_GetItemString(o_dicout, "bck"); - p_bck = (PyArrayObject *)PyArray_FROM_OTF(pd_bck, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); - - // fan-sums of delayed events - PyObject *pd_fan = PyDict_GetItemString(o_dicout, "fan"); - p_fan = (PyArrayObject *)PyArray_FROM_OTF(pd_fan, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); - - if (p_phc == NULL || p_dhc == NULL || p_mss == NULL || p_pvs == NULL || - p_bck == NULL || p_fan == NULL || p_psn == NULL || p_dsn == NULL || p_ssr == NULL) { - PyArray_DiscardWritebackIfCopy(p_phc); - Py_XDECREF(p_phc); - PyArray_DiscardWritebackIfCopy(p_dhc); - Py_XDECREF(p_dhc); - PyArray_DiscardWritebackIfCopy(p_mss); - Py_XDECREF(p_mss); - PyArray_DiscardWritebackIfCopy(p_pvs); - Py_XDECREF(p_pvs); - PyArray_DiscardWritebackIfCopy(p_bck); - Py_XDECREF(p_bck); - PyArray_DiscardWritebackIfCopy(p_fan); - Py_XDECREF(p_fan); - - PyArray_DiscardWritebackIfCopy(p_psn); - Py_XDECREF(p_psn); - PyArray_DiscardWritebackIfCopy(p_dsn); - Py_XDECREF(p_dsn); - 
PyArray_DiscardWritebackIfCopy(p_ssr); - Py_XDECREF(p_ssr); - return NULL; - } - - hstout dicout; - // head curves (prompts and delayed), centre of mass of - // axial radiodistribution and projection views (for video) - dicout.hcp = (unsigned int*)PyArray_DATA(p_phc); - dicout.hcd = (unsigned int*)PyArray_DATA(p_dhc); - dicout.mss = (float*)PyArray_DATA(p_mss); - dicout.snv = (unsigned int*)PyArray_DATA(p_pvs); - - //single buckets and delayed fan-sums - dicout.bck = (unsigned int*)PyArray_DATA(p_bck); - dicout.fan = (unsigned int*)PyArray_DATA(p_fan); - - //sinograms: prompt, delayed and SSRB - dicout.psn = (unsigned short*)PyArray_DATA(p_psn); - dicout.dsn = (unsigned short*)PyArray_DATA(p_dsn); - dicout.ssr = (unsigned int*)PyArray_DATA(p_ssr); - //================================================================== - - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); - - //================================================================== - lmproc(dicout, flm, tstart, tstop, s2cF, axLUT, Cnt); - //================================================================== - - //Clean up: - Py_DECREF(p_sn1_rno); - Py_DECREF(p_sn1_sn11); - Py_DECREF(p_sn1_ssrb); - Py_DECREF(p_s2cF); - - PyArray_ResolveWritebackIfCopy(p_phc); - Py_DECREF(p_phc); - PyArray_ResolveWritebackIfCopy(p_dhc); - Py_DECREF(p_dhc); - PyArray_ResolveWritebackIfCopy(p_mss); - Py_DECREF(p_mss); - PyArray_ResolveWritebackIfCopy(p_pvs); - Py_DECREF(p_pvs); - PyArray_ResolveWritebackIfCopy(p_bck); - Py_DECREF(p_bck); - PyArray_ResolveWritebackIfCopy(p_fan); - Py_DECREF(p_fan); - - PyArray_ResolveWritebackIfCopy(p_psn); - Py_DECREF(p_psn); - PyArray_ResolveWritebackIfCopy(p_dsn); - Py_DECREF(p_dsn); - PyArray_ResolveWritebackIfCopy(p_ssr); - Py_DECREF(p_ssr); - - - Py_INCREF(Py_None); - return Py_None; +static PyObject *mmr_hist(PyObject *self, PyObject *args) { + + // preallocated dictionary of output arrays + PyObject *o_dicout = NULL; + + char *flm; + int tstart, tstop; + 
+ // Dictionary of scanner constants + PyObject *o_mmrcnst = NULL; + // axial LUTs + PyObject *o_axLUT = NULL; + PyObject *o_txLUT = NULL; + + // structure of constants + Cnst Cnt; + // structure of axial LUTs for LM processing + axialLUT axLUT; + + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OsiiOOO", &o_dicout, &flm, &tstart, &tstop, &o_txLUT, &o_axLUT, + &o_mmrcnst)) + return NULL; + + /* Interpret the input objects as numpy arrays. */ + // the dictionary of constants + PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); + Cnt.LOG = (char)PyLong_AsLong(pd_log); + + PyObject *pd_bpe = PyDict_GetItemString(o_mmrcnst, "BPE"); + Cnt.BPE = (int)PyLong_AsLong(pd_bpe); + + PyObject *pd_lmoff = PyDict_GetItemString(o_mmrcnst, "LMOFF"); + Cnt.LMOFF = (int)PyLong_AsLong(pd_lmoff); + + PyObject *pd_Naw = PyDict_GetItemString(o_mmrcnst, "Naw"); + Cnt.aw = (int)PyLong_AsLong(pd_Naw); + PyObject *pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); + Cnt.A = (int)PyLong_AsLong(pd_A); + PyObject *pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); + Cnt.W = (int)PyLong_AsLong(pd_W); + PyObject *pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); + Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); + PyObject *pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); + Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); + PyObject *pd_NRNG = PyDict_GetItemString(o_mmrcnst, "NRNG"); + Cnt.NRNG = (int)PyLong_AsLong(pd_NRNG); + PyObject *pd_NCRS = PyDict_GetItemString(o_mmrcnst, "NCRS"); + Cnt.NCRS = (int)PyLong_AsLong(pd_NCRS); + PyObject *pd_NCRSR = PyDict_GetItemString(o_mmrcnst, "NCRSR"); + Cnt.NCRSR = (int)PyLong_AsLong(pd_NCRSR); + PyObject *pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); + Cnt.SPN = (int)PyLong_AsLong(pd_span); + PyObject *pd_tgap = PyDict_GetItemString(o_mmrcnst, "TGAP"); + Cnt.TGAP = (int)PyLong_AsLong(pd_tgap); + PyObject *pd_offgap = PyDict_GetItemString(o_mmrcnst, "OFFGAP"); + Cnt.OFFGAP = (int)PyLong_AsLong(pd_offgap); + + PyObject *pd_btp = 
PyDict_GetItemString(o_mmrcnst, "BTP"); + Cnt.BTP = (char)PyLong_AsLong(pd_btp); + PyObject *pd_btprt = PyDict_GetItemString(o_mmrcnst, "BTPRT"); + Cnt.BTPRT = (float)PyFloat_AsDouble(pd_btprt); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + // axial LUTs: + PyObject *pd_sn1_rno = PyDict_GetItemString(o_axLUT, "sn1_rno"); + PyObject *pd_sn1_sn11 = PyDict_GetItemString(o_axLUT, "sn1_sn11"); + PyObject *pd_sn1_ssrb = PyDict_GetItemString(o_axLUT, "sn1_ssrb"); + + PyArrayObject *p_sn1_rno = NULL; + p_sn1_rno = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_rno, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_sn1_sn11 = NULL; + p_sn1_sn11 = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_sn11, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_sn1_ssrb = NULL; + p_sn1_ssrb = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_ssrb, NPY_INT16, NPY_ARRAY_IN_ARRAY); + + PyObject *pd_s2cF = PyDict_GetItemString(o_txLUT, "s2cF"); + PyArrayObject *p_s2cF = NULL; + p_s2cF = (PyArrayObject *)PyArray_FROM_OTF(pd_s2cF, NPY_INT16, NPY_ARRAY_IN_ARRAY); + + /* If that didn't work, throw an exception. 
*/ + if (p_sn1_rno == NULL || p_sn1_sn11 == NULL || p_sn1_ssrb == NULL || p_s2cF == NULL) { + Py_XDECREF(p_sn1_rno); + Py_XDECREF(p_sn1_sn11); + Py_XDECREF(p_sn1_ssrb); + Py_XDECREF(p_s2cF); + return NULL; + } + + axLUT.sn1_rno = (short *)PyArray_DATA(p_sn1_rno); + axLUT.sn1_sn11 = (short *)PyArray_DATA(p_sn1_sn11); + axLUT.sn1_ssrb = (short *)PyArray_DATA(p_sn1_ssrb); + + // sino to crystal LUT from txLUTs + LORcc *s2cF = (LORcc *)PyArray_DATA(p_s2cF); + + //=============== the dictionary of output arrays ================== + // sinograms + PyObject *pd_psn = NULL, *pd_dsn = NULL; + PyArrayObject *p_psn = NULL, *p_dsn = NULL; + + // prompt sinogram + pd_psn = PyDict_GetItemString(o_dicout, "psn"); + p_psn = (PyArrayObject *)PyArray_FROM_OTF(pd_psn, NPY_UINT16, NPY_ARRAY_INOUT_ARRAY2); + + // delayed sinogram + pd_dsn = PyDict_GetItemString(o_dicout, "dsn"); + p_dsn = (PyArrayObject *)PyArray_FROM_OTF(pd_dsn, NPY_UINT16, NPY_ARRAY_INOUT_ARRAY2); + + PyArrayObject *p_phc = NULL, *p_dhc = NULL, *p_ssr = NULL, *p_mss = NULL; + PyArrayObject *p_pvs = NULL, *p_bck = NULL, *p_fan = NULL; + + // single slice rebinned (SSRB) prompt sinogram + PyObject *pd_ssr = PyDict_GetItemString(o_dicout, "ssr"); + p_ssr = (PyArrayObject *)PyArray_FROM_OTF(pd_ssr, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); + + // prompt head curve + PyObject *pd_phc = PyDict_GetItemString(o_dicout, "phc"); + p_phc = (PyArrayObject *)PyArray_FROM_OTF(pd_phc, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); + + // delayed head curve + PyObject *pd_dhc = PyDict_GetItemString(o_dicout, "dhc"); + p_dhc = (PyArrayObject *)PyArray_FROM_OTF(pd_dhc, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); + + // centre of mass of axial radiodistribution + PyObject *pd_mss = PyDict_GetItemString(o_dicout, "mss"); + p_mss = (PyArrayObject *)PyArray_FROM_OTF(pd_mss, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + + // projection views (sagittal and coronal) for video + PyObject *pd_pvs = PyDict_GetItemString(o_dicout, "pvs"); + p_pvs = (PyArrayObject 
*)PyArray_FROM_OTF(pd_pvs, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); + + // single bucket rates over time + PyObject *pd_bck = PyDict_GetItemString(o_dicout, "bck"); + p_bck = (PyArrayObject *)PyArray_FROM_OTF(pd_bck, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); + + // fan-sums of delayed events + PyObject *pd_fan = PyDict_GetItemString(o_dicout, "fan"); + p_fan = (PyArrayObject *)PyArray_FROM_OTF(pd_fan, NPY_UINT32, NPY_ARRAY_INOUT_ARRAY2); + + if (p_phc == NULL || p_dhc == NULL || p_mss == NULL || p_pvs == NULL || p_bck == NULL || + p_fan == NULL || p_psn == NULL || p_dsn == NULL || p_ssr == NULL) { + PyArray_DiscardWritebackIfCopy(p_phc); + Py_XDECREF(p_phc); + PyArray_DiscardWritebackIfCopy(p_dhc); + Py_XDECREF(p_dhc); + PyArray_DiscardWritebackIfCopy(p_mss); + Py_XDECREF(p_mss); + PyArray_DiscardWritebackIfCopy(p_pvs); + Py_XDECREF(p_pvs); + PyArray_DiscardWritebackIfCopy(p_bck); + Py_XDECREF(p_bck); + PyArray_DiscardWritebackIfCopy(p_fan); + Py_XDECREF(p_fan); + + PyArray_DiscardWritebackIfCopy(p_psn); + Py_XDECREF(p_psn); + PyArray_DiscardWritebackIfCopy(p_dsn); + Py_XDECREF(p_dsn); + PyArray_DiscardWritebackIfCopy(p_ssr); + Py_XDECREF(p_ssr); + return NULL; + } + + hstout dicout; + // head curves (prompts and delayed), centre of mass of + // axial radiodistribution and projection views (for video) + dicout.hcp = (unsigned int *)PyArray_DATA(p_phc); + dicout.hcd = (unsigned int *)PyArray_DATA(p_dhc); + dicout.mss = (float *)PyArray_DATA(p_mss); + dicout.snv = (unsigned int *)PyArray_DATA(p_pvs); + + // single buckets and delayed fan-sums + dicout.bck = (unsigned int *)PyArray_DATA(p_bck); + dicout.fan = (unsigned int *)PyArray_DATA(p_fan); + + // sinograms: prompt, delayed and SSRB + dicout.psn = (unsigned short *)PyArray_DATA(p_psn); + dicout.dsn = (unsigned short *)PyArray_DATA(p_dsn); + dicout.ssr = (unsigned int *)PyArray_DATA(p_ssr); + //================================================================== + + // sets the device on which to calculate + 
HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + + //================================================================== + lmproc(dicout, flm, tstart, tstop, s2cF, axLUT, Cnt); + //================================================================== + + // Clean up: + Py_DECREF(p_sn1_rno); + Py_DECREF(p_sn1_sn11); + Py_DECREF(p_sn1_ssrb); + Py_DECREF(p_s2cF); + + PyArray_ResolveWritebackIfCopy(p_phc); + Py_DECREF(p_phc); + PyArray_ResolveWritebackIfCopy(p_dhc); + Py_DECREF(p_dhc); + PyArray_ResolveWritebackIfCopy(p_mss); + Py_DECREF(p_mss); + PyArray_ResolveWritebackIfCopy(p_pvs); + Py_DECREF(p_pvs); + PyArray_ResolveWritebackIfCopy(p_bck); + Py_DECREF(p_bck); + PyArray_ResolveWritebackIfCopy(p_fan); + Py_DECREF(p_fan); + + PyArray_ResolveWritebackIfCopy(p_psn); + Py_DECREF(p_psn); + PyArray_ResolveWritebackIfCopy(p_dsn); + Py_DECREF(p_dsn); + PyArray_ResolveWritebackIfCopy(p_ssr); + Py_DECREF(p_ssr); + + Py_INCREF(Py_None); + return Py_None; } - - //====================================================================================== // E S T I M A T I N G R A N D O M E V E N T S //-------------------------------------------------------------------------------------- static PyObject *mmr_rand(PyObject *self, PyObject *args) { - //Structure of constants - Cnst Cnt; - - // axial LUT dicionary. contains such LUTs: li2rno, li2sn, li2nos. 
- PyObject * o_axLUT; - //transaxial LUT - PyObject * o_txLUT; - - //output dictionary - PyObject * o_rndout; - - //Dictionary of scanner constants - PyObject * o_mmrcnst; - - // fan sums for each crystal (can be in time frames for dynamic scans) - PyObject * o_fansums; - - //structure of transaxial LUTs - txLUTs txlut; - - - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOO!O!O!", &o_rndout, &o_fansums, &PyDict_Type, &o_txLUT, &PyDict_Type, &o_axLUT, &PyDict_Type, &o_mmrcnst)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - /* Interpret the input objects as numpy arrays. */ - PyObject* pd_aw = PyDict_GetItemString(o_mmrcnst, "Naw"); - Cnt.aw = (int)PyLong_AsLong(pd_aw); - PyObject* pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); - Cnt.A = (int)PyLong_AsLong(pd_A); - PyObject* pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); - Cnt.W = (int)PyLong_AsLong(pd_W); - PyObject* pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); - Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); - PyObject* pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); - Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); - PyObject* pd_MRD = PyDict_GetItemString(o_mmrcnst, "MRD"); - Cnt.MRD = (int)PyLong_AsLong(pd_MRD); - PyObject* pd_NRNG = PyDict_GetItemString(o_mmrcnst, "NRNG"); - Cnt.NRNG = (int)PyLong_AsLong(pd_NRNG); - PyObject* pd_NCRS = PyDict_GetItemString(o_mmrcnst, "NCRS"); - Cnt.NCRS = (int)PyLong_AsLong(pd_NCRS); - PyObject* pd_NCRSR = PyDict_GetItemString(o_mmrcnst, "NCRSR"); - Cnt.NCRSR = (int)PyLong_AsLong(pd_NCRSR); - PyObject* pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); - Cnt.SPN = (int)PyLong_AsLong(pd_span); - PyObject* pd_tgap = PyDict_GetItemString(o_mmrcnst, "TGAP"); - Cnt.TGAP = (int)PyLong_AsLong(pd_tgap); - PyObject* pd_offgap = PyDict_GetItemString(o_mmrcnst, "OFFGAP"); - Cnt.OFFGAP = (int)PyLong_AsLong(pd_offgap); - PyObject* pd_log = 
PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - PyObject* pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - - //axial LUTs: - PyObject* pd_sn1_rno = PyDict_GetItemString(o_axLUT, "sn1_rno"); - PyObject* pd_sn1_sn11 = PyDict_GetItemString(o_axLUT, "sn1_sn11"); - - //transaxial LUTs: - PyObject* pd_s2cr = PyDict_GetItemString(o_txLUT, "s2cr"); - PyObject* pd_aw2sn = PyDict_GetItemString(o_txLUT, "aw2sn"); - PyObject* pd_cij = PyDict_GetItemString(o_txLUT, "cij"); - PyObject* pd_crsr = PyDict_GetItemString(o_txLUT, "crsri"); - - //random output dictionary - PyObject* pd_rsn = PyDict_GetItemString(o_rndout, "rsn"); - PyObject* pd_cmap = PyDict_GetItemString(o_rndout, "cmap"); - - - //-- get the arrays form the objects - PyArrayObject *p_fansums = NULL; - p_fansums = (PyArrayObject *)PyArray_FROM_OTF(o_fansums, NPY_UINT32, NPY_ARRAY_IN_ARRAY); - - PyArrayObject *p_sn1_rno = NULL; - p_sn1_rno = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_rno, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_sn1_sn11 = NULL; - p_sn1_sn11 = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_sn11, NPY_INT16, NPY_ARRAY_IN_ARRAY); - - PyArrayObject *p_s2cr = NULL; - p_s2cr = (PyArrayObject *)PyArray_FROM_OTF(pd_s2cr, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_aw2sn = NULL; - p_aw2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2sn, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_cij = NULL; - p_cij = (PyArrayObject *)PyArray_FROM_OTF(pd_cij, NPY_INT8, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_crsr = NULL; - p_crsr = (PyArrayObject *)PyArray_FROM_OTF(pd_crsr, NPY_INT16, NPY_ARRAY_IN_ARRAY); - - PyArrayObject *p_rsn = NULL; - p_rsn = (PyArrayObject *)PyArray_FROM_OTF(pd_rsn, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - PyArrayObject *p_cmap = NULL; - p_cmap = (PyArrayObject *)PyArray_FROM_OTF(pd_cmap, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - //-- - - /* If that didn't work, throw an exception. 
*/ - if (p_fansums == NULL || p_sn1_rno == NULL || p_sn1_sn11 == NULL || - p_s2cr == NULL || p_aw2sn == NULL || p_cij == NULL || p_crsr == NULL || p_rsn == NULL || p_cmap == NULL) - { - Py_XDECREF(p_fansums); - Py_XDECREF(p_sn1_rno); - Py_XDECREF(p_sn1_sn11); - Py_XDECREF(p_s2cr); - Py_XDECREF(p_aw2sn); - Py_XDECREF(p_cij); - Py_XDECREF(p_crsr); - - PyArray_DiscardWritebackIfCopy(p_rsn); - Py_XDECREF(p_rsn); - PyArray_DiscardWritebackIfCopy(p_cmap); - Py_XDECREF(p_cmap); - - return NULL; - } - - //-- get the pointers to the data as C-types - unsigned int *fansums = (unsigned int*)PyArray_DATA(p_fansums); - short *sn1_rno = (short*)PyArray_DATA(p_sn1_rno); - short *sn1_sn11 = (short*)PyArray_DATA(p_sn1_sn11); - - float *rsn = (float*)PyArray_DATA(p_rsn); - float *cmap = (float*)PyArray_DATA(p_cmap); - - txlut.s2cr = (LORcc*)PyArray_DATA(p_s2cr); - txlut.aw2sn = (LORaw*)PyArray_DATA(p_aw2sn); - txlut.cij = (char*)PyArray_DATA(p_cij); - txlut.crsr = (short*)PyArray_DATA(p_crsr); - - - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); - - //<><><><><><><><> E s t i m a t e r a n d o m s GPU <><><><><><><><><><><><><><> - gpu_randoms(rsn, cmap, fansums, txlut, sn1_rno, sn1_sn11, Cnt); - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - - PyArray_ResolveWritebackIfCopy(p_rsn); - Py_DECREF(p_rsn); - PyArray_ResolveWritebackIfCopy(p_cmap); - Py_DECREF(p_cmap); - - Py_DECREF(p_fansums); - - Py_DECREF(p_s2cr); - Py_DECREF(p_aw2sn); - Py_DECREF(p_cij); - Py_DECREF(p_crsr); - - Py_DECREF(p_sn1_sn11); - Py_DECREF(p_sn1_rno); - - Py_INCREF(Py_None); - return Py_None; + // Structure of constants + Cnst Cnt; + + // axial LUT dicionary. contains such LUTs: li2rno, li2sn, li2nos. 
+ PyObject *o_axLUT; + // transaxial LUT + PyObject *o_txLUT; + + // output dictionary + PyObject *o_rndout; + + // Dictionary of scanner constants + PyObject *o_mmrcnst; + + // fan sums for each crystal (can be in time frames for dynamic scans) + PyObject *o_fansums; + + // structure of transaxial LUTs + txLUTs txlut; + + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OOO!O!O!", &o_rndout, &o_fansums, &PyDict_Type, &o_txLUT, + &PyDict_Type, &o_axLUT, &PyDict_Type, &o_mmrcnst)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + /* Interpret the input objects as numpy arrays. */ + PyObject *pd_aw = PyDict_GetItemString(o_mmrcnst, "Naw"); + Cnt.aw = (int)PyLong_AsLong(pd_aw); + PyObject *pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); + Cnt.A = (int)PyLong_AsLong(pd_A); + PyObject *pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); + Cnt.W = (int)PyLong_AsLong(pd_W); + PyObject *pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); + Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); + PyObject *pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); + Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); + PyObject *pd_MRD = PyDict_GetItemString(o_mmrcnst, "MRD"); + Cnt.MRD = (int)PyLong_AsLong(pd_MRD); + PyObject *pd_NRNG = PyDict_GetItemString(o_mmrcnst, "NRNG"); + Cnt.NRNG = (int)PyLong_AsLong(pd_NRNG); + PyObject *pd_NCRS = PyDict_GetItemString(o_mmrcnst, "NCRS"); + Cnt.NCRS = (int)PyLong_AsLong(pd_NCRS); + PyObject *pd_NCRSR = PyDict_GetItemString(o_mmrcnst, "NCRSR"); + Cnt.NCRSR = (int)PyLong_AsLong(pd_NCRSR); + PyObject *pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); + Cnt.SPN = (int)PyLong_AsLong(pd_span); + PyObject *pd_tgap = PyDict_GetItemString(o_mmrcnst, "TGAP"); + Cnt.TGAP = (int)PyLong_AsLong(pd_tgap); + PyObject *pd_offgap = PyDict_GetItemString(o_mmrcnst, "OFFGAP"); + Cnt.OFFGAP = (int)PyLong_AsLong(pd_offgap); + PyObject *pd_log = 
PyDict_GetItemString(o_mmrcnst, "LOG"); + Cnt.LOG = (char)PyLong_AsLong(pd_log); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + + // axial LUTs: + PyObject *pd_sn1_rno = PyDict_GetItemString(o_axLUT, "sn1_rno"); + PyObject *pd_sn1_sn11 = PyDict_GetItemString(o_axLUT, "sn1_sn11"); + + // transaxial LUTs: + PyObject *pd_s2cr = PyDict_GetItemString(o_txLUT, "s2cr"); + PyObject *pd_aw2sn = PyDict_GetItemString(o_txLUT, "aw2sn"); + PyObject *pd_cij = PyDict_GetItemString(o_txLUT, "cij"); + PyObject *pd_crsr = PyDict_GetItemString(o_txLUT, "crsri"); + + // random output dictionary + PyObject *pd_rsn = PyDict_GetItemString(o_rndout, "rsn"); + PyObject *pd_cmap = PyDict_GetItemString(o_rndout, "cmap"); + + //-- get the arrays form the objects + PyArrayObject *p_fansums = NULL; + p_fansums = (PyArrayObject *)PyArray_FROM_OTF(o_fansums, NPY_UINT32, NPY_ARRAY_IN_ARRAY); + + PyArrayObject *p_sn1_rno = NULL; + p_sn1_rno = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_rno, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_sn1_sn11 = NULL; + p_sn1_sn11 = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_sn11, NPY_INT16, NPY_ARRAY_IN_ARRAY); + + PyArrayObject *p_s2cr = NULL; + p_s2cr = (PyArrayObject *)PyArray_FROM_OTF(pd_s2cr, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_aw2sn = NULL; + p_aw2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2sn, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_cij = NULL; + p_cij = (PyArrayObject *)PyArray_FROM_OTF(pd_cij, NPY_INT8, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_crsr = NULL; + p_crsr = (PyArrayObject *)PyArray_FROM_OTF(pd_crsr, NPY_INT16, NPY_ARRAY_IN_ARRAY); + + PyArrayObject *p_rsn = NULL; + p_rsn = (PyArrayObject *)PyArray_FROM_OTF(pd_rsn, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + PyArrayObject *p_cmap = NULL; + p_cmap = (PyArrayObject *)PyArray_FROM_OTF(pd_cmap, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + //-- + + /* If that didn't work, throw an exception. 
*/ + if (p_fansums == NULL || p_sn1_rno == NULL || p_sn1_sn11 == NULL || p_s2cr == NULL || + p_aw2sn == NULL || p_cij == NULL || p_crsr == NULL || p_rsn == NULL || p_cmap == NULL) { + Py_XDECREF(p_fansums); + Py_XDECREF(p_sn1_rno); + Py_XDECREF(p_sn1_sn11); + Py_XDECREF(p_s2cr); + Py_XDECREF(p_aw2sn); + Py_XDECREF(p_cij); + Py_XDECREF(p_crsr); + + PyArray_DiscardWritebackIfCopy(p_rsn); + Py_XDECREF(p_rsn); + PyArray_DiscardWritebackIfCopy(p_cmap); + Py_XDECREF(p_cmap); + + return NULL; + } + + //-- get the pointers to the data as C-types + unsigned int *fansums = (unsigned int *)PyArray_DATA(p_fansums); + short *sn1_rno = (short *)PyArray_DATA(p_sn1_rno); + short *sn1_sn11 = (short *)PyArray_DATA(p_sn1_sn11); + + float *rsn = (float *)PyArray_DATA(p_rsn); + float *cmap = (float *)PyArray_DATA(p_cmap); + + txlut.s2cr = (LORcc *)PyArray_DATA(p_s2cr); + txlut.aw2sn = (LORaw *)PyArray_DATA(p_aw2sn); + txlut.cij = (char *)PyArray_DATA(p_cij); + txlut.crsr = (short *)PyArray_DATA(p_crsr); + + // sets the device on which to calculate + HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + + //<><><><><><><><> E s t i m a t e r a n d o m s GPU <><><><><><><><><><><><><><> + gpu_randoms(rsn, cmap, fansums, txlut, sn1_rno, sn1_sn11, Cnt); + //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + + PyArray_ResolveWritebackIfCopy(p_rsn); + Py_DECREF(p_rsn); + PyArray_ResolveWritebackIfCopy(p_cmap); + Py_DECREF(p_cmap); + + Py_DECREF(p_fansums); + + Py_DECREF(p_s2cr); + Py_DECREF(p_aw2sn); + Py_DECREF(p_cij); + Py_DECREF(p_crsr); + + Py_DECREF(p_sn1_sn11); + Py_DECREF(p_sn1_rno); + + Py_INCREF(Py_None); + return Py_None; } - //====================================================================================== // NEW!!! 
E S T I M A T I N G R A N D O M E V E N T S (F R O M P R O M P T S) //-------------------------------------------------------------------------------------- static PyObject *mmr_prand(PyObject *self, PyObject *args) { - //Structure of constants - Cnst Cnt; - - // axial LUT dicionary. contains such LUTs: li2rno, li2sn, li2nos. - PyObject * o_axLUT; - //transaxial LUT - PyObject * o_txLUT; - - //output dictionary - PyObject * o_rndout; - - //Dictionary of scanner constants - PyObject * o_mmrcnst; - - // fan sums for each crystal - PyObject * o_fansums; - - //mask for the randoms only regions in prompt sinogram - PyObject * o_pmsksn; - - //structure of transaxial LUTs - txLUTs txlut; - - - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOOOO", &o_rndout, &o_pmsksn, &o_fansums, &o_txLUT, &o_axLUT, &o_mmrcnst)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - /* Interpret the input objects as numpy arrays. 
*/ - PyObject* pd_aw = PyDict_GetItemString(o_mmrcnst, "Naw"); - Cnt.aw = (int)PyLong_AsLong(pd_aw); - PyObject* pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); - Cnt.A = (int)PyLong_AsLong(pd_A); - PyObject* pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); - Cnt.W = (int)PyLong_AsLong(pd_W); - PyObject* pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); - Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); - PyObject* pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); - Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); - PyObject* pd_MRD = PyDict_GetItemString(o_mmrcnst, "MRD"); - Cnt.MRD = (int)PyLong_AsLong(pd_MRD); - PyObject* pd_NRNG = PyDict_GetItemString(o_mmrcnst, "NRNG"); - Cnt.NRNG = (int)PyLong_AsLong(pd_NRNG); - PyObject* pd_NCRS = PyDict_GetItemString(o_mmrcnst, "NCRS"); - Cnt.NCRS = (int)PyLong_AsLong(pd_NCRS); - PyObject* pd_NCRSR = PyDict_GetItemString(o_mmrcnst, "NCRSR"); - Cnt.NCRSR = (int)PyLong_AsLong(pd_NCRSR); - PyObject* pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); - Cnt.SPN = (int)PyLong_AsLong(pd_span); - PyObject* pd_tgap = PyDict_GetItemString(o_mmrcnst, "TGAP"); - Cnt.TGAP = (int)PyLong_AsLong(pd_tgap); - PyObject* pd_offgap = PyDict_GetItemString(o_mmrcnst, "OFFGAP"); - Cnt.OFFGAP = (int)PyLong_AsLong(pd_offgap); - PyObject* pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - PyObject* pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - - //axial LUTs: - PyObject* pd_sn1_rno = PyDict_GetItemString(o_axLUT, "sn1_rno"); - PyObject* pd_sn1_sn11 = PyDict_GetItemString(o_axLUT, "sn1_sn11"); - PyObject* pd_Msn1 = PyDict_GetItemString(o_axLUT, "Msn1"); - - //transaxial LUTs: - PyObject* pd_s2cr = PyDict_GetItemString(o_txLUT, "s2cr"); - PyObject* pd_aw2sn = PyDict_GetItemString(o_txLUT, "aw2sn"); - PyObject* pd_cij = PyDict_GetItemString(o_txLUT, "cij"); - PyObject* pd_crsr = PyDict_GetItemString(o_txLUT, "crsri"); - PyObject* pd_cr2s = 
PyDict_GetItemString(o_txLUT, "cr2s"); - - //random output dictionary - PyObject* pd_rsn = PyDict_GetItemString(o_rndout, "rsn"); - PyObject* pd_cmap = PyDict_GetItemString(o_rndout, "cmap"); - - //-- get the arrays form the objects - PyArrayObject *p_pmsksn = NULL; - p_pmsksn = (PyArrayObject *)PyArray_FROM_OTF(o_pmsksn, NPY_INT8, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_fansums = NULL; - p_fansums = (PyArrayObject *)PyArray_FROM_OTF(o_fansums, NPY_UINT32, NPY_ARRAY_IN_ARRAY); - - PyArrayObject *p_sn1_rno = NULL; - p_sn1_rno = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_rno, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_sn1_sn11 = NULL; - p_sn1_sn11 = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_sn11, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_Msn1 = NULL; - p_Msn1 = (PyArrayObject *)PyArray_FROM_OTF(pd_Msn1, NPY_INT16, NPY_ARRAY_IN_ARRAY); - - PyArrayObject *p_s2cr = NULL; - p_s2cr = (PyArrayObject *)PyArray_FROM_OTF(pd_s2cr, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_aw2sn = NULL; - p_aw2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2sn, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_cij = NULL; - p_cij = (PyArrayObject *)PyArray_FROM_OTF(pd_cij, NPY_INT8, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_crsr = NULL; - p_crsr = (PyArrayObject *)PyArray_FROM_OTF(pd_crsr, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_cr2s = NULL; - p_cr2s = (PyArrayObject *)PyArray_FROM_OTF(pd_cr2s, NPY_INT32, NPY_ARRAY_IN_ARRAY); - - PyArrayObject *p_rsn = NULL; - p_rsn = (PyArrayObject *)PyArray_FROM_OTF(pd_rsn, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - PyArrayObject *p_cmap = NULL; - p_cmap = (PyArrayObject *)PyArray_FROM_OTF(pd_cmap, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - //-- - - /* If that didn't work, throw an exception. 
*/ - if (p_fansums == NULL || p_sn1_rno == NULL || p_sn1_sn11 == NULL || - p_s2cr == NULL || p_aw2sn == NULL || p_cij == NULL || p_crsr == NULL || - p_rsn == NULL || p_cmap == NULL || p_cr2s == NULL || p_Msn1 == NULL || p_pmsksn == NULL) - { - Py_XDECREF(p_fansums); - Py_XDECREF(p_sn1_rno); - Py_XDECREF(p_sn1_sn11); - Py_XDECREF(p_s2cr); - Py_XDECREF(p_aw2sn); - Py_XDECREF(p_cij); - Py_XDECREF(p_crsr); - Py_XDECREF(p_cr2s); - Py_XDECREF(p_Msn1); - Py_XDECREF(p_pmsksn); - - PyArray_DiscardWritebackIfCopy(p_rsn); - Py_XDECREF(p_rsn); - PyArray_DiscardWritebackIfCopy(p_cmap); - Py_XDECREF(p_cmap); - - printf("e> could not get the variable from Python right!\n"); - - return NULL; - } - - //-- get the pointers to the data as C-types - char *pmsksn = (char*)PyArray_DATA(p_pmsksn); - unsigned int *fansums = (unsigned int*)PyArray_DATA(p_fansums); - - short *sn1_rno = (short*)PyArray_DATA(p_sn1_rno); - short *sn1_sn11 = (short*)PyArray_DATA(p_sn1_sn11); - short *Msn1 = (short*)PyArray_DATA(p_Msn1); - - float *rsn = (float*)PyArray_DATA(p_rsn); - float *cmap = (float*)PyArray_DATA(p_cmap); - - txlut.s2cr = (LORcc*)PyArray_DATA(p_s2cr); - txlut.aw2sn = (LORaw*)PyArray_DATA(p_aw2sn); - txlut.cij = (char*)PyArray_DATA(p_cij); - txlut.crsr = (short*)PyArray_DATA(p_crsr); - txlut.cr2s = (int*)PyArray_DATA(p_cr2s); - - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); - - //<><><><><><><><> E s t i m a t e r a n d o m s GPU <><><><><><><><><><><><><><> - p_randoms(rsn, cmap, pmsksn, fansums, txlut, sn1_rno, sn1_sn11, Msn1, Cnt); - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - - PyArray_ResolveWritebackIfCopy(p_rsn); - Py_DECREF(p_rsn); - PyArray_ResolveWritebackIfCopy(p_cmap); - Py_DECREF(p_cmap); - - - Py_DECREF(p_pmsksn); - Py_DECREF(p_fansums); - - Py_DECREF(p_s2cr); - Py_DECREF(p_aw2sn); - Py_DECREF(p_cij); - Py_DECREF(p_crsr); - Py_DECREF(p_cr2s); - - Py_DECREF(p_sn1_sn11); - Py_DECREF(p_sn1_rno); - 
Py_DECREF(p_Msn1); - - Py_INCREF(Py_None); - return Py_None; + // Structure of constants + Cnst Cnt; + + // axial LUT dicionary. contains such LUTs: li2rno, li2sn, li2nos. + PyObject *o_axLUT; + // transaxial LUT + PyObject *o_txLUT; + + // output dictionary + PyObject *o_rndout; + + // Dictionary of scanner constants + PyObject *o_mmrcnst; + + // fan sums for each crystal + PyObject *o_fansums; + + // mask for the randoms only regions in prompt sinogram + PyObject *o_pmsksn; + + // structure of transaxial LUTs + txLUTs txlut; + + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OOOOOO", &o_rndout, &o_pmsksn, &o_fansums, &o_txLUT, &o_axLUT, + &o_mmrcnst)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + /* Interpret the input objects as numpy arrays. */ + PyObject *pd_aw = PyDict_GetItemString(o_mmrcnst, "Naw"); + Cnt.aw = (int)PyLong_AsLong(pd_aw); + PyObject *pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); + Cnt.A = (int)PyLong_AsLong(pd_A); + PyObject *pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); + Cnt.W = (int)PyLong_AsLong(pd_W); + PyObject *pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); + Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); + PyObject *pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); + Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); + PyObject *pd_MRD = PyDict_GetItemString(o_mmrcnst, "MRD"); + Cnt.MRD = (int)PyLong_AsLong(pd_MRD); + PyObject *pd_NRNG = PyDict_GetItemString(o_mmrcnst, "NRNG"); + Cnt.NRNG = (int)PyLong_AsLong(pd_NRNG); + PyObject *pd_NCRS = PyDict_GetItemString(o_mmrcnst, "NCRS"); + Cnt.NCRS = (int)PyLong_AsLong(pd_NCRS); + PyObject *pd_NCRSR = PyDict_GetItemString(o_mmrcnst, "NCRSR"); + Cnt.NCRSR = (int)PyLong_AsLong(pd_NCRSR); + PyObject *pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); + Cnt.SPN = (int)PyLong_AsLong(pd_span); + PyObject *pd_tgap = PyDict_GetItemString(o_mmrcnst, "TGAP"); + 
Cnt.TGAP = (int)PyLong_AsLong(pd_tgap); + PyObject *pd_offgap = PyDict_GetItemString(o_mmrcnst, "OFFGAP"); + Cnt.OFFGAP = (int)PyLong_AsLong(pd_offgap); + PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); + Cnt.LOG = (char)PyLong_AsLong(pd_log); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + + // axial LUTs: + PyObject *pd_sn1_rno = PyDict_GetItemString(o_axLUT, "sn1_rno"); + PyObject *pd_sn1_sn11 = PyDict_GetItemString(o_axLUT, "sn1_sn11"); + PyObject *pd_Msn1 = PyDict_GetItemString(o_axLUT, "Msn1"); + + // transaxial LUTs: + PyObject *pd_s2cr = PyDict_GetItemString(o_txLUT, "s2cr"); + PyObject *pd_aw2sn = PyDict_GetItemString(o_txLUT, "aw2sn"); + PyObject *pd_cij = PyDict_GetItemString(o_txLUT, "cij"); + PyObject *pd_crsr = PyDict_GetItemString(o_txLUT, "crsri"); + PyObject *pd_cr2s = PyDict_GetItemString(o_txLUT, "cr2s"); + + // random output dictionary + PyObject *pd_rsn = PyDict_GetItemString(o_rndout, "rsn"); + PyObject *pd_cmap = PyDict_GetItemString(o_rndout, "cmap"); + + //-- get the arrays form the objects + PyArrayObject *p_pmsksn = NULL; + p_pmsksn = (PyArrayObject *)PyArray_FROM_OTF(o_pmsksn, NPY_INT8, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_fansums = NULL; + p_fansums = (PyArrayObject *)PyArray_FROM_OTF(o_fansums, NPY_UINT32, NPY_ARRAY_IN_ARRAY); + + PyArrayObject *p_sn1_rno = NULL; + p_sn1_rno = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_rno, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_sn1_sn11 = NULL; + p_sn1_sn11 = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_sn11, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_Msn1 = NULL; + p_Msn1 = (PyArrayObject *)PyArray_FROM_OTF(pd_Msn1, NPY_INT16, NPY_ARRAY_IN_ARRAY); + + PyArrayObject *p_s2cr = NULL; + p_s2cr = (PyArrayObject *)PyArray_FROM_OTF(pd_s2cr, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_aw2sn = NULL; + p_aw2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2sn, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_cij = 
NULL; + p_cij = (PyArrayObject *)PyArray_FROM_OTF(pd_cij, NPY_INT8, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_crsr = NULL; + p_crsr = (PyArrayObject *)PyArray_FROM_OTF(pd_crsr, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_cr2s = NULL; + p_cr2s = (PyArrayObject *)PyArray_FROM_OTF(pd_cr2s, NPY_INT32, NPY_ARRAY_IN_ARRAY); + + PyArrayObject *p_rsn = NULL; + p_rsn = (PyArrayObject *)PyArray_FROM_OTF(pd_rsn, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + PyArrayObject *p_cmap = NULL; + p_cmap = (PyArrayObject *)PyArray_FROM_OTF(pd_cmap, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + //-- + + /* If that didn't work, throw an exception. */ + if (p_fansums == NULL || p_sn1_rno == NULL || p_sn1_sn11 == NULL || p_s2cr == NULL || + p_aw2sn == NULL || p_cij == NULL || p_crsr == NULL || p_rsn == NULL || p_cmap == NULL || + p_cr2s == NULL || p_Msn1 == NULL || p_pmsksn == NULL) { + Py_XDECREF(p_fansums); + Py_XDECREF(p_sn1_rno); + Py_XDECREF(p_sn1_sn11); + Py_XDECREF(p_s2cr); + Py_XDECREF(p_aw2sn); + Py_XDECREF(p_cij); + Py_XDECREF(p_crsr); + Py_XDECREF(p_cr2s); + Py_XDECREF(p_Msn1); + Py_XDECREF(p_pmsksn); + + PyArray_DiscardWritebackIfCopy(p_rsn); + Py_XDECREF(p_rsn); + PyArray_DiscardWritebackIfCopy(p_cmap); + Py_XDECREF(p_cmap); + + printf("e> could not get the variable from Python right!\n"); + + return NULL; + } + + //-- get the pointers to the data as C-types + char *pmsksn = (char *)PyArray_DATA(p_pmsksn); + unsigned int *fansums = (unsigned int *)PyArray_DATA(p_fansums); + + short *sn1_rno = (short *)PyArray_DATA(p_sn1_rno); + short *sn1_sn11 = (short *)PyArray_DATA(p_sn1_sn11); + short *Msn1 = (short *)PyArray_DATA(p_Msn1); + + float *rsn = (float *)PyArray_DATA(p_rsn); + float *cmap = (float *)PyArray_DATA(p_cmap); + + txlut.s2cr = (LORcc *)PyArray_DATA(p_s2cr); + txlut.aw2sn = (LORaw *)PyArray_DATA(p_aw2sn); + txlut.cij = (char *)PyArray_DATA(p_cij); + txlut.crsr = (short *)PyArray_DATA(p_crsr); + txlut.cr2s = (int *)PyArray_DATA(p_cr2s); + + // sets the device on which to 
calculate + HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + + //<><><><><><><><> E s t i m a t e r a n d o m s GPU <><><><><><><><><><><><><><> + p_randoms(rsn, cmap, pmsksn, fansums, txlut, sn1_rno, sn1_sn11, Msn1, Cnt); + //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + + PyArray_ResolveWritebackIfCopy(p_rsn); + Py_DECREF(p_rsn); + PyArray_ResolveWritebackIfCopy(p_cmap); + Py_DECREF(p_cmap); + + Py_DECREF(p_pmsksn); + Py_DECREF(p_fansums); + + Py_DECREF(p_s2cr); + Py_DECREF(p_aw2sn); + Py_DECREF(p_cij); + Py_DECREF(p_crsr); + Py_DECREF(p_cr2s); + + Py_DECREF(p_sn1_sn11); + Py_DECREF(p_sn1_rno); + Py_DECREF(p_Msn1); + + Py_INCREF(Py_None); + return Py_None; } diff --git a/niftypet/nipet/lm/src/lmaux.cu b/niftypet/nipet/lm/src/lmaux.cu index 0b08b0b6..33c1c15a 100644 --- a/niftypet/nipet/lm/src/lmaux.cu +++ b/niftypet/nipet/lm/src/lmaux.cu @@ -6,344 +6,350 @@ author: Pawel Markiewicz Copyrights: 2020 ----------------------------------------------------------------------*/ -#include #include "lmaux.h" +#include #ifdef UNIX #include #endif - //********** LIST MODA DATA FILE PROPERTIES (Siemens mMR) ************** -void getLMinfo(char *flm, const Cnst Cnt) -{ - // variables for openning and reading binary files - FILE *fr; - size_t r; - - - //open the list-mode file - fr = fopen(flm, "rb"); - if (fr == NULL) { - fprintf(stderr, "Can't open input (list mode) file!\n"); - exit(1); - } +void getLMinfo(char *flm, const Cnst Cnt) { + // variables for openning and reading binary files + FILE *fr; + size_t r; + + // open the list-mode file + fr = fopen(flm, "rb"); + if (fr == NULL) { + fprintf(stderr, "Can't open input (list mode) file!\n"); + exit(1); + } #ifdef __linux__ - // file size in elements - fseek(fr, 0, SEEK_END); - size_t nbytes = ftell(fr); - size_t ele = nbytes / sizeof(int); - if (Cnt.LOG <= LOGINFO) printf("i> number of elements in the list mode file: %lu\n", ele); - rewind(fr); + // file size in elements + fseek(fr, 0, 
SEEK_END); + size_t nbytes = ftell(fr); + size_t ele = nbytes / sizeof(int); + if (Cnt.LOG <= LOGINFO) + printf("i> number of elements in the list mode file: %lu\n", ele); + rewind(fr); #endif #ifdef WIN32 - struct _stati64 bufStat; - _stati64(flm, &bufStat); - size_t nbytes = bufStat.st_size; - size_t ele = nbytes / sizeof(int); - if (Cnt.LOG <= LOGINFO) printf("i> number of elements in the list mode file: %lu\n", ele); + struct _stati64 bufStat; + _stati64(flm, &bufStat); + size_t nbytes = bufStat.st_size; + size_t ele = nbytes / sizeof(int); + if (Cnt.LOG <= LOGINFO) + printf("i> number of elements in the list mode file: %lu\n", ele); #endif - - - //--try reading the whole lot to memory + //--try reading the whole lot to memory #if RD2MEM - if (Cnt.LOG <= LOGINFO) printf("i> reading the whole file..."); - if (NULL == (lm = (int *)malloc(ele * sizeof(int)))) { - printf("malloc failed\n"); - return; - } - r = fread(lm, 4, ele, fr); - if (r != ele) { fprintf(stderr, "Reading error: r = %lu and ele = %lu\n", r, ele); exit(3); } - if (Cnt.LOG <= LOGINFO) printf("DONE.\n\n"); - rewind(fr); + if (Cnt.LOG <= LOGINFO) + printf("i> reading the whole file..."); + if (NULL == (lm = (int *)malloc(ele * sizeof(int)))) { + printf("malloc failed\n"); + return; + } + r = fread(lm, 4, ele, fr); + if (r != ele) { + fprintf(stderr, "Reading error: r = %lu and ele = %lu\n", r, ele); + exit(3); + } + if (Cnt.LOG <= LOGINFO) + printf("DONE.\n\n"); + rewind(fr); #endif - //------------ first and last time tags --------------- - int tag = 0; - int buff[1]; - int last_ttag, first_ttag; - - //time offset based on the first time tag - int toff; - size_t last_taddr, first_taddr; - long long c = 1; - //-- - while (tag == 0) { - r = fread(buff, 4, 1, fr); - if (r != 1) { fputs("Reading error \n", stderr); exit(3); } - if ((buff[0] >> 29) == -4) { - tag = 1; - first_ttag = buff[0] & 0x1fffffff; - first_taddr = c; - } - c += 1; - } - if (Cnt.LOG <= LOGINFO) printf("i> the first time tag is: %d 
at positon %lu.\n", first_ttag, first_taddr); - - tag = 0; c = 1; - while (tag == 0) { + //------------ first and last time tags --------------- + int tag = 0; + int buff[1]; + int last_ttag, first_ttag; + + // time offset based on the first time tag + int toff; + size_t last_taddr, first_taddr; + long long c = 1; + //-- + while (tag == 0) { + r = fread(buff, 4, 1, fr); + if (r != 1) { + fputs("Reading error \n", stderr); + exit(3); + } + if ((buff[0] >> 29) == -4) { + tag = 1; + first_ttag = buff[0] & 0x1fffffff; + first_taddr = c; + } + c += 1; + } + if (Cnt.LOG <= LOGINFO) + printf("i> the first time tag is: %d at positon %lu.\n", first_ttag, first_taddr); + + tag = 0; + c = 1; + while (tag == 0) { #ifdef __linux__ - fseek(fr, c * -4, SEEK_END); + fseek(fr, c * -4, SEEK_END); #endif #ifdef WIN32 - _fseeki64(fr, c * -4, SEEK_END); + _fseeki64(fr, c * -4, SEEK_END); #endif - r = fread(buff, 4, 1, fr); - if (r != 1) { fputs("Reading error \n", stderr); exit(3); } - if ((buff[0] >> 29) == -4) { - tag = 1; - last_ttag = buff[0] & 0x1fffffff; - last_taddr = ele - c; - } - c += 1; - } - if (Cnt.LOG <= LOGINFO) printf("i> the last time tag is: %d at positon %lu.\n", last_ttag, last_taddr); - - // first time tag is also the time offset used later on. - if (first_ttag using time offset: %d\n", toff); - } - else { - fprintf(stderr, "Weird time stamps. The first and last time tags are: %d and %d\n", first_ttag, last_ttag); - exit(1); - } - //-------------------------------------------------------- - - int nitag = ((last_ttag - toff) + ITIME - 1) / ITIME; // # integration time tags (+1 for the end). - if (Cnt.LOG <= LOGINFO) printf("i> number of report itags is: %d\n", nitag); - - // divide the data into data chunks - // the default is to read 1GB to be dealt with all streams (default: 32) - int nchnk = 10 + (ele + ELECHNK - 1) / ELECHNK; //plus ten extra... 
- if (Cnt.LOG <= LOGINFO) printf("i> # chunks of data (initial): %d\n\n", nchnk); - - if (Cnt.LOG <= LOGINFO) printf("i> # elechnk: %d\n\n", ELECHNK); - - // divide the list mode data (1GB) into chunks in terms of addresses of selected time tags - //break time tag - size_t *btag = (size_t *)malloc((nchnk + 1) * sizeof(size_t)); - - //address (position) in file (in 4bytes unit) - size_t *atag = (size_t *)malloc((nchnk + 1) * sizeof(size_t)); - - //elements per thread to be dealt with - int *ele4thrd = (int *)malloc(nchnk * sizeof(int)); - - //elements per data chunk - int *ele4chnk = (int *)malloc(nchnk * sizeof(int)); - - //starting values - btag[0] = 0; - atag[0] = 0; - - //------------------------------------------------------------------------------------------------ - if (Cnt.LOG <= LOGINFO) - printf("i> setting up data chunks:\n"); - int i = 0; - while ((ele - atag[i])>(size_t)ELECHNK) { - //printf(">>>>>>>>>>>>>>>>>>> ele=%lu, atag=%lu, ELE=%d\n", ele, atag[i], ELECHNK); - //printf(">>>>>>>>>>>>>>>>>>> ele=%lu,\n", ele - atag[i]); - - i += 1; - c = 0; - tag = 0; - while (tag == 0) { + r = fread(buff, 4, 1, fr); + if (r != 1) { + fputs("Reading error \n", stderr); + exit(3); + } + if ((buff[0] >> 29) == -4) { + tag = 1; + last_ttag = buff[0] & 0x1fffffff; + last_taddr = ele - c; + } + c += 1; + } + if (Cnt.LOG <= LOGINFO) + printf("i> the last time tag is: %d at positon %lu.\n", last_ttag, last_taddr); + + // first time tag is also the time offset used later on. + if (first_ttag < last_ttag) { + toff = first_ttag; + if (Cnt.LOG <= LOGINFO) + printf("i> using time offset: %d\n", toff); + } else { + fprintf(stderr, "Weird time stamps. The first and last time tags are: %d and %d\n", + first_ttag, last_ttag); + exit(1); + } + //-------------------------------------------------------- + + int nitag = + ((last_ttag - toff) + ITIME - 1) / ITIME; // # integration time tags (+1 for the end). 
+ if (Cnt.LOG <= LOGINFO) + printf("i> number of report itags is: %d\n", nitag); + + // divide the data into data chunks + // the default is to read 1GB to be dealt with all streams (default: 32) + int nchnk = 10 + (ele + ELECHNK - 1) / ELECHNK; // plus ten extra... + if (Cnt.LOG <= LOGINFO) + printf("i> # chunks of data (initial): %d\n\n", nchnk); + + if (Cnt.LOG <= LOGINFO) + printf("i> # elechnk: %d\n\n", ELECHNK); + + // divide the list mode data (1GB) into chunks in terms of addresses of selected time tags + // break time tag + size_t *btag = (size_t *)malloc((nchnk + 1) * sizeof(size_t)); + + // address (position) in file (in 4bytes unit) + size_t *atag = (size_t *)malloc((nchnk + 1) * sizeof(size_t)); + + // elements per thread to be dealt with + int *ele4thrd = (int *)malloc(nchnk * sizeof(int)); + + // elements per data chunk + int *ele4chnk = (int *)malloc(nchnk * sizeof(int)); + + // starting values + btag[0] = 0; + atag[0] = 0; + + //------------------------------------------------------------------------------------------------ + if (Cnt.LOG <= LOGINFO) + printf("i> setting up data chunks:\n"); + int i = 0; + while ((ele - atag[i]) > (size_t)ELECHNK) { + // printf(">>>>>>>>>>>>>>>>>>> ele=%lu, atag=%lu, ELE=%d\n", ele, atag[i], ELECHNK); + // printf(">>>>>>>>>>>>>>>>>>> ele=%lu,\n", ele - atag[i]); + + i += 1; + c = 0; + tag = 0; + while (tag == 0) { #if RD2MEM - buff[0] = lm[atag[i - 1] + ELECHNK - c - 1]; + buff[0] = lm[atag[i - 1] + ELECHNK - c - 1]; #else #ifdef __linux__ - fseek(fr, 4 * (atag[i - 1] + ELECHNK - c - 1), SEEK_SET); //make the chunks a little smaller than ELECHNK (that's why - ) + fseek(fr, 4 * (atag[i - 1] + ELECHNK - c - 1), + SEEK_SET); // make the chunks a little smaller than ELECHNK (that's why - ) #endif #ifdef WIN32 - _fseeki64(fr, 4 * (atag[i - 1] + ELECHNK - c - 1), SEEK_SET); //make the chunks a little smaller than ELECHNK (that's why - ) + _fseeki64(fr, 4 * (atag[i - 1] + ELECHNK - c - 1), + SEEK_SET); // make the chunks a 
little smaller than ELECHNK (that's why - ) #endif - r = fread(buff, 4, 1, fr); + r = fread(buff, 4, 1, fr); #endif - if ((buff[0] >> 29) == -4) { - int itime = (buff[0] & 0x1fffffff); - if ((itime % BTPTIME) == 0) { - tag = 1; - btag[i] = itime - toff; - atag[i] = (atag[i - 1] + ELECHNK - c - 1); - ele4chnk[i - 1] = atag[i] - atag[i - 1]; - ele4thrd[i - 1] = (atag[i] - atag[i - 1] + (TOTHRDS - 1)) / TOTHRDS; - } - } - c += 1; - } - if (Cnt.LOG <= LOGDEBUG){ - printf("i> break time tag [%d] is: %lums at position %lu. \n", i, btag[i], atag[i]); - printf(" # elements: %d/per chunk, %d/per thread. c = %lld.\n", ele4chnk[i - 1], ele4thrd[i - 1], c); - } - else if (Cnt.LOG <= LOGINFO) - printf("i> break time tag [%d] is: %lums at position %lu.\r", i, btag[i], atag[i]); // ele = %lu ele-atag[i] = %lu , , ele, ele-atag[i] - } - - i += 1; - //add 1ms for the remaining events - btag[i] = last_ttag - toff + 1; - atag[i] = ele; - ele4thrd[i - 1] = (ele - atag[i - 1] + (TOTHRDS - 1)) / TOTHRDS; - ele4chnk[i - 1] = ele - atag[i - 1]; - if (Cnt.LOG <= LOGDEBUG){ - printf("i> break time tag [%d] is: %lums at position %lu.\n", i, btag[i], atag[i]); - printf(" # elements: %d/per chunk, %d/per thread.\n", ele4chnk[i - 1], ele4thrd[i - 1]); - } - if (Cnt.LOG <= LOGINFO) - printf("i> break time tag [%d] is: %lums at position %lu. 
\n", i, btag[i], atag[i]); - fclose(fr); - - //------------------------------------------------------------------------------------------------ - - lmprop.fname = flm; - lmprop.atag = atag; - lmprop.btag = btag; - lmprop.ele4chnk = ele4chnk; - lmprop.ele4thrd = ele4thrd; - lmprop.ele = ele; - lmprop.nchnk = i; - lmprop.nitag = nitag; - lmprop.toff = toff; - lmprop.last_ttag = last_ttag; - - // free(lm); + if ((buff[0] >> 29) == -4) { + int itime = (buff[0] & 0x1fffffff); + if ((itime % BTPTIME) == 0) { + tag = 1; + btag[i] = itime - toff; + atag[i] = (atag[i - 1] + ELECHNK - c - 1); + ele4chnk[i - 1] = atag[i] - atag[i - 1]; + ele4thrd[i - 1] = (atag[i] - atag[i - 1] + (TOTHRDS - 1)) / TOTHRDS; + } + } + c += 1; + } + if (Cnt.LOG <= LOGDEBUG) { + printf("i> break time tag [%d] is: %lums at position %lu. \n", i, btag[i], atag[i]); + printf(" # elements: %d/per chunk, %d/per thread. c = %lld.\n", ele4chnk[i - 1], + ele4thrd[i - 1], c); + } else if (Cnt.LOG <= LOGINFO) + printf("i> break time tag [%d] is: %lums at position %lu.\r", i, btag[i], + atag[i]); // ele = %lu ele-atag[i] = %lu , , ele, ele-atag[i] + } + + i += 1; + // add 1ms for the remaining events + btag[i] = last_ttag - toff + 1; + atag[i] = ele; + ele4thrd[i - 1] = (ele - atag[i - 1] + (TOTHRDS - 1)) / TOTHRDS; + ele4chnk[i - 1] = ele - atag[i - 1]; + if (Cnt.LOG <= LOGDEBUG) { + printf("i> break time tag [%d] is: %lums at position %lu.\n", i, btag[i], atag[i]); + printf(" # elements: %d/per chunk, %d/per thread.\n", ele4chnk[i - 1], ele4thrd[i - 1]); + } + if (Cnt.LOG <= LOGINFO) + printf("i> break time tag [%d] is: %lums at position %lu. 
\n", i, btag[i], atag[i]); + fclose(fr); + + //------------------------------------------------------------------------------------------------ + + lmprop.fname = flm; + lmprop.atag = atag; + lmprop.btag = btag; + lmprop.ele4chnk = ele4chnk; + lmprop.ele4thrd = ele4thrd; + lmprop.ele = ele; + lmprop.nchnk = i; + lmprop.nitag = nitag; + lmprop.toff = toff; + lmprop.last_ttag = last_ttag; + + // free(lm); } //********************************************************************* - - - - -void modifyLMinfo(int tstart, int tstop, const Cnst Cnt) -{ - int newn = 0; //new number of chunks - int ntag[2] = { -1, -1 }; //new start and end time/address break tag - for (int n = 0; n time break [%d] <%lu, %lu> is in. ele={%d, %d}.\n", n + 1, lmprop.btag[n], lmprop.btag[n + 1], lmprop.ele4thrd[n], lmprop.ele4chnk[n]); - newn += 1; - } - } - - size_t *tmp_btag = (size_t *)malloc((newn + 1) * sizeof(size_t)); //break time tag - size_t *tmp_atag = (size_t *)malloc((newn + 1) * sizeof(size_t)); //address (position) in file (in 4bytes unit) - int *tmp_ele4thrd = (int *)malloc(newn * sizeof(int)); //elements per thread to be dealt with - int *tmp_ele4chnk = (int *)malloc(newn * sizeof(int)); //elements per data chunk - - int nn = 0; //new indexing - tmp_btag[0] = lmprop.btag[ntag[0]]; - tmp_atag[0] = lmprop.atag[ntag[0]]; - if (Cnt.LOG <= LOGDEBUG) - printf("> leaving only those chunks for histogramming:\n"); - - for (int n = ntag[0]; n <= ntag[1]; n++) { - tmp_btag[nn + 1] = lmprop.btag[n + 1]; - tmp_atag[nn + 1] = lmprop.atag[n + 1]; - tmp_ele4thrd[nn] = lmprop.ele4thrd[n]; - tmp_ele4chnk[nn] = lmprop.ele4chnk[n]; - if (Cnt.LOG <= LOGDEBUG) - printf(" > break time tag (original) [%d] @%lums ele={%d, %d}.\n", - n + 1, tmp_btag[nn + 1], tmp_ele4thrd[nn], tmp_ele4chnk[nn]); - - nn += 1; - } - lmprop.atag = tmp_atag; - lmprop.btag = tmp_btag; - lmprop.ele4chnk = tmp_ele4chnk; - lmprop.ele4thrd = tmp_ele4thrd; - lmprop.nchnk = newn; +void modifyLMinfo(int tstart, int tstop, const Cnst 
Cnt) { + int newn = 0; // new number of chunks + int ntag[2] = {-1, -1}; // new start and end time/address break tag + for (int n = 0; n < lmprop.nchnk; n++) { + if ((tstart <= (lmprop.btag[n + 1] / ITIME)) && ((lmprop.btag[n] / ITIME) < tstop)) { + if (ntag[0] == -1) + ntag[0] = n; + ntag[1] = n; + if (Cnt.LOG <= LOGDEBUG) + printf(" > time break [%d] <%lu, %lu> is in. ele={%d, %d}.\n", n + 1, lmprop.btag[n], + lmprop.btag[n + 1], lmprop.ele4thrd[n], lmprop.ele4chnk[n]); + newn += 1; + } + } + + size_t *tmp_btag = (size_t *)malloc((newn + 1) * sizeof(size_t)); // break time tag + size_t *tmp_atag = + (size_t *)malloc((newn + 1) * sizeof(size_t)); // address (position) in file (in 4bytes unit) + int *tmp_ele4thrd = (int *)malloc(newn * sizeof(int)); // elements per thread to be dealt with + int *tmp_ele4chnk = (int *)malloc(newn * sizeof(int)); // elements per data chunk + + int nn = 0; // new indexing + tmp_btag[0] = lmprop.btag[ntag[0]]; + tmp_atag[0] = lmprop.atag[ntag[0]]; + if (Cnt.LOG <= LOGDEBUG) + printf("> leaving only those chunks for histogramming:\n"); + + for (int n = ntag[0]; n <= ntag[1]; n++) { + tmp_btag[nn + 1] = lmprop.btag[n + 1]; + tmp_atag[nn + 1] = lmprop.atag[n + 1]; + tmp_ele4thrd[nn] = lmprop.ele4thrd[n]; + tmp_ele4chnk[nn] = lmprop.ele4chnk[n]; + if (Cnt.LOG <= LOGDEBUG) + printf(" > break time tag (original) [%d] @%lums ele={%d, %d}.\n", n + 1, tmp_btag[nn + 1], + tmp_ele4thrd[nn], tmp_ele4chnk[nn]); + + nn += 1; + } + lmprop.atag = tmp_atag; + lmprop.btag = tmp_btag; + lmprop.ele4chnk = tmp_ele4chnk; + lmprop.ele4thrd = tmp_ele4thrd; + lmprop.nchnk = newn; } //================================================================== - - - - - - //***************************************************************************** //***************************************************************************** //***************************************************************************** 
//============================================================================= -__global__ void sino_uncmprss(unsigned int * dsino, - unsigned char * p1sino, - unsigned char * d1sino, - int ifrm, - int nele) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - if (idx> 8) & 0x000000ff); - d1sino[2 * idx + 1] = (unsigned char)((dsino[ifrm*nele + idx] >> 24) & 0x000000ff); - - p1sino[2 * idx] = (unsigned char)(dsino[ifrm*nele + idx] & 0x000000ff); - p1sino[2 * idx + 1] = (unsigned char)((dsino[ifrm*nele + idx] >> 16) & 0x000000ff); - } +__global__ void sino_uncmprss(unsigned int *dsino, unsigned char *p1sino, unsigned char *d1sino, + int ifrm, int nele) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < nele) { + d1sino[2 * idx] = (unsigned char)((dsino[ifrm * nele + idx] >> 8) & 0x000000ff); + d1sino[2 * idx + 1] = (unsigned char)((dsino[ifrm * nele + idx] >> 24) & 0x000000ff); + + p1sino[2 * idx] = (unsigned char)(dsino[ifrm * nele + idx] & 0x000000ff); + p1sino[2 * idx + 1] = (unsigned char)((dsino[ifrm * nele + idx] >> 16) & 0x000000ff); + } } //============================================================================= //============================================================================= -void dsino_ucmpr(unsigned int *d_dsino, - unsigned char *pdsn, unsigned char *ddsn, - int tot_bins, int nfrm) -{ - - dim3 grid; - dim3 block; - - block.x = 1024; block.y = 1; block.z = 1; - grid.x = (unsigned int)((tot_bins / 2 + block.x - 1) / block.x); - grid.y = 1; grid.z = 1; - - unsigned char *d_d1sino, *d_p1sino; - HANDLE_ERROR(cudaMalloc(&d_d1sino, tot_bins * sizeof(unsigned char))); - HANDLE_ERROR(cudaMalloc(&d_p1sino, tot_bins * sizeof(unsigned char))); - - //getMemUse(Cnt); - - printf("i> uncompressing dynamic sino..."); - - //---time clock---- - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - //----------------- - - for (int i = 0; i> >(d_dsino, d_p1sino, d_d1sino, i, tot_bins / 
2); - HANDLE_ERROR(cudaGetLastError()); - - HANDLE_ERROR(cudaMemcpy(&pdsn[i*tot_bins], d_p1sino, - tot_bins * sizeof(unsigned char), cudaMemcpyDeviceToHost)); - - HANDLE_ERROR(cudaMemcpy(&ddsn[i*tot_bins], d_d1sino, - tot_bins * sizeof(unsigned char), cudaMemcpyDeviceToHost)); - - } - - //---time clock--- - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - printf(" DONE in %fs.\n", 0.001*elapsedTime); - //------- - - cudaFree(d_d1sino); - cudaFree(d_p1sino); - +void dsino_ucmpr(unsigned int *d_dsino, unsigned char *pdsn, unsigned char *ddsn, int tot_bins, + int nfrm) { + + dim3 grid; + dim3 block; + + block.x = 1024; + block.y = 1; + block.z = 1; + grid.x = (unsigned int)((tot_bins / 2 + block.x - 1) / block.x); + grid.y = 1; + grid.z = 1; + + unsigned char *d_d1sino, *d_p1sino; + HANDLE_ERROR(cudaMalloc(&d_d1sino, tot_bins * sizeof(unsigned char))); + HANDLE_ERROR(cudaMalloc(&d_p1sino, tot_bins * sizeof(unsigned char))); + + // getMemUse(Cnt); + + printf("i> uncompressing dynamic sino..."); + + //---time clock---- + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + //----------------- + + for (int i = 0; i < nfrm; i++) { + + sino_uncmprss<<>>(d_dsino, d_p1sino, d_d1sino, i, tot_bins / 2); + HANDLE_ERROR(cudaGetLastError()); + + HANDLE_ERROR(cudaMemcpy(&pdsn[i * tot_bins], d_p1sino, tot_bins * sizeof(unsigned char), + cudaMemcpyDeviceToHost)); + + HANDLE_ERROR(cudaMemcpy(&ddsn[i * tot_bins], d_d1sino, tot_bins * sizeof(unsigned char), + cudaMemcpyDeviceToHost)); + } + + //---time clock--- + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + printf(" DONE in %fs.\n", 0.001 * elapsedTime); + //------- + + cudaFree(d_d1sino); + cudaFree(d_p1sino); } 
diff --git a/niftypet/nipet/lm/src/lmaux.h b/niftypet/nipet/lm/src/lmaux.h index a0f7a4fd..d2d66b0c 100644 --- a/niftypet/nipet/lm/src/lmaux.h +++ b/niftypet/nipet/lm/src/lmaux.h @@ -1,21 +1,21 @@ -#include #include "def.h" #include "scanner_0.h" +#include #ifndef LAUX_H #define LAUX_H extern LMprop lmprop; -//get the properties of LM and the chunks into which the LM is divided +// get the properties of LM and the chunks into which the LM is divided void getLMinfo(char *flm, const Cnst Cnt); -//modify the properties of LM in case of dynamic studies as the number of frames wont fit in the memory +// modify the properties of LM in case of dynamic studies as the number of frames wont fit in the +// memory void modifyLMinfo(int tstart, int tstop, const Cnst Cnt); -//uncompress the sinogram after GPU execution -void dsino_ucmpr(unsigned int *d_dsino, - unsigned char *pdsn, unsigned char *ddsn, - int tot_bins, int nfrm); +// uncompress the sinogram after GPU execution +void dsino_ucmpr(unsigned int *d_dsino, unsigned char *pdsn, unsigned char *ddsn, int tot_bins, + int nfrm); -#endif //LAUX_H +#endif // LAUX_H diff --git a/niftypet/nipet/lm/src/lmproc.cu b/niftypet/nipet/lm/src/lmproc.cu index 78dbc06b..29d584e1 100644 --- a/niftypet/nipet/lm/src/lmproc.cu +++ b/niftypet/nipet/lm/src/lmproc.cu @@ -9,243 +9,231 @@ Copyrights: 2020 #include "lmproc.h" -void lmproc( - hstout dicout, - char *flm, - int tstart, int tstop, - LORcc *s2cF, - axialLUT axLUT, - Cnst Cnt) - - /* - Prepare for processing the list mode data and send it for GPU - execution. - */ +void lmproc(hstout dicout, char *flm, int tstart, int tstop, LORcc *s2cF, axialLUT axLUT, Cnst Cnt) + +/* +Prepare for processing the list mode data and send it for GPU +execution. 
+*/ { - //list mode data file (binary) - if (Cnt.LOG <= LOGINFO) printf("i> the list-mode file: %s\n", flm); + // list mode data file (binary) + if (Cnt.LOG <= LOGINFO) + printf("i> the list-mode file: %s\n", flm); - //------------ file and path names + //------------ file and path names #ifdef WIN32 - char *lmdir = strdup(flm); + char *lmdir = strdup(flm); #else - char *lmdir = strdupa(flm); + char *lmdir = strdupa(flm); #endif - char *base = strrchr(lmdir, '/'); - lmdir[base - lmdir] = '\0'; - //------------ - - //****** get LM info ****** - //uses global variable lmprop (see lmaux.cu) - getLMinfo(flm, Cnt); - //****** - - //--- prompt & delayed reports - unsigned int *d_rdlyd; - unsigned int *d_rprmt; - HANDLE_ERROR(cudaMalloc(&d_rdlyd, lmprop.nitag * sizeof(unsigned int))); - HANDLE_ERROR(cudaMalloc(&d_rprmt, lmprop.nitag * sizeof(unsigned int))); - - HANDLE_ERROR(cudaMemset(d_rdlyd, 0, lmprop.nitag * sizeof(unsigned int))); - HANDLE_ERROR(cudaMemset(d_rprmt, 0, lmprop.nitag * sizeof(unsigned int))); - //--- - - //--- for motion detection (centre of Mass) - mMass d_mass; - cudaMalloc(&d_mass.zR, lmprop.nitag * sizeof(int)); - cudaMalloc(&d_mass.zM, lmprop.nitag * sizeof(int)); - cudaMemset(d_mass.zR, 0, lmprop.nitag * sizeof(int)); - cudaMemset(d_mass.zM, 0, lmprop.nitag * sizeof(int)); - //--- - - //--- sino views for motion visualisation - //already copy variables to output (number of time tags) - dicout.nitag = lmprop.nitag; - if (lmprop.nitag>MXNITAG) - dicout.sne = MXNITAG / (1 << VTIME)*SEG0*NSBINS; - else - dicout.sne = (lmprop.nitag + (1 << VTIME) - 1) / (1 << VTIME)*SEG0*NSBINS; - - - // projections for videos - unsigned int * d_snview; - if (lmprop.nitag>MXNITAG) { - //reduce the sino views to only the first 2 hours - cudaMalloc(&d_snview, dicout.sne * sizeof(unsigned int)); - cudaMemset(d_snview, 0, dicout.sne * sizeof(unsigned int)); - } - else { - cudaMalloc(&d_snview, dicout.sne * sizeof(unsigned int)); - cudaMemset( d_snview, 0, dicout.sne * 
sizeof(unsigned int)); - } - //--- - - //--- fansums for randoms estimation - unsigned int *d_fansums; - cudaMalloc(&d_fansums, NRINGS*nCRS * sizeof(unsigned int)); - cudaMemset( d_fansums, 0, NRINGS*nCRS * sizeof(unsigned int)); - //--- - - //--- singles (buckets) - // double the size as additionally saving the number of single - // reports per second (there may be two singles' readings...) - unsigned int *d_bucks; - cudaMalloc(&d_bucks, 2 * NBUCKTS*lmprop.nitag * sizeof(unsigned int)); - cudaMemset( d_bucks, 0, 2 * NBUCKTS*lmprop.nitag * sizeof(unsigned int)); - //--- - - //--- SSRB sino - unsigned int *d_ssrb; - HANDLE_ERROR(cudaMalloc(&d_ssrb, SEG0*NSBINANG * sizeof(unsigned int))); - HANDLE_ERROR(cudaMemset( d_ssrb, 0, SEG0*NSBINANG * sizeof(unsigned int))); - //--- - - //--- sinograms in span-1 or span-11 or ssrb - unsigned int tot_bins; - - if (Cnt.SPN == 1) { - tot_bins = TOT_BINS_S1; - } - else if (Cnt.SPN == 11) { - tot_bins = TOT_BINS; - } - else if (Cnt.SPN == 0) { - tot_bins = SEG0*NSBINANG; - } - - - // prompt and delayed sinograms - unsigned int *d_psino;//, *d_dsino; - - - // prompt and compressed delayeds in one sinogram (two unsigned shorts) - HANDLE_ERROR(cudaMalloc(&d_psino, tot_bins * sizeof(unsigned int))); - HANDLE_ERROR(cudaMemset( d_psino, 0, tot_bins * sizeof(unsigned int))); - - - //--- start and stop time - if (tstart == tstop) { - tstart = 0; - tstop = lmprop.nitag; - } - lmprop.tstart = tstart; - lmprop.tstop = tstop; - //> bytes per LM event - lmprop.bpe = Cnt.BPE; - //> list mode data offset, start of events - lmprop.lmoff = Cnt.LMOFF; - - - if (Cnt.LOG <= LOGDEBUG) printf("i> LM offset in bytes: %d\n", lmprop.lmoff); - if (Cnt.LOG <= LOGDEBUG) printf("i> bytes per LM event: %d\n", lmprop.bpe); - if (Cnt.LOG <= LOGINFO) printf("i> frame start time: %d\n", tstart); - if (Cnt.LOG <= LOGINFO) printf("i> frame stop time: %d\n", tstop); - //--- - - //======= get only the chunks which have the time frame data - modifyLMinfo(tstart, tstop, 
Cnt); - lmprop.span = Cnt.SPN; - //=========== - - - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - - //************************************************************************************** - gpu_hst( - d_psino, - d_ssrb, - d_rdlyd, - d_rprmt, - d_mass, - d_snview, - d_fansums, - d_bucks, - tstart, tstop, - s2cF, - axLUT, - Cnt); - //************************************************************************************** - // cudaDeviceSynchronize(); - - - dicout.tot = tot_bins; - - //---SSRB - HANDLE_ERROR(cudaMemcpy(dicout.ssr, d_ssrb, SEG0*NSBINANG * sizeof(unsigned int), cudaMemcpyDeviceToHost)); - unsigned long long psum_ssrb = 0; - for (int i = 0; i copy to host the compressed prompt and delayed sinograms - unsigned int * sino = (unsigned int *)malloc(tot_bins * sizeof(unsigned int)); - HANDLE_ERROR(cudaMemcpy(sino, d_psino, tot_bins * sizeof(unsigned int), cudaMemcpyDeviceToHost)); - - unsigned int mxbin = 0; - dicout.psm = 0; - dicout.dsm = 0; - for (int i = 0; i> 16; - dicout.psm += dicout.psn[i]; - dicout.dsm += dicout.dsn[i]; - if (mxbin calculate the centre of mass while also the sum of head-curve prompts and delayeds - unsigned long long sphc = 0, sdhc = 0; - for (int i = 0; i total prompt single slice rebinned sinogram: P = %llu\n", psum_ssrb); - if (Cnt.LOG <= LOGINFO) printf("\nic> total prompt and delayeds sinogram events: P = %llu, D = %llu\n", dicout.psm, dicout.dsm); - if (Cnt.LOG <= LOGINFO) printf("\nic> total prompt and delayeds head-curve events: P = %llu, D = %llu\n", sphc, sdhc); - if (Cnt.LOG <= LOGINFO) printf("\nic> maximum prompt sino value: %u \n", mxbin); - - - //-fansums and bucket singles - HANDLE_ERROR(cudaMemcpy(dicout.fan, d_fansums, NRINGS*nCRS * sizeof(unsigned int), cudaMemcpyDeviceToHost)); - HANDLE_ERROR(cudaMemcpy(dicout.bck, d_bucks, 2 * NBUCKTS*lmprop.nitag * sizeof(unsigned int), cudaMemcpyDeviceToHost)); - - /* Clean up. 
*/ - free(zR); - free(zM); - - free(lmprop.atag); - free(lmprop.btag); - free(lmprop.ele4chnk); - free(lmprop.ele4thrd); - - cudaFree(d_psino); - cudaFree(d_ssrb); - cudaFree(d_rdlyd); - cudaFree(d_rprmt); - cudaFree(d_snview); - cudaFree(d_bucks); - cudaFree(d_fansums); - cudaFree(d_mass.zR); - cudaFree(d_mass.zM); - - return; + char *base = strrchr(lmdir, '/'); + lmdir[base - lmdir] = '\0'; + //------------ + + //****** get LM info ****** + // uses global variable lmprop (see lmaux.cu) + getLMinfo(flm, Cnt); + //****** + + //--- prompt & delayed reports + unsigned int *d_rdlyd; + unsigned int *d_rprmt; + HANDLE_ERROR(cudaMalloc(&d_rdlyd, lmprop.nitag * sizeof(unsigned int))); + HANDLE_ERROR(cudaMalloc(&d_rprmt, lmprop.nitag * sizeof(unsigned int))); + + HANDLE_ERROR(cudaMemset(d_rdlyd, 0, lmprop.nitag * sizeof(unsigned int))); + HANDLE_ERROR(cudaMemset(d_rprmt, 0, lmprop.nitag * sizeof(unsigned int))); + //--- + + //--- for motion detection (centre of Mass) + mMass d_mass; + cudaMalloc(&d_mass.zR, lmprop.nitag * sizeof(int)); + cudaMalloc(&d_mass.zM, lmprop.nitag * sizeof(int)); + cudaMemset(d_mass.zR, 0, lmprop.nitag * sizeof(int)); + cudaMemset(d_mass.zM, 0, lmprop.nitag * sizeof(int)); + //--- + + //--- sino views for motion visualisation + // already copy variables to output (number of time tags) + dicout.nitag = lmprop.nitag; + if (lmprop.nitag > MXNITAG) + dicout.sne = MXNITAG / (1 << VTIME) * SEG0 * NSBINS; + else + dicout.sne = (lmprop.nitag + (1 << VTIME) - 1) / (1 << VTIME) * SEG0 * NSBINS; + + // projections for videos + unsigned int *d_snview; + if (lmprop.nitag > MXNITAG) { + // reduce the sino views to only the first 2 hours + cudaMalloc(&d_snview, dicout.sne * sizeof(unsigned int)); + cudaMemset(d_snview, 0, dicout.sne * sizeof(unsigned int)); + } else { + cudaMalloc(&d_snview, dicout.sne * sizeof(unsigned int)); + cudaMemset(d_snview, 0, dicout.sne * sizeof(unsigned int)); + } + //--- + + //--- fansums for randoms estimation + unsigned int 
*d_fansums; + cudaMalloc(&d_fansums, NRINGS * nCRS * sizeof(unsigned int)); + cudaMemset(d_fansums, 0, NRINGS * nCRS * sizeof(unsigned int)); + //--- + + //--- singles (buckets) + // double the size as additionally saving the number of single + // reports per second (there may be two singles' readings...) + unsigned int *d_bucks; + cudaMalloc(&d_bucks, 2 * NBUCKTS * lmprop.nitag * sizeof(unsigned int)); + cudaMemset(d_bucks, 0, 2 * NBUCKTS * lmprop.nitag * sizeof(unsigned int)); + //--- + + //--- SSRB sino + unsigned int *d_ssrb; + HANDLE_ERROR(cudaMalloc(&d_ssrb, SEG0 * NSBINANG * sizeof(unsigned int))); + HANDLE_ERROR(cudaMemset(d_ssrb, 0, SEG0 * NSBINANG * sizeof(unsigned int))); + //--- + + //--- sinograms in span-1 or span-11 or ssrb + unsigned int tot_bins; + + if (Cnt.SPN == 1) { + tot_bins = TOT_BINS_S1; + } else if (Cnt.SPN == 11) { + tot_bins = TOT_BINS; + } else if (Cnt.SPN == 0) { + tot_bins = SEG0 * NSBINANG; + } + + // prompt and delayed sinograms + unsigned int *d_psino; //, *d_dsino; + + // prompt and compressed delayeds in one sinogram (two unsigned shorts) + HANDLE_ERROR(cudaMalloc(&d_psino, tot_bins * sizeof(unsigned int))); + HANDLE_ERROR(cudaMemset(d_psino, 0, tot_bins * sizeof(unsigned int))); + + //--- start and stop time + if (tstart == tstop) { + tstart = 0; + tstop = lmprop.nitag; + } + lmprop.tstart = tstart; + lmprop.tstop = tstop; + //> bytes per LM event + lmprop.bpe = Cnt.BPE; + //> list mode data offset, start of events + lmprop.lmoff = Cnt.LMOFF; + + if (Cnt.LOG <= LOGDEBUG) + printf("i> LM offset in bytes: %d\n", lmprop.lmoff); + if (Cnt.LOG <= LOGDEBUG) + printf("i> bytes per LM event: %d\n", lmprop.bpe); + if (Cnt.LOG <= LOGINFO) + printf("i> frame start time: %d\n", tstart); + if (Cnt.LOG <= LOGINFO) + printf("i> frame stop time: %d\n", tstop); + //--- + + //======= get only the chunks which have the time frame data + modifyLMinfo(tstart, tstop, Cnt); + lmprop.span = Cnt.SPN; + //=========== + + 
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + + //************************************************************************************** + gpu_hst(d_psino, d_ssrb, d_rdlyd, d_rprmt, d_mass, d_snview, d_fansums, d_bucks, tstart, tstop, + s2cF, axLUT, Cnt); + //************************************************************************************** + // cudaDeviceSynchronize(); + + dicout.tot = tot_bins; + + //---SSRB + HANDLE_ERROR(cudaMemcpy(dicout.ssr, d_ssrb, SEG0 * NSBINANG * sizeof(unsigned int), + cudaMemcpyDeviceToHost)); + unsigned long long psum_ssrb = 0; + for (int i = 0; i < SEG0 * NSBINANG; i++) { + psum_ssrb += dicout.ssr[i]; + } + //--- + + //> copy to host the compressed prompt and delayed sinograms + unsigned int *sino = (unsigned int *)malloc(tot_bins * sizeof(unsigned int)); + HANDLE_ERROR(cudaMemcpy(sino, d_psino, tot_bins * sizeof(unsigned int), cudaMemcpyDeviceToHost)); + + unsigned int mxbin = 0; + dicout.psm = 0; + dicout.dsm = 0; + for (int i = 0; i < tot_bins; i++) { + dicout.psn[i] = sino[i] & 0x0000FFFF; + dicout.dsn[i] = sino[i] >> 16; + dicout.psm += dicout.psn[i]; + dicout.dsm += dicout.dsn[i]; + if (mxbin < dicout.psn[i]) + mxbin = dicout.psn[i]; + } + + //--- output data to Python + // projection views + HANDLE_ERROR( + cudaMemcpy(dicout.snv, d_snview, dicout.sne * sizeof(unsigned int), cudaMemcpyDeviceToHost)); + + // head curves + HANDLE_ERROR(cudaMemcpy(dicout.hcd, d_rdlyd, lmprop.nitag * sizeof(unsigned int), + cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(dicout.hcp, d_rprmt, lmprop.nitag * sizeof(unsigned int), + cudaMemcpyDeviceToHost)); + + // //mass centre + int *zR = (int *)malloc(lmprop.nitag * sizeof(int)); + int *zM = (int *)malloc(lmprop.nitag * sizeof(int)); + cudaMemcpy(zR, d_mass.zR, lmprop.nitag * sizeof(int), cudaMemcpyDeviceToHost); + cudaMemcpy(zM, d_mass.zM, lmprop.nitag * sizeof(int), cudaMemcpyDeviceToHost); + + //> calculate the centre of mass while also the sum 
of head-curve prompts and delayeds + unsigned long long sphc = 0, sdhc = 0; + for (int i = 0; i < lmprop.nitag; i++) { + dicout.mss[i] = zR[i] / (float)zM[i]; + sphc += dicout.hcp[i]; + sdhc += dicout.hcd[i]; + } + + if (Cnt.LOG <= LOGINFO) + printf("\nic> total prompt single slice rebinned sinogram: P = %llu\n", psum_ssrb); + if (Cnt.LOG <= LOGINFO) + printf("\nic> total prompt and delayeds sinogram events: P = %llu, D = %llu\n", dicout.psm, + dicout.dsm); + if (Cnt.LOG <= LOGINFO) + printf("\nic> total prompt and delayeds head-curve events: P = %llu, D = %llu\n", sphc, sdhc); + if (Cnt.LOG <= LOGINFO) + printf("\nic> maximum prompt sino value: %u \n", mxbin); + + //-fansums and bucket singles + HANDLE_ERROR(cudaMemcpy(dicout.fan, d_fansums, NRINGS * nCRS * sizeof(unsigned int), + cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(dicout.bck, d_bucks, 2 * NBUCKTS * lmprop.nitag * sizeof(unsigned int), + cudaMemcpyDeviceToHost)); + + /* Clean up. */ + free(zR); + free(zM); + + free(lmprop.atag); + free(lmprop.btag); + free(lmprop.ele4chnk); + free(lmprop.ele4thrd); + + cudaFree(d_psino); + cudaFree(d_ssrb); + cudaFree(d_rdlyd); + cudaFree(d_rprmt); + cudaFree(d_snview); + cudaFree(d_bucks); + cudaFree(d_fansums); + cudaFree(d_mass.zR); + cudaFree(d_mass.zM); + + return; } diff --git a/niftypet/nipet/lm/src/lmproc.h b/niftypet/nipet/lm/src/lmproc.h index e72a65e5..211ca749 100644 --- a/niftypet/nipet/lm/src/lmproc.h +++ b/niftypet/nipet/lm/src/lmproc.h @@ -4,36 +4,29 @@ #include #include "def.h" -#include "scanner_0.h" -#include "lmaux.h" #include "hst.h" +#include "lmaux.h" +#include "scanner_0.h" typedef struct { - int nitag; - int sne; //number of elements in sino views - unsigned int * snv; //sino views - unsigned int * hcp; //head curve prompts - unsigned int * hcd; //head curve delayeds - unsigned int * fan; //fansums - unsigned int * bck; //buckets (singles) - float * mss; //centre of mass (axially) - - unsigned int * ssr; // SSRB sinogram - unsigned short * 
psn; // prompt sinogram - unsigned short * dsn; // delayed sinogram - unsigned long long psm; // prompt sum - unsigned long long dsm; // delayed sum - unsigned int tot; // total number of bins -} hstout; // structure of LM processing outputs - - -void lmproc(hstout dicout, - char *flm, - int tstart, int tstop, - LORcc *s2cF, - axialLUT axLUT, - Cnst Cnt); - - + int nitag; + int sne; // number of elements in sino views + unsigned int *snv; // sino views + unsigned int *hcp; // head curve prompts + unsigned int *hcd; // head curve delayeds + unsigned int *fan; // fansums + unsigned int *bck; // buckets (singles) + float *mss; // centre of mass (axially) + + unsigned int *ssr; // SSRB sinogram + unsigned short *psn; // prompt sinogram + unsigned short *dsn; // delayed sinogram + unsigned long long psm; // prompt sum + unsigned long long dsm; // delayed sum + unsigned int tot; // total number of bins +} hstout; // structure of LM processing outputs + +void lmproc(hstout dicout, char *flm, int tstart, int tstop, LORcc *s2cF, axialLUT axLUT, + Cnst Cnt); #endif diff --git a/niftypet/nipet/lm/src/rnd.cu b/niftypet/nipet/lm/src/rnd.cu index 8841b076..a06cb71c 100644 --- a/niftypet/nipet/lm/src/rnd.cu +++ b/niftypet/nipet/lm/src/rnd.cu @@ -6,733 +6,697 @@ author: Pawel Markiewicz Copyrights: 2018 ------------------------------------------------------------------------*/ -#include #include "rnd.h" +#include -//for constant memory init -#define nrCRS 448 //number of active crystals transaxially +// for constant memory init +#define nrCRS 448 // number of active crystals transaxially #define nrRNG 64 -#define nrSN1 4084 //for span-1 to span-11 +#define nrSN1 4084 // for span-1 to span-11 __constant__ short c_crange[4 * nrCRS]; __constant__ short c_rrange[3 * nrRNG]; __constant__ short c_li2span11[nrSN1]; // Do reduction (sum) within a warp, i.e., for 32 out 64 rings (axially). 
-__inline__ __device__ -float warpsum(float rval) { - for (int off = 16; off>0; off /= 2) - rval += __shfl_down_sync(0xffffffff, rval, off);//__shfl_down(rval, off); - return rval; +__inline__ __device__ float warpsum(float rval) { + for (int off = 16; off > 0; off /= 2) + rval += __shfl_down_sync(0xffffffff, rval, off); //__shfl_down(rval, off); + return rval; } // Do reduction (sum) between warps, i.e., for crystals transaxially. -__inline__ __device__ -float crystal_sum(float cval) { +__inline__ __device__ float crystal_sum(float cval) { - // Shared mem for 32 (max) partial sums - static __shared__ float shared[32]; - int cidx = (threadIdx.x + blockDim.x*threadIdx.y); - int lane = cidx & (warpSize - 1); - int warpid = cidx / warpSize; + // Shared mem for 32 (max) partial sums + static __shared__ float shared[32]; + int cidx = (threadIdx.x + blockDim.x * threadIdx.y); + int lane = cidx & (warpSize - 1); + int warpid = cidx / warpSize; - //parital sum within warp - cval = warpsum(cval); + // parital sum within warp + cval = warpsum(cval); - //write the sum to shared memory and then sync (wait) - if (lane == 0) shared[warpid] = cval; - __syncthreads(); + // write the sum to shared memory and then sync (wait) + if (lane == 0) + shared[warpid] = cval; + __syncthreads(); - //read from shared memory only if that warp existed - cval = (cidx < (blockDim.x*blockDim.y) / warpSize) ? shared[lane] : 0; + // read from shared memory only if that warp existed + cval = (cidx < (blockDim.x * blockDim.y) / warpSize) ? 
shared[lane] : 0; - if (warpid == 0) cval = warpsum(cval); //Final reduce within first warp + if (warpid == 0) + cval = warpsum(cval); // Final reduce within first warp - return cval; + return cval; } - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> -__global__ void rinit(float * init, - const unsigned int * fsum, - const float * ncrs) { +__global__ void rinit(float *init, const unsigned int *fsum, const float *ncrs) { - int idx = threadIdx.x + blockIdx.x*blockDim.x; - init[idx] = sqrtf((float)fsum[idx] / ncrs[idx]); + int idx = threadIdx.x + blockIdx.x * blockDim.x; + init[idx] = sqrtf((float)fsum[idx] / ncrs[idx]); } //---------------------------------------------------------------------------------------- -__global__ void rdiv(float * res, - const unsigned int * fsum, - const float * csum) { +__global__ void rdiv(float *res, const unsigned int *fsum, const float *csum) { - int idx = threadIdx.x + blockIdx.x*blockDim.x; - res[idx] = (float)fsum[idx] / csum[idx]; + int idx = threadIdx.x + blockIdx.x * blockDim.x; + res[idx] = (float)fsum[idx] / csum[idx]; } //---------------------------------------------------------------------------------------- -__global__ void radd(float * resp, - const float * res, - float alpha) { +__global__ void radd(float *resp, const float *res, float alpha) { - int idx = threadIdx.x + blockIdx.x*blockDim.x; + int idx = threadIdx.x + blockIdx.x * blockDim.x; - resp[idx] = (1 - alpha)*resp[idx] + alpha*res[idx]; + resp[idx] = (1 - alpha) * resp[idx] + alpha * res[idx]; } //---------------------------------------------------------------------------------------- // create random sinogram from crystal singles -__global__ void sgl2sino(float * rsino, - const float * csngl, - const short2 *s2cr, - const short2 *aw2sn, - const short2 *sn1_rno, - const int span) { - - int idx = threadIdx.x + blockIdx.x*blockDim.x; - if (idxr1; - - int ai = aw2sn[awi].x; - int wi = aw2sn[awi].y; - int c0 = s2cr[awi].x; - 
int c1 = s2cr[awi].y; - - //singlses to random sino - if (span == 1) - rsino[si*NSBINS*NSANGLES + ai*NSBINS + wi] = csngl[r0 + NRINGS*c0] * csngl[r1 + NRINGS*c1]; - else if (span == 11) { - int si11 = c_li2span11[si]; - atomicAdd(rsino + si11*NSBINS*NSANGLES + ai*NSBINS + wi, csngl[r0 + NRINGS*c0] * csngl[r1 + NRINGS*c1]); - } - } - +__global__ void sgl2sino(float *rsino, const float *csngl, const short2 *s2cr, const short2 *aw2sn, + const short2 *sn1_rno, const int span) { + + int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < AW * NSINOS) { + + int si = idx / AW; + int awi = idx - si * AW; + + int r0 = sn1_rno[si].x; + int r1 = sn1_rno[si].y; + + // bool neg = r0>r1; + + int ai = aw2sn[awi].x; + int wi = aw2sn[awi].y; + int c0 = s2cr[awi].x; + int c1 = s2cr[awi].y; + + // singlses to random sino + if (span == 1) + rsino[si * NSBINS * NSANGLES + ai * NSBINS + wi] = + csngl[r0 + NRINGS * c0] * csngl[r1 + NRINGS * c1]; + else if (span == 11) { + int si11 = c_li2span11[si]; + atomicAdd(rsino + si11 * NSBINS * NSANGLES + ai * NSBINS + wi, + csngl[r0 + NRINGS * c0] * csngl[r1 + NRINGS * c1]); + } + } } //---------------------------------------------------------------------------------------- -__global__ void rnd(float * res, - const float * crs) { - //ring index - int itx = threadIdx.x; - - //crystal (transaxial) index - int ity = threadIdx.y; - - //rings (vertex of the fan sums) - int ibx = blockIdx.x; - - //crystals - int iby = blockIdx.y; - - float crystal_val = 0; - float c_sum = 0; - - //crystal index with an offset - int ic; - - for (int i = 0; i= c_rrange[ibx]) && (itx <= c_rrange[ibx + NRINGS])) { - - //go through all transaxial crystals in the for loop (indexing: x-axial, y-transaxial) - ic = c_crange[iby] + (i + ity*CFOR); - - //check which crystals are in coincidence (within the range)(3rd row of c_crange) - //first see the order of the range; since it is on a circle the other end can be of lower number - if (c_crange[iby + 2 * nCRSR] == 0) { - if 
(ic <= c_crange[iby + nCRSR]) - crystal_val = crs[itx + NRINGS*ic]; - } - else { - if (ic <= (c_crange[iby + nCRSR] + nCRSR)) { - ic -= nCRSR*(ic >= nCRSR); - crystal_val = crs[itx + NRINGS*ic]; - } - } - }//end of if's - - __syncthreads(); - crystal_val = crystal_sum(crystal_val); - - // the partial sums are taken from the first warp and its first lane. - if (itx == 0 && ity == 0) { - c_sum += crystal_val; - //printf("\n(%d) = %lu\n", i, c_sum); - } - - } - - //get the sub-total sum - if (itx == 0 && ity == 0) { - //printf("\n[%d, %d] = %lu\n", ibx, iby, c_sum); - res[ibx + NRINGS*iby] = c_sum; - } - +__global__ void rnd(float *res, const float *crs) { + // ring index + int itx = threadIdx.x; + + // crystal (transaxial) index + int ity = threadIdx.y; + + // rings (vertex of the fan sums) + int ibx = blockIdx.x; + + // crystals + int iby = blockIdx.y; + + float crystal_val = 0; + float c_sum = 0; + + // crystal index with an offset + int ic; + + for (int i = 0; i < CFOR; i++) { + crystal_val = 0; + // check which rings are in coincidence (dependent on the MRD) + // only a few rings are discarded for crystals lying on the edges of the axial FOV + // ibx is the ring vertex crystal, itx is the current ring crystal for summation + if ((itx >= c_rrange[ibx]) && (itx <= c_rrange[ibx + NRINGS])) { + + // go through all transaxial crystals in the for loop (indexing: x-axial, y-transaxial) + ic = c_crange[iby] + (i + ity * CFOR); + + // check which crystals are in coincidence (within the range)(3rd row of c_crange) + // first see the order of the range; since it is on a circle the other end can be of lower + // number + if (c_crange[iby + 2 * nCRSR] == 0) { + if (ic <= c_crange[iby + nCRSR]) + crystal_val = crs[itx + NRINGS * ic]; + } else { + if (ic <= (c_crange[iby + nCRSR] + nCRSR)) { + ic -= nCRSR * (ic >= nCRSR); + crystal_val = crs[itx + NRINGS * ic]; + } + } + } // end of if's + + __syncthreads(); + crystal_val = crystal_sum(crystal_val); + + // the partial sums are 
taken from the first warp and its first lane. + if (itx == 0 && ity == 0) { + c_sum += crystal_val; + // printf("\n(%d) = %lu\n", i, c_sum); + } + } + + // get the sub-total sum + if (itx == 0 && ity == 0) { + // printf("\n[%d, %d] = %lu\n", ibx, iby, c_sum); + res[ibx + NRINGS * iby] = c_sum; + } } - - - - //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -void gpu_randoms(float *rsn, - float *cmap, - unsigned int * fansums, - txLUTs txlut, - short *sn1_rno, - short *sn1_sn11, - const Cnst Cnt) -{ - - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); - - //--- the sino for estimated random events - float * d_rsino; - unsigned long long tot_bins = 0; - if (Cnt.SPN == 1) - tot_bins = Cnt.A*Cnt.W*Cnt.NSN1; - else if (Cnt.SPN == 11) - tot_bins = Cnt.A*Cnt.W*Cnt.NSN11; - HANDLE_ERROR(cudaMalloc(&d_rsino, tot_bins * sizeof(float))); - HANDLE_ERROR(cudaMemset(d_rsino, 0, tot_bins * sizeof(float))); - //--- - - - //SPAN-1 to SPAN-11 conversion table in GPU constant memory - HANDLE_ERROR(cudaMemcpyToSymbol(c_li2span11, sn1_sn11, Cnt.NSN1 * sizeof(short))); - - //--- sino to rings LUT - short2 *d_sn2rng; - HANDLE_ERROR(cudaMalloc(&d_sn2rng, NSINOS * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_sn2rng, sn1_rno, NSINOS * sizeof(short2), cudaMemcpyHostToDevice)); - //--- - - //--- GPU linear indx to sino and crystal lookup table - short2 *d_s2cr; - HANDLE_ERROR(cudaMalloc(&d_s2cr, AW * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_s2cr, txlut.s2cr, AW * sizeof(short2), cudaMemcpyHostToDevice)); - short2 *d_aw2sn; - HANDLE_ERROR(cudaMalloc(&d_aw2sn, AW * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_aw2sn, txlut.aw2sn, AW * sizeof(short2), cudaMemcpyHostToDevice)); - //---- - - - - //--- calculating transaxial crystal range being in coincidence with each opposing crystal - int wsum = 0; - int prv; //previous - short *crange = (short*)malloc(4 * Cnt.NCRSR * sizeof(short)); - 
for (int c1 = 0; c1prv) - crange[c1] = c2; - if (txlut.cij[c2 + Cnt.NCRSR*c1] or operator in crystal range calculations. - crange[c1 + 2 * Cnt.NCRSR] = (crange[c1] - crange[c1 + Cnt.NCRSR]) > 0; - - // if (crange[c1+2*Cnt.NCRSR] == 0) printf("cr1=%d, cr2=%d; c1 = %d, wsum=%d\n", crange[c1], crange[c1+Cnt.NCRSR], c1,wsum); - - crange[c1 + 3 * Cnt.NCRSR] = wsum; - //printf("%d. crange = <%d, %d, %d> . %d\n", c1, crange[c1], crange[c1+Cnt.NCRSR], crange[c1+2*Cnt.NCRSR], crange[c1]-crange[c1+Cnt.NCRSR]); - wsum = 0; - } - - // to constant memory (GPU) - HANDLE_ERROR(cudaMemcpyToSymbol(c_crange, crange, 4 * Cnt.NCRSR * sizeof(short))); - //--- - - //--- calculate axial crystal range (rings) being in coincidence with each opposing ring - short *rrange = (short*)malloc(3 * Cnt.NRNG * sizeof(short)); - memset(rrange, 1, 4 * Cnt.NRNG); - wsum = 0; - for (int ri = 0; ri= 0) && (rq> %d, %d.\n", ri, rrange[ri], rrange[ri + Cnt.NRNG]); - } - // to constant memory (GPU) - HANDLE_ERROR(cudaMemcpyToSymbol(c_rrange, rrange, 3 * Cnt.NRNG * sizeof(short))); - //--- - - - //---------- GET THE FAN SUMS in GPU----------------- - //get rid of gaps from the crystal map [64x504] - unsigned int * fsum = (unsigned int*)malloc(Cnt.NRNG*Cnt.NCRSR * sizeof(unsigned int)); - //indx for reduced number of crystals by the gaps - for (int i = 0; i-1) { - for (int ri = 0; ri estimating random events (variance reduction)... 
"); - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - HANDLE_ERROR(cudaGetLastError()); - - // //===== Number of Crystal in Coincidence ====== - dim3 dBpG(Cnt.NRNG, Cnt.NCRSR, 1); - dim3 dTpB(Cnt.NRNG, 16, 1);//16 is chosen as with Cnt.NRNG it makes max for no of threads ie 1024 - rnd << > >(d_ncrs, d_ones); - HANDLE_ERROR(cudaGetLastError()); - // //============================================= - - - //========= INIT ============================== - rinit << > >(d_resp, d_fsum, d_ncrs); - HANDLE_ERROR(cudaGetLastError()); - //============================================= - - //========= ITERATE =========================== - for (int k = 0; k<10; k++) { - rnd << > >(d_res1, d_resp); - rdiv << > >(d_res2, d_fsum, d_res1); - radd << > >(d_resp, d_res2, 0.5); - } - HANDLE_ERROR(cudaGetLastError()); - //============================================= - HANDLE_ERROR(cudaDeviceSynchronize()); - - //=== form randoms sino === - sgl2sino << <(NSINOS*AW + 1024) / 1024, 1024 >> >(d_rsino, d_resp, d_s2cr, d_aw2sn, d_sn2rng, Cnt.SPN); - HANDLE_ERROR(cudaGetLastError()); - //=== - - HANDLE_ERROR(cudaDeviceSynchronize()); - //--- - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) printf(" DONE in %fs.\n", 0.001*elapsedTime); - //=============================================<<<<<<<< - - - - //--- results to CPU - float * res = (float*)malloc(Cnt.NRNG*Cnt.NCRSR * sizeof(float)); - HANDLE_ERROR(cudaMemcpy(res, d_resp, Cnt.NRNG*Cnt.NCRSR * sizeof(float), cudaMemcpyDeviceToHost));//d_resp - //CRYSTAL MAP: put the gaps back to the crystal map [64x504] - for (int i = 0; i-1) { - for (int ri = 0; ri using CUDA device #%d\n", dev_id); + + //--- the sino for estimated random events + float *d_rsino; + unsigned long long tot_bins = 0; + if (Cnt.SPN == 1) + tot_bins = Cnt.A * 
Cnt.W * Cnt.NSN1; + else if (Cnt.SPN == 11) + tot_bins = Cnt.A * Cnt.W * Cnt.NSN11; + HANDLE_ERROR(cudaMalloc(&d_rsino, tot_bins * sizeof(float))); + HANDLE_ERROR(cudaMemset(d_rsino, 0, tot_bins * sizeof(float))); + //--- + + // SPAN-1 to SPAN-11 conversion table in GPU constant memory + HANDLE_ERROR(cudaMemcpyToSymbol(c_li2span11, sn1_sn11, Cnt.NSN1 * sizeof(short))); + + //--- sino to rings LUT + short2 *d_sn2rng; + HANDLE_ERROR(cudaMalloc(&d_sn2rng, NSINOS * sizeof(short2))); + HANDLE_ERROR(cudaMemcpy(d_sn2rng, sn1_rno, NSINOS * sizeof(short2), cudaMemcpyHostToDevice)); + //--- + + //--- GPU linear indx to sino and crystal lookup table + short2 *d_s2cr; + HANDLE_ERROR(cudaMalloc(&d_s2cr, AW * sizeof(short2))); + HANDLE_ERROR(cudaMemcpy(d_s2cr, txlut.s2cr, AW * sizeof(short2), cudaMemcpyHostToDevice)); + short2 *d_aw2sn; + HANDLE_ERROR(cudaMalloc(&d_aw2sn, AW * sizeof(short2))); + HANDLE_ERROR(cudaMemcpy(d_aw2sn, txlut.aw2sn, AW * sizeof(short2), cudaMemcpyHostToDevice)); + //---- + + //--- calculating transaxial crystal range being in coincidence with each opposing crystal + int wsum = 0; + int prv; // previous + short *crange = (short *)malloc(4 * Cnt.NCRSR * sizeof(short)); + for (int c1 = 0; c1 < Cnt.NCRSR; c1 += 1) { + prv = txlut.cij[Cnt.NCRSR * c1 + Cnt.NCRSR - 1]; + + for (int c2 = 0; c2 < Cnt.NCRSR; c2 += 1) { + wsum += txlut.cij[c2 + Cnt.NCRSR * c1]; + if (txlut.cij[c2 + Cnt.NCRSR * c1] > prv) + crange[c1] = c2; + if (txlut.cij[c2 + Cnt.NCRSR * c1] < prv) + crange[c1 + Cnt.NCRSR] = c2 - 1 + Cnt.NCRSR * (c2 == 0); + prv = txlut.cij[c2 + Cnt.NCRSR * c1]; + } + // for GPU conditional use of or operator in crystal range calculations. + crange[c1 + 2 * Cnt.NCRSR] = (crange[c1] - crange[c1 + Cnt.NCRSR]) > 0; + + // if (crange[c1+2*Cnt.NCRSR] == 0) printf("cr1=%d, cr2=%d; c1 = %d, wsum=%d\n", crange[c1], + // crange[c1+Cnt.NCRSR], c1,wsum); + + crange[c1 + 3 * Cnt.NCRSR] = wsum; + // printf("%d. crange = <%d, %d, %d> . 
%d\n", c1, crange[c1], crange[c1+Cnt.NCRSR], + // crange[c1+2*Cnt.NCRSR], crange[c1]-crange[c1+Cnt.NCRSR]); + wsum = 0; + } + + // to constant memory (GPU) + HANDLE_ERROR(cudaMemcpyToSymbol(c_crange, crange, 4 * Cnt.NCRSR * sizeof(short))); + //--- + + //--- calculate axial crystal range (rings) being in coincidence with each opposing ring + short *rrange = (short *)malloc(3 * Cnt.NRNG * sizeof(short)); + memset(rrange, 1, 4 * Cnt.NRNG); + wsum = 0; + for (int ri = 0; ri < Cnt.NRNG; ri++) { + for (int rq = (ri - Cnt.MRD); rq < (ri + Cnt.MRD + 1); rq++) { + if ((rq >= 0) && (rq < Cnt.NRNG)) { + wsum += 1; + if (rrange[ri] == 257) + rrange[ri] = rq; + rrange[ri + Cnt.NRNG] = rq; + } + rrange[ri + 2 * Cnt.NRNG] = wsum; + wsum = 0; + } + // printf("%d >> %d, %d.\n", ri, rrange[ri], rrange[ri + Cnt.NRNG]); + } + // to constant memory (GPU) + HANDLE_ERROR(cudaMemcpyToSymbol(c_rrange, rrange, 3 * Cnt.NRNG * sizeof(short))); + //--- + + //---------- GET THE FAN SUMS in GPU----------------- + // get rid of gaps from the crystal map [64x504] + unsigned int *fsum = (unsigned int *)malloc(Cnt.NRNG * Cnt.NCRSR * sizeof(unsigned int)); + // indx for reduced number of crystals by the gaps + for (int i = 0; i < Cnt.NCRS; i++) { + if (txlut.crsr[i] > -1) { + for (int ri = 0; ri < Cnt.NRNG; ri++) { + fsum[ri + txlut.crsr[i] * Cnt.NRNG] = fansums[Cnt.NCRS * ri + i]; + // printf("fsum(%d,%d)=%d * ", ri, txlut.crsr[i], fsum[ri + txlut.crsr[i]*Cnt.NRNG]); + } + } + } + + // load the reduced fansums to the device + unsigned int *d_fsum; + HANDLE_ERROR(cudaMalloc(&d_fsum, Cnt.NRNG * Cnt.NCRSR * sizeof(unsigned int))); + HANDLE_ERROR(cudaMemcpy(d_fsum, fsum, Cnt.NRNG * Cnt.NCRSR * sizeof(unsigned int), + cudaMemcpyHostToDevice)); + //---------------------------------------------- + + // results GPU + float *d_resp; + HANDLE_ERROR(cudaMalloc(&d_resp, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + + float *d_res1; + HANDLE_ERROR(cudaMalloc(&d_res1, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + + 
float *d_res2; + HANDLE_ERROR(cudaMalloc(&d_res2, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + HANDLE_ERROR(cudaMemset(d_res2, 0, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + + // crystal 'ones' for init and number of crystal in coincidence for each opposing crystal + float *ones = (float *)malloc(Cnt.NRNG * Cnt.NCRSR * sizeof(float)); + for (int i = 0; i < Cnt.NRNG * Cnt.NCRSR; i++) + ones[i] = 1; + float *d_ones; + HANDLE_ERROR(cudaMalloc(&d_ones, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + HANDLE_ERROR( + cudaMemcpy(d_ones, ones, Cnt.NRNG * Cnt.NCRSR * sizeof(float), cudaMemcpyHostToDevice)); + + // number of crystals in coincidence + float *d_ncrs; + HANDLE_ERROR(cudaMalloc(&d_ncrs, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + + //=============================================<<<<<<<< + if (Cnt.LOG <= LOGINFO) + printf("\ni> estimating random events (variance reduction)... "); + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + + HANDLE_ERROR(cudaGetLastError()); + + // //===== Number of Crystal in Coincidence ====== + dim3 dBpG(Cnt.NRNG, Cnt.NCRSR, 1); + dim3 dTpB(Cnt.NRNG, 16, + 1); // 16 is chosen as with Cnt.NRNG it makes max for no of threads ie 1024 + rnd<<>>(d_ncrs, d_ones); + HANDLE_ERROR(cudaGetLastError()); + // //============================================= + + //========= INIT ============================== + rinit<<>>(d_resp, d_fsum, d_ncrs); + HANDLE_ERROR(cudaGetLastError()); + //============================================= + + //========= ITERATE =========================== + for (int k = 0; k < 10; k++) { + rnd<<>>(d_res1, d_resp); + rdiv<<>>(d_res2, d_fsum, d_res1); + radd<<>>(d_resp, d_res2, 0.5); + } + HANDLE_ERROR(cudaGetLastError()); + //============================================= + HANDLE_ERROR(cudaDeviceSynchronize()); + + //=== form randoms sino === + sgl2sino<<<(NSINOS * AW + 1024) / 1024, 1024>>>(d_rsino, d_resp, d_s2cr, d_aw2sn, d_sn2rng, + Cnt.SPN); + 
HANDLE_ERROR(cudaGetLastError()); + //=== + + HANDLE_ERROR(cudaDeviceSynchronize()); + //--- + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGINFO) + printf(" DONE in %fs.\n", 0.001 * elapsedTime); + //=============================================<<<<<<<< + + //--- results to CPU + float *res = (float *)malloc(Cnt.NRNG * Cnt.NCRSR * sizeof(float)); + HANDLE_ERROR(cudaMemcpy( + res, d_resp, Cnt.NRNG * Cnt.NCRSR * sizeof(float), + cudaMemcpyDeviceToHost)); // d_resp + // CRYSTAL MAP: put the gaps back to the crystal map [64x504] + for (int i = 0; i < Cnt.NCRS; i++) { + if (txlut.crsr[i] > -1) { + for (int ri = 0; ri < Cnt.NRNG; ri++) { + cmap[ri + i * Cnt.NRNG] = res[Cnt.NRNG * txlut.crsr[i] + ri]; + } + } + } + + // randoms sino to the output structure + HANDLE_ERROR(cudaMemcpy(rsn, d_rsino, tot_bins * sizeof(float), cudaMemcpyDeviceToHost)); + //--- + + free(res); + free(fsum); + free(rrange); + + cudaFree(d_sn2rng); + cudaFree(d_rsino); + cudaFree(d_ones); + cudaFree(d_ncrs); + cudaFree(d_res1); + cudaFree(d_res2); + cudaFree(d_resp); + cudaFree(d_fsum); + cudaFree(d_aw2sn); + cudaFree(d_s2cr); + + return; } - - - - - - - //=============================================================================================== // New randoms //----------------------------------------------------------------------------------------------- -__global__ void p_rnd(float * res, - const float * crs, - const char *pmsksn, - const short *Msn1, - const int *cr2s) -{ - // res: array of results (sums for each crystals) - // crs: values for each crystal - // pmsksn: prompt sinogram mask for random regions only - // c2s: crystal to sino LUT (transaxially only) - // Msn1: michelogram LUT, from rings to sino number in span-1 - - //ring index - int itx = threadIdx.x; - - //crystal (transaxial) index - int ity = threadIdx.y; - - //rings 
(vertex of the fan sums) - int ibx = blockIdx.x; - - //crystals - int iby = blockIdx.y; - - float crystal_val = 0; - float c_sum = 0; - - //crystal index with an offset - int ic; - - for (int i = 0; i= c_rrange[ibx]) && (itx <= c_rrange[ibx + NRINGS])) { - - short sni = Msn1[NRINGS*ibx + itx]; - - //go through all transaxial crystals in the for loop (indexing: x-axial, y-transaxial) - ic = c_crange[iby] + (i + ity*CFOR); - - //check which crystals are in coincidence (within the range)(3rd row of c_crange) - //first see the order of the range; since it is on a circle the other end can be of lower number - if (c_crange[iby + 2 * nCRSR] == 0) { - if (ic <= c_crange[iby + nCRSR]) - crystal_val = crs[itx + NRINGS*ic] * pmsksn[sni + NSINOS*cr2s[nCRSR*iby + ic]]; - } - else { - if (ic <= (c_crange[iby + nCRSR] + nCRSR)) { - ic -= nCRSR*(ic >= nCRSR); - crystal_val = crs[itx + NRINGS*ic] * pmsksn[sni + NSINOS*cr2s[nCRSR*iby + ic]]; - } - } - }//end of if's - - __syncthreads(); - crystal_val = crystal_sum(crystal_val); - - // the partial sums are taken from the first warp and its first lane. 
- if (itx == 0 && ity == 0) { - c_sum += crystal_val; - //printf("\n(%d) = %lu\n", i, c_sum); - } - - } - - //get the sub-total sum - if (itx == 0 && ity == 0) { - //printf("\n[%d, %d] = %lu\n", ibx, iby, c_sum); - res[ibx + NRINGS*iby] = c_sum; - } - +__global__ void p_rnd(float *res, const float *crs, const char *pmsksn, const short *Msn1, + const int *cr2s) { + // res: array of results (sums for each crystals) + // crs: values for each crystal + // pmsksn: prompt sinogram mask for random regions only + // c2s: crystal to sino LUT (transaxially only) + // Msn1: michelogram LUT, from rings to sino number in span-1 + + // ring index + int itx = threadIdx.x; + + // crystal (transaxial) index + int ity = threadIdx.y; + + // rings (vertex of the fan sums) + int ibx = blockIdx.x; + + // crystals + int iby = blockIdx.y; + + float crystal_val = 0; + float c_sum = 0; + + // crystal index with an offset + int ic; + + for (int i = 0; i < CFOR; i++) { + crystal_val = 0; + // check which rings are in coincidence (dependent on the MRD) + // only a few rings are discarded for crystals lying on the edges of the axial FOV + // ibx is the ring vertex crystal, itx is the current ring crystal for summation + if ((itx >= c_rrange[ibx]) && (itx <= c_rrange[ibx + NRINGS])) { + + short sni = Msn1[NRINGS * ibx + itx]; + + // go through all transaxial crystals in the for loop (indexing: x-axial, y-transaxial) + ic = c_crange[iby] + (i + ity * CFOR); + + // check which crystals are in coincidence (within the range)(3rd row of c_crange) + // first see the order of the range; since it is on a circle the other end can be of lower + // number + if (c_crange[iby + 2 * nCRSR] == 0) { + if (ic <= c_crange[iby + nCRSR]) + crystal_val = crs[itx + NRINGS * ic] * pmsksn[sni + NSINOS * cr2s[nCRSR * iby + ic]]; + } else { + if (ic <= (c_crange[iby + nCRSR] + nCRSR)) { + ic -= nCRSR * (ic >= nCRSR); + crystal_val = crs[itx + NRINGS * ic] * pmsksn[sni + NSINOS * cr2s[nCRSR * iby + ic]]; + } + } + } // 
end of if's + + __syncthreads(); + crystal_val = crystal_sum(crystal_val); + + // the partial sums are taken from the first warp and its first lane. + if (itx == 0 && ity == 0) { + c_sum += crystal_val; + // printf("\n(%d) = %lu\n", i, c_sum); + } + } + + // get the sub-total sum + if (itx == 0 && ity == 0) { + // printf("\n[%d, %d] = %lu\n", ibx, iby, c_sum); + res[ibx + NRINGS * iby] = c_sum; + } } - // THE CPU PART: //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -void p_randoms(float *rsn, - float *cmap, - - const char *pmsksn, - unsigned int * fansums, - - txLUTs txlut, - short *sn1_rno, - short *sn1_sn11, - const short *Msn1, - const Cnst Cnt) -{ - - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); - - //--- the sino for estimated random events - float * d_rsino; - unsigned long long tot_bins = 0; - if (Cnt.SPN == 1) - tot_bins = Cnt.A*Cnt.W*Cnt.NSN1; - else if (Cnt.SPN == 11) - tot_bins = Cnt.A*Cnt.W*Cnt.NSN11; - HANDLE_ERROR(cudaMalloc(&d_rsino, tot_bins * sizeof(float))); - HANDLE_ERROR(cudaMemset(d_rsino, 0, tot_bins * sizeof(float))); - //--- - - //SPAN-1 to SPAN-11 conversion table in GPU constant memory - HANDLE_ERROR(cudaMemcpyToSymbol(c_li2span11, sn1_sn11, Cnt.NSN1 * sizeof(short))); - - //--- sino to rings LUT - short2 *d_sn2rng; - HANDLE_ERROR(cudaMalloc(&d_sn2rng, NSINOS * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_sn2rng, sn1_rno, NSINOS * sizeof(short2), cudaMemcpyHostToDevice)); - //--- - - //--- GPU linear indx to sino and crystal lookup table - short2 *d_s2cr; - HANDLE_ERROR(cudaMalloc(&d_s2cr, AW * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_s2cr, txlut.s2cr, AW * sizeof(short2), cudaMemcpyHostToDevice)); - short2 *d_aw2sn; - HANDLE_ERROR(cudaMalloc(&d_aw2sn, AW * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_aw2sn, txlut.aw2sn, AW * sizeof(short2), cudaMemcpyHostToDevice)); - //---- - - //prompt mask - char *d_pmsksn; - 
HANDLE_ERROR(cudaMalloc(&d_pmsksn, NSINOS*AW * sizeof(char))); - HANDLE_ERROR(cudaMemcpy(d_pmsksn, pmsksn, NSINOS*AW * sizeof(char), cudaMemcpyHostToDevice)); - //michelogram for #sino in span-1 - short *d_Msn1; - HANDLE_ERROR(cudaMalloc(&d_Msn1, NRINGS*NRINGS * sizeof(short))); - HANDLE_ERROR(cudaMemcpy(d_Msn1, Msn1, NRINGS*NRINGS * sizeof(short), cudaMemcpyHostToDevice)); - //reduced crystal (without gaps) to sino (no gaps too) - int *d_cr2s; - HANDLE_ERROR(cudaMalloc(&d_cr2s, nCRSR*nCRSR * sizeof(int))); - HANDLE_ERROR(cudaMemcpy(d_cr2s, txlut.cr2s, nCRSR*nCRSR * sizeof(int), cudaMemcpyHostToDevice)); - - - - //--- calculating transaxial crystal range being in coincidence with each opposing crystal - int wsum = 0; - int prv; //previous - short *crange = (short*)malloc(4 * Cnt.NCRSR * sizeof(short)); - for (int c1 = 0; c1prv) - crange[c1] = c2; - if (txlut.cij[c2 + Cnt.NCRSR*c1] or operator in crystal range calculations. - crange[c1 + 2 * Cnt.NCRSR] = (crange[c1] - crange[c1 + Cnt.NCRSR]) > 0; - - // if (crange[c1+2*Cnt.NCRSR] == 0) printf("cr1=%d, cr2=%d; c1 = %d, wsum=%d\n", crange[c1], crange[c1+Cnt.NCRSR], c1,wsum); - - crange[c1 + 3 * Cnt.NCRSR] = wsum; - //printf("%d. crange = <%d, %d, %d> . 
%d\n", c1, crange[c1], crange[c1+Cnt.NCRSR], crange[c1+2*Cnt.NCRSR], crange[c1]-crange[c1+Cnt.NCRSR]); - wsum = 0; - } - - // to constant memory (GPU) - HANDLE_ERROR(cudaMemcpyToSymbol(c_crange, crange, 4 * Cnt.NCRSR * sizeof(short))); - //--- - - //--- calculate axial crystal range (rings) being in coincidence with each opposing ring - short *rrange = (short*)malloc(3 * Cnt.NRNG * sizeof(short)); - memset(rrange, 1, 4 * Cnt.NRNG); - wsum = 0; - for (int ri = 0; ri= 0) && (rq> %d, %d.\n", ri, rrange[ri], rrange[ri + Cnt.NRNG]); - } - // to constant memory (GPU) - HANDLE_ERROR(cudaMemcpyToSymbol(c_rrange, rrange, 3 * Cnt.NRNG * sizeof(short))); - //--- - - - //---------- GET THE FAN SUMS in GPU----------------- - //get rid of gaps from the crystal map [64x504] - unsigned int * fsum = (unsigned int*)malloc(Cnt.NRNG*Cnt.NCRSR * sizeof(unsigned int)); - //indx for reduced number of crystals by the gaps - for (int i = 0; i-1) { - for (int ri = 0; ri estimating random events from prompts... "); - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - HANDLE_ERROR(cudaGetLastError()); - - // //===== Number of Crystal in Coincidence ====== - dim3 dBpG(Cnt.NRNG, Cnt.NCRSR, 1); - dim3 dTpB(Cnt.NRNG, 16, 1);//16 is chosen as with Cnt.NRNG it makes max for no of threads ie 1024 - p_rnd << > >(d_ncrs, d_ones, d_pmsksn, d_Msn1, d_cr2s); - HANDLE_ERROR(cudaGetLastError()); - // //============================================= - - - //========= INIT ============================== - rinit << > >(d_resp, d_fsum, d_ncrs); - HANDLE_ERROR(cudaGetLastError()); - //============================================= - - //========= ITERATE =========================== - for (int k = 0; k<10; k++) { - p_rnd << > >(d_res1, d_resp, d_pmsksn, d_Msn1, d_cr2s); - rdiv << > >(d_res2, d_fsum, d_res1); - radd << > >(d_resp, d_res2, 0.5); - } - HANDLE_ERROR(cudaGetLastError()); - //============================================= - 
HANDLE_ERROR(cudaDeviceSynchronize()); - - //=== form randoms sino === - sgl2sino << <(NSINOS*AW + 1024) / 1024, 1024 >> >(d_rsino, d_resp, d_s2cr, d_aw2sn, d_sn2rng, Cnt.SPN); - HANDLE_ERROR(cudaGetLastError()); - //=== - - HANDLE_ERROR(cudaDeviceSynchronize()); - //--- - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) printf(" DONE in %fs.\n", 0.001*elapsedTime); - //=============================================<<<<<<<< - - - - //--- results to CPU - float * res = (float*)malloc(Cnt.NRNG*Cnt.NCRSR * sizeof(float)); - HANDLE_ERROR(cudaMemcpy(res, d_resp, Cnt.NRNG*Cnt.NCRSR * sizeof(float), cudaMemcpyDeviceToHost));//d_resp - //CRYSTAL MAP: put the gaps back to the crystal map [64x504] - for (int i = 0; i-1) { - for (int ri = 0; ri using CUDA device #%d\n", dev_id); + + //--- the sino for estimated random events + float *d_rsino; + unsigned long long tot_bins = 0; + if (Cnt.SPN == 1) + tot_bins = Cnt.A * Cnt.W * Cnt.NSN1; + else if (Cnt.SPN == 11) + tot_bins = Cnt.A * Cnt.W * Cnt.NSN11; + HANDLE_ERROR(cudaMalloc(&d_rsino, tot_bins * sizeof(float))); + HANDLE_ERROR(cudaMemset(d_rsino, 0, tot_bins * sizeof(float))); + //--- + + // SPAN-1 to SPAN-11 conversion table in GPU constant memory + HANDLE_ERROR(cudaMemcpyToSymbol(c_li2span11, sn1_sn11, Cnt.NSN1 * sizeof(short))); + + //--- sino to rings LUT + short2 *d_sn2rng; + HANDLE_ERROR(cudaMalloc(&d_sn2rng, NSINOS * sizeof(short2))); + HANDLE_ERROR(cudaMemcpy(d_sn2rng, sn1_rno, NSINOS * sizeof(short2), cudaMemcpyHostToDevice)); + //--- + + //--- GPU linear indx to sino and crystal lookup table + short2 *d_s2cr; + HANDLE_ERROR(cudaMalloc(&d_s2cr, AW * sizeof(short2))); + HANDLE_ERROR(cudaMemcpy(d_s2cr, txlut.s2cr, AW * sizeof(short2), cudaMemcpyHostToDevice)); + short2 *d_aw2sn; + HANDLE_ERROR(cudaMalloc(&d_aw2sn, AW * sizeof(short2))); + 
HANDLE_ERROR(cudaMemcpy(d_aw2sn, txlut.aw2sn, AW * sizeof(short2), cudaMemcpyHostToDevice)); + //---- + + // prompt mask + char *d_pmsksn; + HANDLE_ERROR(cudaMalloc(&d_pmsksn, NSINOS * AW * sizeof(char))); + HANDLE_ERROR(cudaMemcpy(d_pmsksn, pmsksn, NSINOS * AW * sizeof(char), cudaMemcpyHostToDevice)); + // michelogram for #sino in span-1 + short *d_Msn1; + HANDLE_ERROR(cudaMalloc(&d_Msn1, NRINGS * NRINGS * sizeof(short))); + HANDLE_ERROR(cudaMemcpy(d_Msn1, Msn1, NRINGS * NRINGS * sizeof(short), cudaMemcpyHostToDevice)); + // reduced crystal (without gaps) to sino (no gaps too) + int *d_cr2s; + HANDLE_ERROR(cudaMalloc(&d_cr2s, nCRSR * nCRSR * sizeof(int))); + HANDLE_ERROR( + cudaMemcpy(d_cr2s, txlut.cr2s, nCRSR * nCRSR * sizeof(int), cudaMemcpyHostToDevice)); + + //--- calculating transaxial crystal range being in coincidence with each opposing crystal + int wsum = 0; + int prv; // previous + short *crange = (short *)malloc(4 * Cnt.NCRSR * sizeof(short)); + for (int c1 = 0; c1 < Cnt.NCRSR; c1 += 1) { + prv = txlut.cij[Cnt.NCRSR * c1 + Cnt.NCRSR - 1]; + + for (int c2 = 0; c2 < Cnt.NCRSR; c2 += 1) { + wsum += txlut.cij[c2 + Cnt.NCRSR * c1]; + if (txlut.cij[c2 + Cnt.NCRSR * c1] > prv) + crange[c1] = c2; + if (txlut.cij[c2 + Cnt.NCRSR * c1] < prv) + crange[c1 + Cnt.NCRSR] = c2 - 1 + Cnt.NCRSR * (c2 == 0); + prv = txlut.cij[c2 + Cnt.NCRSR * c1]; + } + // for GPU conditional use of or operator in crystal range calculations. + crange[c1 + 2 * Cnt.NCRSR] = (crange[c1] - crange[c1 + Cnt.NCRSR]) > 0; + + // if (crange[c1+2*Cnt.NCRSR] == 0) printf("cr1=%d, cr2=%d; c1 = %d, wsum=%d\n", crange[c1], + // crange[c1+Cnt.NCRSR], c1,wsum); + + crange[c1 + 3 * Cnt.NCRSR] = wsum; + // printf("%d. crange = <%d, %d, %d> . 
%d\n", c1, crange[c1], crange[c1+Cnt.NCRSR], + // crange[c1+2*Cnt.NCRSR], crange[c1]-crange[c1+Cnt.NCRSR]); + wsum = 0; + } + + // to constant memory (GPU) + HANDLE_ERROR(cudaMemcpyToSymbol(c_crange, crange, 4 * Cnt.NCRSR * sizeof(short))); + //--- + + //--- calculate axial crystal range (rings) being in coincidence with each opposing ring + short *rrange = (short *)malloc(3 * Cnt.NRNG * sizeof(short)); + memset(rrange, 1, 4 * Cnt.NRNG); + wsum = 0; + for (int ri = 0; ri < Cnt.NRNG; ri++) { + for (int rq = (ri - Cnt.MRD); rq < (ri + Cnt.MRD + 1); rq++) { + if ((rq >= 0) && (rq < Cnt.NRNG)) { + wsum += 1; + if (rrange[ri] == 257) + rrange[ri] = rq; + rrange[ri + Cnt.NRNG] = rq; + } + rrange[ri + 2 * Cnt.NRNG] = wsum; + wsum = 0; + } + // printf("%d >> %d, %d.\n", ri, rrange[ri], rrange[ri + Cnt.NRNG]); + } + // to constant memory (GPU) + HANDLE_ERROR(cudaMemcpyToSymbol(c_rrange, rrange, 3 * Cnt.NRNG * sizeof(short))); + //--- + + //---------- GET THE FAN SUMS in GPU----------------- + // get rid of gaps from the crystal map [64x504] + unsigned int *fsum = (unsigned int *)malloc(Cnt.NRNG * Cnt.NCRSR * sizeof(unsigned int)); + // indx for reduced number of crystals by the gaps + for (int i = 0; i < Cnt.NCRS; i++) { + if (txlut.crsr[i] > -1) { + for (int ri = 0; ri < Cnt.NRNG; ri++) { + fsum[ri + txlut.crsr[i] * Cnt.NRNG] = fansums[Cnt.NCRS * ri + i]; + // printf("fsum(%d,%d)=%d * ", ri, txlut.crsr[i], fsum[ri + txlut.crsr[i]*Cnt.NRNG]); + } + } + } + + // load the reduced fansums to the device + unsigned int *d_fsum; + HANDLE_ERROR(cudaMalloc(&d_fsum, Cnt.NRNG * Cnt.NCRSR * sizeof(unsigned int))); + HANDLE_ERROR(cudaMemcpy(d_fsum, fsum, Cnt.NRNG * Cnt.NCRSR * sizeof(unsigned int), + cudaMemcpyHostToDevice)); + //---------------------------------------------- + + // results GPU + float *d_resp; + HANDLE_ERROR(cudaMalloc(&d_resp, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + + float *d_res1; + HANDLE_ERROR(cudaMalloc(&d_res1, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + + 
float *d_res2; + HANDLE_ERROR(cudaMalloc(&d_res2, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + HANDLE_ERROR(cudaMemset(d_res2, 0, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + + // crystal 'ones' for init and number of crystal in coincidence for each opposing crystal + float *ones = (float *)malloc(Cnt.NRNG * Cnt.NCRSR * sizeof(float)); + for (int i = 0; i < Cnt.NRNG * Cnt.NCRSR; i++) + ones[i] = 1; + float *d_ones; + HANDLE_ERROR(cudaMalloc(&d_ones, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + HANDLE_ERROR( + cudaMemcpy(d_ones, ones, Cnt.NRNG * Cnt.NCRSR * sizeof(float), cudaMemcpyHostToDevice)); + + // number of crystals in coincidence + float *d_ncrs; + HANDLE_ERROR(cudaMalloc(&d_ncrs, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); + + //=============================================<<<<<<<< + if (Cnt.LOG <= LOGINFO) + printf("\ni> estimating random events from prompts... "); + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + + HANDLE_ERROR(cudaGetLastError()); + + // //===== Number of Crystal in Coincidence ====== + dim3 dBpG(Cnt.NRNG, Cnt.NCRSR, 1); + dim3 dTpB(Cnt.NRNG, 16, + 1); // 16 is chosen as with Cnt.NRNG it makes max for no of threads ie 1024 + p_rnd<<>>(d_ncrs, d_ones, d_pmsksn, d_Msn1, d_cr2s); + HANDLE_ERROR(cudaGetLastError()); + // //============================================= + + //========= INIT ============================== + rinit<<>>(d_resp, d_fsum, d_ncrs); + HANDLE_ERROR(cudaGetLastError()); + //============================================= + + //========= ITERATE =========================== + for (int k = 0; k < 10; k++) { + p_rnd<<>>(d_res1, d_resp, d_pmsksn, d_Msn1, d_cr2s); + rdiv<<>>(d_res2, d_fsum, d_res1); + radd<<>>(d_resp, d_res2, 0.5); + } + HANDLE_ERROR(cudaGetLastError()); + //============================================= + HANDLE_ERROR(cudaDeviceSynchronize()); + + //=== form randoms sino === + sgl2sino<<<(NSINOS * AW + 1024) / 1024, 1024>>>(d_rsino, d_resp, d_s2cr, d_aw2sn, 
d_sn2rng, + Cnt.SPN); + HANDLE_ERROR(cudaGetLastError()); + //=== + + HANDLE_ERROR(cudaDeviceSynchronize()); + //--- + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGINFO) + printf(" DONE in %fs.\n", 0.001 * elapsedTime); + //=============================================<<<<<<<< + + //--- results to CPU + float *res = (float *)malloc(Cnt.NRNG * Cnt.NCRSR * sizeof(float)); + HANDLE_ERROR(cudaMemcpy( + res, d_resp, Cnt.NRNG * Cnt.NCRSR * sizeof(float), + cudaMemcpyDeviceToHost)); // d_resp + // CRYSTAL MAP: put the gaps back to the crystal map [64x504] + for (int i = 0; i < Cnt.NCRS; i++) { + if (txlut.crsr[i] > -1) { + for (int ri = 0; ri < Cnt.NRNG; ri++) { + cmap[ri + i * Cnt.NRNG] = res[Cnt.NRNG * txlut.crsr[i] + ri]; + } + } + } + + // randoms sino to the output structure + HANDLE_ERROR(cudaMemcpy(rsn, d_rsino, tot_bins * sizeof(float), cudaMemcpyDeviceToHost)); + //--- + + free(res); + free(fsum); + free(rrange); + + cudaFree(d_sn2rng); + cudaFree(d_rsino); + cudaFree(d_ones); + cudaFree(d_ncrs); + cudaFree(d_res1); + cudaFree(d_res2); + cudaFree(d_resp); + cudaFree(d_fsum); + cudaFree(d_aw2sn); + cudaFree(d_s2cr); + + return; } diff --git a/niftypet/nipet/lm/src/rnd.h b/niftypet/nipet/lm/src/rnd.h index 3ef58442..ec9b7b3a 100644 --- a/niftypet/nipet/lm/src/rnd.h +++ b/niftypet/nipet/lm/src/rnd.h @@ -4,26 +4,13 @@ #include "def.h" #include "scanner_0.h" -void gpu_randoms(float *rsn, - float *cmap, - unsigned int *d_fansums, - txLUTs txlut, - short *sn1_rno, - short *sn1_sn11, - const Cnst Cnt); +void gpu_randoms(float *rsn, float *cmap, unsigned int *d_fansums, txLUTs txlut, short *sn1_rno, + short *sn1_sn11, const Cnst Cnt); +void p_randoms(float *rsn, float *cmap, -void p_randoms(float *rsn, - float *cmap, - - const char *pmsksn, - unsigned int * fansums, - - txLUTs txlut, - short *sn1_rno, - short 
*sn1_sn11, - const short *Msn1, - const Cnst Cnt); + const char *pmsksn, unsigned int *fansums, + txLUTs txlut, short *sn1_rno, short *sn1_sn11, const short *Msn1, const Cnst Cnt); #endif diff --git a/niftypet/nipet/prj/src/prj_module.cu b/niftypet/nipet/prj/src/prj_module.cu index 039aa6d2..71e38a93 100644 --- a/niftypet/nipet/prj/src/prj_module.cu +++ b/niftypet/nipet/prj/src/prj_module.cu @@ -8,23 +8,21 @@ Copyrights: 2019 ------------------------------------------------------------------------*/ #define PY_SSIZE_T_CLEAN -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION //NPY_API_VERSION +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION // NPY_API_VERSION +#include "def.h" #include -#include #include -#include "def.h" +#include -#include "prjf.h" #include "prjb.h" +#include "prjf.h" #include "tprj.h" #include "recon.h" #include "scanner_0.h" - - //===================== START PYTHON INIT ============================== //--- Available functions @@ -34,845 +32,814 @@ static PyObject *back_prj(PyObject *self, PyObject *args); static PyObject *osem_rec(PyObject *self, PyObject *args); //--- - //> Module Method Table static PyMethodDef petprj_methods[] = { - {"tprj", trnx_prj, METH_VARARGS, - "Transaxial projector."}, - {"fprj", frwd_prj, METH_VARARGS, - "PET forward projector."}, - {"bprj", back_prj, METH_VARARGS, - "PET back projector." }, - {"osem", osem_rec, METH_VARARGS, - "OSEM reconstruction of PET data." 
}, - {NULL, NULL, 0, NULL} // Sentinel + {"tprj", trnx_prj, METH_VARARGS, "Transaxial projector."}, + {"fprj", frwd_prj, METH_VARARGS, "PET forward projector."}, + {"bprj", back_prj, METH_VARARGS, "PET back projector."}, + {"osem", osem_rec, METH_VARARGS, "OSEM reconstruction of PET data."}, + {NULL, NULL, 0, NULL} // Sentinel }; //> Module Definition Structure static struct PyModuleDef petprj_module = { - PyModuleDef_HEAD_INIT, - "petprj", //> name of module - //> module documentation, may be NULL - "This module provides an interface for GPU routines of PET forward and back projection.", - -1, //> the module keeps state in global variables. - petprj_methods -}; + PyModuleDef_HEAD_INIT, + "petprj", //> name of module + //> module documentation, may be NULL + "This module provides an interface for GPU routines of PET forward and back projection.", + -1, //> the module keeps state in global variables. + petprj_methods}; //> Initialization function PyMODINIT_FUNC PyInit_petprj(void) { - Py_Initialize(); + Py_Initialize(); - //> load NumPy functionality - import_array(); + //> load NumPy functionality + import_array(); - return PyModule_Create(&petprj_module); + return PyModule_Create(&petprj_module); } //====================== END PYTHON INIT =============================== - //============================================================================== // T R A N S A X I A L P R O J E C T O R //------------------------------------------------------------------------------ -static PyObject *trnx_prj(PyObject *self, PyObject *args) -{ - //Structure of constants - Cnst Cnt; - - //Dictionary of scanner constants - PyObject * o_mmrcnst; - - // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). 
- PyObject * o_txLUT; +static PyObject *trnx_prj(PyObject *self, PyObject *args) { + // Structure of constants + Cnst Cnt; - // input/output image - PyObject * o_im; + // Dictionary of scanner constants + PyObject *o_mmrcnst; - // input/output projection sinogram - PyObject * o_prjout; + // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). + PyObject *o_txLUT; - // output transaxial sampling parameters - PyObject * o_tv; - PyObject * o_tt; + // input/output image + PyObject *o_im; + // input/output projection sinogram + PyObject *o_prjout; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOOOO", &o_prjout, &o_im, &o_tv, &o_tt, &o_txLUT, &o_mmrcnst)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + // output transaxial sampling parameters + PyObject *o_tv; + PyObject *o_tt; - PyObject* pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - PyObject* pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OOOOOO", &o_prjout, &o_im, &o_tv, &o_tt, &o_txLUT, &o_mmrcnst)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); + Cnt.LOG = (char)PyLong_AsLong(pd_log); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - // transaxial sino LUTs: - PyObject* pd_crs = PyDict_GetItemString(o_txLUT, "crs"); - PyObject* pd_s2c = PyDict_GetItemString(o_txLUT, "s2c"); + // transaxial sino LUTs: + PyObject *pd_crs = PyDict_GetItemString(o_txLUT, "crs"); + 
PyObject *pd_s2c = PyDict_GetItemString(o_txLUT, "s2c"); - //sino to crystal, crystals - PyArrayObject *p_s2c = NULL, *p_crs = NULL; - p_s2c = (PyArrayObject *)PyArray_FROM_OTF(pd_s2c, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_crs = (PyArrayObject *)PyArray_FROM_OTF(pd_crs, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + // sino to crystal, crystals + PyArrayObject *p_s2c = NULL, *p_crs = NULL; + p_s2c = (PyArrayObject *)PyArray_FROM_OTF(pd_s2c, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_crs = (PyArrayObject *)PyArray_FROM_OTF(pd_crs, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + // image object + PyArrayObject *p_im = NULL; + p_im = (PyArrayObject *)PyArray_FROM_OTF(o_im, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - //image object - PyArrayObject *p_im = NULL; - p_im = (PyArrayObject *)PyArray_FROM_OTF(o_im, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + // output sino object + PyArrayObject *p_prjout = NULL; + p_prjout = (PyArrayObject *)PyArray_FROM_OTF(o_prjout, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - //output sino object - PyArrayObject *p_prjout = NULL; - p_prjout = (PyArrayObject *)PyArray_FROM_OTF(o_prjout, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + // transaxial voxel sampling (ray-driven) + PyArrayObject *p_tv = NULL; + p_tv = (PyArrayObject *)PyArray_FROM_OTF(o_tv, NPY_UINT8, NPY_ARRAY_INOUT_ARRAY2); - //transaxial voxel sampling (ray-driven) - PyArrayObject *p_tv = NULL; - p_tv = (PyArrayObject *)PyArray_FROM_OTF(o_tv, NPY_UINT8, NPY_ARRAY_INOUT_ARRAY2); + // transaxial parameters for voxel sampling (ray-driven) + PyArrayObject *p_tt = NULL; + p_tt = (PyArrayObject *)PyArray_FROM_OTF(o_tt, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - //transaxial parameters for voxel sampling (ray-driven) - PyArrayObject *p_tt = NULL; - p_tt = (PyArrayObject *)PyArray_FROM_OTF(o_tt, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + //-- - //-- + /* If that didn't work, throw an exception. 
*/ + if (p_s2c == NULL || p_im == NULL || p_crs == NULL || p_prjout == NULL || p_tv == NULL || + p_tt == NULL) { + // sino 2 crystals + Py_XDECREF(p_s2c); + Py_XDECREF(p_crs); - /* If that didn't work, throw an exception. */ - if (p_s2c == NULL || p_im == NULL || p_crs == NULL || - p_prjout == NULL || p_tv == NULL || p_tt == NULL) - { - //sino 2 crystals - Py_XDECREF(p_s2c); - Py_XDECREF(p_crs); + // image object + PyArray_DiscardWritebackIfCopy(p_im); + Py_XDECREF(p_im); - //image object - PyArray_DiscardWritebackIfCopy(p_im); - Py_XDECREF(p_im); + // output sino object + PyArray_DiscardWritebackIfCopy(p_prjout); + Py_XDECREF(p_prjout); - //output sino object - PyArray_DiscardWritebackIfCopy(p_prjout); - Py_XDECREF(p_prjout); + // transaxial outputs + PyArray_DiscardWritebackIfCopy(p_tv); + Py_XDECREF(p_tv); - //transaxial outputs - PyArray_DiscardWritebackIfCopy(p_tv); - Py_XDECREF(p_tv); + PyArray_DiscardWritebackIfCopy(p_tt); + Py_XDECREF(p_tt); - PyArray_DiscardWritebackIfCopy(p_tt); - Py_XDECREF(p_tt); + return NULL; + } - return NULL; - } + short *s2c = (short *)PyArray_DATA(p_s2c); + float *crs = (float *)PyArray_DATA(p_crs); - short *s2c = (short*)PyArray_DATA(p_s2c); - float *crs = (float*)PyArray_DATA(p_crs); + int N0crs = PyArray_DIM(p_crs, 0); + int N1crs = PyArray_DIM(p_crs, 1); + if (Cnt.LOG <= LOGDEBUG) + printf("\ni> N0crs=%d, N1crs=%d\n", N0crs, N1crs); - int N0crs = PyArray_DIM(p_crs, 0); - int N1crs = PyArray_DIM(p_crs, 1); - if (Cnt.LOG <= LOGDEBUG) - printf("\ni> N0crs=%d, N1crs=%d\n", N0crs, N1crs); + float *im = (float *)PyArray_DATA(p_im); + if (Cnt.LOG <= LOGDEBUG) + printf("i> forward-projection image dimensions: %ld, %ld\n", PyArray_DIM(p_im, 0), + PyArray_DIM(p_im, 1)); + // input/output projection sinogram + float *prjout = (float *)PyArray_DATA(p_prjout); - float *im = (float*)PyArray_DATA(p_im); - if (Cnt.LOG <= LOGDEBUG) - printf("i> forward-projection image dimensions: %ld, %ld\n", PyArray_DIM(p_im, 0), PyArray_DIM(p_im, 1)); + // 
output sampling + unsigned char *tv = (unsigned char *)PyArray_DATA(p_tv); + float *tt = (float *)PyArray_DATA(p_tt); - // input/output projection sinogram - float *prjout = (float*)PyArray_DATA(p_prjout); + // CUDA -------------------------------------------------------------------- - // output sampling - unsigned char *tv = (unsigned char*)PyArray_DATA(p_tv); - float *tt = (float*)PyArray_DATA(p_tt); + // sets the device on which to calculate + HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGDEBUG) + printf("i> using CUDA device #%d\n", dev_id); - // CUDA -------------------------------------------------------------------- + //--- TRANSAXIAL COMPONENTS + float4 *d_crs; + HANDLE_ERROR(cudaMalloc(&d_crs, N0crs * sizeof(float4))); + HANDLE_ERROR(cudaMemcpy(d_crs, crs, N0crs * sizeof(float4), cudaMemcpyHostToDevice)); - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + short2 *d_s2c; + HANDLE_ERROR(cudaMalloc(&d_s2c, AW * sizeof(short2))); + HANDLE_ERROR(cudaMemcpy(d_s2c, s2c, AW * sizeof(short2), cudaMemcpyHostToDevice)); - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); + float *d_tt; + HANDLE_ERROR(cudaMalloc(&d_tt, N_TT * AW * sizeof(float))); - //--- TRANSAXIAL COMPONENTS - float4 *d_crs; HANDLE_ERROR(cudaMalloc(&d_crs, N0crs * sizeof(float4))); - HANDLE_ERROR(cudaMemcpy(d_crs, crs, N0crs * sizeof(float4), cudaMemcpyHostToDevice)); + unsigned char *d_tv; + HANDLE_ERROR(cudaMalloc(&d_tv, N_TV * AW * sizeof(unsigned char))); + HANDLE_ERROR(cudaMemset(d_tv, 0, N_TV * AW * sizeof(unsigned char))); - short2 *d_s2c; HANDLE_ERROR(cudaMalloc(&d_s2c, AW * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_s2c, s2c, AW * sizeof(short2), cudaMemcpyHostToDevice)); + //------------DO TRANSAXIAL CALCULATIONS------------------------------------ + gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); + 
//-------------------------------------------------------------------------- - float *d_tt; HANDLE_ERROR(cudaMalloc(&d_tt, N_TT*AW * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(tt, d_tt, N_TT * AW * sizeof(float), cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(tv, d_tv, N_TV * AW * sizeof(unsigned char), cudaMemcpyDeviceToHost)); - unsigned char *d_tv; HANDLE_ERROR(cudaMalloc(&d_tv, N_TV*AW * sizeof(unsigned char))); - HANDLE_ERROR(cudaMemset(d_tv, 0, N_TV*AW * sizeof(unsigned char))); + // CUDA END----------------------------------------------------------------- - //------------DO TRANSAXIAL CALCULATIONS------------------------------------ - gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); - //-------------------------------------------------------------------------- + // Clean up + Py_DECREF(p_s2c); + Py_DECREF(p_crs); - HANDLE_ERROR( - cudaMemcpy(tt, d_tt, N_TT*AW * sizeof(float), cudaMemcpyDeviceToHost)); - HANDLE_ERROR( - cudaMemcpy(tv, d_tv, N_TV*AW * sizeof(unsigned char), cudaMemcpyDeviceToHost)); + PyArray_ResolveWritebackIfCopy(p_im); + Py_DECREF(p_im); - // CUDA END----------------------------------------------------------------- + PyArray_ResolveWritebackIfCopy(p_tv); + Py_DECREF(p_tv); + PyArray_ResolveWritebackIfCopy(p_tt); + Py_DECREF(p_tt); - //Clean up - Py_DECREF(p_s2c); - Py_DECREF(p_crs); + PyArray_ResolveWritebackIfCopy(p_prjout); + Py_DECREF(p_prjout); - PyArray_ResolveWritebackIfCopy(p_im); - Py_DECREF(p_im); - - PyArray_ResolveWritebackIfCopy(p_tv); - Py_DECREF(p_tv); - - PyArray_ResolveWritebackIfCopy(p_tt); - Py_DECREF(p_tt); - - PyArray_ResolveWritebackIfCopy(p_prjout); - Py_DECREF(p_prjout); - - Py_INCREF(Py_None); - return Py_None; + Py_INCREF(Py_None); + return Py_None; } //------------------------------------------------------------------------------ - - - - //============================================================================== // F O R W A R D P R O J E C T O R 
//------------------------------------------------------------------------------ -static PyObject *frwd_prj(PyObject *self, PyObject *args) -{ - //Structure of constants - Cnst Cnt; - - //Dictionary of scanner constants - PyObject * o_mmrcnst; - - // axial LUT dictionary. contains such LUTs: li2rno, li2sn, li2nos. - PyObject * o_axLUT; - - // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). - PyObject * o_txLUT; - - // input image to be forward projected (reshaped for GPU execution) - PyObject * o_im; - - // subsets for OSEM, first the default - PyObject * o_subs; - - //output projection sino - PyObject * o_prjout; - - //flag for attenuation factors to be found based on mu-map; if 0 normal emission projection is used - int att; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOOOOi", &o_prjout, &o_im, &o_txLUT, &o_axLUT, &o_subs, &o_mmrcnst, &att)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - PyObject* pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); - Cnt.SPN = (char)PyLong_AsLong(pd_span); - PyObject* pd_rngstrt = PyDict_GetItemString(o_mmrcnst, "RNG_STRT"); - Cnt.RNG_STRT = (char)PyLong_AsLong(pd_rngstrt); - PyObject* pd_rngend = PyDict_GetItemString(o_mmrcnst, "RNG_END"); - Cnt.RNG_END = (char)PyLong_AsLong(pd_rngend); - PyObject* pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - PyObject* pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - - /* Interpret the input objects as numpy arrays. 
*/ - // axial LUTs: - PyObject* pd_li2rno = PyDict_GetItemString(o_axLUT, "li2rno"); - PyObject* pd_li2sn = PyDict_GetItemString(o_axLUT, "li2sn"); - PyObject* pd_li2sn1 = PyDict_GetItemString(o_axLUT, "li2sn1"); - PyObject* pd_li2nos = PyDict_GetItemString(o_axLUT, "li2nos"); - PyObject* pd_li2rng = PyDict_GetItemString(o_axLUT, "li2rng"); - - //-- get the arrays from the dictionaries - // axLUTs - PyArrayObject *p_li2rno = NULL, *p_li2sn1 = NULL, *p_li2sn = NULL; - PyArrayObject *p_li2nos = NULL, *p_li2rng = NULL; - p_li2rno = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rno, NPY_INT8, NPY_ARRAY_IN_ARRAY); - p_li2sn1 = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn1, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_li2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_li2nos = (PyArrayObject *)PyArray_FROM_OTF(pd_li2nos, NPY_INT8, NPY_ARRAY_IN_ARRAY); - p_li2rng = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rng, NPY_FLOAT32,NPY_ARRAY_IN_ARRAY); - - - // transaxial sino LUTs: - PyObject* pd_crs = PyDict_GetItemString(o_txLUT, "crs"); - PyObject* pd_s2c = PyDict_GetItemString(o_txLUT, "s2c"); - PyObject* pd_aw2ali = PyDict_GetItemString(o_txLUT, "aw2ali"); - - //sino to crystal, crystals - PyArrayObject *p_s2c = NULL, *p_crs = NULL, *p_aw2ali = NULL; - p_s2c = (PyArrayObject *)PyArray_FROM_OTF(pd_s2c, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_crs = (PyArrayObject *)PyArray_FROM_OTF(pd_crs, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - - p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); - - - //image object - PyArrayObject *p_im = NULL; - p_im = (PyArrayObject *)PyArray_FROM_OTF(o_im, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - - //subsets if using e.g., OSEM - PyArrayObject *p_subs = NULL; - p_subs = (PyArrayObject *)PyArray_FROM_OTF(o_subs, NPY_INT32, NPY_ARRAY_IN_ARRAY); - - //output sino object - PyArrayObject *p_prjout = NULL; - p_prjout = (PyArrayObject *)PyArray_FROM_OTF(o_prjout, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - //-- - - - /* If 
that didn't work, throw an exception. */ - if (p_li2rno == NULL || p_li2sn == NULL || p_li2sn1 == NULL || p_li2nos == NULL || - p_aw2ali == NULL || p_s2c == NULL || p_im == NULL || p_crs == NULL || - p_subs == NULL || p_prjout == NULL || p_li2rng == NULL) - { - //axLUTs - Py_XDECREF(p_li2rno); - Py_XDECREF(p_li2sn); - Py_XDECREF(p_li2sn1); - Py_XDECREF(p_li2nos); - Py_XDECREF(p_li2rng); - - //2D sino LUT - Py_XDECREF(p_aw2ali); - //sino 2 crystals - Py_XDECREF(p_s2c); - Py_XDECREF(p_crs); - //image object - Py_XDECREF(p_im); - //subset definition object - Py_XDECREF(p_subs); - - //output sino object - PyArray_DiscardWritebackIfCopy(p_prjout); - Py_XDECREF(p_prjout); - - return NULL; - } - - int *subs_ = (int*)PyArray_DATA(p_subs); - short *s2c = (short*)PyArray_DATA(p_s2c); - int *aw2ali = (int*)PyArray_DATA(p_aw2ali); - short *li2sn; - if (Cnt.SPN == 11) { - li2sn = (short*)PyArray_DATA(p_li2sn); - } - else if (Cnt.SPN == 1) { - li2sn = (short*)PyArray_DATA(p_li2sn1); - } - char *li2nos = (char*)PyArray_DATA(p_li2nos); - float *li2rng = (float*)PyArray_DATA(p_li2rng); - float *crs = (float*)PyArray_DATA(p_crs); - float *im = (float*)PyArray_DATA(p_im); - - if (Cnt.LOG <= LOGDEBUG) - printf("i> forward-projection image dimensions: %ld, %ld, %ld\n", PyArray_DIM(p_im, 0), PyArray_DIM(p_im, 1), PyArray_DIM(p_im, 2)); - - int Nprj = PyArray_DIM(p_subs, 0); - int N0crs = PyArray_DIM(p_crs, 0); - int N1crs = PyArray_DIM(p_crs, 1); - int Naw = PyArray_DIM(p_aw2ali, 0); - - if (Cnt.LOG <= LOGDEBUG) - printf("\ni> N0crs=%d, N1crs=%d, Naw=%d, Nprj=%d\n", N0crs, N1crs, Naw, Nprj); - - int *subs; - if (subs_[0] == -1) { - Nprj = AW; - if (Cnt.LOG <= LOGWARNING) - printf("i> no subsets defined. number of projection bins in 2D: %d\n", Nprj); - // all projections in - subs = (int*)malloc(Nprj * sizeof(int)); - for (int i = 0; i subsets defined. 
number of subset projection bins in 2D: %d\n", Nprj); - subs = subs_; - } - - // output projection sinogram - float *prjout = (float*)PyArray_DATA(p_prjout); - - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); - - //<><><><><><><<><><><><><><><><><><><><><><><><><<><><><><><><><><><><><><><><><><><><<><><><><><><><><><><> - gpu_fprj(prjout, im, - li2rng, li2sn, li2nos, - s2c, aw2ali, crs, subs, - Nprj, Naw, N0crs, Cnt, att); - //<><><><><><><><<><><><><><><><><><><><><><><><><<><><><><><><><><><><><><><><><><><><<><><><><><><><><><><> - - - - //Clean up - Py_DECREF(p_li2rno); - Py_DECREF(p_li2rng); - Py_DECREF(p_li2sn); - Py_DECREF(p_li2sn1); - Py_DECREF(p_li2nos); - Py_DECREF(p_aw2ali); - Py_DECREF(p_s2c); - Py_DECREF(p_crs); - Py_DECREF(p_im); - Py_DECREF(p_subs); - - PyArray_ResolveWritebackIfCopy(p_prjout); - Py_DECREF(p_prjout); - - if (subs_[0] == -1) free(subs); - - Py_INCREF(Py_None); - return Py_None; +static PyObject *frwd_prj(PyObject *self, PyObject *args) { + // Structure of constants + Cnst Cnt; + + // Dictionary of scanner constants + PyObject *o_mmrcnst; + + // axial LUT dictionary. contains such LUTs: li2rno, li2sn, li2nos. + PyObject *o_axLUT; + + // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). 
+ PyObject *o_txLUT; + + // input image to be forward projected (reshaped for GPU execution) + PyObject *o_im; + + // subsets for OSEM, first the default + PyObject *o_subs; + + // output projection sino + PyObject *o_prjout; + + // flag for attenuation factors to be found based on mu-map; if 0 normal emission projection is + // used + int att; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OOOOOOi", &o_prjout, &o_im, &o_txLUT, &o_axLUT, &o_subs, &o_mmrcnst, + &att)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + PyObject *pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); + Cnt.SPN = (char)PyLong_AsLong(pd_span); + PyObject *pd_rngstrt = PyDict_GetItemString(o_mmrcnst, "RNG_STRT"); + Cnt.RNG_STRT = (char)PyLong_AsLong(pd_rngstrt); + PyObject *pd_rngend = PyDict_GetItemString(o_mmrcnst, "RNG_END"); + Cnt.RNG_END = (char)PyLong_AsLong(pd_rngend); + PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); + Cnt.LOG = (char)PyLong_AsLong(pd_log); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + + /* Interpret the input objects as numpy arrays. 
*/ + // axial LUTs: + PyObject *pd_li2rno = PyDict_GetItemString(o_axLUT, "li2rno"); + PyObject *pd_li2sn = PyDict_GetItemString(o_axLUT, "li2sn"); + PyObject *pd_li2sn1 = PyDict_GetItemString(o_axLUT, "li2sn1"); + PyObject *pd_li2nos = PyDict_GetItemString(o_axLUT, "li2nos"); + PyObject *pd_li2rng = PyDict_GetItemString(o_axLUT, "li2rng"); + + //-- get the arrays from the dictionaries + // axLUTs + PyArrayObject *p_li2rno = NULL, *p_li2sn1 = NULL, *p_li2sn = NULL; + PyArrayObject *p_li2nos = NULL, *p_li2rng = NULL; + p_li2rno = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rno, NPY_INT8, NPY_ARRAY_IN_ARRAY); + p_li2sn1 = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn1, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_li2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_li2nos = (PyArrayObject *)PyArray_FROM_OTF(pd_li2nos, NPY_INT8, NPY_ARRAY_IN_ARRAY); + p_li2rng = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + // transaxial sino LUTs: + PyObject *pd_crs = PyDict_GetItemString(o_txLUT, "crs"); + PyObject *pd_s2c = PyDict_GetItemString(o_txLUT, "s2c"); + PyObject *pd_aw2ali = PyDict_GetItemString(o_txLUT, "aw2ali"); + + // sino to crystal, crystals + PyArrayObject *p_s2c = NULL, *p_crs = NULL, *p_aw2ali = NULL; + p_s2c = (PyArrayObject *)PyArray_FROM_OTF(pd_s2c, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_crs = (PyArrayObject *)PyArray_FROM_OTF(pd_crs, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); + + // image object + PyArrayObject *p_im = NULL; + p_im = (PyArrayObject *)PyArray_FROM_OTF(o_im, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + // subsets if using e.g., OSEM + PyArrayObject *p_subs = NULL; + p_subs = (PyArrayObject *)PyArray_FROM_OTF(o_subs, NPY_INT32, NPY_ARRAY_IN_ARRAY); + + // output sino object + PyArrayObject *p_prjout = NULL; + p_prjout = (PyArrayObject *)PyArray_FROM_OTF(o_prjout, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + //-- + + /* If 
that didn't work, throw an exception. */ + if (p_li2rno == NULL || p_li2sn == NULL || p_li2sn1 == NULL || p_li2nos == NULL || + p_aw2ali == NULL || p_s2c == NULL || p_im == NULL || p_crs == NULL || p_subs == NULL || + p_prjout == NULL || p_li2rng == NULL) { + // axLUTs + Py_XDECREF(p_li2rno); + Py_XDECREF(p_li2sn); + Py_XDECREF(p_li2sn1); + Py_XDECREF(p_li2nos); + Py_XDECREF(p_li2rng); + + // 2D sino LUT + Py_XDECREF(p_aw2ali); + // sino 2 crystals + Py_XDECREF(p_s2c); + Py_XDECREF(p_crs); + // image object + Py_XDECREF(p_im); + // subset definition object + Py_XDECREF(p_subs); + + // output sino object + PyArray_DiscardWritebackIfCopy(p_prjout); + Py_XDECREF(p_prjout); + + return NULL; + } + + int *subs_ = (int *)PyArray_DATA(p_subs); + short *s2c = (short *)PyArray_DATA(p_s2c); + int *aw2ali = (int *)PyArray_DATA(p_aw2ali); + short *li2sn; + if (Cnt.SPN == 11) { + li2sn = (short *)PyArray_DATA(p_li2sn); + } else if (Cnt.SPN == 1) { + li2sn = (short *)PyArray_DATA(p_li2sn1); + } + char *li2nos = (char *)PyArray_DATA(p_li2nos); + float *li2rng = (float *)PyArray_DATA(p_li2rng); + float *crs = (float *)PyArray_DATA(p_crs); + float *im = (float *)PyArray_DATA(p_im); + + if (Cnt.LOG <= LOGDEBUG) + printf("i> forward-projection image dimensions: %ld, %ld, %ld\n", PyArray_DIM(p_im, 0), + PyArray_DIM(p_im, 1), PyArray_DIM(p_im, 2)); + + int Nprj = PyArray_DIM(p_subs, 0); + int N0crs = PyArray_DIM(p_crs, 0); + int N1crs = PyArray_DIM(p_crs, 1); + int Naw = PyArray_DIM(p_aw2ali, 0); + + if (Cnt.LOG <= LOGDEBUG) + printf("\ni> N0crs=%d, N1crs=%d, Naw=%d, Nprj=%d\n", N0crs, N1crs, Naw, Nprj); + + int *subs; + if (subs_[0] == -1) { + Nprj = AW; + if (Cnt.LOG <= LOGWARNING) + printf("i> no subsets defined. number of projection bins in 2D: %d\n", Nprj); + // all projections in + subs = (int *)malloc(Nprj * sizeof(int)); + for (int i = 0; i < Nprj; i++) { + subs[i] = i; + } + } else { + if (Cnt.LOG <= LOGDEBUG) + printf("i> subsets defined. 
number of subset projection bins in 2D: %d\n", Nprj); + subs = subs_; + } + + // output projection sinogram + float *prjout = (float *)PyArray_DATA(p_prjout); + + // sets the device on which to calculate + HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + + //<><><><><><><<><><><><><><><><><><><><><><><><><<><><><><><><><><><><><><><><><><><><<><><><><><><><><><><> + gpu_fprj(prjout, im, li2rng, li2sn, li2nos, s2c, aw2ali, crs, subs, Nprj, Naw, N0crs, Cnt, att); + //<><><><><><><><<><><><><><><><><><><><><><><><><<><><><><><><><><><><><><><><><><><><<><><><><><><><><><><> + + // Clean up + Py_DECREF(p_li2rno); + Py_DECREF(p_li2rng); + Py_DECREF(p_li2sn); + Py_DECREF(p_li2sn1); + Py_DECREF(p_li2nos); + Py_DECREF(p_aw2ali); + Py_DECREF(p_s2c); + Py_DECREF(p_crs); + Py_DECREF(p_im); + Py_DECREF(p_subs); + + PyArray_ResolveWritebackIfCopy(p_prjout); + Py_DECREF(p_prjout); + + if (subs_[0] == -1) + free(subs); + + Py_INCREF(Py_None); + return Py_None; } - - //============================================================================== // B A C K P R O J E C T O R //------------------------------------------------------------------------------ -static PyObject *back_prj(PyObject *self, PyObject *args) -{ - - //Structure of constants - Cnst Cnt; - - //Dictionary of scanner constants - PyObject * o_mmrcnst; - - // axial LUT dicionary. contains such LUTs: li2rno, li2sn, li2nos. - PyObject * o_axLUT; - - // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). 
- PyObject * o_txLUT; - - // sino to be back projected to image (both reshaped for GPU execution) - PyObject * o_sino; - - // subsets for OSEM, first the default - PyObject * o_subs; - - //output backprojected image - PyObject * o_bimg; - - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOOOO", &o_bimg, &o_sino, &o_txLUT, &o_axLUT, &o_subs, &o_mmrcnst)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - PyObject* pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); - Cnt.SPN = (char)PyLong_AsLong(pd_span); - PyObject* pd_rngstrt = PyDict_GetItemString(o_mmrcnst, "RNG_STRT"); - Cnt.RNG_STRT = (char)PyLong_AsLong(pd_rngstrt); - PyObject* pd_rngend = PyDict_GetItemString(o_mmrcnst, "RNG_END"); - Cnt.RNG_END = (char)PyLong_AsLong(pd_rngend); - PyObject* pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - PyObject* pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - - /* Interpret the input objects as numpy arrays. 
*/ - //axial LUTs: - PyObject* pd_li2rno = PyDict_GetItemString(o_axLUT, "li2rno"); - PyObject* pd_li2sn = PyDict_GetItemString(o_axLUT, "li2sn"); - PyObject* pd_li2sn1 = PyDict_GetItemString(o_axLUT, "li2sn1"); - PyObject* pd_li2nos = PyDict_GetItemString(o_axLUT, "li2nos"); - PyObject* pd_li2rng = PyDict_GetItemString(o_axLUT, "li2rng"); - - //transaxial sino LUTs: - PyObject* pd_crs = PyDict_GetItemString(o_txLUT, "crs"); - PyObject* pd_s2c = PyDict_GetItemString(o_txLUT, "s2c"); - PyObject* pd_aw2ali = PyDict_GetItemString(o_txLUT, "aw2ali"); - - //-- get the arrays from the dictionaries - //axLUTs - PyArrayObject *p_li2rno = NULL, *p_li2sn1 = NULL, *p_li2sn = NULL; - PyArrayObject *p_li2nos = NULL, *p_li2rng = NULL; - p_li2rno = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rno, NPY_INT8, NPY_ARRAY_IN_ARRAY); - p_li2sn1 = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn1, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_li2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_li2nos = (PyArrayObject *)PyArray_FROM_OTF(pd_li2nos, NPY_INT8, NPY_ARRAY_IN_ARRAY); - p_li2rng = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rng, NPY_FLOAT32,NPY_ARRAY_IN_ARRAY); - - //sino to crystal, crystals - PyArrayObject *p_s2c = NULL, *p_crs = NULL, *p_aw2ali = NULL; - p_s2c = (PyArrayObject *)PyArray_FROM_OTF(pd_s2c, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_crs = (PyArrayObject *)PyArray_FROM_OTF(pd_crs, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - - p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); - - //sino object - PyArrayObject *p_sino = NULL; - p_sino = (PyArrayObject *)PyArray_FROM_OTF(o_sino, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - - //subsets if using e.g., OSEM - PyArrayObject *p_subs = NULL; - p_subs = (PyArrayObject *)PyArray_FROM_OTF(o_subs, NPY_INT32, NPY_ARRAY_IN_ARRAY); - - //output back-projection image - PyArrayObject *p_bim = NULL; - p_bim = (PyArrayObject *)PyArray_FROM_OTF(o_bimg, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - //-- - - - /* If 
that didn't work, throw an exception. */ - if (p_li2rno==NULL || p_li2sn==NULL || p_li2sn1==NULL || p_li2nos==NULL || - p_aw2ali==NULL || p_s2c==NULL || p_sino==NULL || p_crs==NULL || - p_subs==NULL || p_li2rng==NULL || p_bim==NULL) - { - //axLUTs - Py_XDECREF(p_li2rno); - Py_XDECREF(p_li2sn); - Py_XDECREF(p_li2sn1); - Py_XDECREF(p_li2nos); - Py_XDECREF(p_li2rng); - - //2D sino LUT - Py_XDECREF(p_aw2ali); - //sino 2 crystals - Py_XDECREF(p_s2c); - Py_XDECREF(p_crs); - //sino object - Py_XDECREF(p_sino); - //subset definition object - Py_XDECREF(p_subs); - - //back-projection image - PyArray_DiscardWritebackIfCopy(p_bim); - Py_XDECREF(p_bim); - - return NULL; - } - - - int *subs_ = (int*)PyArray_DATA(p_subs); - short *s2c = (short*)PyArray_DATA(p_s2c); - int *aw2ali = (int*)PyArray_DATA(p_aw2ali); - short *li2sn; - if (Cnt.SPN == 11) { - li2sn = (short*)PyArray_DATA(p_li2sn); - } - else if (Cnt.SPN == 1) { - li2sn = (short*)PyArray_DATA(p_li2sn1); - } - char *li2nos = (char*)PyArray_DATA(p_li2nos); - float *li2rng = (float*)PyArray_DATA(p_li2rng); - float *crs = (float*)PyArray_DATA(p_crs); - float *sino = (float*)PyArray_DATA(p_sino); - - int Nprj = PyArray_DIM(p_subs, 0); - int N0crs = PyArray_DIM(p_crs, 0); - int N1crs = PyArray_DIM(p_crs, 1); - int Naw = PyArray_DIM(p_aw2ali, 0); - - int *subs; - if (subs_[0] == -1) { - Nprj = AW; - if (Cnt.LOG <= LOGDEBUG ) - printf("\ni> no subsets defined. number of projection bins in 2D: %d\n", Nprj); - // all projections in - subs = (int*)malloc(Nprj * sizeof(int)); - for (int i = 0; i subsets defined. 
number of subset projection bins in 2D: %d\n", Nprj); - subs = subs_; - } - - float *bimg = (float*)PyArray_DATA(p_bim); - - if (Cnt.LOG <= LOGDEBUG) - printf("i> back-projection image dimensions: %ld, %ld, %ld\n", PyArray_DIM(p_bim, 0), PyArray_DIM(p_bim, 1), PyArray_DIM(p_bim, 2)); - - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); - - //<><><<><><><><><><><><><><><><><><><><><<><><><><<><><><><><><><><><><><><><><><><><<><><><><><><> - gpu_bprj(bimg, sino, li2rng, li2sn, li2nos, s2c, aw2ali, crs, subs, Nprj, Naw, N0crs, Cnt); - //<><><><><><><><><><><>><><><><><><><><><<><><><><<><><><><><><><><><><><><><><><><><<><><><><><><> - - //Clean up - Py_DECREF(p_li2rno); - Py_DECREF(p_li2rng); - Py_DECREF(p_li2sn); - Py_DECREF(p_li2sn1); - Py_DECREF(p_li2nos); - Py_DECREF(p_aw2ali); - Py_DECREF(p_s2c); - Py_DECREF(p_crs); - Py_DECREF(p_sino); - Py_DECREF(p_subs); - - PyArray_ResolveWritebackIfCopy(p_bim); - Py_DECREF(p_bim); - - if (subs_[0] == -1) free(subs); - - Py_INCREF(Py_None); - return Py_None; +static PyObject *back_prj(PyObject *self, PyObject *args) { + + // Structure of constants + Cnst Cnt; + + // Dictionary of scanner constants + PyObject *o_mmrcnst; + + // axial LUT dicionary. contains such LUTs: li2rno, li2sn, li2nos. + PyObject *o_axLUT; + + // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). 
+ PyObject *o_txLUT; + + // sino to be back projected to image (both reshaped for GPU execution) + PyObject *o_sino; + + // subsets for OSEM, first the default + PyObject *o_subs; + + // output backprojected image + PyObject *o_bimg; + + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OOOOOO", &o_bimg, &o_sino, &o_txLUT, &o_axLUT, &o_subs, &o_mmrcnst)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + PyObject *pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); + Cnt.SPN = (char)PyLong_AsLong(pd_span); + PyObject *pd_rngstrt = PyDict_GetItemString(o_mmrcnst, "RNG_STRT"); + Cnt.RNG_STRT = (char)PyLong_AsLong(pd_rngstrt); + PyObject *pd_rngend = PyDict_GetItemString(o_mmrcnst, "RNG_END"); + Cnt.RNG_END = (char)PyLong_AsLong(pd_rngend); + PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); + Cnt.LOG = (char)PyLong_AsLong(pd_log); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + + /* Interpret the input objects as numpy arrays. 
*/ + // axial LUTs: + PyObject *pd_li2rno = PyDict_GetItemString(o_axLUT, "li2rno"); + PyObject *pd_li2sn = PyDict_GetItemString(o_axLUT, "li2sn"); + PyObject *pd_li2sn1 = PyDict_GetItemString(o_axLUT, "li2sn1"); + PyObject *pd_li2nos = PyDict_GetItemString(o_axLUT, "li2nos"); + PyObject *pd_li2rng = PyDict_GetItemString(o_axLUT, "li2rng"); + + // transaxial sino LUTs: + PyObject *pd_crs = PyDict_GetItemString(o_txLUT, "crs"); + PyObject *pd_s2c = PyDict_GetItemString(o_txLUT, "s2c"); + PyObject *pd_aw2ali = PyDict_GetItemString(o_txLUT, "aw2ali"); + + //-- get the arrays from the dictionaries + // axLUTs + PyArrayObject *p_li2rno = NULL, *p_li2sn1 = NULL, *p_li2sn = NULL; + PyArrayObject *p_li2nos = NULL, *p_li2rng = NULL; + p_li2rno = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rno, NPY_INT8, NPY_ARRAY_IN_ARRAY); + p_li2sn1 = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn1, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_li2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_li2nos = (PyArrayObject *)PyArray_FROM_OTF(pd_li2nos, NPY_INT8, NPY_ARRAY_IN_ARRAY); + p_li2rng = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + // sino to crystal, crystals + PyArrayObject *p_s2c = NULL, *p_crs = NULL, *p_aw2ali = NULL; + p_s2c = (PyArrayObject *)PyArray_FROM_OTF(pd_s2c, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_crs = (PyArrayObject *)PyArray_FROM_OTF(pd_crs, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); + + // sino object + PyArrayObject *p_sino = NULL; + p_sino = (PyArrayObject *)PyArray_FROM_OTF(o_sino, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + // subsets if using e.g., OSEM + PyArrayObject *p_subs = NULL; + p_subs = (PyArrayObject *)PyArray_FROM_OTF(o_subs, NPY_INT32, NPY_ARRAY_IN_ARRAY); + + // output back-projection image + PyArrayObject *p_bim = NULL; + p_bim = (PyArrayObject *)PyArray_FROM_OTF(o_bimg, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + //-- + + 
/* If that didn't work, throw an exception. */ + if (p_li2rno == NULL || p_li2sn == NULL || p_li2sn1 == NULL || p_li2nos == NULL || + p_aw2ali == NULL || p_s2c == NULL || p_sino == NULL || p_crs == NULL || p_subs == NULL || + p_li2rng == NULL || p_bim == NULL) { + // axLUTs + Py_XDECREF(p_li2rno); + Py_XDECREF(p_li2sn); + Py_XDECREF(p_li2sn1); + Py_XDECREF(p_li2nos); + Py_XDECREF(p_li2rng); + + // 2D sino LUT + Py_XDECREF(p_aw2ali); + // sino 2 crystals + Py_XDECREF(p_s2c); + Py_XDECREF(p_crs); + // sino object + Py_XDECREF(p_sino); + // subset definition object + Py_XDECREF(p_subs); + + // back-projection image + PyArray_DiscardWritebackIfCopy(p_bim); + Py_XDECREF(p_bim); + + return NULL; + } + + int *subs_ = (int *)PyArray_DATA(p_subs); + short *s2c = (short *)PyArray_DATA(p_s2c); + int *aw2ali = (int *)PyArray_DATA(p_aw2ali); + short *li2sn; + if (Cnt.SPN == 11) { + li2sn = (short *)PyArray_DATA(p_li2sn); + } else if (Cnt.SPN == 1) { + li2sn = (short *)PyArray_DATA(p_li2sn1); + } + char *li2nos = (char *)PyArray_DATA(p_li2nos); + float *li2rng = (float *)PyArray_DATA(p_li2rng); + float *crs = (float *)PyArray_DATA(p_crs); + float *sino = (float *)PyArray_DATA(p_sino); + + int Nprj = PyArray_DIM(p_subs, 0); + int N0crs = PyArray_DIM(p_crs, 0); + int N1crs = PyArray_DIM(p_crs, 1); + int Naw = PyArray_DIM(p_aw2ali, 0); + + int *subs; + if (subs_[0] == -1) { + Nprj = AW; + if (Cnt.LOG <= LOGDEBUG) + printf("\ni> no subsets defined. number of projection bins in 2D: %d\n", Nprj); + // all projections in + subs = (int *)malloc(Nprj * sizeof(int)); + for (int i = 0; i < Nprj; i++) { + subs[i] = i; + } + } else { + if (Cnt.LOG <= LOGDEBUG) + printf("\ni> subsets defined. 
number of subset projection bins in 2D: %d\n", Nprj); + subs = subs_; + } + + float *bimg = (float *)PyArray_DATA(p_bim); + + if (Cnt.LOG <= LOGDEBUG) + printf("i> back-projection image dimensions: %ld, %ld, %ld\n", PyArray_DIM(p_bim, 0), + PyArray_DIM(p_bim, 1), PyArray_DIM(p_bim, 2)); + + // sets the device on which to calculate + HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + + //<><><<><><><><><><><><><><><><><><><><><<><><><><<><><><><><><><><><><><><><><><><><<><><><><><><> + gpu_bprj(bimg, sino, li2rng, li2sn, li2nos, s2c, aw2ali, crs, subs, Nprj, Naw, N0crs, Cnt); + //<><><><><><><><><><><>><><><><><><><><><<><><><><<><><><><><><><><><><><><><><><><><<><><><><><><> + + // Clean up + Py_DECREF(p_li2rno); + Py_DECREF(p_li2rng); + Py_DECREF(p_li2sn); + Py_DECREF(p_li2sn1); + Py_DECREF(p_li2nos); + Py_DECREF(p_aw2ali); + Py_DECREF(p_s2c); + Py_DECREF(p_crs); + Py_DECREF(p_sino); + Py_DECREF(p_subs); + + PyArray_ResolveWritebackIfCopy(p_bim); + Py_DECREF(p_bim); + + if (subs_[0] == -1) + free(subs); + + Py_INCREF(Py_None); + return Py_None; } - - //============================================================================== // O S E M R E C O N S T R U C T I O N //------------------------------------------------------------------------------ -static PyObject *osem_rec(PyObject *self, PyObject *args) -{ - //Structure of constants - Cnst Cnt; - - //output image - PyObject * o_imgout; - - //output image mask - PyObject * o_rcnmsk; - - //Dictionary of scanner constants - PyObject * o_mmrcnst; - - // axial LUT dicionary. contains such LUTs: li2rno, li2sn, li2nos. - PyObject * o_axLUT; - - // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). 
- PyObject * o_txLUT; - - // subsets for OSEM, first the default - PyObject * o_subs; - - // separable kernel matrix, for x, y, and z dimensions - PyObject *o_krnl; - - // sinos using in reconstruction (reshaped for GPU execution) - PyObject * o_psng; //prompts (measured) - PyObject * o_rsng; //randoms - PyObject * o_ssng; //scatter - PyObject * o_nsng; //norm - PyObject * o_asng; //attenuation - - //sensitivity image - PyObject * o_imgsens; - - /* ^^^^^^^^^^^^^^^^^^^^^^^ Parse the input tuple ^^^^^^^^^^^^^^^^^^^^^^^^^^^ */ - if (!PyArg_ParseTuple(args, "OOOOOOOOOOOOO", &o_imgout, &o_psng, &o_rsng, &o_ssng, &o_nsng, &o_asng, - &o_subs, &o_imgsens, &o_rcnmsk, &o_krnl, &o_txLUT, &o_axLUT, &o_mmrcnst)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - PyObject* pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - PyObject* pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); - Cnt.SPN = (char)PyLong_AsLong(pd_span); - PyObject* pd_sigma_rm = PyDict_GetItemString(o_mmrcnst, "SIGMA_RM"); - Cnt.SIGMA_RM = (float)PyFloat_AsDouble(pd_sigma_rm); - PyObject* pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - - /* Interpret the input objects as numpy arrays. 
*/ - //axial LUTs: - PyObject* pd_li2rno = PyDict_GetItemString(o_axLUT, "li2rno"); - PyObject* pd_li2sn = PyDict_GetItemString(o_axLUT, "li2sn"); - PyObject* pd_li2sn1 = PyDict_GetItemString(o_axLUT, "li2sn1"); - PyObject* pd_li2nos = PyDict_GetItemString(o_axLUT, "li2nos"); - PyObject* pd_li2rng = PyDict_GetItemString(o_axLUT, "li2rng"); - //transaxial sino LUTs: - PyObject* pd_crs = PyDict_GetItemString(o_txLUT, "crs"); - PyObject* pd_s2c = PyDict_GetItemString(o_txLUT, "s2c"); - PyObject* pd_aw2ali = PyDict_GetItemString(o_txLUT, "aw2ali"); - - //-- get the arrays from the dictionaries - //output back-projection image - PyArrayObject *p_imgout = NULL; - p_imgout = (PyArrayObject *)PyArray_FROM_OTF(o_imgout, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - - //image mask - PyArrayObject *p_rcnmsk = NULL; - p_rcnmsk = (PyArrayObject *)PyArray_FROM_OTF(o_rcnmsk, NPY_BOOL, NPY_ARRAY_IN_ARRAY); - - //sensitivity image - PyArrayObject *p_imgsens = NULL; - p_imgsens = (PyArrayObject *)PyArray_FROM_OTF(o_imgsens, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - - //> PSF kernel - PyArrayObject *p_krnl=NULL; - p_krnl = (PyArrayObject *)PyArray_FROM_OTF(o_krnl, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - - //> sinogram objects - PyArrayObject *p_psng = NULL, *p_rsng = NULL, *p_ssng = NULL, *p_nsng = NULL, *p_asng = NULL; - p_psng = (PyArrayObject *)PyArray_FROM_OTF(o_psng, NPY_UINT16, NPY_ARRAY_IN_ARRAY); - p_rsng = (PyArrayObject *)PyArray_FROM_OTF(o_rsng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - p_ssng = (PyArrayObject *)PyArray_FROM_OTF(o_ssng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - p_nsng = (PyArrayObject *)PyArray_FROM_OTF(o_nsng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - p_asng = (PyArrayObject *)PyArray_FROM_OTF(o_asng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - - //subset definition - PyArrayObject *p_subs = NULL; - p_subs = (PyArrayObject *)PyArray_FROM_OTF(o_subs, NPY_INT32, NPY_ARRAY_IN_ARRAY); - - - //axLUTs - PyArrayObject *p_li2rno = NULL, *p_li2sn1 = NULL, *p_li2sn = NULL; - PyArrayObject *p_li2nos = NULL, 
*p_li2rng = NULL; - p_li2rno = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rno, NPY_INT8, NPY_ARRAY_IN_ARRAY); - p_li2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_li2sn1 = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn1, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_li2nos = (PyArrayObject *)PyArray_FROM_OTF(pd_li2nos, NPY_INT8, NPY_ARRAY_IN_ARRAY); - p_li2rng = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - - //2D sino index LUT: - PyArrayObject *p_aw2ali = NULL; - p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); - - //sino to crystal, crystals - PyArrayObject *p_s2c = NULL, *p_crs = NULL; - p_s2c = (PyArrayObject *)PyArray_FROM_OTF(pd_s2c, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_crs = (PyArrayObject *)PyArray_FROM_OTF(pd_crs, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - //-- - - /* If that didn't work, throw an exception. */ - if (p_imgout == NULL || p_rcnmsk == NULL || p_subs == NULL || p_psng == NULL || p_rsng == NULL || p_ssng == NULL || - p_nsng == NULL || p_asng == NULL || p_imgsens == NULL || p_li2rno == NULL || p_li2sn == NULL || p_li2sn1 == NULL || - p_li2nos == NULL || p_aw2ali == NULL || p_s2c == NULL || p_crs == NULL || p_krnl == NULL) - { - //> output image - PyArray_DiscardWritebackIfCopy(p_imgout); - Py_XDECREF(p_imgout); - - Py_XDECREF(p_rcnmsk); - - //> objects in the sinogram space - Py_XDECREF(p_psng); - Py_XDECREF(p_rsng); - Py_XDECREF(p_ssng); - Py_XDECREF(p_nsng); - Py_XDECREF(p_asng); - - //> subsets - Py_XDECREF(p_subs); - - //> objects in the image space - Py_XDECREF(p_imgsens); - Py_XDECREF(p_krnl); - - //> axLUTs - Py_XDECREF(p_li2rno); - Py_XDECREF(p_li2sn); - Py_XDECREF(p_li2sn1); - Py_XDECREF(p_li2nos); - //> 2D sinogram LUT - Py_XDECREF(p_aw2ali); - //> sinogram to crystal LUTs - Py_XDECREF(p_s2c); - Py_XDECREF(p_crs); - - return NULL; - } - - float *imgout = (float*)PyArray_DATA(p_imgout); - bool *rcnmsk = (bool*)PyArray_DATA(p_rcnmsk); - unsigned 
short *psng = (unsigned short*)PyArray_DATA(p_psng); - float *rsng = (float*)PyArray_DATA(p_rsng); - float *ssng = (float*)PyArray_DATA(p_ssng); - float *nsng = (float*)PyArray_DATA(p_nsng); - float *asng = (float*)PyArray_DATA(p_asng); - - //> sensitivity image - float *imgsens = (float*)PyArray_DATA(p_imgsens); - - //>--- PSF KERNEL --- - float *krnl; - int SZ_KRNL = (int)PyArray_DIM(p_krnl, 1); - if (Cnt.LOG <=LOGINFO) printf("i> kernel size [voxels]: %d\n", SZ_KRNL); - - if (SZ_KRNL != KERNEL_LENGTH) { - if (Cnt.LOG <=LOGWARNING) printf("w> wrong kernel size.\n"); - krnl = (float *)malloc(KERNEL_LENGTH * sizeof(float)); +static PyObject *osem_rec(PyObject *self, PyObject *args) { + // Structure of constants + Cnst Cnt; + + // output image + PyObject *o_imgout; + + // output image mask + PyObject *o_rcnmsk; + + // Dictionary of scanner constants + PyObject *o_mmrcnst; + + // axial LUT dicionary. contains such LUTs: li2rno, li2sn, li2nos. + PyObject *o_axLUT; + + // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). 
+ PyObject *o_txLUT; + + // subsets for OSEM, first the default + PyObject *o_subs; + + // separable kernel matrix, for x, y, and z dimensions + PyObject *o_krnl; + + // sinos using in reconstruction (reshaped for GPU execution) + PyObject *o_psng; // prompts (measured) + PyObject *o_rsng; // randoms + PyObject *o_ssng; // scatter + PyObject *o_nsng; // norm + PyObject *o_asng; // attenuation + + // sensitivity image + PyObject *o_imgsens; + + /* ^^^^^^^^^^^^^^^^^^^^^^^ Parse the input tuple ^^^^^^^^^^^^^^^^^^^^^^^^^^^ */ + if (!PyArg_ParseTuple(args, "OOOOOOOOOOOOO", &o_imgout, &o_psng, &o_rsng, &o_ssng, &o_nsng, + &o_asng, &o_subs, &o_imgsens, &o_rcnmsk, &o_krnl, &o_txLUT, &o_axLUT, + &o_mmrcnst)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); + Cnt.LOG = (char)PyLong_AsLong(pd_log); + PyObject *pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); + Cnt.SPN = (char)PyLong_AsLong(pd_span); + PyObject *pd_sigma_rm = PyDict_GetItemString(o_mmrcnst, "SIGMA_RM"); + Cnt.SIGMA_RM = (float)PyFloat_AsDouble(pd_sigma_rm); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + + /* Interpret the input objects as numpy arrays. 
*/ + // axial LUTs: + PyObject *pd_li2rno = PyDict_GetItemString(o_axLUT, "li2rno"); + PyObject *pd_li2sn = PyDict_GetItemString(o_axLUT, "li2sn"); + PyObject *pd_li2sn1 = PyDict_GetItemString(o_axLUT, "li2sn1"); + PyObject *pd_li2nos = PyDict_GetItemString(o_axLUT, "li2nos"); + PyObject *pd_li2rng = PyDict_GetItemString(o_axLUT, "li2rng"); + // transaxial sino LUTs: + PyObject *pd_crs = PyDict_GetItemString(o_txLUT, "crs"); + PyObject *pd_s2c = PyDict_GetItemString(o_txLUT, "s2c"); + PyObject *pd_aw2ali = PyDict_GetItemString(o_txLUT, "aw2ali"); + + //-- get the arrays from the dictionaries + // output back-projection image + PyArrayObject *p_imgout = NULL; + p_imgout = (PyArrayObject *)PyArray_FROM_OTF(o_imgout, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + + // image mask + PyArrayObject *p_rcnmsk = NULL; + p_rcnmsk = (PyArrayObject *)PyArray_FROM_OTF(o_rcnmsk, NPY_BOOL, NPY_ARRAY_IN_ARRAY); + + // sensitivity image + PyArrayObject *p_imgsens = NULL; + p_imgsens = (PyArrayObject *)PyArray_FROM_OTF(o_imgsens, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + //> PSF kernel + PyArrayObject *p_krnl = NULL; + p_krnl = (PyArrayObject *)PyArray_FROM_OTF(o_krnl, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + //> sinogram objects + PyArrayObject *p_psng = NULL, *p_rsng = NULL, *p_ssng = NULL, *p_nsng = NULL, *p_asng = NULL; + p_psng = (PyArrayObject *)PyArray_FROM_OTF(o_psng, NPY_UINT16, NPY_ARRAY_IN_ARRAY); + p_rsng = (PyArrayObject *)PyArray_FROM_OTF(o_rsng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + p_ssng = (PyArrayObject *)PyArray_FROM_OTF(o_ssng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + p_nsng = (PyArrayObject *)PyArray_FROM_OTF(o_nsng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + p_asng = (PyArrayObject *)PyArray_FROM_OTF(o_asng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + // subset definition + PyArrayObject *p_subs = NULL; + p_subs = (PyArrayObject *)PyArray_FROM_OTF(o_subs, NPY_INT32, NPY_ARRAY_IN_ARRAY); + + // axLUTs + PyArrayObject *p_li2rno = NULL, *p_li2sn1 = NULL, *p_li2sn = NULL; + PyArrayObject *p_li2nos 
= NULL, *p_li2rng = NULL; + p_li2rno = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rno, NPY_INT8, NPY_ARRAY_IN_ARRAY); + p_li2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_li2sn1 = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn1, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_li2nos = (PyArrayObject *)PyArray_FROM_OTF(pd_li2nos, NPY_INT8, NPY_ARRAY_IN_ARRAY); + p_li2rng = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + // 2D sino index LUT: + PyArrayObject *p_aw2ali = NULL; + p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); + + // sino to crystal, crystals + PyArrayObject *p_s2c = NULL, *p_crs = NULL; + p_s2c = (PyArrayObject *)PyArray_FROM_OTF(pd_s2c, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_crs = (PyArrayObject *)PyArray_FROM_OTF(pd_crs, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + //-- + + /* If that didn't work, throw an exception. */ + if (p_imgout == NULL || p_rcnmsk == NULL || p_subs == NULL || p_psng == NULL || p_rsng == NULL || + p_ssng == NULL || p_nsng == NULL || p_asng == NULL || p_imgsens == NULL || + p_li2rno == NULL || p_li2sn == NULL || p_li2sn1 == NULL || p_li2nos == NULL || + p_aw2ali == NULL || p_s2c == NULL || p_crs == NULL || p_krnl == NULL) { + //> output image + PyArray_DiscardWritebackIfCopy(p_imgout); + Py_XDECREF(p_imgout); + + Py_XDECREF(p_rcnmsk); + + //> objects in the sinogram space + Py_XDECREF(p_psng); + Py_XDECREF(p_rsng); + Py_XDECREF(p_ssng); + Py_XDECREF(p_nsng); + Py_XDECREF(p_asng); + + //> subsets + Py_XDECREF(p_subs); + + //> objects in the image space + Py_XDECREF(p_imgsens); + Py_XDECREF(p_krnl); + + //> axLUTs + Py_XDECREF(p_li2rno); + Py_XDECREF(p_li2sn); + Py_XDECREF(p_li2sn1); + Py_XDECREF(p_li2nos); + //> 2D sinogram LUT + Py_XDECREF(p_aw2ali); + //> sinogram to crystal LUTs + Py_XDECREF(p_s2c); + Py_XDECREF(p_crs); + + return NULL; + } + + float *imgout = (float *)PyArray_DATA(p_imgout); + bool *rcnmsk = (bool *)PyArray_DATA(p_rcnmsk); + 
unsigned short *psng = (unsigned short *)PyArray_DATA(p_psng); + float *rsng = (float *)PyArray_DATA(p_rsng); + float *ssng = (float *)PyArray_DATA(p_ssng); + float *nsng = (float *)PyArray_DATA(p_nsng); + float *asng = (float *)PyArray_DATA(p_asng); + + //> sensitivity image + float *imgsens = (float *)PyArray_DATA(p_imgsens); + + //>--- PSF KERNEL --- + float *krnl; + int SZ_KRNL = (int)PyArray_DIM(p_krnl, 1); + if (Cnt.LOG <= LOGINFO) + printf("i> kernel size [voxels]: %d\n", SZ_KRNL); + + if (SZ_KRNL != KERNEL_LENGTH) { + if (Cnt.LOG <= LOGWARNING) + printf("w> wrong kernel size.\n"); + krnl = (float *)malloc(KERNEL_LENGTH * sizeof(float)); krnl[0] = -1; - } else { - krnl = (float*)PyArray_DATA(p_krnl); - } - //>------------------- - - short *li2sn; - if (Cnt.SPN == 11) { - li2sn = (short*)PyArray_DATA(p_li2sn); - } - else if (Cnt.SPN == 1) { - li2sn = (short*)PyArray_DATA(p_li2sn1); - } - char *li2nos = (char*)PyArray_DATA(p_li2nos); - float *li2rng = (float*)PyArray_DATA(p_li2rng); - float *crs = (float*)PyArray_DATA(p_crs); - short *s2c = (short*)PyArray_DATA(p_s2c); - int *aw2ali = (int*)PyArray_DATA(p_aw2ali); - - - int N0crs = PyArray_DIM(p_crs, 0); - int N1crs = PyArray_DIM(p_crs, 1); - - // number of subsets - int Nsub = PyArray_DIM(p_subs, 0); - // number of elements used to store max. number of subsets projection - 1 - int Nprj = PyArray_DIM(p_subs, 1); - if (Cnt.LOG <= LOGDEBUG) printf("i> number of subsets = %d, and max. 
number of projections/subset = %d\n", Nsub, Nprj - 1); - - int *subs = (int*)PyArray_DATA(p_subs); - - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); - - //<><><<><><><><<><><><><><><><><><><> - osem(imgout, rcnmsk, psng, rsng, ssng, nsng, asng, subs, imgsens, - krnl, li2rng, li2sn, li2nos, s2c, crs, Nsub, Nprj, N0crs, Cnt); - //<><><><><><><><<><><><>><><><><><><> - - //Clean up - PyArray_ResolveWritebackIfCopy(p_imgout); - Py_DECREF(p_imgout); - - Py_DECREF(p_rcnmsk); - Py_DECREF(p_psng); - Py_DECREF(p_rsng); - Py_DECREF(p_ssng); - Py_DECREF(p_nsng); - Py_DECREF(p_asng); - - Py_DECREF(p_subs); - - Py_DECREF(p_imgsens); - Py_DECREF(p_krnl); - - Py_DECREF(p_li2rno); - Py_DECREF(p_li2rng); - Py_DECREF(p_li2sn); - Py_DECREF(p_li2sn1); - Py_DECREF(p_li2nos); - Py_DECREF(p_aw2ali); - Py_DECREF(p_s2c); - Py_DECREF(p_crs); - - Py_INCREF(Py_None); - return Py_None; - + } else { + krnl = (float *)PyArray_DATA(p_krnl); + } + //>------------------- + + short *li2sn; + if (Cnt.SPN == 11) { + li2sn = (short *)PyArray_DATA(p_li2sn); + } else if (Cnt.SPN == 1) { + li2sn = (short *)PyArray_DATA(p_li2sn1); + } + char *li2nos = (char *)PyArray_DATA(p_li2nos); + float *li2rng = (float *)PyArray_DATA(p_li2rng); + float *crs = (float *)PyArray_DATA(p_crs); + short *s2c = (short *)PyArray_DATA(p_s2c); + int *aw2ali = (int *)PyArray_DATA(p_aw2ali); + + int N0crs = PyArray_DIM(p_crs, 0); + int N1crs = PyArray_DIM(p_crs, 1); + + // number of subsets + int Nsub = PyArray_DIM(p_subs, 0); + // number of elements used to store max. number of subsets projection - 1 + int Nprj = PyArray_DIM(p_subs, 1); + if (Cnt.LOG <= LOGDEBUG) + printf("i> number of subsets = %d, and max. 
number of projections/subset = %d\n", Nsub, + Nprj - 1); + + int *subs = (int *)PyArray_DATA(p_subs); + + // sets the device on which to calculate + HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + + //<><><<><><><><<><><><><><><><><><><> + osem(imgout, rcnmsk, psng, rsng, ssng, nsng, asng, subs, imgsens, krnl, li2rng, li2sn, li2nos, + s2c, crs, Nsub, Nprj, N0crs, Cnt); + //<><><><><><><><<><><><>><><><><><><> + + // Clean up + PyArray_ResolveWritebackIfCopy(p_imgout); + Py_DECREF(p_imgout); + + Py_DECREF(p_rcnmsk); + Py_DECREF(p_psng); + Py_DECREF(p_rsng); + Py_DECREF(p_ssng); + Py_DECREF(p_nsng); + Py_DECREF(p_asng); + + Py_DECREF(p_subs); + + Py_DECREF(p_imgsens); + Py_DECREF(p_krnl); + + Py_DECREF(p_li2rno); + Py_DECREF(p_li2rng); + Py_DECREF(p_li2sn); + Py_DECREF(p_li2sn1); + Py_DECREF(p_li2nos); + Py_DECREF(p_aw2ali); + Py_DECREF(p_s2c); + Py_DECREF(p_crs); + + Py_INCREF(Py_None); + return Py_None; } diff --git a/niftypet/nipet/prj/src/prjb.cu b/niftypet/nipet/prj/src/prjb.cu index a0b83f6b..5f722c3e 100644 --- a/niftypet/nipet/prj/src/prjb.cu +++ b/niftypet/nipet/prj/src/prjb.cu @@ -6,447 +6,417 @@ reconstruction. 
author: Pawel Markiewicz Copyrights: 2018 ------------------------------------------------------------------------*/ -#include "prjb.h" #include "auxmath.h" +#include "prjb.h" #include "tprj.h" __constant__ float2 c_li2rng[NLI2R]; __constant__ short2 c_li2sn[NLI2R]; -__constant__ char c_li2nos[NLI2R]; +__constant__ char c_li2nos[NLI2R]; //=============================================================== -//copy to the smaller axially image -__global__ -void imReduce(float * imr, - float * im, - int vz0, - int nvz) -{ - int iz = vz0 + threadIdx.x; - int iy = SZ_IMZ*threadIdx.y + SZ_IMZ*blockDim.y*blockIdx.x; - if (iy> UV_SHFT); - int uv = SZ_IMZ*((u & 0x000001ff) + SZ_IMX*v); - //next voxel (skipping the first fractional one) - uv += !rbit * sgna0*SZ_IMZ; - uv -= rbit * sgna1*SZ_IMZ*SZ_IMX; - - float dtr = tt[N_TT*ixt + 2]; - float dtc = tt[N_TT*ixt + 3]; - - float trc = tt[N_TT*ixt] + rbit*dtr; - float tcc = tt[N_TT*ixt + 1] + dtc * !rbit; - rbit = tv[N_TV*ixt + 3] & 0x01; - - float tn = trc * rbit + tcc * !rbit; // next t - float tp = tt[N_TT*ixt + 5]; //previous t - - float lt; - //------------------------------------------------- - - - for (int k = 3; k<(int)tt[N_TT*ixt + 9]; k++) { - lt = tn - tp; - - atomicAdd(&im[uv + w], lt*bin); - - trc += dtr * rbit; - tcc += dtc * !rbit; - uv += !rbit * sgna0*SZ_IMZ; - uv -= rbit * sgna1*SZ_IMZ*SZ_IMX; - tp = tn; - rbit = tv[N_TV*ixt + k + 1] & 0x01; - tn = trc * rbit + tcc * !rbit; - } - +__global__ void bprj_drct(const float *sino, float *im, const float *tt, const unsigned char *tv, + const int *subs, const short snno) { + int ixt = subs[blockIdx.x]; // transaxial indx + int ixz = threadIdx.x; // axial (z) + + float bin = sino[c_li2sn[ixz].x + blockIdx.x * snno]; + + float z = c_li2rng[ixz].x + .5 * SZ_RING; + int w = (floorf(.5 * SZ_IMZ + SZ_VOXZi * z)); + + //------------------------------------------------- + /*** accumulation ***/ + // vector a (at) component signs + int sgna0 = tv[N_TV * ixt] - 1; + int sgna1 = 
tv[N_TV * ixt + 1] - 1; + bool rbit = tv[N_TV * ixt + 2] & 0x01; // row bit + + int u = (int)tt[N_TT * ixt + 8]; + int v = (u >> UV_SHFT); + int uv = SZ_IMZ * ((u & 0x000001ff) + SZ_IMX * v); + // next voxel (skipping the first fractional one) + uv += !rbit * sgna0 * SZ_IMZ; + uv -= rbit * sgna1 * SZ_IMZ * SZ_IMX; + + float dtr = tt[N_TT * ixt + 2]; + float dtc = tt[N_TT * ixt + 3]; + + float trc = tt[N_TT * ixt] + rbit * dtr; + float tcc = tt[N_TT * ixt + 1] + dtc * !rbit; + rbit = tv[N_TV * ixt + 3] & 0x01; + + float tn = trc * rbit + tcc * !rbit; // next t + float tp = tt[N_TT * ixt + 5]; // previous t + + float lt; + //------------------------------------------------- + + for (int k = 3; k < (int)tt[N_TT * ixt + 9]; k++) { + lt = tn - tp; + + atomicAdd(&im[uv + w], lt * bin); + + trc += dtr * rbit; + tcc += dtc * !rbit; + uv += !rbit * sgna0 * SZ_IMZ; + uv -= rbit * sgna1 * SZ_IMZ * SZ_IMX; + tp = tn; + rbit = tv[N_TV * ixt + k + 1] & 0x01; + tn = trc * rbit + tcc * !rbit; + } } //************** OBLIQUE ************************************************** -__global__ void bprj_oblq(const float * sino, - float * im, - const float * tt, - const unsigned char * tv, - const int * subs, - const short snno, - const int zoff) -{ - int ixz = threadIdx.x + zoff; // axial (z) - if (ixz> UV_SHFT); - int uv = SZ_IMZ*((u & 0x000001ff) + SZ_IMX*v); - //next voxel (skipping the first fractional one) - uv += !rbit * sgna0*SZ_IMZ; - uv -= rbit * sgna1*SZ_IMZ*SZ_IMX; - - float dtr = tt[N_TT*ixt + 2]; - float dtc = tt[N_TT*ixt + 3]; - - float trc = tt[N_TT*ixt] + rbit*dtr; - float tcc = tt[N_TT*ixt + 1] + dtc * !rbit; - rbit = tv[N_TV*ixt + 3] & 0x01; - - float tn = trc * rbit + tcc * !rbit; // next t - float tp = tt[N_TT*ixt + 5]; //previous t - //-------------------------------------------------- - - //**** AXIAL ***** - float atn = tt[N_TT*ixt + 7]; - float az = c_li2rng[ixz].y - c_li2rng[ixz].x; - float az_atn = az / atn; - float s_az_atn = sqrtf(az_atn*az_atn + 1); - int 
sgnaz; - if (az >= 0)sgnaz = 1; else sgnaz = -1; - - float pz = c_li2rng[ixz].x + .5*SZ_RING; - float z = pz + az_atn * tp; //here was t1 = tt[N_TT*ixt+4]<<<<<<<< - int w = (floorf(.5*SZ_IMZ + SZ_VOXZi*z)); - float lz1 = (ceilf(.5*SZ_IMZ + SZ_VOXZi*z))*SZ_VOXZ - .5*SZ_IMZ*SZ_VOXZ; //w is like in matlab by one greater - - z = c_li2rng[ixz].y + .5*SZ_RING - az_atn * tp;//here was t1 = tt[N_TT*ixt+4]<<<<<<<<< - int w_ = (floorf(.5*SZ_IMZ + SZ_VOXZi*z)); - z = pz + az_atn*tt[N_TT*ixt + 6]; //t2 - float lz2 = (floorf(.5*SZ_IMZ + SZ_VOXZi*z))*SZ_VOXZ - .5*SZ_IMZ*SZ_VOXZ; - int nz = fabsf(lz2 - lz1) / SZ_VOXZ; //rintf - float tz1 = (lz1 - pz) / az_atn; //first ray interaction with a row - float tz2 = (lz2 - pz) / az_atn; //last ray interaction with a row - float dtz = (tz2 - tz1) / nz; - float tzc = tz1; - //**************** - - float fr, lt; - - for (int k = 3; k0) { - fr = (tzc - tp) / lt; - atomicAdd(im + uv + w, fr*lt*s_az_atn*bin); - atomicAdd(im + uv + w_, fr*lt*s_az_atn*bin_); - // acc += fr*lt*s_az_atn * im[ w + uv ]; - // acc_+= fr*lt*s_az_atn * im[ w_+ uv ]; - w += sgnaz; - w_ -= sgnaz; - atomicAdd(im + uv + w, (1 - fr)*lt*s_az_atn*bin); - atomicAdd(im + uv + w_, (1 - fr)*lt*s_az_atn*bin_); - // acc += (1-fr)*lt*s_az_atn * im[ w + uv]; - // acc_+= (1-fr)*lt*s_az_atn * im[ w_+ uv]; - tzc += dtz; - } - else { - atomicAdd(im + uv + w, lt*s_az_atn*bin); - atomicAdd(im + uv + w_, lt*s_az_atn*bin_); - // acc += lt*s_az_atn * im[ w + uv ]; - // acc_+= lt*s_az_atn * im[ w_+ uv ]; - } - - trc += dtr * rbit; - tcc += dtc * !rbit; - - uv += !rbit * sgna0*SZ_IMZ; - uv -= rbit * sgna1*SZ_IMZ*SZ_IMY; - - tp = tn; - rbit = tv[N_TV*ixt + k + 1] & 0x01; - tn = trc * rbit + tcc * !rbit; - } - - } +__global__ void bprj_oblq(const float *sino, float *im, const float *tt, const unsigned char *tv, + const int *subs, const short snno, const int zoff) { + int ixz = threadIdx.x + zoff; // axial (z) + if (ixz < NLI2R) { + int ixt = subs[blockIdx.x]; // blockIdx.x is the transaxial bin 
index + // bin values to be back projected + float bin = sino[c_li2sn[ixz].x + snno * blockIdx.x]; + float bin_ = sino[c_li2sn[ixz].y + snno * blockIdx.x]; + + //------------------------------------------------- + /*** accumulation ***/ + // vector a (at) component signs + int sgna0 = tv[N_TV * ixt] - 1; + int sgna1 = tv[N_TV * ixt + 1] - 1; + bool rbit = tv[N_TV * ixt + 2] & 0x01; // row bit + + int u = (int)tt[N_TT * ixt + 8]; + int v = (u >> UV_SHFT); + int uv = SZ_IMZ * ((u & 0x000001ff) + SZ_IMX * v); + // next voxel (skipping the first fractional one) + uv += !rbit * sgna0 * SZ_IMZ; + uv -= rbit * sgna1 * SZ_IMZ * SZ_IMX; + + float dtr = tt[N_TT * ixt + 2]; + float dtc = tt[N_TT * ixt + 3]; + + float trc = tt[N_TT * ixt] + rbit * dtr; + float tcc = tt[N_TT * ixt + 1] + dtc * !rbit; + rbit = tv[N_TV * ixt + 3] & 0x01; + + float tn = trc * rbit + tcc * !rbit; // next t + float tp = tt[N_TT * ixt + 5]; // previous t + //-------------------------------------------------- + + //**** AXIAL ***** + float atn = tt[N_TT * ixt + 7]; + float az = c_li2rng[ixz].y - c_li2rng[ixz].x; + float az_atn = az / atn; + float s_az_atn = sqrtf(az_atn * az_atn + 1); + int sgnaz; + if (az >= 0) + sgnaz = 1; + else + sgnaz = -1; + + float pz = c_li2rng[ixz].x + .5 * SZ_RING; + float z = pz + az_atn * tp; // here was t1 = tt[N_TT*ixt+4]<<<<<<<< + int w = (floorf(.5 * SZ_IMZ + SZ_VOXZi * z)); + float lz1 = (ceilf(.5 * SZ_IMZ + SZ_VOXZi * z)) * SZ_VOXZ - + .5 * SZ_IMZ * SZ_VOXZ; // w is like in matlab by one greater + + z = c_li2rng[ixz].y + .5 * SZ_RING - az_atn * tp; // here was t1 = tt[N_TT*ixt+4]<<<<<<<<< + int w_ = (floorf(.5 * SZ_IMZ + SZ_VOXZi * z)); + z = pz + az_atn * tt[N_TT * ixt + 6]; // t2 + float lz2 = (floorf(.5 * SZ_IMZ + SZ_VOXZi * z)) * SZ_VOXZ - .5 * SZ_IMZ * SZ_VOXZ; + int nz = fabsf(lz2 - lz1) / SZ_VOXZ; // rintf + float tz1 = (lz1 - pz) / az_atn; // first ray interaction with a row + float tz2 = (lz2 - pz) / az_atn; // last ray interaction with a row + float dtz = 
(tz2 - tz1) / nz; + float tzc = tz1; + //**************** + + float fr, lt; + + for (int k = 3; k < tt[N_TT * ixt + 9]; + k++) { //<<< k=3 as 0 and 1 are for sign and 2 is skipped + lt = tn - tp; + if ((tn - tzc) > 0) { + fr = (tzc - tp) / lt; + atomicAdd(im + uv + w, fr * lt * s_az_atn * bin); + atomicAdd(im + uv + w_, fr * lt * s_az_atn * bin_); + // acc += fr*lt*s_az_atn * im[ w + uv ]; + // acc_+= fr*lt*s_az_atn * im[ w_+ uv ]; + w += sgnaz; + w_ -= sgnaz; + atomicAdd(im + uv + w, (1 - fr) * lt * s_az_atn * bin); + atomicAdd(im + uv + w_, (1 - fr) * lt * s_az_atn * bin_); + // acc += (1-fr)*lt*s_az_atn * im[ w + uv]; + // acc_+= (1-fr)*lt*s_az_atn * im[ w_+ uv]; + tzc += dtz; + } else { + atomicAdd(im + uv + w, lt * s_az_atn * bin); + atomicAdd(im + uv + w_, lt * s_az_atn * bin_); + // acc += lt*s_az_atn * im[ w + uv ]; + // acc_+= lt*s_az_atn * im[ w_+ uv ]; + } + + trc += dtr * rbit; + tcc += dtc * !rbit; + + uv += !rbit * sgna0 * SZ_IMZ; + uv -= rbit * sgna1 * SZ_IMZ * SZ_IMY; + + tp = tn; + rbit = tv[N_TV * ixt + k + 1] & 0x01; + tn = trc * rbit + tcc * !rbit; + } + } } //-------------------------------------------------------------------------------------------------- -void gpu_bprj(float *bimg, - float * sino, - float * li2rng, - short * li2sn, - char * li2nos, - short *s2c, - int *aw2ali, - float *crs, - int *subs, - int Nprj, - int Naw, - int N0crs, - Cnst Cnt) -{ - - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); - - //--- TRANSAXIAL COMPONENT - float4 *d_crs; HANDLE_ERROR(cudaMalloc(&d_crs, N0crs * sizeof(float4))); - HANDLE_ERROR(cudaMemcpy(d_crs, crs, N0crs * sizeof(float4), cudaMemcpyHostToDevice)); - - short2 *d_s2c; HANDLE_ERROR(cudaMalloc(&d_s2c, AW * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_s2c, s2c, AW * sizeof(short2), cudaMemcpyHostToDevice)); - - float *d_tt; HANDLE_ERROR(cudaMalloc(&d_tt, N_TT*AW * sizeof(float))); - - unsigned char *d_tv; HANDLE_ERROR(cudaMalloc(&d_tv, 
N_TV*AW * sizeof(unsigned char))); - HANDLE_ERROR(cudaMemset(d_tv, 0, N_TV*AW * sizeof(unsigned char))); - - // array of subset projection bins - int *d_subs; HANDLE_ERROR(cudaMalloc(&d_subs, Nprj * sizeof(int))); - HANDLE_ERROR(cudaMemcpy(d_subs, subs, Nprj * sizeof(int), cudaMemcpyHostToDevice)); - //--- - - //----------------------------------------------------------------- - //RINGS: either all or a subset of rings can be used for fast calc. - //----------------------------------------------------------------- - // number of rings customised - int nrng_c, nil2r_c, vz0, vz1, nvz; - //number of sinos - short snno = -1; - if (Cnt.SPN == 1) { - // number of direct rings considered - nrng_c = Cnt.RNG_END - Cnt.RNG_STRT; - // number of "positive" michelogram elements used for projection (can be smaller than the maximum) - nil2r_c = (nrng_c + 1)*nrng_c / 2; - snno = nrng_c*nrng_c; - //correct for the max. ring difference in the full axial extent (don't use ring range (1,63) as for this case no correction) - if (nrng_c == NRINGS) { - snno -= 12; - nil2r_c -= 6; - } - } - else if (Cnt.SPN == 11) { - snno = NSINOS11; - nrng_c = NRINGS; - nil2r_c = NLI2R; - } - // voxels in axial direction - vz0 = 2 * Cnt.RNG_STRT; - vz1 = 2 * (Cnt.RNG_END - 1); - nvz = 2 * nrng_c - 1; - if (Cnt.LOG <= LOGDEBUG) { - printf("i> detector rings range: [%d, %d) => number of sinos: %d\n", Cnt.RNG_STRT, Cnt.RNG_END, snno); - printf(" corresponding voxels: [%d, %d] => number of voxels: %d\n", vz0, vz1, nvz); - } - //----------------------------------------------------------------- - - //--- FULLY 3D sino to be back-projected to image - float *d_sino; HANDLE_ERROR(cudaMalloc(&d_sino, Nprj*snno * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_sino, sino, Nprj*snno * sizeof(float), cudaMemcpyHostToDevice)); - - float *d_im; HANDLE_ERROR(cudaMalloc(&d_im, SZ_IMX*SZ_IMY*SZ_IMZ * sizeof(float))); - HANDLE_ERROR(cudaMemset(d_im, 0, SZ_IMX*SZ_IMY*SZ_IMZ * sizeof(float))); - //--- - - 
cudaMemcpyToSymbol(c_li2rng, li2rng, nil2r_c * sizeof(float2)); - cudaMemcpyToSymbol(c_li2sn, li2sn, nil2r_c * sizeof(short2)); - cudaMemcpyToSymbol(c_li2nos, li2nos, nil2r_c * sizeof(char)); - - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - if (Cnt.LOG <= LOGDEBUG) - printf("i> calculating image through back projection... "); - - //------------DO TRANSAXIAL CALCULATIONS--------------------------------- - gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); - //----------------------------------------------------------------------- - - //============================================================================ - bprj_drct << > >(d_sino, d_im, d_tt, d_tv, d_subs, snno); - HANDLE_ERROR(cudaGetLastError()); - //============================================================================ - - int zoff = nrng_c; - //number of oblique sinograms - int Noblq = (nrng_c - 1)*nrng_c / 2; - - //cudaGetDeviceCount(&nDevices); - //for (int i = 0; i < nDevices; i++) { - // cudaDeviceProp prop; - // cudaGetDeviceProperties(&prop, i); - // printf("Device Number: %d\n", i); - // printf(" Device name: %s\n", prop.name); - // printf(" Device supports concurrentManagedAccess?: %s\n", prop.concurrentManagedAccess); - //} - - //cudaMemPrefetchAsync(d_sino, Nprj*snno * sizeof(float), nDevices, NULL); - - if (Cnt.SPN == 1 && Noblq <= 1024){ - bprj_oblq <<< Nprj, Noblq >>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff); - HANDLE_ERROR(cudaGetLastError()); - } - else { - bprj_oblq <<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff); - HANDLE_ERROR(cudaGetLastError()); - zoff += NSINOS / 4; - bprj_oblq <<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff); - HANDLE_ERROR(cudaGetLastError()); - } - //============================================================================ - - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - 
cudaEventDestroy(stop); - if (Cnt.LOG <= LOGDEBUG) - printf("DONE in %fs.\n", 0.001*elapsedTime); - - cudaDeviceSynchronize(); - - // // the actual axial size used (due to the customised ring subset used) - // int vz0 = 2*Cnt.RNG_STRT; - // int vz1 = 2*(Cnt.RNG_END-1); - // // number of voxel for reduced number of rings (customised) - // int nvz = vz1-vz0+1; - - // when rings are reduced - if (nvz> >(d_imr, d_im, vz0, nvz); - HANDLE_ERROR(cudaGetLastError()); - //copy to host memory - HANDLE_ERROR(cudaMemcpy(bimg, d_imr, SZ_IMX*SZ_IMY*nvz * sizeof(float), cudaMemcpyDeviceToHost)); - cudaFree(d_im); - cudaFree(d_imr); - if (Cnt.LOG <= LOGDEBUG) - printf("i> reduced the axial (z) image size to %d\n", nvz); - } - else { - //copy to host memory - HANDLE_ERROR(cudaMemcpy(bimg, d_im, SZ_IMX*SZ_IMY*SZ_IMZ * sizeof(float), cudaMemcpyDeviceToHost)); - cudaFree(d_im); - } - - cudaFree(d_sino); - cudaFree(d_tt); - cudaFree(d_tv); - cudaFree(d_subs); - cudaFree(d_crs); - cudaFree(d_s2c); - - return; +void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2nos, short *s2c, + int *aw2ali, float *crs, int *subs, int Nprj, int Naw, int N0crs, Cnst Cnt) { + + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGDEBUG) + printf("i> using CUDA device #%d\n", dev_id); + + //--- TRANSAXIAL COMPONENT + float4 *d_crs; + HANDLE_ERROR(cudaMalloc(&d_crs, N0crs * sizeof(float4))); + HANDLE_ERROR(cudaMemcpy(d_crs, crs, N0crs * sizeof(float4), cudaMemcpyHostToDevice)); + + short2 *d_s2c; + HANDLE_ERROR(cudaMalloc(&d_s2c, AW * sizeof(short2))); + HANDLE_ERROR(cudaMemcpy(d_s2c, s2c, AW * sizeof(short2), cudaMemcpyHostToDevice)); + + float *d_tt; + HANDLE_ERROR(cudaMalloc(&d_tt, N_TT * AW * sizeof(float))); + + unsigned char *d_tv; + HANDLE_ERROR(cudaMalloc(&d_tv, N_TV * AW * sizeof(unsigned char))); + HANDLE_ERROR(cudaMemset(d_tv, 0, N_TV * AW * sizeof(unsigned char))); + + // array of subset projection bins + int *d_subs; + HANDLE_ERROR(cudaMalloc(&d_subs, Nprj * 
sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_subs, subs, Nprj * sizeof(int), cudaMemcpyHostToDevice)); + //--- + + //----------------------------------------------------------------- + // RINGS: either all or a subset of rings can be used for fast calc. + //----------------------------------------------------------------- + // number of rings customised + int nrng_c, nil2r_c, vz0, vz1, nvz; + // number of sinos + short snno = -1; + if (Cnt.SPN == 1) { + // number of direct rings considered + nrng_c = Cnt.RNG_END - Cnt.RNG_STRT; + // number of "positive" michelogram elements used for projection (can be smaller than the + // maximum) + nil2r_c = (nrng_c + 1) * nrng_c / 2; + snno = nrng_c * nrng_c; + // correct for the max. ring difference in the full axial extent (don't use ring range (1,63) + // as for this case no correction) + if (nrng_c == NRINGS) { + snno -= 12; + nil2r_c -= 6; + } + } else if (Cnt.SPN == 11) { + snno = NSINOS11; + nrng_c = NRINGS; + nil2r_c = NLI2R; + } + // voxels in axial direction + vz0 = 2 * Cnt.RNG_STRT; + vz1 = 2 * (Cnt.RNG_END - 1); + nvz = 2 * nrng_c - 1; + if (Cnt.LOG <= LOGDEBUG) { + printf("i> detector rings range: [%d, %d) => number of sinos: %d\n", Cnt.RNG_STRT, + Cnt.RNG_END, snno); + printf(" corresponding voxels: [%d, %d] => number of voxels: %d\n", vz0, vz1, nvz); + } + //----------------------------------------------------------------- + + //--- FULLY 3D sino to be back-projected to image + float *d_sino; + HANDLE_ERROR(cudaMalloc(&d_sino, Nprj * snno * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_sino, sino, Nprj * snno * sizeof(float), cudaMemcpyHostToDevice)); + + float *d_im; + HANDLE_ERROR(cudaMalloc(&d_im, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); + HANDLE_ERROR(cudaMemset(d_im, 0, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); + //--- + + cudaMemcpyToSymbol(c_li2rng, li2rng, nil2r_c * sizeof(float2)); + cudaMemcpyToSymbol(c_li2sn, li2sn, nil2r_c * sizeof(short2)); + cudaMemcpyToSymbol(c_li2nos, li2nos, nil2r_c * 
sizeof(char)); + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + + if (Cnt.LOG <= LOGDEBUG) + printf("i> calculating image through back projection... "); + + //------------DO TRANSAXIAL CALCULATIONS--------------------------------- + gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); + //----------------------------------------------------------------------- + + //============================================================================ + bprj_drct<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno); + HANDLE_ERROR(cudaGetLastError()); + //============================================================================ + + int zoff = nrng_c; + // number of oblique sinograms + int Noblq = (nrng_c - 1) * nrng_c / 2; + + // cudaGetDeviceCount(&nDevices); + // for (int i = 0; i < nDevices; i++) { + // cudaDeviceProp prop; + // cudaGetDeviceProperties(&prop, i); + // printf("Device Number: %d\n", i); + // printf(" Device name: %s\n", prop.name); + // printf(" Device supports concurrentManagedAccess?: %s\n", prop.concurrentManagedAccess); + //} + + // cudaMemPrefetchAsync(d_sino, Nprj*snno * sizeof(float), nDevices, NULL); + + if (Cnt.SPN == 1 && Noblq <= 1024) { + bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff); + HANDLE_ERROR(cudaGetLastError()); + } else { + bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff); + HANDLE_ERROR(cudaGetLastError()); + zoff += NSINOS / 4; + bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff); + HANDLE_ERROR(cudaGetLastError()); + } + //============================================================================ + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGDEBUG) + printf("DONE in %fs.\n", 0.001 * elapsedTime); + + cudaDeviceSynchronize(); + + // // the actual axial size used (due to the customised ring subset 
used) + // int vz0 = 2*Cnt.RNG_STRT; + // int vz1 = 2*(Cnt.RNG_END-1); + // // number of voxel for reduced number of rings (customised) + // int nvz = vz1-vz0+1; + + // when rings are reduced + if (nvz < SZ_IMZ) { + float *d_imr; + HANDLE_ERROR(cudaMalloc(&d_imr, SZ_IMX * SZ_IMY * nvz * sizeof(float))); + HANDLE_ERROR(cudaMemset(d_imr, 0, SZ_IMX * SZ_IMY * nvz * sizeof(float))); + // number of axial row for max threads + int nar = MXTHRD / nvz; + dim3 THRD(nvz, nar, 1); + dim3 BLCK((SZ_IMY + nar - 1) / nar, SZ_IMX, 1); + imReduce<<>>(d_imr, d_im, vz0, nvz); + HANDLE_ERROR(cudaGetLastError()); + // copy to host memory + HANDLE_ERROR( + cudaMemcpy(bimg, d_imr, SZ_IMX * SZ_IMY * nvz * sizeof(float), cudaMemcpyDeviceToHost)); + cudaFree(d_im); + cudaFree(d_imr); + if (Cnt.LOG <= LOGDEBUG) + printf("i> reduced the axial (z) image size to %d\n", nvz); + } else { + // copy to host memory + HANDLE_ERROR( + cudaMemcpy(bimg, d_im, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float), cudaMemcpyDeviceToHost)); + cudaFree(d_im); + } + + cudaFree(d_sino); + cudaFree(d_tt); + cudaFree(d_tv); + cudaFree(d_subs); + cudaFree(d_crs); + cudaFree(d_s2c); + + return; } - - - - - - - - - //======================================================================= -void rec_bprj(float *d_bimg, - float *d_sino, - int *d_sub, - int Nprj, - float *d_tt, - unsigned char *d_tv, - float *li2rng, - short *li2sn, - char *li2nos, - Cnst Cnt) +void rec_bprj(float *d_bimg, float *d_sino, int *d_sub, int Nprj, float *d_tt, unsigned char *d_tv, + float *li2rng, short *li2sn, char *li2nos, Cnst Cnt) { - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); - - //get the axial LUTs in constant memory - cudaMemcpyToSymbol(c_li2rng, li2rng, NLI2R * sizeof(float2)); - cudaMemcpyToSymbol(c_li2sn, li2sn, NLI2R * sizeof(short2)); - cudaMemcpyToSymbol(c_li2nos, li2nos, NLI2R * sizeof(char)); - - //number of sinos - short snno = -1; - if (Cnt.SPN == 1) snno = NSINOS; - 
else if (Cnt.SPN == 11) snno = NSINOS11; - - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - if (Cnt.LOG <= LOGDEBUG) printf("i> subset back projection (Nprj=%d)... ", Nprj); - - //============================================================================ - bprj_drct << > >(d_sino, d_bimg, d_tt, d_tv, d_sub, snno); - // HANDLE_ERROR(cudaGetLastError()); - //============================================================================ - - int zoff = NRINGS; - //============================================================================ - bprj_oblq << > >(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff); - // HANDLE_ERROR(cudaGetLastError()); - //============================================================================ - - zoff += NSINOS / 4; - //============================================================================ - bprj_oblq << > >(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff); - // HANDLE_ERROR(cudaGetLastError()); - //============================================================================ - - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGDEBUG) printf("DONE in %fs.\n", 0.001*elapsedTime); - - cudaDeviceSynchronize(); - - - return; - + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGDEBUG) + printf("i> using CUDA device #%d\n", dev_id); + + // get the axial LUTs in constant memory + cudaMemcpyToSymbol(c_li2rng, li2rng, NLI2R * sizeof(float2)); + cudaMemcpyToSymbol(c_li2sn, li2sn, NLI2R * sizeof(short2)); + cudaMemcpyToSymbol(c_li2nos, li2nos, NLI2R * sizeof(char)); + + // number of sinos + short snno = -1; + if (Cnt.SPN == 1) + snno = NSINOS; + else if (Cnt.SPN == 11) + snno = NSINOS11; + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + if (Cnt.LOG <= LOGDEBUG) + 
printf("i> subset back projection (Nprj=%d)... ", Nprj); + + //============================================================================ + bprj_drct<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno); + // HANDLE_ERROR(cudaGetLastError()); + //============================================================================ + + int zoff = NRINGS; + //============================================================================ + bprj_oblq<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff); + // HANDLE_ERROR(cudaGetLastError()); + //============================================================================ + + zoff += NSINOS / 4; + //============================================================================ + bprj_oblq<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff); + // HANDLE_ERROR(cudaGetLastError()); + //============================================================================ + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGDEBUG) + printf("DONE in %fs.\n", 0.001 * elapsedTime); + + cudaDeviceSynchronize(); + + return; } diff --git a/niftypet/nipet/prj/src/prjb.h b/niftypet/nipet/prj/src/prjb.h index 9639a0b6..98da6422 100644 --- a/niftypet/nipet/prj/src/prjb.h +++ b/niftypet/nipet/prj/src/prjb.h @@ -1,39 +1,22 @@ -#include #include "def.h" -#include "tprj.h" #include "scanner_0.h" +#include "tprj.h" +#include #ifndef PRJB_H #define PRJB_H -//used from Python -void gpu_bprj(float *bimg, - float *sino, - float *li2rng, - short *li2sn, - char *li2nos, - short *s2c, - int *aw2ali, - float *crs, - int *subs, - int Nprj, - int Naw, - int N0crs, - Cnst Cnt); +// used from Python +void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2nos, short *s2c, + int *aw2ali, float *crs, int *subs, int Nprj, int Naw, int N0crs, Cnst Cnt); -//to be used within CUDA C reconstruction -void 
rec_bprj(float *d_bimg, - float *d_sino, - int *sub, - int Nprj, +// to be used within CUDA C reconstruction +void rec_bprj(float *d_bimg, float *d_sino, int *sub, int Nprj, - float *d_tt, - unsigned char *d_tv, + float *d_tt, unsigned char *d_tv, - float *li2rng, - short *li2sn, - char *li2nos, + float *li2rng, short *li2sn, char *li2nos, - Cnst Cnt); + Cnst Cnt); #endif diff --git a/niftypet/nipet/prj/src/prjf.cu b/niftypet/nipet/prj/src/prjf.cu index 22123a50..83ab6bb0 100644 --- a/niftypet/nipet/prj/src/prjf.cu +++ b/niftypet/nipet/prj/src/prjf.cu @@ -6,467 +6,434 @@ reconstruction. author: Pawel Markiewicz Copyrights: 2018 ------------------------------------------------------------------------*/ -#include "prjf.h" #include "auxmath.h" +#include "prjf.h" #include "tprj.h" __constant__ float2 c_li2rng[NLI2R]; __constant__ short2 c_li2sn[NLI2R]; -__constant__ char c_li2nos[NLI2R]; +__constant__ char c_li2nos[NLI2R]; //=============================================================== -//copy the smaller axially image to the one with full axial extension -__global__ -void imExpand(float * im, - float * imr, - int vz0, - int nvz) -{ - int iz = vz0 + threadIdx.x; - int iy = SZ_IMZ*threadIdx.y + SZ_IMZ*blockDim.y*blockIdx.x; - if (iy> UV_SHFT); - int uv = SZ_IMZ*((u & 0x000001ff) + SZ_IMX*v); - - //if((ixz==0) && (u>SZ_IMX || v>SZ_IMY)) printf("\n!!! 
u,v = %d,%d\n", u,v ); - - //next voxel (skipping the first fractional one) - uv += !rbit * sgna0*SZ_IMZ; - uv -= rbit * sgna1*SZ_IMZ*SZ_IMX; - - float dtr = tt[N_TT*ixt + 2]; - float dtc = tt[N_TT*ixt + 3]; - - float trc = tt[N_TT*ixt] + rbit*dtr; - float tcc = tt[N_TT*ixt + 1] + dtc * !rbit; - rbit = tv[N_TV*ixt + 3] & 0x01; - - float tn = trc * rbit + tcc * !rbit; // next t - float tp = tt[N_TT*ixt + 5]; //previous t - - float lt, acc = 0; - //------------------------------------------------- - - - for (int k = 3; k<(int)tt[N_TT*ixt + 9]; k++) {//<<<< k=3, was k=2 - lt = tn - tp; - acc += lt*im[w + uv]; - trc += dtr * rbit; - tcc += dtc * !rbit; - uv += !rbit * sgna0*SZ_IMZ; - uv -= rbit * sgna1*SZ_IMZ*SZ_IMX; - tp = tn; - rbit = tv[N_TV*ixt + k + 1] & 0x01; - tn = trc * rbit + tcc * !rbit; - } - - if (att == 1) { - if (span == 1) - sino[c_li2sn[ixz].x + blockIdx.x*snno] = expf(-acc); - else if (span == 11) - atomicAdd(sino + c_li2sn[ixz].x + blockIdx.x*snno, expf(-acc) / (float)c_li2nos[ixz]); - } - else if (att == 0) atomicAdd(sino + c_li2sn[ixz].x + blockIdx.x*snno, acc); - +__global__ void fprj_drct(float *sino, const float *im, const float *tt, const unsigned char *tv, + const int *subs, const short snno, const char span, const char att) { + int ixt = subs[blockIdx.x]; // transaxial indx + int ixz = threadIdx.x; // axial (z) + + float z = c_li2rng[ixz].x + .5 * SZ_RING; + int w = (floorf(.5 * SZ_IMZ + SZ_VOXZi * z)); + + // if(ixz==33 && ixt==5301){ + // printf("\n*** li2rng[ixz] = %f | li2sn[ixz] = %d, li2nos[ixz] = %d\n", li2rng[ixz], + // li2sn[ixz], li2nos[ixz]); + // } + + //------------------------------------------------- + /*** accumulation ***/ + // vector a (at) component signs + int sgna0 = tv[N_TV * ixt] - 1; + int sgna1 = tv[N_TV * ixt + 1] - 1; + bool rbit = tv[N_TV * ixt + 2] & 0x01; // row bit + + int u = (int)tt[N_TT * ixt + 8]; + int v = (u >> UV_SHFT); + int uv = SZ_IMZ * ((u & 0x000001ff) + SZ_IMX * v); + + // if((ixz==0) && (u>SZ_IMX || 
v>SZ_IMY)) printf("\n!!! u,v = %d,%d\n", u,v ); + + // next voxel (skipping the first fractional one) + uv += !rbit * sgna0 * SZ_IMZ; + uv -= rbit * sgna1 * SZ_IMZ * SZ_IMX; + + float dtr = tt[N_TT * ixt + 2]; + float dtc = tt[N_TT * ixt + 3]; + + float trc = tt[N_TT * ixt] + rbit * dtr; + float tcc = tt[N_TT * ixt + 1] + dtc * !rbit; + rbit = tv[N_TV * ixt + 3] & 0x01; + + float tn = trc * rbit + tcc * !rbit; // next t + float tp = tt[N_TT * ixt + 5]; // previous t + + float lt, acc = 0; + //------------------------------------------------- + + for (int k = 3; k < (int)tt[N_TT * ixt + 9]; k++) { //<<<< k=3, was k=2 + lt = tn - tp; + acc += lt * im[w + uv]; + trc += dtr * rbit; + tcc += dtc * !rbit; + uv += !rbit * sgna0 * SZ_IMZ; + uv -= rbit * sgna1 * SZ_IMZ * SZ_IMX; + tp = tn; + rbit = tv[N_TV * ixt + k + 1] & 0x01; + tn = trc * rbit + tcc * !rbit; + } + + if (att == 1) { + if (span == 1) + sino[c_li2sn[ixz].x + blockIdx.x * snno] = expf(-acc); + else if (span == 11) + atomicAdd(sino + c_li2sn[ixz].x + blockIdx.x * snno, expf(-acc) / (float)c_li2nos[ixz]); + } else if (att == 0) + atomicAdd(sino + c_li2sn[ixz].x + blockIdx.x * snno, acc); } //************** OBLIQUE ************************************************** -__global__ void fprj_oblq(float * sino, - const float * im, - const float * tt, - const unsigned char * tv, - const int * subs, - const short snno, - const char span, - const char att, - const int zoff) -{ - int ixz = threadIdx.x + zoff; // axial (z) - if (ixz> UV_SHFT); - int uv = SZ_IMZ*((u & 0x000001ff) + SZ_IMX*v); - //next voxel (skipping the first fractional one) - uv += !rbit * sgna0*SZ_IMZ; - uv -= rbit * sgna1*SZ_IMZ*SZ_IMX; - - float dtr = tt[N_TT*ixt + 2]; - float dtc = tt[N_TT*ixt + 3]; - - float trc = tt[N_TT*ixt] + rbit*dtr; - float tcc = tt[N_TT*ixt + 1] + dtc * !rbit; - rbit = tv[N_TV*ixt + 3] & 0x01; - - float tn = trc * rbit + tcc * !rbit; // next t - float tp = tt[N_TT*ixt + 5]; //previous t - 
//-------------------------------------------------- - - //**** AXIAL ***** - float atn = tt[N_TT*ixt + 7]; - float az = c_li2rng[ixz].y - c_li2rng[ixz].x; - float az_atn = az / atn; - float s_az_atn = sqrtf(az_atn*az_atn + 1); - int sgnaz; - if (az >= 0)sgnaz = 1; else sgnaz = -1; - - float pz = c_li2rng[ixz].x + .5*SZ_RING; - float z = pz + az_atn * tp; //here was t1 = tt[N_TT*ixt+4]<<<<<<<< - int w = (floorf(.5*SZ_IMZ + SZ_VOXZi*z)); - float lz1 = (ceilf(.5*SZ_IMZ + SZ_VOXZi*z))*SZ_VOXZ - .5*SZ_IMZ*SZ_VOXZ; //w is like in matlab by one greater - - z = c_li2rng[ixz].y + .5*SZ_RING - az_atn * tp;//here was t1 = tt[N_TT*ixt+4]<<<<<<<<< - int w_ = (floorf(.5*SZ_IMZ + SZ_VOXZi*z)); - z = pz + az_atn*tt[N_TT*ixt + 6]; //t2 - float lz2 = (floorf(.5*SZ_IMZ + SZ_VOXZi*z))*SZ_VOXZ - .5*SZ_IMZ*SZ_VOXZ; - int nz = fabsf(lz2 - lz1) / SZ_VOXZ; //rintf - float tz1 = (lz1 - pz) / az_atn; //first ray interaction with a row - float tz2 = (lz2 - pz) / az_atn; //last ray interaction with a row - float dtz = (tz2 - tz1) / nz; - float tzc = tz1; - //**************** - - float fr, lt, acc = 0, acc_ = 0; - for (int k = 3; k0) { - fr = (tzc - tp) / lt; - acc += fr*lt*s_az_atn * im[w + uv]; - acc_ += fr*lt*s_az_atn * im[w_ + uv]; - w += sgnaz; - w_ -= sgnaz; - acc += (1 - fr)*lt*s_az_atn * im[w + uv]; - acc_ += (1 - fr)*lt*s_az_atn * im[w_ + uv]; - tzc += dtz; - } - else { - acc += lt*s_az_atn * im[w + uv]; - acc_ += lt*s_az_atn * im[w_ + uv]; - } - - trc += dtr * rbit; - tcc += dtc * !rbit; - - uv += !rbit * sgna0*SZ_IMZ; - uv -= rbit * sgna1*SZ_IMZ*SZ_IMY; - - tp = tn; - rbit = tv[N_TV*ixt + k + 1] & 0x01; - tn = trc * rbit + tcc * !rbit; - } - - - // blockIdx.x is the transaxial bin index - if (att == 1) { - if (span == 1) { - sino[c_li2sn[ixz].x + blockIdx.x*snno] = expf(-acc); - sino[c_li2sn[ixz].y + blockIdx.x*snno] = expf(-acc_); - } - else if (span == 11) { - atomicAdd(sino + c_li2sn[ixz].x + blockIdx.x*snno, expf(-acc) / (float)c_li2nos[ixz]); - atomicAdd(sino + c_li2sn[ixz].y + 
blockIdx.x*snno, expf(-acc_) / (float)c_li2nos[ixz]); - } - } - else if (att == 0) { - atomicAdd(sino + c_li2sn[ixz].x + blockIdx.x*snno, acc); - atomicAdd(sino + c_li2sn[ixz].y + blockIdx.x*snno, acc_); - } - - } +__global__ void fprj_oblq(float *sino, const float *im, const float *tt, const unsigned char *tv, + const int *subs, const short snno, const char span, const char att, + const int zoff) { + int ixz = threadIdx.x + zoff; // axial (z) + if (ixz < NLI2R) { + int ixt = subs[blockIdx.x]; // transaxial indx + + //------------------------------------------------- + /*** accumulation ***/ + // vector a (at) component signs + int sgna0 = tv[N_TV * ixt] - 1; + int sgna1 = tv[N_TV * ixt + 1] - 1; + bool rbit = tv[N_TV * ixt + 2] & 0x01; // row bit + + int u = (int)tt[N_TT * ixt + 8]; + int v = (u >> UV_SHFT); + int uv = SZ_IMZ * ((u & 0x000001ff) + SZ_IMX * v); + // next voxel (skipping the first fractional one) + uv += !rbit * sgna0 * SZ_IMZ; + uv -= rbit * sgna1 * SZ_IMZ * SZ_IMX; + + float dtr = tt[N_TT * ixt + 2]; + float dtc = tt[N_TT * ixt + 3]; + + float trc = tt[N_TT * ixt] + rbit * dtr; + float tcc = tt[N_TT * ixt + 1] + dtc * !rbit; + rbit = tv[N_TV * ixt + 3] & 0x01; + + float tn = trc * rbit + tcc * !rbit; // next t + float tp = tt[N_TT * ixt + 5]; // previous t + //-------------------------------------------------- + + //**** AXIAL ***** + float atn = tt[N_TT * ixt + 7]; + float az = c_li2rng[ixz].y - c_li2rng[ixz].x; + float az_atn = az / atn; + float s_az_atn = sqrtf(az_atn * az_atn + 1); + int sgnaz; + if (az >= 0) + sgnaz = 1; + else + sgnaz = -1; + + float pz = c_li2rng[ixz].x + .5 * SZ_RING; + float z = pz + az_atn * tp; // here was t1 = tt[N_TT*ixt+4]<<<<<<<< + int w = (floorf(.5 * SZ_IMZ + SZ_VOXZi * z)); + float lz1 = (ceilf(.5 * SZ_IMZ + SZ_VOXZi * z)) * SZ_VOXZ - + .5 * SZ_IMZ * SZ_VOXZ; // w is like in matlab by one greater + + z = c_li2rng[ixz].y + .5 * SZ_RING - az_atn * tp; // here was t1 = tt[N_TT*ixt+4]<<<<<<<<< + int w_ = (floorf(.5 * 
SZ_IMZ + SZ_VOXZi * z)); + z = pz + az_atn * tt[N_TT * ixt + 6]; // t2 + float lz2 = (floorf(.5 * SZ_IMZ + SZ_VOXZi * z)) * SZ_VOXZ - .5 * SZ_IMZ * SZ_VOXZ; + int nz = fabsf(lz2 - lz1) / SZ_VOXZ; // rintf + float tz1 = (lz1 - pz) / az_atn; // first ray interaction with a row + float tz2 = (lz2 - pz) / az_atn; // last ray interaction with a row + float dtz = (tz2 - tz1) / nz; + float tzc = tz1; + //**************** + + float fr, lt, acc = 0, acc_ = 0; + for (int k = 3; k < tt[N_TT * ixt + 9]; + k++) { //<<< k=3 as 0 and 1 are for sign and 2 is skipped + lt = tn - tp; + if ((tn - tzc) > 0) { + fr = (tzc - tp) / lt; + acc += fr * lt * s_az_atn * im[w + uv]; + acc_ += fr * lt * s_az_atn * im[w_ + uv]; + w += sgnaz; + w_ -= sgnaz; + acc += (1 - fr) * lt * s_az_atn * im[w + uv]; + acc_ += (1 - fr) * lt * s_az_atn * im[w_ + uv]; + tzc += dtz; + } else { + acc += lt * s_az_atn * im[w + uv]; + acc_ += lt * s_az_atn * im[w_ + uv]; + } + + trc += dtr * rbit; + tcc += dtc * !rbit; + + uv += !rbit * sgna0 * SZ_IMZ; + uv -= rbit * sgna1 * SZ_IMZ * SZ_IMY; + + tp = tn; + rbit = tv[N_TV * ixt + k + 1] & 0x01; + tn = trc * rbit + tcc * !rbit; + } + + // blockIdx.x is the transaxial bin index + if (att == 1) { + if (span == 1) { + sino[c_li2sn[ixz].x + blockIdx.x * snno] = expf(-acc); + sino[c_li2sn[ixz].y + blockIdx.x * snno] = expf(-acc_); + } else if (span == 11) { + atomicAdd(sino + c_li2sn[ixz].x + blockIdx.x * snno, expf(-acc) / (float)c_li2nos[ixz]); + atomicAdd(sino + c_li2sn[ixz].y + blockIdx.x * snno, expf(-acc_) / (float)c_li2nos[ixz]); + } + } else if (att == 0) { + atomicAdd(sino + c_li2sn[ixz].x + blockIdx.x * snno, acc); + atomicAdd(sino + c_li2sn[ixz].y + blockIdx.x * snno, acc_); + } + } } - //-------------------------------------------------------------------------------------------------- -void gpu_fprj(float * prjout, - float * im, - float * li2rng, - short * li2sn, - char * li2nos, - short *s2c, - int *aw2ali, - float *crs, - int *subs, - int Nprj, - int Naw, - 
int N0crs, - Cnst Cnt, char att) -{ - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); - - //--- TRANSAXIAL COMPONENT - float4 *d_crs; HANDLE_ERROR(cudaMalloc(&d_crs, N0crs * sizeof(float4))); - HANDLE_ERROR(cudaMemcpy(d_crs, crs, N0crs * sizeof(float4), cudaMemcpyHostToDevice)); - - short2 *d_s2c; HANDLE_ERROR(cudaMalloc(&d_s2c, AW * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_s2c, s2c, AW * sizeof(short2), cudaMemcpyHostToDevice)); - - float *d_tt; HANDLE_ERROR(cudaMalloc(&d_tt, N_TT*AW * sizeof(float))); - - unsigned char *d_tv; HANDLE_ERROR(cudaMalloc(&d_tv, N_TV*AW * sizeof(unsigned char))); - HANDLE_ERROR(cudaMemset(d_tv, 0, N_TV*AW * sizeof(unsigned char))); - - // array of subset projection bins - int *d_subs; HANDLE_ERROR(cudaMalloc(&d_subs, Nprj * sizeof(int))); - HANDLE_ERROR(cudaMemcpy(d_subs, subs, Nprj * sizeof(int), cudaMemcpyHostToDevice)); - //--- - - //----------------------------------------------------------------- - //RINGS: either all or a subset of rings can be used (span-1 feature only) - //----------------------------------------------------------------- - // number of rings customised and the resulting size of LUTs and voxels - int nrng_c, nil2r_c, vz0, vz1, nvz; - //number of sinos - short snno = -1; - if (Cnt.SPN == 1) { - // number of direct rings considered - nrng_c = Cnt.RNG_END - Cnt.RNG_STRT; - // number of "positive" michelogram elements used for projection (can be smaller than the maximum) - nil2r_c = (nrng_c + 1)*nrng_c / 2; - snno = nrng_c*nrng_c; - //correct for the max. 
ring difference in the full axial extent (don't use ring range (1,63) as for this case no correction) - if (nrng_c == NRINGS) { - snno -= 12; - nil2r_c -= 6; - } - } - else if (Cnt.SPN == 11) { - snno = NSINOS11; - nrng_c = NRINGS; - nil2r_c = NLI2R; - } - // voxels in axial direction - vz0 = 2 * Cnt.RNG_STRT; - vz1 = 2 * (Cnt.RNG_END - 1); - nvz = 2 * nrng_c - 1; - if (Cnt.LOG <= LOGDEBUG) { - printf("i> detector rings range: [%d, %d) => number of sinos: %d\n", Cnt.RNG_STRT, Cnt.RNG_END, snno); - printf(" corresponding voxels: [%d, %d] => number of voxels: %d\n", vz0, vz1, nvz); - } - - //----------------------------------------------------------------- - - //--- FULLY 3D - float *d_sn; HANDLE_ERROR(cudaMalloc(&d_sn, Nprj*snno * sizeof(float))); - HANDLE_ERROR(cudaMemset(d_sn, 0, Nprj*snno * sizeof(float))); - - //allocate for image to be forward projected on the device - float *d_im; HANDLE_ERROR(cudaMalloc(&d_im, SZ_IMX*SZ_IMY*SZ_IMZ * sizeof(float))); - - - // when rings are reduced expand the image to account for whole axial FOV - if (nvz>>(d_im, d_imr, vz0, nvz); - HANDLE_ERROR(cudaGetLastError()); - cudaFree(d_imr); - } - else { - //copy to GPU memory - HANDLE_ERROR(cudaMemcpy(d_im, im, SZ_IMX*SZ_IMY*SZ_IMZ * sizeof(float), cudaMemcpyHostToDevice)); - } - - // float *d_li2rng; HANDLE_ERROR( cudaMalloc(&d_li2rng, N0li*N1li*sizeof(float)) ); - // HANDLE_ERROR( cudaMemcpy( d_li2rng, li2rng, N0li*N1li*sizeof(float), cudaMemcpyHostToDevice) ); - - // int *d_li2sn; HANDLE_ERROR(cudaMalloc(&d_li2sn, N0li*N1li*sizeof(int)) ); - // HANDLE_ERROR( cudaMemcpy( d_li2sn, li2sn, N0li*N1li*sizeof(int), cudaMemcpyHostToDevice) ); - - // int *d_li2nos; HANDLE_ERROR( cudaMalloc(&d_li2nos, N1li*sizeof(int)) ); - // HANDLE_ERROR( cudaMemcpy( d_li2nos, li2nos, N1li*sizeof(int), cudaMemcpyHostToDevice) ); - - cudaMemcpyToSymbol(c_li2rng, li2rng, nil2r_c * sizeof(float2)); - cudaMemcpyToSymbol(c_li2sn, li2sn, nil2r_c * sizeof(short2)); - cudaMemcpyToSymbol(c_li2nos, li2nos, nil2r_c 
* sizeof(char)); - - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - if (Cnt.LOG <= LOGDEBUG) - printf("i> calculating sinograms via forward projection..."); - - //------------DO TRANSAXIAL CALCULATIONS--------------------------------- - gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); - //----------------------------------------------------------------------- - - //============================================================================ - fprj_drct <<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att); - HANDLE_ERROR(cudaGetLastError()); - // ============================================================================ - - int zoff = nrng_c; - //number of oblique sinograms - int Noblq = (nrng_c - 1)*nrng_c / 2; - - //first for reduced number of detector rings - if (Cnt.SPN == 1 && Noblq <= 1024 && Noblq>0){ - fprj_oblq <<< Nprj, Noblq >>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff); - HANDLE_ERROR(cudaGetLastError()); - - } - else { - fprj_oblq <<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff); - HANDLE_ERROR(cudaGetLastError()); - - zoff += NSINOS / 4; - fprj_oblq <<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff); - HANDLE_ERROR(cudaGetLastError()); - - } - - - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGDEBUG) - printf("DONE in %fs.\n", 0.001*elapsedTime); - - cudaDeviceSynchronize(); - - HANDLE_ERROR(cudaMemcpy(prjout, d_sn, Nprj*snno * sizeof(float), cudaMemcpyDeviceToHost)); - - cudaFree(d_sn); - cudaFree(d_im); - cudaFree(d_tt); - cudaFree(d_tv); - cudaFree(d_subs); - HANDLE_ERROR(cudaFree(d_crs)); - HANDLE_ERROR(cudaFree(d_s2c)); - - return; +void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2nos, short *s2c, + int *aw2ali, float *crs, int *subs, int Nprj, int Naw, int N0crs, 
Cnst Cnt, + char att) { + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGDEBUG) + printf("i> using CUDA device #%d\n", dev_id); + + //--- TRANSAXIAL COMPONENT + float4 *d_crs; + HANDLE_ERROR(cudaMalloc(&d_crs, N0crs * sizeof(float4))); + HANDLE_ERROR(cudaMemcpy(d_crs, crs, N0crs * sizeof(float4), cudaMemcpyHostToDevice)); + + short2 *d_s2c; + HANDLE_ERROR(cudaMalloc(&d_s2c, AW * sizeof(short2))); + HANDLE_ERROR(cudaMemcpy(d_s2c, s2c, AW * sizeof(short2), cudaMemcpyHostToDevice)); + + float *d_tt; + HANDLE_ERROR(cudaMalloc(&d_tt, N_TT * AW * sizeof(float))); + + unsigned char *d_tv; + HANDLE_ERROR(cudaMalloc(&d_tv, N_TV * AW * sizeof(unsigned char))); + HANDLE_ERROR(cudaMemset(d_tv, 0, N_TV * AW * sizeof(unsigned char))); + + // array of subset projection bins + int *d_subs; + HANDLE_ERROR(cudaMalloc(&d_subs, Nprj * sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_subs, subs, Nprj * sizeof(int), cudaMemcpyHostToDevice)); + //--- + + //----------------------------------------------------------------- + // RINGS: either all or a subset of rings can be used (span-1 feature only) + //----------------------------------------------------------------- + // number of rings customised and the resulting size of LUTs and voxels + int nrng_c, nil2r_c, vz0, vz1, nvz; + // number of sinos + short snno = -1; + if (Cnt.SPN == 1) { + // number of direct rings considered + nrng_c = Cnt.RNG_END - Cnt.RNG_STRT; + // number of "positive" michelogram elements used for projection (can be smaller than the + // maximum) + nil2r_c = (nrng_c + 1) * nrng_c / 2; + snno = nrng_c * nrng_c; + // correct for the max. 
ring difference in the full axial extent (don't use ring range (1,63) + // as for this case no correction) + if (nrng_c == NRINGS) { + snno -= 12; + nil2r_c -= 6; + } + } else if (Cnt.SPN == 11) { + snno = NSINOS11; + nrng_c = NRINGS; + nil2r_c = NLI2R; + } + // voxels in axial direction + vz0 = 2 * Cnt.RNG_STRT; + vz1 = 2 * (Cnt.RNG_END - 1); + nvz = 2 * nrng_c - 1; + if (Cnt.LOG <= LOGDEBUG) { + printf("i> detector rings range: [%d, %d) => number of sinos: %d\n", Cnt.RNG_STRT, + Cnt.RNG_END, snno); + printf(" corresponding voxels: [%d, %d] => number of voxels: %d\n", vz0, vz1, nvz); + } + + //----------------------------------------------------------------- + + //--- FULLY 3D + float *d_sn; + HANDLE_ERROR(cudaMalloc(&d_sn, Nprj * snno * sizeof(float))); + HANDLE_ERROR(cudaMemset(d_sn, 0, Nprj * snno * sizeof(float))); + + // allocate for image to be forward projected on the device + float *d_im; + HANDLE_ERROR(cudaMalloc(&d_im, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); + + // when rings are reduced expand the image to account for whole axial FOV + if (nvz < SZ_IMZ) { + // first the reduced image into the device + float *d_imr; + HANDLE_ERROR(cudaMalloc(&d_imr, SZ_IMX * SZ_IMY * nvz * sizeof(float))); + HANDLE_ERROR( + cudaMemcpy(d_imr, im, SZ_IMX * SZ_IMY * nvz * sizeof(float), cudaMemcpyHostToDevice)); + // put zeros in the gaps of unused voxels + HANDLE_ERROR(cudaMemset(d_im, 0, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); + // number of axial row for max threads + int nar = MXTHRD / nvz; + dim3 THRD(nvz, nar, 1); + dim3 BLCK((SZ_IMY + nar - 1) / nar, SZ_IMX, 1); + imExpand<<>>(d_im, d_imr, vz0, nvz); + HANDLE_ERROR(cudaGetLastError()); + cudaFree(d_imr); + } else { + // copy to GPU memory + HANDLE_ERROR( + cudaMemcpy(d_im, im, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float), cudaMemcpyHostToDevice)); + } + + // float *d_li2rng; HANDLE_ERROR( cudaMalloc(&d_li2rng, N0li*N1li*sizeof(float)) ); + // HANDLE_ERROR( cudaMemcpy( d_li2rng, li2rng, N0li*N1li*sizeof(float), 
cudaMemcpyHostToDevice) + // ); + + // int *d_li2sn; HANDLE_ERROR(cudaMalloc(&d_li2sn, N0li*N1li*sizeof(int)) ); + // HANDLE_ERROR( cudaMemcpy( d_li2sn, li2sn, N0li*N1li*sizeof(int), cudaMemcpyHostToDevice) ); + + // int *d_li2nos; HANDLE_ERROR( cudaMalloc(&d_li2nos, N1li*sizeof(int)) ); + // HANDLE_ERROR( cudaMemcpy( d_li2nos, li2nos, N1li*sizeof(int), cudaMemcpyHostToDevice) ); + + cudaMemcpyToSymbol(c_li2rng, li2rng, nil2r_c * sizeof(float2)); + cudaMemcpyToSymbol(c_li2sn, li2sn, nil2r_c * sizeof(short2)); + cudaMemcpyToSymbol(c_li2nos, li2nos, nil2r_c * sizeof(char)); + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + + if (Cnt.LOG <= LOGDEBUG) + printf("i> calculating sinograms via forward projection..."); + + //------------DO TRANSAXIAL CALCULATIONS--------------------------------- + gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); + //----------------------------------------------------------------------- + + //============================================================================ + fprj_drct<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att); + HANDLE_ERROR(cudaGetLastError()); + // ============================================================================ + + int zoff = nrng_c; + // number of oblique sinograms + int Noblq = (nrng_c - 1) * nrng_c / 2; + + // first for reduced number of detector rings + if (Cnt.SPN == 1 && Noblq <= 1024 && Noblq > 0) { + fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff); + HANDLE_ERROR(cudaGetLastError()); + + } else { + fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff); + HANDLE_ERROR(cudaGetLastError()); + + zoff += NSINOS / 4; + fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff); + HANDLE_ERROR(cudaGetLastError()); + } + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + 
cudaEventDestroy(stop); + if (Cnt.LOG <= LOGDEBUG) + printf("DONE in %fs.\n", 0.001 * elapsedTime); + + cudaDeviceSynchronize(); + + HANDLE_ERROR(cudaMemcpy(prjout, d_sn, Nprj * snno * sizeof(float), cudaMemcpyDeviceToHost)); + + cudaFree(d_sn); + cudaFree(d_im); + cudaFree(d_tt); + cudaFree(d_tv); + cudaFree(d_subs); + HANDLE_ERROR(cudaFree(d_crs)); + HANDLE_ERROR(cudaFree(d_s2c)); + + return; } - - - - - - - - //======================================================================= -void rec_fprj(float *d_sino, - float *d_img, - int *d_sub, - int Nprj, +void rec_fprj(float *d_sino, float *d_img, int *d_sub, int Nprj, - float *d_tt, - unsigned char *d_tv, + float *d_tt, unsigned char *d_tv, - float *li2rng, - short *li2sn, - char *li2nos, + float *li2rng, short *li2sn, char *li2nos, - Cnst Cnt) + Cnst Cnt) { - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); - - //get the axial LUTs in constant memory - cudaMemcpyToSymbol(c_li2rng, li2rng, NLI2R * sizeof(float2)); - cudaMemcpyToSymbol(c_li2sn, li2sn, NLI2R * sizeof(short2)); - cudaMemcpyToSymbol(c_li2nos, li2nos, NLI2R * sizeof(char)); - - //number of sinos - short snno = -1; - if (Cnt.SPN == 1) snno = NSINOS; - else if (Cnt.SPN == 11) snno = NSINOS11; - - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - if (Cnt.LOG <= LOGDEBUG) printf("i> subset forward projection (Nprj=%d)... 
", Nprj); - - //============================================================================ - fprj_drct << > >(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0); - // HANDLE_ERROR(cudaGetLastError()); - //============================================================================ - - int zoff = NRINGS; - //============================================================================ - fprj_oblq << > >(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff); - // HANDLE_ERROR(cudaGetLastError()); - //============================================================================ - - zoff += NSINOS / 4; - //============================================================================ - fprj_oblq << > >(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff); - // HANDLE_ERROR(cudaGetLastError()); - //============================================================================ - - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGDEBUG) printf("DONE in %fs.\n", 0.001*elapsedTime); - - cudaDeviceSynchronize(); - - - return; - + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGDEBUG) + printf("i> using CUDA device #%d\n", dev_id); + + // get the axial LUTs in constant memory + cudaMemcpyToSymbol(c_li2rng, li2rng, NLI2R * sizeof(float2)); + cudaMemcpyToSymbol(c_li2sn, li2sn, NLI2R * sizeof(short2)); + cudaMemcpyToSymbol(c_li2nos, li2nos, NLI2R * sizeof(char)); + + // number of sinos + short snno = -1; + if (Cnt.SPN == 1) + snno = NSINOS; + else if (Cnt.SPN == 11) + snno = NSINOS11; + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + if (Cnt.LOG <= LOGDEBUG) + printf("i> subset forward projection (Nprj=%d)... 
", Nprj); + + //============================================================================ + fprj_drct<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0); + // HANDLE_ERROR(cudaGetLastError()); + //============================================================================ + + int zoff = NRINGS; + //============================================================================ + fprj_oblq<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff); + // HANDLE_ERROR(cudaGetLastError()); + //============================================================================ + + zoff += NSINOS / 4; + //============================================================================ + fprj_oblq<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff); + // HANDLE_ERROR(cudaGetLastError()); + //============================================================================ + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGDEBUG) + printf("DONE in %fs.\n", 0.001 * elapsedTime); + + cudaDeviceSynchronize(); + + return; } diff --git a/niftypet/nipet/prj/src/prjf.h b/niftypet/nipet/prj/src/prjf.h index 6563a32e..b37d16ee 100644 --- a/niftypet/nipet/prj/src/prjf.h +++ b/niftypet/nipet/prj/src/prjf.h @@ -1,37 +1,21 @@ -#include #include "def.h" -#include "tprj.h" #include "scanner_0.h" +#include "tprj.h" +#include #ifndef PRJF_H #define PRJF_H -void gpu_fprj(float * prjout, - float * im, - float * li2rng, - short * li2sn, - char * li2nos, - short *s2c, - int *aw2ali, - float *crs, - int *subs, - int Nprj, - int Naw, - int N0crs, - Cnst Cnt, char att); +void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2nos, short *s2c, + int *aw2ali, float *crs, int *subs, int Nprj, int Naw, int N0crs, Cnst Cnt, + char att); -void rec_fprj(float *d_sino, - float *d_img, - int *d_sub, - int Nprj, +void 
rec_fprj(float *d_sino, float *d_img, int *d_sub, int Nprj, - float *d_tt, - unsigned char *d_tv, + float *d_tt, unsigned char *d_tv, - float *li2rng, - short *li2sn, - char *li2nos, + float *li2rng, short *li2sn, char *li2nos, - Cnst Cnt); + Cnst Cnt); #endif diff --git a/niftypet/nipet/prj/src/recon.cu b/niftypet/nipet/prj/src/recon.cu index fc4eb447..da539101 100644 --- a/niftypet/nipet/prj/src/recon.cu +++ b/niftypet/nipet/prj/src/recon.cu @@ -9,7 +9,7 @@ Copyrights: #include "recon.h" #include -//number of threads used for element-wise GPU calculations +// number of threads used for element-wise GPU calculations #define NTHRDS 1024 #define FLOAT_WITHIN_EPS(x) (-0.000001f < x && x < 0.000001f) @@ -26,7 +26,8 @@ __global__ void pad(float *dst, float *src, const int z) { for (int k = 0; k < SZ_IMZ; ++k) dst[k] = src[k]; } -void d_pad(float *dst, float *src, const int z = COLUMNS_BLOCKDIM_X - SZ_IMZ % COLUMNS_BLOCKDIM_X) { +void d_pad(float *dst, float *src, + const int z = COLUMNS_BLOCKDIM_X - SZ_IMZ % COLUMNS_BLOCKDIM_X) { HANDLE_ERROR(cudaMemset(dst, 0, SZ_IMX * SZ_IMY * (SZ_IMZ + z) * sizeof(float))); dim3 BpG((SZ_IMX + NTHRDS / 32 - 1) / (NTHRDS / 32), (SZ_IMY + 31) / 32); dim3 TpB(NTHRDS / 32, 32); @@ -46,7 +47,8 @@ __global__ void unpad(float *dst, float *src, const int z) { for (int k = 0; k < SZ_IMZ; ++k) dst[k] = src[k]; } -void d_unpad(float *dst, float *src, const int z = COLUMNS_BLOCKDIM_X - SZ_IMZ % COLUMNS_BLOCKDIM_X) { +void d_unpad(float *dst, float *src, + const int z = COLUMNS_BLOCKDIM_X - SZ_IMZ % COLUMNS_BLOCKDIM_X) { dim3 BpG((SZ_IMX + NTHRDS / 32 - 1) / (NTHRDS / 32), (SZ_IMY + 31) / 32); dim3 TpB(NTHRDS / 32, 32); unpad<<>>(dst, src, z); @@ -56,7 +58,7 @@ void d_unpad(float *dst, float *src, const int z = COLUMNS_BLOCKDIM_X - SZ_IMZ % /// Convolution kernel array __constant__ float c_Kernel[3 * KERNEL_LENGTH]; void setConvolutionKernel(float *krnl) { - //krnl: separable three kernels for x, y and z + // krnl: separable three kernels for x, 
y and z cudaMemcpyToSymbol(c_Kernel, krnl, 3 * KERNEL_LENGTH * sizeof(float)); } /// sigma: Gaussian sigma @@ -80,10 +82,12 @@ void setKernelGaussian(float sigma) { /// Row convolution filter __global__ void cnv_rows(float *d_Dst, float *d_Src, int imageW, int imageH, int pitch) { - __shared__ float s_Data[ROWS_BLOCKDIM_Y][(ROWS_RESULT_STEPS + 2 * ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X]; + __shared__ float s_Data[ROWS_BLOCKDIM_Y] + [(ROWS_RESULT_STEPS + 2 * ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X]; // Offset to the left halo edge - const int baseX = (blockIdx.x * ROWS_RESULT_STEPS - ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X + threadIdx.x; + const int baseX = + (blockIdx.x * ROWS_RESULT_STEPS - ROWS_HALO_STEPS) * ROWS_BLOCKDIM_X + threadIdx.x; const int baseY = blockIdx.y * ROWS_BLOCKDIM_Y + threadIdx.y; d_Src += baseY * pitch + baseX; @@ -104,7 +108,8 @@ __global__ void cnv_rows(float *d_Dst, float *d_Src, int imageW, int imageH, int // Load right halo #pragma unroll - for (int i = ROWS_HALO_STEPS + ROWS_RESULT_STEPS; i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS + ROWS_HALO_STEPS; i++) { + for (int i = ROWS_HALO_STEPS + ROWS_RESULT_STEPS; + i < ROWS_HALO_STEPS + ROWS_RESULT_STEPS + ROWS_HALO_STEPS; i++) { s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X] = (imageW - baseX > i * ROWS_BLOCKDIM_X) ? 
d_Src[i * ROWS_BLOCKDIM_X] : 0; } @@ -117,7 +122,8 @@ __global__ void cnv_rows(float *d_Dst, float *d_Src, int imageW, int imageH, int float sum = 0; #pragma unroll for (int j = -RSZ_PSF_KRNL; j <= RSZ_PSF_KRNL; j++) { - sum += c_Kernel[RSZ_PSF_KRNL - j] * s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X + j]; + sum += + c_Kernel[RSZ_PSF_KRNL - j] * s_Data[threadIdx.y][threadIdx.x + i * ROWS_BLOCKDIM_X + j]; } d_Dst[i * ROWS_BLOCKDIM_X] = sum; } @@ -127,19 +133,23 @@ __global__ void cnv_rows(float *d_Dst, float *d_Src, int imageW, int imageH, int __global__ void cnv_columns(float *d_Dst, float *d_Src, int imageW, int imageH, int pitch, int offKrnl // kernel offset for asymmetric kernels // x, y, z (still the same dims though) - ) { - __shared__ float s_Data[COLUMNS_BLOCKDIM_X][(COLUMNS_RESULT_STEPS + 2 * COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + 1]; +) { + __shared__ float + s_Data[COLUMNS_BLOCKDIM_X] + [(COLUMNS_RESULT_STEPS + 2 * COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + 1]; // Offset to the upper halo edge const int baseX = blockIdx.x * COLUMNS_BLOCKDIM_X + threadIdx.x; - const int baseY = (blockIdx.y * COLUMNS_RESULT_STEPS - COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + threadIdx.y; + const int baseY = + (blockIdx.y * COLUMNS_RESULT_STEPS - COLUMNS_HALO_STEPS) * COLUMNS_BLOCKDIM_Y + threadIdx.y; d_Src += baseY * pitch + baseX; d_Dst += baseY * pitch + baseX; // Main data #pragma unroll for (int i = COLUMNS_HALO_STEPS; i < COLUMNS_HALO_STEPS + COLUMNS_RESULT_STEPS; i++) { - s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y] = d_Src[i * COLUMNS_BLOCKDIM_Y * pitch]; + s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y] = + d_Src[i * COLUMNS_BLOCKDIM_Y * pitch]; } // Upper halo @@ -165,7 +175,8 @@ __global__ void cnv_columns(float *d_Dst, float *d_Src, int imageW, int imageH, float sum = 0; #pragma unroll for (int j = -RSZ_PSF_KRNL; j <= RSZ_PSF_KRNL; j++) { - sum += c_Kernel[offKrnl + RSZ_PSF_KRNL - j] * s_Data[threadIdx.x][threadIdx.y + i * 
COLUMNS_BLOCKDIM_Y + j]; + sum += c_Kernel[offKrnl + RSZ_PSF_KRNL - j] * + s_Data[threadIdx.x][threadIdx.y + i * COLUMNS_BLOCKDIM_Y + j]; } d_Dst[i * COLUMNS_BLOCKDIM_Y * pitch] = sum; } @@ -191,13 +202,15 @@ void d_conv(float *d_buff, float *d_imgout, float *d_imgint, int Nvk, int Nvj, i //------ ROWS ------- dim3 blocks(Nvi / (ROWS_RESULT_STEPS * ROWS_BLOCKDIM_X), Nvj / ROWS_BLOCKDIM_Y); dim3 threads(ROWS_BLOCKDIM_X, ROWS_BLOCKDIM_Y); - cnv_rows<<>>(d_imgout + k * Nvi * Nvj, d_imgint + k * Nvi * Nvj, Nvi, Nvj, Nvi); + cnv_rows<<>>(d_imgout + k * Nvi * Nvj, d_imgint + k * Nvi * Nvj, Nvi, Nvj, + Nvi); HANDLE_ERROR(cudaGetLastError()); //----- COLUMNS ---- dim3 blocks2(Nvi / COLUMNS_BLOCKDIM_X, Nvj / (COLUMNS_RESULT_STEPS * COLUMNS_BLOCKDIM_Y)); dim3 threads2(COLUMNS_BLOCKDIM_X, COLUMNS_BLOCKDIM_Y); - cnv_columns<<>>(d_buff + k * Nvi * Nvj, d_imgout + k * Nvi * Nvj, Nvi, Nvj, Nvi, KERNEL_LENGTH); + cnv_columns<<>>(d_buff + k * Nvi * Nvj, d_imgout + k * Nvi * Nvj, Nvi, Nvj, + Nvi, KERNEL_LENGTH); HANDLE_ERROR(cudaGetLastError()); } @@ -205,348 +218,320 @@ void d_conv(float *d_buff, float *d_imgout, float *d_imgint, int Nvk, int Nvj, i for (int j = 0; j < Nvj; j++) { dim3 blocks3(Nvi / COLUMNS_BLOCKDIM_X, Nvk / (COLUMNS_RESULT_STEPS * COLUMNS_BLOCKDIM_Y)); dim3 threads3(COLUMNS_BLOCKDIM_X, COLUMNS_BLOCKDIM_Y); - cnv_columns<<>>(d_imgout + j * Nvi, d_buff + j * Nvi, Nvi, Nvk, Nvi * Nvj, 2 * KERNEL_LENGTH); + cnv_columns<<>>(d_imgout + j * Nvi, d_buff + j * Nvi, Nvi, Nvk, Nvi * Nvj, + 2 * KERNEL_LENGTH); HANDLE_ERROR(cudaGetLastError()); } } /** end of separable convolution */ - //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -//Element-wise multiplication -__global__ void elmult(float * inA, - float * inB, - int length) -{ - int idx = threadIdx.x + blockDim.x*blockIdx.x; - if (idx> >(d_inA, d_inB, length); +void d_elmult(float *d_inA, float *d_inB, int length) { + dim3 BpG(ceil(length / (float)NTHRDS), 1, 1); + dim3 TpB(NTHRDS, 1, 1); + elmult<<>>(d_inA, 
d_inB, length); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -//Element-wise division with result stored in first input variable -__global__ void eldiv0(float * inA, - float * inB, - int length) -{ - int idx = threadIdx.x + blockDim.x*blockIdx.x; - if (idx>=length) return; - if(FLOAT_WITHIN_EPS(inB[idx])) inA[idx] = 0; - else inA[idx] /= inB[idx]; +// Element-wise division with result stored in first input variable +__global__ void eldiv0(float *inA, float *inB, int length) { + int idx = threadIdx.x + blockDim.x * blockIdx.x; + if (idx >= length) + return; + if (FLOAT_WITHIN_EPS(inB[idx])) + inA[idx] = 0; + else + inA[idx] /= inB[idx]; } -void d_eldiv(float * d_inA, - float * d_inB, - int length) -{ - dim3 BpG(ceil(length / (float)NTHRDS), 1, 1); - dim3 TpB(NTHRDS, 1, 1); - eldiv0 << > >(d_inA, d_inB, length); +void d_eldiv(float *d_inA, float *d_inB, int length) { + dim3 BpG(ceil(length / (float)NTHRDS), 1, 1); + dim3 TpB(NTHRDS, 1, 1); + eldiv0<<>>(d_inA, d_inB, length); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -__global__ void sneldiv(float *inA, - unsigned short *inB, - int *sub, - int Nprj, - int snno) -{ - int idz = threadIdx.x + blockDim.x*blockIdx.x; - if (!(blockIdx.y only active bins of the subset - // inB > all sinogram bins - float b = (float)inB[snno*sub[blockIdx.y] + idz]; - if (FLOAT_WITHIN_EPS(inA[snno*blockIdx.y + idz])) b = 0; - else b /= inA[snno*blockIdx.y + idz];//sub[blockIdx.y] - inA[snno*blockIdx.y + idz] = b; //sub[blockIdx.y] +__global__ void sneldiv(float *inA, unsigned short *inB, int *sub, int Nprj, int snno) { + int idz = threadIdx.x + blockDim.x * blockIdx.x; + if (!(blockIdx.y < Nprj && idz < snno)) + return; + // inA > only active bins of the subset + // inB > all sinogram bins + float b = (float)inB[snno * sub[blockIdx.y] + idz]; + if (FLOAT_WITHIN_EPS(inA[snno * 
blockIdx.y + idz])) + b = 0; + else + b /= inA[snno * blockIdx.y + idz]; // sub[blockIdx.y] + inA[snno * blockIdx.y + idz] = b; // sub[blockIdx.y] } -void d_sneldiv(float *d_inA, - unsigned short *d_inB, - int *d_sub, - int Nprj, - int snno) -{ - dim3 BpG(ceil(snno / (float)NTHRDS), Nprj, 1); - dim3 TpB(NTHRDS, 1, 1); - sneldiv << > >(d_inA, d_inB, d_sub, Nprj, snno); +void d_sneldiv(float *d_inA, unsigned short *d_inB, int *d_sub, int Nprj, int snno) { + dim3 BpG(ceil(snno / (float)NTHRDS), Nprj, 1); + dim3 TpB(NTHRDS, 1, 1); + sneldiv<<>>(d_inA, d_inB, d_sub, Nprj, snno); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -__global__ void sneladd(float * inA, - float * inB, - int *sub, - int Nprj, - int snno) -{ - int idz = threadIdx.x + blockDim.x*blockIdx.x; - if (blockIdx.y> >(d_inA, d_inB, d_sub, Nprj, snno); +void d_sneladd(float *d_inA, float *d_inB, int *d_sub, int Nprj, int snno) { + dim3 BpG(ceil(snno / (float)NTHRDS), Nprj, 1); + dim3 TpB(NTHRDS, 1, 1); + sneladd<<>>(d_inA, d_inB, d_sub, Nprj, snno); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -__global__ void eladd(float * inA, - float * inB, - int length) -{ - int idx = threadIdx.x + blockDim.x*blockIdx.x; - if (idx> >(d_inA, d_inB, length); +void d_eladd(float *d_inA, float *d_inB, int length) { + dim3 BpG(ceil(length / (float)NTHRDS), 1, 1); + dim3 TpB(NTHRDS, 1, 1); + eladd<<>>(d_inA, d_inB, length); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -__global__ void elmsk(float *inA, - float *inB, - bool *msk, - int length) -{ - int idx = threadIdx.x + blockDim.x*blockIdx.x; - - if (idx> >(d_inA, d_inB, d_msk, length); +void d_elmsk(float *d_inA, float *d_inB, bool *d_msk, int length) { + dim3 BpG(ceil(length / (float)NTHRDS), 1, 1); + dim3 TpB(NTHRDS, 1, 1); + 
elmsk<<>>(d_inA, d_inB, d_msk, length); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - +void osem(float *imgout, bool *rncmsk, unsigned short *psng, float *rsng, float *ssng, float *nsng, + float *asng, + int *subs, + float *sensimg, float *krnl, -void osem(float *imgout, - bool *rncmsk, - unsigned short *psng, - float *rsng, - float *ssng, - float *nsng, - float *asng, - - int *subs, - - float *sensimg, - float *krnl, - - float *li2rng, - short *li2sn, - char *li2nos, - short *s2c, - float *crs, - - int Nsub, int Nprj, - int N0crs, - Cnst Cnt) -{ - - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); - - - //--- TRANSAXIAL COMPONENT - float4 *d_crs; HANDLE_ERROR(cudaMalloc(&d_crs, N0crs * sizeof(float4))); - HANDLE_ERROR(cudaMemcpy(d_crs, crs, N0crs * sizeof(float4), cudaMemcpyHostToDevice)); + float *li2rng, short *li2sn, char *li2nos, short *s2c, float *crs, - short2 *d_s2c; HANDLE_ERROR(cudaMalloc(&d_s2c, AW * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_s2c, s2c, AW * sizeof(short2), cudaMemcpyHostToDevice)); - - - float *d_tt; HANDLE_ERROR(cudaMalloc(&d_tt, N_TT*AW * sizeof(float))); - - unsigned char *d_tv; HANDLE_ERROR(cudaMalloc(&d_tv, N_TV*AW * sizeof(unsigned char))); - HANDLE_ERROR(cudaMemset(d_tv, 0, N_TV*AW * sizeof(unsigned char))); - - //------------------------------------------------- - gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); - //------------------------------------------------- - - // array of subset projection bins - int *d_subs; HANDLE_ERROR(cudaMalloc(&d_subs, Nsub*Nprj * sizeof(int))); - HANDLE_ERROR(cudaMemcpy(d_subs, subs, Nsub*Nprj * sizeof(int), cudaMemcpyHostToDevice)); - //--- - - //number of sinos - short snno = -1; - if (Cnt.SPN == 1) snno = NSINOS; - else if (Cnt.SPN == 11) snno = NSINOS11; - - //full sinos (3D) - unsigned short *d_psng; HANDLE_ERROR(cudaMalloc(&d_psng, AW*snno * sizeof(unsigned short))); - HANDLE_ERROR(cudaMemcpy(d_psng, psng, AW*snno * 
sizeof(unsigned short), cudaMemcpyHostToDevice)); - - float *d_rsng; HANDLE_ERROR(cudaMalloc(&d_rsng, AW*snno * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_rsng, rsng, AW*snno * sizeof(float), cudaMemcpyHostToDevice)); - - float *d_ssng; HANDLE_ERROR(cudaMalloc(&d_ssng, AW*snno * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_ssng, ssng, AW*snno * sizeof(float), cudaMemcpyHostToDevice)); - - //add scatter and randoms together - d_eladd(d_rsng, d_ssng, snno*AW); - cudaFree(d_ssng); - - float *d_nsng; HANDLE_ERROR(cudaMalloc(&d_nsng, AW*snno * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_nsng, nsng, AW*snno * sizeof(float), cudaMemcpyHostToDevice)); - - //join norm and attenuation factors - float *d_ansng; HANDLE_ERROR(cudaMalloc(&d_ansng, snno*AW * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_ansng, asng, snno*AW * sizeof(float), cudaMemcpyHostToDevice)); - - //combine attenuation and normalisation in one sinogram - d_elmult(d_ansng, d_nsng, snno*AW); - cudaFree(d_nsng); - - //divide randoms+scatter by attenuation and norm factors - d_eldiv(d_rsng, d_ansng, snno*AW); + int Nsub, int Nprj, int N0crs, Cnst Cnt) { + + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGDEBUG) + printf("i> using CUDA device #%d\n", dev_id); + + //--- TRANSAXIAL COMPONENT + float4 *d_crs; + HANDLE_ERROR(cudaMalloc(&d_crs, N0crs * sizeof(float4))); + HANDLE_ERROR(cudaMemcpy(d_crs, crs, N0crs * sizeof(float4), cudaMemcpyHostToDevice)); + + short2 *d_s2c; + HANDLE_ERROR(cudaMalloc(&d_s2c, AW * sizeof(short2))); + HANDLE_ERROR(cudaMemcpy(d_s2c, s2c, AW * sizeof(short2), cudaMemcpyHostToDevice)); + + float *d_tt; + HANDLE_ERROR(cudaMalloc(&d_tt, N_TT * AW * sizeof(float))); + + unsigned char *d_tv; + HANDLE_ERROR(cudaMalloc(&d_tv, N_TV * AW * sizeof(unsigned char))); + HANDLE_ERROR(cudaMemset(d_tv, 0, N_TV * AW * sizeof(unsigned char))); + + //------------------------------------------------- + gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); + //------------------------------------------------- 
+ + // array of subset projection bins + int *d_subs; + HANDLE_ERROR(cudaMalloc(&d_subs, Nsub * Nprj * sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_subs, subs, Nsub * Nprj * sizeof(int), cudaMemcpyHostToDevice)); + //--- + + // number of sinos + short snno = -1; + if (Cnt.SPN == 1) + snno = NSINOS; + else if (Cnt.SPN == 11) + snno = NSINOS11; + + // full sinos (3D) + unsigned short *d_psng; + HANDLE_ERROR(cudaMalloc(&d_psng, AW * snno * sizeof(unsigned short))); + HANDLE_ERROR( + cudaMemcpy(d_psng, psng, AW * snno * sizeof(unsigned short), cudaMemcpyHostToDevice)); + + float *d_rsng; + HANDLE_ERROR(cudaMalloc(&d_rsng, AW * snno * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_rsng, rsng, AW * snno * sizeof(float), cudaMemcpyHostToDevice)); + + float *d_ssng; + HANDLE_ERROR(cudaMalloc(&d_ssng, AW * snno * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_ssng, ssng, AW * snno * sizeof(float), cudaMemcpyHostToDevice)); + + // add scatter and randoms together + d_eladd(d_rsng, d_ssng, snno * AW); + cudaFree(d_ssng); + + float *d_nsng; + HANDLE_ERROR(cudaMalloc(&d_nsng, AW * snno * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_nsng, nsng, AW * snno * sizeof(float), cudaMemcpyHostToDevice)); + + // join norm and attenuation factors + float *d_ansng; + HANDLE_ERROR(cudaMalloc(&d_ansng, snno * AW * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_ansng, asng, snno * AW * sizeof(float), cudaMemcpyHostToDevice)); + + // combine attenuation and normalisation in one sinogram + d_elmult(d_ansng, d_nsng, snno * AW); + cudaFree(d_nsng); + + // divide randoms+scatter by attenuation and norm factors + d_eldiv(d_rsng, d_ansng, snno * AW); + + float *d_imgout; + HANDLE_ERROR(cudaMalloc(&d_imgout, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_imgout, imgout, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float), + cudaMemcpyHostToDevice)); + + bool *d_rcnmsk; + HANDLE_ERROR(cudaMalloc(&d_rcnmsk, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(bool))); + HANDLE_ERROR(cudaMemcpy(d_rcnmsk, rncmsk, 
SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(bool), + cudaMemcpyHostToDevice)); + + // allocate sino for estimation (esng) + float *d_esng; + HANDLE_ERROR(cudaMalloc(&d_esng, Nprj * snno * sizeof(float))); + + //--sensitivity image (images for all subsets) + float *d_sensim; + + HANDLE_ERROR(cudaMalloc(&d_sensim, Nsub * SZ_IMZ * SZ_IMX * SZ_IMY * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_sensim, sensimg, Nsub * SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float), + cudaMemcpyHostToDevice)); + + // cudaMemset(d_sensim, 0, Nsub * SZ_IMZ*SZ_IMX*SZ_IMY*sizeof(float)); + // for(int i=0; i> The sensitivity pointer has size of %d and it's value is %lu \n", + // sizeof(d_sensim), &d_sensim); + // //~~~~ + + // resolution modelling kernel + setConvolutionKernel(krnl); + float *d_convTmp; + HANDLE_ERROR(cudaMalloc(&d_convTmp, SZ_IMX * SZ_IMY * (SZ_IMZ + 1) * sizeof(float))); + float *d_convSrc; + HANDLE_ERROR(cudaMalloc(&d_convSrc, SZ_IMX * SZ_IMY * (SZ_IMZ + 1) * sizeof(float))); + float *d_convDst; + HANDLE_ERROR(cudaMalloc(&d_convDst, SZ_IMX * SZ_IMY * (SZ_IMZ + 1) * sizeof(float))); + + // resolution modelling sensitivity image + for (int i = 0; i < Nsub && krnl[0] >= 0; i++) { + d_pad(d_convSrc, &d_sensim[i * SZ_IMZ * SZ_IMX * SZ_IMY]); + d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); + d_unpad(&d_sensim[i * SZ_IMZ * SZ_IMX * SZ_IMY], d_convDst); + } - float *d_imgout; HANDLE_ERROR(cudaMalloc(&d_imgout, SZ_IMX*SZ_IMY*SZ_IMZ * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_imgout, imgout, SZ_IMX*SZ_IMY*SZ_IMZ * sizeof(float), cudaMemcpyHostToDevice)); + // resolution modelling image + float *d_imgout_rm; + HANDLE_ERROR(cudaMalloc(&d_imgout_rm, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); - bool *d_rcnmsk; HANDLE_ERROR(cudaMalloc(&d_rcnmsk, SZ_IMX*SZ_IMY*SZ_IMZ * sizeof(bool))); - HANDLE_ERROR(cudaMemcpy(d_rcnmsk, rncmsk, SZ_IMX*SZ_IMY*SZ_IMZ * sizeof(bool), cudaMemcpyHostToDevice)); + //--back-propagated image + float *d_bimg; + HANDLE_ERROR(cudaMalloc(&d_bimg, SZ_IMY * 
SZ_IMY * SZ_IMZ * sizeof(float))); - // allocate sino for estimation (esng) - float *d_esng; HANDLE_ERROR(cudaMalloc(&d_esng, Nprj*snno * sizeof(float))); + if (Cnt.LOG <= LOGDEBUG) + printf("i> loaded variables in device memory for image reconstruction.\n"); + getMemUse(Cnt); - //--sensitivity image (images for all subsets) - float *d_sensim; + for (int i = 0; i < Nsub; i++) { + if (Cnt.LOG <= LOGDEBUG) + printf("<> subset %d-th <>\n", i); - HANDLE_ERROR(cudaMalloc(&d_sensim, Nsub * SZ_IMZ*SZ_IMX*SZ_IMY * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_sensim, sensimg, Nsub * SZ_IMX*SZ_IMY*SZ_IMZ * sizeof(float), cudaMemcpyHostToDevice)); + // resolution modelling current image + if (krnl[0] >= 0) { + d_pad(d_convSrc, d_imgout); + d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); + d_unpad(d_imgout_rm, d_convDst); + } - // cudaMemset(d_sensim, 0, Nsub * SZ_IMZ*SZ_IMX*SZ_IMY*sizeof(float)); - // for(int i=0; i> The sensitivity pointer has size of %d and it's value is %lu \n", sizeof(d_sensim), &d_sensim); - // //~~~~ + // forward project + cudaMemset(d_esng, 0, Nprj * snno * sizeof(float)); + rec_fprj(d_esng, Cnt.SIGMA_RM > 0 ? 
d_imgout_rm : d_imgout, &d_subs[i * Nprj + 1], + subs[i * Nprj], d_tt, d_tv, li2rng, li2sn, li2nos, Cnt); - // resolution modelling kernel - setConvolutionKernel(krnl); - float *d_convTmp; HANDLE_ERROR(cudaMalloc(&d_convTmp, SZ_IMX*SZ_IMY*(SZ_IMZ + 1) * sizeof(float))); - float *d_convSrc; HANDLE_ERROR(cudaMalloc(&d_convSrc, SZ_IMX*SZ_IMY*(SZ_IMZ + 1) * sizeof(float))); - float *d_convDst; HANDLE_ERROR(cudaMalloc(&d_convDst, SZ_IMX*SZ_IMY*(SZ_IMZ + 1) * sizeof(float))); + // add the randoms+scatter + d_sneladd(d_esng, d_rsng, &d_subs[i * Nprj + 1], subs[i * Nprj], snno); - // resolution modelling sensitivity image - for (int i=0; i=0; i++) { - d_pad(d_convSrc, &d_sensim[i*SZ_IMZ*SZ_IMX*SZ_IMY]); - d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); - d_unpad(&d_sensim[i*SZ_IMZ*SZ_IMX*SZ_IMY], d_convDst); - } + // divide to get the correction + d_sneldiv(d_esng, d_psng, &d_subs[i * Nprj + 1], subs[i * Nprj], snno); - // resolution modelling image - float *d_imgout_rm; HANDLE_ERROR(cudaMalloc(&d_imgout_rm, SZ_IMX*SZ_IMY*SZ_IMZ * sizeof(float))); + // back-project the correction + cudaMemset(d_bimg, 0, SZ_IMZ * SZ_IMX * SZ_IMY * sizeof(float)); + rec_bprj(d_bimg, d_esng, &d_subs[i * Nprj + 1], subs[i * Nprj], d_tt, d_tv, li2rng, li2sn, + li2nos, Cnt); - //--back-propagated image - float *d_bimg; HANDLE_ERROR(cudaMalloc(&d_bimg, SZ_IMY*SZ_IMY*SZ_IMZ * sizeof(float))); + // resolution modelling backprojection + if (krnl[0] >= 0) { + d_pad(d_convSrc, d_bimg); + d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); + d_unpad(d_bimg, d_convDst); + } - if (Cnt.LOG <= LOGDEBUG) printf("i> loaded variables in device memory for image reconstruction.\n"); - getMemUse(Cnt); + // divide by sensitivity image + d_eldiv(d_bimg, &d_sensim[i * SZ_IMZ * SZ_IMX * SZ_IMY], SZ_IMZ * SZ_IMX * SZ_IMY); - for (int i = 0; i subset %d-th <>\n", i); + // apply the recon mask to the back-projected image + d_elmsk(d_imgout, d_bimg, d_rcnmsk, SZ_IMZ * SZ_IMX * 
SZ_IMY); + } - //resolution modelling current image - if(krnl[0]>=0) { - d_pad(d_convSrc, d_imgout); - d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); - d_unpad(d_imgout_rm, d_convDst); - } - - //forward project - cudaMemset(d_esng, 0, Nprj*snno * sizeof(float)); - rec_fprj(d_esng, Cnt.SIGMA_RM>0 ? d_imgout_rm : d_imgout, &d_subs[i*Nprj + 1], subs[i*Nprj], d_tt, d_tv, li2rng, li2sn, li2nos, Cnt); - - //add the randoms+scatter - d_sneladd(d_esng, d_rsng, &d_subs[i*Nprj + 1], subs[i*Nprj], snno); - - //divide to get the correction - d_sneldiv(d_esng, d_psng, &d_subs[i*Nprj + 1], subs[i*Nprj], snno); - - //back-project the correction - cudaMemset(d_bimg, 0, SZ_IMZ*SZ_IMX*SZ_IMY * sizeof(float)); - rec_bprj(d_bimg, d_esng, &d_subs[i*Nprj + 1], subs[i*Nprj], d_tt, d_tv, li2rng, li2sn, li2nos, Cnt); - - //resolution modelling backprojection - if (krnl[0]>=0) { - d_pad(d_convSrc, d_bimg); - d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); - d_unpad(d_bimg, d_convDst); - } - - //divide by sensitivity image - d_eldiv(d_bimg, &d_sensim[i*SZ_IMZ*SZ_IMX*SZ_IMY], SZ_IMZ*SZ_IMX*SZ_IMY); - - //apply the recon mask to the back-projected image - d_elmsk(d_imgout, d_bimg, d_rcnmsk, SZ_IMZ*SZ_IMX*SZ_IMY); - } - - HANDLE_ERROR(cudaMemcpy(imgout, d_imgout, SZ_IMZ*SZ_IMX*SZ_IMY * sizeof(float), cudaMemcpyDeviceToHost)); - - cudaFree(d_crs); - cudaFree(d_s2c); - cudaFree(d_tt); - cudaFree(d_tv); - cudaFree(d_subs); - - cudaFree(d_psng); - cudaFree(d_rsng); - cudaFree(d_ansng); - cudaFree(d_esng); - - cudaFree(d_sensim); - cudaFree(d_convTmp); - cudaFree(d_convSrc); - cudaFree(d_convDst); - cudaFree(d_imgout); - cudaFree(d_imgout_rm); - cudaFree(d_bimg); - cudaFree(d_rcnmsk); + HANDLE_ERROR(cudaMemcpy(imgout, d_imgout, SZ_IMZ * SZ_IMX * SZ_IMY * sizeof(float), + cudaMemcpyDeviceToHost)); + + cudaFree(d_crs); + cudaFree(d_s2c); + cudaFree(d_tt); + cudaFree(d_tv); + cudaFree(d_subs); + + cudaFree(d_psng); + cudaFree(d_rsng); + cudaFree(d_ansng); + 
cudaFree(d_esng); + + cudaFree(d_sensim); + cudaFree(d_convTmp); + cudaFree(d_convSrc); + cudaFree(d_convDst); + cudaFree(d_imgout); + cudaFree(d_imgout_rm); + cudaFree(d_bimg); + cudaFree(d_rcnmsk); } diff --git a/niftypet/nipet/prj/src/recon.h b/niftypet/nipet/prj/src/recon.h index edd97de6..e3e3f2d1 100644 --- a/niftypet/nipet/prj/src/recon.h +++ b/niftypet/nipet/prj/src/recon.h @@ -1,49 +1,37 @@ -#include #include "def.h" #include "prjb.h" #include "prjf.h" -#include "tprj.h" #include "scanner_0.h" +#include "tprj.h" +#include #ifndef RECON_H #define RECON_H /* separable convolution */ -#define KERNEL_LENGTH (2*RSZ_PSF_KRNL + 1) +#define KERNEL_LENGTH (2 * RSZ_PSF_KRNL + 1) // Column convolution filter -#define COLUMNS_BLOCKDIM_X 8 -#define COLUMNS_BLOCKDIM_Y 8 +#define COLUMNS_BLOCKDIM_X 8 +#define COLUMNS_BLOCKDIM_Y 8 #define COLUMNS_RESULT_STEPS 8 -#define COLUMNS_HALO_STEPS 1 +#define COLUMNS_HALO_STEPS 1 // Row convolution filter -#define ROWS_BLOCKDIM_X 8 -#define ROWS_BLOCKDIM_Y 8 +#define ROWS_BLOCKDIM_X 8 +#define ROWS_BLOCKDIM_Y 8 #define ROWS_RESULT_STEPS 8 -#define ROWS_HALO_STEPS 1 - -void osem(float *imgout, - bool *rcnmsk, - unsigned short *psng, - float *rsng, - float *ssng, - float *nsng, - float *asng, - - int *subs, - - float *sensimg, - float *krnl, - - float *li2rng, - short *li2sn, - char *li2nos, - short *s2c, - float *crs, - - int Nsub, int Nprj, - int N0crs, - Cnst Cnt); +#define ROWS_HALO_STEPS 1 + +void osem(float *imgout, bool *rcnmsk, unsigned short *psng, float *rsng, float *ssng, float *nsng, + float *asng, + + int *subs, + + float *sensimg, float *krnl, + + float *li2rng, short *li2sn, char *li2nos, short *s2c, float *crs, + + int Nsub, int Nprj, int N0crs, Cnst Cnt); #endif diff --git a/niftypet/nipet/prj/src/tprj.cu b/niftypet/nipet/prj/src/tprj.cu index 10319828..09cd3f77 100644 --- a/niftypet/nipet/prj/src/tprj.cu +++ b/niftypet/nipet/prj/src/tprj.cu @@ -6,210 +6,194 @@ transaxial dimension. 
author: Pawel Markiewicz Copyrights: 2020 ------------------------------------------------------------------------*/ -#include "tprj.h" #include "scanner_0.h" +#include "tprj.h" /*************** TRANSAXIAL FWD/BCK *****************/ -__global__ void sddn_tx( - const float4 * crs, - const short2 * s2c, - float * tt, - unsigned char * tv) -{ - // indexing along the transaxial part of projection space - // (angle fast changing) - int idx = blockIdx.x*blockDim.x + threadIdx.x; - - if (idx= SZ_VOXY); - bool lr21 = (fabsf(lr1 - lr2) < L21); - int nr = y21 * roundf(abs(lr2 - lr1) / SZ_VOXY) + lr21; // number of rows on the way *_SZVXY - float dtr; - if (nr>0) - dtr = (tr2 - tr1) / nr + lr21*t2; // t increment for each row; add max (t2) when only one - else - dtr = t2; - - //-columns - double x1 = px + at[0] * t1; - float lc1 = SZ_VOXY*(ceil(x1 / SZ_VOXY) - signbit(at[0])); - int u = 0.5*SZ_IMX + floor(x1 / SZ_VOXY); //starting voxel column - - float x2 = px + at[0] * t2; - float lc2 = SZ_VOXY*(floor(x2 / SZ_VOXY) + signbit(at[0])); - - float tc1 = (lc1 - px) / at[0]; - float tc2 = (lc2 - px) / at[0]; - - bool x21 = (fabsf(x2 - x1) >= SZ_VOXY); - bool lc21 = (fabsf(lc1 - lc2) < L21); - int nc = x21 * roundf(fabsf(lc2 - lc1) / SZ_VOXY) + lc21; - float dtc; - if (nc>0) - dtc = (tc2 - tc1) / nc + lc21*t2; - else - dtc = t2; - - // if(idx==62301){ - // printf("\n$$$> e[0] = %f, e[1] = %f | px[0] = %f, py[1] = %f\n", e[0], e[1], px, py ); - // for(int i=0; i<9; i++) printf("tt[%d] = %f\n",i, tt[N_TT*idx+i]); - // } - - - /***************************************************************/ - float ang = atanf(at[1] / at[0]); // angle of the ray - bool tsin; // condition for the slower changing to be in - - // save the sign of vector at components. used for image indx increments. 
- // since it is saved in unsigned format use offset of 1; - if (at[0] >= 0) - tv[N_TV*idx] = 2; - else - tv[N_TV*idx] = 0; - - if (at[1] >= 0) - tv[N_TV*idx + 1] = 2; - else - tv[N_TV*idx + 1] = 0; - - int k = 2; - if ((angTA2)) { - float tf = tc1; // fast changing t (columns) - float ts = tr1; // slow changing t (rows) - //k = 0; - for (int i = 0; i <= nc; i++) { - tsin = (tf - ts)>0; - tv[N_TV*idx + k] = 1; - k += tsin; - ts += dtr*tsin; - - tv[N_TV*idx + k] = 0; - k += 1; - tf += dtc; - } - if (tr2>tc2) { - tv[N_TV*idx + k] = 1; - k += 1; - } - } - else { - float tf = tr1; // fast changing t (rows) - float ts = tc1; // slow changing t (columns) - //k = 0; - for (int i = 0; i <= nr; i++) { - tsin = (tf - ts)>0; - tv[idx*N_TV + k] = 0; - k += tsin; - ts += dtc*tsin; - - tv[idx*N_TV + k] = 1; - k += 1; - tf += dtr; - } - if (tc2>tr2) { - tv[N_TV*idx + k] = 0; - k += 1; - } - } - - tt[N_TT*idx ] = tr1; - tt[N_TT*idx + 1] = tc1; - tt[N_TT*idx + 2] = dtr; - tt[N_TT*idx + 3] = dtc; - tt[N_TT*idx + 4] = t1; - tt[N_TT*idx + 5] = fminf(tr1, tc1); - tt[N_TT*idx + 6] = t2; - tt[N_TT*idx + 7] = atn; - tt[N_TT*idx + 8] = u + (v << UV_SHFT); - tt[N_TT*idx + 9] = k; // note: the first two are used for signs - /***************************************************************/ - //tsino[idx] = dtc; - } +__global__ void sddn_tx(const float4 *crs, const short2 *s2c, float *tt, unsigned char *tv) { + // indexing along the transaxial part of projection space + // (angle fast changing) + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + if (idx < AW) { + + // get crystal indexes from projection index + short c1 = s2c[idx].x; + short c2 = s2c[idx].y; + + float cc1[3]; + float cc2[3]; + cc1[0] = .5 * (crs[c1].x + crs[c1].z); + cc2[0] = .5 * (crs[c2].x + crs[c2].z); + + cc1[1] = .5 * (crs[c1].y + crs[c1].w); + cc2[1] = .5 * (crs[c2].y + crs[c2].w); + + // crystal edge vector + float e[2]; + e[0] = crs[c1].z - crs[c1].x; + e[1] = crs[c1].w - crs[c1].y; + + float px, py; + px = 
crs[c1].x + 0.5 * e[0]; + py = crs[c1].y + 0.5 * e[1]; + + float at[3], atn; + for (int i = 0; i < 2; i++) { + at[i] = cc2[i] - cc1[i]; + atn += at[i] * at[i]; + } + atn = sqrtf(atn); + + at[0] = at[0] / atn; + at[1] = at[1] / atn; + + //--ring tfov + float Br = 2 * (px * at[0] + py * at[1]); + float Cr = 4 * (-TFOV2 + px * px + py * py); + float t1 = .5 * (-Br - sqrtf(Br * Br - Cr)); + float t2 = .5 * (-Br + sqrtf(Br * Br - Cr)); + //-- + + //-rows + float y1 = py + at[1] * t1; + float lr1 = SZ_VOXY * (ceilf(y1 / SZ_VOXY) - signbit(at[1])); // line of the first row + int v = 0.5 * SZ_IMY - ceil(y1 / SZ_VOXY); + + float y2 = py + at[1] * t2; + float lr2 = SZ_VOXY * (floorf(y2 / SZ_VOXY) + signbit(at[1])); // line of the last row + + float tr1 = (lr1 - py) / at[1]; // first ray interaction with a row + float tr2 = (lr2 - py) / at[1]; // last ray interaction with a row + // boolean + bool y21 = (fabsf(y2 - y1) >= SZ_VOXY); + bool lr21 = (fabsf(lr1 - lr2) < L21); + int nr = y21 * roundf(abs(lr2 - lr1) / SZ_VOXY) + lr21; // number of rows on the way *_SZVXY + float dtr; + if (nr > 0) + dtr = (tr2 - tr1) / nr + lr21 * t2; // t increment for each row; add max (t2) when only one + else + dtr = t2; + + //-columns + double x1 = px + at[0] * t1; + float lc1 = SZ_VOXY * (ceil(x1 / SZ_VOXY) - signbit(at[0])); + int u = 0.5 * SZ_IMX + floor(x1 / SZ_VOXY); // starting voxel column + + float x2 = px + at[0] * t2; + float lc2 = SZ_VOXY * (floor(x2 / SZ_VOXY) + signbit(at[0])); + + float tc1 = (lc1 - px) / at[0]; + float tc2 = (lc2 - px) / at[0]; + + bool x21 = (fabsf(x2 - x1) >= SZ_VOXY); + bool lc21 = (fabsf(lc1 - lc2) < L21); + int nc = x21 * roundf(fabsf(lc2 - lc1) / SZ_VOXY) + lc21; + float dtc; + if (nc > 0) + dtc = (tc2 - tc1) / nc + lc21 * t2; + else + dtc = t2; + + // if(idx==62301){ + // printf("\n$$$> e[0] = %f, e[1] = %f | px[0] = %f, py[1] = %f\n", e[0], e[1], px, py ); + // for(int i=0; i<9; i++) printf("tt[%d] = %f\n",i, tt[N_TT*idx+i]); + // } + + 
/***************************************************************/ + float ang = atanf(at[1] / at[0]); // angle of the ray + bool tsin; // condition for the slower changing to be in + + // save the sign of vector at components. used for image indx increments. + // since it is saved in unsigned format use offset of 1; + if (at[0] >= 0) + tv[N_TV * idx] = 2; + else + tv[N_TV * idx] = 0; + + if (at[1] >= 0) + tv[N_TV * idx + 1] = 2; + else + tv[N_TV * idx + 1] = 0; + + int k = 2; + if ((ang < TA1) & (ang > TA2)) { + float tf = tc1; // fast changing t (columns) + float ts = tr1; // slow changing t (rows) + // k = 0; + for (int i = 0; i <= nc; i++) { + tsin = (tf - ts) > 0; + tv[N_TV * idx + k] = 1; + k += tsin; + ts += dtr * tsin; + + tv[N_TV * idx + k] = 0; + k += 1; + tf += dtc; + } + if (tr2 > tc2) { + tv[N_TV * idx + k] = 1; + k += 1; + } + } else { + float tf = tr1; // fast changing t (rows) + float ts = tc1; // slow changing t (columns) + // k = 0; + for (int i = 0; i <= nr; i++) { + tsin = (tf - ts) > 0; + tv[idx * N_TV + k] = 0; + k += tsin; + ts += dtc * tsin; + + tv[idx * N_TV + k] = 1; + k += 1; + tf += dtr; + } + if (tc2 > tr2) { + tv[N_TV * idx + k] = 0; + k += 1; + } + } + + tt[N_TT * idx] = tr1; + tt[N_TT * idx + 1] = tc1; + tt[N_TT * idx + 2] = dtr; + tt[N_TT * idx + 3] = dtc; + tt[N_TT * idx + 4] = t1; + tt[N_TT * idx + 5] = fminf(tr1, tc1); + tt[N_TT * idx + 6] = t2; + tt[N_TT * idx + 7] = atn; + tt[N_TT * idx + 8] = u + (v << UV_SHFT); + tt[N_TT * idx + 9] = k; // note: the first two are used for signs + /***************************************************************/ + // tsino[idx] = dtc; + } } -void gpu_siddon_tx( - float4 *d_crs, - short2 *d_s2c, - float *d_tt, - unsigned char *d_tv) -{ - - //============================================================================ - //printf("i> calculating transaxial SIDDON weights..."); - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - //----- - 
dim3 BpG(ceil(AW / (float)NTHREADS), 1, 1); - dim3 TpB(NTHREADS, 1, 1); - sddn_tx<<>>(d_crs, d_s2c, d_tt, d_tv); - HANDLE_ERROR(cudaGetLastError()); - //----- - - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - //printf("DONE in %fs.\n", 0.001*elapsedTime); - //============================================================================ - - return; - +void gpu_siddon_tx(float4 *d_crs, short2 *d_s2c, float *d_tt, unsigned char *d_tv) { + + //============================================================================ + // printf("i> calculating transaxial SIDDON weights..."); + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + + //----- + dim3 BpG(ceil(AW / (float)NTHREADS), 1, 1); + dim3 TpB(NTHREADS, 1, 1); + sddn_tx<<>>(d_crs, d_s2c, d_tt, d_tv); + HANDLE_ERROR(cudaGetLastError()); + //----- + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + // printf("DONE in %fs.\n", 0.001*elapsedTime); + //============================================================================ + + return; } diff --git a/niftypet/nipet/prj/src/tprj.h b/niftypet/nipet/prj/src/tprj.h index 02a73ebd..8421ff19 100644 --- a/niftypet/nipet/prj/src/tprj.h +++ b/niftypet/nipet/prj/src/tprj.h @@ -5,10 +5,6 @@ #include -void gpu_siddon_tx( - float4 *d_crs, - short2 *d_s2c, - float *d_tt, - unsigned char *d_tv); +void gpu_siddon_tx(float4 *d_crs, short2 *d_s2c, float *d_tt, unsigned char *d_tv); -#endif //FWD_BCK_TX_H +#endif // FWD_BCK_TX_H diff --git a/niftypet/nipet/sct/src/ray.cu b/niftypet/nipet/sct/src/ray.cu index 5bf9b7c5..02a3eff9 100644 --- a/niftypet/nipet/sct/src/ray.cu +++ b/niftypet/nipet/sct/src/ray.cu @@ -8,158 +8,152 @@ Copyrights: 2018 #include "ray.h" 
#include "sct.h" -__inline__ __device__ -float warpsum(float uval) -{ - for (int off = 16; off>0; off /= 2) - uval += __shfl_down_sync(0xffffffff, uval, off); - return uval; +__inline__ __device__ float warpsum(float uval) { + for (int off = 16; off > 0; off /= 2) + uval += __shfl_down_sync(0xffffffff, uval, off); + return uval; } - -__inline__ __device__ -float warpsum_xor(float val) { - for (int mask = 16; mask > 0; mask /= 2) - val += __shfl_xor_sync(0xffffffff, val, mask); - return val; +__inline__ __device__ float warpsum_xor(float val) { + for (int mask = 16; mask > 0; mask /= 2) + val += __shfl_xor_sync(0xffffffff, val, mask); + return val; } //<><><><<><><><><><><><><><><><><><><><><><><><><><><><><><><<><><><><><><><><><><><><><> -__global__ -void satt(short *output, - cudaTextureObject_t texo, - const int *i2v, - const scrsDEF scrsdef) -{ - //voxel index - //int vxi = 531520;//u=192, v=152, w=63; - int vxi = blockIdx.x; - //scatter crystal index (transaxially, default 64 in total) - int icrs = blockIdx.y; - - //scatter ring index (default 8) - int irng = threadIdx.y; - //general sampling index - int idx = threadIdx.x; - - //origin voxel and its coordinates - int im_idx = i2v[vxi]; - int w = im_idx / (SS_IMX*SS_IMY); - int v = (im_idx - w * SS_IMY*SS_IMX) / SS_IMX; - int u = im_idx - (w*SS_IMY*SS_IMX + v*SS_IMX); - - // //check - // u = 192; - // v = 152; - // w = 38; - - //corresponding x and y - float x = (u + 0.5*(1 - SS_IMY))*SS_VXY; - float y = ((SS_IMY - 1)*0.5 - v)*SS_VXY; - float z = w*SS_VXZ - .5*SS_VXZ*(SS_IMZ - 1); - - - //vector between the origin and crystal - float3 a; - a.x = scrsdef.crs[3 * icrs + 1] - x; - a.y = scrsdef.crs[3 * icrs + 2] - y; - a.z = scrsdef.rng[2 * irng + 1] - z; - - float a_lgth = powf(a.x*a.x + a.y*a.y + a.z*a.z, 0.5); - - //normalise - a.x /= a_lgth; - a.y /= a_lgth; - a.z /= a_lgth; - - //float Br = 2*( x*a.x + y*a.y ); - //float Cr = 4*(x*x + y*y - R_2); - //float2 to; - //to.x = .5*(-Br-sqrtf(Br*Br-Cr)); - //to.y = 
.5*(-Br+sqrtf(Br*Br-Cr)); - //bool tin = (t(texo, su, sv, sw); - - float sx = .5*SS_IMX + (x + a.x*t) / SS_VXY; - float sy = .5*SS_IMY - (y + a.y*t) / SS_VXY; - float sz = .5*SS_IMZ + (z + a.z*t) / SS_VXZ; - //<><><><><><><><><><><><><><><><><><><><><> - float uval = tex3D(texo, sx, sy, sz); - //<><><><><><><><><><><><><><><><><><><><><> - uval = warpsum(uval); - - if (idx == 0) ray_sum += uval; - } - - if (idx == 0) output[vxi * scrsdef.nscrs*scrsdef.nsrng + icrs * scrsdef.nsrng + irng] = (short)(ray_sum*ASTP / RES_SUM); - - //if(idx==0&&irng==2) printf("rsum[%d]= %9.8f \n", icrs, ray_sum); - //<<*>> <<*>> <<*>> <<*>> <<*>> <<*>> <<*>> <<*>> <<*>> <<*>> - //if( (idx==0) ) printf("att[%d,%d]= %9.8f \n", icrs, irng, expf(-ray_sum*ASTP)); - //printf("att[%d]: %9.8f, apprx: %9.8f. u=%d, v=%d\n", icrs, expf(-ray_sum*ASTP), expf(-output[nscrs*vxi + icrs]*RES_SUM), u , v ); +__global__ void satt(short *output, cudaTextureObject_t texo, const int *i2v, + const scrsDEF scrsdef) { + // voxel index + // int vxi = 531520;//u=192, v=152, w=63; + int vxi = blockIdx.x; + // scatter crystal index (transaxially, default 64 in total) + int icrs = blockIdx.y; + + // scatter ring index (default 8) + int irng = threadIdx.y; + // general sampling index + int idx = threadIdx.x; + + // origin voxel and its coordinates + int im_idx = i2v[vxi]; + int w = im_idx / (SS_IMX * SS_IMY); + int v = (im_idx - w * SS_IMY * SS_IMX) / SS_IMX; + int u = im_idx - (w * SS_IMY * SS_IMX + v * SS_IMX); + + // //check + // u = 192; + // v = 152; + // w = 38; + + // corresponding x and y + float x = (u + 0.5 * (1 - SS_IMY)) * SS_VXY; + float y = ((SS_IMY - 1) * 0.5 - v) * SS_VXY; + float z = w * SS_VXZ - .5 * SS_VXZ * (SS_IMZ - 1); + + // vector between the origin and crystal + float3 a; + a.x = scrsdef.crs[3 * icrs + 1] - x; + a.y = scrsdef.crs[3 * icrs + 2] - y; + a.z = scrsdef.rng[2 * irng + 1] - z; + + float a_lgth = powf(a.x * a.x + a.y * a.y + a.z * a.z, 0.5); + + // normalise + a.x /= a_lgth; + a.y /= 
a_lgth; + a.z /= a_lgth; + + // float Br = 2*( x*a.x + y*a.y ); + // float Cr = 4*(x*x + y*y - R_2); + // float2 to; + // to.x = .5*(-Br-sqrtf(Br*Br-Cr)); + // to.y = .5*(-Br+sqrtf(Br*Br-Cr)); + // bool tin = (t(texo, su, sv, sw); + + float sx = .5 * SS_IMX + (x + a.x * t) / SS_VXY; + float sy = .5 * SS_IMY - (y + a.y * t) / SS_VXY; + float sz = .5 * SS_IMZ + (z + a.z * t) / SS_VXZ; + //<><><><><><><><><><><><><><><><><><><><><> + float uval = tex3D(texo, sx, sy, sz); + //<><><><><><><><><><><><><><><><><><><><><> + uval = warpsum(uval); + + if (idx == 0) + ray_sum += uval; + } + + if (idx == 0) + output[vxi * scrsdef.nscrs * scrsdef.nsrng + icrs * scrsdef.nsrng + irng] = + (short)(ray_sum * ASTP / RES_SUM); + + // if(idx==0&&irng==2) printf("rsum[%d]= %9.8f \n", icrs, ray_sum); + //<<*>> <<*>> <<*>> <<*>> <<*>> <<*>> <<*>> <<*>> <<*>> <<*>> + // if( (idx==0) ) printf("att[%d,%d]= %9.8f \n", icrs, irng, expf(-ray_sum*ASTP)); + // printf("att[%d]: %9.8f, apprx: %9.8f. u=%d, v=%d\n", icrs, expf(-ray_sum*ASTP), + // expf(-output[nscrs*vxi + icrs]*RES_SUM), u , v ); } //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -short *raysLUT(cudaTextureObject_t texo_mu3d, iMSK d_mu_msk, scrsDEF d_scrsdef, Cnst Cnt) -{ - // check which device is going to be used - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); +short *raysLUT(cudaTextureObject_t texo_mu3d, iMSK d_mu_msk, scrsDEF d_scrsdef, Cnst Cnt) { + // check which device is going to be used + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGINFO) + printf("i> using CUDA device #%d\n", dev_id); - // Allocate result of transformation in device memory - short *d_LUTout; + // Allocate result of transformation in device memory + short *d_LUTout; #ifdef WIN32 - HANDLE_ERROR(cudaMalloc(&d_LUTout, d_mu_msk.nvx * d_scrsdef.nscrs * d_scrsdef.nsrng * sizeof(short))); + HANDLE_ERROR( + 
cudaMalloc(&d_LUTout, d_mu_msk.nvx * d_scrsdef.nscrs * d_scrsdef.nsrng * sizeof(short))); #else - HANDLE_ERROR(cudaMallocManaged(&d_LUTout, d_mu_msk.nvx * d_scrsdef.nscrs * d_scrsdef.nsrng * sizeof(short))); + HANDLE_ERROR(cudaMallocManaged(&d_LUTout, d_mu_msk.nvx * d_scrsdef.nscrs * d_scrsdef.nsrng * + sizeof(short))); #endif - //return d_LUTout; - - if (Cnt.LOG <= LOGINFO) printf("i> precalculating attenuation paths into LUT..."); - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - //<<<<<<<<<<<<<<<<<<<<<<<<<<<< KERNEL <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - //dimension of the grid. depending on how many scatter crystals there are. - dim3 grid(d_mu_msk.nvx, d_scrsdef.nscrs, 1); - dim3 block(SS_WRP, d_scrsdef.nsrng, 1); - satt <<>>(d_LUTout, - texo_mu3d, - d_mu_msk.i2v, - d_scrsdef); - HANDLE_ERROR(cudaGetLastError()); - - //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) printf("DONE in %fs.\n", 0.001*elapsedTime); - - cudaDeviceSynchronize(); - - return d_LUTout; - + // return d_LUTout; + + if (Cnt.LOG <= LOGINFO) + printf("i> precalculating attenuation paths into LUT..."); + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + //<<<<<<<<<<<<<<<<<<<<<<<<<<<< KERNEL <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + // dimension of the grid. depending on how many scatter crystals there are. 
+ dim3 grid(d_mu_msk.nvx, d_scrsdef.nscrs, 1); + dim3 block(SS_WRP, d_scrsdef.nsrng, 1); + satt<<>>(d_LUTout, texo_mu3d, d_mu_msk.i2v, d_scrsdef); + HANDLE_ERROR(cudaGetLastError()); + + //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGINFO) + printf("DONE in %fs.\n", 0.001 * elapsedTime); + + cudaDeviceSynchronize(); + + return d_LUTout; } diff --git a/niftypet/nipet/sct/src/sct.cu b/niftypet/nipet/sct/src/sct.cu index 96d4f655..4f3e140d 100644 --- a/niftypet/nipet/sct/src/sct.cu +++ b/niftypet/nipet/sct/src/sct.cu @@ -5,8 +5,8 @@ scatter modelling (VSM) author: Pawel Markiewicz Copyrights: 2018 ------------------------------------------------------------------------*/ -#include "scanner_0.h" #include "ray.h" +#include "scanner_0.h" #include "sct.h" #include //round and arc cos functions @@ -18,607 +18,616 @@ __constant__ float c_SCTCNT[2]; __constant__ float2 c_KN[NCOS]; __constant__ float c_TOFBIN[4]; - -__device__ -char sgn(float x) -{ - return x > 0 ? 1 : (x<0 ? -1 : 0); -} - +__device__ char sgn(float x) { return x > 0 ? 1 : (x < 0 ? 
-1 : 0); } //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -__inline__ __device__ -float warpsum(float val) -{ - for (int off = 16; off>0; off /= 2) - val += __shfl_down_sync(0xffffffff, val, off); - return val; +__inline__ __device__ float warpsum(float val) { + for (int off = 16; off > 0; off /= 2) + val += __shfl_down_sync(0xffffffff, val, off); + return val; } //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -__inline__ __device__ -float warpsum_xor(float val) { - for (int mask = SS_WRP / 2; mask > 0; mask /= 2) - val += __shfl_xor_sync(0xffffffff, val, mask); - return val; +__inline__ __device__ float warpsum_xor(float val) { + for (int mask = SS_WRP / 2; mask > 0; mask /= 2) + val += __shfl_xor_sync(0xffffffff, val, mask); + return val; } //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -__inline__ __device__ -float wcumsum(int idx, float val) -{ - for (int off = 1; off= 0); - return val; +__inline__ __device__ float wcumsum(int idx, float val) { + for (int off = 1; off < SS_WRP; off *= 2) + val += __shfl_sync(0xffffffff, val, idx - off) * ((idx - off) >= 0); + return val; } - //<><><><<><><><><><><><><><><><><><><><><><><><><><><><><><><<><><><><><><><><><><><><><> -__global__ -void Psct(float *rslt, - cudaTextureObject_t texo, - const short *rays, - const scrsDEF scrsdef, - iMSK mu_msk, - iMSK em_msk, - const float *em) -{ - // general sampling index - // used for scatter crystals and sampling scatter patches/points - int idx = threadIdx.x; - //index of scatter rings (default 8) (for singly scattered photons) - int isr = threadIdx.y; - - //index of unscattered ring and crystal index (transaxially, default is 64 and axially (rings) it is 8) - int iur = blockIdx.y; - int iuc = blockIdx.z; - - //emitting voxel - int evxi = blockIdx.x; - - //original emission voxel index - int im_idx = em_msk.i2v[evxi]; - - //emission voxel value - float em_vox = em[im_idx]; - - //original image indices - int w = im_idx / (SSE_IMX*SSE_IMY); - int v = (im_idx - w * 
SSE_IMY*SSE_IMX) / SSE_IMX; - int u = im_idx - (w*SSE_IMY*SSE_IMX + v*SSE_IMX); - - //corresponding x and y for the emission point/voxel - float x = (u + 0.5*(1 - SSE_IMX))*SSE_VXY; - float y = ((SSE_IMY - 1)*0.5 - v)*SSE_VXY; - float z = w*SSE_VXZ - .5*SSE_VXZ*(SSE_IMZ - 1); - - //mu-map indices (may be of different resolution to that of emission image) - u = .5*SS_IMX + floorf(x / SS_VXY); - v = (.5*SS_IMY - ceilf(y / SS_VXY)); - w = floorf(.5*SS_IMZ + z*IS_VXZ); - - //get the mu-map index corresponding to the emission image index (they may have different image size) - int mvxi = mu_msk.v2i[(int)(u + SS_IMX*v + SS_IMX*SS_IMY * w)]; - - if (mvxi<0) return; - // if ((mvxi>393674)||(mvxi<0)) printf(">>>>DISASTER: mvxi=%d, u=%d,v=%d,w=%d\n", mvxi, u, v, w ); - - // unscattered photon receiving crystal coordinates - float2 uc; - uc.x = scrsdef.crs[3 * iuc + 1]; - uc.y = scrsdef.crs[3 * iuc + 2]; - - //vector between the origin and crystal - float3 a; - a.x = uc.x - x; - a.y = uc.y - y; - a.z = scrsdef.rng[2 * iur + 1] - z; - //path length for an unscattered photon - float an = powf(a.x*a.x + a.y*a.y + a.z*a.z, 0.5); - - //2D version - float2 aux; - aux.x = a.x; - aux.y = a.y; - float a_lgth = powf(aux.x*aux.x + aux.y*aux.y, 0.5); - - //normalise vectors - a.x /= an; - a.y /= an; - a.z /= an; - //--- - aux.x /= a_lgth; - aux.y /= a_lgth; - - //solid angle with probability of unscattered photon reaching a given crystal - float uomg = (SRFCRS*(a.x*uc.x*IR_RING + a.y*uc.y*IR_RING) / (2 * PI*an*an)) - * expf(-rays[mvxi*scrsdef.nscrs*scrsdef.nsrng + iuc*scrsdef.nsrng + iur] * RES_SUM); - - - // if (idx==0 && iur==2 && isr==2) printf("uatt[%d] = %6.8f\n", iuc, 1e6*uomg); - // if (idx==0 && iur==0) - // printf("uomg[%d, %d] = %8.7f | atn=%8.7f, an=%8.7f | att=%8.7f |cosbeta = %8.7f\n", - // iuc, iur, uomg, an, a_lgth, expf(-rays[vxi*scrsdef.nscrs*scrsdef.nsrng + iuc*scrsdef.nsrng + iur] * RES_SUM), (a_lgth/an)); - - //take the opposite direction for the scattering photon: - 
a.x *= -1; - a.y *= -1; - a.z *= -1; - //-- - aux.x *= -1; - aux.y *= -1; - - // NEW<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - // get a_length which is now the other direction, i.e., along the scattering path. - // first start in the transaxial plane only - float Br = 2 * (x*aux.x + y*aux.y); - float t = .5*(-Br + sqrtf(Br*Br - 4 * (-R_2 + x*x + y*y))); - - // main/most scatter receiving location on the transaxial ring - float2 ms; - ms.x = aux.x*t + x; - ms.y = aux.y*t + y; - - // scatter crystal index, opposing to unscattered photons receiving crystal - char isuc = (iuc + scrsdef.nscrs/2) & (scrsdef.nscrs - 1); - - // the coordinates of the opposing scatter crystal - aux.x = scrsdef.crs[3*isuc+1]; - aux.y = scrsdef.crs[3*isuc+2]; - - // crystal offset (multi-line equation) - char imsc = isuc + - (char)( - // offset direction sign: - // (1) subtract mc vector from sc vector for the determination of offset direction - // (2) get the direction of crystal numbering by increasing the index of the opposing crystal - // (3) get the sign of the dot product of (1) and (2) - sgn((ms.x-aux.x)*(scrsdef.crs[3*((isuc+1)&(scrsdef.nscrs-1))+1]-aux.x) + (ms.y-aux.y)*(scrsdef.crs[3*((isuc+1)&(scrsdef.nscrs-1))+2]-aux.y)) * - // crystal offset as an angle fraction based on the scatter opposing and main scatter vectors - scrsdef.nscrs * acosf((ms.x*aux.x + ms.y*aux.y) / (sqrtf(aux.x*aux.x+aux.y*aux.y) * sqrtf(ms.x*ms.x+ms.y*ms.y))) / (2*PI) - ); - - // get the full 3D version dividing by the ratio which is cos(beta), angle between transaxial and axial parts of the vector - a_lgth = t/(a_lgth/an); - - //scattering crystals (half considered, 32 out of 64, found using the index main scatter beam index - char isc = (imsc-(scrsdef.nscrs/4)+idx) & (scrsdef.nscrs - 1); - - // if ((iuc==31) && isr==4 && iur==4) - // printf(">> iuc = %d; isc = %d; isuc = %d; >> imsc = %d >> em = (%2.3f, %2.3f), t = %f; ms = (%2.3f, %2.3f)\n", iuc, isc, isuc, imsc, x, y, t, ms.x, ms.y); 
- // NEW<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - - // // OLD<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - // //> get a_length which is now the other direction, i.e., along the scattering path. - // //> first start in the transaxial plane only - // float Br = 2 * (x*aux.x + y*aux.y); - // //> get the full 3D version dividing by the ratio which is cos(beta), angle between transaxial and axial parts of the vector - // a_lgth = .5*(-Br + sqrtf(Br*Br - 4 * (-R_2 + x*x + y*y))) / (a_lgth / an); - // //> scattering crystals (half considered, 32 out of 64, found using the index of unscattered photon crystal - // char isc = (iuc + (scrsdef.nscrs / 4) + idx) & (scrsdef.nscrs - 1); - // // OLD<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - //---find out how far to go with scatter points (number of warps, Nw) - int Nw = 0; - for (int k = 0; k <= (int)(a_lgth / (SS_WRP*SSTP)); k++) { - //sampling coordinates within a warp (idx<=warpSize) - float t = (idx + 0.5 + k*SS_WRP)*SSTP; - u = .5*SS_IMX + floorf((x + a.x*t) / SS_VXY); - v = .5*SS_IMX - ceilf((y + a.y*t) / SS_VXY); - // u = .5*SS_IMX + ceilf ((x + a.x*t)/SS_VXY); - // v = .5*SS_IMX - floorf((y + a.y*t)/SS_VXY); - w = floorf(.5*SS_IMZ + (z + a.z*t)*IS_VXZ); - float uval = tex3D(texo, u, v, w); - - uval = warpsum_xor(uval); - if (uval>0) Nw = k; - } - //--- - - //scatter crystal coordinates and their normal vector - float3 sc; - sc.x = scrsdef.crs[3 * isc + 1]; - sc.y = scrsdef.crs[3 * isc + 2]; - sc.z = scrsdef.rng[2 * isr + 1]; - - // if (idx==0 && isr==4) - // printf("[%d, %d]: s(x,y,z) = (%f, %f, %f)\n", iuc, iur, sc.x, sc.y, sc.z); - - - //sum along the path, updated with shuffle reductions - float rcsum = 0; - - for (int k = 0; k <= Nw; k++) - { - - //sampling the texture along the scattering path - float t = (idx + k*SS_WRP + 0.5)*SSTP; - float sval = tex3D(texo, .5*SS_IMX + (x + a.x*t) / SS_VXY, - .5*SS_IMY - (y + a.y*t) / SS_VXY, - .5*SS_IMZ + (z + 
a.z*t)*IS_VXZ); - - //accumulate mu-values. - float cumum = wcumsum(idx, sval); - float sumWarp = __shfl_sync(0xffffffff, cumum, (SS_WRP - 1)); - - //get the scattering point mu-values sum by subtracting the sum back by four (default) voxels. - //make it zero index when negative. - float smu = cumum - __shfl_sync(0xffffffff, cumum, idx - (1 << LSCT2)) * ((idx - (1 << LSCT2)) >= 0); - - //probability of scattering from a scatter point - float p_scatter = (1 - expf(-smu*SSTP)); - - //now subtract the warp sample to have the cumsum starting from 0 for incident probability calculations. - cumum -= sval;//__shfl(sval,0); - - //probability of incident photons on scattering point. - p_scatter *= uomg * expf(-(__shfl_sync(0xffffffff, cumum, idx & ~((1 << LSCT2) - 1)) + rcsum)* SSTP); - - //if(idx==0&&iur==2&&iuc==7) printf("%d> ps=%6.8f\n", k, 1e7*p_scatter ); - - //now update the global sum along the path - rcsum += sumWarp; - - - //from scattering point (sampled by ) to crystals - //scatter-point -> crystal vector ; scatter crystal normal vector , reusing - float tt = t - ((1 << (LSCT2 - 1)) - 0.5)*SSTP; - - //scattering points/patches: 3, 7, 11, ..., 31 - char sct_id = (idx & (-((1 << LSCT2)))) + (1 << LSCT2) - 1; - - //within scattering point - char aid = idx&((1 << LSCT2) - 1); - - /* NOTE: - The size of the scattering patch (with its corresponding point - in the middle) is always a power of two and govern by LSCT2. - This also helps to divide the loop over scatter crystal (32) - done partly by threads (which are used for scattering points) - and partly by the following for-loop of size (SS_WRP>>LSCT2). - Therefore, the crs_shft accounts for both as seen below. 
- */ - - - for (int j = 0; j<(SS_WRP >> LSCT2); j++) { - - char crs_shft = aid + j*(1 << LSCT2); - - //distance from the emission point to the scattering point - - //scatter vector used first for the scattering point (fixed for all j's) - float3 s; - s.x = (x + a.x * __shfl_sync(0xffffffff, tt, sct_id)); - s.y = (y + a.y * __shfl_sync(0xffffffff, tt, sct_id)); - s.z = (z + a.z * __shfl_sync(0xffffffff, tt, sct_id)); - - //if ((iur==2)&&(isr==2)) printf("k%d, iuc%d: s.z=%4.3f | a.z=%4.3f\n", k, iuc, s.z, a.z); - - // if (s.x>35 || s.y>35 || s.z>13 || s.z<-13) - // printf("<%4.2f,%4.2f,%4.2f> 2[k:%d][idx:%d][iur:%d][iuc:%d][isr%d][isc:%d]\n", - // s.x,s.y,s.z, a_lgth, a_lgth, k, idx, iur, iuc, isr, isc ); - - //get the masked voxel index for scatter points: - int i_smsk; - char infov = 1; - if ((fabsf(s.z)<(SS_VXZ*SS_IMZ/2-0.01*SS_VXZ)) && - (fabsf(s.x)<(SS_VXY*SS_IMX/2-0.01*SS_VXY)) && - (fabsf(s.y)<(SS_VXY*SS_IMY/2-0.01*SS_VXY))){ - // subtract one hundredth of a voxel to be on the conservative side - // and not let indices go out - - i_smsk = mu_msk.v2i[(int)(.5*SS_IMX + floorf(s.x / SS_VXY) //u - + SS_IMX*(.5*SS_IMY - ceilf(s.y / SS_VXY)) //v - + SS_IMX*SS_IMY*floorf(.5*SS_IMZ + s.z*IS_VXZ))]; //w - } - else { infov = 0; i_smsk = 0; } - // else {s.x=1e7; i_smsk = 0;} - - //make x-coordinate long away when not enough scattering medium in voxel - if (i_smsk<0) { infov = 0; i_smsk = 0; } - // if(i_smsk<0) {s.x=1e7; i_smsk = 0;} - - //finish forming the scatter vector by subtracting scatter crystal coordinates - s.x = __shfl_sync(0xffffffff, sc.x, crs_shft) - s.x; - s.y = __shfl_sync(0xffffffff, sc.y, crs_shft) - s.y; - s.z = __shfl_sync(0xffffffff, sc.z, crs_shft) - s.z; - - //distance from the scattering point to the detector - aux.y = powf(s.x*s.x + s.y*s.y + s.z*s.z, 0.5); - - float _s_lgth = 1 / aux.y;//powf(s.x*s.x + s.y*s.y + s.z*s.z, 0.5); // - s.x *= _s_lgth; - s.y *= _s_lgth; - s.z *= _s_lgth; - - //<<+>><<+>><<+>> scattering angle <<+>><<+>><<+>><< - float 
cosups = s.x*a.x + s.y*a.y + s.z*a.z; - //<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> - - //translate cosups into index for K-N and mu-correction LUTs - // if (cosups>=c_SCTCNT[0]) then icos=0 for which KN=0, causing the Psct = 0. - unsigned short icos = (unsigned short)(c_SCTCNT[1] * (cosups - c_SCTCNT[0]))*(cosups >= c_SCTCNT[0]); - - //--scatter to detectors: solid angle, KN (including energy resolution), mucrr, rays from LUTs - //--make solid angle zero for scatter angles past threshold - //indexing resutls: singly_scattered_crystal_index + singly_scattered_ring_index * no_of_scatter_crystals + - //unscattered_crystal_ring_index * no_of_scattered_crastals_rings. - //normal vector of scatter receiving crystals has the z-component always zero for cylindrical scanners - //(__shfl(sc.x, crs_shft)*IR_RING) is the x-component norm of scatter crystal - - if (c_TOFBIN[0]>1) { - //TOF bin index with determination of the sign - char m = infov*floorf(0.5*c_TOFBIN[0] + c_TOFBIN[3] * - (__shfl_sync(0xffffffff, tt, sct_id) + aux.y - an) * - (((__fdividef(__shfl_sync(0xffffffff, sc.y, crs_shft) - uc.y, __shfl_sync(0xffffffff, sc.x, crs_shft) - uc.x)>0) != (__shfl_sync(0xffffffff, sc.y, crs_shft)>uc.y)) * (-2) + 1) - ); - atomicAdd(rslt + m * scrsdef.nsrng*scrsdef.nscrs*scrsdef.nsrng*scrsdef.nscrs / 2 + - __shfl_sync(0xffffffff, idx, crs_shft) + isr*(scrsdef.nscrs / 2) + (iuc + iur*scrsdef.nscrs) * (scrsdef.nsrng*scrsdef.nscrs / 2), - infov*em_vox * c_KN[icos].x * - (SRFCRS*(s.x*__shfl_sync(0xffffffff, sc.x, crs_shft)*IR_RING + s.y*__shfl_sync(0xffffffff, sc.y, crs_shft)*IR_RING) * (_s_lgth*_s_lgth)) * - expf(-c_KN[icos].y * rays[i_smsk*scrsdef.nscrs*scrsdef.nsrng + __shfl_sync(0xffffffff, isc, crs_shft)*scrsdef.nsrng + isr] * RES_SUM) * - __shfl_sync(0xffffffff, p_scatter, sct_id)); - } - else { - // atomicAdd(rslt + __shfl_sync(0xffffffff, idx, crs_shft) + isr*(scrsdef.nscrs / 2) + (iuc + iur*scrsdef.nscrs) * (scrsdef.nsrng*scrsdef.nscrs / 2), - // infov*em_vox * 
c_KN[icos].x * - // (SRFCRS*(s.x*__shfl_sync(0xffffffff, sc.x, crs_shft)*IR_RING + s.y*__shfl_sync(0xffffffff, sc.y, crs_shft)*IR_RING) * (_s_lgth*_s_lgth)) * - // expf(-c_KN[icos].y * rays[i_smsk*scrsdef.nscrs*scrsdef.nsrng + __shfl_sync(0xffffffff, isc, crs_shft)*scrsdef.nsrng + isr] * RES_SUM) * - // __shfl_sync(0xffffffff, p_scatter, sct_id)); - - - atomicAdd(rslt + __shfl_sync(0xffffffff, isc, crs_shft) + isr*scrsdef.nscrs + (iuc + iur*scrsdef.nscrs) * (scrsdef.nsrng*scrsdef.nscrs), - infov * c_KN[icos].x * em_vox * - (SRFCRS*(s.x*__shfl_sync(0xffffffff, sc.x, crs_shft)*IR_RING + s.y*__shfl_sync(0xffffffff, sc.y, crs_shft)*IR_RING) * (_s_lgth*_s_lgth)) * - expf(-c_KN[icos].y * rays[i_smsk*scrsdef.nscrs*scrsdef.nsrng + __shfl_sync(0xffffffff, isc, crs_shft)*scrsdef.nsrng + isr] * RES_SUM) * - __shfl_sync(0xffffffff, p_scatter, sct_id) - ); - } - - // #endif - - // if ( (blockIdx.x==0) & (k==0) && (isr==2) && (iur==2) && (iuc==25) && ((idx&((1< sc[%d] idx[%d]: t = %6.4f | tt = %6.4f | an=%6.4f, as0=%6.4f + as1=%6.4f, m=%d\n", - // __shfl(isc, crs_shft), idx, t, tt, an, __shfl(tt, sct_id), aux.y, m); - - } - } +__global__ void Psct(float *rslt, cudaTextureObject_t texo, const short *rays, + const scrsDEF scrsdef, iMSK mu_msk, iMSK em_msk, const float *em) { + // general sampling index + // used for scatter crystals and sampling scatter patches/points + int idx = threadIdx.x; + // index of scatter rings (default 8) (for singly scattered photons) + int isr = threadIdx.y; + + // index of unscattered ring and crystal index (transaxially, default is 64 and axially (rings) + // it is 8) + int iur = blockIdx.y; + int iuc = blockIdx.z; + + // emitting voxel + int evxi = blockIdx.x; + + // original emission voxel index + int im_idx = em_msk.i2v[evxi]; + + // emission voxel value + float em_vox = em[im_idx]; + + // original image indices + int w = im_idx / (SSE_IMX * SSE_IMY); + int v = (im_idx - w * SSE_IMY * SSE_IMX) / SSE_IMX; + int u = im_idx - (w * SSE_IMY * SSE_IMX + 
v * SSE_IMX); + + // corresponding x and y for the emission point/voxel + float x = (u + 0.5 * (1 - SSE_IMX)) * SSE_VXY; + float y = ((SSE_IMY - 1) * 0.5 - v) * SSE_VXY; + float z = w * SSE_VXZ - .5 * SSE_VXZ * (SSE_IMZ - 1); + + // mu-map indices (may be of different resolution to that of emission image) + u = .5 * SS_IMX + floorf(x / SS_VXY); + v = (.5 * SS_IMY - ceilf(y / SS_VXY)); + w = floorf(.5 * SS_IMZ + z * IS_VXZ); + + // get the mu-map index corresponding to the emission image index (they may have different image + // size) + int mvxi = mu_msk.v2i[(int)(u + SS_IMX * v + SS_IMX * SS_IMY * w)]; + + if (mvxi < 0) + return; + // if ((mvxi>393674)||(mvxi<0)) printf(">>>>DISASTER: mvxi=%d, u=%d,v=%d,w=%d\n", mvxi, u, v, w + // ); + + // unscattered photon receiving crystal coordinates + float2 uc; + uc.x = scrsdef.crs[3 * iuc + 1]; + uc.y = scrsdef.crs[3 * iuc + 2]; + + // vector between the origin and crystal + float3 a; + a.x = uc.x - x; + a.y = uc.y - y; + a.z = scrsdef.rng[2 * iur + 1] - z; + // path length for an unscattered photon + float an = powf(a.x * a.x + a.y * a.y + a.z * a.z, 0.5); + + // 2D version + float2 aux; + aux.x = a.x; + aux.y = a.y; + float a_lgth = powf(aux.x * aux.x + aux.y * aux.y, 0.5); + + // normalise vectors + a.x /= an; + a.y /= an; + a.z /= an; + //--- + aux.x /= a_lgth; + aux.y /= a_lgth; + + // solid angle with probability of unscattered photon reaching a given crystal + float uomg = + (SRFCRS * (a.x * uc.x * IR_RING + a.y * uc.y * IR_RING) / (2 * PI * an * an)) * + expf(-rays[mvxi * scrsdef.nscrs * scrsdef.nsrng + iuc * scrsdef.nsrng + iur] * RES_SUM); + + // if (idx==0 && iur==2 && isr==2) printf("uatt[%d] = %6.8f\n", iuc, 1e6*uomg); + // if (idx==0 && iur==0) + // printf("uomg[%d, %d] = %8.7f | atn=%8.7f, an=%8.7f | att=%8.7f |cosbeta = %8.7f\n", + // iuc, iur, uomg, an, a_lgth, expf(-rays[vxi*scrsdef.nscrs*scrsdef.nsrng + + // iuc*scrsdef.nsrng + iur] * RES_SUM), (a_lgth/an)); + + // take the opposite direction for the 
scattering photon: + a.x *= -1; + a.y *= -1; + a.z *= -1; + //-- + aux.x *= -1; + aux.y *= -1; + + // NEW<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + // get a_length which is now the other direction, i.e., along the scattering path. + // first start in the transaxial plane only + float Br = 2 * (x * aux.x + y * aux.y); + float t = .5 * (-Br + sqrtf(Br * Br - 4 * (-R_2 + x * x + y * y))); + + // main/most scatter receiving location on the transaxial ring + float2 ms; + ms.x = aux.x * t + x; + ms.y = aux.y * t + y; + + // scatter crystal index, opposing to unscattered photons receiving crystal + char isuc = (iuc + scrsdef.nscrs / 2) & (scrsdef.nscrs - 1); + + // the coordinates of the opposing scatter crystal + aux.x = scrsdef.crs[3 * isuc + 1]; + aux.y = scrsdef.crs[3 * isuc + 2]; + + // crystal offset (multi-line equation) + char imsc = + isuc + + (char)( + // offset direction sign: + // (1) subtract mc vector from sc vector for the determination of offset direction + // (2) get the direction of crystal numbering by increasing the index of the opposing + // crystal (3) get the sign of the dot product of (1) and (2) + sgn((ms.x - aux.x) * (scrsdef.crs[3 * ((isuc + 1) & (scrsdef.nscrs - 1)) + 1] - aux.x) + + (ms.y - aux.y) * (scrsdef.crs[3 * ((isuc + 1) & (scrsdef.nscrs - 1)) + 2] - aux.y)) * + // crystal offset as an angle fraction based on the scatter opposing and main scatter + // vectors + scrsdef.nscrs * + acosf((ms.x * aux.x + ms.y * aux.y) / + (sqrtf(aux.x * aux.x + aux.y * aux.y) * sqrtf(ms.x * ms.x + ms.y * ms.y))) / + (2 * PI)); + + // get the full 3D version dividing by the ratio which is cos(beta), angle between transaxial and + // axial parts of the vector + a_lgth = t / (a_lgth / an); + + // scattering crystals (half considered, 32 out of 64, found using the index main scatter beam + // index + char isc = (imsc - (scrsdef.nscrs / 4) + idx) & (scrsdef.nscrs - 1); + + // if ((iuc==31) && isr==4 && iur==4) + // printf(">> iuc = %d; isc 
= %d; isuc = %d; >> imsc = %d >> em = (%2.3f, %2.3f), t = %f; ms = + // (%2.3f, %2.3f)\n", iuc, isc, isuc, imsc, x, y, t, ms.x, ms.y); + // NEW<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + // // OLD<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + // //> get a_length which is now the other direction, i.e., along the scattering path. + // //> first start in the transaxial plane only + // float Br = 2 * (x*aux.x + y*aux.y); + // //> get the full 3D version dividing by the ratio which is cos(beta), angle between transaxial + // and axial parts of the vector a_lgth = .5*(-Br + sqrtf(Br*Br - 4 * (-R_2 + x*x + y*y))) / + // (a_lgth / an); + // //> scattering crystals (half considered, 32 out of 64, found using the index of unscattered + // photon crystal char isc = (iuc + (scrsdef.nscrs / 4) + idx) & (scrsdef.nscrs - 1); + // // OLD<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + //---find out how far to go with scatter points (number of warps, Nw) + int Nw = 0; + for (int k = 0; k <= (int)(a_lgth / (SS_WRP * SSTP)); k++) { + // sampling coordinates within a warp (idx<=warpSize) + float t = (idx + 0.5 + k * SS_WRP) * SSTP; + u = .5 * SS_IMX + floorf((x + a.x * t) / SS_VXY); + v = .5 * SS_IMX - ceilf((y + a.y * t) / SS_VXY); + // u = .5*SS_IMX + ceilf ((x + a.x*t)/SS_VXY); + // v = .5*SS_IMX - floorf((y + a.y*t)/SS_VXY); + w = floorf(.5 * SS_IMZ + (z + a.z * t) * IS_VXZ); + float uval = tex3D(texo, u, v, w); + + uval = warpsum_xor(uval); + if (uval > 0) + Nw = k; + } + //--- + + // scatter crystal coordinates and their normal vector + float3 sc; + sc.x = scrsdef.crs[3 * isc + 1]; + sc.y = scrsdef.crs[3 * isc + 2]; + sc.z = scrsdef.rng[2 * isr + 1]; + + // if (idx==0 && isr==4) + // printf("[%d, %d]: s(x,y,z) = (%f, %f, %f)\n", iuc, iur, sc.x, sc.y, sc.z); + + // sum along the path, updated with shuffle reductions + float rcsum = 0; + + for (int k = 0; k <= Nw; k++) { + + // sampling the texture along the scattering 
path + float t = (idx + k * SS_WRP + 0.5) * SSTP; + float sval = + tex3D(texo, .5 * SS_IMX + (x + a.x * t) / SS_VXY, + .5 * SS_IMY - (y + a.y * t) / SS_VXY, .5 * SS_IMZ + (z + a.z * t) * IS_VXZ); + + // accumulate mu-values. + float cumum = wcumsum(idx, sval); + float sumWarp = __shfl_sync(0xffffffff, cumum, (SS_WRP - 1)); + + // get the scattering point mu-values sum by subtracting the sum back by four (default) voxels. + // make it zero index when negative. + float smu = + cumum - __shfl_sync(0xffffffff, cumum, idx - (1 << LSCT2)) * ((idx - (1 << LSCT2)) >= 0); + + // probability of scattering from a scatter point + float p_scatter = (1 - expf(-smu * SSTP)); + + // now subtract the warp sample to have the cumsum starting from 0 for incident probability + // calculations. + cumum -= sval; //__shfl(sval,0); + + // probability of incident photons on scattering point. + p_scatter *= + uomg * expf(-(__shfl_sync(0xffffffff, cumum, idx & ~((1 << LSCT2) - 1)) + rcsum) * SSTP); + + // if(idx==0&&iur==2&&iuc==7) printf("%d> ps=%6.8f\n", k, 1e7*p_scatter ); + + // now update the global sum along the path + rcsum += sumWarp; + + // from scattering point (sampled by ) to crystals + // scatter-point -> crystal vector ; scatter crystal normal vector , reusing + float tt = t - ((1 << (LSCT2 - 1)) - 0.5) * SSTP; + + // scattering points/patches: 3, 7, 11, ..., 31 + char sct_id = (idx & (-((1 << LSCT2)))) + (1 << LSCT2) - 1; + + // within scattering point + char aid = idx & ((1 << LSCT2) - 1); + + /* NOTE: + The size of the scattering patch (with its corresponding point + in the middle) is always a power of two and govern by LSCT2. + This also helps to divide the loop over scatter crystal (32) + done partly by threads (which are used for scattering points) + and partly by the following for-loop of size (SS_WRP>>LSCT2). + Therefore, the crs_shft accounts for both as seen below. 
+ */ + + for (int j = 0; j < (SS_WRP >> LSCT2); j++) { + + char crs_shft = aid + j * (1 << LSCT2); + + // distance from the emission point to the scattering point + + // scatter vector used first for the scattering point (fixed for all j's) + float3 s; + s.x = (x + a.x * __shfl_sync(0xffffffff, tt, sct_id)); + s.y = (y + a.y * __shfl_sync(0xffffffff, tt, sct_id)); + s.z = (z + a.z * __shfl_sync(0xffffffff, tt, sct_id)); + + // if ((iur==2)&&(isr==2)) printf("k%d, iuc%d: s.z=%4.3f | a.z=%4.3f\n", k, iuc, s.z, a.z); + + // if (s.x>35 || s.y>35 || s.z>13 || s.z<-13) + // printf("<%4.2f,%4.2f,%4.2f> + // 2[k:%d][idx:%d][iur:%d][iuc:%d][isr%d][isc:%d]\n", + // s.x,s.y,s.z, a_lgth, a_lgth, k, idx, iur, iuc, isr, isc ); + + // get the masked voxel index for scatter points: + int i_smsk; + char infov = 1; + if ((fabsf(s.z) < (SS_VXZ * SS_IMZ / 2 - 0.01 * SS_VXZ)) && + (fabsf(s.x) < (SS_VXY * SS_IMX / 2 - 0.01 * SS_VXY)) && + (fabsf(s.y) < (SS_VXY * SS_IMY / 2 - 0.01 * SS_VXY))) { + // subtract one hundredth of a voxel to be on the conservative side + // and not let indices go out + + i_smsk = mu_msk.v2i[(int)(.5 * SS_IMX + floorf(s.x / SS_VXY) // u + + SS_IMX * (.5 * SS_IMY - ceilf(s.y / SS_VXY)) // v + + SS_IMX * SS_IMY * floorf(.5 * SS_IMZ + s.z * IS_VXZ))]; // w + } else { + infov = 0; + i_smsk = 0; + } + // else {s.x=1e7; i_smsk = 0;} + + // make x-coordinate long away when not enough scattering medium in voxel + if (i_smsk < 0) { + infov = 0; + i_smsk = 0; + } + // if(i_smsk<0) {s.x=1e7; i_smsk = 0;} + + // finish forming the scatter vector by subtracting scatter crystal coordinates + s.x = __shfl_sync(0xffffffff, sc.x, crs_shft) - s.x; + s.y = __shfl_sync(0xffffffff, sc.y, crs_shft) - s.y; + s.z = __shfl_sync(0xffffffff, sc.z, crs_shft) - s.z; + + // distance from the scattering point to the detector + aux.y = powf(s.x * s.x + s.y * s.y + s.z * s.z, 0.5); + + float _s_lgth = 1 / aux.y; // powf(s.x*s.x + s.y*s.y + s.z*s.z, 0.5); // + s.x *= _s_lgth; + s.y *= _s_lgth; 
+ s.z *= _s_lgth; + + //<<+>><<+>><<+>> scattering angle <<+>><<+>><<+>><< + float cosups = s.x * a.x + s.y * a.y + s.z * a.z; + //<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> + + // translate cosups into index for K-N and mu-correction LUTs + // if (cosups>=c_SCTCNT[0]) then icos=0 for which KN=0, causing the Psct = 0. + unsigned short icos = + (unsigned short)(c_SCTCNT[1] * (cosups - c_SCTCNT[0])) * (cosups >= c_SCTCNT[0]); + + //--scatter to detectors: solid angle, KN (including energy resolution), mucrr, rays from + // LUTs + //--make solid angle zero for scatter angles past threshold + // indexing resutls: singly_scattered_crystal_index + singly_scattered_ring_index * + // no_of_scatter_crystals + unscattered_crystal_ring_index * no_of_scattered_crastals_rings. + // normal vector of scatter receiving crystals has the z-component always zero for + // cylindrical scanners + //(__shfl(sc.x, crs_shft)*IR_RING) is the x-component norm of scatter crystal + + if (c_TOFBIN[0] > 1) { + // TOF bin index with determination of the sign + char m = infov * floorf(0.5 * c_TOFBIN[0] + + c_TOFBIN[3] * (__shfl_sync(0xffffffff, tt, sct_id) + aux.y - an) * + (((__fdividef(__shfl_sync(0xffffffff, sc.y, crs_shft) - uc.y, + __shfl_sync(0xffffffff, sc.x, crs_shft) - uc.x) > + 0) != (__shfl_sync(0xffffffff, sc.y, crs_shft) > uc.y)) * + (-2) + + 1)); + atomicAdd(rslt + m * scrsdef.nsrng * scrsdef.nscrs * scrsdef.nsrng * scrsdef.nscrs / 2 + + __shfl_sync(0xffffffff, idx, crs_shft) + isr * (scrsdef.nscrs / 2) + + (iuc + iur * scrsdef.nscrs) * (scrsdef.nsrng * scrsdef.nscrs / 2), + infov * em_vox * c_KN[icos].x * + (SRFCRS * + (s.x * __shfl_sync(0xffffffff, sc.x, crs_shft) * IR_RING + + s.y * __shfl_sync(0xffffffff, sc.y, crs_shft) * IR_RING) * + (_s_lgth * _s_lgth)) * + expf(-c_KN[icos].y * + rays[i_smsk * scrsdef.nscrs * scrsdef.nsrng + + __shfl_sync(0xffffffff, isc, crs_shft) * scrsdef.nsrng + isr] * + RES_SUM) * + __shfl_sync(0xffffffff, p_scatter, sct_id)); + } else { + // 
atomicAdd(rslt + __shfl_sync(0xffffffff, idx, crs_shft) + isr*(scrsdef.nscrs / 2) + (iuc + // + iur*scrsdef.nscrs) * (scrsdef.nsrng*scrsdef.nscrs / 2), infov*em_vox * c_KN[icos].x + // * (SRFCRS*(s.x*__shfl_sync(0xffffffff, sc.x, crs_shft)*IR_RING + + // s.y*__shfl_sync(0xffffffff, sc.y, crs_shft)*IR_RING) * (_s_lgth*_s_lgth)) * + // expf(-c_KN[icos].y * rays[i_smsk*scrsdef.nscrs*scrsdef.nsrng + + // __shfl_sync(0xffffffff, isc, crs_shft)*scrsdef.nsrng + isr] * RES_SUM) * + // __shfl_sync(0xffffffff, p_scatter, sct_id)); + + atomicAdd(rslt + __shfl_sync(0xffffffff, isc, crs_shft) + isr * scrsdef.nscrs + + (iuc + iur * scrsdef.nscrs) * (scrsdef.nsrng * scrsdef.nscrs), + infov * c_KN[icos].x * em_vox * + (SRFCRS * + (s.x * __shfl_sync(0xffffffff, sc.x, crs_shft) * IR_RING + + s.y * __shfl_sync(0xffffffff, sc.y, crs_shft) * IR_RING) * + (_s_lgth * _s_lgth)) * + expf(-c_KN[icos].y * + rays[i_smsk * scrsdef.nscrs * scrsdef.nsrng + + __shfl_sync(0xffffffff, isc, crs_shft) * scrsdef.nsrng + isr] * + RES_SUM) * + __shfl_sync(0xffffffff, p_scatter, sct_id)); + } + + // #endif + + // if ( (blockIdx.x==0) & (k==0) && (isr==2) && (iur==2) && (iuc==25) && + // ((idx&((1< sc[%d] idx[%d]: t = %6.4f | tt = %6.4f | an=%6.4f, as0=%6.4f + as1=%6.4f, + // m=%d\n", + // __shfl(isc, crs_shft), idx, t, tt, an, __shfl(tt, sct_id), aux.y, m); + } + } } - //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -scatOUT prob_scatt( - scatOUT sctout, - float *KNlut, - char* mumsk, - IMflt mu, - IMflt em, - int *sctaxR, - float *sctaxW, - short *offseg, - float *scrs, - short *isrng, - float *srng, - char *xsxu, - short *sn1_rno, - short *sn1_sn11, - Cnst Cnt) -{ - clock_t begin, end; - double time_spent; - begin = clock(); - - // check which device is going to be used - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); - - getMemUse(Cnt); - - //scatter constants: max scatter angle and 
cosine step - float sctcnt[2]; - sctcnt[0] = Cnt.COSUPSMX; - sctcnt[1] = (NCOS - 1) / (1 - Cnt.COSUPSMX); - cudaMemcpyToSymbol(c_SCTCNT, sctcnt, 2 * sizeof(float)); - - float tofbin[4]; - tofbin[0] = (float)Cnt.TOFBINN; - tofbin[1] = Cnt.TOFBINS; - tofbin[2] = Cnt.TOFBIND; - tofbin[3] = Cnt.ITOFBIND; - cudaMemcpyToSymbol(c_TOFBIN, tofbin, 4 * sizeof(float)); - - if (Cnt.LOG <= LOGINFO) { - printf("i> time of flight properties for scatter estimation:\n"); - for (int i = 0; i<4; i++) printf(" tofbin[%d]=%f\n", i, tofbin[i]); - } - - //--------------- K-N LUTs --------------------------- - cudaMemcpyToSymbol(c_KN, KNlut, NCOS * sizeof(float2)); - //---------------------------------------------------- - - //================================================================== - //scatter crystals definitions [crs no, centre.x, centre.y] - scrsDEF d_scrsdef; - HANDLE_ERROR(cudaMallocManaged(&d_scrsdef.rng, 2*Cnt.NSRNG * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_scrsdef.rng, srng, 2*Cnt.NSRNG * sizeof(float), cudaMemcpyHostToDevice)); - - HANDLE_ERROR(cudaMallocManaged(&d_scrsdef.crs, 3*Cnt.NSCRS * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_scrsdef.crs, scrs, 3*Cnt.NSCRS * sizeof(float), cudaMemcpyHostToDevice)); - - d_scrsdef.nscrs = Cnt.NSCRS; - d_scrsdef.nsrng = Cnt.NSRNG; - if (Cnt.LOG <= LOGINFO) printf("i> number of scatter crystals used:\n >transaxially: %d\n >axially: %d\n", d_scrsdef.nscrs, d_scrsdef.nsrng); - - // test the scatter ring and crystal sampling - // for(int i=0; i 3D CUDA texture for the mu-map has been initialised.\n"); - //==================================================================== - - //============================================================ - //create a mask of attenuating voxels based on the object's mu-map - iMSK d_mu_msk = get_imskMu(mu, mumsk, Cnt); - //create a mask of active voxels based on the object's current emission image - iMSK d_em_msk = get_imskEm(em, Cnt.ETHRLD*em.max, Cnt); - 
//============================================================ - - if (d_em_msk.nvx>0) { - //============================================================ - //pre-calculate the line integrals for photon attenuation paths - short *d_rays = raysLUT(texo_mu3d, d_mu_msk, d_scrsdef, Cnt); - //============================================================ - - - if (Cnt.LOG <= LOGINFO) printf("i> calculating scatter probabilities for %d emission voxels...", d_em_msk.nvx); - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - //<<<<<<<<<<<<<<<<<<<<<<<<<<<< KERNEL <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - //dimension of the grid. depending on how many crystals (receiving an unscattered photon) there are. - //MAKE SURE and are less than 255 due to data type limits (uchar) - if (Cnt.LOG <= LOGDEBUG) printf("\n i>> kernel setup: nvx: %d, nsrng: %d, nscrs: %d, SS_WRP: %d\n", d_em_msk.nvx, d_scrsdef.nsrng, d_scrsdef.nscrs, SS_WRP); - - dim3 grid(d_em_msk.nvx, d_scrsdef.nsrng, d_scrsdef.nscrs); - dim3 block(SS_WRP, d_scrsdef.nsrng, 1); - Psct <<>>( - d_rslt, - texo_mu3d, - d_rays, - d_scrsdef, - d_mu_msk, - d_em_msk, - d_em); - HANDLE_ERROR(cudaGetLastError()); - //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) printf("DONE in %fs.\n\n", 0.001*elapsedTime); - cudaFree(d_rays); - cudaDeviceSynchronize(); - HANDLE_ERROR(cudaGetLastError()); - } - - - //> number of sinograms in different spans - int tbins; - if (Cnt.SPN == 1) { - tbins = Cnt.NSN64*d_scrsdef.nscrs*d_scrsdef.nscrs; - } - else if (Cnt.SPN == 11) { - tbins = Cnt.NSN11*d_scrsdef.nscrs*d_scrsdef.nscrs; - } - else{ - if (Cnt.LOG <= LOGWARNING) { - printf("e> Unrecognised span definition.\n"); - } - } - - - //3D scatter pre-sino out - 
float *d_sct3d = srslt2sino(d_rslt, d_xsxu, d_scrsdef, sctaxR, sctaxW, offseg, isrng, sn1_rno, sn1_sn11, Cnt); - HANDLE_ERROR(cudaMemcpy(sctout.s3d, d_sct3d, Cnt.TOFBINN*tbins * sizeof(float), cudaMemcpyDeviceToHost)); - - //raw result - // for (int i = 0; i<(Cnt.TOFBINN*d_scrsdef.nsrng*d_scrsdef.nsrng * d_scrsdef.nscrs*d_scrsdef.nscrs); i++) { - // sctout.sval[i] = d_rslt[i]; - // } - HANDLE_ERROR(cudaMemcpy( - sctout.sval, - d_rslt, - Cnt.TOFBINN*d_scrsdef.nsrng*d_scrsdef.nsrng * d_scrsdef.nscrs*d_scrsdef.nscrs * sizeof(float), - cudaMemcpyDeviceToHost)); - - // Destroy texture object - cudaDestroyTextureObject(texo_mu3d); - - // Free device memory - cudaFreeArray(d_muVolume); - cudaFree(d_sct3d); - cudaFree(d_mu_msk.i2v); - cudaFree(d_mu_msk.v2i); - cudaFree(d_em_msk.i2v); - cudaFree(d_em_msk.v2i); - cudaFree(d_em); - cudaFree(d_scrsdef.rng); - cudaFree(d_scrsdef.crs); - cudaFree(d_xsxu); - - cudaFree(d_rslt); - - getMemUse(Cnt); - - end = clock(); - time_spent = (double)(end - begin) / CLOCKS_PER_SEC; - if (Cnt.LOG <= LOGINFO) printf("\ni> TOTAL SCATTER TIME: %f\n", time_spent); - - return sctout; +scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em, int *sctaxR, + float *sctaxW, short *offseg, float *scrs, short *isrng, float *srng, + char *xsxu, short *sn1_rno, short *sn1_sn11, Cnst Cnt) { + clock_t begin, end; + double time_spent; + begin = clock(); + + // check which device is going to be used + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGINFO) + printf("i> using CUDA device #%d\n", dev_id); + + getMemUse(Cnt); + + // scatter constants: max scatter angle and cosine step + float sctcnt[2]; + sctcnt[0] = Cnt.COSUPSMX; + sctcnt[1] = (NCOS - 1) / (1 - Cnt.COSUPSMX); + cudaMemcpyToSymbol(c_SCTCNT, sctcnt, 2 * sizeof(float)); + + float tofbin[4]; + tofbin[0] = (float)Cnt.TOFBINN; + tofbin[1] = Cnt.TOFBINS; + tofbin[2] = Cnt.TOFBIND; + tofbin[3] = Cnt.ITOFBIND; + cudaMemcpyToSymbol(c_TOFBIN, tofbin, 4 * sizeof(float)); + 
+ if (Cnt.LOG <= LOGINFO) { + printf("i> time of flight properties for scatter estimation:\n"); + for (int i = 0; i < 4; i++) + printf(" tofbin[%d]=%f\n", i, tofbin[i]); + } + + //--------------- K-N LUTs --------------------------- + cudaMemcpyToSymbol(c_KN, KNlut, NCOS * sizeof(float2)); + //---------------------------------------------------- + + //================================================================== + // scatter crystals definitions [crs no, centre.x, centre.y] + scrsDEF d_scrsdef; + HANDLE_ERROR(cudaMallocManaged(&d_scrsdef.rng, 2 * Cnt.NSRNG * sizeof(float))); + HANDLE_ERROR( + cudaMemcpy(d_scrsdef.rng, srng, 2 * Cnt.NSRNG * sizeof(float), cudaMemcpyHostToDevice)); + + HANDLE_ERROR(cudaMallocManaged(&d_scrsdef.crs, 3 * Cnt.NSCRS * sizeof(float))); + HANDLE_ERROR( + cudaMemcpy(d_scrsdef.crs, scrs, 3 * Cnt.NSCRS * sizeof(float), cudaMemcpyHostToDevice)); + + d_scrsdef.nscrs = Cnt.NSCRS; + d_scrsdef.nsrng = Cnt.NSRNG; + if (Cnt.LOG <= LOGINFO) + printf("i> number of scatter crystals used:\n >transaxially: %d\n >axially: %d\n", + d_scrsdef.nscrs, d_scrsdef.nsrng); + + // test the scatter ring and crystal sampling + // for(int i=0; i 3D CUDA texture for the mu-map has been initialised.\n"); + //==================================================================== + + //============================================================ + // create a mask of attenuating voxels based on the object's mu-map + iMSK d_mu_msk = get_imskMu(mu, mumsk, Cnt); + // create a mask of active voxels based on the object's current emission image + iMSK d_em_msk = get_imskEm(em, Cnt.ETHRLD * em.max, Cnt); + //============================================================ + + if (d_em_msk.nvx > 0) { + //============================================================ + // pre-calculate the line integrals for photon attenuation paths + short *d_rays = raysLUT(texo_mu3d, d_mu_msk, d_scrsdef, Cnt); + //============================================================ + + if (Cnt.LOG <= 
LOGINFO) + printf("i> calculating scatter probabilities for %d emission voxels...", d_em_msk.nvx); + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + //<<<<<<<<<<<<<<<<<<<<<<<<<<<< KERNEL <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + // dimension of the grid. depending on how many crystals (receiving an unscattered photon) + // there are. MAKE SURE and are less than 255 due to data type limits (uchar) + if (Cnt.LOG <= LOGDEBUG) + printf("\n i>> kernel setup: nvx: %d, nsrng: %d, nscrs: %d, SS_WRP: %d\n", d_em_msk.nvx, + d_scrsdef.nsrng, d_scrsdef.nscrs, SS_WRP); + + dim3 grid(d_em_msk.nvx, d_scrsdef.nsrng, d_scrsdef.nscrs); + dim3 block(SS_WRP, d_scrsdef.nsrng, 1); + Psct<<>>(d_rslt, texo_mu3d, d_rays, d_scrsdef, d_mu_msk, d_em_msk, d_em); + HANDLE_ERROR(cudaGetLastError()); + //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGINFO) + printf("DONE in %fs.\n\n", 0.001 * elapsedTime); + cudaFree(d_rays); + cudaDeviceSynchronize(); + HANDLE_ERROR(cudaGetLastError()); + } + + //> number of sinograms in different spans + int tbins; + if (Cnt.SPN == 1) { + tbins = Cnt.NSN64 * d_scrsdef.nscrs * d_scrsdef.nscrs; + } else if (Cnt.SPN == 11) { + tbins = Cnt.NSN11 * d_scrsdef.nscrs * d_scrsdef.nscrs; + } else { + if (Cnt.LOG <= LOGWARNING) { + printf("e> Unrecognised span definition.\n"); + } + } + + // 3D scatter pre-sino out + float *d_sct3d = + srslt2sino(d_rslt, d_xsxu, d_scrsdef, sctaxR, sctaxW, offseg, isrng, sn1_rno, sn1_sn11, Cnt); + HANDLE_ERROR(cudaMemcpy(sctout.s3d, d_sct3d, Cnt.TOFBINN * tbins * sizeof(float), + cudaMemcpyDeviceToHost)); + + // raw result + // for (int i = 0; i<(Cnt.TOFBINN*d_scrsdef.nsrng*d_scrsdef.nsrng * + // d_scrsdef.nscrs*d_scrsdef.nscrs); i++) { sctout.sval[i] = 
d_rslt[i]; + // } + HANDLE_ERROR(cudaMemcpy(sctout.sval, d_rslt, + Cnt.TOFBINN * d_scrsdef.nsrng * d_scrsdef.nsrng * d_scrsdef.nscrs * + d_scrsdef.nscrs * sizeof(float), + cudaMemcpyDeviceToHost)); + + // Destroy texture object + cudaDestroyTextureObject(texo_mu3d); + + // Free device memory + cudaFreeArray(d_muVolume); + cudaFree(d_sct3d); + cudaFree(d_mu_msk.i2v); + cudaFree(d_mu_msk.v2i); + cudaFree(d_em_msk.i2v); + cudaFree(d_em_msk.v2i); + cudaFree(d_em); + cudaFree(d_scrsdef.rng); + cudaFree(d_scrsdef.crs); + cudaFree(d_xsxu); + + cudaFree(d_rslt); + + getMemUse(Cnt); + + end = clock(); + time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + if (Cnt.LOG <= LOGINFO) + printf("\ni> TOTAL SCATTER TIME: %f\n", time_spent); + + return sctout; } diff --git a/niftypet/nipet/sct/src/sct.h b/niftypet/nipet/sct/src/sct.h index 00b5fd53..f70c4a94 100644 --- a/niftypet/nipet/sct/src/sct.h +++ b/niftypet/nipet/sct/src/sct.h @@ -5,35 +5,16 @@ float *KN_LUT(void); typedef struct { - float * sval; //bin value - float * s3d; //scatter pre-sino in span-1 -}scatOUT; - -scatOUT prob_scatt( - scatOUT sctout, - float *KNlut, - char* mumsk, - IMflt mu, - IMflt em, - int *sctaxR, - float *sctaxW, - short *offseg, - float *scrs, - short *isrng, - float *srng, - char *xsxu, - short *sn1_rno, - short *sn1_sn11, - Cnst Cnt); + float *sval; // bin value + float *s3d; // scatter pre-sino in span-1 +} scatOUT; +scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em, int *sctaxR, + float *sctaxW, short *offseg, float *scrs, short *isrng, float *srng, + char *xsxu, short *sn1_rno, short *sn1_sn11, Cnst Cnt); //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - - - - - //## start ##// constants definitions in synch with Python. DO NOT MODIFY! 
// SCATTER IMAGE SIZE AND PROPERTIES @@ -59,39 +40,37 @@ scatOUT prob_scatt( #define SRFCRS 0.1695112f //## end ##// constants definitions in synch with Python -//number of samples per scattering patch (point) length; used as the power of 2: 2**LSCT2 = patch length +// number of samples per scattering patch (point) length; used as the power of 2: 2**LSCT2 = patch +// length #define LSCT2 2 //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - //============ RAY PATH SAMPLING ===================== -//period of scatter crystals (needed for definition) +// period of scatter crystals (needed for definition) #define SCRS_T 7 -//number of crystal rings for scatter estimation +// number of crystal rings for scatter estimation #define N_SRNG 8 -//accumulation step for attenuation calculations +// accumulation step for attenuation calculations #define ASTP SS_VXZ -//scatter step +// scatter step #define SSTP SS_VXZ -//Warp size for reductions in scatter attenuation calculation +// Warp size for reductions in scatter attenuation calculation #define SS_WRP 32 -//Threshold for mu-map values to be considered +// Threshold for mu-map values to be considered #define THR_MU 0.02f -//short dtype. step for path sums (max 6) +// short dtype. step for path sums (max 6) #define RES_SUM 0.000091552734375f -//short dtype. step for angle +// short dtype. 
step for angle #define RES_ANG 0.0054931640625f //==================================================== - //## end of constants definitions ##// //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - #endif diff --git a/niftypet/nipet/sct/src/sct_module.cu b/niftypet/nipet/sct/src/sct_module.cu index a067df58..48c48ab7 100644 --- a/niftypet/nipet/sct/src/sct_module.cu +++ b/niftypet/nipet/sct/src/sct_module.cu @@ -7,18 +7,17 @@ Copyrights: 2019 ------------------------------------------------------------------------*/ #define PY_SSIZE_T_CLEAN -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION //NPY_API_VERSION +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION // NPY_API_VERSION #include -#include #include +#include #include "def.h" #include "scanner_0.h" #include "sct.h" #include "sctaux.h" - //=== START PYTHON INIT === //--- Available function @@ -27,331 +26,318 @@ static PyObject *vsm_scatter(PyObject *self, PyObject *args); //> Module Method Table static PyMethodDef nifty_scatter_methods[] = { - {"vsm", vsm_scatter, METH_VARARGS, - "Estimates fully 3D TOF scatter event sinograms using a mu-map and an emission image."}, - {NULL, NULL, 0, NULL} // Sentinel + {"vsm", vsm_scatter, METH_VARARGS, + "Estimates fully 3D TOF scatter event sinograms using a mu-map and an emission image."}, + {NULL, NULL, 0, NULL} // Sentinel }; //> Module Definition Structure static struct PyModuleDef nifty_scatter_module = { - PyModuleDef_HEAD_INIT, - "nifty_scatter", //> name of module - //> module documentation, may be NULL - "This module provides an interface for the high throughput Voxel Driven Scatter modelling using CUDA.", - -1, //> the module keeps state in global variables. 
- nifty_scatter_methods -}; + PyModuleDef_HEAD_INIT, + "nifty_scatter", //> name of module + //> module documentation, may be NULL + "This module provides an interface for the high throughput Voxel Driven Scatter modelling " + "using CUDA.", + -1, //> the module keeps state in global variables. + nifty_scatter_methods}; //> Initialization function PyMODINIT_FUNC PyInit_nifty_scatter(void) { - Py_Initialize(); + Py_Initialize(); - //> load NumPy functionality - import_array(); + //> load NumPy functionality + import_array(); - return PyModule_Create(&nifty_scatter_module); + return PyModule_Create(&nifty_scatter_module); } //=== END PYTHON INIT === - - - - - - //====================================================================================== // E S T I M A T I N G S C A T T E R E V E N T S //-------------------------------------------------------------------------------------- static PyObject *vsm_scatter(PyObject *self, PyObject *args) { - //Structure of constants - Cnst Cnt; - //Dictionary of scanner constants - PyObject * o_mmrcnst; - - //Image structures - IMflt emIMG; - IMflt muIMG; - - // mu-map image - PyObject * o_mumap; - // mu-map mask (based on smoothed mu-map to enable further extension of attenuating/scattering voxels) - PyObject * o_mumsk; - - // emiassion image - PyObject * o_emimg; - - //3D scatter LUTs - PyObject * o_sctLUT; - - // axial LUTs - PyObject * o_axLUT; - - //output dictionary for scatter results - PyObject * o_sctout; - - - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOOOOO", &o_sctout, &o_mumap, &o_mumsk, &o_emimg, &o_sctLUT, &o_axLUT, &o_mmrcnst)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - /* Interpret the input objects as numpy arrays. 
*/ - PyObject* pd_aw = PyDict_GetItemString(o_mmrcnst, "Naw"); - Cnt.aw = (int)PyLong_AsLong(pd_aw); - PyObject* pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); - Cnt.A = (int)PyLong_AsLong(pd_A); - PyObject* pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); - Cnt.W = (int)PyLong_AsLong(pd_W); - PyObject* pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); - Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); - PyObject* pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); - Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); - PyObject* pd_NSN64 = PyDict_GetItemString(o_mmrcnst, "NSN64"); - Cnt.NSN64 = (int)PyLong_AsLong(pd_NSN64); - PyObject* pd_MRD = PyDict_GetItemString(o_mmrcnst, "MRD"); - Cnt.MRD = (int)PyLong_AsLong(pd_MRD); - PyObject* pd_NRNG = PyDict_GetItemString(o_mmrcnst, "NRNG"); - Cnt.NRNG = (int)PyLong_AsLong(pd_NRNG); - // PyObject* pd_NSRNG = PyDict_GetItemString(o_mmrcnst, "NSRNG"); - // Cnt.NSRNG = (int)PyLong_AsLong(pd_NSRNG); - PyObject* pd_NCRS = PyDict_GetItemString(o_mmrcnst, "NCRS"); - Cnt.NCRS = (int)PyLong_AsLong(pd_NCRS); - PyObject* pd_NSEG0 = PyDict_GetItemString(o_mmrcnst, "NSEG0"); - Cnt.NSEG0 = (int)PyLong_AsLong(pd_NSEG0); - PyObject* pd_ALPHA = PyDict_GetItemString(o_mmrcnst, "ALPHA"); - Cnt.ALPHA = (float)PyFloat_AsDouble(pd_ALPHA); - PyObject* pd_AXR = PyDict_GetItemString(o_mmrcnst, "AXR"); - Cnt.AXR = (float)PyFloat_AsDouble(pd_AXR); - - - PyObject* pd_TOFBINN = PyDict_GetItemString(o_mmrcnst, "TOFBINN"); - Cnt.TOFBINN = (int)PyLong_AsLong(pd_TOFBINN); - PyObject* pd_TOFBINS = PyDict_GetItemString(o_mmrcnst, "TOFBINS"); - Cnt.TOFBINS = (float)PyFloat_AsDouble(pd_TOFBINS); - PyObject* pd_TOFBIND = PyDict_GetItemString(o_mmrcnst, "TOFBIND"); - Cnt.TOFBIND = (float)PyFloat_AsDouble(pd_TOFBIND); - PyObject* pd_ITOFBIND = PyDict_GetItemString(o_mmrcnst, "ITOFBIND"); - Cnt.ITOFBIND = (float)PyFloat_AsDouble(pd_ITOFBIND); - - PyObject* pd_ETHRLD = PyDict_GetItemString(o_mmrcnst, "ETHRLD"); - Cnt.ETHRLD = (float)PyFloat_AsDouble(pd_ETHRLD); - PyObject* 
pd_COSUPSMX = PyDict_GetItemString(o_mmrcnst, "COSUPSMX"); - Cnt.COSUPSMX = (float)PyFloat_AsDouble(pd_COSUPSMX); - - PyObject* pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); - Cnt.SPN = (int)PyLong_AsLong(pd_span); - PyObject* pd_rngstrt = PyDict_GetItemString(o_mmrcnst, "RNG_STRT"); - Cnt.RNG_STRT = (char)PyLong_AsLong(pd_rngstrt); - PyObject* pd_rngend = PyDict_GetItemString(o_mmrcnst, "RNG_END"); - Cnt.RNG_END = (char)PyLong_AsLong(pd_rngend); - PyObject* pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - PyObject* pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - - - //> images - PyArrayObject *p_mumap=NULL, *p_mumsk=NULL, *p_emimg=NULL; - p_mumap = (PyArrayObject *)PyArray_FROM_OTF(o_mumap, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - p_mumsk = (PyArrayObject *)PyArray_FROM_OTF(o_mumsk, NPY_INT8, NPY_ARRAY_IN_ARRAY); - p_emimg = (PyArrayObject *)PyArray_FROM_OTF(o_emimg, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - - //> output dictionary for results - PyObject* pd_sct3 = PyDict_GetItemString(o_sctout, "sct_3d"); - PyObject* pd_sval = PyDict_GetItemString(o_sctout, "sct_val"); - - PyArrayObject *p_sct3=NULL, *p_sval=NULL; - p_sct3 = (PyArrayObject *)PyArray_FROM_OTF(pd_sct3, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - p_sval = (PyArrayObject *)PyArray_FROM_OTF(pd_sval, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - - - //> axial LUTs: - PyObject* pd_sn1_rno = PyDict_GetItemString(o_axLUT, "sn1_rno"); - PyObject* pd_sn1_sn11 = PyDict_GetItemString(o_axLUT, "sn1_sn11"); - PyArrayObject *p_sn1_rno=NULL, *p_sn1_sn11=NULL; - p_sn1_rno = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_rno, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_sn1_sn11 = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_sn11, NPY_INT16, NPY_ARRAY_IN_ARRAY); - - - //-------- SCATTER -------- - // number of axial scatter crystals (rings) for modelling - PyObject* pd_NSRNG = PyDict_GetItemString(o_sctLUT, "NSRNG"); - Cnt.NSRNG = 
(int)PyLong_AsLong(pd_NSRNG); - // number of transaxial scatter crystals for modelling - PyObject* pd_NSCRS = PyDict_GetItemString(o_sctLUT, "NSCRS"); - Cnt.NSCRS = (int)PyLong_AsLong(pd_NSCRS); - - //> scatter LUTs: - PyObject* pd_scrs = PyDict_GetItemString(o_sctLUT, "scrs"); - PyObject* pd_xsxu = PyDict_GetItemString(o_sctLUT, "xsxu"); - PyObject* pd_KN = PyDict_GetItemString(o_sctLUT, "KN"); - PyObject* pd_sirng = PyDict_GetItemString(o_sctLUT, "sirng"); - PyObject* pd_srng = PyDict_GetItemString(o_sctLUT, "srng"); - PyObject* pd_offseg = PyDict_GetItemString(o_sctLUT, "offseg"); - PyObject* pd_sctaxR = PyDict_GetItemString(o_sctLUT, "sctaxR"); - PyObject* pd_sctaxW = PyDict_GetItemString(o_sctLUT, "sctaxW"); - - PyArrayObject *p_scrs=NULL, *p_KN=NULL, - *p_isrng=NULL, *p_srng=NULL, *p_xsxu=NULL, - *p_offseg=NULL, *p_sctaxR=NULL, *p_sctaxW=NULL; - - p_scrs = (PyArrayObject *)PyArray_FROM_OTF(pd_scrs, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - p_xsxu = (PyArrayObject *)PyArray_FROM_OTF(pd_xsxu, NPY_INT8, NPY_ARRAY_IN_ARRAY); - p_KN = (PyArrayObject *)PyArray_FROM_OTF(pd_KN, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - p_isrng = (PyArrayObject *)PyArray_FROM_OTF(pd_sirng, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_srng = (PyArrayObject *)PyArray_FROM_OTF(pd_srng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - p_offseg = (PyArrayObject *)PyArray_FROM_OTF(pd_offseg, NPY_INT16, NPY_ARRAY_IN_ARRAY); - p_sctaxR = (PyArrayObject *)PyArray_FROM_OTF(pd_sctaxR, NPY_INT32, NPY_ARRAY_IN_ARRAY); - p_sctaxW = (PyArrayObject *)PyArray_FROM_OTF(pd_sctaxW, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - //------------------------- - - - /* If that didn't work, throw an exception. 
*/ - if (p_mumap == NULL || p_mumsk == NULL || p_emimg == NULL || - p_sct3 == NULL || p_sval == NULL || p_xsxu == NULL || - p_sn1_sn11 == NULL || p_sn1_rno == NULL|| p_srng == NULL || - p_scrs == NULL|| p_KN == NULL || p_isrng == NULL || - p_offseg == NULL|| p_sctaxR == NULL || p_sctaxW == NULL) - { - Py_XDECREF(p_mumap); - Py_XDECREF(p_mumsk); - Py_XDECREF(p_emimg); - Py_XDECREF(p_xsxu); - Py_XDECREF(p_sn1_rno); - Py_XDECREF(p_sn1_sn11); - - Py_XDECREF(p_scrs); - Py_XDECREF(p_KN); - Py_XDECREF(p_isrng); - Py_XDECREF(p_srng); - Py_XDECREF(p_offseg); - Py_XDECREF(p_sctaxR); - Py_XDECREF(p_sctaxW); - - PyArray_DiscardWritebackIfCopy(p_sct3); - Py_XDECREF(p_sct3); - PyArray_DiscardWritebackIfCopy(p_sval); - Py_XDECREF(p_sval); - - printf("e> problem with getting the images and LUTs in C functions... :(\n"); - return NULL; - } - - //get the c-type arrays - char *mumsk = (char*)PyArray_DATA(p_mumsk); - float *mumap = (float*)PyArray_DATA(p_mumap); - float *emimg = (float*)PyArray_DATA(p_emimg); - - short *sn1_rno = (short*)PyArray_DATA(p_sn1_rno); - short *sn1_sn11 = (short*)PyArray_DATA(p_sn1_sn11); - - //indexes of rings included in scatter estimation - short *isrng = (short*)PyArray_DATA(p_isrng); - //axial scatter ring position - float *srng = (float*)PyArray_DATA(p_srng); - - //offset in each segment used for rings to sino LUT - short *offseg = (short*)PyArray_DATA(p_offseg); - //scatter sino indexes in axial dimensions through Michelogram used for interpolation in 3D - int *sctaxR = (int*)PyArray_DATA(p_sctaxR); - //weights for the interpolation in 3D (used together with the above) - float *sctaxW = (float*)PyArray_DATA(p_sctaxW); - //K-N probabilities in the LUT - float *KNlut = (float*)PyArray_DATA(p_KN); - - // transaxial scatter crystal table - float *scrs = (float*)PyArray_DATA(p_scrs); - - char *xsxu = (char*)PyArray_DATA(p_xsxu); - - //output structure - scatOUT sctout; - sctout.sval = (float*)PyArray_DATA(p_sval); - sctout.s3d = 
(float*)PyArray_DATA(p_sct3); - - //Get the image dims - muIMG.nvx = (size_t)(PyArray_DIM(p_mumap, 0) * PyArray_DIM(p_mumap, 1) * PyArray_DIM(p_mumap, 2)); - emIMG.nvx = (size_t)(PyArray_DIM(p_emimg, 0) * PyArray_DIM(p_emimg, 1) * PyArray_DIM(p_emimg, 2)); - - if ((muIMG.nvx != emIMG.nvx) && (Cnt.LOG <= LOGWARNING)) - printf("\nw> mu-map and emission image have different dims: mu.nvx = %lu, em.nvx = %lu\n", muIMG.nvx, emIMG.nvx); - - //get the stats in the image structure - float mumx = -1e12, emmx = -1e12, mumn = 1e12, emmn = 1e12; - for (int i = 0; imumx) mumx = mumap[i]; - if (mumap[i]emmx) emmx = emimg[i]; - if (emimg[i]0.1*mumx) muIMG.n10mx += 1; - - for (int i = 0; i0.1*emmx) emIMG.n10mx += 1; - - if (Cnt.LOG <= LOGDEBUG) printf("i> mumx = %f, mumin = %f, emmx = %f, emmn = %f\n", mumx, mumn, emmx, emmn); - - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); - - //<><><><><><><><><> S C A T T E R K E R N E L <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - prob_scatt( - sctout, - KNlut, - mumsk, - muIMG, emIMG, - sctaxR,sctaxW, - offseg, - scrs, - isrng, - srng, - xsxu, - sn1_rno, - sn1_sn11, - Cnt); - - cudaDeviceSynchronize(); - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - - //Clean up - if (Cnt.LOG <= LOGDEBUG) printf("i> cleaning scatter variables..."); - Py_DECREF(p_mumap); - Py_DECREF(p_mumsk); - Py_DECREF(p_emimg); - Py_DECREF(p_sn1_rno); - Py_DECREF(p_sn1_sn11); - Py_DECREF(p_isrng); - Py_DECREF(p_srng); - Py_DECREF(p_xsxu); - Py_DECREF(p_offseg); - Py_DECREF(p_sctaxR); - Py_DECREF(p_sctaxW); - Py_DECREF(p_KN); - Py_DECREF(p_scrs); - - PyArray_ResolveWritebackIfCopy(p_sct3); - Py_DECREF(p_sct3); - PyArray_ResolveWritebackIfCopy(p_sval); - Py_DECREF(p_sval); - - Py_INCREF(Py_None); - if (Cnt.LOG <= LOGDEBUG) printf("DONE.\n"); - return Py_None; + // Structure of constants + Cnst Cnt; + // Dictionary of scanner constants + 
PyObject *o_mmrcnst; + + // Image structures + IMflt emIMG; + IMflt muIMG; + + // mu-map image + PyObject *o_mumap; + // mu-map mask (based on smoothed mu-map to enable further extension of attenuating/scattering + // voxels) + PyObject *o_mumsk; + + // emiassion image + PyObject *o_emimg; + + // 3D scatter LUTs + PyObject *o_sctLUT; + + // axial LUTs + PyObject *o_axLUT; + + // output dictionary for scatter results + PyObject *o_sctout; + + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OOOOOOO", &o_sctout, &o_mumap, &o_mumsk, &o_emimg, &o_sctLUT, + &o_axLUT, &o_mmrcnst)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + /* Interpret the input objects as numpy arrays. */ + PyObject *pd_aw = PyDict_GetItemString(o_mmrcnst, "Naw"); + Cnt.aw = (int)PyLong_AsLong(pd_aw); + PyObject *pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); + Cnt.A = (int)PyLong_AsLong(pd_A); + PyObject *pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); + Cnt.W = (int)PyLong_AsLong(pd_W); + PyObject *pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); + Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); + PyObject *pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); + Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); + PyObject *pd_NSN64 = PyDict_GetItemString(o_mmrcnst, "NSN64"); + Cnt.NSN64 = (int)PyLong_AsLong(pd_NSN64); + PyObject *pd_MRD = PyDict_GetItemString(o_mmrcnst, "MRD"); + Cnt.MRD = (int)PyLong_AsLong(pd_MRD); + PyObject *pd_NRNG = PyDict_GetItemString(o_mmrcnst, "NRNG"); + Cnt.NRNG = (int)PyLong_AsLong(pd_NRNG); + // PyObject* pd_NSRNG = PyDict_GetItemString(o_mmrcnst, "NSRNG"); + // Cnt.NSRNG = (int)PyLong_AsLong(pd_NSRNG); + PyObject *pd_NCRS = PyDict_GetItemString(o_mmrcnst, "NCRS"); + Cnt.NCRS = (int)PyLong_AsLong(pd_NCRS); + PyObject *pd_NSEG0 = PyDict_GetItemString(o_mmrcnst, "NSEG0"); + Cnt.NSEG0 = (int)PyLong_AsLong(pd_NSEG0); + PyObject *pd_ALPHA = 
PyDict_GetItemString(o_mmrcnst, "ALPHA"); + Cnt.ALPHA = (float)PyFloat_AsDouble(pd_ALPHA); + PyObject *pd_AXR = PyDict_GetItemString(o_mmrcnst, "AXR"); + Cnt.AXR = (float)PyFloat_AsDouble(pd_AXR); + + PyObject *pd_TOFBINN = PyDict_GetItemString(o_mmrcnst, "TOFBINN"); + Cnt.TOFBINN = (int)PyLong_AsLong(pd_TOFBINN); + PyObject *pd_TOFBINS = PyDict_GetItemString(o_mmrcnst, "TOFBINS"); + Cnt.TOFBINS = (float)PyFloat_AsDouble(pd_TOFBINS); + PyObject *pd_TOFBIND = PyDict_GetItemString(o_mmrcnst, "TOFBIND"); + Cnt.TOFBIND = (float)PyFloat_AsDouble(pd_TOFBIND); + PyObject *pd_ITOFBIND = PyDict_GetItemString(o_mmrcnst, "ITOFBIND"); + Cnt.ITOFBIND = (float)PyFloat_AsDouble(pd_ITOFBIND); + + PyObject *pd_ETHRLD = PyDict_GetItemString(o_mmrcnst, "ETHRLD"); + Cnt.ETHRLD = (float)PyFloat_AsDouble(pd_ETHRLD); + PyObject *pd_COSUPSMX = PyDict_GetItemString(o_mmrcnst, "COSUPSMX"); + Cnt.COSUPSMX = (float)PyFloat_AsDouble(pd_COSUPSMX); + + PyObject *pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); + Cnt.SPN = (int)PyLong_AsLong(pd_span); + PyObject *pd_rngstrt = PyDict_GetItemString(o_mmrcnst, "RNG_STRT"); + Cnt.RNG_STRT = (char)PyLong_AsLong(pd_rngstrt); + PyObject *pd_rngend = PyDict_GetItemString(o_mmrcnst, "RNG_END"); + Cnt.RNG_END = (char)PyLong_AsLong(pd_rngend); + PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); + Cnt.LOG = (char)PyLong_AsLong(pd_log); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + + //> images + PyArrayObject *p_mumap = NULL, *p_mumsk = NULL, *p_emimg = NULL; + p_mumap = (PyArrayObject *)PyArray_FROM_OTF(o_mumap, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + p_mumsk = (PyArrayObject *)PyArray_FROM_OTF(o_mumsk, NPY_INT8, NPY_ARRAY_IN_ARRAY); + p_emimg = (PyArrayObject *)PyArray_FROM_OTF(o_emimg, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + //> output dictionary for results + PyObject *pd_sct3 = PyDict_GetItemString(o_sctout, "sct_3d"); + PyObject *pd_sval = PyDict_GetItemString(o_sctout, "sct_val"); + 
+ PyArrayObject *p_sct3 = NULL, *p_sval = NULL; + p_sct3 = (PyArrayObject *)PyArray_FROM_OTF(pd_sct3, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + p_sval = (PyArrayObject *)PyArray_FROM_OTF(pd_sval, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + + //> axial LUTs: + PyObject *pd_sn1_rno = PyDict_GetItemString(o_axLUT, "sn1_rno"); + PyObject *pd_sn1_sn11 = PyDict_GetItemString(o_axLUT, "sn1_sn11"); + PyArrayObject *p_sn1_rno = NULL, *p_sn1_sn11 = NULL; + p_sn1_rno = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_rno, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_sn1_sn11 = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1_sn11, NPY_INT16, NPY_ARRAY_IN_ARRAY); + + //-------- SCATTER -------- + // number of axial scatter crystals (rings) for modelling + PyObject *pd_NSRNG = PyDict_GetItemString(o_sctLUT, "NSRNG"); + Cnt.NSRNG = (int)PyLong_AsLong(pd_NSRNG); + // number of transaxial scatter crystals for modelling + PyObject *pd_NSCRS = PyDict_GetItemString(o_sctLUT, "NSCRS"); + Cnt.NSCRS = (int)PyLong_AsLong(pd_NSCRS); + + //> scatter LUTs: + PyObject *pd_scrs = PyDict_GetItemString(o_sctLUT, "scrs"); + PyObject *pd_xsxu = PyDict_GetItemString(o_sctLUT, "xsxu"); + PyObject *pd_KN = PyDict_GetItemString(o_sctLUT, "KN"); + PyObject *pd_sirng = PyDict_GetItemString(o_sctLUT, "sirng"); + PyObject *pd_srng = PyDict_GetItemString(o_sctLUT, "srng"); + PyObject *pd_offseg = PyDict_GetItemString(o_sctLUT, "offseg"); + PyObject *pd_sctaxR = PyDict_GetItemString(o_sctLUT, "sctaxR"); + PyObject *pd_sctaxW = PyDict_GetItemString(o_sctLUT, "sctaxW"); + + PyArrayObject *p_scrs = NULL, *p_KN = NULL, *p_isrng = NULL, *p_srng = NULL, *p_xsxu = NULL, + *p_offseg = NULL, *p_sctaxR = NULL, *p_sctaxW = NULL; + + p_scrs = (PyArrayObject *)PyArray_FROM_OTF(pd_scrs, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + p_xsxu = (PyArrayObject *)PyArray_FROM_OTF(pd_xsxu, NPY_INT8, NPY_ARRAY_IN_ARRAY); + p_KN = (PyArrayObject *)PyArray_FROM_OTF(pd_KN, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + p_isrng = (PyArrayObject *)PyArray_FROM_OTF(pd_sirng, 
NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_srng = (PyArrayObject *)PyArray_FROM_OTF(pd_srng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + p_offseg = (PyArrayObject *)PyArray_FROM_OTF(pd_offseg, NPY_INT16, NPY_ARRAY_IN_ARRAY); + p_sctaxR = (PyArrayObject *)PyArray_FROM_OTF(pd_sctaxR, NPY_INT32, NPY_ARRAY_IN_ARRAY); + p_sctaxW = (PyArrayObject *)PyArray_FROM_OTF(pd_sctaxW, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + //------------------------- + + /* If that didn't work, throw an exception. */ + if (p_mumap == NULL || p_mumsk == NULL || p_emimg == NULL || p_sct3 == NULL || p_sval == NULL || + p_xsxu == NULL || p_sn1_sn11 == NULL || p_sn1_rno == NULL || p_srng == NULL || + p_scrs == NULL || p_KN == NULL || p_isrng == NULL || p_offseg == NULL || p_sctaxR == NULL || + p_sctaxW == NULL) { + Py_XDECREF(p_mumap); + Py_XDECREF(p_mumsk); + Py_XDECREF(p_emimg); + Py_XDECREF(p_xsxu); + Py_XDECREF(p_sn1_rno); + Py_XDECREF(p_sn1_sn11); + + Py_XDECREF(p_scrs); + Py_XDECREF(p_KN); + Py_XDECREF(p_isrng); + Py_XDECREF(p_srng); + Py_XDECREF(p_offseg); + Py_XDECREF(p_sctaxR); + Py_XDECREF(p_sctaxW); + + PyArray_DiscardWritebackIfCopy(p_sct3); + Py_XDECREF(p_sct3); + PyArray_DiscardWritebackIfCopy(p_sval); + Py_XDECREF(p_sval); + + printf("e> problem with getting the images and LUTs in C functions... 
:(\n"); + return NULL; + } + + // get the c-type arrays + char *mumsk = (char *)PyArray_DATA(p_mumsk); + float *mumap = (float *)PyArray_DATA(p_mumap); + float *emimg = (float *)PyArray_DATA(p_emimg); + + short *sn1_rno = (short *)PyArray_DATA(p_sn1_rno); + short *sn1_sn11 = (short *)PyArray_DATA(p_sn1_sn11); + + // indexes of rings included in scatter estimation + short *isrng = (short *)PyArray_DATA(p_isrng); + // axial scatter ring position + float *srng = (float *)PyArray_DATA(p_srng); + + // offset in each segment used for rings to sino LUT + short *offseg = (short *)PyArray_DATA(p_offseg); + // scatter sino indexes in axial dimensions through Michelogram used for interpolation in 3D + int *sctaxR = (int *)PyArray_DATA(p_sctaxR); + // weights for the interpolation in 3D (used together with the above) + float *sctaxW = (float *)PyArray_DATA(p_sctaxW); + // K-N probabilities in the LUT + float *KNlut = (float *)PyArray_DATA(p_KN); + + // transaxial scatter crystal table + float *scrs = (float *)PyArray_DATA(p_scrs); + + char *xsxu = (char *)PyArray_DATA(p_xsxu); + + // output structure + scatOUT sctout; + sctout.sval = (float *)PyArray_DATA(p_sval); + sctout.s3d = (float *)PyArray_DATA(p_sct3); + + // Get the image dims + muIMG.nvx = + (size_t)(PyArray_DIM(p_mumap, 0) * PyArray_DIM(p_mumap, 1) * PyArray_DIM(p_mumap, 2)); + emIMG.nvx = + (size_t)(PyArray_DIM(p_emimg, 0) * PyArray_DIM(p_emimg, 1) * PyArray_DIM(p_emimg, 2)); + + if ((muIMG.nvx != emIMG.nvx) && (Cnt.LOG <= LOGWARNING)) + printf("\nw> mu-map and emission image have different dims: mu.nvx = %lu, em.nvx = %lu\n", + muIMG.nvx, emIMG.nvx); + + // get the stats in the image structure + float mumx = -1e12, emmx = -1e12, mumn = 1e12, emmn = 1e12; + for (int i = 0; i < muIMG.nvx; i++) { + if (mumap[i] > mumx) + mumx = mumap[i]; + if (mumap[i] < mumn) + mumn = mumap[i]; + } + for (int i = 0; i < emIMG.nvx; i++) { + if (emimg[i] > emmx) + emmx = emimg[i]; + if (emimg[i] < emmn) + emmn = emimg[i]; + } + + 
muIMG.im = mumap; + emIMG.im = emimg; + muIMG.max = mumx; + emIMG.max = emmx; + muIMG.min = mumn; + emIMG.min = emmn; + muIMG.n10mx = 0; + emIMG.n10mx = 0; + for (int i = 0; i < muIMG.nvx; i++) + if (mumap[i] > 0.1 * mumx) + muIMG.n10mx += 1; + + for (int i = 0; i < emIMG.nvx; i++) + if (emimg[i] > 0.1 * emmx) + emIMG.n10mx += 1; + + if (Cnt.LOG <= LOGDEBUG) + printf("i> mumx = %f, mumin = %f, emmx = %f, emmn = %f\n", mumx, mumn, emmx, emmn); + + // sets the device on which to calculate + HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + + //<><><><><><><><><> S C A T T E R K E R N E L + //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + prob_scatt(sctout, KNlut, mumsk, muIMG, emIMG, sctaxR, sctaxW, offseg, scrs, isrng, srng, xsxu, + sn1_rno, sn1_sn11, Cnt); + + cudaDeviceSynchronize(); + //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + + // Clean up + if (Cnt.LOG <= LOGDEBUG) + printf("i> cleaning scatter variables..."); + Py_DECREF(p_mumap); + Py_DECREF(p_mumsk); + Py_DECREF(p_emimg); + Py_DECREF(p_sn1_rno); + Py_DECREF(p_sn1_sn11); + Py_DECREF(p_isrng); + Py_DECREF(p_srng); + Py_DECREF(p_xsxu); + Py_DECREF(p_offseg); + Py_DECREF(p_sctaxR); + Py_DECREF(p_sctaxW); + Py_DECREF(p_KN); + Py_DECREF(p_scrs); + + PyArray_ResolveWritebackIfCopy(p_sct3); + Py_DECREF(p_sct3); + PyArray_ResolveWritebackIfCopy(p_sval); + Py_DECREF(p_sval); + + Py_INCREF(Py_None); + if (Cnt.LOG <= LOGDEBUG) + printf("DONE.\n"); + return Py_None; } diff --git a/niftypet/nipet/sct/src/sctaux.cu b/niftypet/nipet/sct/src/sctaux.cu index cc290f88..0dc8e7e1 100644 --- a/niftypet/nipet/sct/src/sctaux.cu +++ b/niftypet/nipet/sct/src/sctaux.cu @@ -5,379 +5,344 @@ voxel-driven scatter modelling (VSM) author: Pawel Markiewicz Copyrights: 2020 ------------------------------------------------------------------------*/ -#include #include "sctaux.h" +#include 
//====================================================================== -//SCATTER RESULTS PROCESSING +// SCATTER RESULTS PROCESSING //====================================================================== __constant__ short c_isrng[N_SRNG]; - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -__global__ void d_sct2sn1( - float *scts1, - float *srslt, - size_t offtof, - char *xsxu, - short *offseg, - int NBIN) -{ - //scatter crystal index - char ics = threadIdx.x; - - //scatter ring index - char irs = threadIdx.y; - - //unscattered crystal index - char icu = blockIdx.x; - //unscattered crystal index - char iru = blockIdx.y; +__global__ void d_sct2sn1(float *scts1, float *srslt, size_t offtof, char *xsxu, short *offseg, + int NBIN) { + // scatter crystal index + char ics = threadIdx.x; + // scatter ring index + char irs = threadIdx.y; + // unscattered crystal index + char icu = blockIdx.x; + // unscattered crystal index + char iru = blockIdx.y; - //number of considered crystals and rings for scatter - char nscrs = gridDim.x; - char nsrng = gridDim.y; + // number of considered crystals and rings for scatter + char nscrs = gridDim.x; + char nsrng = gridDim.y; - //scatter bin index for one scatter sino/plane - short ssi = nscrs*icu + ics; - bool pos = ((2*xsxu[ssi] - 1) * (irs - iru)) > 0; + // scatter bin index for one scatter sino/plane + short ssi = nscrs * icu + ics; + bool pos = ((2 * xsxu[ssi] - 1) * (irs - iru)) > 0; - // ring difference index used for addressing the segment offset to obtain sino index in span-1 - unsigned short rd = __usad(c_isrng[irs], c_isrng[iru], 0); + // ring difference index used for addressing the segment offset to obtain sino index in span-1 + unsigned short rd = __usad(c_isrng[irs], c_isrng[iru], 0); - unsigned short rdi = (2*rd - 1*pos); - unsigned short sni = offseg[rdi] + MIN(c_isrng[irs], c_isrng[iru]); + unsigned short rdi = (2 * rd - 1 * pos); + unsigned short sni = offseg[rdi] + 
MIN(c_isrng[irs], c_isrng[iru]); - atomicAdd(scts1 + sni*NBIN + ssi, - srslt[offtof + iru*nscrs*nsrng*nscrs + icu*nsrng*nscrs + irs*nscrs + ics]); + atomicAdd(scts1 + sni * NBIN + ssi, + srslt[offtof + iru * nscrs * nsrng * nscrs + icu * nsrng * nscrs + irs * nscrs + ics]); } //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -__global__ void d_sct_axinterp( - float *sct3d, - const float *scts1, - const int4 *sctaxR, - const float4 *sctaxW, - const short *sn1_sn11, - int NBIN, - int NSN1, - int SPN, - int tof_off) -{ - //scatter crystal index - char ics = threadIdx.x; - - //unscattered crystal index (the 4s are done in the loop below) - char icu = blockIdx.x; - - //span-1 sino index - short sni = blockIdx.y; - - float tmp = sctaxW[sni].x * scts1[NBIN*sctaxR[sni].x + icu*blockDim.x + ics] + - sctaxW[sni].y * scts1[NBIN*sctaxR[sni].y + icu*blockDim.x + ics] + - sctaxW[sni].z * scts1[NBIN*sctaxR[sni].z + icu*blockDim.x + ics] + - sctaxW[sni].w * scts1[NBIN*sctaxR[sni].w + icu*blockDim.x + ics]; - - //span-1 or span-11 scatter pre-sinogram interpolation - if (SPN == 1) - sct3d[tof_off + sni*NBIN + icu*blockDim.x + ics] = tmp; - else if (SPN == 11) - if (sni Number of scatter rings is different in definitions from Python! 
<<<<<<<<<<<<<<<<<<< error \n"); - - //---scatter ring indices to constant memory (GPU) - HANDLE_ERROR(cudaMemcpyToSymbol(c_isrng, isrng, Cnt.NSRNG * sizeof(short))); - //--- - - short2 *d_sn1_rno; - HANDLE_ERROR(cudaMalloc(&d_sn1_rno, Cnt.NSN1 * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_sn1_rno, sn1_rno, Cnt.NSN1 * sizeof(short2), cudaMemcpyHostToDevice)); - - short *d_sn1_sn11; - HANDLE_ERROR(cudaMalloc(&d_sn1_sn11, Cnt.NSN1 * sizeof(short))); - HANDLE_ERROR(cudaMemcpy(d_sn1_sn11, sn1_sn11, Cnt.NSN1 * sizeof(short), cudaMemcpyHostToDevice)); - //---- - - for (int i = 0; i 3D scatter results into span-1 pre-sino for TOF bin %d...", i); - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - dim3 grid(d_scrsdef.nscrs, d_scrsdef.nsrng, 1); - dim3 block(d_scrsdef.nscrs, d_scrsdef.nsrng, 1); - d_sct2sn1 <<< grid, block >>>(d_scts1, - d_srslt, - offtof, - d_xsxu, - d_offseg, - (int)(d_scrsdef.nscrs*d_scrsdef.nscrs)); - HANDLE_ERROR(cudaGetLastError()); - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) printf("DONE in %fs.\n", 1e-3*elapsedTime); - - - - if (Cnt.LOG <= LOGINFO) printf("i> 3D scatter axial interpolation..."); - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - block.x = d_scrsdef.nscrs; - block.y = 1; - block.z = 1; - grid.x = d_scrsdef.nscrs; - grid.y = Cnt.NSN1; - grid.z = 1; - d_sct_axinterp <<< grid, block >>>(d_sct3di, - d_scts1, - d_sctaxR, - d_sctaxW, - d_sn1_sn11, - (int)(d_scrsdef.nscrs*d_scrsdef.nscrs), - Cnt.NSN1, - Cnt.SPN, - i*tbins); - HANDLE_ERROR(cudaGetLastError()); - 
//<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) printf("DONE in %fs.\n", 1e-3*elapsedTime); - - } - - cudaFree(d_scts1); - return d_sct3di; - - // cudaFree(d_sct3di); - // return d_scts1; - +float *srslt2sino(float *d_srslt, char *d_xsxu, scrsDEF d_scrsdef, int *sctaxR, float *sctaxW, + short *offseg, short *isrng, short *sn1_rno, short *sn1_sn11, Cnst Cnt) { + + // scatter pre-sino in span-1 (tmporary) + float *d_scts1; + HANDLE_ERROR( + cudaMalloc(&d_scts1, Cnt.NSN64 * d_scrsdef.nscrs * d_scrsdef.nscrs * sizeof(float))); + + // axially interpolated scatter pre-sino; full span-1 without MRD limit or span-11 with MRD=60 + float *d_sct3di; + int tbins = 0; + if (Cnt.SPN == 1) + tbins = Cnt.NSN64 * d_scrsdef.nscrs * d_scrsdef.nscrs; + // scatter pre-sino, span-11 + else if (Cnt.SPN == 11) + tbins = Cnt.NSN11 * d_scrsdef.nscrs * d_scrsdef.nscrs; + + HANDLE_ERROR(cudaMalloc(&d_sct3di, Cnt.TOFBINN * tbins * sizeof(float))); + HANDLE_ERROR(cudaMemset(d_sct3di, 0, Cnt.TOFBINN * tbins * sizeof(float))); + + // number of all scatter estimated values (sevn) for one TOF 3D sino + int sevn = d_scrsdef.nsrng * d_scrsdef.nscrs * d_scrsdef.nsrng * d_scrsdef.nscrs; + + //---- constants + int4 *d_sctaxR; + HANDLE_ERROR(cudaMalloc(&d_sctaxR, Cnt.NSN64 * sizeof(int4))); + HANDLE_ERROR(cudaMemcpy(d_sctaxR, sctaxR, Cnt.NSN64 * sizeof(int4), cudaMemcpyHostToDevice)); + + float4 *d_sctaxW; + HANDLE_ERROR(cudaMalloc(&d_sctaxW, Cnt.NSN64 * sizeof(float4))); + HANDLE_ERROR(cudaMemcpy(d_sctaxW, sctaxW, Cnt.NSN64 * sizeof(float4), cudaMemcpyHostToDevice)); + + short *d_offseg; + HANDLE_ERROR(cudaMalloc(&d_offseg, (Cnt.NSEG0 + 1) * sizeof(short))); + HANDLE_ERROR( + cudaMemcpy(d_offseg, offseg, (Cnt.NSEG0 + 1) * sizeof(short), cudaMemcpyHostToDevice)); + + if (N_SRNG != Cnt.NSRNG) + printf("e> 
Number of scatter rings is different in definitions from Python! " + "<<<<<<<<<<<<<<<<<<< error \n"); + + //---scatter ring indices to constant memory (GPU) + HANDLE_ERROR(cudaMemcpyToSymbol(c_isrng, isrng, Cnt.NSRNG * sizeof(short))); + //--- + + short2 *d_sn1_rno; + HANDLE_ERROR(cudaMalloc(&d_sn1_rno, Cnt.NSN1 * sizeof(short2))); + HANDLE_ERROR(cudaMemcpy(d_sn1_rno, sn1_rno, Cnt.NSN1 * sizeof(short2), cudaMemcpyHostToDevice)); + + short *d_sn1_sn11; + HANDLE_ERROR(cudaMalloc(&d_sn1_sn11, Cnt.NSN1 * sizeof(short))); + HANDLE_ERROR(cudaMemcpy(d_sn1_sn11, sn1_sn11, Cnt.NSN1 * sizeof(short), cudaMemcpyHostToDevice)); + //---- + + for (int i = 0; i < Cnt.TOFBINN; i++) { + + // offset for given TOF bin + size_t offtof = i * sevn; + + // init to zeros + HANDLE_ERROR( + cudaMemset(d_scts1, 0, Cnt.NSN64 * d_scrsdef.nscrs * d_scrsdef.nscrs * sizeof(float))); + + if (Cnt.LOG <= LOGINFO) + printf("i> 3D scatter results into span-1 pre-sino for TOF bin %d...", i); + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + + //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + dim3 grid(d_scrsdef.nscrs, d_scrsdef.nsrng, 1); + dim3 block(d_scrsdef.nscrs, d_scrsdef.nsrng, 1); + d_sct2sn1<<>>(d_scts1, d_srslt, offtof, d_xsxu, d_offseg, + (int)(d_scrsdef.nscrs * d_scrsdef.nscrs)); + HANDLE_ERROR(cudaGetLastError()); + //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGINFO) + printf("DONE in %fs.\n", 1e-3 * elapsedTime); + + if (Cnt.LOG <= LOGINFO) + printf("i> 3D scatter axial interpolation..."); + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + block.x = d_scrsdef.nscrs; + block.y = 1; + block.z = 1; + grid.x = 
d_scrsdef.nscrs; + grid.y = Cnt.NSN1; + grid.z = 1; + d_sct_axinterp<<>>(d_sct3di, d_scts1, d_sctaxR, d_sctaxW, d_sn1_sn11, + (int)(d_scrsdef.nscrs * d_scrsdef.nscrs), Cnt.NSN1, Cnt.SPN, + i * tbins); + HANDLE_ERROR(cudaGetLastError()); + //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGINFO) + printf("DONE in %fs.\n", 1e-3 * elapsedTime); + } + + cudaFree(d_scts1); + return d_sct3di; + + // cudaFree(d_sct3di); + // return d_scts1; } - - - - - //=================================================================== //------ CREATE MASK BASED ON THRESHOLD (SCATTER EMISSION DATA)------------ -iMSK get_imskEm(IMflt imvol, float thrshld, Cnst Cnt) -{ +iMSK get_imskEm(IMflt imvol, float thrshld, Cnst Cnt) { - // check which device is going to be used - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); + // check which device is going to be used + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGINFO) + printf("i> using CUDA device #%d\n", dev_id); - iMSK msk; - int nvx = 0; + iMSK msk; + int nvx = 0; - for (int i = 0; i<(SSE_IMX*SSE_IMY*SSE_IMZ); i++) { - if (imvol.im[i]>thrshld) nvx++; - } - //------------------------------------------------------------------ - //create the mask thru indexes - int *d_i2v, *d_v2i; + for (int i = 0; i < (SSE_IMX * SSE_IMY * SSE_IMZ); i++) { + if (imvol.im[i] > thrshld) + nvx++; + } + //------------------------------------------------------------------ + // create the mask thru indexes + int *d_i2v, *d_v2i; #ifdef WIN32 - int *h_i2v, *h_v2i; - HANDLE_ERROR(cudaMallocHost(&h_i2v, nvx * sizeof(int))); - HANDLE_ERROR(cudaMallocHost(&h_v2i, SSE_IMX*SSE_IMY*SSE_IMZ * sizeof(int))); - - HANDLE_ERROR(cudaMalloc(&d_i2v, nvx * sizeof(int))); - HANDLE_ERROR(cudaMalloc(&d_v2i, 
SSE_IMX*SSE_IMY*SSE_IMZ * sizeof(int))); - - nvx = 0; - for (int i = 0; i<(SSE_IMX*SSE_IMY*SSE_IMZ); i++) { - //if not in the mask then set to -1 - h_v2i[i] = 0; - //image-based TFOV - if (imvol.im[i]>thrshld) { - h_i2v[nvx] = i; - h_v2i[i] = nvx; - nvx++; - } - } - - HANDLE_ERROR(cudaMemcpy(d_i2v, h_i2v, nvx * sizeof(int), cudaMemcpyHostToDevice)); - HANDLE_ERROR(cudaMemcpy(d_v2i, h_v2i, SSE_IMX*SSE_IMY*SSE_IMZ * sizeof(int), cudaMemcpyHostToDevice)); - - HANDLE_ERROR(cudaFreeHost(h_i2v)); - HANDLE_ERROR(cudaFreeHost(h_v2i)); - + int *h_i2v, *h_v2i; + HANDLE_ERROR(cudaMallocHost(&h_i2v, nvx * sizeof(int))); + HANDLE_ERROR(cudaMallocHost(&h_v2i, SSE_IMX * SSE_IMY * SSE_IMZ * sizeof(int))); + + HANDLE_ERROR(cudaMalloc(&d_i2v, nvx * sizeof(int))); + HANDLE_ERROR(cudaMalloc(&d_v2i, SSE_IMX * SSE_IMY * SSE_IMZ * sizeof(int))); + + nvx = 0; + for (int i = 0; i < (SSE_IMX * SSE_IMY * SSE_IMZ); i++) { + // if not in the mask then set to -1 + h_v2i[i] = 0; + // image-based TFOV + if (imvol.im[i] > thrshld) { + h_i2v[nvx] = i; + h_v2i[i] = nvx; + nvx++; + } + } + + HANDLE_ERROR(cudaMemcpy(d_i2v, h_i2v, nvx * sizeof(int), cudaMemcpyHostToDevice)); + HANDLE_ERROR( + cudaMemcpy(d_v2i, h_v2i, SSE_IMX * SSE_IMY * SSE_IMZ * sizeof(int), cudaMemcpyHostToDevice)); + + HANDLE_ERROR(cudaFreeHost(h_i2v)); + HANDLE_ERROR(cudaFreeHost(h_v2i)); #else - //printf(">>>>> NVX:%d, THRESHOLD:%f\n", nvx, thrshld); - HANDLE_ERROR(cudaMallocManaged(&d_i2v, nvx * sizeof(int))); - HANDLE_ERROR(cudaMallocManaged(&d_v2i, SSE_IMX*SSE_IMY*SSE_IMZ * sizeof(int))); - - nvx = 0; - for (int i = 0; i<(SSE_IMX*SSE_IMY*SSE_IMZ); i++) { - //if not in the mask then set to -1 - d_v2i[i] = 0; - //image-based TFOV - if (imvol.im[i]>thrshld) { - d_i2v[nvx] = i; - d_v2i[i] = nvx; - nvx++; - } - } + // printf(">>>>> NVX:%d, THRESHOLD:%f\n", nvx, thrshld); + HANDLE_ERROR(cudaMallocManaged(&d_i2v, nvx * sizeof(int))); + HANDLE_ERROR(cudaMallocManaged(&d_v2i, SSE_IMX * SSE_IMY * SSE_IMZ * sizeof(int))); + + nvx = 0; + 
for (int i = 0; i < (SSE_IMX * SSE_IMY * SSE_IMZ); i++) { + // if not in the mask then set to -1 + d_v2i[i] = 0; + // image-based TFOV + if (imvol.im[i] > thrshld) { + d_i2v[nvx] = i; + d_v2i[i] = nvx; + nvx++; + } + } #endif - if (Cnt.LOG <= LOGINFO) printf("i> number of voxel values greater than %3.2f is %d out of %d (ratio: %3.2f)\n", thrshld, nvx, SSE_IMX*SSE_IMY*SSE_IMZ, nvx / (float)(SSE_IMX*SSE_IMY*SSE_IMZ)); - msk.nvx = nvx; - msk.i2v = d_i2v; - msk.v2i = d_v2i; - return msk; + if (Cnt.LOG <= LOGINFO) + printf("i> number of voxel values greater than %3.2f is %d out of %d (ratio: %3.2f)\n", + thrshld, nvx, SSE_IMX * SSE_IMY * SSE_IMZ, nvx / (float)(SSE_IMX * SSE_IMY * SSE_IMZ)); + msk.nvx = nvx; + msk.i2v = d_i2v; + msk.v2i = d_v2i; + return msk; } //=================================================================== //=================================================================== //----------- CREATE MASK BASED ON MASK PROVIDED ---------------- -iMSK get_imskMu(IMflt imvol, char *msk, Cnst Cnt) -{ - - // check which device is going to be used - int dev_id; - cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); - - int nvx = 0; - for (int i = 0; i<(SS_IMX*SS_IMY*SS_IMZ); i++) { - if (msk[i]>0) nvx++; - } - //------------------------------------------------------------------ - //create the mask thru indecies - int *d_i2v, *d_v2i; +iMSK get_imskMu(IMflt imvol, char *msk, Cnst Cnt) { + + // check which device is going to be used + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGINFO) + printf("i> using CUDA device #%d\n", dev_id); + + int nvx = 0; + for (int i = 0; i < (SS_IMX * SS_IMY * SS_IMZ); i++) { + if (msk[i] > 0) + nvx++; + } + //------------------------------------------------------------------ + // create the mask thru indecies + int *d_i2v, *d_v2i; #ifdef WIN32 - int *h_i2v, *h_v2i; - HANDLE_ERROR(cudaMallocHost(&h_i2v, nvx * sizeof(int))); - HANDLE_ERROR(cudaMallocHost(&h_v2i, 
SS_IMX*SS_IMY*SS_IMZ * sizeof(int))); - - HANDLE_ERROR(cudaMalloc(&d_i2v, nvx * sizeof(int))); - HANDLE_ERROR(cudaMalloc(&d_v2i, SS_IMX*SS_IMY*SS_IMZ * sizeof(int))); - - nvx = 0; - for (int i = 0; i<(SS_IMX*SS_IMY*SS_IMZ); i++) { - //if not in the mask then set to -1 - h_v2i[i] = -1; - //image-based TFOV - if (msk[i]>0) { - h_i2v[nvx] = i; - h_v2i[i] = nvx; - nvx++; - } - } - - HANDLE_ERROR(cudaMemcpy(d_i2v, h_i2v, nvx * sizeof(int), cudaMemcpyHostToDevice)); - HANDLE_ERROR(cudaMemcpy(d_v2i, h_v2i, SS_IMX*SS_IMY*SS_IMZ * sizeof(int), cudaMemcpyHostToDevice)); - - HANDLE_ERROR(cudaFreeHost(h_i2v)); - HANDLE_ERROR(cudaFreeHost(h_v2i)); + int *h_i2v, *h_v2i; + HANDLE_ERROR(cudaMallocHost(&h_i2v, nvx * sizeof(int))); + HANDLE_ERROR(cudaMallocHost(&h_v2i, SS_IMX * SS_IMY * SS_IMZ * sizeof(int))); + + HANDLE_ERROR(cudaMalloc(&d_i2v, nvx * sizeof(int))); + HANDLE_ERROR(cudaMalloc(&d_v2i, SS_IMX * SS_IMY * SS_IMZ * sizeof(int))); + + nvx = 0; + for (int i = 0; i < (SS_IMX * SS_IMY * SS_IMZ); i++) { + // if not in the mask then set to -1 + h_v2i[i] = -1; + // image-based TFOV + if (msk[i] > 0) { + h_i2v[nvx] = i; + h_v2i[i] = nvx; + nvx++; + } + } + + HANDLE_ERROR(cudaMemcpy(d_i2v, h_i2v, nvx * sizeof(int), cudaMemcpyHostToDevice)); + HANDLE_ERROR( + cudaMemcpy(d_v2i, h_v2i, SS_IMX * SS_IMY * SS_IMZ * sizeof(int), cudaMemcpyHostToDevice)); + + HANDLE_ERROR(cudaFreeHost(h_i2v)); + HANDLE_ERROR(cudaFreeHost(h_v2i)); #else - HANDLE_ERROR(cudaMallocManaged(&d_i2v, nvx * sizeof(int))); - HANDLE_ERROR(cudaMallocManaged(&d_v2i, SS_IMX*SS_IMY*SS_IMZ * sizeof(int))); - - nvx = 0; - for (int i = 0; i<(SS_IMX*SS_IMY*SS_IMZ); i++) { - //if not in the mask then set to -1 - d_v2i[i] = -1; - //image-based TFOV - if (msk[i]>0) { - d_i2v[nvx] = i; - d_v2i[i] = nvx; - nvx++; - } - } + HANDLE_ERROR(cudaMallocManaged(&d_i2v, nvx * sizeof(int))); + HANDLE_ERROR(cudaMallocManaged(&d_v2i, SS_IMX * SS_IMY * SS_IMZ * sizeof(int))); + + nvx = 0; + for (int i = 0; i < (SS_IMX * SS_IMY * SS_IMZ); 
i++) { + // if not in the mask then set to -1 + d_v2i[i] = -1; + // image-based TFOV + if (msk[i] > 0) { + d_i2v[nvx] = i; + d_v2i[i] = nvx; + nvx++; + } + } #endif - if (Cnt.LOG <= LOGINFO) printf("i> number of voxels within the mu-mask is %d out of %d (ratio: %3.2f)\n", nvx, SS_IMX*SS_IMY*SS_IMZ, nvx / (float)(SS_IMX*SS_IMY*SS_IMZ)); - iMSK mlut; - mlut.nvx = nvx; - mlut.i2v = d_i2v; - mlut.v2i = d_v2i; - return mlut; + if (Cnt.LOG <= LOGINFO) + printf("i> number of voxels within the mu-mask is %d out of %d (ratio: %3.2f)\n", nvx, + SS_IMX * SS_IMY * SS_IMZ, nvx / (float)(SS_IMX * SS_IMY * SS_IMZ)); + iMSK mlut; + mlut.nvx = nvx; + mlut.i2v = d_i2v; + mlut.v2i = d_v2i; + return mlut; } diff --git a/niftypet/nipet/sct/src/sctaux.h b/niftypet/nipet/sct/src/sctaux.h index 25d528b7..f7efb6c3 100644 --- a/niftypet/nipet/sct/src/sctaux.h +++ b/niftypet/nipet/sct/src/sctaux.h @@ -1,52 +1,39 @@ -#include -#include "sct.h" -#include "scanner_0.h" #include "def.h" +#include "scanner_0.h" +#include "sct.h" +#include #ifndef SAUX_H #define SAUX_H //----- S C A T T E R -//images are stored in structures with some basic stats -struct IMflt -{ - float *im; - size_t nvx; - float max; - float min; - size_t n10mx; +// images are stored in structures with some basic stats +struct IMflt { + float *im; + size_t nvx; + float max; + float min; + size_t n10mx; }; -struct iMSK -{ - int nvx; - int *i2v; - int *v2i; +struct iMSK { + int nvx; + int *i2v; + int *v2i; }; -struct scrsDEF -{ - float *crs; - float *rng; - int nscrs; - int nsrng; +struct scrsDEF { + float *crs; + float *rng; + int nscrs; + int nsrng; }; - iMSK get_imskEm(IMflt imvol, float thrshld, Cnst Cnt); iMSK get_imskMu(IMflt imvol, char *msk, Cnst Cnt); -//raw scatter results to sinogram -float * srslt2sino( - float *d_srslt, - char *d_xsxu, - scrsDEF d_scrsdef, - int *sctaxR, - float *sctaxW, - short *offseg, - short *isrng, - short *sn1_rno, - short *sn1_sn11, - Cnst Cnt); +// raw scatter results to sinogram +float 
*srslt2sino(float *d_srslt, char *d_xsxu, scrsDEF d_scrsdef, int *sctaxR, float *sctaxW, + short *offseg, short *isrng, short *sn1_rno, short *sn1_sn11, Cnst Cnt); -#endif //SAUX_H +#endif // SAUX_H diff --git a/niftypet/nipet/src/aux_module.cu b/niftypet/nipet/src/aux_module.cu index 5282f218..e21a9979 100644 --- a/niftypet/nipet/src/aux_module.cu +++ b/niftypet/nipet/src/aux_module.cu @@ -8,16 +8,15 @@ Copyrights: 2018 ----------------------------------------------------------------------*/ #define PY_SSIZE_T_CLEAN -#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION //NPY_API_VERSION +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION // NPY_API_VERSION -#include -#include -#include +#include "auxmath.h" #include "def.h" #include "norm.h" #include "scanner_0.h" -#include "auxmath.h" - +#include +#include +#include //=== START PYTHON INIT === @@ -29,581 +28,561 @@ static PyObject *mmr_rgaps(PyObject *self, PyObject *args); static PyObject *aux_varon(PyObject *self, PyObject *args); //--- - //> Module Method Table static PyMethodDef mmr_auxe_methods[] = { - {"norm", mmr_norm, METH_VARARGS, - "Create 3D normalisation sinograms from provided normalisation components."}, - {"s1s11", mmr_span11LUT, METH_VARARGS, - "Create span-1 to span-11 look up table."}, - {"pgaps", mmr_pgaps, METH_VARARGS, - "Create span-11 Siemens compatible sinograms by inserting gaps into the GPU-optimised sinograms in span-11."}, - {"rgaps", mmr_rgaps, METH_VARARGS, - "Create span-11 GPU-optimised sinograms by removing the gaps in Siemens-compatible sinograms in span-11" }, - {"varon", aux_varon, METH_VARARGS, - "Calculate variance online for the provided vector."}, - {NULL, NULL, 0, NULL} // Sentinel + {"norm", mmr_norm, METH_VARARGS, + "Create 3D normalisation sinograms from provided normalisation components."}, + {"s1s11", mmr_span11LUT, METH_VARARGS, "Create span-1 to span-11 look up table."}, + {"pgaps", mmr_pgaps, METH_VARARGS, + "Create span-11 Siemens compatible sinograms by inserting 
gaps into the GPU-optimised " + "sinograms in span-11."}, + {"rgaps", mmr_rgaps, METH_VARARGS, + "Create span-11 GPU-optimised sinograms by removing the gaps in Siemens-compatible sinograms " + "in span-11"}, + {"varon", aux_varon, METH_VARARGS, "Calculate variance online for the provided vector."}, + {NULL, NULL, 0, NULL} // Sentinel }; - //> Module Definition Structure static struct PyModuleDef mmr_auxe_module = { - PyModuleDef_HEAD_INIT, + PyModuleDef_HEAD_INIT, - //> name of module - "mmr_auxe", + //> name of module + "mmr_auxe", - //> module documentation, may be NULL - "Initialisation and basic processing routines for the Siemens Biograph mMR.", + //> module documentation, may be NULL + "Initialisation and basic processing routines for the Siemens Biograph mMR.", - //> the module keeps state in global variables. - -1, - - mmr_auxe_methods -}; + //> the module keeps state in global variables. + -1, + mmr_auxe_methods}; //> Initialization function PyMODINIT_FUNC PyInit_mmr_auxe(void) { - Py_Initialize(); + Py_Initialize(); - //> load NumPy functionality - import_array(); + //> load NumPy functionality + import_array(); - return PyModule_Create(&mmr_auxe_module); + return PyModule_Create(&mmr_auxe_module); } //=== END PYTHON INIT === - //============================================================================== - - - - - //============================================================================== // N O R M A L I S A T I O N (component based) //------------------------------------------------------------------------------ -static PyObject *mmr_norm(PyObject *self, PyObject *args) -{ - - //Structure of constants - Cnst Cnt; - //Dictionary of scanner constants - PyObject * o_mmrcnst; - // structure of norm C arrays (defined in norm.h). - NormCmp normc; - // structure of axial LUTs in C arrays (defined in norm.h). - axialLUT axLUT; - - //Output norm sino - PyObject * o_sino=NULL; - // normalisation component dictionary. 
- PyObject * o_norm_cmp; - // axial LUT dicionary. contains such LUTs: li2rno, li2sn, li2nos. - PyObject * o_axLUT; - // 2D sino index LUT (dead bisn are out). - PyObject * o_aw2ali=NULL; - // singles buckets for dead time correction - PyObject * o_bckts=NULL; - - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOOOO", &o_sino, &o_norm_cmp, &o_bckts, &o_axLUT, &o_aw2ali, &o_mmrcnst)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - /* Interpret the input objects as numpy arrays. */ - //norm components: - PyObject* pd_geo = PyDict_GetItemString(o_norm_cmp, "geo"); - PyObject* pd_cinf = PyDict_GetItemString(o_norm_cmp, "cinf"); - PyObject* pd_ceff = PyDict_GetItemString(o_norm_cmp, "ceff"); - PyObject* pd_axe1 = PyDict_GetItemString(o_norm_cmp, "axe1"); - PyObject* pd_dtp = PyDict_GetItemString(o_norm_cmp, "dtp"); - PyObject* pd_dtnp = PyDict_GetItemString(o_norm_cmp, "dtnp"); - PyObject* pd_dtc = PyDict_GetItemString(o_norm_cmp, "dtc"); - PyObject* pd_axe2 = PyDict_GetItemString(o_norm_cmp, "axe2"); - PyObject* pd_axf1 = PyDict_GetItemString(o_norm_cmp, "axf1"); - //axial LUTs: - PyObject* pd_li2rno = PyDict_GetItemString(o_axLUT, "li2rno"); - PyObject* pd_li2sn = PyDict_GetItemString(o_axLUT, "li2sn"); - PyObject* pd_li2nos = PyDict_GetItemString(o_axLUT, "li2nos"); - PyObject* pd_sn1sn11 = PyDict_GetItemString(o_axLUT, "sn1_sn11"); - PyObject* pd_sn1rno = PyDict_GetItemString(o_axLUT, "sn1_rno"); - PyObject* pd_sn1sn11no = PyDict_GetItemString(o_axLUT, "sn1_sn11no"); - - PyObject* pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); - Cnt.SPN = (int)PyLong_AsLong(pd_span); - PyObject* pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - PyObject* pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - - //get the output sino - PyArrayObject *p_sino = NULL; 
- p_sino = (PyArrayObject *)PyArray_FROM_OTF(o_sino, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - - //-- get the arrays from the dictionaries - //norm components - PyArrayObject *p_geo = NULL; - p_geo = (PyArrayObject *)PyArray_FROM_OTF(pd_geo, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_cinf = NULL; - p_cinf = (PyArrayObject *)PyArray_FROM_OTF(pd_cinf, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_ceff = NULL; - p_ceff = (PyArrayObject *)PyArray_FROM_OTF(pd_ceff, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_axe1 = NULL; - p_axe1 = (PyArrayObject *)PyArray_FROM_OTF(pd_axe1, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_dtp = NULL; - p_dtp = (PyArrayObject *)PyArray_FROM_OTF(pd_dtp, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_dtnp = NULL; - p_dtnp = (PyArrayObject *)PyArray_FROM_OTF(pd_dtnp, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_dtc = NULL; - p_dtc = (PyArrayObject *)PyArray_FROM_OTF(pd_dtc, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_axe2 = NULL; - p_axe2 = (PyArrayObject *)PyArray_FROM_OTF(pd_axe2, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_axf1 = NULL; - p_axf1 = (PyArrayObject *)PyArray_FROM_OTF(pd_axf1, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - - //then axLUTs - PyArrayObject *p_li2rno = NULL; - p_li2rno = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rno, NPY_INT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_li2sn = NULL; - p_li2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn, NPY_INT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_li2nos = NULL; - p_li2nos = (PyArrayObject *)PyArray_FROM_OTF(pd_li2nos, NPY_INT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_sn1sn11 = NULL; - p_sn1sn11 = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1sn11, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_sn1rno = NULL; - p_sn1rno = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1rno, NPY_INT16, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_sn1sn11no = NULL; - p_sn1sn11no = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1sn11no, NPY_INT8, NPY_ARRAY_IN_ARRAY); - 
- //2D sino index LUT: - PyArrayObject *p_aw2ali = NULL; - p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(o_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); - // single bucktes: - PyArrayObject *p_bckts = NULL; - p_bckts = (PyArrayObject *)PyArray_FROM_OTF(o_bckts, NPY_INT32, NPY_ARRAY_IN_ARRAY); - //-- - - /* If that didn't work, throw an exception. */ - if (p_geo == NULL || p_cinf == NULL || p_ceff == NULL || p_axe1 == NULL || - p_dtp == NULL || p_dtnp == NULL || p_dtc == NULL || p_axe2 == NULL || - p_axf1 == NULL || p_li2rno == NULL || p_li2sn == NULL || p_li2nos == NULL || - p_aw2ali == NULL || p_sn1sn11 == NULL || p_sn1rno == NULL || p_sn1sn11no == NULL || - p_sino == NULL) - { - Py_XDECREF(p_geo); - Py_XDECREF(p_cinf); - Py_XDECREF(p_ceff); - Py_XDECREF(p_axe1); - Py_XDECREF(p_dtp); - Py_XDECREF(p_dtnp); - Py_XDECREF(p_dtc); - Py_XDECREF(p_axe2); - Py_XDECREF(p_axf1); - //axLUTs - Py_XDECREF(p_li2rno); - Py_XDECREF(p_li2sn); - Py_XDECREF(p_li2nos); - Py_XDECREF(p_sn1sn11); - Py_XDECREF(p_sn1rno); - Py_XDECREF(p_sn1sn11no); - //2D sino LUT - Py_XDECREF(p_aw2ali); - //singles buckets - Py_XDECREF(p_bckts); - - //output sino - PyArray_DiscardWritebackIfCopy(p_sino); - Py_XDECREF(p_sino); - return NULL; - } - - //-- get the pointers to the data as C-types - //norm components - normc.geo = (float*)PyArray_DATA(p_geo); - normc.cinf = (float*)PyArray_DATA(p_cinf); - normc.ceff = (float*)PyArray_DATA(p_ceff); - normc.axe1 = (float*)PyArray_DATA(p_axe1); - normc.dtp = (float*)PyArray_DATA(p_dtp); - normc.dtnp = (float*)PyArray_DATA(p_dtnp); - normc.dtc = (float*)PyArray_DATA(p_dtc); - normc.axe2 = (float*)PyArray_DATA(p_axe2); - normc.axf1 = (float*)PyArray_DATA(p_axf1); - //axLUTs - axLUT.li2rno = (int*)PyArray_DATA(p_li2rno); - axLUT.li2sn = (int*)PyArray_DATA(p_li2sn); - axLUT.li2nos = (int*)PyArray_DATA(p_li2nos); - axLUT.sn1_sn11 = (short*)PyArray_DATA(p_sn1sn11); - axLUT.sn1_rno = (short*)PyArray_DATA(p_sn1rno); - axLUT.sn1_sn11no = (char*)PyArray_DATA(p_sn1sn11no); - - 
//2D sino index LUT - int * aw2ali = (int*)PyArray_DATA(p_aw2ali); - //singles bucktes - int * bckts = (int*)PyArray_DATA(p_bckts); - - //--- Array size - int Naw = (int)PyArray_DIM(p_aw2ali, 0); - if (AW != Naw) printf("\ne> number of active bins is inconsitent !!! <<------------------<<<<<\n"); - - //output sino - float *sino = (float *)PyArray_DATA(p_sino); - - //norm components - normc.ngeo[0] = (int)PyArray_DIM(p_geo, 0); - normc.ngeo[1] = (int)PyArray_DIM(p_geo, 1); - normc.ncinf[0] = (int)PyArray_DIM(p_cinf, 0); - normc.ncinf[1] = (int)PyArray_DIM(p_cinf, 1); - normc.nceff[0] = (int)PyArray_DIM(p_ceff, 0); - normc.nceff[1] = (int)PyArray_DIM(p_ceff, 1); - normc.naxe = (int)PyArray_DIM(p_axe1, 0); - normc.nrdt = (int)PyArray_DIM(p_dtp, 0); - normc.ncdt = (int)PyArray_DIM(p_dtc, 0); - //axial LUTs: - axLUT.Nli2rno[0] = (int)PyArray_DIM(p_li2rno, 0); - axLUT.Nli2rno[1] = (int)PyArray_DIM(p_li2rno, 1); - axLUT.Nli2sn[0] = (int)PyArray_DIM(p_li2sn, 0); - axLUT.Nli2sn[1] = (int)PyArray_DIM(p_li2sn, 1); - axLUT.Nli2nos = (int)PyArray_DIM(p_li2nos, 0); - - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); - - //<><><><><><><><><><> Call the CUDA stuff now - norm_from_components(sino, normc, axLUT, aw2ali, bckts, Cnt); - //<><><><><><><><><><> - - //-- Clear up - //norm components - Py_DECREF(p_geo); - Py_DECREF(p_cinf); - Py_DECREF(p_ceff); - Py_DECREF(p_axe1); - Py_DECREF(p_dtp); - Py_DECREF(p_dtnp); - Py_DECREF(p_dtc); - Py_DECREF(p_axe2); - //axLUT - Py_DECREF(p_li2rno); - Py_DECREF(p_li2sn); - Py_DECREF(p_li2nos); - //2D sino index LUT - Py_DECREF(p_aw2ali); - //singles buckets - Py_DECREF(p_bckts); - - //output sino - PyArray_ResolveWritebackIfCopy(p_sino); - Py_DECREF(p_sino); - - Py_INCREF(Py_None); - return Py_None; - +static PyObject *mmr_norm(PyObject *self, PyObject *args) { + + // Structure of constants + Cnst Cnt; + // Dictionary of scanner constants + PyObject *o_mmrcnst; + // structure of norm C arrays (defined in 
norm.h). + NormCmp normc; + // structure of axial LUTs in C arrays (defined in norm.h). + axialLUT axLUT; + + // Output norm sino + PyObject *o_sino = NULL; + // normalisation component dictionary. + PyObject *o_norm_cmp; + // axial LUT dicionary. contains such LUTs: li2rno, li2sn, li2nos. + PyObject *o_axLUT; + // 2D sino index LUT (dead bisn are out). + PyObject *o_aw2ali = NULL; + // singles buckets for dead time correction + PyObject *o_bckts = NULL; + + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OOOOOO", &o_sino, &o_norm_cmp, &o_bckts, &o_axLUT, &o_aw2ali, + &o_mmrcnst)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + /* Interpret the input objects as numpy arrays. */ + // norm components: + PyObject *pd_geo = PyDict_GetItemString(o_norm_cmp, "geo"); + PyObject *pd_cinf = PyDict_GetItemString(o_norm_cmp, "cinf"); + PyObject *pd_ceff = PyDict_GetItemString(o_norm_cmp, "ceff"); + PyObject *pd_axe1 = PyDict_GetItemString(o_norm_cmp, "axe1"); + PyObject *pd_dtp = PyDict_GetItemString(o_norm_cmp, "dtp"); + PyObject *pd_dtnp = PyDict_GetItemString(o_norm_cmp, "dtnp"); + PyObject *pd_dtc = PyDict_GetItemString(o_norm_cmp, "dtc"); + PyObject *pd_axe2 = PyDict_GetItemString(o_norm_cmp, "axe2"); + PyObject *pd_axf1 = PyDict_GetItemString(o_norm_cmp, "axf1"); + // axial LUTs: + PyObject *pd_li2rno = PyDict_GetItemString(o_axLUT, "li2rno"); + PyObject *pd_li2sn = PyDict_GetItemString(o_axLUT, "li2sn"); + PyObject *pd_li2nos = PyDict_GetItemString(o_axLUT, "li2nos"); + PyObject *pd_sn1sn11 = PyDict_GetItemString(o_axLUT, "sn1_sn11"); + PyObject *pd_sn1rno = PyDict_GetItemString(o_axLUT, "sn1_rno"); + PyObject *pd_sn1sn11no = PyDict_GetItemString(o_axLUT, "sn1_sn11no"); + + PyObject *pd_span = PyDict_GetItemString(o_mmrcnst, "SPN"); + Cnt.SPN = (int)PyLong_AsLong(pd_span); + PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); + 
Cnt.LOG = (char)PyLong_AsLong(pd_log); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + + // get the output sino + PyArrayObject *p_sino = NULL; + p_sino = (PyArrayObject *)PyArray_FROM_OTF(o_sino, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + + //-- get the arrays from the dictionaries + // norm components + PyArrayObject *p_geo = NULL; + p_geo = (PyArrayObject *)PyArray_FROM_OTF(pd_geo, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_cinf = NULL; + p_cinf = (PyArrayObject *)PyArray_FROM_OTF(pd_cinf, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_ceff = NULL; + p_ceff = (PyArrayObject *)PyArray_FROM_OTF(pd_ceff, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_axe1 = NULL; + p_axe1 = (PyArrayObject *)PyArray_FROM_OTF(pd_axe1, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_dtp = NULL; + p_dtp = (PyArrayObject *)PyArray_FROM_OTF(pd_dtp, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_dtnp = NULL; + p_dtnp = (PyArrayObject *)PyArray_FROM_OTF(pd_dtnp, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_dtc = NULL; + p_dtc = (PyArrayObject *)PyArray_FROM_OTF(pd_dtc, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_axe2 = NULL; + p_axe2 = (PyArrayObject *)PyArray_FROM_OTF(pd_axe2, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_axf1 = NULL; + p_axf1 = (PyArrayObject *)PyArray_FROM_OTF(pd_axf1, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + // then axLUTs + PyArrayObject *p_li2rno = NULL; + p_li2rno = (PyArrayObject *)PyArray_FROM_OTF(pd_li2rno, NPY_INT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_li2sn = NULL; + p_li2sn = (PyArrayObject *)PyArray_FROM_OTF(pd_li2sn, NPY_INT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_li2nos = NULL; + p_li2nos = (PyArrayObject *)PyArray_FROM_OTF(pd_li2nos, NPY_INT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_sn1sn11 = NULL; + p_sn1sn11 = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1sn11, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_sn1rno = NULL; + p_sn1rno = 
(PyArrayObject *)PyArray_FROM_OTF(pd_sn1rno, NPY_INT16, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_sn1sn11no = NULL; + p_sn1sn11no = (PyArrayObject *)PyArray_FROM_OTF(pd_sn1sn11no, NPY_INT8, NPY_ARRAY_IN_ARRAY); + + // 2D sino index LUT: + PyArrayObject *p_aw2ali = NULL; + p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(o_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); + // single bucktes: + PyArrayObject *p_bckts = NULL; + p_bckts = (PyArrayObject *)PyArray_FROM_OTF(o_bckts, NPY_INT32, NPY_ARRAY_IN_ARRAY); + //-- + + /* If that didn't work, throw an exception. */ + if (p_geo == NULL || p_cinf == NULL || p_ceff == NULL || p_axe1 == NULL || p_dtp == NULL || + p_dtnp == NULL || p_dtc == NULL || p_axe2 == NULL || p_axf1 == NULL || p_li2rno == NULL || + p_li2sn == NULL || p_li2nos == NULL || p_aw2ali == NULL || p_sn1sn11 == NULL || + p_sn1rno == NULL || p_sn1sn11no == NULL || p_sino == NULL) { + Py_XDECREF(p_geo); + Py_XDECREF(p_cinf); + Py_XDECREF(p_ceff); + Py_XDECREF(p_axe1); + Py_XDECREF(p_dtp); + Py_XDECREF(p_dtnp); + Py_XDECREF(p_dtc); + Py_XDECREF(p_axe2); + Py_XDECREF(p_axf1); + // axLUTs + Py_XDECREF(p_li2rno); + Py_XDECREF(p_li2sn); + Py_XDECREF(p_li2nos); + Py_XDECREF(p_sn1sn11); + Py_XDECREF(p_sn1rno); + Py_XDECREF(p_sn1sn11no); + // 2D sino LUT + Py_XDECREF(p_aw2ali); + // singles buckets + Py_XDECREF(p_bckts); + + // output sino + PyArray_DiscardWritebackIfCopy(p_sino); + Py_XDECREF(p_sino); + return NULL; + } + + //-- get the pointers to the data as C-types + // norm components + normc.geo = (float *)PyArray_DATA(p_geo); + normc.cinf = (float *)PyArray_DATA(p_cinf); + normc.ceff = (float *)PyArray_DATA(p_ceff); + normc.axe1 = (float *)PyArray_DATA(p_axe1); + normc.dtp = (float *)PyArray_DATA(p_dtp); + normc.dtnp = (float *)PyArray_DATA(p_dtnp); + normc.dtc = (float *)PyArray_DATA(p_dtc); + normc.axe2 = (float *)PyArray_DATA(p_axe2); + normc.axf1 = (float *)PyArray_DATA(p_axf1); + // axLUTs + axLUT.li2rno = (int *)PyArray_DATA(p_li2rno); + axLUT.li2sn = (int 
*)PyArray_DATA(p_li2sn); + axLUT.li2nos = (int *)PyArray_DATA(p_li2nos); + axLUT.sn1_sn11 = (short *)PyArray_DATA(p_sn1sn11); + axLUT.sn1_rno = (short *)PyArray_DATA(p_sn1rno); + axLUT.sn1_sn11no = (char *)PyArray_DATA(p_sn1sn11no); + + // 2D sino index LUT + int *aw2ali = (int *)PyArray_DATA(p_aw2ali); + // singles bucktes + int *bckts = (int *)PyArray_DATA(p_bckts); + + //--- Array size + int Naw = (int)PyArray_DIM(p_aw2ali, 0); + if (AW != Naw) + printf("\ne> number of active bins is inconsitent !!! <<------------------<<<<<\n"); + + // output sino + float *sino = (float *)PyArray_DATA(p_sino); + + // norm components + normc.ngeo[0] = (int)PyArray_DIM(p_geo, 0); + normc.ngeo[1] = (int)PyArray_DIM(p_geo, 1); + normc.ncinf[0] = (int)PyArray_DIM(p_cinf, 0); + normc.ncinf[1] = (int)PyArray_DIM(p_cinf, 1); + normc.nceff[0] = (int)PyArray_DIM(p_ceff, 0); + normc.nceff[1] = (int)PyArray_DIM(p_ceff, 1); + normc.naxe = (int)PyArray_DIM(p_axe1, 0); + normc.nrdt = (int)PyArray_DIM(p_dtp, 0); + normc.ncdt = (int)PyArray_DIM(p_dtc, 0); + // axial LUTs: + axLUT.Nli2rno[0] = (int)PyArray_DIM(p_li2rno, 0); + axLUT.Nli2rno[1] = (int)PyArray_DIM(p_li2rno, 1); + axLUT.Nli2sn[0] = (int)PyArray_DIM(p_li2sn, 0); + axLUT.Nli2sn[1] = (int)PyArray_DIM(p_li2sn, 1); + axLUT.Nli2nos = (int)PyArray_DIM(p_li2nos, 0); + + // sets the device on which to calculate + HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + + //<><><><><><><><><><> Call the CUDA stuff now + norm_from_components(sino, normc, axLUT, aw2ali, bckts, Cnt); + //<><><><><><><><><><> + + //-- Clear up + // norm components + Py_DECREF(p_geo); + Py_DECREF(p_cinf); + Py_DECREF(p_ceff); + Py_DECREF(p_axe1); + Py_DECREF(p_dtp); + Py_DECREF(p_dtnp); + Py_DECREF(p_dtc); + Py_DECREF(p_axe2); + // axLUT + Py_DECREF(p_li2rno); + Py_DECREF(p_li2sn); + Py_DECREF(p_li2nos); + // 2D sino index LUT + Py_DECREF(p_aw2ali); + // singles buckets + Py_DECREF(p_bckts); + + // output sino + PyArray_ResolveWritebackIfCopy(p_sino); + Py_DECREF(p_sino); + + 
Py_INCREF(Py_None); + return Py_None; } - //==================================================================================================== static PyObject *mmr_pgaps(PyObject *self, PyObject *args) { - //output sino - PyObject * o_sino; - - // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). - PyObject * o_txLUT; - - //Dictionary of scanner constants - PyObject * o_mmrcnst; - - //GPU input sino in span-11 - PyObject * o_sng; - - //Structure of constants - Cnst Cnt; - - int sino_no; - - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOOi", &o_sino, &o_sng, &o_txLUT, &o_mmrcnst, &sino_no)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - - /* Interpret the input objects as... */ - PyObject* pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); - Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); - PyObject* pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); - Cnt.A = (int)PyLong_AsLong(pd_A); - PyObject* pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); - Cnt.W = (int)PyLong_AsLong(pd_W); - PyObject* pd_SPN = PyDict_GetItemString(o_mmrcnst, "SPN"); - Cnt.SPN = (int)PyLong_AsLong(pd_SPN); - PyObject* pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - PyObject* pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - - PyObject* pd_rngstrt = PyDict_GetItemString(o_mmrcnst, "RNG_STRT"); - PyObject* pd_rngend = PyDict_GetItemString(o_mmrcnst, "RNG_END"); - Cnt.RNG_STRT = (char)PyLong_AsLong(pd_rngstrt); - Cnt.RNG_END = (char)PyLong_AsLong(pd_rngend); - - //GPU 2D linear sino index into Siemens sino index LUT - PyObject* pd_aw2ali = PyDict_GetItemString(o_txLUT, "aw2ali"); - - //GPU input sino and the above 2D LUT - PyArrayObject *p_sng = NULL; - p_sng = (PyArrayObject *)PyArray_FROM_OTF(o_sng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject 
*p_aw2ali = NULL; - p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); - - //output sino - PyArrayObject *p_sino = NULL; - p_sino = (PyArrayObject *)PyArray_FROM_OTF(o_sino, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - - if (p_sng == NULL || p_aw2ali == NULL || p_sino == NULL) { - Py_XDECREF(p_aw2ali); - Py_XDECREF(p_sng); - - PyArray_DiscardWritebackIfCopy(p_sino); - Py_XDECREF(p_sino); - } - - int *aw2ali = (int*)PyArray_DATA(p_aw2ali); - float *sng = (float*)PyArray_DATA(p_sng); - //output sino - float *sino = (float*)PyArray_DATA(p_sino); - - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); - - //<><><><><><><><><><><><><><><><><><><><><><> - //Run the conversion to sinos with gaps - put_gaps(sino, sng, aw2ali, sino_no, Cnt); - //<><><><><><><><><><><><><><><><><><><><><><> - - //Clean up - Py_DECREF(p_aw2ali); - Py_DECREF(p_sng); - - PyArray_ResolveWritebackIfCopy(p_sino); - Py_DECREF(p_sino); - - Py_INCREF(Py_None); - return Py_None; + // output sino + PyObject *o_sino; + + // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). + PyObject *o_txLUT; + + // Dictionary of scanner constants + PyObject *o_mmrcnst; + + // GPU input sino in span-11 + PyObject *o_sng; + + // Structure of constants + Cnst Cnt; + + int sino_no; + + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OOOOi", &o_sino, &o_sng, &o_txLUT, &o_mmrcnst, &sino_no)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + /* Interpret the input objects as... 
*/ + PyObject *pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); + Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); + PyObject *pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); + Cnt.A = (int)PyLong_AsLong(pd_A); + PyObject *pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); + Cnt.W = (int)PyLong_AsLong(pd_W); + PyObject *pd_SPN = PyDict_GetItemString(o_mmrcnst, "SPN"); + Cnt.SPN = (int)PyLong_AsLong(pd_SPN); + PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); + Cnt.LOG = (char)PyLong_AsLong(pd_log); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + + PyObject *pd_rngstrt = PyDict_GetItemString(o_mmrcnst, "RNG_STRT"); + PyObject *pd_rngend = PyDict_GetItemString(o_mmrcnst, "RNG_END"); + Cnt.RNG_STRT = (char)PyLong_AsLong(pd_rngstrt); + Cnt.RNG_END = (char)PyLong_AsLong(pd_rngend); + + // GPU 2D linear sino index into Siemens sino index LUT + PyObject *pd_aw2ali = PyDict_GetItemString(o_txLUT, "aw2ali"); + + // GPU input sino and the above 2D LUT + PyArrayObject *p_sng = NULL; + p_sng = (PyArrayObject *)PyArray_FROM_OTF(o_sng, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_aw2ali = NULL; + p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); + + // output sino + PyArrayObject *p_sino = NULL; + p_sino = (PyArrayObject *)PyArray_FROM_OTF(o_sino, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + + if (p_sng == NULL || p_aw2ali == NULL || p_sino == NULL) { + Py_XDECREF(p_aw2ali); + Py_XDECREF(p_sng); + + PyArray_DiscardWritebackIfCopy(p_sino); + Py_XDECREF(p_sino); + } + + int *aw2ali = (int *)PyArray_DATA(p_aw2ali); + float *sng = (float *)PyArray_DATA(p_sng); + // output sino + float *sino = (float *)PyArray_DATA(p_sino); + + // sets the device on which to calculate + HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + + //<><><><><><><><><><><><><><><><><><><><><><> + // Run the conversion to sinos with gaps + put_gaps(sino, sng, aw2ali, sino_no, Cnt); + 
//<><><><><><><><><><><><><><><><><><><><><><> + + // Clean up + Py_DECREF(p_aw2ali); + Py_DECREF(p_sng); + + PyArray_ResolveWritebackIfCopy(p_sino); + Py_DECREF(p_sino); + + Py_INCREF(Py_None); + return Py_None; } - //==================================================================================================== static PyObject *mmr_rgaps(PyObject *self, PyObject *args) { - //output sino with gaps removed - PyObject * o_sng; - - // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). - PyObject * o_txLUT; - - //Dictionary of scanner constants - PyObject * o_mmrcnst; - - //input sino to be reformated with gaps removed - PyObject * o_sino; - - //Structure of constants - Cnst Cnt; - - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOO", &o_sng, &o_sino, &o_txLUT, &o_mmrcnst)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - /* Interpret the input objects as... 
PyLong_AsLong*/ - PyObject* pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); - Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); - PyObject* pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); - Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); - PyObject* pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); - Cnt.A = (int)PyLong_AsLong(pd_A); - PyObject* pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); - Cnt.W = (int)PyLong_AsLong(pd_W); - PyObject* pd_SPN = PyDict_GetItemString(o_mmrcnst, "SPN"); - Cnt.SPN = (int)PyLong_AsLong(pd_SPN); - PyObject* pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - PyObject* pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - - //GPU 2D linear sino index into Siemens sino index LUT - PyObject* pd_aw2ali = PyDict_GetItemString(o_txLUT, "aw2ali"); - - //input sino and the above 2D LUT - PyArrayObject *p_sino = NULL; - p_sino = (PyArrayObject *)PyArray_FROM_OTF(o_sino, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - PyArrayObject *p_aw2ali = NULL; - p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); - - // number of sinogram from the shape of the sino (can be any number especially when using reduced ring number) - int snno = (int)PyArray_DIM(p_sino, 0); - - //output sino - PyArrayObject *p_sng = NULL; - p_sng = (PyArrayObject *)PyArray_FROM_OTF(o_sng, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - - if (p_sino == NULL || p_aw2ali == NULL || p_sino == NULL) { - Py_XDECREF(p_aw2ali); - Py_XDECREF(p_sino); - - PyArray_DiscardWritebackIfCopy(p_sng); - Py_XDECREF(p_sng); - } - - int *aw2ali = (int*)PyArray_DATA(p_aw2ali); - float *sino = (float*)PyArray_DATA(p_sino); - float *sng = (float*)PyArray_DATA(p_sng); - - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); - - //<><><><><><><><><><><><><><><><><><><><><><> - //Run the conversion to GPU sinos - remove_gaps(sng, sino, snno, aw2ali, Cnt); - 
//<><><><><><><><><><><><><><><><><><><><><><> - - //Clean up - Py_DECREF(p_aw2ali); - Py_DECREF(p_sino); - - PyArray_ResolveWritebackIfCopy(p_sng); - Py_DECREF(p_sng); - - Py_INCREF(Py_None); - return Py_None; - + // output sino with gaps removed + PyObject *o_sng; + + // transaxial LUT dictionary (e.g., 2D sino where dead bins are out). + PyObject *o_txLUT; + + // Dictionary of scanner constants + PyObject *o_mmrcnst; + + // input sino to be reformated with gaps removed + PyObject *o_sino; + + // Structure of constants + Cnst Cnt; + + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OOOO", &o_sng, &o_sino, &o_txLUT, &o_mmrcnst)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + /* Interpret the input objects as... PyLong_AsLong*/ + PyObject *pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); + Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); + PyObject *pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); + Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); + PyObject *pd_A = PyDict_GetItemString(o_mmrcnst, "NSANGLES"); + Cnt.A = (int)PyLong_AsLong(pd_A); + PyObject *pd_W = PyDict_GetItemString(o_mmrcnst, "NSBINS"); + Cnt.W = (int)PyLong_AsLong(pd_W); + PyObject *pd_SPN = PyDict_GetItemString(o_mmrcnst, "SPN"); + Cnt.SPN = (int)PyLong_AsLong(pd_SPN); + PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); + Cnt.LOG = (char)PyLong_AsLong(pd_log); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + + // GPU 2D linear sino index into Siemens sino index LUT + PyObject *pd_aw2ali = PyDict_GetItemString(o_txLUT, "aw2ali"); + + // input sino and the above 2D LUT + PyArrayObject *p_sino = NULL; + p_sino = (PyArrayObject *)PyArray_FROM_OTF(o_sino, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + PyArrayObject *p_aw2ali = NULL; + p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, 
NPY_ARRAY_IN_ARRAY); + + // number of sinogram from the shape of the sino (can be any number especially when using reduced + // ring number) + int snno = (int)PyArray_DIM(p_sino, 0); + + // output sino + PyArrayObject *p_sng = NULL; + p_sng = (PyArrayObject *)PyArray_FROM_OTF(o_sng, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + + if (p_sino == NULL || p_aw2ali == NULL || p_sino == NULL) { + Py_XDECREF(p_aw2ali); + Py_XDECREF(p_sino); + + PyArray_DiscardWritebackIfCopy(p_sng); + Py_XDECREF(p_sng); + } + + int *aw2ali = (int *)PyArray_DATA(p_aw2ali); + float *sino = (float *)PyArray_DATA(p_sino); + float *sng = (float *)PyArray_DATA(p_sng); + + // sets the device on which to calculate + HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + + //<><><><><><><><><><><><><><><><><><><><><><> + // Run the conversion to GPU sinos + remove_gaps(sng, sino, snno, aw2ali, Cnt); + //<><><><><><><><><><><><><><><><><><><><><><> + + // Clean up + Py_DECREF(p_aw2ali); + Py_DECREF(p_sino); + + PyArray_ResolveWritebackIfCopy(p_sng); + Py_DECREF(p_sng); + + Py_INCREF(Py_None); + return Py_None; } - - //==================================================================================================== static PyObject *mmr_span11LUT(PyObject *self, PyObject *args) { - //Dictionary of scanner constants - PyObject * o_mmrcnst; - - //Structure of constants - Cnst Cnt; - - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "O", &o_mmrcnst)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - /* Interpret the input objects as... 
*/ - PyObject* pd_Naw = PyDict_GetItemString(o_mmrcnst, "Naw"); - Cnt.aw = (int)PyLong_AsLong(pd_Naw); - PyObject* pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); - Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); - PyObject* pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); - Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); - PyObject* pd_NRNG = PyDict_GetItemString(o_mmrcnst, "NRNG"); - Cnt.NRNG = (int)PyLong_AsLong(pd_NRNG); - - - span11LUT span11 = span1_span11(Cnt); - - npy_intp dims[2]; - dims[0] = Cnt.NSN1; - PyArrayObject *s1s11_out = (PyArrayObject *)PyArray_SimpleNewFromData(1, dims, NPY_INT16, span11.li2s11); - dims[0] = Cnt.NSN11; - PyArrayObject *s1nos_out = (PyArrayObject *)PyArray_SimpleNewFromData(1, dims, NPY_INT8, span11.NSinos); - - PyObject *o_out = PyTuple_New(2); - PyTuple_SetItem(o_out, 0, PyArray_Return(s1s11_out)); - PyTuple_SetItem(o_out, 1, PyArray_Return(s1nos_out)); - - - return o_out; + // Dictionary of scanner constants + PyObject *o_mmrcnst; + + // Structure of constants + Cnst Cnt; + + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "O", &o_mmrcnst)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + /* Interpret the input objects as... 
*/ + PyObject *pd_Naw = PyDict_GetItemString(o_mmrcnst, "Naw"); + Cnt.aw = (int)PyLong_AsLong(pd_Naw); + PyObject *pd_NSN1 = PyDict_GetItemString(o_mmrcnst, "NSN1"); + Cnt.NSN1 = (int)PyLong_AsLong(pd_NSN1); + PyObject *pd_NSN11 = PyDict_GetItemString(o_mmrcnst, "NSN11"); + Cnt.NSN11 = (int)PyLong_AsLong(pd_NSN11); + PyObject *pd_NRNG = PyDict_GetItemString(o_mmrcnst, "NRNG"); + Cnt.NRNG = (int)PyLong_AsLong(pd_NRNG); + + span11LUT span11 = span1_span11(Cnt); + + npy_intp dims[2]; + dims[0] = Cnt.NSN1; + PyArrayObject *s1s11_out = + (PyArrayObject *)PyArray_SimpleNewFromData(1, dims, NPY_INT16, span11.li2s11); + dims[0] = Cnt.NSN11; + PyArrayObject *s1nos_out = + (PyArrayObject *)PyArray_SimpleNewFromData(1, dims, NPY_INT8, span11.NSinos); + + PyObject *o_out = PyTuple_New(2); + PyTuple_SetItem(o_out, 0, PyArray_Return(s1s11_out)); + PyTuple_SetItem(o_out, 1, PyArray_Return(s1nos_out)); + + return o_out; } - - //==================================================================================================== static PyObject *aux_varon(PyObject *self, PyObject *args) { - // M1 (mean) vector - PyObject * o_m1; - // M2 (variance) vector - PyObject * o_m2; - //input of instance data X - PyObject * o_x; - //Dictionary of scanner constants - PyObject * o_mmrcnst; - - //Structure of constants - Cnst Cnt; - //realisation number - int b; - - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOiO", &o_m1, &o_m2, &o_x, &b, &o_mmrcnst)) - return NULL; - //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - PyObject* pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); - Cnt.LOG = (char)PyLong_AsLong(pd_log); - PyObject* pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); - Cnt.DEVID = (char)PyLong_AsLong(pd_devid); - - //input sino and the above 2D LUT - PyArrayObject *p_m1 = NULL; - p_m1 = (PyArrayObject *)PyArray_FROM_OTF(o_m1, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - 
PyArrayObject *p_m2 = NULL; - p_m2 = (PyArrayObject *)PyArray_FROM_OTF(o_m2, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); - PyArrayObject *p_x = NULL; - p_x = (PyArrayObject *)PyArray_FROM_OTF(o_x, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - - if (p_m1 == NULL || p_m2 == NULL || p_x == NULL) { - PyArray_DiscardWritebackIfCopy(p_m1); - PyArray_DiscardWritebackIfCopy(p_m2); - Py_XDECREF(p_m1); - Py_XDECREF(p_m2); - Py_XDECREF(p_x); - } - - float *m1 = (float*)PyArray_DATA(p_m1); - float *m2 = (float*)PyArray_DATA(p_m2); - float *x = (float*)PyArray_DATA(p_x); - int ndim = PyArray_NDIM(p_x); - size_t nele = 1; - for (int i = 0; i number of elements in data array: %lu\n", nele); - - // sets the device on which to calculate - HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); - - //<><><><><><><><><><><><><><><><><><><><><><> - //Update variance online (M1, M2) using data instance X - var_online(m1, m2, x, b, nele); - //<><><><><><><><><><><><><><><><><><><><><><> - - //Clean up - PyArray_ResolveWritebackIfCopy(p_m1); - PyArray_ResolveWritebackIfCopy(p_m2); - Py_DECREF(p_m1); - Py_DECREF(p_m2); - Py_DECREF(p_x); - - Py_INCREF(Py_None); - return Py_None; - + // M1 (mean) vector + PyObject *o_m1; + // M2 (variance) vector + PyObject *o_m2; + // input of instance data X + PyObject *o_x; + // Dictionary of scanner constants + PyObject *o_mmrcnst; + + // Structure of constants + Cnst Cnt; + // realisation number + int b; + + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /* Parse the input tuple */ + if (!PyArg_ParseTuple(args, "OOOiO", &o_m1, &o_m2, &o_x, &b, &o_mmrcnst)) + return NULL; + //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); + Cnt.LOG = (char)PyLong_AsLong(pd_log); + PyObject *pd_devid = PyDict_GetItemString(o_mmrcnst, "DEVID"); + Cnt.DEVID = (char)PyLong_AsLong(pd_devid); + + // input sino and the above 2D LUT + PyArrayObject *p_m1 = NULL; + p_m1 = (PyArrayObject 
*)PyArray_FROM_OTF(o_m1, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + PyArrayObject *p_m2 = NULL; + p_m2 = (PyArrayObject *)PyArray_FROM_OTF(o_m2, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); + PyArrayObject *p_x = NULL; + p_x = (PyArrayObject *)PyArray_FROM_OTF(o_x, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); + + if (p_m1 == NULL || p_m2 == NULL || p_x == NULL) { + PyArray_DiscardWritebackIfCopy(p_m1); + PyArray_DiscardWritebackIfCopy(p_m2); + Py_XDECREF(p_m1); + Py_XDECREF(p_m2); + Py_XDECREF(p_x); + } + + float *m1 = (float *)PyArray_DATA(p_m1); + float *m2 = (float *)PyArray_DATA(p_m2); + float *x = (float *)PyArray_DATA(p_x); + int ndim = PyArray_NDIM(p_x); + size_t nele = 1; + for (int i = 0; i < ndim; i++) { + nele *= PyArray_DIM(p_x, i); + } + + printf("i> number of elements in data array: %lu\n", nele); + + // sets the device on which to calculate + HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); + + //<><><><><><><><><><><><><><><><><><><><><><> + // Update variance online (M1, M2) using data instance X + var_online(m1, m2, x, b, nele); + //<><><><><><><><><><><><><><><><><><><><><><> + + // Clean up + PyArray_ResolveWritebackIfCopy(p_m1); + PyArray_ResolveWritebackIfCopy(p_m2); + Py_DECREF(p_m1); + Py_DECREF(p_m2); + Py_DECREF(p_x); + + Py_INCREF(Py_None); + return Py_None; } diff --git a/niftypet/nipet/src/auxmath.cu b/niftypet/nipet/src/auxmath.cu index e89acda9..b256bdac 100644 --- a/niftypet/nipet/src/auxmath.cu +++ b/niftypet/nipet/src/auxmath.cu @@ -7,62 +7,57 @@ author: Pawel Markiewicz Copyrights: 2018 ------------------------------------------------------------------------*/ - #include "auxmath.h" #define MTHREADS 512 //============================================================================= -__global__ void var(float * M1, - float * M2, - float * X, - int b, - size_t nele) { - int idx = blockIdx.x*blockDim.x + threadIdx.x; - if (idx. 
- cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - float *d_m1; HANDLE_ERROR(cudaMalloc(&d_m1, nele * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_m1, M1, nele * sizeof(float), cudaMemcpyHostToDevice)); - float *d_m2; HANDLE_ERROR(cudaMalloc(&d_m2, nele * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_m2, M2, nele * sizeof(float), cudaMemcpyHostToDevice)); - float *d_x; HANDLE_ERROR(cudaMalloc(&d_x, nele * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_x, X, nele * sizeof(float), cudaMemcpyHostToDevice)); - - - int blcks = (nele + MTHREADS - 1) / MTHREADS; - var << < blcks, MTHREADS >> >(d_m1, d_m2, d_x, b, nele); - - - //copy M1 and M2 back to CPU memory - HANDLE_ERROR(cudaMemcpy(M1, d_m1, nele * sizeof(float), cudaMemcpyDeviceToHost)); - HANDLE_ERROR(cudaMemcpy(M2, d_m2, nele * sizeof(float), cudaMemcpyDeviceToHost)); - - cudaFree(d_m1); - cudaFree(d_m2); - cudaFree(d_x); - - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - - printf("i> online variance calculation DONE in %fs.\n\n", 0.001*elapsedTime); +void var_online(float *M1, float *M2, float *X, int b, size_t nele) { + + // do calculation of variance online using CUDA kernel . 
+ cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + + float *d_m1; + HANDLE_ERROR(cudaMalloc(&d_m1, nele * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_m1, M1, nele * sizeof(float), cudaMemcpyHostToDevice)); + float *d_m2; + HANDLE_ERROR(cudaMalloc(&d_m2, nele * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_m2, M2, nele * sizeof(float), cudaMemcpyHostToDevice)); + float *d_x; + HANDLE_ERROR(cudaMalloc(&d_x, nele * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_x, X, nele * sizeof(float), cudaMemcpyHostToDevice)); + + int blcks = (nele + MTHREADS - 1) / MTHREADS; + var<<>>(d_m1, d_m2, d_x, b, nele); + + // copy M1 and M2 back to CPU memory + HANDLE_ERROR(cudaMemcpy(M1, d_m1, nele * sizeof(float), cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaMemcpy(M2, d_m2, nele * sizeof(float), cudaMemcpyDeviceToHost)); + + cudaFree(d_m1); + cudaFree(d_m2); + cudaFree(d_x); + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + + printf("i> online variance calculation DONE in %fs.\n\n", 0.001 * elapsedTime); } //============================================================================= diff --git a/niftypet/nipet/src/norm.cu b/niftypet/nipet/src/norm.cu index a8cb9b34..dc4d76fd 100644 --- a/niftypet/nipet/src/norm.cu +++ b/niftypet/nipet/src/norm.cu @@ -6,250 +6,228 @@ author: Pawel Markiewicz Copyrights: 2018 ------------------------------------------------------------------------*/ -#include #include "norm.h" #include "scanner_0.h" +#include + +__global__ void dev_norm(float *nrmsino, const float *geo, const float *cinf, const float *ceff, + const float *axe1, const float *axf1, const float *DTp, const float *DTnp, + const int *bckts, const short *sn1_sn11, const short2 *sn1_rno, + const char *sn1_sn11no, const int *aw2li, Cnst cnt) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + if 
(idx < AW) { + + int wi = aw2li[idx] % cnt.W; + int ai = (aw2li[idx] - wi) / cnt.W; + int a9 = ai % 9; + + int c1 = floor(fmodf(ai + .5 * (cnt.NCRS - 2 + cnt.W / 2 - wi), cnt.NCRS)); + int c2 = floor(fmodf(ai + .5 * (2 * cnt.NCRS - 2 - cnt.W / 2 + wi), cnt.NCRS)); + + for (int si = 0; si < NSINOS; si++) { + short r0 = sn1_rno[si].x; + short r1 = sn1_rno[si].y; + short s11i = sn1_sn11[si]; -__global__ -void dev_norm(float *nrmsino, - const float *geo, - const float *cinf, - const float *ceff, - const float *axe1, - const float *axf1, - const float *DTp, - const float *DTnp, - const int *bckts, - const short *sn1_sn11, - const short2 *sn1_rno, - const char *sn1_sn11no, - const int *aw2li, - Cnst cnt) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - - if (idx using CUDA device #%d\n", dev_id); + //=========== CUDA ===================== + // create cuda norm sino for true and scatter data + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGINFO) + printf("i> using CUDA device #%d\n", dev_id); - int snno = -1; - if (Cnt.SPN == 1) - snno = NSINOS; - else if (Cnt.SPN == 11) - snno = NSINOS11; + int snno = -1; + if (Cnt.SPN == 1) + snno = NSINOS; + else if (Cnt.SPN == 11) + snno = NSINOS11; - float *d_nrm; + float *d_nrm; #ifdef WIN32 - HANDLE_ERROR(cudaMalloc(&d_nrm, AW*snno * sizeof(float))); + HANDLE_ERROR(cudaMalloc(&d_nrm, AW * snno * sizeof(float))); #else - HANDLE_ERROR(cudaMallocManaged(&d_nrm, AW*snno * sizeof(float))); + HANDLE_ERROR(cudaMallocManaged(&d_nrm, AW * snno * sizeof(float))); #endif - HANDLE_ERROR(cudaMemset(d_nrm, 0, AW*snno * sizeof(float))); + HANDLE_ERROR(cudaMemset(d_nrm, 0, AW * snno * sizeof(float))); + //--- move the norm components to device memory + //-- transaxial components + float *d_geo, *d_cinf, *d_ceff; - //--- move the norm components to device memory - //-- transaxial components - float *d_geo, *d_cinf, *d_ceff; - - //geometric effects + // geometric effects #ifdef WIN32 - HANDLE_ERROR(cudaMalloc(&d_geo, normc.ngeo[0] * 
normc.ngeo[1] * sizeof(float))); + HANDLE_ERROR(cudaMalloc(&d_geo, normc.ngeo[0] * normc.ngeo[1] * sizeof(float))); #else - HANDLE_ERROR(cudaMallocManaged(&d_geo, normc.ngeo[0] * normc.ngeo[1] * sizeof(float))); + HANDLE_ERROR(cudaMallocManaged(&d_geo, normc.ngeo[0] * normc.ngeo[1] * sizeof(float))); #endif - HANDLE_ERROR(cudaMemcpy(d_geo, normc.geo, normc.ngeo[0] * normc.ngeo[1] * sizeof(float), cudaMemcpyHostToDevice)); + HANDLE_ERROR(cudaMemcpy(d_geo, normc.geo, normc.ngeo[0] * normc.ngeo[1] * sizeof(float), + cudaMemcpyHostToDevice)); - //crystal interference + // crystal interference #ifdef WIN32 - HANDLE_ERROR(cudaMalloc(&d_cinf, normc.ncinf[0] * normc.ncinf[1] * sizeof(float))); + HANDLE_ERROR(cudaMalloc(&d_cinf, normc.ncinf[0] * normc.ncinf[1] * sizeof(float))); #else - HANDLE_ERROR(cudaMallocManaged(&d_cinf, normc.ncinf[0] * normc.ncinf[1] * sizeof(float))); + HANDLE_ERROR(cudaMallocManaged(&d_cinf, normc.ncinf[0] * normc.ncinf[1] * sizeof(float))); #endif - HANDLE_ERROR(cudaMemcpy(d_cinf, normc.cinf, normc.ncinf[0] * normc.ncinf[1] * sizeof(float), cudaMemcpyHostToDevice)); - - + HANDLE_ERROR(cudaMemcpy(d_cinf, normc.cinf, normc.ncinf[0] * normc.ncinf[1] * sizeof(float), + cudaMemcpyHostToDevice)); - //crystal efficiencies + // crystal efficiencies #ifdef WIN32 - HANDLE_ERROR(cudaMalloc(&d_ceff, normc.nceff[0] * normc.nceff[1] * sizeof(float))); + HANDLE_ERROR(cudaMalloc(&d_ceff, normc.nceff[0] * normc.nceff[1] * sizeof(float))); #else - HANDLE_ERROR(cudaMallocManaged(&d_ceff, normc.nceff[0] * normc.nceff[1] * sizeof(float))); + HANDLE_ERROR(cudaMallocManaged(&d_ceff, normc.nceff[0] * normc.nceff[1] * sizeof(float))); #endif - HANDLE_ERROR(cudaMemcpy(d_ceff, normc.ceff, normc.nceff[0] * normc.nceff[1] * sizeof(float), cudaMemcpyHostToDevice)); - //-- + HANDLE_ERROR(cudaMemcpy(d_ceff, normc.ceff, normc.nceff[0] * normc.nceff[1] * sizeof(float), + cudaMemcpyHostToDevice)); + //-- - //axial effects - float *d_axe1; + // axial effects + float *d_axe1; #ifdef 
WIN32 - HANDLE_ERROR(cudaMalloc(&d_axe1, normc.naxe * sizeof(float))); + HANDLE_ERROR(cudaMalloc(&d_axe1, normc.naxe * sizeof(float))); #else - HANDLE_ERROR(cudaMallocManaged(&d_axe1, normc.naxe * sizeof(float))); + HANDLE_ERROR(cudaMallocManaged(&d_axe1, normc.naxe * sizeof(float))); #endif - HANDLE_ERROR(cudaMemcpy(d_axe1, normc.axe1, normc.naxe * sizeof(float), cudaMemcpyHostToDevice)); + HANDLE_ERROR(cudaMemcpy(d_axe1, normc.axe1, normc.naxe * sizeof(float), cudaMemcpyHostToDevice)); - //axial effects for span-1 - float *d_axf1; + // axial effects for span-1 + float *d_axf1; #ifdef WIN32 - HANDLE_ERROR(cudaMalloc(&d_axf1, NSINOS * sizeof(float))); + HANDLE_ERROR(cudaMalloc(&d_axf1, NSINOS * sizeof(float))); #else - HANDLE_ERROR(cudaMallocManaged(&d_axf1, NSINOS * sizeof(float))); + HANDLE_ERROR(cudaMallocManaged(&d_axf1, NSINOS * sizeof(float))); #endif - HANDLE_ERROR(cudaMemcpy(d_axf1, normc.axf1, NSINOS * sizeof(float), cudaMemcpyHostToDevice)); - - //axial paralysing ring Dead Time (DT) parameters - float *d_DTp; - HANDLE_ERROR(cudaMalloc(&d_DTp, normc.nrdt * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_DTp, normc.dtp, normc.nrdt * sizeof(float), cudaMemcpyHostToDevice)); - - //axial non-paralyzing ring DT parameters - float *d_DTnp; - HANDLE_ERROR(cudaMalloc(&d_DTnp, normc.nrdt * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_DTnp, normc.dtnp, normc.nrdt * sizeof(float), cudaMemcpyHostToDevice)); - - //singles rates bucktes - int *d_bckts; - HANDLE_ERROR(cudaMalloc(&d_bckts, NBUCKTS * sizeof(int))); - HANDLE_ERROR(cudaMemcpy(d_bckts, bckts, NBUCKTS * sizeof(int), cudaMemcpyHostToDevice)); - //--- - - short2 *d_sn1rno; - HANDLE_ERROR(cudaMalloc(&d_sn1rno, NSINOS * sizeof(short2))); - HANDLE_ERROR(cudaMemcpy(d_sn1rno, axLUT.sn1_rno, NSINOS * sizeof(short2), cudaMemcpyHostToDevice)); - - short *d_sn1sn11; - HANDLE_ERROR(cudaMalloc(&d_sn1sn11, NSINOS * sizeof(short))); - HANDLE_ERROR(cudaMemcpy(d_sn1sn11, axLUT.sn1_sn11, NSINOS * sizeof(short), 
cudaMemcpyHostToDevice)); - - char *d_sn1sn11no; - HANDLE_ERROR(cudaMalloc(&d_sn1sn11no, NSINOS * sizeof(char))); - HANDLE_ERROR(cudaMemcpy(d_sn1sn11no, axLUT.sn1_sn11no, NSINOS * sizeof(char), cudaMemcpyHostToDevice)); - //--- - - //2D sino index LUT - int *d_aw2ali; - HANDLE_ERROR(cudaMalloc(&d_aw2ali, AW * sizeof(int))); - HANDLE_ERROR(cudaMemcpy(d_aw2ali, aw2ali, AW * sizeof(int), cudaMemcpyHostToDevice)); - - - //Create a structure of constants - Cnt.W = normc.ngeo[1]; - Cnt.NCRS = normc.nceff[1]; - Cnt.NRNG = normc.nceff[0]; - Cnt.D = axLUT.Nli2rno[1]; - Cnt.Bt = 28; - Cnt.Cbt = 18; - Cnt.Cba = 8; - - //printf(">>>> W=%d, AW=%d, C=%d, R=%d, D=%d, B=%d\n", cnt.W, cnt.aw, cnt.C, cnt.R, cnt.D, cnt.B); - - //CUDA grid size (in blocks) - int blcks = ceil(AW / (float)NTHREADS); - - if (Cnt.LOG <= LOGINFO) printf("i> calculating normalisation sino from norm components..."); - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - //============================================================================ - dim3 BpG(blcks, 1, 1); - dim3 TpB(NTHREADS, 1, 1); - dev_norm << > >(d_nrm, - d_geo, d_cinf, d_ceff, - d_axe1, d_axf1, - d_DTp, d_DTnp, - d_bckts, - d_sn1sn11, d_sn1rno, d_sn1sn11no, - d_aw2ali, - Cnt); - HANDLE_ERROR(cudaGetLastError()); - //============================================================================ - - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) printf(" DONE in %fs.\n", 0.001*elapsedTime); - //===================================== - - - //copy the GPU norm array to the output normalisation sinogram - HANDLE_ERROR(cudaMemcpy(sino, d_nrm, AW*snno * sizeof(float), cudaMemcpyDeviceToHost)); - - - - //Clean up - cudaFree(d_geo); - cudaFree(d_cinf); - cudaFree(d_ceff); - cudaFree(d_axe1); - cudaFree(d_DTp); - cudaFree(d_DTnp); - 
cudaFree(d_bckts); - cudaFree(d_nrm); - cudaFree(d_axf1); - - cudaFree(d_sn1sn11); - cudaFree(d_sn1rno); - cudaFree(d_aw2ali); - cudaFree(d_sn1sn11no); - - - return; + HANDLE_ERROR(cudaMemcpy(d_axf1, normc.axf1, NSINOS * sizeof(float), cudaMemcpyHostToDevice)); + + // axial paralysing ring Dead Time (DT) parameters + float *d_DTp; + HANDLE_ERROR(cudaMalloc(&d_DTp, normc.nrdt * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_DTp, normc.dtp, normc.nrdt * sizeof(float), cudaMemcpyHostToDevice)); + + // axial non-paralyzing ring DT parameters + float *d_DTnp; + HANDLE_ERROR(cudaMalloc(&d_DTnp, normc.nrdt * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_DTnp, normc.dtnp, normc.nrdt * sizeof(float), cudaMemcpyHostToDevice)); + + // singles rates bucktes + int *d_bckts; + HANDLE_ERROR(cudaMalloc(&d_bckts, NBUCKTS * sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_bckts, bckts, NBUCKTS * sizeof(int), cudaMemcpyHostToDevice)); + //--- + + short2 *d_sn1rno; + HANDLE_ERROR(cudaMalloc(&d_sn1rno, NSINOS * sizeof(short2))); + HANDLE_ERROR( + cudaMemcpy(d_sn1rno, axLUT.sn1_rno, NSINOS * sizeof(short2), cudaMemcpyHostToDevice)); + + short *d_sn1sn11; + HANDLE_ERROR(cudaMalloc(&d_sn1sn11, NSINOS * sizeof(short))); + HANDLE_ERROR( + cudaMemcpy(d_sn1sn11, axLUT.sn1_sn11, NSINOS * sizeof(short), cudaMemcpyHostToDevice)); + + char *d_sn1sn11no; + HANDLE_ERROR(cudaMalloc(&d_sn1sn11no, NSINOS * sizeof(char))); + HANDLE_ERROR( + cudaMemcpy(d_sn1sn11no, axLUT.sn1_sn11no, NSINOS * sizeof(char), cudaMemcpyHostToDevice)); + //--- + + // 2D sino index LUT + int *d_aw2ali; + HANDLE_ERROR(cudaMalloc(&d_aw2ali, AW * sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_aw2ali, aw2ali, AW * sizeof(int), cudaMemcpyHostToDevice)); + + // Create a structure of constants + Cnt.W = normc.ngeo[1]; + Cnt.NCRS = normc.nceff[1]; + Cnt.NRNG = normc.nceff[0]; + Cnt.D = axLUT.Nli2rno[1]; + Cnt.Bt = 28; + Cnt.Cbt = 18; + Cnt.Cba = 8; + + // printf(">>>> W=%d, AW=%d, C=%d, R=%d, D=%d, B=%d\n", cnt.W, cnt.aw, cnt.C, cnt.R, cnt.D, + 
// cnt.B); + + // CUDA grid size (in blocks) + int blcks = ceil(AW / (float)NTHREADS); + + if (Cnt.LOG <= LOGINFO) + printf("i> calculating normalisation sino from norm components..."); + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + //============================================================================ + dim3 BpG(blcks, 1, 1); + dim3 TpB(NTHREADS, 1, 1); + dev_norm<<>>(d_nrm, d_geo, d_cinf, d_ceff, d_axe1, d_axf1, d_DTp, d_DTnp, d_bckts, + d_sn1sn11, d_sn1rno, d_sn1sn11no, d_aw2ali, Cnt); + HANDLE_ERROR(cudaGetLastError()); + //============================================================================ + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGINFO) + printf(" DONE in %fs.\n", 0.001 * elapsedTime); + //===================================== + + // copy the GPU norm array to the output normalisation sinogram + HANDLE_ERROR(cudaMemcpy(sino, d_nrm, AW * snno * sizeof(float), cudaMemcpyDeviceToHost)); + + // Clean up + cudaFree(d_geo); + cudaFree(d_cinf); + cudaFree(d_ceff); + cudaFree(d_axe1); + cudaFree(d_DTp); + cudaFree(d_DTnp); + cudaFree(d_bckts); + cudaFree(d_nrm); + cudaFree(d_axf1); + + cudaFree(d_sn1sn11); + cudaFree(d_sn1rno); + cudaFree(d_aw2ali); + cudaFree(d_sn1sn11no); + + return; } // matrix size [1]:={344,127} diff --git a/niftypet/nipet/src/norm.h b/niftypet/nipet/src/norm.h index 81ff7d38..46d0847b 100644 --- a/niftypet/nipet/src/norm.h +++ b/niftypet/nipet/src/norm.h @@ -5,30 +5,26 @@ #define NORM_COMPONENTS_H struct NormCmp { - float * geo; - float * cinf; - float * ceff; - float * axe1; - float * dtp; - float * dtnp; - float * dtc; - float * axe2; - float * axf1; // user obtained axial effects for span-1 - int ngeo[2]; - int ncinf[2]; - int nceff[2]; - int naxe; - int nrdt; - int ncdt; + float *geo; + float *cinf; + 
float *ceff; + float *axe1; + float *dtp; + float *dtnp; + float *dtc; + float *axe2; + float *axf1; // user obtained axial effects for span-1 + int ngeo[2]; + int ncinf[2]; + int nceff[2]; + int naxe; + int nrdt; + int ncdt; }; -void norm_from_components(float *sino, - NormCmp normc, - axialLUT axLUT, - int *aw2ali, // transaxial angle/bin indx to lenar indx - int *bckts, // singles buckets - Cnst Cnt); - - +void norm_from_components(float *sino, NormCmp normc, axialLUT axLUT, + int *aw2ali, // transaxial angle/bin indx to lenar indx + int *bckts, // singles buckets + Cnst Cnt); #endif diff --git a/niftypet/nipet/src/scanner_0.cu b/niftypet/nipet/src/scanner_0.cu index 5df45a17..a619a564 100644 --- a/niftypet/nipet/src/scanner_0.cu +++ b/niftypet/nipet/src/scanner_0.cu @@ -6,89 +6,86 @@ reconstruction. author: Pawel Markiewicz Copyrights: 2018 ------------------------------------------------------------------------*/ -#include #include "scanner_0.h" +#include -//Error handling for CUDA routines +// Error handling for CUDA routines void HandleError(cudaError_t err, const char *file, int line) { - if (err != cudaSuccess) { - printf("%s in %s at line %d\n", cudaGetErrorString(err), file, line); - exit(EXIT_FAILURE); - } + if (err != cudaSuccess) { + printf("%s in %s at line %d\n", cudaGetErrorString(err), file, line); + exit(EXIT_FAILURE); + } } -//global variable list-mode data properties +// global variable list-mode data properties LMprop lmprop; -//global variable LM data array -int* lm; - +// global variable LM data array +int *lm; //************ CHECK DEVICE MEMORY USAGE ********************* void getMemUse(const Cnst Cnt) { - if (Cnt.LOG > LOGDEBUG) return; - size_t free_mem; - size_t total_mem; - HANDLE_ERROR(cudaMemGetInfo(&free_mem, &total_mem)); - double free_db = (double)free_mem; - double total_db = (double)total_mem; - double used_db = total_db - free_db; - printf("\ni> current GPU memory usage: %7.2f/%7.2f [MB]\n", used_db / 1024.0 / 1024.0, total_db / 
1024.0 / 1024.0); - // printf("\ni> GPU memory usage:\n used = %f MB,\n free = %f MB,\n total = %f MB\n", - // used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0); + if (Cnt.LOG > LOGDEBUG) + return; + size_t free_mem; + size_t total_mem; + HANDLE_ERROR(cudaMemGetInfo(&free_mem, &total_mem)); + double free_db = (double)free_mem; + double total_db = (double)total_mem; + double used_db = total_db - free_db; + printf("\ni> current GPU memory usage: %7.2f/%7.2f [MB]\n", used_db / 1024.0 / 1024.0, + total_db / 1024.0 / 1024.0); + // printf("\ni> GPU memory usage:\n used = %f MB,\n free = %f MB,\n total = %f MB\n", + // used_db/1024.0/1024.0, free_db/1024.0/1024.0, total_db/1024.0/1024.0); } //************************************************************ - //================================================================== #define SPAN 11 -span11LUT span1_span11(const Cnst Cnt) -{ - span11LUT span11; - span11.li2s11 = (short *)malloc(Cnt.NSN1 * sizeof(short)); - span11.NSinos = (char *)malloc(Cnt.NSN11 * sizeof(char)); - memset(span11.NSinos, 0, Cnt.NSN11); - - int sinoSeg[SPAN] = { 127,115,115,93,93,71,71,49,49,27,27 }; - //cumulative sum of the above segment def - int cumSeg[SPAN]; - cumSeg[0] = 0; - for (int i = 1; i= pmb) { - //(si-pmb) is the sino position index for a given +RD - ri = 2 * (si - pmb) + rd; - minus = 0; - } - else { - //(si-segsum+2*(Cnt.RE-rd)) is the sino position index for a given -RD - ri = 2 * (si - segsum + 2 * (Cnt.NRNG - rd)) + rd; - minus = 1; - } - //the below is equivalent to (rd-5+SPAN-1)/SPAN which is doing a ceil function on integer - int iseg = (rd + 5) / SPAN; - int off = (127 - sinoSeg[2 * iseg]) / 2; - - - int ci = 2 * iseg - minus*(iseg>0); - span11.li2s11[si] = (short)(cumSeg[ci] + ri - off); - span11.NSinos[(cumSeg[ci] + ri - off)] += 1; - //printf("[%d] %d\n", si, span11.li2s11[si]); - } - - return span11; +span11LUT span1_span11(const Cnst Cnt) { + span11LUT span11; + span11.li2s11 = (short *)malloc(Cnt.NSN1 * 
sizeof(short)); + span11.NSinos = (char *)malloc(Cnt.NSN11 * sizeof(char)); + memset(span11.NSinos, 0, Cnt.NSN11); + + int sinoSeg[SPAN] = {127, 115, 115, 93, 93, 71, 71, 49, 49, 27, 27}; + // cumulative sum of the above segment def + int cumSeg[SPAN]; + cumSeg[0] = 0; + for (int i = 1; i < SPAN; i++) + cumSeg[i] = cumSeg[i - 1] + sinoSeg[i - 1]; + + int segsum = Cnt.NRNG; + int rd = 0; + for (int si = 0; si < Cnt.NSN1; si++) { + + while ((segsum - 1) < si) { + rd += 1; + segsum += 2 * (Cnt.NRNG - rd); + } + // plus/minus break (pmb) point + int pmb = segsum - (Cnt.NRNG - rd); + int ri, minus; + if (si >= pmb) { + //(si-pmb) is the sino position index for a given +RD + ri = 2 * (si - pmb) + rd; + minus = 0; + } else { + //(si-segsum+2*(Cnt.RE-rd)) is the sino position index for a given -RD + ri = 2 * (si - segsum + 2 * (Cnt.NRNG - rd)) + rd; + minus = 1; + } + // the below is equivalent to (rd-5+SPAN-1)/SPAN which is doing a ceil function on integer + int iseg = (rd + 5) / SPAN; + int off = (127 - sinoSeg[2 * iseg]) / 2; + + int ci = 2 * iseg - minus * (iseg > 0); + span11.li2s11[si] = (short)(cumSeg[ci] + ri - off); + span11.NSinos[(cumSeg[ci] + ri - off)] += 1; + // printf("[%d] %d\n", si, span11.li2s11[si]); + } + + return span11; } //<<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>> @@ -96,177 +93,160 @@ span11LUT span1_span11(const Cnst Cnt) //<<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>><<*>> //====================================================================== -__global__ void d_remgaps(float * sng, - const float * sn, - const int * aw2li, - const int snno) -{ - int idx = blockIdx.x*blockDim.x + threadIdx.x; - if (idx using CUDA device #%d\n", dev_id); - - int nthreads = 256; - int blcks = ceil(AW / (float)nthreads); - - float *d_sng; HANDLE_ERROR(cudaMalloc(&d_sng, AW*snno * sizeof(float))); - HANDLE_ERROR(cudaMemset(d_sng, 0, AW*snno * sizeof(float))); - - float *d_sino; HANDLE_ERROR(cudaMalloc(&d_sino, 
NSBINS*NSANGLES*snno * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_sino, sino, NSBINS*NSANGLES*snno * sizeof(float), cudaMemcpyHostToDevice)); - - int *d_aw2ali; - HANDLE_ERROR(cudaMalloc(&d_aw2ali, AW * sizeof(int))); - HANDLE_ERROR(cudaMemcpy(d_aw2ali, aw2ali, AW * sizeof(int), cudaMemcpyHostToDevice)); - - if (Cnt.LOG <= LOGINFO) - printf("i> and removing the gaps and reordering sino for GPU..."); - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - //================================================================== - d_remgaps << > >(d_sng, d_sino, d_aw2ali, snno); - HANDLE_ERROR(cudaGetLastError()); - //================================================================== - - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) - printf(" DONE in %fs\n", 0.001*elapsedTime); - - HANDLE_ERROR(cudaMemcpy(sng, d_sng, AW*snno * sizeof(float), cudaMemcpyDeviceToHost)); - - cudaFree(d_sng); - cudaFree(d_sino); - cudaFree(d_aw2ali); - - return; +void remove_gaps(float *sng, float *sino, int snno, int *aw2ali, Cnst Cnt) { + // check which device is going to be used + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGINFO) + printf("i> using CUDA device #%d\n", dev_id); + + int nthreads = 256; + int blcks = ceil(AW / (float)nthreads); + + float *d_sng; + HANDLE_ERROR(cudaMalloc(&d_sng, AW * snno * sizeof(float))); + HANDLE_ERROR(cudaMemset(d_sng, 0, AW * snno * sizeof(float))); + + float *d_sino; + HANDLE_ERROR(cudaMalloc(&d_sino, NSBINS * NSANGLES * snno * sizeof(float))); + HANDLE_ERROR( + cudaMemcpy(d_sino, sino, NSBINS * NSANGLES * snno * sizeof(float), cudaMemcpyHostToDevice)); + + int *d_aw2ali; + HANDLE_ERROR(cudaMalloc(&d_aw2ali, AW * sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_aw2ali, aw2ali, AW * sizeof(int), cudaMemcpyHostToDevice)); + + if 
(Cnt.LOG <= LOGINFO) + printf("i> and removing the gaps and reordering sino for GPU..."); + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + //================================================================== + d_remgaps<<>>(d_sng, d_sino, d_aw2ali, snno); + HANDLE_ERROR(cudaGetLastError()); + //================================================================== + + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGINFO) + printf(" DONE in %fs\n", 0.001 * elapsedTime); + + HANDLE_ERROR(cudaMemcpy(sng, d_sng, AW * snno * sizeof(float), cudaMemcpyDeviceToHost)); + + cudaFree(d_sng); + cudaFree(d_sino); + cudaFree(d_aw2ali); + + return; } - //============================================================================= -__global__ void d_putgaps(float *sne7, - float *snaw, - int *aw2ali, - const int snno) -{ - //sino index - int sni = threadIdx.x + blockIdx.y*blockDim.x; - - //sino bin index - int awi = blockIdx.x; - - if (sni using CUDA device #%d\n", dev_id); - - //number of sinos - int snno = -1; - //number of blocks of threads - dim3 zBpG(AW, 1, 1); - - if (sino_no>0){ - snno = sino_no; - } - else if (Cnt.SPN == 11) { - // number of blocks (y) for CUDA launch - zBpG.y = 2; - snno = NSINOS11; - } - else if (Cnt.SPN == 1) { - // number of blocks (y) for CUDA launch - zBpG.y = 8; - // number of direct rings considered - int nrng_c = Cnt.RNG_END - Cnt.RNG_STRT; - snno = nrng_c*nrng_c; - //correct for the max. 
ring difference in the full axial extent (don't use ring range (1,63) as for this case no correction) - if (nrng_c == 64) snno -= 12; - } - else { - printf("e> not span-1, span-11 nor user defined.\n"); - return; - } - - //printf("ci> number of sinograms to put gaps in: %d\n", snno); REMOVED AS SCREEN OUTPUT IS TOO MUCH - - float *d_sng; - HANDLE_ERROR(cudaMalloc(&d_sng, AW*snno * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_sng, sng, AW*snno * sizeof(float), cudaMemcpyHostToDevice)); - - float *d_sino; - HANDLE_ERROR(cudaMalloc(&d_sino, NSBINS*NSANGLES*snno * sizeof(float))); - HANDLE_ERROR(cudaMemset(d_sino, 0, NSBINS*NSANGLES*snno * sizeof(float))); - - int *d_aw2ali; - HANDLE_ERROR(cudaMalloc(&d_aw2ali, AW * sizeof(int))); - HANDLE_ERROR(cudaMemcpy(d_aw2ali, aw2ali, AW * sizeof(int), cudaMemcpyHostToDevice)); - - if (Cnt.LOG <= LOGINFO) - printf("i> put gaps in and reorder sino..."); - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - cudaEventRecord(start, 0); - - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - d_putgaps <<< zBpG, 64 * 14 >>>( - d_sino, - d_sng, - d_aw2ali, - snno); - HANDLE_ERROR(cudaGetLastError()); - //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) - printf("DONE in %fs.\n", 0.001*elapsedTime); - - HANDLE_ERROR(cudaMemcpy(sino, d_sino, NSBINS*NSANGLES*snno * sizeof(float), cudaMemcpyDeviceToHost)); - - cudaFree(d_sng); - cudaFree(d_sino); - cudaFree(d_aw2ali); - return; +void put_gaps(float *sino, float *sng, int *aw2ali, int sino_no, Cnst Cnt) { + // check which device is going to be used + int dev_id; + cudaGetDevice(&dev_id); + if (Cnt.LOG <= LOGINFO) + printf("i> using CUDA device #%d\n", dev_id); + + // number of sinos + int snno = -1; + // number of blocks of threads + dim3 
zBpG(AW, 1, 1); + + if (sino_no > 0) { + snno = sino_no; + } else if (Cnt.SPN == 11) { + // number of blocks (y) for CUDA launch + zBpG.y = 2; + snno = NSINOS11; + } else if (Cnt.SPN == 1) { + // number of blocks (y) for CUDA launch + zBpG.y = 8; + // number of direct rings considered + int nrng_c = Cnt.RNG_END - Cnt.RNG_STRT; + snno = nrng_c * nrng_c; + // correct for the max. ring difference in the full axial extent (don't use ring range (1,63) + // as for this case no correction) + if (nrng_c == 64) + snno -= 12; + } else { + printf("e> not span-1, span-11 nor user defined.\n"); + return; + } + + // printf("ci> number of sinograms to put gaps in: %d\n", snno); REMOVED AS SCREEN OUTPUT IS TOO + // MUCH + + float *d_sng; + HANDLE_ERROR(cudaMalloc(&d_sng, AW * snno * sizeof(float))); + HANDLE_ERROR(cudaMemcpy(d_sng, sng, AW * snno * sizeof(float), cudaMemcpyHostToDevice)); + + float *d_sino; + HANDLE_ERROR(cudaMalloc(&d_sino, NSBINS * NSANGLES * snno * sizeof(float))); + HANDLE_ERROR(cudaMemset(d_sino, 0, NSBINS * NSANGLES * snno * sizeof(float))); + + int *d_aw2ali; + HANDLE_ERROR(cudaMalloc(&d_aw2ali, AW * sizeof(int))); + HANDLE_ERROR(cudaMemcpy(d_aw2ali, aw2ali, AW * sizeof(int), cudaMemcpyHostToDevice)); + + if (Cnt.LOG <= LOGINFO) + printf("i> put gaps in and reorder sino..."); + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + cudaEventRecord(start, 0); + + //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + d_putgaps<<>>(d_sino, d_sng, d_aw2ali, snno); + HANDLE_ERROR(cudaGetLastError()); + //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><> + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGINFO) + printf("DONE in %fs.\n", 0.001 * elapsedTime); + + HANDLE_ERROR( + cudaMemcpy(sino, d_sino, NSBINS * NSANGLES * snno * sizeof(float), cudaMemcpyDeviceToHost)); + 
+ cudaFree(d_sng); + cudaFree(d_sino); + cudaFree(d_aw2ali); + return; } From 93c4598e6af62e3be7b2fafddbf54f52d754fc7b Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 7 Jan 2021 02:02:10 +0000 Subject: [PATCH 05/64] format: python config --- .github/workflows/test.yml | 14 ++++++++++++++ .pre-commit-config.yaml | 14 ++++++++++++++ setup.cfg | 18 ++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 63b078a9..1448c30f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,6 +23,20 @@ jobs: - name: dependencies run: | pip install -U pre-commit + sudo apt-get install -yqq clang-format + - uses: reviewdog/action-setup@v1 + - name: comment + run: | + if [[ $EVENT == pull_request ]]; then + REPORTER=github-pr-review + else + REPORTER=github-check + fi + pre-commit run -a todo | reviewdog -efm="%f:%l: %m" -name=TODO -tee -reporter=$REPORTER -filter-mode nofilter + pre-commit run -a flake8 | reviewdog -f=pep8 -name=flake8 -tee -reporter=$REPORTER -filter-mode nofilter + env: + REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} + EVENT: ${{ github.event_name }} - run: pre-commit run -a --show-diff-on-failure test: if: github.event_name != 'pull_request' || github.head_ref != 'devel' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index acfd7e60..e746c633 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,6 +25,20 @@ repos: types: [text] exclude: ^(.pre-commit-config.yaml|.github/workflows/test.yml)$ args: [-i] +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 + additional_dependencies: + - flake8-bugbear + - flake8-comprehensions + - flake8-debugger + - flake8-string-format +- repo: https://github.com/google/yapf + rev: 6db9374 + hooks: + - id: yapf + args: [-i] - repo: https://github.com/PyCQA/isort rev: 5.7.0 hooks: diff --git a/setup.cfg b/setup.cfg index 8d393272..b8da5c57 100644 --- 
a/setup.cfg +++ b/setup.cfg @@ -63,10 +63,28 @@ dev= codecov examples=jupyter; ipywidgets; matplotlib; brainweb +[yapf] +spaces_before_comment=15, 20 +arithmetic_precedence_indication=true +allow_split_before_dict_value=false +coalesce_brackets=True +column_limit=99 +each_dict_entry_on_separate_line=False +space_between_ending_comma_and_closing_bracket=False +split_before_named_assigns=False +split_before_closing_bracket=False + [isort] profile=black +line_length=99 known_first_party=niftypet,tests +[flake8] +statistics=True +max_line_length=99 +extend-ignore=W504,E225,E261,E701,P1 +exclude=.git,__pycache__,build,dist,.eggs + [tool:pytest] timeout=3600 log_level=INFO From ff3d306eeb15b52ac3c20ee35a57672ba478e8f3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 7 Jan 2021 02:04:32 +0000 Subject: [PATCH 06/64] format: python --- niftypet/nipet/__init__.py | 14 +- niftypet/nipet/img/auximg.py | 55 +-- niftypet/nipet/img/mmrimg.py | 793 +++++++++++++++-------------------- niftypet/nipet/img/pipe.py | 329 +++++++-------- niftypet/nipet/lm/mmrhist.py | 345 ++++++++------- niftypet/nipet/lm/pviews.py | 71 ++-- niftypet/nipet/mmraux.py | 636 ++++++++++++++-------------- niftypet/nipet/mmrnorm.py | 68 ++- niftypet/nipet/prj/mmrprj.py | 80 ++-- niftypet/nipet/prj/mmrrec.py | 185 ++++---- niftypet/nipet/prj/mmrsim.py | 146 +++---- niftypet/nipet/sct/mmrsct.py | 350 ++++++++-------- setup.py | 56 +-- tests/conftest.py | 13 +- tests/test_amyloid_pvc.py | 96 ++--- 15 files changed, 1491 insertions(+), 1746 deletions(-) diff --git a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index 61bf7043..68ba3be8 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """initialise the NiftyPET NIPET package""" -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" +__author__ = ("Pawel J. Markiewicz", "Casper O. 
da Costa-Luis") +__copyright__ = "Copyright 2020" # version detector. Precedence: installed dist, git, 'UNKNOWN' try: from ._dist_ver import __version__ @@ -47,22 +47,12 @@ # https://docs.python.org/3/howto/logging.html#library-config # log.addHandler(LogHandler()) # do it anyway for convenience - - - - - - - - - if resources.ENBLAGG: from .lm.pviews import video_dyn, video_frm if resources.ENBLXNAT: from xnat import xnat - #> GE Signa #from . import aux_sig diff --git a/niftypet/nipet/img/auximg.py b/niftypet/nipet/img/auximg.py index d02c7208..05efafbe 100644 --- a/niftypet/nipet/img/auximg.py +++ b/niftypet/nipet/img/auximg.py @@ -1,6 +1,6 @@ """auxilary imaging functions for PET image reconstruction and analysis.""" -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" +__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") +__copyright__ = "Copyright 2020" import logging import os @@ -22,21 +22,23 @@ def obtain_image(img, Cnt=None, imtype=''): #> all findings go to the output dictionary output = {} if isinstance(img, dict): - if Cnt is not None and img['im'].shape!=(Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']): - log.error('provided '+imtype+' via the dictionary has inconsistent dimensions compared to Cnt.') + if Cnt is not None and img['im'].shape != (Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']): + log.error('provided ' + imtype + + ' via the dictionary has inconsistent dimensions compared to Cnt.') raise ValueError('Wrong dimensions of the mu-map') else: output['im'] = img['im'] output['exists'] = True - if 'fim' in img: output['fim'] = img['fim'] + if 'fim' in img: output['fim'] = img['fim'] if 'faff' in img: output['faff'] = img['faff'] if 'fmuref' in img: output['fmuref'] = img['fmuref'] if 'affine' in img: output['affine'] = img['affine'] - log.info('using '+imtype+' from dictionary') + log.info('using ' + imtype + ' from dictionary') - elif isinstance(img, (np.ndarray, np.generic) ): - if Cnt is not 
None and img.shape!=(Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']): - log.error('provided '+imtype+' via the numpy array has inconsistent dimensions compared to Cnt.') + elif isinstance(img, (np.ndarray, np.generic)): + if Cnt is not None and img.shape != (Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']): + log.error('provided ' + imtype + + ' via the numpy array has inconsistent dimensions compared to Cnt.') raise ValueError('Wrong dimensions of the mu-map') else: output['im'] = img @@ -49,19 +51,20 @@ def obtain_image(img, Cnt=None, imtype=''): imdct = nimpa.getnii(img, output='all') output['im'] = imdct['im'] output['affine'] = imdct['affine'] - if Cnt and output['im'].shape!=(Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']): - log.error('provided '+imtype+' via file has inconsistent dimensions compared to Cnt.') + if Cnt and output['im'].shape != (Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']): + log.error('provided ' + imtype + + ' via file has inconsistent dimensions compared to Cnt.') raise ValueError('Wrong dimensions of the mu-map') else: output['exists'] = True output['fim'] = img - log.info('using '+imtype+' from NIfTI file.') + log.info('using ' + imtype + ' from NIfTI file.') else: - log.error('provided '+imtype+' path is invalid.') + log.error('provided ' + imtype + ' path is invalid.') return None elif isinstance(img, list): output['im'] = np.zeros((Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']), dtype=np.float32) - log.info(imtype+' has not been provided -> using blank.') + log.info(imtype + ' has not been provided -> using blank.') output['fim'] = '' output['exists'] = False #------------------------------------------------------------------------ @@ -83,10 +86,12 @@ def dynamic_timings(flist, offset=0): ''' if not isinstance(flist, list): raise TypeError('Wrong type of frame data input') - if all([isinstance(t,(int, np.int32, np.int16, np.int8, np.uint8, np.uint16, np.uint32)) for t in flist]): + if all([ + isinstance(t, (int, np.int32, np.int16, np.int8, 
np.uint8, np.uint16, np.uint32)) + for t in flist]): tsum = offset # list of frame timings - if offset>0: + if offset > 0: t_frames = [[0, offset]] else: t_frames = [] @@ -99,16 +104,16 @@ def dynamic_timings(flist, offset=0): # append the timings to the list t_frames.append([t0, t1]) frms = np.uint16(flist) - elif all([isinstance(t,list) and len(t)==2 for t in flist]): - if offset>0: - flist.insert(0,[1,offset]) + elif all([isinstance(t, list) and len(t) == 2 for t in flist]): + if offset > 0: + flist.insert(0, [1, offset]) farray = np.asarray(flist, dtype=np.uint16) else: farray = np.array(flist) # number of dynamic frames - nfrm = np.sum(farray[:,0]) + nfrm = np.sum(farray[:, 0]) # list of frame duration - frms = np.zeros(nfrm,dtype=np.uint16) + frms = np.zeros(nfrm, dtype=np.uint16) #frame iterator fi = 0 #time sum of frames @@ -116,18 +121,18 @@ def dynamic_timings(flist, offset=0): # list of frame timings t_frames = [] for i in range(0, farray.shape[0]): - for t in range(0, farray[i,0]): + for t in range(0, farray[i, 0]): # frame start time t0 = tsum - tsum += farray[i,1] + tsum += farray[i, 1] # frame end time t1 = tsum # append the timings to the list t_frames.append([t0, t1]) - frms[fi] = farray[i,1] + frms[fi] = farray[i, 1] fi += 1 else: raise TypeError('Unrecognised data input.') # prepare the output dictionary - out = {'total':tsum, 'frames':frms, 'timings':t_frames} + out = {'total': tsum, 'frames': frms, 'timings': t_frames} return out diff --git a/niftypet/nipet/img/mmrimg.py b/niftypet/nipet/img/mmrimg.py index 2825784f..c3a399aa 100644 --- a/niftypet/nipet/img/mmrimg.py +++ b/niftypet/nipet/img/mmrimg.py @@ -23,14 +23,12 @@ from .. import mmraux from .. import resources as rs -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" +__author__ = ("Pawel J. Markiewicz", "Casper O. 
da Costa-Luis") +__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) - ct_nans = -1024 - #=================================================================================== # IMAGE ROUTINES #=================================================================================== @@ -39,10 +37,10 @@ def convert2e7(img, Cnt): '''Convert GPU optimised image to Siemens/E7 image shape (127,344,344).''' - margin = (Cnt['SO_IMX']-Cnt['SZ_IMX']) // 2 + margin = (Cnt['SO_IMX'] - Cnt['SZ_IMX']) // 2 #permute the dims first - imo = np.transpose(img, (2,0,1)) + imo = np.transpose(img, (2, 0, 1)) nvz = img.shape[2] @@ -58,20 +56,20 @@ def convert2e7(img, Cnt): def convert2dev(im, Cnt): '''Reshape Siemens/E7 (default) image for optimal GPU execution.''' - if im.shape[1]!=Cnt['SO_IMY'] or im.shape[2]!=Cnt['SO_IMX']: + if im.shape[1] != Cnt['SO_IMY'] or im.shape[2] != Cnt['SO_IMX']: raise ValueError('e> input image array is not of the correct Siemens shape.') - if 'rSZ_IMZ' in Cnt and im.shape[0]!=Cnt['rSZ_IMZ']: + if 'rSZ_IMZ' in Cnt and im.shape[0] != Cnt['rSZ_IMZ']: log.warning('the axial number of voxels does not match the reduced rings.') - elif 'rSZ_IMZ' not in Cnt and im.shape[0]!=Cnt['SZ_IMZ']: + elif 'rSZ_IMZ' not in Cnt and im.shape[0] != Cnt['SZ_IMZ']: log.warning('the axial number of voxels does not match the rings.') im_sqzd = np.zeros((im.shape[0], Cnt['SZ_IMY'], Cnt['SZ_IMX']), dtype=np.float32) - margin = int((Cnt['SO_IMX']-Cnt['SZ_IMX'])/2) - margin_=-margin - if margin==0: + margin = int((Cnt['SO_IMX'] - Cnt['SZ_IMX']) / 2) + margin_ = -margin + if margin == 0: margin = None - margin_= None + margin_ = None im_sqzd = im[:, margin:margin_, margin:margin_] im_sqzd = np.transpose(im_sqzd, (1, 2, 0)) @@ -83,22 +81,22 @@ def cropxy(im, imsize, datain, Cnt, store_pth=''): Crop image transaxially to the size in tuple . Return the image and the affine matrix. 
''' - if not imsize[0]%2==0 and not imsize[1]%2==0: + if not imsize[0] % 2 == 0 and not imsize[1] % 2 == 0: log.error('image size has to be an even number!') return None # cropping indexes - i0 = int((Cnt['SO_IMX']-imsize[0])/2) - i1 = int((Cnt['SO_IMY']+imsize[1])/2) + i0 = int((Cnt['SO_IMX'] - imsize[0]) / 2) + i1 = int((Cnt['SO_IMY'] + imsize[1]) / 2) B = image_affine(datain, Cnt, gantry_offset=False) - B[0,3] -= 10*Cnt['SO_VXX']*i0 - B[1,3] += 10*Cnt['SO_VXY']*(Cnt['SO_IMY']-i1) + B[0, 3] -= 10 * Cnt['SO_VXX'] * i0 + B[1, 3] += 10 * Cnt['SO_VXY'] * (Cnt['SO_IMY'] - i1) cim = im[:, i0:i1, i0:i1] - if store_pth!='': - nimpa.array2nii( cim[::-1,::-1,:], B, store_pth, descrip='cropped') + if store_pth != '': + nimpa.array2nii(cim[::-1, ::-1, :], B, store_pth, descrip='cropped') log.info('saved cropped image to:\n{}'.format(store_pth)) return cim, B @@ -115,10 +113,10 @@ def image_affine(datain, Cnt, gantry_offset=False): goff = np.zeros((3)) vbed, hbed = mmraux.vh_bedpos(datain, Cnt) # create a reference empty mu-map image - B = np.diag(np.array([-10*Cnt['SO_VXX'], 10*Cnt['SO_VXY'], 10*Cnt['SO_VXZ'], 1])) - B[0,3] = 10*(.5*Cnt['SO_IMX']*Cnt['SO_VXX'] + goff[0]) - B[1,3] = 10*((-.5*Cnt['SO_IMY']+1)*Cnt['SO_VXY'] - goff[1]) - B[2,3] = 10*((-.5*Cnt['SO_IMZ']+1)*Cnt['SO_VXZ'] - goff[2] + hbed) + B = np.diag(np.array([-10 * Cnt['SO_VXX'], 10 * Cnt['SO_VXY'], 10 * Cnt['SO_VXZ'], 1])) + B[0, 3] = 10 * (.5 * Cnt['SO_IMX'] * Cnt['SO_VXX'] + goff[0]) + B[1, 3] = 10 * ((-.5 * Cnt['SO_IMY'] + 1) * Cnt['SO_VXY'] - goff[1]) + B[2, 3] = 10 * ((-.5 * Cnt['SO_IMZ'] + 1) * Cnt['SO_VXZ'] - goff[2] + hbed) # ------------------------------------------------------------------------------------- return B @@ -133,35 +131,21 @@ def getmu_off(mu, Cnt, Offst=np.array([0., 0., 0.])): # CORRECT THE MU-MAP for GANTRY OFFSET #------------------------------------------------------------------------- Cim = { - 'VXSOx':0.208626, - 'VXSOy':0.208626, - 'VXSOz':0.203125, - 'VXNOx':344, - 'VXNOy':344, 
- 'VXNOz':127, - - 'VXSRx':0.208626, - 'VXSRy':0.208626, - 'VXSRz':0.203125, - 'VXNRx':344, - 'VXNRy':344, - 'VXNRz':127 - } + 'VXSOx': 0.208626, 'VXSOy': 0.208626, 'VXSOz': 0.203125, 'VXNOx': 344, 'VXNOy': 344, + 'VXNOz': 127, 'VXSRx': 0.208626, 'VXSRy': 0.208626, 'VXSRz': 0.203125, 'VXNRx': 344, + 'VXNRy': 344, 'VXNRz': 127} #original image offset - Cim['OFFOx'] = -0.5*Cim['VXNOx']*Cim['VXSOx'] - Cim['OFFOy'] = -0.5*Cim['VXNOy']*Cim['VXSOy'] - Cim['OFFOz'] = -0.5*Cim['VXNOz']*Cim['VXSOz'] + Cim['OFFOx'] = -0.5 * Cim['VXNOx'] * Cim['VXSOx'] + Cim['OFFOy'] = -0.5 * Cim['VXNOy'] * Cim['VXSOy'] + Cim['OFFOz'] = -0.5 * Cim['VXNOz'] * Cim['VXSOz'] #resampled image offset - Cim['OFFRx'] = -0.5*Cim['VXNRx']*Cim['VXSRx'] - Cim['OFFRy'] = -0.5*Cim['VXNRy']*Cim['VXSRy'] - Cim['OFFRz'] = -0.5*Cim['VXNRz']*Cim['VXSRz'] + Cim['OFFRx'] = -0.5 * Cim['VXNRx'] * Cim['VXSRx'] + Cim['OFFRy'] = -0.5 * Cim['VXNRy'] * Cim['VXSRy'] + Cim['OFFRz'] = -0.5 * Cim['VXNRz'] * Cim['VXSRz'] #transformation matrix A = np.array( - [[ 1., 0., 0., Offst[0] ], - [ 0., 1., 0., Offst[1] ], - [ 0., 0., 1., Offst[2] ], - [ 0., 0., 0., 1. 
]], dtype=np.float32 - ) + [[1., 0., 0., Offst[0]], [0., 1., 0., Offst[1]], [0., 0., 1., Offst[2]], [0., 0., 0., 1.]], + dtype=np.float32) #apply the gantry offset to the mu-map mur = nimpa.prc.improc.resample(mu, A, Cim) return mur @@ -187,14 +171,9 @@ def getinterfile_off(fmu, Cnt, Offst=np.array([0., 0., 0.])): mumax = np.max(mur) mumin = np.min(mur) #> number of voxels greater than 10% of max image value - n10mx = np.sum(mur>0.1*mumax) + n10mx = np.sum(mur > 0.1 * mumax) #> return image dictionary with the image itself and some other stats - mu_dct = {'im':mur, - 'ims':murs, - 'max':mumax, - 'min':mumin, - 'nvx':nvx, - 'n10mx':n10mx} + mu_dct = {'im': mur, 'ims': murs, 'max': mumax, 'min': mumin, 'nvx': nvx, 'n10mx': n10mx} return mu_dct @@ -215,18 +194,13 @@ def getinterfile(fim, Cnt): immin = np.min(im) #number of voxels greater than 10% of max image value - n10mx = np.sum(im>0.1*immax) + n10mx = np.sum(im > 0.1 * immax) #reorganise the image for optimal gpu execution im_sqzd = convert2dev(im, Cnt) #return image dictionary with the image itself and some other stats - im_dct = {'im':im, - 'ims':im_sqzd, - 'max':immax, - 'min':immin, - 'nvx':nvx, - 'n10mx':n10mx} + im_dct = {'im': im, 'ims': im_sqzd, 'max': immax, 'min': immin, 'nvx': nvx, 'n10mx': n10mx} return im_dct @@ -237,13 +211,13 @@ def getinterfile(fim, Cnt): def get_cylinder(Cnt, rad=25, xo=0, yo=0, unival=1, gpu_dim=False): '''Outputs image with a uniform cylinder of intensity = unival, radius = rad, and transaxial centre (xo, yo)''' imdsk = np.zeros((1, Cnt['SO_IMX'], Cnt['SO_IMY']), dtype=np.float32) - for t in np.arange(0, math.pi, math.pi/(2*360)): - x = xo+rad*math.cos(t) - y = yo+rad*math.sin(t) - yf = np.arange(-y+2*yo, y, Cnt['SO_VXY']/2) - v = np.int32(.5*Cnt['SO_IMX'] - np.ceil(yf/Cnt['SO_VXY'])) - u = np.int32(.5*Cnt['SO_IMY'] + np.floor(x/Cnt['SO_VXY'])) - imdsk[0,v,u] = unival + for t in np.arange(0, math.pi, math.pi / (2*360)): + x = xo + rad * math.cos(t) + y = yo + rad * math.sin(t) + 
yf = np.arange(-y + 2*yo, y, Cnt['SO_VXY'] / 2) + v = np.int32(.5 * Cnt['SO_IMX'] - np.ceil(yf / Cnt['SO_VXY'])) + u = np.int32(.5 * Cnt['SO_IMY'] + np.floor(x / Cnt['SO_VXY'])) + imdsk[0, v, u] = unival if 'rSO_IMZ' in Cnt: nvz = Cnt['rSO_IMZ'] else: @@ -258,16 +232,16 @@ def hu2mu(im): # convert nans to -1024 for the HU values only im[np.isnan(im)] = ct_nans # constants - muwater = 0.096 - mubone = 0.172 + muwater = 0.096 + mubone = 0.172 rhowater = 0.158 - rhobone = 0.326 + rhobone = 0.326 uim = np.zeros(im.shape, dtype=np.float32) - uim[im<=0] = muwater * ( 1+im[im<=0]*1e-3 ) + uim[im <= 0] = muwater * (1 + im[im <= 0] * 1e-3) uim[im> 0] = muwater * \ ( 1+im[im>0]*1e-3 * rhowater/muwater*(mubone-muwater)/(rhobone-rhowater) ) # remove negative values - uim[uim<0] = 0 + uim[uim < 0] = 0 return uim @@ -279,10 +253,11 @@ def mudcm2nii(datain, Cnt): mu, pos, ornt = nimpa.dcm2im(datain['mumapDCM']) mu *= 0.0001 A = pos['AFFINE'] - A[0,0] *= -1 - A[0,3] *= -1 - A[1,3] += A[1,1] - nimpa.array2nii(mu[:,::-1,:], A, os.path.join(os.path.dirname(datain['mumapDCM']),'mu.nii.gz')) + A[0, 0] *= -1 + A[0, 3] *= -1 + A[1, 3] += A[1, 1] + nimpa.array2nii(mu[:, ::-1, :], A, + os.path.join(os.path.dirname(datain['mumapDCM']), 'mu.nii.gz')) #------get necessary data for creating a blank reference image (to which resample)----- # gantry offset @@ -292,22 +267,22 @@ def mudcm2nii(datain, Cnt): p = re.compile(r'start horizontal bed position.*\d{1,3}\.*\d*') m = p.search(ihdr) fi = ihdr[m.start():m.end()].find('=') - hbedpos = 0.1*float(ihdr[m.start()+fi+1:m.end()]) + hbedpos = 0.1 * float(ihdr[m.start() + fi + 1:m.end()]) - B = np.diag(np.array([-10*Cnt['SO_VXX'], 10*Cnt['SO_VXY'], 10*Cnt['SO_VXZ'], 1])) - B[0,3] = 10*(.5*Cnt['SO_IMX']*Cnt['SO_VXX'] + goff[0]) - B[1,3] = 10*((-.5*Cnt['SO_IMY']+1)*Cnt['SO_VXY'] - goff[1]) - B[2,3] = 10*((-.5*Cnt['SO_IMZ']+1)*Cnt['SO_VXZ'] - goff[2] + hbedpos) + B = np.diag(np.array([-10 * Cnt['SO_VXX'], 10 * Cnt['SO_VXY'], 10 * Cnt['SO_VXZ'], 1])) + 
B[0, 3] = 10 * (.5 * Cnt['SO_IMX'] * Cnt['SO_VXX'] + goff[0]) + B[1, 3] = 10 * ((-.5 * Cnt['SO_IMY'] + 1) * Cnt['SO_VXY'] - goff[1]) + B[2, 3] = 10 * ((-.5 * Cnt['SO_IMZ'] + 1) * Cnt['SO_VXZ'] - goff[2] + hbedpos) im = np.zeros((Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']), dtype=np.float32) - nimpa.array2nii(im, B, os.path.join(os.path.dirname(datain['mumapDCM']),'muref.nii.gz')) + nimpa.array2nii(im, B, os.path.join(os.path.dirname(datain['mumapDCM']), 'muref.nii.gz')) # ------------------------------------------------------------------------------------- - fmu = os.path.join(os.path.dirname(datain['mumapDCM']),'mu_r.nii.gz') - if os.path.isfile( Cnt['RESPATH'] ): - run( [ Cnt['RESPATH'], - '-ref', os.path.join(os.path.dirname(datain['mumapDCM']),'muref.nii.gz'), - '-flo', os.path.join(os.path.dirname(datain['mumapDCM']),'mu.nii.gz'), - '-res', fmu, - '-pad', '0'] ) + fmu = os.path.join(os.path.dirname(datain['mumapDCM']), 'mu_r.nii.gz') + if os.path.isfile(Cnt['RESPATH']): + run([ + Cnt['RESPATH'], '-ref', + os.path.join(os.path.dirname(datain['mumapDCM']), 'muref.nii.gz'), '-flo', + os.path.join(os.path.dirname(datain['mumapDCM']), 'mu.nii.gz'), '-res', fmu, '-pad', + '0']) else: log.error('path to resampling executable is incorrect!') raise IOError('Error launching NiftyReg for image resampling.') @@ -316,15 +291,15 @@ def mudcm2nii(datain, Cnt): def obj_mumap( - datain, - params=None, - outpath='', - comment='', - store=False, - store_npy=False, - gantry_offset=True, - del_auxilary=True, - ): + datain, + params=None, + outpath='', + comment='', + store=False, + store_npy=False, + gantry_offset=True, + del_auxilary=True, +): '''Get the object mu-map from DICOM images''' if params is None: params = {} @@ -338,10 +313,10 @@ def obj_mumap( Cnt = rs.get_mmr_constants() # output folder - if outpath=='': - fmudir = os.path.join( datain['corepath'], 'mumap-obj' ) + if outpath == '': + fmudir = os.path.join(datain['corepath'], 'mumap-obj') else: - fmudir = 
os.path.join( outpath, 'mumap-obj' ) + fmudir = os.path.join(outpath, 'mumap-obj') nimpa.create_dir(fmudir) #> ref file name @@ -365,25 +340,21 @@ def obj_mumap( tstmp = nimpa.time_stamp(simple_ascii=True) # find residual(s) from previous runs and delete them - resdcm = glob.glob( os.path.join(fmudir, '*'+fnii+'*.nii*') ) + resdcm = glob.glob(os.path.join(fmudir, '*' + fnii + '*.nii*')) for d in resdcm: os.remove(d) # convert the DICOM mu-map images to nii - run( [ Cnt['DCM2NIIX'], '-f', fnii+tstmp, '-o', fmudir, datain['mumapDCM'] ] ) + run([Cnt['DCM2NIIX'], '-f', fnii + tstmp, '-o', fmudir, datain['mumapDCM']]) #files for the T1w, pick one: - fmunii = glob.glob( os.path.join(fmudir, '*'+fnii+tstmp+'*.nii*') )[0] + fmunii = glob.glob(os.path.join(fmudir, '*' + fnii + tstmp + '*.nii*'))[0] # fmunii = glob.glob( os.path.join(datain['mumapDCM'], '*converted*.nii*') ) # fmunii = fmunii[0] # the converted nii image resample to the reference size - fmu = os.path.join(fmudir, comment+'mumap_tmp.nii.gz') - if os.path.isfile( Cnt['RESPATH'] ): - cmd = [ Cnt['RESPATH'], - '-ref', fmuref, - '-flo', fmunii, - '-res', fmu, - '-pad', '0'] + fmu = os.path.join(fmudir, comment + 'mumap_tmp.nii.gz') + if os.path.isfile(Cnt['RESPATH']): + cmd = [Cnt['RESPATH'], '-ref', fmuref, '-flo', fmunii, '-res', fmu, '-pad', '0'] if log.getEffectiveLevel() > logging.INFO: cmd.append('-voff') run(cmd) @@ -395,10 +366,10 @@ def obj_mumap( # get the affine transform A = nim.get_sform() mu = nim.get_fdata(dtype=np.float32) - mu = np.transpose(mu[:,::-1,::-1], (2, 1, 0)) + mu = np.transpose(mu[:, ::-1, ::-1], (2, 1, 0)) # convert to mu-values - mu = np.float32(mu)/1e4 - mu[mu<0] = 0 + mu = np.float32(mu) / 1e4 + mu[mu < 0] = 0 #> return image dictionary with the image itself and some other stats mu_dct = dict(im=mu, affine=A) @@ -413,8 +384,8 @@ def obj_mumap( if store: # with this file name - fmumap = os.path.join(fmudir, 'mumap-from-DICOM_no-alignment'+comment+'.nii.gz') - 
nimpa.array2nii(mu[::-1,::-1,:], A, fmumap) + fmumap = os.path.join(fmudir, 'mumap-from-DICOM_no-alignment' + comment + '.nii.gz') + nimpa.array2nii(mu[::-1, ::-1, :], A, fmumap) mu_dct['fim'] = fmumap if del_auxilary: @@ -422,7 +393,8 @@ def obj_mumap( os.remove(fmunii) os.remove(fmu) - if [f for f in os.listdir(fmudir) if not f.startswith('.') and not f.endswith('.json')] == []: + if [f for f in os.listdir(fmudir) + if not f.startswith('.') and not f.endswith('.json')] == []: shutil.rmtree(fmudir) return mu_dct @@ -434,25 +406,26 @@ def obj_mumap( def align_mumap( - datain, - scanner_params=None, - outpath='', - reg_tool='niftyreg', - use_stored=False, - hst=None, - t0=0, t1=0, - itr=2, - faff='', - fpet='', - fcomment='', - store=False, - store_npy=False, - petopt='ac', - musrc='ute', # another option is pct for mu-map source - ute_name='UTE2', - del_auxilary=True, - verbose=False, - ): + datain, + scanner_params=None, + outpath='', + reg_tool='niftyreg', + use_stored=False, + hst=None, + t0=0, + t1=0, + itr=2, + faff='', + fpet='', + fcomment='', + store=False, + store_npy=False, + petopt='ac', + musrc='ute', # another option is pct for mu-map source + ute_name='UTE2', + del_auxilary=True, + verbose=False, +): ''' Align the a pCT or MR-derived mu-map to a PET image reconstructed to chosen specifications (e.g., with/without attenuation and scatter corrections) @@ -462,12 +435,11 @@ def align_mumap( if scanner_params is None: scanner_params = {} - #> output folder - if outpath=='': - opth = os.path.join( datain['corepath'], 'mumap-obj' ) + if outpath == '': + opth = os.path.join(datain['corepath'], 'mumap-obj') else: - opth = os.path.join( outpath, 'mumap-obj' ) + opth = os.path.join(outpath, 'mumap-obj') #> create the folder, if not existent nimpa.create_dir(opth) @@ -477,7 +449,7 @@ def align_mumap( nimpa.create_dir(tmpdir) #> get the timing of PET if affine not given - if faff=='' and not hst is None and isinstance(hst, dict) and 't0' in hst: + if faff == '' 
and not hst is None and isinstance(hst, dict) and 't0' in hst: t0 = hst['t0'] t1 = hst['t1'] @@ -494,7 +466,7 @@ def align_mumap( + str(hst['t0'])+'-'+str(hst['t1'])+'_'+petopt.upper()\ + fcomment fmupath = os.path.join(opth, fmu_stored) - if os.path.isfile( fmupath ): + if os.path.isfile(fmupath): mudct_stored = nimpa.getnii(fmupath, output='all') #> create output dictionary mu_dct['im'] = mudct_stored['im'] @@ -521,17 +493,16 @@ def align_mumap( if 'txLUT' in scanner_params: hst = mmrhist(datain, scanner_params, t0=t0, t1=t1) else: - raise ValueError( - 'Full scanner are parameters not provided\ + raise ValueError('Full scanner are parameters not provided\ but are required for histogramming.') #========================================================= #-get hardware mu-map if 'hmumap' in datain and os.path.isfile(datain['hmumap']): muh = np.load(datain['hmumap'], allow_pickle=True)["hmu"] - (log.info if verbose else log.debug)( - 'loaded hardware mu-map from file:\n{}'.format(datain['hmumap'])) - elif outpath!='': + (log.info if verbose else log.debug)('loaded hardware mu-map from file:\n{}'.format( + datain['hmumap'])) + elif outpath != '': hmupath = os.path.join(outpath, "mumap-hdw", "hmumap.npz") if os.path.isfile(hmupath): muh = np.load(hmupath, allow_pickle=True)["hmu"] @@ -552,133 +523,112 @@ def align_mumap( #-it will be generated by reconstructing PET image, with some or no corrections if not os.path.isfile(faff): # first recon pet to get the T1 aligned to it - if petopt=='qnt': + if petopt == 'qnt': # --------------------------------------------- # OPTION 1 (quantitative recon with all corrections using MR-based mu-map) # get UTE object mu-map (may not be in register with the PET data) - mudic = obj_mumap( - datain, - Cnt, - outpath=tmpdir, - del_auxilary=del_auxilary) + mudic = obj_mumap(datain, Cnt, outpath=tmpdir, del_auxilary=del_auxilary) muo = mudic['im'] # reconstruct PET image with UTE mu-map to which co-register T1w - recout = mmrrec.osemone( - 
datain, [muh, muo], - hst, scanner_params, - recmod=3, itr=itr, fwhm=0., - fcomment=fcomment+'_QNT-UTE', - outpath=os.path.join(outpath, 'PET', 'positioning'), - store_img=True) - elif petopt=='nac': + recout = mmrrec.osemone(datain, [muh, muo], hst, scanner_params, recmod=3, itr=itr, + fwhm=0., fcomment=fcomment + '_QNT-UTE', + outpath=os.path.join(outpath, 'PET', + 'positioning'), store_img=True) + elif petopt == 'nac': # --------------------------------------------- # OPTION 2 (recon without any corrections for scatter and attenuation) # reconstruct PET image with UTE mu-map to which co-register T1w muo = np.zeros(muh.shape, dtype=muh.dtype) - recout = mmrrec.osemone( - datain, [muh, muo], - hst, scanner_params, - recmod=1, itr=itr, fwhm=0., - fcomment=fcomment+'_NAC', - outpath=os.path.join(outpath,'PET', 'positioning'), - store_img=True) - elif petopt=='ac': + recout = mmrrec.osemone(datain, [muh, muo], hst, scanner_params, recmod=1, itr=itr, + fwhm=0., fcomment=fcomment + '_NAC', + outpath=os.path.join(outpath, 'PET', + 'positioning'), store_img=True) + elif petopt == 'ac': # --------------------------------------------- # OPTION 3 (recon with attenuation correction only but no scatter) # reconstruct PET image with UTE mu-map to which co-register T1w - mudic = obj_mumap( - datain, - Cnt, - outpath=tmpdir, - del_auxilary=del_auxilary) + mudic = obj_mumap(datain, Cnt, outpath=tmpdir, del_auxilary=del_auxilary) muo = mudic['im'] - recout = mmrrec.osemone( - datain, [muh, muo], - hst, scanner_params, - recmod=1, itr=itr, fwhm=0., - fcomment=fcomment+'_AC-UTE', - outpath=os.path.join(outpath,'PET', 'positioning'), - store_img=True) + recout = mmrrec.osemone(datain, [muh, muo], hst, scanner_params, recmod=1, itr=itr, + fwhm=0., fcomment=fcomment + '_AC-UTE', + outpath=os.path.join(outpath, 'PET', + 'positioning'), store_img=True) fpet = recout.fpet mu_dct['fpet'] = fpet #------------------------------ - if musrc=='ute' and ute_name in datain and 
os.path.exists(datain[ute_name]): + if musrc == 'ute' and ute_name in datain and os.path.exists(datain[ute_name]): # change to NIfTI if the UTE sequence is in DICOM files (folder) if os.path.isdir(datain[ute_name]): - fnew = os.path.basename(datain[ute_name]) + fnew = os.path.basename(datain[ute_name]) run([Cnt['DCM2NIIX'], '-f', fnew, datain[ute_name]]) - fute = glob.glob(os.path.join(datain[ute_name], fnew+'*nii*'))[0] + fute = glob.glob(os.path.join(datain[ute_name], fnew + '*nii*'))[0] elif os.path.isfile(datain[ute_name]): fute = datain[ute_name] # get the affine transformation - if reg_tool=='spm': - regdct = nimpa.coreg_spm( - fpet, - fute, - outpath=os.path.join(outpath,'PET', 'positioning') - ) - elif reg_tool=='niftyreg': + if reg_tool == 'spm': + regdct = nimpa.coreg_spm(fpet, fute, + outpath=os.path.join(outpath, 'PET', 'positioning')) + elif reg_tool == 'niftyreg': regdct = nimpa.affine_niftyreg( fpet, fute, - outpath=os.path.join(outpath,'PET', 'positioning'), - #fcomment=fcomment, - executable = Cnt['REGPATH'], - omp = multiprocessing.cpu_count()/2, - rigOnly = True, - affDirect = False, + outpath=os.path.join(outpath, 'PET', 'positioning'), + #fcomment=fcomment, + executable=Cnt['REGPATH'], + omp=multiprocessing.cpu_count() / 2, + rigOnly=True, + affDirect=False, maxit=5, speed=True, - pi=50, pv=50, - smof=0, smor=0, + pi=50, + pv=50, + smof=0, + smor=0, rmsk=True, fmsk=True, - rfwhm=15., #millilitres + rfwhm=15., #millilitres rthrsh=0.05, - ffwhm = 15., #millilitres + ffwhm=15., #millilitres fthrsh=0.05, - verbose=verbose - ) + verbose=verbose) else: raise ValueError('unknown registration tool requested') faff_mrpet = regdct['faff'] - elif musrc=='pct': + elif musrc == 'pct': ft1w = nimpa.pick_t1w(datain) - if reg_tool=='spm': - regdct = nimpa.coreg_spm( - fpet, - ft1w, - outpath=os.path.join(outpath,'PET', 'positioning') - ) - elif reg_tool=='niftyreg': + if reg_tool == 'spm': + regdct = nimpa.coreg_spm(fpet, ft1w, + outpath=os.path.join(outpath, 
'PET', 'positioning')) + elif reg_tool == 'niftyreg': regdct = nimpa.affine_niftyreg( fpet, ft1w, - outpath=os.path.join(outpath,'PET', 'positioning'), - executable = Cnt['REGPATH'], - omp = multiprocessing.cpu_count()/2, - rigOnly = True, - affDirect = False, + outpath=os.path.join(outpath, 'PET', 'positioning'), + executable=Cnt['REGPATH'], + omp=multiprocessing.cpu_count() / 2, + rigOnly=True, + affDirect=False, maxit=5, speed=True, - pi=50, pv=50, - smof=0, smor=0, + pi=50, + pv=50, + smof=0, + smor=0, rmsk=True, fmsk=True, - rfwhm=15., #millilitres + rfwhm=15., #millilitres rthrsh=0.05, - ffwhm = 15., #millilitres + ffwhm=15., #millilitres fthrsh=0.05, - verbose=verbose - ) + verbose=verbose) else: raise ValueError('unknown registration tool requested') @@ -694,8 +644,7 @@ def align_mumap( raise IOError('e> the reference PET should be supplied with the affine.') #> output file name for the aligned mu-maps - if musrc=='pct': - + if musrc == 'pct': #> convert to mu-values before resampling to avoid artefacts with negative values nii = nib.load(datain['pCT']) @@ -705,20 +654,19 @@ def align_mumap( fflo = os.path.join(tmpdir, 'pct2mu-not-aligned.nii.gz') nib.save(nii_mu, fflo) - freg = os.path.join(opth, 'pct2mu-aligned-'+fcomment+'.nii.gz') + freg = os.path.join(opth, 'pct2mu-aligned-' + fcomment + '.nii.gz') - - elif musrc=='ute': - freg = os.path.join(opth, 'UTE-res-tmp'+fcomment+'.nii.gz') + elif musrc == 'ute': + freg = os.path.join(opth, 'UTE-res-tmp' + fcomment + '.nii.gz') if 'UTE' not in datain: fnii = 'converted-from-DICOM_' tstmp = nimpa.time_stamp(simple_ascii=True) # convert the DICOM mu-map images to nii if 'mumapDCM' not in datain: raise IOError('DICOM with the UTE mu-map are not given.') - run( [ Cnt['DCM2NIIX'], '-f', fnii+tstmp, '-o', opth, datain['mumapDCM'] ] ) + run([Cnt['DCM2NIIX'], '-f', fnii + tstmp, '-o', opth, datain['mumapDCM']]) #files for the T1w, pick one: - fflo = glob.glob( os.path.join(opth, '*'+fnii+tstmp+'*.nii*') )[0] + fflo = 
glob.glob(os.path.join(opth, '*' + fnii + tstmp + '*.nii*'))[0] else: if os.path.isfile(datain['UTE']): fflo = datain['UTE'] @@ -727,44 +675,31 @@ def align_mumap( #> call the resampling routine to get the pCT/UTE in place if reg_tool == "spm": - nimpa.resample_spm( - fpet, - fflo, - faff_mrpet, - fimout=freg, - del_ref_uncmpr=True, - del_flo_uncmpr=True, - del_out_uncmpr=True - ) + nimpa.resample_spm(fpet, fflo, faff_mrpet, fimout=freg, del_ref_uncmpr=True, + del_flo_uncmpr=True, del_out_uncmpr=True) else: - nimpa.resample_niftyreg( - fpet, - fflo, - faff_mrpet, - fimout=freg, - executable=Cnt['RESPATH'], - verbose=verbose) - + nimpa.resample_niftyreg(fpet, fflo, faff_mrpet, fimout=freg, executable=Cnt['RESPATH'], + verbose=verbose) #-get the NIfTI of registered image nim = nib.load(freg) - A = nim.affine + A = nim.affine imreg = nim.get_fdata(dtype=np.float32) - imreg = imreg[:,::-1,::-1] + imreg = imreg[:, ::-1, ::-1] imreg = np.transpose(imreg, (2, 1, 0)) #-convert to mu-values; sort out the file name too. 
- if musrc=='pct': + if musrc == 'pct': mu = imreg - elif musrc=='ute': - mu = np.float32(imreg)/1e4 + elif musrc == 'ute': + mu = np.float32(imreg) / 1e4 #-remove the converted file from DICOMs os.remove(fflo) else: raise NameError('Confused o_O') #> get rid of negatives and nans - mu[mu<0] = 0 + mu[mu < 0] = 0 mu[np.isnan(mu)] = 0 #> return image dictionary with the image itself and other parameters @@ -774,7 +709,7 @@ def align_mumap( if store or store_npy: nimpa.create_dir(opth) - if faff=='': + if faff == '': fname = fnm + '-aligned-to_t'\ + str(hst['t0'])+'-'+str(hst['t1'])+'_'+petopt.upper()\ + fcomment @@ -788,13 +723,13 @@ def align_mumap( if store: #> NIfTI fmu = os.path.join(opth, fname + '.nii.gz') - nimpa.array2nii(mu[::-1,::-1,:], A, fmu) + nimpa.array2nii(mu[::-1, ::-1, :], A, fmu) mu_dct['fim'] = fmu if del_auxilary: os.remove(freg) - if musrc=='ute' and not os.path.isfile(faff): + if musrc == 'ute' and not os.path.isfile(faff): os.remove(fute) shutil.rmtree(tmpdir) @@ -806,21 +741,8 @@ def align_mumap( #--------------------------------------------------------------------------------- -def pct_mumap( - datain, - scanner_params, - hst=None, - t0=0, t1=0, - itr=2, - petopt='ac', - faff='', - fpet='', - fcomment='', - outpath='', - store_npy = False, - store=False, - verbose=False - ): +def pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', faff='', fpet='', + fcomment='', outpath='', store_npy=False, store=False, verbose=False): ''' GET THE MU-MAP from pCT IMAGE (which is in T1w space) * the mu-map will be registered to PET which will be reconstructed for time frame t0-t1 @@ -831,7 +753,7 @@ def pct_mumap( hst = [] # constants, transaxial and axial LUTs are extracted - Cnt = scanner_params['Cnt'] + Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] @@ -846,11 +768,11 @@ def pct_mumap( # get hardware mu-map if datain.get("hmumap", "").endswith(".npz") and 
os.path.isfile(datain["hmumap"]): muh = np.load(datain["hmumap"], allow_pickle=True)["hmu"] - (log.info if verbose else log.debug)( - 'loaded hardware mu-map from file:\n{}'.format(datain['hmumap'])) + (log.info if verbose else log.debug)('loaded hardware mu-map from file:\n{}'.format( + datain['hmumap'])) elif outpath: hmupath = os.path.join(outpath, "mumap-hdw", "hmumap.npz") - if os.path.isfile( hmupath ): + if os.path.isfile(hmupath): muh = np.load(hmupath, allow_pickle=True)["hmu"] datain['hmumap'] = hmupath else: @@ -868,45 +790,36 @@ def pct_mumap( mu_dct = {} if not os.path.isfile(faff): # first recon pet to get the T1 aligned to it - if petopt=='qnt': + if petopt == 'qnt': # --------------------------------------------- # OPTION 1 (quantitative recon with all corrections using MR-based mu-map) # get UTE object mu-map (may not be in register with the PET data) mudic = obj_mumap(datain, Cnt) muo = mudic['im'] # reconstruct PET image with UTE mu-map to which co-register T1w - recout = mmrrec.osemone( - datain, [muh, muo], - hst, scanner_params, - recmod=3, itr=itr, fwhm=0., - fcomment=fcomment+'_qntUTE', - outpath=os.path.join(outpath, 'PET', 'positioning'), - store_img=True) - elif petopt=='nac': + recout = mmrrec.osemone(datain, [muh, muo], hst, scanner_params, recmod=3, itr=itr, + fwhm=0., fcomment=fcomment + '_qntUTE', + outpath=os.path.join(outpath, 'PET', + 'positioning'), store_img=True) + elif petopt == 'nac': # --------------------------------------------- # OPTION 2 (recon without any corrections for scatter and attenuation) # reconstruct PET image with UTE mu-map to which co-register T1w muo = np.zeros(muh.shape, dtype=muh.dtype) - recout = mmrrec.osemone( - datain, [muh, muo], - hst, scanner_params, - recmod=1, itr=itr, fwhm=0., - fcomment=fcomment+'_NAC', - outpath=os.path.join(outpath, 'PET', 'positioning'), - store_img=True) - elif petopt=='ac': + recout = mmrrec.osemone(datain, [muh, muo], hst, scanner_params, recmod=1, itr=itr, + fwhm=0., 
fcomment=fcomment + '_NAC', + outpath=os.path.join(outpath, 'PET', + 'positioning'), store_img=True) + elif petopt == 'ac': # --------------------------------------------- # OPTION 3 (recon with attenuation correction only but no scatter) # reconstruct PET image with UTE mu-map to which co-register T1w mudic = obj_mumap(datain, Cnt, outpath=outpath) muo = mudic['im'] - recout = mmrrec.osemone( - datain, [muh, muo], - hst, scanner_params, - recmod=1, itr=itr, fwhm=0., - fcomment=fcomment+'_AC', - outpath=os.path.join(outpath, 'PET', 'positioning'), - store_img=True) + recout = mmrrec.osemone(datain, [muh, muo], hst, scanner_params, recmod=1, itr=itr, + fwhm=0., fcomment=fcomment + '_AC', + outpath=os.path.join(outpath, 'PET', + 'positioning'), store_img=True) fpet = recout.fpet mu_dct['fpet'] = fpet @@ -915,52 +828,47 @@ def pct_mumap( # get the affine transformation ft1w = nimpa.pick_t1w(datain) try: - regdct = nimpa.coreg_spm( - fpet, - ft1w, - outpath=os.path.join(outpath,'PET', 'positioning') - ) + regdct = nimpa.coreg_spm(fpet, ft1w, + outpath=os.path.join(outpath, 'PET', 'positioning')) except: regdct = nimpa.affine_niftyreg( fpet, ft1w, - outpath=os.path.join(outpath,'PET', 'positioning'), - #fcomment=fcomment, - executable = Cnt['REGPATH'], - omp = multiprocessing.cpu_count()/2, - rigOnly = True, - affDirect = False, + outpath=os.path.join(outpath, 'PET', 'positioning'), + #fcomment=fcomment, + executable=Cnt['REGPATH'], + omp=multiprocessing.cpu_count() / 2, + rigOnly=True, + affDirect=False, maxit=5, speed=True, - pi=50, pv=50, - smof=0, smor=0, + pi=50, + pv=50, + smof=0, + smor=0, rmsk=True, fmsk=True, - rfwhm=15., #millilitres + rfwhm=15., #millilitres rthrsh=0.05, - ffwhm = 15., #millilitres + ffwhm=15., #millilitres fthrsh=0.05, - verbose=verbose - ) + verbose=verbose) faff = regdct['faff'] #------------------------------ # pCT file name - if outpath=='': + if outpath == '': pctdir = os.path.dirname(datain['pCT']) else: pctdir = os.path.join(outpath, 
'mumap-obj') mmraux.create_dir(pctdir) - fpct = os.path.join(pctdir, 'pCT_r_tmp'+fcomment+'.nii.gz') + fpct = os.path.join(pctdir, 'pCT_r_tmp' + fcomment + '.nii.gz') #> call the resampling routine to get the pCT in place - if os.path.isfile( Cnt['RESPATH'] ): - cmd = [Cnt['RESPATH'], - '-ref', fpet, - '-flo', datain['pCT'], - '-trans', faff, - '-res', fpct, + if os.path.isfile(Cnt['RESPATH']): + cmd = [ + Cnt['RESPATH'], '-ref', fpet, '-flo', datain['pCT'], '-trans', faff, '-res', fpct, '-pad', '0'] if log.getEffectiveLevel() > logging.INFO: cmd.append('-voff') @@ -969,17 +877,16 @@ def pct_mumap( log.error('path to resampling executable is incorrect!') raise IOError('Incorrect path to executable!') - # get the NIfTI of the pCT nim = nib.load(fpct) - A = nim.get_sform() + A = nim.get_sform() pct = nim.get_fdata(dtype=np.float32) - pct = pct[:,::-1,::-1] + pct = pct[:, ::-1, ::-1] pct = np.transpose(pct, (2, 1, 0)) # convert the HU units to mu-values mu = hu2mu(pct) # get rid of negatives - mu[mu<0] = 0 + mu[mu < 0] = 0 # return image dictionary with the image itself and other parameters mu_dct['im'] = mu @@ -988,8 +895,8 @@ def pct_mumap( if store: # now save to numpy array and NIfTI in this folder - if outpath=='': - pctumapdir = os.path.join( datain['corepath'], 'mumap-obj' ) + if outpath == '': + pctumapdir = os.path.join(datain['corepath'], 'mumap-obj') else: pctumapdir = os.path.join(outpath, 'mumap-obj') mmraux.create_dir(pctumapdir) @@ -999,8 +906,8 @@ def pct_mumap( np.savez(fnp, mu=mu, A=A) #> NIfTI - fmu = os.path.join(pctumapdir, 'mumap-pCT' +fcomment+ '.nii.gz') - nimpa.array2nii(mu[::-1,::-1,:], A, fmu) + fmu = os.path.join(pctumapdir, 'mumap-pCT' + fcomment + '.nii.gz') + nimpa.array2nii(mu[::-1, ::-1, :], A, fmu) mu_dct['fim'] = fmu datain['mumapCT'] = fmu @@ -1016,21 +923,21 @@ def hdr_mu(datain, Cnt): '''Get the headers from DICOM data file''' #get one of the DICOM files of the mu-map if 'mumapDCM' in datain: - files = 
glob.glob(os.path.join(datain['mumapDCM'],'*.dcm')) - files.extend(glob.glob(os.path.join(datain['mumapDCM'],'*.DCM'))) - files.extend(glob.glob(os.path.join(datain['mumapDCM'],'*.ima'))) - files.extend(glob.glob(os.path.join(datain['mumapDCM'],'*.IMA'))) + files = glob.glob(os.path.join(datain['mumapDCM'], '*.dcm')) + files.extend(glob.glob(os.path.join(datain['mumapDCM'], '*.DCM'))) + files.extend(glob.glob(os.path.join(datain['mumapDCM'], '*.ima'))) + files.extend(glob.glob(os.path.join(datain['mumapDCM'], '*.IMA'))) dcmf = files[0] else: raise NameError('no DICOM or DICOM filed found!') - if os.path.isfile( dcmf ): - dhdr = dcm.read_file( dcmf ) + if os.path.isfile(dcmf): + dhdr = dcm.read_file(dcmf) else: log.error('DICOM mMR mu-maps are not valid files!') return None # CSA Series Header Info - if [0x29,0x1020] in dhdr: - csahdr = dhdr[0x29,0x1020].value + if [0x29, 0x1020] in dhdr: + csahdr = dhdr[0x29, 0x1020].value log.info('got CSA mu-map info from the DICOM header.') return csahdr, dhdr @@ -1040,17 +947,17 @@ def hmu_shape(hdr): p = re.compile(r'(?<=:=)\s*\d{1,4}') # x: dim [1] i0 = hdr.find('matrix size[1]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') u = int(p.findall(hdr[i0:i1])[0]) # x: dim [2] i0 = hdr.find('matrix size[2]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') v = int(p.findall(hdr[i0:i1])[0]) # x: dim [3] i0 = hdr.find('matrix size[3]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') w = int(p.findall(hdr[i0:i1])[0]) - return w,v,u + return w, v, u def hmu_voxsize(hdr): @@ -1058,17 +965,17 @@ def hmu_voxsize(hdr): p = re.compile(r'(?<=:=)\s*\d{1,2}[.]\d{1,10}') # x: dim [1] i0 = hdr.find('scale factor (mm/pixel) [1]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') vx = float(p.findall(hdr[i0:i1])[0]) # x: dim [2] i0 = hdr.find('scale factor (mm/pixel) [2]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') vy = float(p.findall(hdr[i0:i1])[0]) # x: dim [3] i0 = 
hdr.find('scale factor (mm/pixel) [3]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') vz = float(p.findall(hdr[i0:i1])[0]) - return np.array([0.1*vz, 0.1*vy, 0.1*vx]) + return np.array([0.1 * vz, 0.1 * vy, 0.1 * vx]) def hmu_origin(hdr): @@ -1076,15 +983,15 @@ def hmu_origin(hdr): p = re.compile(r'(?<=:=)\s*\d{1,5}[.]\d{1,10}') # x: dim [1] i0 = hdr.find('$umap origin (pixels) [1]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') x = float(p.findall(hdr[i0:i1])[0]) # x: dim [2] i0 = hdr.find('$umap origin (pixels) [2]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') y = float(p.findall(hdr[i0:i1])[0]) # x: dim [3] i0 = hdr.find('$umap origin (pixels) [3]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') z = -float(p.findall(hdr[i0:i1])[0]) return np.array([z, y, x]) @@ -1092,20 +999,20 @@ def hmu_origin(hdr): def hmu_offset(hdr): #regular expression to find the origin p = re.compile(r'(?<=:=)\s*\d{1,5}[.]\d{1,10}') - if hdr.find('$origin offset')>0: + if hdr.find('$origin offset') > 0: # x: dim [1] i0 = hdr.find('$origin offset (mm) [1]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') x = float(p.findall(hdr[i0:i1])[0]) # x: dim [2] i0 = hdr.find('$origin offset (mm) [2]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') y = float(p.findall(hdr[i0:i1])[0]) # x: dim [3] i0 = hdr.find('$origin offset (mm) [3]') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') z = -float(p.findall(hdr[i0:i1])[0]) - return np.array([0.1*z, 0.1*y, 0.1*x]) + return np.array([0.1 * z, 0.1 * y, 0.1 * x]) else: return np.array([0.0, 0.0, 0.0]) @@ -1119,14 +1026,14 @@ def rd_hmu(fh): #regular expression to find the file name p = re.compile(r'(?<=:=)\s*\w*[.]\w*') i0 = hdr.find('!name of data file') - i1 = i0+hdr[i0:].find('\n') + i1 = i0 + hdr[i0:].find('\n') fbin = p.findall(hdr[i0:i1])[0] #--read img file-- f = open(os.path.join(os.path.dirname(fh), fbin.strip()), 'rb') im = np.fromfile(f, 
np.float32) f.close() #----------------- - return hdr, im + return hdr, im def get_hmupos(datain, parts, Cnt, outpath=''): @@ -1137,7 +1044,7 @@ def get_hmupos(datain, parts, Cnt, outpath=''): ihdr, csainfo = mmraux.hdr_lm(datain, Cnt) #table position origin fi = csainfo.find(b'TablePositionOrigin') - tpostr = csainfo[fi:fi+200] + tpostr = csainfo[fi:fi + 200] tpo = re.sub(b'[^a-zA-Z0-9.\\-]', b'', tpostr).split(b'M') tpozyx = np.array([float(tpo[-1]), float(tpo[-2]), float(tpo[-3])]) / 10 log.info('table position (z,y,x) (cm): {}'.format(tpozyx)) @@ -1150,15 +1057,15 @@ def get_hmupos(datain, parts, Cnt, outpath=''): #> loop over the indices and find those which are correct found_off = False for i in idxs: - gtostr1 = csamu[ i:i+300 ] + gtostr1 = csamu[i:i + 300] gtostr2 = re.sub(b'[^a-zA-Z0-9.\\-]', b'', gtostr1) # gantry table offset, through conversion of string to float gtoxyz = re.findall(b'(?<=M)-*[\\d]{1,4}\\.[\\d]{6,9}', gtostr2) - gtozyx = np.float32(gtoxyz)[::-1]/10 - if len(gtoxyz)>3: + gtozyx = np.float32(gtoxyz)[::-1] / 10 + if len(gtoxyz) > 3: log.warning('the gantry table offset got more than 3 entries detected--check needed.') gtozyx = gtozyx[-3:] - if abs(gtozyx[0])>20 and abs(gtozyx[1])<20 and abs(gtozyx[2])<2: + if abs(gtozyx[0]) > 20 and abs(gtozyx[1]) < 20 and abs(gtozyx[2]) < 2: found_off = True break @@ -1168,12 +1075,11 @@ def get_hmupos(datain, parts, Cnt, outpath=''): raise ValueError('Could not find the gantry table offset or the offset is unusual.') #-------------------------------------------------------- - # create the folder for hardware mu-maps - if outpath=='': - dirhmu = os.path.join( datain['corepath'], 'mumap-hdw') + if outpath == '': + dirhmu = os.path.join(datain['corepath'], 'mumap-hdw') else: - dirhmu = os.path.join( outpath, 'mumap-hdw') + dirhmu = os.path.join(outpath, 'mumap-hdw') mmraux.create_dir(dirhmu) # get the reference nii image fref = os.path.join(dirhmu, 'hmuref.nii.gz') @@ -1182,35 +1088,35 @@ def 
get_hmupos(datain, parts, Cnt, outpath=''): p = re.compile(r'start horizontal bed position.*\d{1,3}\.*\d*') m = p.search(ihdr) fi = ihdr[m.start():m.end()].find('=') - hbedpos = 0.1*float(ihdr[m.start()+fi+1:m.end()]) + hbedpos = 0.1 * float(ihdr[m.start() + fi + 1:m.end()]) #start vertical bed position p = re.compile(r'start vertical bed position.*\d{1,3}\.*\d*') m = p.search(ihdr) fi = ihdr[m.start():m.end()].find('=') - vbedpos = 0.1*float(ihdr[m.start()+fi+1:m.end()]) + vbedpos = 0.1 * float(ihdr[m.start() + fi + 1:m.end()]) log.info('creating reference NIfTI image for resampling') - B = np.diag(np.array([-10*Cnt['SO_VXX'], 10*Cnt['SO_VXY'], 10*Cnt['SO_VXZ'], 1])) - B[0,3] = 10*(.5*Cnt['SO_IMX'])*Cnt['SO_VXX'] - B[1,3] = 10*( -.5*Cnt['SO_IMY']+1)*Cnt['SO_VXY'] - B[2,3] = 10*((-.5*Cnt['SO_IMZ']+1)*Cnt['SO_VXZ'] + hbedpos ) - nimpa.array2nii( np.zeros((Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']), dtype=np.float32), B, fref) + B = np.diag(np.array([-10 * Cnt['SO_VXX'], 10 * Cnt['SO_VXY'], 10 * Cnt['SO_VXZ'], 1])) + B[0, 3] = 10 * (.5 * Cnt['SO_IMX']) * Cnt['SO_VXX'] + B[1, 3] = 10 * (-.5 * Cnt['SO_IMY'] + 1) * Cnt['SO_VXY'] + B[2, 3] = 10 * ((-.5 * Cnt['SO_IMZ'] + 1) * Cnt['SO_VXZ'] + hbedpos) + nimpa.array2nii(np.zeros((Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']), dtype=np.float32), B, + fref) #define a dictionary of all positions/offsets of hardware mu-maps - hmupos = [None]*5 + hmupos = [None] * 5 hmupos[0] = { - 'TabPosOrg' : tpozyx, #from DICOM of LM file - 'GanTabOff' : gtozyx, #from DICOM of mMR mu-map file - 'HBedPos' : hbedpos, #from Interfile of LM file [cm] - 'VBedPos' : vbedpos, #from Interfile of LM file [cm] - 'niipath' : fref - } + 'TabPosOrg': tpozyx, #from DICOM of LM file + 'GanTabOff': gtozyx, #from DICOM of mMR mu-map file + 'HBedPos': hbedpos, #from Interfile of LM file [cm] + 'VBedPos': vbedpos, #from Interfile of LM file [cm] + 'niipath': fref} #-------------------------------------------------------------------------- # iteratively go 
through the mu-maps and add them as needed for i in parts: - fh = os.path.join(Cnt['HMUDIR'], Cnt['HMULIST'][i-1]) + fh = os.path.join(Cnt['HMUDIR'], Cnt['HMULIST'][i - 1]) # get the interfile header and binary data hdr, im = rd_hmu(fh) #get shape, origin, offset and voxel size @@ -1219,36 +1125,33 @@ def get_hmupos(datain, parts, Cnt, outpath=''): # get the origin, offset and voxel size for the mu-map interfile data org = hmu_origin(hdr) off = hmu_offset(hdr) - vs = hmu_voxsize(hdr) + vs = hmu_voxsize(hdr) # corner voxel position for the interfile image data - vpos = (-org*vs + off + gtozyx - tpozyx) + vpos = (-org * vs + off + gtozyx - tpozyx) #add to the dictionary hmupos[i] = { - 'vpos' : vpos, - 'shape' : s, #from interfile - 'iorg' : org, #from interfile - 'ioff' : off, #from interfile - 'ivs' : vs, #from interfile - 'img' : im, #from interfile - 'niipath' : os.path.join(dirhmu, '_'+Cnt['HMULIST'][i-1].split('.')[0]+'.nii.gz') - } - #save to NIfTI - log.info('creating mu-map for: {}'.format(Cnt['HMULIST'][i-1])) - A = np.diag(np.append(10*vs[::-1], 1)) - A[0,0] *= -1 - A[0,3] = 10*(-vpos[2]) - A[1,3] = -10*((s[1]-1)*vs[1] + vpos[1]) - A[2,3] = -10*((s[0]-1)*vs[0] - vpos[0]) - nimpa.array2nii(im[::-1,::-1,:], A, hmupos[i]['niipath']) + 'vpos': vpos, + 'shape': s, #from interfile + 'iorg': org, #from interfile + 'ioff': off, #from interfile + 'ivs': vs, #from interfile + 'img': im, #from interfile + 'niipath': os.path.join(dirhmu, '_' + Cnt['HMULIST'][i - 1].split('.')[0] + '.nii.gz')} + #save to NIfTI + log.info('creating mu-map for: {}'.format(Cnt['HMULIST'][i - 1])) + A = np.diag(np.append(10 * vs[::-1], 1)) + A[0, 0] *= -1 + A[0, 3] = 10 * (-vpos[2]) + A[1, 3] = -10 * ((s[1] - 1) * vs[1] + vpos[1]) + A[2, 3] = -10 * ((s[0] - 1) * vs[0] - vpos[0]) + nimpa.array2nii(im[::-1, ::-1, :], A, hmupos[i]['niipath']) # resample using nify.reg - fout = os.path.join( os.path.dirname (hmupos[0]['niipath']), - 
'r'+os.path.basename(hmupos[i]['niipath']).split('.')[0]+'.nii.gz' ) - cmd = [ Cnt['RESPATH'], - '-ref', hmupos[0]['niipath'], - '-flo', hmupos[i]['niipath'], - '-res', fout, - '-pad', '0'] + fout = os.path.join(os.path.dirname(hmupos[0]['niipath']), + 'r' + os.path.basename(hmupos[i]['niipath']).split('.')[0] + '.nii.gz') + cmd = [ + Cnt['RESPATH'], '-ref', hmupos[0]['niipath'], '-flo', hmupos[i]['niipath'], '-res', + fout, '-pad', '0'] if log.getEffectiveLevel() > logging.INFO: cmd.append('-voff') run(cmd) @@ -1256,13 +1159,7 @@ def get_hmupos(datain, parts, Cnt, outpath=''): return hmupos -def hdw_mumap( - datain, - hparts, - params, - outpath='', - use_stored=False, - del_interm=True): +def hdw_mumap(datain, hparts, params, outpath='', use_stored=False, del_interm=True): '''Get hardware mu-map components, including bed, coils etc.''' # two ways of passing Cnt are here decoded if 'Cnt' in params: @@ -1270,7 +1167,7 @@ def hdw_mumap( else: Cnt = params - if outpath!='': + if outpath != '': fmudir = os.path.join(outpath, 'mumap-hdw') else: fmudir = os.path.join(datain['corepath'], 'mumap-hdw') @@ -1304,22 +1201,23 @@ def hdw_mumap( imo[:] = 0 for i in hparts: - fin = os.path.join(os.path.dirname (hmupos[0]['niipath']), - 'r'+os.path.basename(hmupos[i]['niipath']).split('.')[0]+'.nii.gz' ) + fin = os.path.join( + os.path.dirname(hmupos[0]['niipath']), + 'r' + os.path.basename(hmupos[i]['niipath']).split('.')[0] + '.nii.gz') nim = nib.load(fin) mu = nim.get_fdata(dtype=np.float32) - mu[mu<0] = 0 + mu[mu < 0] = 0 imo += mu hdr = nimo.header hdr['cal_max'] = np.max(imo) hdr['cal_min'] = np.min(imo) - fmu = os.path.join(os.path.dirname (hmupos[0]['niipath']), 'hardware_umap.nii.gz' ) + fmu = os.path.join(os.path.dirname(hmupos[0]['niipath']), 'hardware_umap.nii.gz') hmu_nii = nib.Nifti1Image(imo, A) nib.save(hmu_nii, fmu) - hmu = np.transpose(imo[:,::-1,::-1], (2, 1, 0)) + hmu = np.transpose(imo[:, ::-1, ::-1], (2, 1, 0)) # save the objects to numpy arrays fnp = 
os.path.join(fmudir, "hmumap.npz") @@ -1334,9 +1232,7 @@ def hdw_mumap( os.remove(fname) #return image dictionary with the image itself and some other stats - hmu_dct = { 'im':hmu, - 'fim':fmu, - 'affine':A} + hmu_dct = {'im': hmu, 'fim': fmu, 'affine': A} if 'fnp' in locals(): hmu_dct['fnp'] = fnp @@ -1356,14 +1252,14 @@ def rmumaps(datain, Cnt, t0=0, t1=0, use_stored=False): muh = np.load(datain["hmumap"], allow_pickle=True)["hmu"] log.info('loaded hardware mu-map from file:\n{}'.format(datain['hmumap'])) else: - hmudic = hdw_mumap(datain, [1,2,4], Cnt) + hmudic = hdw_mumap(datain, [1, 2, 4], Cnt) muh = hmudic['im'] # get pCT mu-map if stored in numpy file and then exit, otherwise do all the processing if os.path.isfile(datain['mumapCT']) and use_stored: mup = np.load(datain["mumapCT"], allow_pickle=True)["mu"] - muh = muh[2*Cnt['RNG_STRT'] : 2*Cnt['RNG_END'], :, :] - mup = mup[2*Cnt['RNG_STRT'] : 2*Cnt['RNG_END'], :, :] + muh = muh[2 * Cnt['RNG_STRT']:2 * Cnt['RNG_END'], :, :] + mup = mup[2 * Cnt['RNG_STRT']:2 * Cnt['RNG_END'], :, :] return [muh, mup] # get UTE object mu-map (may be not in register with the PET data) @@ -1383,10 +1279,8 @@ def rmumaps(datain, Cnt, t0=0, t1=0, use_stored=False): # histogram for reconstruction with UTE mu-map hst = mmrhist.hist(datain, txLUT_, axLUT_, Cnt_, t0=t0, t1=t1) # reconstruct PET image with UTE mu-map to which co-register T1w - recute = mmrrec.osemone( - datain, [muh, muo], hst, params, - recmod=3, itr=4, fwhm=0., store_img=True, fcomment=fcomment+'_QNT-UTE' - ) + recute = mmrrec.osemone(datain, [muh, muo], hst, params, recmod=3, itr=4, fwhm=0., + store_img=True, fcomment=fcomment + '_QNT-UTE') # --- MR T1w if os.path.isfile(datain['T1nii']): ft1w = datain['T1nii'] @@ -1395,24 +1289,22 @@ def rmumaps(datain, Cnt, t0=0, t1=0, use_stored=False): elif os.path.isdir(datain['MRT1W']): # create file name for the converted NIfTI image fnii = 'converted' - run( [ Cnt['DCM2NIIX'], '-f', fnii, datain['T1nii'] ] ) - ft1nii = 
glob.glob( os.path.join(datain['T1nii'], '*converted*.nii*') ) + run([Cnt['DCM2NIIX'], '-f', fnii, datain['T1nii']]) + ft1nii = glob.glob(os.path.join(datain['T1nii'], '*converted*.nii*')) ft1w = ft1nii[0] else: raise IOError('Disaster: no T1w image!') #output for the T1w in register with PET - ft1out = os.path.join(os.path.dirname(ft1w), 'T1w_r'+'.nii.gz') + ft1out = os.path.join(os.path.dirname(ft1w), 'T1w_r' + '.nii.gz') #text file fo rthe affine transform T1w->PET - faff = os.path.join(os.path.dirname(ft1w), fcomment+'mr2pet_affine'+'.txt') #time.strftime('%d%b%y_%H.%M',time.gmtime()) - #> call the registration routine - if os.path.isfile( Cnt['REGPATH'] ): - cmd = [Cnt['REGPATH'], - '-ref', recute.fpet, - '-flo', ft1w, - '-rigOnly', '-speeeeed', - '-aff', faff, - '-res', ft1out] + faff = os.path.join(os.path.dirname(ft1w), fcomment + 'mr2pet_affine' + + '.txt') #time.strftime('%d%b%y_%H.%M',time.gmtime()) + #> call the registration routine + if os.path.isfile(Cnt['REGPATH']): + cmd = [ + Cnt['REGPATH'], '-ref', recute.fpet, '-flo', ft1w, '-rigOnly', '-speeeeed', '-aff', + faff, '-res', ft1out] if log.getEffectiveLevel() > logging.INFO: cmd.append('-voff') run(cmd) @@ -1420,13 +1312,14 @@ def rmumaps(datain, Cnt, t0=0, t1=0, use_stored=False): raise IOError('Path to registration executable is incorrect!') #get the pCT mu-map with the above faff - pmudic = pct_mumap(datain, txLUT, axLUT, Cnt, faff=faff, fpet=recute.fpet, fcomment=fcomment) + pmudic = pct_mumap(datain, txLUT, axLUT, Cnt, faff=faff, fpet=recute.fpet, + fcomment=fcomment) mup = pmudic['im'] - muh = muh[2*Cnt['RNG_STRT'] : 2*Cnt['RNG_END'], :, :] - mup = mup[2*Cnt['RNG_STRT'] : 2*Cnt['RNG_END'], :, :] + muh = muh[2 * Cnt['RNG_STRT']:2 * Cnt['RNG_END'], :, :] + mup = mup[2 * Cnt['RNG_STRT']:2 * Cnt['RNG_END'], :, :] return [muh, mup] else: - muh = muh[2*Cnt['RNG_STRT'] : 2*Cnt['RNG_END'], :, :] - muo = muo[2*Cnt['RNG_STRT'] : 2*Cnt['RNG_END'], :, :] + muh = muh[2 * Cnt['RNG_STRT']:2 * Cnt['RNG_END'], 
:, :] + muo = muo[2 * Cnt['RNG_STRT']:2 * Cnt['RNG_END'], :, :] return [muh, muo] diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index 9494856e..a056da9f 100644 --- a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -1,6 +1,6 @@ """module for pipelined image reconstruction and analysis""" -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" +__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") +__copyright__ = "Copyright 2020" import logging import os @@ -24,71 +24,65 @@ def mmrchain( - datain, # all input data in a dictionary - scanner_params, # all scanner parameters in one dictionary - # containing constants, transaxial and axial - # LUTs. - outpath='', # output path for results - frames=['fluid', [0,0]], # definition of time frames. - mu_h = [], # hardware mu-map. - mu_o = [], # object mu-map. - tAffine = None, # affine transformations for the mu-map for - # each time frame separately. - - itr=4, # number of OSEM iterations - fwhm=0., # Gaussian Post-Smoothing FWHM - psf=None, # Resolution Modelling - recmod = -1, # reconstruction mode: -1: undefined, chosen - # automatically. 3: attenuation and scatter - # correction, 1: attenuation correction - # only, 0: no correction (randoms only). - histo=None, # input histogram (from list-mode data); - # if not given, it will be performed. - - decay_ref_time=None, # decay corrects relative to the reference - # time provided; otherwise corrects to the scan - # start time. - + datain, # all input data in a dictionary + scanner_params, # all scanner parameters in one dictionary + # containing constants, transaxial and axial + # LUTs. + outpath='', # output path for results + frames=['fluid', [0, 0]], # definition of time frames. + mu_h=[], # hardware mu-map. + mu_o=[], # object mu-map. + tAffine=None, # affine transformations for the mu-map for + # each time frame separately. 
+ itr=4, # number of OSEM iterations + fwhm=0., # Gaussian Post-Smoothing FWHM + psf=None, # Resolution Modelling + recmod=-1, # reconstruction mode: -1: undefined, chosen + # automatically. 3: attenuation and scatter + # correction, 1: attenuation correction + # only, 0: no correction (randoms only). + histo=None, # input histogram (from list-mode data); + # if not given, it will be performed. + decay_ref_time=None, # decay corrects relative to the reference + # time provided; otherwise corrects to the scan + # start time. trim=False, trim_scale=2, - trim_interp=0, # interpolation for upsampling used in PVC - trim_memlim=True, # reduced use of memory for machines - # with limited memory (slow though) - - pvcroi=[], # ROI used for PVC. If undefined no PVC - # is performed. - - pvcreg_tool = 'niftyreg', # the registration tool used in PVC - store_rois = False, # stores the image of PVC ROIs - # as defined in pvcroi. - + trim_interp=0, # interpolation for upsampling used in PVC + trim_memlim=True, # reduced use of memory for machines + # with limited memory (slow though) + pvcroi=[], # ROI used for PVC. If undefined no PVC + # is performed. + pvcreg_tool='niftyreg', # the registration tool used in PVC + store_rois=False, # stores the image of PVC ROIs + # as defined in pvcroi. pvcpsf=[], pvcitr=5, - - fcomment='', # text comment used in the file name of - # generated image files - ret_sinos=False,# return prompt, scatter and randoms - # sinograms for each reconstruction - ret_histo=False,# return histogram (LM processing output) for - # each image frame - store_img = True, + fcomment='', # text comment used in the file name of + # generated image files + ret_sinos=False, # return prompt, scatter and randoms + # sinograms for each reconstruction + ret_histo=False, # return histogram (LM processing output) for + # each image frame + store_img=True, store_img_intrmd=False, - store_itr=[], # store any reconstruction iteration in - # the list. 
ignored if the list is empty. + store_itr=[], # store any reconstruction iteration in + # the list. ignored if the list is empty. del_img_intrmd=False, ): - # decompose all the scanner parameters and constants - Cnt = scanner_params['Cnt'] + # decompose all the scanner parameters and constants + Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] # ------------------------------------------------------------------------- # HISOTGRAM PRECEEDS FRAMES - if not histo==None and 'psino' in histo: + if not histo == None and 'psino' in histo: frames = ['fluid', [histo['t0'], histo['t1']]] else: histo = None - log.warning('the given histogram does not contain a prompt sinogram--will generate a histogram.') + log.warning( + 'the given histogram does not contain a prompt sinogram--will generate a histogram.') # FRAMES # check for the provided dynamic frames @@ -105,11 +99,11 @@ def mmrchain( # 2D starting with entry 'fluid' or 'timings' if (isinstance(frames[0], str) and frames[0] in ('fluid', 'timings') - and all([isinstance(t, list) and len(t) == 2 for t in frames[1:]])): + and all([isinstance(t, list) and len(t) == 2 for t in frames[1:]])): t_frms = frames[1:] # if 2D definitions, starting with entry 'def': - elif (isinstance(frames[0], str) and frames[0]=='def' - and all([isinstance(t,list) and len(t)==2 for t in frames[1:]])): + elif (isinstance(frames[0], str) and frames[0] == 'def' + and all([isinstance(t, list) and len(t) == 2 for t in frames[1:]])): # get total time and list of all time frames dfrms = dynamic_timings(frames) t_frms = dfrms[1:] @@ -125,17 +119,16 @@ def mmrchain( in the correct list format: 1D [15,15,30,30,...]\ or 2D list [[2,15], [2,30], ...]') else: - log.error('provided dynamic frames definitions are incorrect (should be a list of definitions).') + log.error( + 'provided dynamic frames definitions are incorrect (should be a list of definitions).') raise TypeError('Wrong data type for dynamic frames') # 
number of dynamic time frames nfrm = len(t_frms) # ------------------------------------------------------------------------- - - # ------------------------------------------------------------------------- # create folders for results - if outpath=='': + if outpath == '': petdir = os.path.join(datain['corepath'], 'reconstructed') fmudir = os.path.join(datain['corepath'], 'mumap-obj') pvcdir = os.path.join(datain['corepath'], 'PRCL') @@ -145,15 +138,15 @@ def mmrchain( pvcdir = os.path.join(outpath, 'PRCL') # folder for co-registered mu-maps (for motion compensation) - fmureg = os.path.join( fmudir, 'registered') + fmureg = os.path.join(fmudir, 'registered') # folder for affine transformation MR/CT->PET - petaff = os.path.join( petdir, 'faffine') + petaff = os.path.join(petdir, 'faffine') # folder for reconstructed images (dynamic or static depending on number of frames). - if nfrm>1: + if nfrm > 1: petimg = os.path.join(petdir, 'multiple-frames') pvcdir = os.path.join(pvcdir, 'multiple-frames') - elif nfrm==1: + elif nfrm == 1: petimg = os.path.join(petdir, 'single-frame') pvcdir = os.path.join(pvcdir, 'single-frame') else: @@ -164,7 +157,6 @@ def mmrchain( nimpa.create_dir(petdir) # ------------------------------------------------------------------------- - # ------------------------------------------------------------------------- # MU-MAPS # get the mu-maps, if given; otherwise will use blank mu-maps. 
@@ -177,15 +169,16 @@ def mmrchain( muhd = obtain_image(mu_h, Cnt, imtype='hardware mu-map') # choose the mode of reconstruction based on the provided (or not) mu-maps - if muod['exists'] and muhd['exists'] and recmod==-1: + if muod['exists'] and muhd['exists'] and recmod == -1: recmod = 3 - elif (muod['exists'] or muhd['exists']) and recmod==-1: + elif (muod['exists'] or muhd['exists']) and recmod == -1: recmod = 1 log.warning('partial mu-map: scatter correction is switched off.') else: - if recmod==-1: + if recmod == -1: recmod = 0 - log.warning('no mu-map provided: scatter and attenuation corrections are switched off.') + log.warning( + 'no mu-map provided: scatter and attenuation corrections are switched off.') # ------------------------------------------------------------------------- #import pdb; pdb.set_trace() @@ -200,7 +193,7 @@ def mmrchain( if tAffine is None: log.info('using the provided mu-map the same way for all frames.') else: - if len(tAffine)!=nfrm: + if len(tAffine) != nfrm: log.error('the number of affine transformations in the list\ has to be the same as the number of dynamic frames!') raise ValueError('Inconsistent number of frames.') @@ -228,12 +221,13 @@ def mmrchain( nimpa.create_dir(petaff) faff_frms = [] for i in range(nfrm): - fout = os.path.join(petaff, 'affine_frame('+str(i)+').txt') + fout = os.path.join(petaff, 'affine_frame(' + str(i) + ').txt') np.savetxt(fout, tAffine[i], fmt='%3.9f') faff_frms.append(fout) log.info('using provided numpy arrays affine transformations for each dynamic frame.') else: - raise ValueError('Affine transformations for each dynamic frame could not be established.') + raise ValueError( + 'Affine transformations for each dynamic frame could not be established.') # ------------------------------------------------------------------------------------- # get ref image for mu-map resampling @@ -263,13 +257,13 @@ def mmrchain( if store_img_intrmd: output['fpeti'] = [] - if fwhm>0: + if fwhm > 0: output['fsmoi'] = 
[] # dynamic images in one numpy array dynim = np.zeros((nfrm, Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMY']), dtype=np.float32) #if asked, output only scatter+randoms sinogram for each frame - if ret_sinos and itr>1 and recmod>2: + if ret_sinos and itr > 1 and recmod > 2: dynmsk = np.zeros((nfrm, Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) dynrsn = np.zeros((nfrm, Cnt['NSN11'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) dynssn = np.zeros((nfrm, Cnt['NSN11'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) @@ -279,7 +273,6 @@ def mmrchain( if ret_histo: hsts = {} - # import pdb; pdb.set_trace() # starting frame index with reasonable prompt data @@ -294,24 +287,26 @@ def mmrchain( # check if there is enough prompt data to do a reconstruction # -------------- log.info('dynamic frame times t0={}, t1={}:'.format(t0, t1)) - if histo==None: + if histo == None: hst = mmrhist(datain, scanner_params, t0=t0, t1=t1) else: hst = histo - log.info(dedent('''\ + log.info( + dedent('''\ ------------------------------------------------------ using provided histogram ------------------------------------------------------''')) if ret_histo: - hsts[str(t0)+'-'+str(t1)] = hst + hsts[str(t0) + '-' + str(t1)] = hst - if np.sum(hst['dhc'])>0.99*np.sum(hst['phc']): - log.warning(dedent('''\ + if np.sum(hst['dhc']) > 0.99 * np.sum(hst['phc']): + log.warning( + dedent('''\ =========================================================================== amount of randoms is the greater part of prompts => omitting reconstruction ===========================================================================''')) - ifrmP = ifrm+1 + ifrmP = ifrm + 1 continue # -------------------- # transform the mu-map if given the affine transformation for each frame @@ -319,15 +314,12 @@ def mmrchain( # create the folder for aligned (registered for motion compensation) mu-maps nimpa.create_dir(fmureg) # the converted nii image resample to the reference size - fmu = os.path.join(fmureg, 
'mumap_dyn_frm'+str(ifrm)+fcomment+'.nii.gz') + fmu = os.path.join(fmureg, 'mumap_dyn_frm' + str(ifrm) + fcomment + '.nii.gz') # command for resampling - if os.path.isfile( Cnt['RESPATH'] ): - cmd = [Cnt['RESPATH'], - '-ref', fmuref, - '-flo', muod['fim'], - '-trans', faff_frms[ifrm], - '-res', fmu, - '-pad', '0'] + if os.path.isfile(Cnt['RESPATH']): + cmd = [ + Cnt['RESPATH'], '-ref', fmuref, '-flo', muod['fim'], '-trans', faff_frms[ifrm], + '-res', fmu, '-pad', '0'] if log.getEffectiveLevel() > log.INFO: cmd.append('-voff') call(cmd) @@ -337,62 +329,52 @@ def mmrchain( muodct = nimpa.getnii(fmu, output='all') muo = muodct['im'] A = muodct['affine'] - muo[muo<0] = 0 + muo[muo < 0] = 0 output['fmureg'].append(fmu) else: muo = muod['im'] #--------------------- # output image file name - if nfrm>1: - frmno = '_frm'+str(ifrm) + if nfrm > 1: + frmno = '_frm' + str(ifrm) else: frmno = '' # run OSEM reconstruction of a single time frame - recimg = mmrrec.osemone(datain, [muhd['im'], muo], - hst, scanner_params, - decay_ref_time=decay_ref_time, - recmod=recmod, itr=itr, fwhm=fwhm, psf=psf, - outpath=petimg, - frmno=frmno, - fcomment=fcomment+'_i', - store_img=store_img_intrmd, - store_itr=store_itr, + recimg = mmrrec.osemone(datain, [muhd['im'], muo], hst, scanner_params, + decay_ref_time=decay_ref_time, recmod=recmod, itr=itr, fwhm=fwhm, + psf=psf, outpath=petimg, frmno=frmno, fcomment=fcomment + '_i', + store_img=store_img_intrmd, store_itr=store_itr, ret_sinos=ret_sinos) # form dynamic Numpy array - if fwhm>0: - dynim[ifrm,:,:,:] = recimg.imsmo + if fwhm > 0: + dynim[ifrm, :, :, :] = recimg.imsmo else: - dynim[ifrm,:,:,:] = recimg.im + dynim[ifrm, :, :, :] = recimg.im - if ret_sinos and itr>1 and recmod>2: - dynpsn[ifrm,:,:,:] = np.squeeze(hst['psino']) - dynssn[ifrm,:,:,:] = np.squeeze(recimg.ssn) - dynrsn[ifrm,:,:,:] = np.squeeze(recimg.rsn) - dynmsk[ifrm,:,:,:] = np.squeeze(recimg.amsk) + if ret_sinos and itr > 1 and recmod > 2: + dynpsn[ifrm, :, :, :] = 
np.squeeze(hst['psino']) + dynssn[ifrm, :, :, :] = np.squeeze(recimg.ssn) + dynrsn[ifrm, :, :, :] = np.squeeze(recimg.rsn) + dynmsk[ifrm, :, :, :] = np.squeeze(recimg.amsk) if store_img_intrmd: output['fpeti'].append(recimg.fpet) - if fwhm>0: + if fwhm > 0: output['fsmoi'].append(recimg.fsmo) - if nfrm==1: output['tuple'] = recimg + if nfrm == 1: output['tuple'] = recimg output['im'] = np.squeeze(dynim) - if ret_sinos and itr>1 and recmod>2: - output['sinos'] = { - 'psino':dynpsn, - 'ssino':dynssn, - 'rsino':dynrsn, - 'amask':dynmsk} + if ret_sinos and itr > 1 and recmod > 2: + output['sinos'] = {'psino': dynpsn, 'ssino': dynssn, 'rsino': dynrsn, 'amask': dynmsk} if ret_histo: output['hst'] = hsts - # ---------------------------------------------------------------------- # trim the PET image # images have to be stored for PVC @@ -404,25 +386,14 @@ def mmrchain( elif 'lm_ima' in datain: fnm = os.path.basename(datain['lm_ima'])[:20] # trim PET and upsample - petu = nimpa.imtrimup( - dynim, - affine=image_affine(datain, Cnt), - scale=trim_scale, - int_order=trim_interp, - outpath=petimg, - fname = fnm, - fcomment=fcomment, - store_img_intrmd=store_img_intrmd, - memlim=trim_memlim, - verbose=log.getEffectiveLevel() - ) - - output.update({'trimmed': { 'im':petu['im'], - 'fpet':petu['fimi'], - 'affine':petu['affine']}} - ) - # ---------------------------------------------------------------------- + petu = nimpa.imtrimup(dynim, affine=image_affine(datain, Cnt), scale=trim_scale, + int_order=trim_interp, outpath=petimg, fname=fnm, fcomment=fcomment, + store_img_intrmd=store_img_intrmd, memlim=trim_memlim, + verbose=log.getEffectiveLevel()) + output.update({ + 'trimmed': {'im': petu['im'], 'fpet': petu['fimi'], 'affine': petu['affine']}}) + # ---------------------------------------------------------------------- # ---------------------------------------------------------------------- #run PVC if requested and required input given @@ -434,8 +405,12 @@ def mmrchain( if not 
pvcpsf: pvcpsf = nimpa.psf_measured(scanner='mmr', scale=trim_scale) else: - if isinstance(pvcpsf, (np.ndarray, np.generic)) and pvcpsf.shape!=(3, 2*Cnt['RSZ_PSF_KRNL']+1): - raise ValueError('the PSF kernel has to be an numpy array with the shape of ({},{})'.format(3, 2*Cnt['RSZ_PSF_KRNL']+1)) + if isinstance( + pvcpsf, + (np.ndarray, np.generic)) and pvcpsf.shape != (3, 2 * Cnt['RSZ_PSF_KRNL'] + 1): + raise ValueError( + 'the PSF kernel has to be an numpy array with the shape of ({},{})'.format( + 3, 2 * Cnt['RSZ_PSF_KRNL'] + 1)) #> file names for NIfTI images of PVC ROIs and PVC corrected PET froi = [] @@ -443,38 +418,30 @@ def mmrchain( #> perform PVC for each time frame dynpvc = np.zeros(petu['im'].shape, dtype=np.float32) - for i in range(ifrmP,nfrm): + for i in range(ifrmP, nfrm): # transform the parcellations (ROIs) if given the affine transformation for each frame if tAffine is None: - log.warning('affine transformation are not provided: will generate for the time frame.') + log.warning( + 'affine transformation are not provided: will generate for the time frame.') faffpvc = None #raise StandardError('No affine transformation') else: faffpvc = faff_frms[i] # chose file name of individual PVC images - if nfrm>1: - fcomment_pvc = '_frm'+str(i)+fcomment + if nfrm > 1: + fcomment_pvc = '_frm' + str(i) + fcomment else: fcomment_pvc = fcomment #============================ # perform PVC - petpvc_dic = nimpa.pvc_iyang( - petu['fimi'][i], - datain, - Cnt, - pvcroi, - pvcpsf, - tool=pvcreg_tool, - itr=pvcitr, - faff=faffpvc, - fcomment=fcomment_pvc, - outpath=pvcdir, - store_rois=store_rois, - store_img=store_img_intrmd) + petpvc_dic = nimpa.pvc_iyang(petu['fimi'][i], datain, Cnt, pvcroi, pvcpsf, + tool=pvcreg_tool, itr=pvcitr, faff=faffpvc, + fcomment=fcomment_pvc, outpath=pvcdir, + store_rois=store_rois, store_img=store_img_intrmd) #============================ - if nfrm>1: - dynpvc[i,:,:,:] = petpvc_dic['im'] + if nfrm > 1: + dynpvc[i, :, :, :] = 
petpvc_dic['im'] else: dynpvc = petpvc_dic['im'] fpvc.append(petpvc_dic['fpet']) @@ -482,18 +449,18 @@ def mmrchain( if store_rois: froi.append(petpvc_dic['froi']) #> update output dictionary - output.update({'impvc':dynpvc}) + output.update({'impvc': dynpvc}) output['fprc'] = petpvc_dic['fprc'] output['imprc'] = petpvc_dic['imprc'] - if store_img_intrmd: output.update({'fpvc':fpvc}) - if store_rois: output.update({'froi':froi}) + if store_img_intrmd: output.update({'fpvc': fpvc}) + if store_rois: output.update({'froi': froi}) # ---------------------------------------------------------------------- if store_img: # description for saving NIFTI image # attenuation number: if only bed present then it is 0.5 - attnum = ( 1*muhd['exists'] + 1*muod['exists'] ) / 2. + attnum = (1 * muhd['exists'] + 1 * muod['exists']) / 2. descrip = 'alg=osem' \ +';att='+str(attnum*(recmod>0)) \ +';sct='+str(1*(recmod>1)) \ @@ -510,10 +477,10 @@ def mmrchain( # NIfTI file name for the full PET image (single or multiple frame) # save the image to NIfTI file - if nfrm==1: + if nfrm == 1: t0 = hst['t0'] t1 = hst['t1'] - if t1==t0: + if t1 == t0: t0 = 0 t1 = hst['dur'] fpet = os.path.join( @@ -521,27 +488,29 @@ def mmrchain( os.path.basename(recimg.fpet)[:8] \ +'_t-'+str(t0)+'-'+str(t1)+'sec' \ +'_itr-'+str(itr) ) - fpeto = fpet+fcomment+'.nii.gz' - nimpa.prc.array2nii( dynim[::-1,::-1,:], recimg.affine, fpeto, descrip=descrip) + fpeto = fpet + fcomment + '.nii.gz' + nimpa.prc.array2nii(dynim[::-1, ::-1, :], recimg.affine, fpeto, descrip=descrip) else: fpet = os.path.join( petimg, os.path.basename(recimg.fpet)[:8]\ +'_nfrm-'+str(nfrm)+'_itr-'+str(itr) ) - fpeto = fpet+fcomment+'.nii.gz' - nimpa.prc.array2nii( dynim[:,::-1,::-1,:], recimg.affine, fpeto, descrip=descrip) + fpeto = fpet + fcomment + '.nii.gz' + nimpa.prc.array2nii(dynim[:, ::-1, ::-1, :], recimg.affine, fpeto, descrip=descrip) # get output file names for trimmed/PVC images if trim: # folder for trimmed and dynamic - pettrim = 
os.path.join( petimg, 'trimmed') + pettrim = os.path.join(petimg, 'trimmed') # make folder nimpa.create_dir(pettrim) # trimming scale added to NIfTI descritoption - descrip_trim = descrip + ';trim_scale='+str(trim_scale) + descrip_trim = descrip + ';trim_scale=' + str(trim_scale) # file name for saving the trimmed image - fpetu = os.path.join(pettrim, os.path.basename(fpet) + '_trimmed-upsampled-scale-'+str(trim_scale)) + fpetu = os.path.join( + pettrim, + os.path.basename(fpet) + '_trimmed-upsampled-scale-' + str(trim_scale)) # in case of PVC if pvcroi: # itertive Yang (iY) added to NIfTI descritoption @@ -551,24 +520,27 @@ def mmrchain( output['trimmed']['fpvc'] = fpvc # update the trimmed image file name - fpetu += fcomment+'.nii.gz' + fpetu += fcomment + '.nii.gz' # store the file name in the output dictionary output['trimmed']['fpet'] = fpetu output['fpet'] = fpeto # save images - if nfrm==1: + if nfrm == 1: if trim: - nimpa.prc.array2nii( petu['im'][::-1,::-1,:], petu['affine'], fpetu, descrip=descrip_trim) + nimpa.prc.array2nii(petu['im'][::-1, ::-1, :], petu['affine'], fpetu, + descrip=descrip_trim) if pvcroi: - nimpa.prc.array2nii( dynpvc[::-1,::-1,:], petu['affine'], fpvc, descrip=descrip_pvc) - elif nfrm>1: + nimpa.prc.array2nii(dynpvc[::-1, ::-1, :], petu['affine'], fpvc, + descrip=descrip_pvc) + elif nfrm > 1: if trim: - nimpa.prc.array2nii( petu['im'][:,::-1,::-1,:], petu['affine'], fpetu, descrip=descrip_trim) + nimpa.prc.array2nii(petu['im'][:, ::-1, ::-1, :], petu['affine'], fpetu, + descrip=descrip_trim) if pvcroi: - nimpa.prc.array2nii( dynpvc[:,::-1,::-1,:], petu['affine'], fpvc, descrip=descrip_pvc) - + nimpa.prc.array2nii(dynpvc[:, ::-1, ::-1, :], petu['affine'], fpvc, + descrip=descrip_pvc) if del_img_intrmd: if pvcroi: @@ -578,7 +550,4 @@ def mmrchain( for fi in petu['fimi']: os.remove(fi) - - - return output diff --git a/niftypet/nipet/lm/mmrhist.py b/niftypet/nipet/lm/mmrhist.py index f0b1708f..4378cc91 100644 --- 
a/niftypet/nipet/lm/mmrhist.py +++ b/niftypet/nipet/lm/mmrhist.py @@ -14,56 +14,42 @@ from .. import mmraux from . import mmr_lmproc # CUDA extension module -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" +__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") +__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) - #================================================================================ # HISTOGRAM THE LIST-MODE DATA #-------------------------------------------------------------------------------- -def mmrhist( - datain, - scanner_params, - t0=0, t1=0, - outpath='', - frms=None, - use_stored=False, - store=False, - cmass_sig=5): +def mmrhist(datain, scanner_params, t0=0, t1=0, outpath='', frms=None, use_stored=False, + store=False, cmass_sig=5): ''' Process the list-mode data and return histogram, head curves, and centre of mass for motion detection. ''' # constants, transaxial and axial LUTs are extracted - Cnt = scanner_params['Cnt'] + Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] - return hist( - datain, txLUT, axLUT, Cnt, - frms=frms, - use_stored=use_stored, - store=store, - outpath=outpath, - t0=t0, t1=t1, - cmass_sig=cmass_sig) + return hist(datain, txLUT, axLUT, Cnt, frms=frms, use_stored=use_stored, store=store, + outpath=outpath, t0=t0, t1=t1, cmass_sig=cmass_sig) def hist( - datain, - txLUT, - axLUT, - Cnt, - t0=0, t1=0, - cmass_sig=5, - frms=None, # np.array([0], dtype=np.uint16), - use_stored=False, - store=False, - outpath=''): - + datain, + txLUT, + axLUT, + Cnt, + t0=0, + t1=0, + cmass_sig=5, + frms=None, # np.array([0], dtype=np.uint16), + use_stored=False, + store=False, + outpath=''): ''' Process list mode data with histogramming and optional bootstrapping: Cnt['BTP'] = 0: no bootstrapping [default]; @@ -71,15 +57,14 @@ def hist( Cnt['BTP'] = 2: parametric bootstrapping (using Poisson distribution with mean = 1) ''' - 
if Cnt['SPN']==1: nsinos=Cnt['NSN1'] - elif Cnt['SPN']==11: nsinos=Cnt['NSN11'] - elif Cnt['SPN']==0: nsinos=Cnt['NSEG0'] + if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] + elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] + elif Cnt['SPN'] == 0: nsinos = Cnt['NSEG0'] log.debug('histogramming with span {}.'.format(Cnt['SPN'])) - if (use_stored is True and 'sinos' in datain and - os.path.basename(datain['sinos']) == f"sinos_s{Cnt['SPN']}_frm-{t0}-{t1}.npz" - ): + if (use_stored is True and 'sinos' in datain + and os.path.basename(datain['sinos']) == f"sinos_s{Cnt['SPN']}_frm-{t0}-{t1}.npz"): hstout = dict(np.load(datain['sinos'], allow_pickle=True)) nitag = len(hstout['phc']) log.debug('acquisition duration by integrating time tags is {} sec.'.format(nitag)) @@ -87,23 +72,24 @@ def hist( elif os.path.isfile(datain['lm_bf']): # gather info about the LM time tags nele, ttags, tpos = mmr_lmproc.lminfo(datain['lm_bf']) - nitag = int((ttags[1]-ttags[0]+999)/1000) + nitag = int((ttags[1] - ttags[0] + 999) / 1000) log.debug('acquisition duration by integrating time tags is {} sec.'.format(nitag)) # adjust frame time if outside the limit - if t1>nitag: t1 = nitag + if t1 > nitag: t1 = nitag # check if the time point is allowed - if t0>=nitag: - raise ValueError('e> the time frame definition is not allowed! (outside acquisition time)') + if t0 >= nitag: + raise ValueError( + 'e> the time frame definition is not allowed! 
(outside acquisition time)') # --------------------------------------- # preallocate all the output arrays VTIME = 2 MXNITAG = 5400 #limit to 1hr and 30mins - if (nitag>MXNITAG): - tn = int(MXNITAG/(1< MXNITAG): + tn = int(MXNITAG / (1 << VTIME)) else: - tn = int((nitag+(1< number of single rates reported for the given second #> the last two bits are used for the number of reports - nsr = (hstout['bck'][1,:,:]>>30) + nsr = (hstout['bck'][1, :, :] >> 30) #> average in a second period - hstout['bck'][0,nsr>0] = hstout['bck'][0,nsr>0] / nsr[nsr>0] + hstout['bck'][0, nsr > 0] = hstout['bck'][0, nsr > 0] / nsr[nsr > 0] #> time indeces when single rates given - tmsk = np.sum(nsr,axis=1)>0 - single_rate = np.copy(hstout['bck'][0,tmsk,:]) + tmsk = np.sum(nsr, axis=1) > 0 + single_rate = np.copy(hstout['bck'][0, tmsk, :]) #> time t = np.arange(nitag) t = t[tmsk] #> get the average bucket singles: - buckets = np.int32( np.sum(single_rate,axis=0)/single_rate.shape[0] ) + buckets = np.int32(np.sum(single_rate, axis=0) / single_rate.shape[0]) log.debug('dynamic and static buckets single rates: COMPLETED.') #=================================================================== # account for the fact that when t0==t1 that means that full dataset is processed - if t0==t1: t1 = t0+nitag - - pdata={ - 't0':t0, - 't1':t1, - 'dur':t1-t0, #duration - 'phc':hstout['phc'], #prompts head curve - 'dhc':hstout['dhc'], #delayeds head curve - 'cmass':cmass, #centre of mass of the radiodistribution in axial direction - 'pvs_sgtl':pvs_sgtl, #sagittal projection views in short intervals - 'pvs_crnl':pvs_crnl, #coronal projection views in short intervals - - 'fansums':hstout['fan'], #fan sums of delayeds for variance reduction of random event sinograms - 'sngl_rate':single_rate, #bucket singles over time - 'tsngl':t, #time points of singles measurements in list-mode data - 'buckets':buckets, #average bucket singles - - 'psino':hstout['psn'].astype(np.uint16), #prompt sinogram - 
'dsino':hstout['dsn'].astype(np.uint16), #delayeds sinogram - 'pssr' :hstout['ssr'] #single-slice rebinned sinogram of prompts + if t0 == t1: t1 = t0 + nitag + + pdata = { + 't0': t0, + 't1': t1, + 'dur': t1 - t0, #duration + 'phc': hstout['phc'], #prompts head curve + 'dhc': hstout['dhc'], #delayeds head curve + 'cmass': cmass, #centre of mass of the radiodistribution in axial direction + 'pvs_sgtl': pvs_sgtl, #sagittal projection views in short intervals + 'pvs_crnl': pvs_crnl, #coronal projection views in short intervals + 'fansums': hstout[ + 'fan'], #fan sums of delayeds for variance reduction of random event sinograms + 'sngl_rate': single_rate, #bucket singles over time + 'tsngl': t, #time points of singles measurements in list-mode data + 'buckets': buckets, #average bucket singles + 'psino': hstout['psn'].astype(np.uint16), #prompt sinogram + 'dsino': hstout['dsn'].astype(np.uint16), #delayeds sinogram + 'pssr': hstout['ssr'] #single-slice rebinned sinogram of prompts } return pdata @@ -224,7 +204,7 @@ def randoms(hst, scanner_params, gpu_dim=False): ''' # constants, transaxial and axial LUTs are extracted - Cnt = scanner_params['Cnt'] + Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] @@ -238,17 +218,16 @@ def randoms(hst, scanner_params, gpu_dim=False): def rand(fansums, txLUT, axLUT, Cnt): - if Cnt['SPN']==1: nsinos=Cnt['NSN1'] - elif Cnt['SPN']==11: nsinos=Cnt['NSN11'] - elif Cnt['SPN']==0: nsinos=Cnt['NSEG0'] + if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] + elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] + elif Cnt['SPN'] == 0: nsinos = Cnt['NSEG0'] #random sino and estimated crystal map of singles put into a dictionary - rsn = np.zeros((nsinos, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) + rsn = np.zeros((nsinos, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) cmap = np.zeros((Cnt['NCRS'], Cnt['NRNG']), dtype=np.float32) rndout = { 'rsn': rsn, - 'cmap':cmap, - } + 'cmap': cmap,} mmr_lmproc.rand(rndout, 
fansums, txLUT, axLUT, Cnt) @@ -261,21 +240,20 @@ def rand(fansums, txLUT, axLUT, Cnt): def prand(fansums, pmsk, txLUT, axLUT, Cnt): - if Cnt['SPN']==1: nsinos=Cnt['NSN1'] - elif Cnt['SPN']==11: nsinos=Cnt['NSN11'] - elif Cnt['SPN']==0: nsinos=Cnt['NSEG0'] + if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] + elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] + elif Cnt['SPN'] == 0: nsinos = Cnt['NSEG0'] #number of frames nfrm = fansums.shape[0] log.debug('# of dynamic frames: {}.'.format(nfrm)) #random sino and estimated crystal map of singles put into a dictionary - rsn = np.zeros((nsinos, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) + rsn = np.zeros((nsinos, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) cmap = np.zeros((Cnt['NCRS'], Cnt['NRNG']), dtype=np.float32) rndout = { 'rsn': rsn, - 'cmap':cmap, - } + 'cmap': cmap,} #save results for each frame @@ -283,15 +261,15 @@ def prand(fansums, pmsk, txLUT, axLUT, Cnt): crmap = np.zeros((nfrm, Cnt['NCRS'], Cnt['NRNG']), dtype=np.float32) for i in range(nfrm): - rndout['rsn'][:,:,:] = 0 - rndout['cmap'][:,:] = 0 - mmr_lmproc.prand(rndout, pmsk, fansums[i,:,:], txLUT, axLUT, Cnt) - rsino[i,:,:,:] = rndout['rsn'] - crmap[i,:,:] = rndout['cmap'] + rndout['rsn'][:, :, :] = 0 + rndout['cmap'][:, :] = 0 + mmr_lmproc.prand(rndout, pmsk, fansums[i, :, :], txLUT, axLUT, Cnt) + rsino[i, :, :, :] = rndout['rsn'] + crmap[i, :, :] = rndout['cmap'] - if nfrm==1: - rsino = rsino[0,:,:,:] - crmap = crmap[0,:,:] + if nfrm == 1: + rsino = rsino[0, :, :, :] + crmap = crmap[0, :, :] return rsino, crmap @@ -304,15 +282,16 @@ def sino2nii(sino, Cnt, fpth): cumseg = np.append([0], cumseg) #plane offset (relative to 127 planes of seg 0) for each segment - OFF = np.min( abs( np.append([Cnt['MNRD']], [Cnt['MXRD']], axis=0) ), axis=0 ) - niisn = np.zeros(( Cnt['SEG'][0], Cnt['NSANGLES'], Cnt['NSBINS'], segn), dtype=sino.dtype) + OFF = np.min(abs(np.append([Cnt['MNRD']], [Cnt['MXRD']], axis=0)), axis=0) + niisn = np.zeros((Cnt['SEG'][0], 
Cnt['NSANGLES'], Cnt['NSBINS'], segn), dtype=sino.dtype) #first segment (with direct planes) # tmp = - niisn[:,:,:,0] = sino[Cnt['SEG'][0]-1::-1, ::-1, ::-1] + niisn[:, :, :, 0] = sino[Cnt['SEG'][0] - 1::-1, ::-1, ::-1] - for iseg in range(1,segn): - niisn[OFF[iseg]:OFF[iseg]+Cnt['SEG'][iseg], :, :, iseg] = sino[cumseg[iseg]+Cnt['SEG'][iseg]-1:cumseg[iseg]-1:-1, ::-1, ::-1 ] + for iseg in range(1, segn): + niisn[OFF[iseg]:OFF[iseg] + Cnt['SEG'][iseg], :, :, + iseg] = sino[cumseg[iseg] + Cnt['SEG'][iseg] - 1:cumseg[iseg] - 1:-1, ::-1, ::-1] niisn = np.transpose(niisn, (2, 1, 0, 3)) @@ -326,24 +305,24 @@ def get_michem(sino, axLUT, Cnt): # span: spn = -1 - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: slut = np.arange(Cnt['NSN1']) #for span 1, one-to-one mapping - elif Cnt['SPN']==11: + elif Cnt['SPN'] == 11: slut = axLUT['sn1_sn11'] else: raise ValueError('sino is neither in span-1 or span-11') #acitivity michelogram - Mem = np.zeros((Cnt['NRNG'],Cnt['NRNG']), dtype=np.float32) + Mem = np.zeros((Cnt['NRNG'], Cnt['NRNG']), dtype=np.float32) #sino to ring number & sino-1 to sino-11 index: - sn1_rno = axLUT['sn1_rno'] + sn1_rno = axLUT['sn1_rno'] #sum all the sinograms inside - ssm = np.sum(sino, axis=(1,2)) + ssm = np.sum(sino, axis=(1, 2)) for sni in range(len(sn1_rno)): - r0 = sn1_rno[sni,0] - r1 = sn1_rno[sni,1] - Mem[r1,r0] = ssm[slut[sni]] + r0 = sn1_rno[sni, 0] + r1 = sn1_rno[sni, 1] + Mem[r1, r0] = ssm[slut[sni]] return Mem @@ -362,16 +341,16 @@ def draw_frames(hst, tfrms, plot_diff=True): if plot_diff: plt.plot(diff, label='difference') - K = [f[0] for f in tfrms if isinstance(f,list)] + K = [f[0] for f in tfrms if isinstance(f, list)] for k in K: yval = hst['phc'][k] - if yval<0.2*np.max(hst['phc']): - yval = 0.2*np.max(hst['phc']) + if yval < 0.2 * np.max(hst['phc']): + yval = 0.2 * np.max(hst['phc']) plt.plot([k, k], [0, yval], 'k--', lw=.75) plt.legend() plt.xlabel('time [sec]') plt.ylabel('counts/sec') - plt.ticklabel_format(style='sci', axis='y', 
scilimits=(0,0)) + plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0)) def get_time_offset(hst): @@ -379,9 +358,10 @@ def get_time_offset(hst): Detects when the signal is stronger than the randoms (noise) in the list-mode data stream. ''' # detect when the signal (here prompt data) is almost as strong as randoms - s = hst['dhc']>0.98*hst['phc'] + s = hst['dhc'] > 0.98 * hst['phc'] # return index, which will constitute time in seconds, for this offset - return len(s)-np.argmax(s[::-1])-1 + return len(s) - np.argmax(s[::-1]) - 1 + def split_frames(hst, Tref=0, t0=0, t1=0): ''' @@ -399,11 +379,11 @@ def split_frames(hst, Tref=0, t0=0, t1=0): diff = np.int64(hst['phc']) - np.int64(hst['dhc']) # follow up index - i = t0 + (toff)*(t0<=0) - if Tref>0: - j = i+Tref - elif t1>0: - j = t1 + (toff)*(t0<=0) + i = t0 + (toff) * (t0 <= 0) + if Tref > 0: + j = i + Tref + elif t1 > 0: + j = t1 + (toff) * (t0 <= 0) else: raise ValueError('e> could not figure out the reference frame.') @@ -415,34 +395,34 @@ def split_frames(hst, Tref=0, t0=0, t1=0): i = 0 j = toff # threshold to be achieved - thrsh = csum[j-1] + cref + thrsh = csum[j - 1] + cref fdur = [toff] frms = ['timings', [0, toff]] clvl = [0] - print('frame counts t(%d,%d) = %d. diff=%d' % ( i,j,clvl[-1] , np.sum(diff[i:j])-cref )) - while thrshthrsh) - fdur.append(j-i) - frms.append([i,j]) + j = np.argmax(csum > thrsh) + fdur.append(j - i) + frms.append([i, j]) clvl.append(np.sum(diff[i:j])) - print('frame counts t(%d,%d) = %d. diff=%d' % ( i,j,clvl[-1] , np.sum(diff[i:j])-cref )) + print('frame counts t(%d,%d) = %d. diff=%d' % (i, j, clvl[-1], np.sum(diff[i:j]) - cref)) thrsh += cref # last remianing frame - i=j - j=hst['dur'] + i = j + j = hst['dur'] # if last frame is short, include it in the last one. 
- if np.sum(diff[i:])>.5*cref: - fdur.append(j-i) - frms.append([i,j]) + if np.sum(diff[i:]) > .5 * cref: + fdur.append(j - i) + frms.append([i, j]) clvl.append(np.sum(diff[i:])) else: - fdur[-1] += j-i - frms[-1][-1] += j-i + fdur[-1] += j - i + frms[-1][-1] += j - i clvl[-1] += np.sum(diff[i:]) i = frms[-1][0] - print('frame counts t(%d,%d) = %d. diff=%d' % ( i,j,clvl[-1] , np.sum(diff[i:j])-cref )) - return {'timings':frms, 'fdur':fdur, 'fcnts':clvl, 'offset':toff, 'csum':csum} + print('frame counts t(%d,%d) = %d. diff=%d' % (i, j, clvl[-1], np.sum(diff[i:j]) - cref)) + return {'timings': frms, 'fdur': fdur, 'fcnts': clvl, 'offset': toff, 'csum': csum} def frame_position(hst, tposition, Cref=0, tr0=0, tr1=15, verbose=True): @@ -460,10 +440,10 @@ def frame_position(hst, tposition, Cref=0, tr0=0, tr1=15, verbose=True): # cumulative sum for calculating count levels in arbitrary time windows cumdiff = np.cumsum(diff) - if Cref==0: - Cref = cumdiff[tr1]-cumdiff[tr0-1] + if Cref == 0: + Cref = cumdiff[tr1] - cumdiff[tr0 - 1] - if Cref<0: + if Cref < 0: raise ValueError('The reference count level has to be non-negative') (log.info if verbose else log.debug)('reference count level: {}.'.format(Cref)) @@ -471,14 +451,14 @@ def frame_position(hst, tposition, Cref=0, tr0=0, tr1=15, verbose=True): stp0 = 0 stp1 = 0 Cw = 0 - while Cw0: stp0 += 1 - if (tposition+stp1+1)<=len(cumdiff)-1: stp1 += 1 - Cw = cumdiff[tposition+stp1] - cumdiff[tposition-stp0-1] + if (tposition - stp0 - 1) > 0: stp0 += 1 + if (tposition + stp1 + 1) <= len(cumdiff) - 1: stp1 += 1 + Cw = cumdiff[tposition + stp1] - cumdiff[tposition - stp0 - 1] - tw0 = tposition-stp0 - tw1 = tposition+stp1 + tw0 = tposition - stp0 + tw1 = tposition + stp1 Tw = tw1 - tw0 (log.info if verbose else log.debug)( 'time window t[{}, {}] of duration T={} and count level Cw={}'.format(tw0, tw1, Tw, Cw)) @@ -486,7 +466,7 @@ def frame_position(hst, tposition, Cref=0, tr0=0, tr1=15, verbose=True): return (tw0, tw1) -def 
auxilary_frames(hst, t_frms, Cref=0, tr0=0, tr1=15, verbose = True): +def auxilary_frames(hst, t_frms, Cref=0, tr0=0, tr1=15, verbose=True): ''' Get auxiliary time frames with equal count levels for constant precision in the estimation of subject motion based on PET data. @@ -496,7 +476,7 @@ def auxilary_frames(hst, t_frms, Cref=0, tr0=0, tr1=15, verbose = True): diff = np.int64(hst['phc']) - np.int64(hst['dhc']) # previous frame (time tuple) - prev_frm = (0,0) + prev_frm = (0, 0) # previous frame index prev_i = -1 # look up table to the auxilary frames from the regular ones @@ -504,26 +484,27 @@ def auxilary_frames(hst, t_frms, Cref=0, tr0=0, tr1=15, verbose = True): fi2afi = [] for i in range(len(t_frms)): # time point as an average between the frame end points - tp = int(np.mean([t_frms[i][0],t_frms[i][1]])) + tp = int(np.mean([t_frms[i][0], t_frms[i][1]])) # alternative (more accurate) average through centre of mass t0 = t_frms[i][0] t1 = t_frms[i][1] - if t1>=hst['dur']: t1 = hst['dur']-1 - t = np.arange(t0,t1) - tcm = np.sum(diff[t]*t)/np.sum(diff[t]) + if t1 >= hst['dur']: t1 = hst['dur'] - 1 + t = np.arange(t0, t1) + tcm = np.sum(diff[t] * t) / np.sum(diff[t]) # get the tuple of the equivalent count level frame frm = frame_position(hst, tcm, tr0=tr0, tr1=tr1, verbose=False) # form the LUT - if frm!=prev_frm: + if frm != prev_frm: prev_frm = frm prev_i += 1 timings.append(list(frm)) fi2afi.append(prev_i) if verbose: - print('t[{}, {}]; tp={}, tcm={} => frm id:{}, timings:{}'.format(t_frms[i][0], t_frms[i][1], tp, tcm, fi2afi[-1], timings[-1])) + print('t[{}, {}]; tp={}, tcm={} => frm id:{}, timings:{}'.format( + t_frms[i][0], t_frms[i][1], tp, tcm, fi2afi[-1], timings[-1])) # form the list of auxilary dynamic frames of equivalent count level (as in Cref) for reconstruction mfrm = ['fluid'] + timings - return {'timings':mfrm, 'frame_idx':fi2afi} + return {'timings': mfrm, 'frame_idx': fi2afi} def dynamic_timings(flist, offset=0): @@ -541,10 +522,12 @@ def 
dynamic_timings(flist, offset=0): ''' if not isinstance(flist, list): raise TypeError('Wrong type of frame data input') - if all([isinstance(t,(int, np.int32, np.int16, np.int8, np.uint8, np.uint16, np.uint32)) for t in flist]): + if all([ + isinstance(t, (int, np.int32, np.int16, np.int8, np.uint8, np.uint16, np.uint32)) + for t in flist]): tsum = offset # list of frame timings - if offset>0: + if offset > 0: t_frames = ['timings', [0, offset]] else: t_frames = ['timings'] @@ -558,17 +541,17 @@ def dynamic_timings(flist, offset=0): t_frames.append([t0, t1]) frms = np.uint16(flist) - elif all( [isinstance(t,list) and len(t)==2 for t in flist[1:]] ) and flist[0]=='def': + elif all([isinstance(t, list) and len(t) == 2 for t in flist[1:]]) and flist[0] == 'def': flist = flist[1:] - if offset>0: - flist.insert(0,[0,offset]) + if offset > 0: + flist.insert(0, [0, offset]) farray = np.asarray(flist, dtype=np.uint16) else: farray = np.array(flist) # number of dynamic frames - nfrm = np.sum(farray[:,0]) + nfrm = np.sum(farray[:, 0]) # list of frame duration - frms = np.zeros(nfrm,dtype=np.uint16) + frms = np.zeros(nfrm, dtype=np.uint16) #frame iterator fi = 0 #time sum of frames @@ -576,18 +559,18 @@ def dynamic_timings(flist, offset=0): # list of frame timings t_frames = ['timings'] for i in range(0, farray.shape[0]): - for t in range(0, farray[i,0]): + for t in range(0, farray[i, 0]): # frame start time t0 = tsum - tsum += farray[i,1] + tsum += farray[i, 1] # frame end time t1 = tsum # append the timings to the list t_frames.append([t0, t1]) - frms[fi] = farray[i,1] + frms[fi] = farray[i, 1] fi += 1 else: raise TypeError('Unrecognised time frame definitions.') # prepare the output dictionary - out = {'total':tsum, 'frames':frms, 'timings':t_frames} + out = {'total': tsum, 'frames': frms, 'timings': t_frames} return out diff --git a/niftypet/nipet/lm/pviews.py b/niftypet/nipet/lm/pviews.py index a2bf7ae9..edae64f4 100644 --- a/niftypet/nipet/lm/pviews.py +++ 
b/niftypet/nipet/lm/pviews.py @@ -13,7 +13,7 @@ def mvavg(interval, window_size): - window = np.ones(int(window_size))/float(window_size) + window = np.ones(int(window_size)) / float(window_size) return np.convolve(interval, window, 'same') @@ -25,10 +25,10 @@ def video_frm(hst, outpth): #============================================ i = np.argmax(hst['phc']) - ymin = np.floor( min(hst['cmass'][i:i+300]) ) - ymax = np.ceil( max(hst['cmass'][i+100:]) ) + ymin = np.floor(min(hst['cmass'][i:i + 300])) + ymax = np.ceil(max(hst['cmass'][i + 100:])) - mfrm = hst['pvs_sgtl'].shape[0]; + mfrm = hst['pvs_sgtl'].shape[0] #--for movie FFMpegWriter = manimation.writers['ffmpeg'] @@ -40,45 +40,46 @@ def video_frm(hst, outpth): ax1 = plt.subplot(311) plt.title('Coronal View') - plt.setp( ax1.get_xticklabels(), visible=False) + plt.setp(ax1.get_xticklabels(), visible=False) plt.tick_params(axis='both', which='both', bottom='off', top='off', labelbottom='off') - l1 = plt.imshow(hst['pvs_crnl'][100,:,:]/np.mean(hst['pvs_crnl'][100,:,:]), cmap='jet',interpolation='nearest') + l1 = plt.imshow(hst['pvs_crnl'][100, :, :] / np.mean(hst['pvs_crnl'][100, :, :]), cmap='jet', + interpolation='nearest') ax2 = plt.subplot(312) plt.title('Sagittal View') - plt.setp( ax2.get_xticklabels(), visible=False) + plt.setp(ax2.get_xticklabels(), visible=False) plt.tick_params(axis='both', which='both', bottom='off', top='off', labelbottom='off') - l = plt.imshow(hst['pvs_sgtl'][100,:,:]/np.mean(hst['pvs_sgtl'][100,:,:]), cmap='jet',interpolation='nearest') + l = plt.imshow(hst['pvs_sgtl'][100, :, :] / np.mean(hst['pvs_sgtl'][100, :, :]), cmap='jet', + interpolation='nearest') ax3 = plt.subplot(313) plt.title('Axial Centre of Mass') t = np.arange(0., hst['dur'], 1.) 
#plt.plot(t, rprmt, 'k', t, rdlyd, 'r') - plt.plot(t, mvavg(hst['cmass'][:],5),'k') + plt.plot(t, mvavg(hst['cmass'][:], 5), 'k') plt.ylim([ymin, ymax]) plt.xlabel('Time [s]') l2, = plt.plot(np.array([1000, 1000]), np.array([0, ymax]), 'b') #how many gpu frames per movie (controls the time resolution) mf = 6 - mmfrm = mfrm/mf + mmfrm = mfrm / mf - fnm = os.path.join(outpth, 'pViews_' +str(mf)+'.mp4') + fnm = os.path.join(outpth, 'pViews_' + str(mf) + '.mp4') - with writer.saving( fig3, fnm, 200 ): + with writer.saving(fig3, fnm, 200): for i in range(mmfrm): print('i> short frame to movie:', i) - tmp = np.sum( hst['pvs_sgtl'][mf*i:mf*(i+1),:,:], axis=0) - tmp2= np.sum( hst['pvs_crnl'][mf*i:mf*(i+1),:,:], axis=0) - tmp = tmp/np.mean(tmp) - tmp2 = tmp2/np.mean(tmp2) + tmp = np.sum(hst['pvs_sgtl'][mf * i:mf * (i+1), :, :], axis=0) + tmp2 = np.sum(hst['pvs_crnl'][mf * i:mf * (i+1), :, :], axis=0) + tmp = tmp / np.mean(tmp) + tmp2 = tmp2 / np.mean(tmp2) l.set_data(tmp) l1.set_data(tmp2) # l2.set_data(VTIME*mf*i*np.ones(2), np.array([0, np.max(hst['phc'])])) - l2.set_data(VTIME*mf*i*np.ones(2), np.array([0, ymax])) + l2.set_data(VTIME * mf * i * np.ones(2), np.array([0, ymax])) writer.grab_frame() - plt.show() return fnm @@ -105,8 +106,8 @@ def video_dyn(hst, frms, outpth, axLUT, Cnt): # for scaling of the mass centre i = np.argmax(hst['phc']) - ymin = np.floor( min(hst['cmass'][i:i+300]) ) - ymax = np.ceil( max(hst['cmass'][i+100:]) ) + ymin = np.floor(min(hst['cmass'][i:i + 300])) + ymax = np.ceil(max(hst['cmass'][i + 100:])) # number of dynamic frames nfrm = hst['psino'].shape[0] @@ -120,10 +121,10 @@ def video_dyn(hst, frms, outpth, axLUT, Cnt): for frm in range(nfrm): for i in range(nsinos): - ddsino[frm, axLUT['sn11_ssrb'][i], :, :] += hst['psino'][frm,i,:,:] - gsum[frm] = np.sum(hst['psino'][frm,:,:,:]) + ddsino[frm, axLUT['sn11_ssrb'][i], :, :] += hst['psino'][frm, i, :, :] + gsum[frm] = np.sum(hst['psino'][frm, :, :, :]) gpu_totsum += gsum[frm] - 
print('GPU('+str(frm)+') =', gsum[frm]) + print('GPU(' + str(frm) + ') =', gsum[frm]) print('-----------') print('GPUtot =', gpu_totsum) @@ -132,7 +133,7 @@ def video_dyn(hst, frms, outpth, axLUT, Cnt): coronal_angle = 0 i_mxfrm = gsum.argmax() frmrep = 5 - mfrm = frmrep*nfrm + mfrm = frmrep * nfrm #--- #--for movie @@ -145,34 +146,36 @@ def video_dyn(hst, frms, outpth, axLUT, Cnt): ax1 = plt.subplot(311) plt.title('Coronal View') - plt.setp( ax1.get_xticklabels(), visible=False) + plt.setp(ax1.get_xticklabels(), visible=False) plt.tick_params(axis='both', which='both', bottom='off', top='off', labelbottom='off') - l1 = plt.imshow(np.array(ddsino[i_mxfrm, : , coronal_angle, :], dtype=np.float64), cmap='jet',interpolation='nearest') + l1 = plt.imshow(np.array(ddsino[i_mxfrm, :, coronal_angle, :], dtype=np.float64), cmap='jet', + interpolation='nearest') #plt.clim([0, 70]) ax2 = plt.subplot(312) plt.title('Sagittal View') - plt.setp( ax2.get_xticklabels(), visible=False) + plt.setp(ax2.get_xticklabels(), visible=False) plt.tick_params(axis='both', which='both', bottom='off', top='off', labelbottom='off') - l2 = plt.imshow(np.array(ddsino[i_mxfrm, : , saggital_angle, :], dtype=np.float64), cmap='jet',interpolation='nearest') + l2 = plt.imshow(np.array(ddsino[i_mxfrm, :, saggital_angle, :], dtype=np.float64), cmap='jet', + interpolation='nearest') #plt.clim([0, 70]) ax3 = plt.subplot(313) plt.title('Axial Centre of Mass') - plt.plot(range(hst['dur']), voxz*mvavg(hst['cmass'][:],5),'k') - plt.ylim([voxz*ymin, voxz*ymax]) + plt.plot(range(hst['dur']), voxz * mvavg(hst['cmass'][:], 5), 'k') + plt.ylim([voxz * ymin, voxz * ymax]) plt.xlabel('Time [s]') l3, = plt.plot(np.array([1000, 1000]), np.array([0, ymax]), 'b') fnm = os.path.join(outpth, 'pViews_dyn.mp4') with writer.saving(fig1, fnm, 100): for frm in range(mfrm): - print ('i> dynamic frame:', frm%nfrm) - tmp = np.array(ddsino[frm%nfrm, : , coronal_angle, :], dtype=np.float64) + print('i> dynamic frame:', frm % nfrm) 
+ tmp = np.array(ddsino[frm % nfrm, :, coronal_angle, :], dtype=np.float64) l1.set_data(tmp) - tmp = np.array(ddsino[frm%nfrm, : , saggital_angle, :], dtype=np.float64) + tmp = np.array(ddsino[frm % nfrm, :, saggital_angle, :], dtype=np.float64) l2.set_data(tmp) - l3.set_data(frmcum[frm%nfrm]*np.ones(2), np.array([0, ymax])) + l3.set_data(frmcum[frm % nfrm] * np.ones(2), np.array([0, ymax])) writer.grab_frame() return fnm diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index 2cfe5aeb..7e57b60f 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -20,8 +20,8 @@ from . import mmr_auxe, resources -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" +__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") +__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) @@ -32,7 +32,7 @@ def create_dir(pth): def fwhm2sig(fwhm): Cnt = resources.get_mmr_constants() - return (fwhm/Cnt['SZ_VOXY']) / (2*(2*np.log(2))**.5) + return (fwhm / Cnt['SZ_VOXY']) / (2 * (2 * np.log(2))**.5) def lm_pos(datain, Cnt): @@ -46,20 +46,21 @@ def lm_pos(datain, Cnt): return None #---find GantryOffset - if dhdr[0x0018, 0x1020].value == 'syngo MR B20P' or dhdr[0x0018, 0x1020].value == 'syngo MR E11': + if dhdr[0x0018, 0x1020].value == 'syngo MR B20P' or dhdr[0x0018, + 0x1020].value == 'syngo MR E11': flip = 1 - if [0x29,0x1120] in dhdr: - csainfo = dhdr[0x29,0x1120].value + if [0x29, 0x1120] in dhdr: + csainfo = dhdr[0x29, 0x1120].value else: log.error('DICOM field [0x29,0x1120] not found!') return None # this is for older scanner software elif dhdr[0x0018, 0x1020].value == 'syngo MR B18P': flip = -1 - if [0x29,0x1020] in dhdr: - csainfo = dhdr[0x29,0x1020].value - elif [0x29,0x1120] in dhdr: - csainfo = dhdr[0x29,0x1120].value + if [0x29, 0x1020] in dhdr: + csainfo = dhdr[0x29, 0x1020].value + elif [0x29, 0x1120] in dhdr: + csainfo = dhdr[0x29, 0x1120].value else: log.error(' DICOM field [0x29,0x1020] 
not found!') return None @@ -67,24 +68,24 @@ def lm_pos(datain, Cnt): raise ValueError('unknown scanner software version!') fi = re.search(b'GantryOffset(?!_)', csainfo).start() #csainfo.find('GantryOffset') - #regular expression for the needed three numbers + #regular expression for the needed three numbers p = re.compile(b'-?\\d.\\d{4,10}') - xyz = p.findall(csainfo[fi:fi+200]) - #offset in cm - # xoff = float(xyz[0])/10 - # yoff = float(xyz[1])/10 - # zoff = float(xyz[2])/10 - #> hack to avoid other numbers (counting from the back) - xoff = float(xyz[-3])/10 - yoff = float(xyz[-2])/10 - zoff = float(xyz[-1])/10 + xyz = p.findall(csainfo[fi:fi + 200]) + #offset in cm + # xoff = float(xyz[0])/10 + # yoff = float(xyz[1])/10 + # zoff = float(xyz[2])/10 + #> hack to avoid other numbers (counting from the back) + xoff = float(xyz[-3]) / 10 + yoff = float(xyz[-2]) / 10 + zoff = float(xyz[-1]) / 10 goff = flip * np.array([xoff, yoff, zoff]) log.info('gantry offset from DICOM:\n{}'.format(goff)) fi = csainfo.find(b'TablePositionOrigin') #regular expression for the needed three numbers - tpostr = csainfo[fi:fi+200] + tpostr = csainfo[fi:fi + 200] tpo = re.sub(b'[^a-zA-Z0-9\\-]', b'', tpostr).split(b'M') tpozyx = np.array([float(tpo[-1]), float(tpo[-2]), float(tpo[-3])]) log.info('table position origin from DICOM:\n{}'.format(tpozyx)) @@ -103,21 +104,22 @@ def hdr_lm(datain, Cnt): return None # list possible DICOM locations for list-mode interfile header - lmhdr_locations = [[0x29,0x1010], [0x29,0x1110]] + lmhdr_locations = [[0x29, 0x1010], [0x29, 0x1110]] # for newer scanner software - if dhdr[0x0018, 0x1020].value == 'syngo MR B20P' or dhdr[0x0018, 0x1020].value == 'syngo MR E11': + if dhdr[0x0018, 0x1020].value == 'syngo MR B20P' or dhdr[0x0018, + 0x1020].value == 'syngo MR E11': # interfile header - if [0x29,0x1010] in dhdr: - lmhdr = dhdr[0x29,0x1010].value + if [0x29, 0x1010] in dhdr: + lmhdr = dhdr[0x29, 0x1010].value log.info('got LM interfile.') else: 
log.warning('DICOM field [0x29,0x1010] not found!') lmhdr = None #CSA Series Header Info - if [0x29,0x1120] in dhdr: - csahdr = dhdr[0x29,0x1120].value + if [0x29, 0x1120] in dhdr: + csahdr = dhdr[0x29, 0x1120].value log.info('got CSA info.') else: log.error('DICOM field [0x29,0x1120] not found!') @@ -132,7 +134,8 @@ def hdr_lm(datain, Cnt): if loc in dhdr: lmhdr = dhdr[loc].value if '!INTERFILE' in lmhdr and 'start horizontal bed position' in lmhdr: - log.info(dedent('''\ + log.info( + dedent('''\ obtained list-mode interfile header from: [{}, {}]''').format(hex(loc[0]), hex(loc[1]))) found_lmhdr = True @@ -142,11 +145,11 @@ def hdr_lm(datain, Cnt): lmhdr = None #CSA Series Header Info - if [0x29,0x1020] in dhdr: - csahdr = dhdr[0x29,0x1020].value + if [0x29, 0x1020] in dhdr: + csahdr = dhdr[0x29, 0x1020].value log.info('got CSA info.') - elif [0x29,0x1120] in dhdr: - csahdr = dhdr[0x29,0x1120].value + elif [0x29, 0x1120] in dhdr: + csahdr = dhdr[0x29, 0x1120].value log.info('got CSA info (may not be accurate, please check).') else: log.error('DICOM field [0x29,0x1020] not found!') @@ -162,43 +165,35 @@ def vh_bedpos(datain, Cnt): p = re.compile(r'start horizontal bed position.*\d{1,3}\.*\d*') m = p.search(ihdr) fi = ihdr[m.start():m.end()].find('=') - hbedpos = 0.1*float(ihdr[m.start()+fi+1:m.end()]) + hbedpos = 0.1 * float(ihdr[m.start() + fi + 1:m.end()]) #start vertical bed position p = re.compile(r'start vertical bed position.*\d{1,3}\.*\d*') m = p.search(ihdr) fi = ihdr[m.start():m.end()].find('=') - vbedpos = 0.1*float(ihdr[m.start()+fi+1:m.end()]) + vbedpos = 0.1 * float(ihdr[m.start() + fi + 1:m.end()]) return vbedpos, hbedpos -def hmu_resample0(hmupos, parts ,Cnt): +def hmu_resample0(hmupos, parts, Cnt): #output image sampling Cim = { - 'VXSRx':Cnt['SO_VXX'], - 'VXSRy':Cnt['SO_VXY'], - 'VXSRz':Cnt['SO_VXZ'], - 'VXNRx':Cnt['SO_IMX'], - 'VXNRy':Cnt['SO_IMY'], - 'VXNRz':Cnt['SO_IMZ'] - } + 'VXSRx': Cnt['SO_VXX'], 'VXSRy': Cnt['SO_VXY'], 'VXSRz': 
Cnt['SO_VXZ'], + 'VXNRx': Cnt['SO_IMX'], 'VXNRy': Cnt['SO_IMY'], 'VXNRz': Cnt['SO_IMZ']} #voxel position/offset - Cim['OFFRx'] = -0.5*Cim['VXNRx']*Cim['VXSRx']#-0.5*Cim['VXSRx'] - Cim['OFFRy'] = -0.5*Cim['VXNRy']*Cim['VXSRy']#-0.5*Cim['VXSRy'] - Cim['OFFRz'] = -0.5*Cim['VXNRz']*Cim['VXSRz']-hmupos[0]['HBedPos'] + Cim['OFFRx'] = -0.5 * Cim['VXNRx'] * Cim['VXSRx'] #-0.5*Cim['VXSRx'] + Cim['OFFRy'] = -0.5 * Cim['VXNRy'] * Cim['VXSRy'] #-0.5*Cim['VXSRy'] + Cim['OFFRz'] = -0.5 * Cim['VXNRz'] * Cim['VXSRz'] - hmupos[0]['HBedPos'] Trnsl = (0.0, 0.0, 0.0) #transformation matrix A = np.array( - [[ 1., 0., 0., Trnsl[0] ], - [ 0., 1., 0., Trnsl[1] ], - [ 0., 0., 1., Trnsl[2] ], - [ 0., 0., 0., 1. ]], dtype=np.float32 - ) + [[1., 0., 0., Trnsl[0]], [0., 1., 0., Trnsl[1]], [0., 0., 1., Trnsl[2]], [0., 0., 0., 1.]], + dtype=np.float32) - imr = np.zeros( (Cnt['SO_IMZ'],Cnt['SO_IMY'],Cnt['SO_IMX']), dtype=np.float32) + imr = np.zeros((Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']), dtype=np.float32) #===== Go through the hardware mu-map parts ===== for i in parts: Cim['VXSOx'] = hmupos[i]['ivs'][2] @@ -210,21 +205,21 @@ def hmu_resample0(hmupos, parts ,Cnt): #original image offset Cim['OFFOx'] = hmupos[i]['vpos'][2] Cim['OFFOy'] = hmupos[i]['vpos'][1] - Cim['OFFOz'] =-hmupos[i]['vpos'][0] + Cim['OFFOz'] = -hmupos[i]['vpos'][0] #resample! - if i==4: + if i == 4: #does the bed just partly (no point doing all the long bed) - offresZ = (-.5*Cnt['SO_IMZ']*Cnt['SO_VXZ']-hmupos[0]['HBedPos']) + offresZ = (-.5 * Cnt['SO_IMZ'] * Cnt['SO_VXZ'] - hmupos[0]['HBedPos']) #excess of the hrdwr mu-map axially excemuZ = offresZ - (-hmupos[4]['vpos'][0]) - excevox = int( excemuZ/hmupos[4]['ivs'][0] ) - 5# with extra margin of 5 - newoffZ = -hmupos[4]['vpos'][0] + excevox*hmupos[4]['ivs'][0] - #number of voxels included axially - inclvox = Cnt['SO_IMZ']*Cnt['SO_VXZ']/hmupos[4]['ivs'][0] + 10 #with extra margin... 
- #truncate the image - im = hmupos[i]['img'][excevox:excevox+inclvox,:,:] - #update dictionary Cim + excevox = int(excemuZ / hmupos[4]['ivs'][0]) - 5 # with extra margin of 5 + newoffZ = -hmupos[4]['vpos'][0] + excevox * hmupos[4]['ivs'][0] + #number of voxels included axially + inclvox = Cnt['SO_IMZ'] * Cnt['SO_VXZ'] / hmupos[4]['ivs'][0] + 10 #with extra margin... + #truncate the image + im = hmupos[i]['img'][excevox:excevox + inclvox, :, :] + #update dictionary Cim Cim['OFFOz'] = newoffZ Cim['VXNOz'] = im.shape[0] imr += nimpa.prc.improc.resample(im, A, Cim) @@ -245,38 +240,41 @@ def time_diff_norm_acq(datain): return None # acq date - s = l[0x08,0x21].value + s = l[0x08, 0x21].value y = int(s[:4]) m = int(s[4:6]) d = int(s[6:8]) # acq time - s = l[0x08,0x32].value + s = l[0x08, 0x32].value hrs = int(s[:2]) mns = int(s[2:4]) sec = int(s[4:6]) # calib date - s = l[0x18,0x1200].value + s = l[0x18, 0x1200].value cy = int(s[:4]) cm = int(s[4:6]) cd = int(s[6:8]) # calib time - s = l[0x18,0x1201].value + s = l[0x18, 0x1201].value chrs = int(s[:2]) cmns = int(s[2:4]) csec = int(s[4:6]) tdiff = (hrs*3600 + mns*60 + sec) - (chrs*3600 + cmns*60 + csec) - dhrs = tdiff/3600 - dmns = (tdiff - 3600*dhrs)/60 - if dhrs>12: - log.warning('time difference between calibration and acquisition is: {} hrs and {} mins'.format(dhrs, dmns)) - - if np.sum([cy-y, cm-m, cd-d])!=0: - log.warning(dedent('''\ + dhrs = tdiff / 3600 + dmns = (tdiff - 3600*dhrs) / 60 + if dhrs > 12: + log.warning( + 'time difference between calibration and acquisition is: {} hrs and {} mins'.format( + dhrs, dmns)) + + if np.sum([cy - y, cm - m, cd - d]) != 0: + log.warning( + dedent('''\ daily QC/calibration was performed on different day(!): {}-{}-{} vs. 
{}-{}-{} - ''').format(cy, cm, cd, y,m,d)) + ''').format(cy, cm, cd, y, m, d)) def timings_from_list(flist, offset=0): @@ -293,10 +291,12 @@ def timings_from_list(flist, offset=0): ''' if not isinstance(flist, list): raise TypeError('Wrong type of frame data input') - if all([isinstance(t,(int, np.int32, np.int16, np.int8, np.uint8, np.uint16, np.uint32)) for t in flist]): + if all([ + isinstance(t, (int, np.int32, np.int16, np.int8, np.uint8, np.uint16, np.uint32)) + for t in flist]): tsum = offset # list of frame timings - if offset>0: + if offset > 0: t_frames = [[0, offset]] else: t_frames = [] @@ -309,16 +309,16 @@ def timings_from_list(flist, offset=0): # append the timings to the list t_frames.append([t0, t1]) frms = np.uint16(flist) - elif all([isinstance(t,list) and len(t)==2 for t in flist]): - if offset>0: - flist.insert(0,[1,offset]) + elif all([isinstance(t, list) and len(t) == 2 for t in flist]): + if offset > 0: + flist.insert(0, [1, offset]) farray = np.asarray(flist, dtype=np.uint16) else: farray = np.array(flist) # number of dynamic frames - nfrm = np.sum(farray[:,0]) + nfrm = np.sum(farray[:, 0]) # list of frame duration - frms = np.zeros(nfrm,dtype=np.uint16) + frms = np.zeros(nfrm, dtype=np.uint16) #frame iterator fi = 0 #time sum of frames @@ -326,20 +326,20 @@ def timings_from_list(flist, offset=0): # list of frame timings t_frames = [] for i in range(0, farray.shape[0]): - for t in range(0, farray[i,0]): + for t in range(0, farray[i, 0]): # frame start time t0 = tsum - tsum += farray[i,1] + tsum += farray[i, 1] # frame end time t1 = tsum # append the timings to the list t_frames.append([t0, t1]) - frms[fi] = farray[i,1] + frms[fi] = farray[i, 1] fi += 1 else: raise TypeError('Unrecognised data input.') # prepare the output dictionary - out = {'total':tsum, 'frames':frms, 'timings':t_frames} + out = {'total': tsum, 'frames': frms, 'timings': t_frames} return out @@ -349,117 +349,120 @@ def axial_lut(Cnt): ''' NRNG = Cnt['NRNG'] - if 
Cnt['SPN']==1: + if Cnt['SPN'] == 1: # number of rings calculated for the given ring range (optionally we can use only part of the axial FOV) NRNG_c = Cnt['RNG_END'] - Cnt['RNG_STRT'] # number of sinos in span-1 NSN1_c = NRNG_c**2 # correct for the max. ring difference in the full axial extent (don't use ring range (1,63) as for this case no correction) - if NRNG_c==64: + if NRNG_c == 64: NSN1_c -= 12 - SEG0_c = 2*NRNG_c-1 + SEG0_c = 2*NRNG_c - 1 else: NRNG_c = NRNG NSN1_c = Cnt['NSN1'] - if Cnt['RNG_END']!=NRNG or Cnt['RNG_STRT']!=0: + if Cnt['RNG_END'] != NRNG or Cnt['RNG_STRT'] != 0: log.error('the reduced axial FOV only works in span-1!') return None #ring dimensions - rng = np.zeros((NRNG,2), dtype = np.float32) - z = -.5*NRNG*Cnt['AXR'] + rng = np.zeros((NRNG, 2), dtype=np.float32) + z = -.5 * NRNG * Cnt['AXR'] for i in range(NRNG): - rng[i,0] = z + rng[i, 0] = z z += Cnt['AXR'] - rng[i,1] = z + rng[i, 1] = z #--create mapping from ring difference to segment number #ring difference range - rd = list(range(-Cnt['MRD'],Cnt['MRD']+1)) + rd = list(range(-Cnt['MRD'], Cnt['MRD'] + 1)) #ring difference to segment - rd2sg = -1*np.ones((len(rd),2,), dtype=np.int32) + rd2sg = -1 * np.ones(( + len(rd), + 2, + ), dtype=np.int32) for i in range(len(rd)): for iseg in range(len(Cnt['MNRD'])): - if ( rd[i]>=Cnt['MNRD'][iseg] ) and ( rd[i]<=Cnt['MXRD'][iseg] ): - rd2sg[i,:] = np.array([rd[i], iseg]) + if (rd[i] >= Cnt['MNRD'][iseg]) and (rd[i] <= Cnt['MXRD'][iseg]): + rd2sg[i, :] = np.array([rd[i], iseg]) #create two Michelograms for segments (Mseg) #and absolute axial position for individual sinos (Mssrb) which is single slice rebinning - Mssrb = -1*np.ones((NRNG,NRNG), dtype=np.int32) - Mseg = -1*np.ones((NRNG,NRNG), dtype=np.int32) + Mssrb = -1 * np.ones((NRNG, NRNG), dtype=np.int32) + Mseg = -1 * np.ones((NRNG, NRNG), dtype=np.int32) for r1 in range(Cnt['RNG_STRT'], Cnt['RNG_END']): for r0 in range(Cnt['RNG_STRT'], Cnt['RNG_END']): - if abs(r1-r0)>Cnt['MRD']: + if abs(r1 
- r0) > Cnt['MRD']: continue - ssp = r0+r1 #segment sino position (axially: 0-126) - rd = r1-r0 - jseg = rd2sg[rd2sg[:,0]==rd, 1] - Mssrb[r1,r0] = ssp - Mseg[r1,r0] = jseg #negative segments are on top diagonals + ssp = r0 + r1 #segment sino position (axially: 0-126) + rd = r1 - r0 + jseg = rd2sg[rd2sg[:, 0] == rd, 1] + Mssrb[r1, r0] = ssp + Mseg[r1, r0] = jseg #negative segments are on top diagonals # np.savetxt("Mssrb.csv", Mssrb, delimiter=",", fmt='%d') # np.savetxt("Mseg.csv", Mseg, delimiter=",", fmt='%d') #create a Michelogram map from rings to sino number in span-11 (1..837) - Msn = -1*np.ones((NRNG,NRNG), dtype=np.int32) + Msn = -1 * np.ones((NRNG, NRNG), dtype=np.int32) #number of span-1 sinos per sino in span-11 - Mnos = -1*np.ones((NRNG,NRNG), dtype=np.int32) + Mnos = -1 * np.ones((NRNG, NRNG), dtype=np.int32) i = 0 - for iseg in range(0,len(Cnt['SEG'])): - msk = (Mseg==iseg) + for iseg in range(0, len(Cnt['SEG'])): + msk = (Mseg == iseg) Mtmp = np.copy(Mssrb) Mtmp[~msk] = -1 uq = np.unique(Mtmp[msk]) - for u in range(0,len(uq)): + for u in range(0, len(uq)): #print(i) - Msn [ Mtmp==uq[u] ] = i - Mnos[ Mtmp==uq[u] ] = np.sum(Mtmp==uq[u]) + Msn[Mtmp == uq[u]] = i + Mnos[Mtmp == uq[u]] = np.sum(Mtmp == uq[u]) i += 1 # np.savetxt("Mnos.csv", Mnos, delimiter=",", fmt='%d') # np.savetxt("Msn.csv", Msn, delimiter=",", fmt='%d') #====full LUT - sn1_rno = np.zeros((NSN1_c,2), dtype=np.int16) - sn1_ssrb= np.zeros((NSN1_c), dtype=np.int16) - sn1_sn11= np.zeros((NSN1_c), dtype=np.int16) + sn1_rno = np.zeros((NSN1_c, 2), dtype=np.int16) + sn1_ssrb = np.zeros((NSN1_c), dtype=np.int16) + sn1_sn11 = np.zeros((NSN1_c), dtype=np.int16) sn1_sn11no = np.zeros((NSN1_c), dtype=np.int8) - sni = 0 #full linear index, upto 4084 - Msn1 = -1*np.ones((NRNG,NRNG), dtype=np.int16) #michelogram of sino numbers for spn-1 - for ro in range(0,NRNG): - if ro==0: + sni = 0 #full linear index, upto 4084 + Msn1 = -1 * np.ones((NRNG, NRNG), dtype=np.int16) #michelogram of sino numbers for 
spn-1 + for ro in range(0, NRNG): + if ro == 0: oblique = 1 else: oblique = 2 for m in range(oblique): - strt = NRNG*(ro+Cnt['RNG_STRT']) + Cnt['RNG_STRT'] - stop = (Cnt['RNG_STRT']+NRNG_c)*NRNG - step = NRNG+1 - for li in range(strt, stop, step): #goes along a diagonal started in the first row at r1 - #linear indecies of michelogram --> subscript indecies for positive and negative RDs - if m==0: - r1 = int(li/NRNG) + strt = NRNG * (ro + Cnt['RNG_STRT']) + Cnt['RNG_STRT'] + stop = (Cnt['RNG_STRT'] + NRNG_c) * NRNG + step = NRNG + 1 + for li in range(strt, stop, step): #goes along a diagonal started in the first row at r1 + #linear indecies of michelogram --> subscript indecies for positive and negative RDs + if m == 0: + r1 = int(li / NRNG) r0 = int(li - r1*NRNG) - else: #for positive now (? or vice versa) - r0 = int(li/NRNG) + else: #for positive now (? or vice versa) + r0 = int(li / NRNG) r1 = int(li - r0*NRNG) - #avoid case when RD>MRD - if (Msn[r1,r0])<0: + #avoid case when RD>MRD + if (Msn[r1, r0]) < 0: continue - sn1_rno[sni,0] = r0 - sn1_rno[sni,1] = r1 + sn1_rno[sni, 0] = r0 + sn1_rno[sni, 1] = r1 - sn1_ssrb[sni] = Mssrb[r1,r0] - sn1_sn11[sni] = Msn[r0,r1] + sn1_ssrb[sni] = Mssrb[r1, r0] + sn1_sn11[sni] = Msn[r0, r1] - sn1_sn11no[sni] = Mnos[r0,r1] + sn1_sn11no[sni] = Mnos[r0, r1] - Msn1[r0,r1] = sni + Msn1[r0, r1] = sni #-- sni += 1 #span-11 sino to SSRB - sn11_ssrb = np.zeros(Cnt['NSN11'], dtype=np.int32); + sn11_ssrb = np.zeros(Cnt['NSN11'], dtype=np.int32) sn11_ssrb[:] -= 1 sn1_ssrno = np.zeros(Cnt['NSEG0'], dtype=np.int8) for i in range(NSN1_c): @@ -468,77 +471,77 @@ def axial_lut(Cnt): sn11_ssrno = np.zeros(Cnt['NSEG0'], dtype=np.int8) for i in range(Cnt['NSN11']): - if sn11_ssrb[i]>0: sn11_ssrno[sn11_ssrb[i]] += 1 + if sn11_ssrb[i] > 0: sn11_ssrno[sn11_ssrb[i]] += 1 - sn1_ssrno = sn1_ssrno[np.unique(sn1_ssrb)] + sn1_ssrno = sn1_ssrno[np.unique(sn1_ssrb)] sn11_ssrno = sn11_ssrno[np.unique(sn1_ssrb)] - sn11_ssrb = sn11_ssrb[sn11_ssrb>=0] + sn11_ssrb = 
sn11_ssrb[sn11_ssrb >= 0] #--------------------------------------------------------------------- #linear index (along diagonals of Michelogram) to rings # the number of Michelogram elements considered in projection calculations - NLI2R_c = int(NRNG_c**2/2. + NRNG_c/2.) + NLI2R_c = int(NRNG_c**2 / 2. + NRNG_c/2.) # if the whole scanner is used then account for the MRD and subtract 6 ring permutations - if NRNG_c==NRNG: + if NRNG_c == NRNG: NLI2R_c -= 6 - li2r = np.zeros((NLI2R_c,2), dtype=np.int8) + li2r = np.zeros((NLI2R_c, 2), dtype=np.int8) #the same as above but to sinos in span-11 - li2sn = np.zeros((NLI2R_c,2), dtype=np.int16) - li2sn1 = np.zeros((NLI2R_c,2), dtype=np.int16) - li2rng = np.zeros((NLI2R_c,2), dtype=np.float32) + li2sn = np.zeros((NLI2R_c, 2), dtype=np.int16) + li2sn1 = np.zeros((NLI2R_c, 2), dtype=np.int16) + li2rng = np.zeros((NLI2R_c, 2), dtype=np.float32) #...to number of sinos (nos) li2nos = np.zeros((NLI2R_c), dtype=np.int8) dli = 0 for ro in range(0, NRNG_c): # selects the sub-Michelogram of the whole Michelogram - strt = NRNG*(ro+Cnt['RNG_STRT']) + Cnt['RNG_STRT'] - stop = (Cnt['RNG_STRT']+NRNG_c)*NRNG - step = NRNG+1 + strt = NRNG * (ro + Cnt['RNG_STRT']) + Cnt['RNG_STRT'] + stop = (Cnt['RNG_STRT'] + NRNG_c) * NRNG + step = NRNG + 1 for li in range(strt, stop, step): #goes along a diagonal started in the first row at r2o - #from the linear indexes of Michelogram get the subscript indexes - r1 = int(li/NRNG) + #from the linear indexes of Michelogram get the subscript indexes + r1 = int(li / NRNG) r0 = int(li - r1*NRNG) - #avoid case when RD>MRD - if (Msn[r1,r0])<0: + #avoid case when RD>MRD + if (Msn[r1, r0]) < 0: continue - # li2r[0, dli] = r0 - # li2r[1, dli] = r1 - # #-- - # li2rng[0, dli] = rng[r0,0]; - # li2rng[1, dli] = rng[r1,0]; - # #-- - # li2sn[0, dli] = Msn[r0,r1] - # li2sn[1, dli] = Msn[r1,r0] - - li2r[dli,0] = r0 - li2r[dli,1] = r1 + # li2r[0, dli] = r0 + # li2r[1, dli] = r1 + # #-- + # li2rng[0, dli] = rng[r0,0]; + # 
li2rng[1, dli] = rng[r1,0]; + # #-- + # li2sn[0, dli] = Msn[r0,r1] + # li2sn[1, dli] = Msn[r1,r0] + + li2r[dli, 0] = r0 + li2r[dli, 1] = r1 #-- - li2rng[dli,0] = rng[r0,0] - li2rng[dli,1] = rng[r1,0] + li2rng[dli, 0] = rng[r0, 0] + li2rng[dli, 1] = rng[r1, 0] #-- - li2sn[dli, 0] = Msn[r0,r1] - li2sn[dli, 1] = Msn[r1,r0] + li2sn[dli, 0] = Msn[r0, r1] + li2sn[dli, 1] = Msn[r1, r0] - li2sn1[dli, 0] = Msn1[r0,r1] - li2sn1[dli, 1] = Msn1[r1,r0] + li2sn1[dli, 0] = Msn1[r0, r1] + li2sn1[dli, 1] = Msn1[r1, r0] # li2sn[0, dli] = Msn[r1,r0] # li2sn[1, dli] = Msn[r0,r1] #-- - li2nos[dli] = Mnos[r1,r0] + li2nos[dli] = Mnos[r1, r0] #-- dli += 1 # log.info('number of diagonal indexes (in Michelogram) accounted for: {}'.format(dli)) #--------------------------------------------------------------------- - - axLUT = {'li2rno':li2r, 'li2sn':li2sn, 'li2sn1':li2sn1, 'li2nos':li2nos, 'li2rng':li2rng, - 'sn1_rno':sn1_rno, 'sn1_ssrb':sn1_ssrb, 'sn1_sn11':sn1_sn11, 'sn1_sn11no':sn1_sn11no, - 'sn11_ssrb':sn11_ssrb, 'sn1_ssrno':sn1_ssrno, 'sn11_ssrno':sn11_ssrno, - 'Msn11':Msn, 'Msn1':Msn1, 'Mnos':Mnos, 'rng':rng} + axLUT = { + 'li2rno': li2r, 'li2sn': li2sn, 'li2sn1': li2sn1, 'li2nos': li2nos, 'li2rng': li2rng, + 'sn1_rno': sn1_rno, 'sn1_ssrb': sn1_ssrb, 'sn1_sn11': sn1_sn11, 'sn1_sn11no': sn1_sn11no, + 'sn11_ssrb': sn11_ssrb, 'sn1_ssrno': sn1_ssrno, 'sn11_ssrno': sn11_ssrno, 'Msn11': Msn, + 'Msn1': Msn1, 'Mnos': Mnos, 'rng': rng} log.debug('axial LUTs done.') @@ -546,10 +549,10 @@ def axial_lut(Cnt): def sino2ssr(sino, axLUT, Cnt): - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: slut = axLUT['sn1_ssrb'] snno = Cnt['NSN1'] - elif Cnt['SPN']==11: + elif Cnt['SPN'] == 11: slut = axLUT['sn11_ssrb'] snno = Cnt['NSN11'] else: @@ -559,7 +562,7 @@ def sino2ssr(sino, axLUT, Cnt): ssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) for i in range(snno): - ssr[slut[i],:,:] += sino[i,:,:] + ssr[slut[i], :, :] += sino[i, :, :] return ssr @@ -583,13 +586,13 @@ def 
reduce_rings(pars, rs=0, re=64): # RNG_STRT is included in detection # RNG_END is not included in detection process pars['Cnt']['RNG_STRT'] = rs - pars['Cnt']['RNG_END'] = re + pars['Cnt']['RNG_END'] = re # now change the voxels dims too - vz0 = 2*pars['Cnt']['RNG_STRT'] - vz1 = 2*(pars['Cnt']['RNG_END']-1) + vz0 = 2 * pars['Cnt']['RNG_STRT'] + vz1 = 2 * (pars['Cnt']['RNG_END'] - 1) # number of axial voxels - pars['Cnt']['rSO_IMZ'] = vz1-vz0+1 - pars['Cnt']['rSZ_IMZ'] = vz1-vz0+1 + pars['Cnt']['rSO_IMZ'] = vz1 - vz0 + 1 + pars['Cnt']['rSZ_IMZ'] = vz1 - vz0 + 1 # axial voxel size for scatter (mu-map and emission image) # pars['Cnt']['SS_IMZ'] = pars['Cnt']['rSG_IMZ'] # number of rings customised for the given ring range (only optional in span-1) @@ -600,7 +603,7 @@ def reduce_rings(pars, rs=0, re=64): pars['Cnt']['rNSN1'] = rNSN1 # correct for the limited max. ring difference in the full axial extent. # don't use ring range (1,63) as for this case no correction - if rNRNG==64: rNSN1 -= 12 + if rNRNG == 64: rNSN1 -= 12 # apply the new ring subset to axial LUTs raxLUT = axial_lut(pars['Cnt']) # michelogram for reduced rings in span-1 @@ -609,7 +612,7 @@ def reduce_rings(pars, rs=0, re=64): Msn1 = np.copy(pars['axLUT']['Msn1']) # from full span-1 sinogram index to reduced rings sinogram index rlut = np.zeros(rNSN1, dtype=np.int16) - rlut[Msn1_c[Msn1_c>=0]] = Msn1[Msn1_c>=0] + rlut[Msn1_c[Msn1_c >= 0]] = Msn1[Msn1_c >= 0] raxLUT['rLUT'] = rlut pars['axLUT'] = raxLUT @@ -624,10 +627,10 @@ def transaxial_lut(Cnt, visualisation=False): if visualisation: #---visualisation of the crystal ring in transaxial view - p = 8 #pixel density of the visualisation - VISXY = Cnt['SO_IMX']*p - T = np.zeros((VISXY,VISXY), dtype=np.float32) - #--- + p = 8 #pixel density of the visualisation + VISXY = Cnt['SO_IMX'] * p + T = np.zeros((VISXY, VISXY), dtype=np.float32) + #--- #--- crystal coordinates transaxially #> block width @@ -636,62 +639,60 @@ def transaxial_lut(Cnt, 
visualisation=False): #> block gap [cm] dg = 0.474 NTBLK = 56 - alpha = 0.1122 #2*pi/NTBLK - crs = np.zeros((Cnt['NCRS'],4), dtype=np.float32) + alpha = 0.1122 #2*pi/NTBLK + crs = np.zeros((Cnt['NCRS'], 4), dtype=np.float32) #> phi angle points in the middle and is used for obtaining the normal of detector block - phi = 0.5*pi - alpha/2 -0.001 + phi = 0.5*pi - alpha/2 - 0.001 for bi in range(NTBLK): #> tangent point (ring against detector block) # ye = RE*np.sin(phi) # xe = RE*np.cos(phi) - y = Cnt['R_RING']*np.sin(phi) - x = Cnt['R_RING']*np.cos(phi) + y = Cnt['R_RING'] * np.sin(phi) + x = Cnt['R_RING'] * np.cos(phi) #> vector for the face of crystals - pv = np.array([-y, x]) + pv = np.array([-y, x]) pv /= np.sum(pv**2)**.5 #> update phi for next block phi -= alpha #> end block points - xcp = x + (bw/2)*pv[0] - ycp = y + (bw/2)*pv[1] + xcp = x + (bw/2) * pv[0] + ycp = y + (bw/2) * pv[1] if visualisation: - u = int( .5*VISXY + np.floor(xcp/(Cnt['SO_VXY']/p)) ) - v = int( .5*VISXY - np.ceil (ycp/(Cnt['SO_VXY']/p)) ) - T[v,u] = 5 - - for n in range(1,9): - c = bi*9 +n-1 - crs[c,0] = xcp - crs[c,1] = ycp - xc = x + (bw/2-n*bw/8)*pv[0] - yc = y + (bw/2-n*bw/8)*pv[1] - crs[c,2] = xc - crs[c,3] = yc + u = int(.5*VISXY + np.floor(xcp / (Cnt['SO_VXY'] / p))) + v = int(.5*VISXY - np.ceil(ycp / (Cnt['SO_VXY'] / p))) + T[v, u] = 5 + + for n in range(1, 9): + c = bi*9 + n - 1 + crs[c, 0] = xcp + crs[c, 1] = ycp + xc = x + (bw/2 - n*bw/8) * pv[0] + yc = y + (bw/2 - n*bw/8) * pv[1] + crs[c, 2] = xc + crs[c, 3] = yc xcp = xc ycp = yc if visualisation: - u = int(.5*VISXY + np.floor(xcp/(Cnt['SO_VXY']/p))) - v = int(.5*VISXY - np.ceil (ycp/(Cnt['SO_VXY']/p))) - T[v,u] = 2.5 + u = int(.5*VISXY + np.floor(xcp / (Cnt['SO_VXY'] / p))) + v = int(.5*VISXY - np.ceil(ycp / (Cnt['SO_VXY'] / p))) + T[v, u] = 2.5 out = dict(crs=crs) if visualisation: out['visual'] = T - - #> crystals reduced by the gaps (dead crystals) - crsr = -1*np.ones(Cnt['NCRS'], dtype=np.int16) + crsr = -1 * 
np.ones(Cnt['NCRS'], dtype=np.int16) ci = 0 for i in range(Cnt['NCRS']): - if (((i + Cnt['OFFGAP']) % Cnt['TGAP'])>0): + if (((i + Cnt['OFFGAP']) % Cnt['TGAP']) > 0): crsr[i] = ci ci += 1 if visualisation: @@ -705,75 +706,79 @@ def transaxial_lut(Cnt, visualisation=False): msino = np.zeros((Cnt['NSBINS'], Cnt['NSANGLES']), dtype=np.int8) # LUT: sino -> crystal and crystal -> sino - s2cF = np.zeros((Cnt['NSBINS']*Cnt['NSANGLES'], 2), dtype=np.int16) - c2sF = -1*np.ones((Cnt['NCRS'], Cnt['NCRS']), dtype=np.int32) + s2cF = np.zeros((Cnt['NSBINS'] * Cnt['NSANGLES'], 2), dtype=np.int16) + c2sF = -1 * np.ones((Cnt['NCRS'], Cnt['NCRS']), dtype=np.int32) #> with projection bin fast changing (c2s has angle changing fast). #> this is used in scatter estimation - c2sFw = -1*np.ones((Cnt['NCRS'], Cnt['NCRS']), dtype=np.int32) + c2sFw = -1 * np.ones((Cnt['NCRS'], Cnt['NCRS']), dtype=np.int32) #> global sinogram index (linear) of live crystals (excludes gaps) awi = 0 for iw in range(Cnt['NSBINS']): for ia in range(Cnt['NSANGLES']): - c0 = int( np.floor( (ia + 0.5*(Cnt['NCRS'] - 2 + Cnt['NSBINS']/2 - iw)) % Cnt['NCRS'] ) ) - c1 = int( np.floor( (ia + 0.5*(2*Cnt['NCRS'] - 2 - Cnt['NSBINS']/2 + iw)) % Cnt['NCRS'] ) ) + c0 = int( + np.floor((ia + 0.5 * (Cnt['NCRS'] - 2 + Cnt['NSBINS'] / 2 - iw)) % Cnt['NCRS'])) + c1 = int( + np.floor( + (ia + 0.5 * (2 * Cnt['NCRS'] - 2 - Cnt['NSBINS'] / 2 + iw)) % Cnt['NCRS'])) - s2cF[ia + iw*Cnt['NSANGLES'], 0] = c0 - s2cF[ia + iw*Cnt['NSANGLES'], 1] = c1 + s2cF[ia + iw * Cnt['NSANGLES'], 0] = c0 + s2cF[ia + iw * Cnt['NSANGLES'], 1] = c1 - c2sF[c1, c0] = ia + iw*Cnt['NSANGLES'] - c2sF[c0, c1] = ia + iw*Cnt['NSANGLES'] + c2sF[c1, c0] = ia + iw * Cnt['NSANGLES'] + c2sF[c0, c1] = ia + iw * Cnt['NSANGLES'] - if (((((c0 + Cnt['OFFGAP']) % Cnt['TGAP']) * ((c1 + Cnt['OFFGAP']) % Cnt['TGAP']))>0)): + if (((((c0 + Cnt['OFFGAP']) % Cnt['TGAP']) * + ((c1 + Cnt['OFFGAP']) % Cnt['TGAP'])) > 0)): #> masking gaps in 2D sinogram msino[iw, ia] = 1 awi += 1 - 
c2sFw[c1, c0] = iw + ia*Cnt['NSBINS'] - c2sFw[c0, c1] = iw + ia*Cnt['NSBINS'] + c2sFw[c1, c0] = iw + ia * Cnt['NSBINS'] + c2sFw[c0, c1] = iw + ia * Cnt['NSBINS'] - out['s2cF'] = s2cF - out['c2sF'] = c2sF + out['s2cF'] = s2cF + out['c2sF'] = c2sF out['c2sFw'] = c2sFw out['msino'] = msino #> number of total transaxial live crystals (excludes gaps) out['Naw'] = awi - s2c = np.zeros((out['Naw'],2), dtype=np.int16) - s2cr = np.zeros((out['Naw'],2), dtype=np.int16) - cr2s = np.zeros((Cnt['NCRSR'],Cnt['NCRSR']), dtype=np.int32); - aw2sn = np.zeros((out['Naw'],2), dtype=np.int16) + s2c = np.zeros((out['Naw'], 2), dtype=np.int16) + s2cr = np.zeros((out['Naw'], 2), dtype=np.int16) + cr2s = np.zeros((Cnt['NCRSR'], Cnt['NCRSR']), dtype=np.int32) + aw2sn = np.zeros((out['Naw'], 2), dtype=np.int16) aw2ali = np.zeros(out['Naw'], dtype=np.int32) #> live crystals which are in coincidence - cij = np.zeros((Cnt['NCRSR'],Cnt['NCRSR']), dtype=np.int8) + cij = np.zeros((Cnt['NCRSR'], Cnt['NCRSR']), dtype=np.int8) awi = 0 for iw in range(Cnt['NSBINS']): for ia in range(Cnt['NSANGLES']): - if (msino[iw,ia]>0): - c0 = s2cF[Cnt['NSANGLES']*iw + ia, 0] - c1 = s2cF[Cnt['NSANGLES']*iw + ia, 1] + if (msino[iw, ia] > 0): + c0 = s2cF[Cnt['NSANGLES'] * iw + ia, 0] + c1 = s2cF[Cnt['NSANGLES'] * iw + ia, 1] - s2c[awi,0] = c0 - s2c[awi,1] = c1 + s2c[awi, 0] = c0 + s2c[awi, 1] = c1 - s2cr[awi,0] = crsr[c0] - s2cr[awi,1] = crsr[c1] + s2cr[awi, 0] = crsr[c0] + s2cr[awi, 1] = crsr[c1] #> reduced crystal index (after getting rid of crystal gaps) cr2s[crsr[c1], crsr[c0]] = awi cr2s[crsr[c0], crsr[c1]] = awi - aw2sn[awi,0] = ia - aw2sn[awi,1] = iw + aw2sn[awi, 0] = ia + aw2sn[awi, 1] = iw - aw2ali[awi] = iw + Cnt['NSBINS']*ia + aw2ali[awi] = iw + Cnt['NSBINS'] * ia #> square matrix of crystals in coincidence cij[crsr[c0], crsr[c1]] = 1 @@ -781,15 +786,14 @@ def transaxial_lut(Cnt, visualisation=False): awi += 1 - out['s2c'] = s2c - out['s2cr'] = s2cr - out['cr2s'] = cr2s - out['aw2sn'] = aw2sn + out['s2c'] 
= s2c + out['s2cr'] = s2cr + out['cr2s'] = cr2s + out['aw2sn'] = aw2sn out['aw2ali'] = aw2ali - out['cij'] = cij + out['cij'] = cij #---------------------------------- - # # cij - a square matrix of crystals in coincidence (transaxially) # # crsri - indexes of crystals with the gap crystals taken out (therefore reduced) # # aw2sn - LUT array [AW x 2] translating linear index into a 2D sinogram with dead LOR (gaps) @@ -807,7 +811,6 @@ def transaxial_lut(Cnt, visualisation=False): # 'aw2ali':aw2ali, 's2c':s2c, 's2cr':s2cr, 's2cF':s2cF, 'Naw':Naw, # 'c2sF':c2sF, 'cr2s':cr2s} - return out @@ -818,7 +821,8 @@ def transaxial_lut(Cnt, visualisation=False): def get_npfiles(dfile, datain, v=False): logger = log.info if v else log.debug - logger(dedent('''\ + logger( + dedent('''\ ------------------------------------------------------------------ file: {} ------------------------------------------------------------------ @@ -838,7 +842,7 @@ def get_npfiles(dfile, datain, v=False): datain['hmumap'] = dfile logger('mu-map for hardware.') - if os.path.basename(dfile)[:8]=='sinos_s1': + if os.path.basename(dfile)[:8] == 'sinos_s1': datain['sinos'] = dfile logger('prompt sinogram data.') @@ -849,36 +853,37 @@ def get_npfiles(dfile, datain, v=False): def get_niifiles(dfile, datain, v=False): logger = log.info if v else log.debug - logger(dedent('''\ + logger( + dedent('''\ ------------------------------------------------------------------ file: {} ------------------------------------------------------------------ ''').format(dfile)) #> NIfTI file of converted MR-based mu-map from DICOMs - if os.path.basename(dfile).split('.nii')[0]=='mumap-from-DICOM': + if os.path.basename(dfile).split('.nii')[0] == 'mumap-from-DICOM': datain['mumapNII'] = dfile logger('mu-map for the object.') #> NIfTI file of pseudo CT - fpct = glob.glob( os.path.join(os.path.dirname(dfile), '*_synth.nii*') ) - if len(fpct)>0: + fpct = glob.glob(os.path.join(os.path.dirname(dfile), '*_synth.nii*')) + if 
len(fpct) > 0: datain['pCT'] = fpct[0] logger('pseudoCT of the object.') - fpct = glob.glob( os.path.join(os.path.dirname(dfile), '*_p[cC][tT].nii*') ) - if len(fpct)>0: + fpct = glob.glob(os.path.join(os.path.dirname(dfile), '*_p[cC][tT].nii*')) + if len(fpct) > 0: datain['pCT'] = fpct[0] logger('pseudoCT of the object.') #MR T1 - fmri = glob.glob( os.path.join(os.path.dirname(dfile), '[tT]1*.nii*') ) - if len(fmri)==1: + fmri = glob.glob(os.path.join(os.path.dirname(dfile), '[tT]1*.nii*')) + if len(fmri) == 1: bnm = os.path.basename(fmri[0]).lower() if not {'giflabels', 'parcellation', 'pct', 'n4bias'}.intersection(bnm): datain['T1nii'] = fmri[0] logger('NIfTI for T1w of the object.') - elif len(fmri)>1: + elif len(fmri) > 1: for fg in fmri: bnm = os.path.basename(fg).lower() if not {'giflabels', 'parcellation', 'pct', 'n4bias'}.intersection(bnm): @@ -888,13 +893,13 @@ def get_niifiles(dfile, datain, v=False): datain['T1nii_2'] = fg #MR T1 N4bias-corrected - fmri = glob.glob( os.path.join(os.path.dirname(dfile), '[tT]1*[nN]4bias*.nii*') ) - if len(fmri)==1: + fmri = glob.glob(os.path.join(os.path.dirname(dfile), '[tT]1*[nN]4bias*.nii*')) + if len(fmri) == 1: bnm = os.path.basename(fmri[0]).lower() if not {'giflabels', 'parcellation', 'pct'}.intersection(bnm): datain['T1N4'] = fmri[0] logger('NIfTI for T1w of the object.') - elif len(fmri)>1: + elif len(fmri) > 1: for fg in fmri: bnm = os.path.basename(fg).lower() if not {'giflabels', 'parcellation', 'pct'}.intersection(bnm): @@ -903,43 +908,43 @@ def get_niifiles(dfile, datain, v=False): elif 'usable' in bnm: datain['T1N4_2'] = fg - #T1w corrected - fbc = glob.glob( os.path.join(os.path.dirname(dfile), '*gifbc.nii*') ) - if len(fbc)==1: + fbc = glob.glob(os.path.join(os.path.dirname(dfile), '*gifbc.nii*')) + if len(fbc) == 1: datain['T1bc'] = fbc[0] logger('NIfTI for bias corrected T1w of the object:\n{}'.format(fbc[0])) - fbc = glob.glob( os.path.join(os.path.dirname(dfile), '*[tT]1*BiasCorrected.nii*') ) - if 
len(fbc)==1: + fbc = glob.glob(os.path.join(os.path.dirname(dfile), '*[tT]1*BiasCorrected.nii*')) + if len(fbc) == 1: datain['T1bc'] = fbc[0] logger('NIfTI for bias corrected T1w of the object:\n{}'.format(fbc[0])) #T1-based labels after parcellation - flbl = glob.glob( os.path.join(os.path.dirname(dfile), '*giflabels.nii*') ) - if len(flbl)==1: + flbl = glob.glob(os.path.join(os.path.dirname(dfile), '*giflabels.nii*')) + if len(flbl) == 1: datain['T1lbl'] = flbl[0] logger('NIfTI for regional parcellations of the object:\n{}'.format(flbl[0])) - flbl = glob.glob( os.path.join(os.path.dirname(dfile), '*[tT]1*[Pp]arcellation.nii*') ) - if len(flbl)==1: + flbl = glob.glob(os.path.join(os.path.dirname(dfile), '*[tT]1*[Pp]arcellation.nii*')) + if len(flbl) == 1: datain['T1lbl'] = flbl[0] logger('NIfTI for regional parcellations of the object:\n{}'.format(flbl[0])) #reconstructed emission data without corrections, minimum 2 osem iter - fpct = glob.glob( os.path.join(os.path.dirname(dfile), '*__ACbed.nii*') ) - if len(fpct)>0: + fpct = glob.glob(os.path.join(os.path.dirname(dfile), '*__ACbed.nii*')) + if len(fpct) > 0: datain['em_nocrr'] = fpct[0] logger('pseudoCT of the object.') #reconstructed emission data with corrections, minimum 3 osem iter - fpct = glob.glob( os.path.join(os.path.dirname(dfile), '*QNT*.nii*') ) - if len(fpct)>0: + fpct = glob.glob(os.path.join(os.path.dirname(dfile), '*QNT*.nii*')) + if len(fpct) > 0: datain['em_crr'] = fpct[0] logger('pseudoCT of the object.') def get_dicoms(dfile, datain, Cnt): # v = Cnt['VERBOSE'] - log.debug(dedent('''\ + log.debug( + dedent('''\ ------------------------------------------------------------------ file: {} ------------------------------------------------------------------ @@ -950,23 +955,23 @@ def get_dicoms(dfile, datain, Cnt): #> check if it is norm file if 'mmr' in dcmtype and 'norm' in dcmtype: - if os.path.splitext(dfile)[-1].lower()=='.dcm': + if os.path.splitext(dfile)[-1].lower() == '.dcm': 
datain['nrm_dcm'] = dfile #> check if the binary file exists - if os.path.isfile(dfile[:-4]+'.bf'): - datain['nrm_bf'] = dfile[:-4]+'.bf' + if os.path.isfile(dfile[:-4] + '.bf'): + datain['nrm_bf'] = dfile[:-4] + '.bf' else: - log.error('file does not exists:\n{}'.format(dfile[:-4]+'.bf')) - elif os.path.splitext(dfile)[-1].lower()=='.ima': + log.error('file does not exists:\n{}'.format(dfile[:-4] + '.bf')) + elif os.path.splitext(dfile)[-1].lower() == '.ima': datain['nrm_ima'] = dfile # extract the binary norm data from the IMA DICOM - if [0x7fe1,0x1010] in d: - nrm = d[0x7fe1,0x1010].value + if [0x7fe1, 0x1010] in d: + nrm = d[0x7fe1, 0x1010].value else: log.error('could not find binary normalisation data in the IMA DICOM file.') # binary file name - bf = os.path.splitext(dfile)[0]+'.bf' + bf = os.path.splitext(dfile)[0] + '.bf' with open(bf, 'wb') as f: f.write(nrm) datain['nrm_bf'] = bf @@ -974,26 +979,27 @@ def get_dicoms(dfile, datain, Cnt): #--- check if it is list-mode file elif 'mmr' in dcmtype and 'list' in dcmtype: - if os.path.splitext(dfile)[-1]=='.dcm': + if os.path.splitext(dfile)[-1] == '.dcm': datain['lm_dcm'] = dfile #check if the binary file exists - if os.path.isfile(dfile[:-4]+'.bf'): - datain['lm_bf'] = dfile[:-4]+'.bf' + if os.path.isfile(dfile[:-4] + '.bf'): + datain['lm_bf'] = dfile[:-4] + '.bf' else: - log.error('file does not exists: \n{}'.format(dfile[:-4]+'.bf')) - elif os.path.splitext(dfile)[-1].lower()=='.ima': + log.error('file does not exists: \n{}'.format(dfile[:-4] + '.bf')) + elif os.path.splitext(dfile)[-1].lower() == '.ima': datain['lm_ima'] = dfile # extract the binary list-mode data from the IMA DICOM if it does not exist already # binary file name bf = os.path.splitext(dfile)[0] + '.bf' - if [0x7fe1,0x1010] in d and not os.path.isfile(bf): - lm = d[0x7fe1,0x1010].value + if [0x7fe1, 0x1010] in d and not os.path.isfile(bf): + lm = d[0x7fe1, 0x1010].value with open(bf, 'wb') as f: f.write(lm) datain['lm_bf'] = bf 
log.debug('saved list-mode data to binary file: \n{}'.format(bf)) elif os.path.isfile(bf): - log.debug('the binary list-mode data was already extracted from the IMA DICOM file.') + log.debug( + 'the binary list-mode data was already extracted from the IMA DICOM file.') datain['lm_bf'] = bf else: log.error('could not find binary list-mode data in the IMA DICOM file.') @@ -1008,8 +1014,8 @@ def get_dicoms(dfile, datain, Cnt): else: f0 = -1 - if f0>=0: - f1 = f0+lmhdr[f0:].find('\n') + if f0 >= 0: + f1 = f0 + lmhdr[f0:].find('\n') #regular expression for the isotope symbol p = re.compile(r'(?<=:=)\s*\S*') # the name of isotope: @@ -1020,17 +1026,17 @@ def get_dicoms(dfile, datain, Cnt): #> if no info in interfile header than look in the CSA header else: f0 = csahdr.find('RadionuclideCodeSequence') - if f0<0: - print('w> could not find isotope name. enter manually into Cnt[''ISOTOPE'']') + if f0 < 0: + print('w> could not find isotope name. enter manually into Cnt[' 'ISOTOPE' ']') return None - istp_coded = re.search(r'(?<=CodeValue:)\S*', csahdr[f0:f0+100]).group() - if istp_coded=='C-111A1': Cnt['ISOTOPE'] = 'F18' - elif istp_coded=='C-105A1': Cnt['ISOTOPE'] = 'C11' - elif istp_coded=='C-B1038': Cnt['ISOTOPE'] = 'O15' - elif istp_coded=='C-128A2': Cnt['ISOTOPE'] = 'Ge68' - elif istp_coded=='C-131A3': Cnt['ISOTOPE'] = 'Ga68' + istp_coded = re.search(r'(?<=CodeValue:)\S*', csahdr[f0:f0 + 100]).group() + if istp_coded == 'C-111A1': Cnt['ISOTOPE'] = 'F18' + elif istp_coded == 'C-105A1': Cnt['ISOTOPE'] = 'C11' + elif istp_coded == 'C-B1038': Cnt['ISOTOPE'] = 'O15' + elif istp_coded == 'C-128A2': Cnt['ISOTOPE'] = 'Ge68' + elif istp_coded == 'C-131A3': Cnt['ISOTOPE'] = 'Ga68' else: - print('w> could not find isotope name. enter manually into Cnt[''ISOTOPE'']') + print('w> could not find isotope name. 
enter manually into Cnt[' 'ISOTOPE' ']') return None #--- @@ -1072,7 +1078,6 @@ def get_dicoms(dfile, datain, Cnt): else: datain['#UTE1'] += 1 - if Cnt['VERBOSE']: print('') @@ -1082,7 +1087,7 @@ def explore_input(fldr, params, print_paths=False, recurse=1): recurse: int, [default: 1] subfolder deep. Use -1 for infinite recursion. """ fldr, fpth = fspath(fldr), Path(fldr) - Cnt = params.get('Cnt', params) # two ways of passing Cnt are here decoded + Cnt = params.get('Cnt', params) # two ways of passing Cnt are here decoded if not os.path.isdir(fldr): log.error('provide a valid folder path for the data.') @@ -1112,7 +1117,7 @@ def explore_input(fldr, params, print_paths=False, recurse=1): if print_paths: print('--------------------------------------------------') for x in datain: - print(x,':',datain[x]) + print(x, ':', datain[x]) print('--------------------------------------------------') return datain @@ -1121,23 +1126,23 @@ def explore_input(fldr, params, print_paths=False, recurse=1): def putgaps(s, txLUT, Cnt, sino_no=0): #number of sino planes (2D sinos) depends on the span used - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: # number of rings calculated for the given ring range (optionally we can use only part of the axial FOV) NRNG_c = Cnt['RNG_END'] - Cnt['RNG_STRT'] # number of sinos in span-1 nsinos = NRNG_c**2 # correct for the max. 
ring difference in the full axial extent (don't use ring range (1,63) as for this case no correction) - if NRNG_c==64: + if NRNG_c == 64: nsinos -= 12 - elif Cnt['SPN']==11: + elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] #preallocate sino with gaps sino = np.zeros((Cnt['NSANGLES'], Cnt['NSBINS'], nsinos), dtype=np.float32) #fill the sino with gaps mmr_auxe.pgaps(sino, s.astype(np.float32), txLUT, Cnt, sino_no) - sino = np.transpose(sino, (2,0,1)) + sino = np.transpose(sino, (2, 0, 1)) return sino.astype(s.dtype) @@ -1169,9 +1174,10 @@ def mmrinit(): return Cnt, txLUT, axLUT + def mMR_params(): ''' get all scanner parameters in one dictionary ''' Cnt, txLUT, axLUT = mmrinit() - return {'Cnt':Cnt, 'txLUT':txLUT, 'axLUT':axLUT} + return {'Cnt': Cnt, 'txLUT': txLUT, 'axLUT': axLUT} diff --git a/niftypet/nipet/mmrnorm.py b/niftypet/nipet/mmrnorm.py index 75c8ecb0..5f554721 100644 --- a/niftypet/nipet/mmrnorm.py +++ b/niftypet/nipet/mmrnorm.py @@ -10,9 +10,8 @@ from . import mmr_auxe # auxiliary functions through Python extensions in CUDA -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" - +__author__ = ("Pawel J. Markiewicz", "Casper O. 
da Costa-Luis") +__copyright__ = "Copyright 2020" #================================================================================================= # GET NORM COMPONENTS @@ -33,24 +32,24 @@ def get_components(datain, Cnt): with open(fnrm_dat, 'rb') as f: #geometric effects - geo = np.fromfile(f, np.float32, Cnt['NSBINS']*Cnt['NSEG0']) + geo = np.fromfile(f, np.float32, Cnt['NSBINS'] * Cnt['NSEG0']) geo.shape = (Cnt['NSEG0'], Cnt['NSBINS']) #crystal interference - crs_intf = np.fromfile(f, np.float32, 9*Cnt['NSBINS']) - crs_intf.shape = (Cnt['NSBINS'],9) + crs_intf = np.fromfile(f, np.float32, 9 * Cnt['NSBINS']) + crs_intf.shape = (Cnt['NSBINS'], 9) #crystal efficiencies - crs_eff = np.fromfile(f, np.float32, Cnt['NCRS']*Cnt['NRNG']) - crs_eff.shape = (Cnt['NRNG'], Cnt['NCRS']) + crs_eff = np.fromfile(f, np.float32, Cnt['NCRS'] * Cnt['NRNG']) + crs_eff.shape = (Cnt['NRNG'], Cnt['NCRS']) #axial effects - ax_eff1 = np.fromfile(f, np.float32, Cnt['NSN11']) + ax_eff1 = np.fromfile(f, np.float32, Cnt['NSN11']) #paralyzing ring DT parameters - rng_dtp = np.fromfile(f, np.float32, Cnt['NRNG']) + rng_dtp = np.fromfile(f, np.float32, Cnt['NRNG']) #non-paralyzing ring DT parameters - rng_dtnp = np.fromfile(f, np.float32, Cnt['NRNG']) + rng_dtnp = np.fromfile(f, np.float32, Cnt['NRNG']) #TX crystal DT parameter - crs_dt = np.fromfile(f, np.float32, 9) + crs_dt = np.fromfile(f, np.float32, 9) #additional axial effects - ax_eff2 = np.fromfile(f, np.float32, Cnt['NSN11']) + ax_eff2 = np.fromfile(f, np.float32, Cnt['NSN11']) #------------------------------------------------- #the files below are found based on a 24hr scan of germanium-68 phantom @@ -58,19 +57,17 @@ def get_components(datain, Cnt): # axial effects for span-1 ax_f1 = np.load(fspath(auxdata / "AxialFactorForSpan1.npy")) # relative scale factors for axial scatter deriving span-11 scale factors from SSR scale factors - sax_f11 = np.fromfile( - fspath(auxdata / "RelativeScaleFactors_scatter_axial_ssrTOspan11.f32"), - 
np.float32, Cnt['NSN11']) + sax_f11 = np.fromfile(fspath(auxdata / "RelativeScaleFactors_scatter_axial_ssrTOspan11.f32"), + np.float32, Cnt['NSN11']) # relative scale factors for axial scatter deriving span-1 scale factors from SSR scale factors - sax_f1 = np.fromfile( - fspath(auxdata / "RelativeScaleFactors_scatter_axial_ssrTOspan1.f32"), - np.float32, Cnt['NSN1']) + sax_f1 = np.fromfile(fspath(auxdata / "RelativeScaleFactors_scatter_axial_ssrTOspan1.f32"), + np.float32, Cnt['NSN1']) #------------------------------------------------- #------------------------------------------------- # HEADER FILE # possible DICOM locations for the Interfile header - nhdr_locations = [[0x29,0x1010], [0x29,0x1110]] + nhdr_locations = [[0x29, 0x1010], [0x29, 0x1110]] # read the DICOM file d = dcm.read_file(fnrm_hdr) @@ -86,14 +83,16 @@ def get_components(datain, Cnt): except: continue if '!INTERFILE' in nhdr and 'scanner quantification factor' in nhdr: - if Cnt['VERBOSE']: print('i> got the normalisation interfile header from [', hex(loc[0]),',', hex(loc[1]), ']') + if Cnt['VERBOSE']: + print('i> got the normalisation interfile header from [', hex(loc[0]), ',', + hex(loc[1]), ']') found_nhdr = True break if not found_nhdr: raise ValueError('DICOM field with normalisation interfile header has not been found!') f0 = nhdr.find('scanner quantification factor') - f1 = f0+nhdr[f0:].find('\n') + f1 = f0 + nhdr[f0:].find('\n') #regular expression for the needed three numbers p = re.compile(r'(?<=:=)\s*\d{1,5}[.]\d{3,10}[e][+-]\d{1,4}') #-quantification factor: @@ -102,11 +101,10 @@ def get_components(datain, Cnt): qf_loc = 0.205 #------------------------------------------------- - nrmcmp = {'qf':qf, 'qf_loc':qf_loc, 'geo':geo, 'cinf':crs_intf, 'ceff':crs_eff, - 'axe1':ax_eff1, 'dtp':rng_dtp, 'dtnp':rng_dtnp, - 'dtc':crs_dt, 'axe2':ax_eff2, 'axf1':ax_f1, - 'sax_f11':sax_f11, 'sax_f1':sax_f1} - + nrmcmp = { + 'qf': qf, 'qf_loc': qf_loc, 'geo': geo, 'cinf': crs_intf, 'ceff': crs_eff, 'axe1': 
ax_eff1, + 'dtp': rng_dtp, 'dtnp': rng_dtnp, 'dtc': crs_dt, 'axe2': ax_eff2, 'axf1': ax_f1, + 'sax_f11': sax_f11, 'sax_f1': sax_f1} return nrmcmp, nhdr @@ -118,9 +116,9 @@ def get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=None): normcomp, _ = get_components(datain, Cnt) #number of sino planes (2D sinos) depends on the span used - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] - elif Cnt['SPN']==11: + elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] #predefine the sinogram @@ -135,9 +133,9 @@ def get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=None): def get_sino(datain, hst, axLUT, txLUT, Cnt): #number of sino planes (2D sinos) depends on the span used - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] - elif Cnt['SPN']==11: + elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] #get sino with no gaps @@ -146,7 +144,7 @@ def get_sino(datain, hst, axLUT, txLUT, Cnt): sino = np.zeros((Cnt['NSANGLES'], Cnt['NSBINS'], nsinos), dtype=np.float32) #fill the sino with gaps mmr_auxe.pgaps(sino, s, txLUT, Cnt, 0) - sino = np.transpose(sino, (2,0,1)) + sino = np.transpose(sino, (2, 0, 1)) return sino @@ -161,9 +159,9 @@ def get_norm_sino(datain, scanner_params, hst): # hst = mmrhist.mmrhist(datain, scanner_params) #number of sino planes (2D sinos) depends on the span used - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] - elif Cnt['SPN']==11: + elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] #get sino with no gaps @@ -172,6 +170,6 @@ def get_norm_sino(datain, scanner_params, hst): sino = np.zeros((Cnt['NSANGLES'], Cnt['NSBINS'], nsinos), dtype=np.float32) #fill the sino with gaps mmr_auxe.pgaps(sino, s, txLUT, Cnt, 0) - sino = np.transpose(sino, (2,0,1)) + sino = np.transpose(sino, (2, 0, 1)) return sino diff --git a/niftypet/nipet/prj/mmrprj.py b/niftypet/nipet/prj/mmrprj.py index 1ef7277e..9e5a7e10 100644 --- a/niftypet/nipet/prj/mmrprj.py +++ b/niftypet/nipet/prj/mmrprj.py @@ -9,11 +9,10 @@ from ..img import mmrimg from . 
import petprj -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" +__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") +__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) - #========================================================================= # transaxial (one-slice) projector #------------------------------------------------------------------------- @@ -22,7 +21,7 @@ def trnx_prj(scanner_params, sino=None, im=None): # Get particular scanner parameters: Constants, transaxial and axial LUTs - Cnt = scanner_params['Cnt'] + Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] @@ -32,16 +31,16 @@ def trnx_prj(scanner_params, sino=None, im=None): raise ValueError('Only one input should be given: sinogram or image.') if sino is None: - sino = np.zeros((txLUT['Naw'], ), dtype=np.float32) + sino = np.zeros((txLUT['Naw'],), dtype=np.float32) if im is None: im = np.zeros((Cnt['SO_IMY'], Cnt['SO_IMX']), dtype=np.float32) - tv = np.zeros(Cnt['NTV']*Cnt['Naw'], dtype=np.uint8) - tt = np.zeros(Cnt['NTT']*Cnt['Naw'], dtype=np.float32) + tv = np.zeros(Cnt['NTV'] * Cnt['Naw'], dtype=np.uint8) + tt = np.zeros(Cnt['NTT'] * Cnt['Naw'], dtype=np.float32) petprj.tprj(sino, im, tv, tt, txLUT, Cnt) - return {'tv':tv, 'tt':tt} + return {'tv': tv, 'tt': tt} #========================================================================= @@ -49,7 +48,8 @@ def trnx_prj(scanner_params, sino=None, im=None): #------------------------------------------------------------------------- -def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=False, attenuation=False): +def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=False, + attenuation=False): ''' Calculate forward projection (a set of sinograms) for the provided input image. Arguments: im -- input image (can be emission or mu-map image). 
@@ -65,7 +65,7 @@ def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=Fa mu-values along LOR path is taken at the end. ''' # Get particular scanner parameters: Constants, transaxial and axial LUTs - Cnt = scanner_params['Cnt'] + Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] @@ -76,34 +76,40 @@ def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=Fa else: att = 0 - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: # number of rings calculated for the given ring range (optionally we can use only part of the axial FOV) NRNG_c = Cnt['RNG_END'] - Cnt['RNG_STRT'] # number of sinos in span-1 nsinos = NRNG_c**2 # correct for the max. ring difference in the full axial extent (don't use ring range (1,63) as for this case no correction) - if NRNG_c==64: + if NRNG_c == 64: nsinos -= 12 - elif Cnt['SPN']==11: nsinos=Cnt['NSN11'] - elif Cnt['SPN']==0: nsinos=Cnt['NSEG0'] + elif Cnt['SPN'] == 11: + nsinos = Cnt['NSN11'] + elif Cnt['SPN'] == 0: + nsinos = Cnt['NSEG0'] - if im.shape[0]==Cnt['SO_IMZ'] and im.shape[1]==Cnt['SO_IMY'] and im.shape[2]==Cnt['SO_IMX']: + if im.shape[0] == Cnt['SO_IMZ'] and im.shape[1] == Cnt['SO_IMY'] and im.shape[2] == Cnt[ + 'SO_IMX']: ims = mmrimg.convert2dev(im, Cnt) - elif im.shape[0]==Cnt['SZ_IMX'] and im.shape[1]==Cnt['SZ_IMY'] and im.shape[2]==Cnt['SZ_IMZ']: + elif im.shape[0] == Cnt['SZ_IMX'] and im.shape[1] == Cnt['SZ_IMY'] and im.shape[2] == Cnt[ + 'SZ_IMZ']: ims = im - elif im.shape[0]==Cnt['rSO_IMZ'] and im.shape[1]==Cnt['SO_IMY'] and im.shape[2]==Cnt['SO_IMX']: + elif im.shape[0] == Cnt['rSO_IMZ'] and im.shape[1] == Cnt['SO_IMY'] and im.shape[2] == Cnt[ + 'SO_IMX']: ims = mmrimg.convert2dev(im, Cnt) - elif im.shape[0]==Cnt['SZ_IMX'] and im.shape[1]==Cnt['SZ_IMY'] and im.shape[2]==Cnt['rSZ_IMZ']: + elif im.shape[0] == Cnt['SZ_IMX'] and im.shape[1] == Cnt['SZ_IMY'] and im.shape[2] == Cnt[ + 'rSZ_IMZ']: ims = im else: raise ValueError('wrong image size;' - ' 
it has to be one of these: (z,y,x) = (127,344,344)' - ' or (y,x,z) = (320,320,128)') + ' it has to be one of these: (z,y,x) = (127,344,344)' + ' or (y,x,z) = (320,320,128)') log.debug('number of sinos:%d' % nsinos) #predefine the sinogram. if subsets are used then only preallocate those bins which will be used. - if isub[0]<0: + if isub[0] < 0: sinog = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) else: sinog = np.zeros((len(isub), nsinos), dtype=np.float32) @@ -113,8 +119,8 @@ def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=Fa # -------------------- # get the sinogram bins in a proper sinogram sino = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) - if isub[0]>=0: sino[isub,:] = sinog - else: sino = sinog + if isub[0] >= 0: sino[isub, :] = sinog + else: sino = sinog # put the gaps back to form displayable sinogram if not dev_out: @@ -139,36 +145,38 @@ def back_prj(sino, scanner_params, isub=np.array([-1], dtype=np.int32)): when the first element is negative, all transaxial bins are used (as in pure EM-ML). ''' # Get particular scanner parameters: Constants, transaxial and axial LUTs - Cnt = scanner_params['Cnt'] + Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: # number of rings calculated for the given ring range (optionally we can use only part of the axial FOV) NRNG_c = Cnt['RNG_END'] - Cnt['RNG_STRT'] # number of sinos in span-1 nsinos = NRNG_c**2 # correct for the max. ring difference in the full axial extent (don't use ring range (1,63) as for this case no correction) - if NRNG_c==64: + if NRNG_c == 64: nsinos -= 12 - elif Cnt['SPN']==11: nsinos=Cnt['NSN11'] - elif Cnt['SPN']==0: nsinos=Cnt['NSEG0'] - + elif Cnt['SPN'] == 11: + nsinos = Cnt['NSN11'] + elif Cnt['SPN'] == 0: + nsinos = Cnt['NSEG0'] #> check first the Siemens default sinogram; #> for this default shape only full sinograms are expected--no subsets. 
- if len(sino.shape)==3: - if sino.shape[0]!=nsinos or sino.shape[1]!=Cnt['NSANGLES'] or sino.shape[2]!=Cnt['NSBINS']: + if len(sino.shape) == 3: + if sino.shape[0] != nsinos or sino.shape[1] != Cnt['NSANGLES'] or sino.shape[2] != Cnt[ + 'NSBINS']: raise ValueError('Unexpected sinogram array dimensions/shape for Siemens defaults.') sinog = mmraux.remgaps(sino, txLUT, Cnt) - elif len(sino.shape)==2: - if isub[0]<0 and sino.shape[0]!=txLUT["Naw"]: + elif len(sino.shape) == 2: + if isub[0] < 0 and sino.shape[0] != txLUT["Naw"]: raise ValueError('Unexpected number of transaxial elements in the full sinogram.') - elif isub[0]>=0 and sino.shape[0]!=len(isub): + elif isub[0] >= 0 and sino.shape[0] != len(isub): raise ValueError('Unexpected number of transaxial elements in the subset sinogram.') #> check if the number of sinograms is correct - if sino.shape[1]!=nsinos: + if sino.shape[1] != nsinos: raise ValueError('Inconsistent number of sinograms in the array.') #> when found the dimensions/shape are fine: sinog = sino @@ -176,7 +184,7 @@ def back_prj(sino, scanner_params, isub=np.array([-1], dtype=np.int32)): raise ValueError('Unexpected shape of the input sinogram.') #predefine the output image depending on the number of rings used - if Cnt['SPN']==1 and 'rSZ_IMZ' in Cnt: + if Cnt['SPN'] == 1 and 'rSZ_IMZ' in Cnt: nvz = Cnt['rSZ_IMZ'] else: nvz = Cnt['SZ_IMZ'] diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 2c0298b5..896f7a4b 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -23,11 +23,10 @@ from ..sct import vsm from . import petprj -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" +__author__ = ("Pawel J. Markiewicz", "Casper O. 
da Costa-Luis") +__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) - #reconstruction mode: # 0 - no attenuation and no scatter # 1 - attenuation and no scatter @@ -38,7 +37,7 @@ # fwhm in [mm] def fwhm2sig(fwhm, voxsize=1.): - return (fwhm/voxsize) / (2*(2*np.log(2))**.5) + return (fwhm/voxsize) / (2 * (2 * np.log(2))**.5) #========================================================================= @@ -58,11 +57,11 @@ def get_subsets14(n, params): # projections per subset P = Cnt['NSANGLES'] // N # the remaining projections which have to be spread over the N subsets with a given frequency - fs = N/float(P-N) + fs = N / float(P - N) # generate sampling pattern for subsets up to N out of P - sp = np.array([np.arange(i,Cnt['NSANGLES'],P) for i in range(N)]) + sp = np.array([np.arange(i, Cnt['NSANGLES'], P) for i in range(N)]) # ====================================== - S = np.zeros((N,P),dtype=np.int16) + S = np.zeros((N, P), dtype=np.int16) # ====================================== # sum of sino angle projections totsum = np.zeros(N, dtype=np.int32) @@ -73,27 +72,27 @@ def get_subsets14(n, params): #::::: iterate sino blocks. 
This bit may be unnecessary, it can be taken directly from sp array for b in range(N): #--angle index within a sino block depending on subset s - ai = (s+b)%N + ai = (s+b) % N #--angle index for whole sino sai = sp[ai, b] si.append(sai) totsum[s] += aisum[sai] #::::: # deal with the remaining part, ie, P-N per block - rai = np.int16( np.floor( np.arange(s,2*N,fs)[:4]%N ) ) - for i in range(P-N): - sai = sp[-1,rai[i]]+i+1 + rai = np.int16(np.floor(np.arange(s, 2 * N, fs)[:4] % N)) + for i in range(P - N): + sai = sp[-1, rai[i]] + i + 1 totsum[s] += aisum[sai] si.append(sai) S[s] = np.array((si)) # get the projection bin index for transaxial gpu sinos - tmsk = txLUT['msino']>0 - Smsk = -1*np.ones(tmsk.shape, dtype=np.int32) + tmsk = txLUT['msino'] > 0 + Smsk = -1 * np.ones(tmsk.shape, dtype=np.int32) Smsk[tmsk] = list(range(Cnt['Naw'])) - iprj = Smsk[:,S[n]] - iprj = iprj[iprj>=0] + iprj = Smsk[:, S[n]] + iprj = iprj[iprj >= 0] return iprj, S @@ -113,22 +112,22 @@ def psf_config(psf, Cnt): def _config(fwhm3, check_len=True): # resolution modelling by custom kernels if check_len: - if len(fwhm3)!=3 or any([f<0 for f in fwhm3]): + if len(fwhm3) != 3 or any([f < 0 for f in fwhm3]): raise ValueError('Incorrect separable kernel FWHM definition') - kernel = np.empty((3, 2*Cnt['RSZ_PSF_KRNL']+1), dtype=np.float32) + kernel = np.empty((3, 2 * Cnt['RSZ_PSF_KRNL'] + 1), dtype=np.float32) for i, psf in enumerate(fwhm3): #> FWHM -> sigma conversion for all dimensions separately - if i==2: - sig = fwhm2sig(psf, voxsize=Cnt['SZ_VOXZ']*10) + if i == 2: + sig = fwhm2sig(psf, voxsize=Cnt['SZ_VOXZ'] * 10) else: - sig = fwhm2sig(psf, voxsize=Cnt['SZ_VOXY']*10) + sig = fwhm2sig(psf, voxsize=Cnt['SZ_VOXY'] * 10) - x = np.arange(-Cnt['RSZ_PSF_KRNL'], Cnt['RSZ_PSF_KRNL']+1) - kernel[i, :] = np.exp(-0.5 * (x**2/sig**2)) - kernel[i, :] /= np.sum(kernel[i,:]) + x = np.arange(-Cnt['RSZ_PSF_KRNL'], Cnt['RSZ_PSF_KRNL'] + 1) + kernel[i, :] = np.exp(-0.5 * (x**2 / sig**2)) + kernel[i, :] /= 
np.sum(kernel[i, :]) - psfkernel = np.empty((3, 2*Cnt['RSZ_PSF_KRNL']+1), dtype=np.float32) + psfkernel = np.empty((3, 2 * Cnt['RSZ_PSF_KRNL'] + 1), dtype=np.float32) psfkernel[0, :] = kernel[2, :] psfkernel[1, :] = kernel[0, :] psfkernel[2, :] = kernel[1, :] @@ -159,20 +158,10 @@ def _config(fwhm3, check_len=True): return psfkernel -def osemone(datain, mumaps, hst, scanner_params, - recmod=3, itr=4, fwhm=0., psf=None, mask_radius=29., - decay_ref_time=None, - attnsino=None, - sctsino=None, - randsino=None, - normcomp=None, - - emmskS=False, - frmno='', fcomment='', - outpath=None, - store_img=False, - store_itr=None, - ret_sinos=False): +def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=None, + mask_radius=29., decay_ref_time=None, attnsino=None, sctsino=None, randsino=None, + normcomp=None, emmskS=False, frmno='', fcomment='', outpath=None, store_img=False, + store_itr=None, ret_sinos=False): ''' OSEM image reconstruction with several modes (with/without scatter and/or attenuation correction) @@ -182,14 +171,14 @@ def osemone(datain, mumaps, hst, scanner_params, ''' #> Get particular scanner parameters: Constants, transaxial and axial LUTs - Cnt = scanner_params['Cnt'] + Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] #---------- sort out OUTPUT ------------ #-output file name for the reconstructed image if outpath is None: - opth = os.path.join( datain['corepath'], 'reconstructed' ) + opth = os.path.join(datain['corepath'], 'reconstructed') else: opth = outpath @@ -211,11 +200,11 @@ def osemone(datain, mumaps, hst, scanner_params, muh, muo = mumaps # get the GPU version of the image dims - mus = mmrimg.convert2dev(muo+muh, Cnt) + mus = mmrimg.convert2dev(muo + muh, Cnt) - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: snno = Cnt['NSN1'] - elif Cnt['SPN']==11: + elif Cnt['SPN'] == 11: snno = Cnt['NSN11'] # remove gaps from the prompt sino @@ -236,7 +225,7 @@ def osemone(datain, mumaps, hst, 
scanner_params, # ATTENUATION FACTORS FOR COMBINED OBJECT AND BED MU-MAP #------------------------------------------------------------------------- #> combine attenuation and norm together depending on reconstruction mode - if recmod==0: + if recmod == 0: asng = np.ones(psng.shape, dtype=np.float32) else: #> check if the attenuation sino is given as an array @@ -252,7 +241,7 @@ def osemone(datain, mumaps, hst, scanner_params, asng = np.zeros(psng.shape, dtype=np.float32) petprj.fprj(asng, mus, txLUT, axLUT, np.array([-1], dtype=np.int32), Cnt, 1) #> combine attenuation and normalisation - ansng = asng*nsng + ansng = asng * nsng #========================================================================= #========================================================================= @@ -269,7 +258,7 @@ def osemone(datain, mumaps, hst, scanner_params, #========================================================================= # SCAT #------------------------------------------------------------------------- - if recmod==2: + if recmod == 2: if not sctsino is None: ssng = mmraux.remgaps(sctsino, txLUT, Cnt) elif sctsino is None and os.path.isfile(datain['em_crr']): @@ -279,44 +268,45 @@ def osemone(datain, mumaps, hst, scanner_params, mumaps, emd['im'], scanner_params, - histo = hst, - rsino = rsino, - prcnt_scl = 0.1, - emmsk=False,) + histo=hst, + rsino=rsino, + prcnt_scl=0.1, + emmsk=False, + ) ssng = mmraux.remgaps(ssn, txLUT, Cnt) else: - raise ValueError( - "No emission image available for scatter estimation! " + - " Check if it's present or the path is correct.") + raise ValueError("No emission image available for scatter estimation! 
" + + " Check if it's present or the path is correct.") else: ssng = np.zeros(rsng.shape, dtype=rsng.dtype) #========================================================================= log.info('------ OSEM (%d) -------' % itr) #------------------------------------ - Sn = 14 # number of subsets - #-get one subset to get number of projection bins in a subset - Sprj, s = get_subsets14(0,scanner_params) + Sn = 14 # number of subsets + #-get one subset to get number of projection bins in a subset + Sprj, s = get_subsets14(0, scanner_params) Nprj = len(Sprj) - #-init subset array and sensitivity image for a given subset - sinoTIdx = np.zeros((Sn, Nprj+1), dtype=np.int32) - #-init sensitivity images for each subset + #-init subset array and sensitivity image for a given subset + sinoTIdx = np.zeros((Sn, Nprj + 1), dtype=np.int32) + #-init sensitivity images for each subset imgsens = np.zeros((Sn, Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) for n in range(Sn): - sinoTIdx[n,0] = Nprj #first number of projection for the given subset - sinoTIdx[n,1:], s = get_subsets14(n,scanner_params) - # sensitivity image - petprj.bprj(imgsens[n,:,:,:], ansng[sinoTIdx[n,1:],:], txLUT, axLUT, sinoTIdx[n,1:], Cnt ) - #------------------------------------- + sinoTIdx[n, 0] = Nprj #first number of projection for the given subset + sinoTIdx[n, 1:], s = get_subsets14(n, scanner_params) + # sensitivity image + petprj.bprj(imgsens[n, :, :, :], ansng[sinoTIdx[n, 1:], :], txLUT, axLUT, sinoTIdx[n, 1:], + Cnt) + #------------------------------------- #-mask for reconstructed image. 
anything outside it is set to zero - msk = mmrimg.get_cylinder(Cnt, rad=mask_radius, xo=0, yo=0, unival=1, gpu_dim=True)>0.9 + msk = mmrimg.get_cylinder(Cnt, rad=mask_radius, xo=0, yo=0, unival=1, gpu_dim=True) > 0.9 #-init image img = np.ones((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) #-decay correction - lmbd = np.log(2)/resources.riLUT[Cnt['ISOTOPE']]['thalf'] + lmbd = np.log(2) / resources.riLUT[Cnt['ISOTOPE']]['thalf'] if Cnt['DCYCRR'] and 't0' in hst and 'dur' in hst: #> decay correct to the reference time (e.g., injection time) if provided #> otherwise correct in reference to the scan start time @@ -325,7 +315,7 @@ def osemone(datain, mumaps, hst, scanner_params, else: tref = hst['t0'] - dcycrr = np.exp(lmbd*tref)*lmbd*hst['dur'] / (1-np.exp(-lmbd*hst['dur'])) + dcycrr = np.exp(lmbd * tref) * lmbd * hst['dur'] / (1 - np.exp(-lmbd * hst['dur'])) # apply quantitative correction to the image qf = ncmp['qf'] / resources.riLUT[Cnt['ISOTOPE']]['BF'] / float(hst['dur']) qf_loc = ncmp['qf_loc'] @@ -355,43 +345,24 @@ def osemone(datain, mumaps, hst, scanner_params, #========================================================================= # OSEM RECONSTRUCTION #------------------------------------------------------------------------- - with trange(itr, desc="OSEM", - disable=log.getEffectiveLevel() > logging.INFO, - leave=log.getEffectiveLevel() <= logging.INFO - ) as pbar: + with trange(itr, desc="OSEM", disable=log.getEffectiveLevel() > logging.INFO, + leave=log.getEffectiveLevel() <= logging.INFO) as pbar: for k in pbar: - petprj.osem( - img, - psng, - rsng, - ssng, - nsng, - asng, - sinoTIdx, - imgsens, - msk, - psfkernel, - txLUT, axLUT, Cnt) + petprj.osem(img, psng, rsng, ssng, nsng, asng, sinoTIdx, imgsens, msk, psfkernel, + txLUT, axLUT, Cnt) if np.nansum(img) < 0.1: log.warning('it seems there is not enough true data to render reasonable image') #img[:]=0 itr = k break - if recmod>=3 and ( ((k1)) ): # or (itr==1) + if recmod >= 3 and 
(((k < itr - 1) and (itr > 1))): # or (itr==1) sct_time = time.time() - sct = vsm( - datain, - mumaps, - mmrimg.convert2e7(img, Cnt), - scanner_params, - histo=hst, - rsino=rsino, - emmsk=emmskS, - return_ssrb=return_ssrb, - return_mask=return_mask) + sct = vsm(datain, mumaps, mmrimg.convert2e7(img, Cnt), scanner_params, histo=hst, + rsino=rsino, emmsk=emmskS, return_ssrb=return_ssrb, + return_mask=return_mask) if isinstance(sct, dict): ssn = sct['sino'] @@ -406,12 +377,11 @@ def osemone(datain, mumaps, hst, scanner_params, fout = os.path.join(opth, os.path.basename(datain['lm_bf'])[:8] \ + frmno +'_t'+str(hst['t0'])+'-'+str(hst['t1'])+'sec' \ +'_itr'+str(k)+fcomment+'_inrecon.nii.gz') - nimpa.array2nii( im[::-1,::-1,:], B, fout) + nimpa.array2nii(im[::-1, ::-1, :], B, fout) log.info('recon time:%.3g' % (time.time() - stime)) #========================================================================= - log.info('applying decay correction of %r' % dcycrr) log.info('applying quantification factor:%r to the whole image' % qf) log.info('for the frame duration of :%r' % hst['dur']) @@ -424,7 +394,7 @@ def osemone(datain, mumaps, hst, scanner_params, #-description text to NIfTI #-attenuation number: if only bed present then it is 0.5 - attnum = ( 1*(np.sum(muh)>0.5)+1*(np.sum(muo)>0.5) ) / 2. + attnum = (1 * (np.sum(muh) > 0.5) + 1 * (np.sum(muo) > 0.5)) / 2. 
descrip = 'alg=osem'+ \ ';sub=14'+ \ ';att='+str(attnum*(recmod>0))+ \ @@ -437,7 +407,6 @@ def osemone(datain, mumaps, hst, scanner_params, ';dur='+str(hst['dur']) +\ ';qf='+str(qf) - #> file name of the output reconstructed image #> (maybe used later even if not stored now) fpet = os.path.join(opth, os.path.basename(datain['lm_bf']).split('.')[0] \ @@ -446,20 +415,19 @@ def osemone(datain, mumaps, hst, scanner_params, if store_img: log.info('saving image to: ' + fpet) - nimpa.array2nii( im[::-1,::-1,:], B, fpet, descrip=descrip) + nimpa.array2nii(im[::-1, ::-1, :], B, fpet, descrip=descrip) im_smo = None fsmo = None - if fwhm>0: - im_smo = ndi.filters.gaussian_filter(im, fwhm2sig(fwhm, voxsize=Cnt['SZ_VOXY']*10), mode='mirror') + if fwhm > 0: + im_smo = ndi.filters.gaussian_filter(im, fwhm2sig(fwhm, voxsize=Cnt['SZ_VOXY'] * 10), + mode='mirror') if store_img: - fsmo = fpet.split('.nii.gz')[0] + '_smo-'+str(fwhm).replace('.','-')+'mm.nii.gz' + fsmo = fpet.split('.nii.gz')[0] + '_smo-' + str(fwhm).replace('.', '-') + 'mm.nii.gz' log.info('saving smoothed image to: ' + fsmo) descrip.replace(';fwhm=0', ';fwhm=str(fwhm)') - nimpa.array2nii( im_smo[::-1,::-1,:], B, fsmo, descrip=descrip) - - + nimpa.array2nii(im_smo[::-1, ::-1, :], B, fsmo, descrip=descrip) # returning: # (0) E7 image [can be smoothed]; @@ -481,7 +449,7 @@ def osemone(datain, mumaps, hst, scanner_params, # recout.im = im # recout.fpet = fout - if ret_sinos and recmod>=3 and itr>1: + if ret_sinos and recmod >= 3 and itr > 1: RecOut = namedtuple('RecOut', 'im, fpet, imsmo, fsmo, affine, ssn, sssr, amsk, rsn') recout = RecOut(im, fpet, im_smo, fsmo, B, ssn, sct['ssrb'], sct['mask'], rsino) else: @@ -519,7 +487,6 @@ def osemone(datain, mumaps, hst, scanner_params, # nrmsng = mmrnorm.get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=ncmp) # #========================================================================= - # #========================================================================= # # Randoms # 
#------------------------------------------------------------------------- @@ -531,7 +498,6 @@ def osemone(datain, mumaps, hst, scanner_params, # rsng = mmraux.remgaps(randsino, txLUT, Cnt) # #========================================================================= - # #========================================================================= # # ATTENUATION FACTORS FOR COMBINED OBJECT AND BED MU-MAP # #------------------------------------------------------------------------- @@ -544,7 +510,6 @@ def osemone(datain, mumaps, hst, scanner_params, # attnrmsng = asng*nrmsng # #========================================================================= - # #========================================================================= # # SCATTER and the additive term # #------------------------------------------------------------------------- @@ -578,7 +543,6 @@ def osemone(datain, mumaps, hst, scanner_params, # #init estimate sino # esng = np.zeros((Cnt['Naw'], Cnt['NSN11']), dtype=np.float32) - # for k in range(itr): # print '>--------- ITERATION', k, '-----------<' # esng[:] = 0 @@ -644,7 +608,6 @@ def osemone(datain, mumaps, hst, scanner_params, # return recout - #============================================================================= # OSEM diff --git a/niftypet/nipet/prj/mmrsim.py b/niftypet/nipet/prj/mmrsim.py index 2d26eb42..619fff0c 100644 --- a/niftypet/nipet/prj/mmrsim.py +++ b/niftypet/nipet/prj/mmrsim.py @@ -11,8 +11,8 @@ from ..img import mmrimg from . import mmrprj, mmrrec, petprj -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" +__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") +__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) @@ -20,9 +20,9 @@ def simulate_sino( petim, ctim, scanner_params, - simulate_3d = False, + simulate_3d=False, slice_idx=-1, - mu_input = False, + mu_input=False, ): ''' Simulate the measured sinogram with photon attenuation. 
@@ -46,28 +46,30 @@ def simulate_sino( if simulate_3d: if petim.ndim != 3 \ or petim.shape != (Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']): - raise ValueError( - 'The input image shape does not match the scanner image size.') - if petim.max()>200: + raise ValueError('The input image shape does not match the scanner image size.') + if petim.max() > 200: log.warning('the PET image may have too large intensities for robust simulation.') else: #> 2D case with reduced rings if len(petim.shape) == 3: # make sure that the shape of the input image matches the image size of the scanner if petim.shape[1:] != (Cnt['SO_IMY'], Cnt['SO_IMX']): - raise ValueError('The input image shape for x and y does not match the scanner image size.') + raise ValueError( + 'The input image shape for x and y does not match the scanner image size.') # pick the right slice index (slice_idx) if not given or mistaken if slice_idx < 0: - log.warning('the axial index is chosen to be in the middle of axial FOV.') - slice_idx = petim.shape[0]/2 + log.warning( + 'the axial index is chosen to be in the middle of axial FOV.') + slice_idx = petim.shape[0] / 2 if slice_idx >= petim.shape[0]: raise ValueError('The axial index for 2D slice selection is outside the image.') - elif len(petim.shape)==2: + elif len(petim.shape) == 2: # make sure that the shape of the input image matches the image size of the scanner if petim.shape != (Cnt['SO_IMY'], Cnt['SO_IMX']): - raise ValueError('The input image shape for x and y does not match the scanner image size.') + raise ValueError( + 'The input image shape for x and y does not match the scanner image size.') petim.shape = (1,) + petim.shape - ctim.shape = (1,) + ctim.shape + ctim.shape = (1,) + ctim.shape slice_idx = 0 if 'rSZ_IMZ' not in Cnt: @@ -83,7 +85,7 @@ def simulate_sino( mui = nimpa.ct2mu(ctim) #> get rid of negative values - mui[mui<0] = 0 + mui[mui < 0] = 0 #-------------------- if simulate_3d: @@ -93,40 +95,40 @@ def simulate_sino( #> 2D case with 
reduced rings #-------------------- #> create a number of slices of the same chosen image slice for reduced (fast) 3D simulation - rmu = mui[slice_idx,:,:] + rmu = mui[slice_idx, :, :] rmu.shape = (1,) + rmu.shape rmu = np.repeat(rmu, Cnt['rSZ_IMZ'], axis=0) #-------------------- #-------------------- #> form a short 3D image of the same emission image slice - rpet = petim[slice_idx,:,:].copy() + rpet = petim[slice_idx, :, :].copy() rpet.shape = (1,) + rpet.shape rpet = np.repeat(rpet, Cnt['rSZ_IMZ'], axis=0) #-------------------- #> forward project the mu-map to obtain attenuation factors - attsino = mmrprj.frwd_prj(rmu, scanner_params, attenuation=True) + attsino = mmrprj.frwd_prj(rmu, scanner_params, attenuation=True) #> forward project the PET image to obtain non-attenuated emission sino emisino = mmrprj.frwd_prj(rpet, scanner_params, attenuation=False) #> return the simulated emission sino with photon attenuation - return attsino*emisino + return attsino * emisino def simulate_recon( measured_sino, ctim, scanner_params, - simulate_3d = False, + simulate_3d=False, nitr=60, fwhm_rm=0., - slice_idx = -1, + slice_idx=-1, randoms=None, scatter=None, - mu_input = False, - msk_radius = 29., + mu_input=False, + msk_radius=29., psf=None, ): ''' @@ -152,25 +154,27 @@ def simulate_recon( if simulate_3d: if ctim.ndim!=3 \ or ctim.shape!=(Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']): - raise ValueError( - 'The CT/mu-map image does not match the scanner image shape.') + raise ValueError('The CT/mu-map image does not match the scanner image shape.') else: #> 2D case with reduced rings - if len(ctim.shape)==3: + if len(ctim.shape) == 3: # make sure that the shape of the input image matches the image size of the scanner - if ctim.shape[1:]!=(Cnt['SO_IMY'], Cnt['SO_IMX']): - raise ValueError('The input image shape for x and y does not match the scanner image size.') + if ctim.shape[1:] != (Cnt['SO_IMY'], Cnt['SO_IMX']): + raise ValueError( + 'The input image shape for x and y 
does not match the scanner image size.') # pick the right slice index (slice_idx) if not given or mistaken - if slice_idx<0: - log.warning('the axial index is chosen to be in the middle of axial FOV.') - slice_idx = ctim.shape[0]/2 - if slice_idx>=ctim.shape[0]: + if slice_idx < 0: + log.warning( + 'the axial index is chosen to be in the middle of axial FOV.') + slice_idx = ctim.shape[0] / 2 + if slice_idx >= ctim.shape[0]: raise ValueError('The axial index for 2D slice selection is outside the image.') - elif len(ctim.shape)==2: + elif len(ctim.shape) == 2: # make sure that the shape of the input image matches the image size of the scanner if ctim.shape != (Cnt['SO_IMY'], Cnt['SO_IMX']): - raise ValueError('The input image shape for x and y does not match the scanner image size.') - ctim.shape = (1,) + ctim.shape + raise ValueError( + 'The input image shape for x and y does not match the scanner image size.') + ctim.shape = (1,) + ctim.shape slice_idx = 0 if 'rSZ_IMZ' not in Cnt: @@ -184,7 +188,7 @@ def simulate_recon( mui = nimpa.ct2mu(ctim) #> get rid of negative values - mui[mui<0] = 0 + mui[mui < 0] = 0 #-------------------- if simulate_3d: @@ -194,7 +198,7 @@ def simulate_recon( else: #-------------------- #> create a number of slides of the same chosen image slice for reduced (fast) 3D simulation - rmu = mui[slice_idx,:,:] + rmu = mui[slice_idx, :, :] rmu.shape = (1,) + rmu.shape rmu = np.repeat(rmu, Cnt['rSZ_IMZ'], axis=0) #-------------------- @@ -204,23 +208,23 @@ def simulate_recon( # import pdb; pdb.set_trace() #> attenuation factor sinogram - attsino = mmrprj.frwd_prj(rmu, scanner_params, attenuation=True, dev_out=True) + attsino = mmrprj.frwd_prj(rmu, scanner_params, attenuation=True, dev_out=True) nrmsino = np.ones(attsino.shape, dtype=np.float32) #> randoms and scatter put together - if isinstance(randoms, np.ndarray) and measured_sino.shape==randoms.shape: + if isinstance(randoms, np.ndarray) and measured_sino.shape == randoms.shape: rsng = 
mmraux.remgaps(randoms, txLUT, Cnt) else: - rsng = 1e-5*np.ones((Cnt['Naw'], nsinos), dtype=np.float32) + rsng = 1e-5 * np.ones((Cnt['Naw'], nsinos), dtype=np.float32) - if isinstance(scatter, np.ndarray) and measured_sino.shape==scatter.shape: + if isinstance(scatter, np.ndarray) and measured_sino.shape == scatter.shape: ssng = mmraux.remgaps(scatter, txLUT, Cnt) else: - ssng = 1e-5*np.ones((Cnt['Naw'], nsinos), dtype=np.float32) + ssng = 1e-5 * np.ones((Cnt['Naw'], nsinos), dtype=np.float32) # resolution modelling - Cnt['SIGMA_RM'] = mmrrec.fwhm2sig(fwhm_rm, voxsize=Cnt['SZ_VOXZ']*10) if fwhm_rm else 0 + Cnt['SIGMA_RM'] = mmrrec.fwhm2sig(fwhm_rm, voxsize=Cnt['SZ_VOXZ'] * 10) if fwhm_rm else 0 if simulate_3d: log.debug('------ OSEM (%d) -------' % nitr) @@ -229,56 +233,39 @@ def simulate_recon( psng = mmraux.remgaps(measured_sino.astype(np.uint16), txLUT, Cnt) #> mask for reconstructed image. anything outside it is set to zero - msk = mmrimg.get_cylinder(Cnt, rad=msk_radius, xo=0, yo=0, unival=1, gpu_dim=True)>0.9 + msk = mmrimg.get_cylinder(Cnt, rad=msk_radius, xo=0, yo=0, unival=1, gpu_dim=True) > 0.9 #> init image eimg = np.ones((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) #------------------------------------ - Sn = 14 # number of subsets - #-get one subset to get number of projection bins in a subset - Sprj, s = mmrrec.get_subsets14(0,scanner_params) + Sn = 14 # number of subsets + #-get one subset to get number of projection bins in a subset + Sprj, s = mmrrec.get_subsets14(0, scanner_params) Nprj = len(Sprj) #> init subset array and sensitivity image for a given subset - sinoTIdx = np.zeros((Sn, Nprj+1), dtype=np.int32) + sinoTIdx = np.zeros((Sn, Nprj + 1), dtype=np.int32) #> init sensitivity images for each subset sim = np.zeros((Sn, Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) for n in trange(Sn, desc="sensitivity", leave=log.getEffectiveLevel() < logging.INFO): - sinoTIdx[n,0] = Nprj #first number of projection for 
the given subset - sinoTIdx[n,1:], s = mmrrec.get_subsets14(n,scanner_params) - #> sensitivity image - petprj.bprj( - sim[n,:,:,:], - attsino[sinoTIdx[n,1:],:], - txLUT, - axLUT, - sinoTIdx[n,1:], - Cnt) - #------------------------------------- - - for k in trange(nitr, desc="OSEM", - disable=log.getEffectiveLevel() > logging.INFO, - leave=log.getEffectiveLevel() < logging.INFO): - petprj.osem( - eimg, - psng, - rsng, - ssng, - nrmsino, - attsino, - sinoTIdx, - sim, - msk, - psfkernel, - txLUT, - axLUT, - Cnt) + sinoTIdx[n, 0] = Nprj #first number of projection for the given subset + sinoTIdx[n, 1:], s = mmrrec.get_subsets14(n, scanner_params) + #> sensitivity image + petprj.bprj(sim[n, :, :, :], attsino[sinoTIdx[n, 1:], :], txLUT, axLUT, + sinoTIdx[n, 1:], Cnt) + #------------------------------------- + + for k in trange(nitr, desc="OSEM", disable=log.getEffectiveLevel() > logging.INFO, + leave=log.getEffectiveLevel() < logging.INFO): + petprj.osem(eimg, psng, rsng, ssng, nrmsino, attsino, sinoTIdx, sim, msk, psfkernel, + txLUT, axLUT, Cnt) eim = mmrimg.convert2e7(eimg, Cnt) else: + def psf(x, output=None): if Cnt['SIGMA_RM']: x = ndi.gaussian_filter(x, sigma=Cnt['SIGMA_RM'], mode='constant', output=None) @@ -287,7 +274,7 @@ def psf(x, output=None): #> estimated image, initialised to ones eim = np.ones(rmu.shape, dtype=np.float32) - msk = mmrimg.get_cylinder(Cnt, rad=msk_radius, xo=0, yo=0, unival=1, gpu_dim=False)>0.9 + msk = mmrimg.get_cylinder(Cnt, rad=msk_radius, xo=0, yo=0, unival=1, gpu_dim=False) > 0.9 #> sensitivity image for the EM-ML reconstruction sim = mmrprj.back_prj(attsino, scanner_params) @@ -295,9 +282,8 @@ def psf(x, output=None): sim_inv[~msk] = 0 rndsct = rsng + ssng - for i in trange(nitr, desc="MLEM", - disable=log.getEffectiveLevel() > logging.INFO, - leave=log.getEffectiveLevel() < logging.INFO): + for i in trange(nitr, desc="MLEM", disable=log.getEffectiveLevel() > logging.INFO, + leave=log.getEffectiveLevel() < logging.INFO): #> remove 
gaps from the measured sinogram #> then forward project the estimated image #> after which divide the measured sinogram by the estimated sinogram (forward projected) diff --git a/niftypet/nipet/sct/mmrsct.py b/niftypet/nipet/sct/mmrsct.py index 0d0ed716..4acfb309 100644 --- a/niftypet/nipet/sct/mmrsct.py +++ b/niftypet/nipet/sct/mmrsct.py @@ -20,16 +20,16 @@ from ..prj import mmrprj, mmrrec, petprj from . import nifty_scatter -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" +__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") +__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) -def fwhm2sig (fwhm, Cnt): +def fwhm2sig(fwhm, Cnt): ''' Convert FWHM to sigma (standard deviation) ''' - return (fwhm/Cnt['SO_VXY']) / (2*(2*np.log(2))**.5) + return (fwhm / Cnt['SO_VXY']) / (2 * (2 * np.log(2))**.5) #======================================================================= @@ -43,7 +43,7 @@ def get_scrystals(scanner_params): used for scatter modelling ''' #> decompose constants, transaxial and axial LUTs are extracted - Cnt = scanner_params['Cnt'] + Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] @@ -65,21 +65,18 @@ def get_scrystals(scanner_params): #> transaxial scatter crystal selection for modelling for c in range(Cnt['NCRS']): - if (((c + 1) % 9) == 0): + if (((c+1) % 9) == 0): continue cntr += 1 if (cntr == SCRS_T): cntr = 0 - scrs.append([ - c, 0.5*(crs[c, 0] + crs[c, 2]), 0.5*(crs[c, 1] + crs[c, 3]) - ]) + scrs.append([c, 0.5 * (crs[c, 0] + crs[c, 2]), 0.5 * (crs[c, 1] + crs[c, 3])]) iscrs += 1 #> convert the scatter crystal table to Numpy array scrs = np.array(scrs, dtype=np.float32) #------------------------------------------------------ - #------------------------------------------------------ #> scatter ring definition (axially) sct_irng = np.int16([0, 10, 19, 28, 35, 44, 53, 63]) @@ -87,19 +84,17 @@ def get_scrystals(scanner_params): NSRNG = 
len(sct_irng) #------------------------------------------------------ - logtxt = '' - srng = np.zeros((NSRNG,2), dtype=np.float32) - z = 0.5*(-Cnt['NRNG']*Cnt['AXR'] + Cnt['AXR']) + srng = np.zeros((NSRNG, 2), dtype=np.float32) + z = 0.5 * (-Cnt['NRNG'] * Cnt['AXR'] + Cnt['AXR']) for ir in range(NSRNG): - srng[ir,0] = float(sct_irng[ir]) - srng[ir,1] = axLUT['rng'][sct_irng[ir],:].mean() - logtxt += '> [{}]: ring_i={}, ring_z={}\n'.format(ir, int(srng[ir,0]), srng[ir,1]) + srng[ir, 0] = float(sct_irng[ir]) + srng[ir, 1] = axLUT['rng'][sct_irng[ir], :].mean() + logtxt += '> [{}]: ring_i={}, ring_z={}\n'.format(ir, int(srng[ir, 0]), srng[ir, 1]) log.debug(logtxt) - return dict(scrs=scrs, srng=srng, sirng=sct_irng, NSCRS=scrs.shape[0], NSRNG=NSRNG) @@ -107,7 +102,7 @@ def get_scrystals(scanner_params): def get_sctlut2d(txLUT, scrs_def): #> scatter to sinogram bin index LUT - sct2aw = np.zeros(scrs_def['NSCRS']*scrs_def['NSCRS'], dtype=np.int32) + sct2aw = np.zeros(scrs_def['NSCRS'] * scrs_def['NSCRS'], dtype=np.int32) # scatter/unscattered crystal x-coordinate (used for determining +/- sino segments) xsxu = np.zeros((scrs_def['NSCRS'], scrs_def['NSCRS']), dtype=np.int8) @@ -127,16 +122,17 @@ def get_sctlut2d(txLUT, scrs_def): ] #> scattered and unscattered crystal positions (used for determining +/- sino segments) - xs = scrs_def['scrs'][sc,1] - xu = scrs_def['scrs'][uc,1] + xs = scrs_def['scrs'][sc, 1] + xu = scrs_def['scrs'][uc, 1] - if (xs>xu): + if (xs > xu): xsxu[uc, sc] = 1 sct2aw.shape = (scrs_def['NSCRS'], scrs_def['NSCRS']) return dict(sct2aw=sct2aw, xsxu=xsxu, c2sFw=txLUT['c2sFw']) + #======================================================================= @@ -146,57 +142,62 @@ def get_knlut(Cnt): get Klein-Nishina LUTs ''' - SIG511 = Cnt['ER']*Cnt['E511']/2.35482 + SIG511 = Cnt['ER'] * Cnt['E511'] / 2.35482 - CRSSavg = (2*(4/3.0-np.log(3)) + .5*np.log(3)-4/9.0) + CRSSavg = (2 * (4/3.0 - np.log(3)) + .5 * np.log(3) - 4/9.0) - COSSTP = 
(1-Cnt['COSUPSMX'])/(Cnt['NCOS']-1) + COSSTP = (1 - Cnt['COSUPSMX']) / (Cnt['NCOS'] - 1) - log.debug('using these scatter constants:\nCOS(UPSMAX) = {},\nCOSSTP = {}'.format(Cnt['COSUPSMX'], COSSTP)) + log.debug('using these scatter constants:\nCOS(UPSMAX) = {},\nCOSSTP = {}'.format( + Cnt['COSUPSMX'], COSSTP)) - knlut = np.zeros((Cnt['NCOS'],2), dtype = np.float32) + knlut = np.zeros((Cnt['NCOS'], 2), dtype=np.float32) for i in range(Cnt['NCOS']): - cosups = Cnt['COSUPSMX']+i*COSSTP - alpha = 1/(2 - cosups) - KNtmp = ( (0.5*Cnt['R02']) * alpha*alpha * ( alpha + 1/alpha - (1-cosups*cosups) ) ) - knlut[i,0] = KNtmp / ( 2*pi*Cnt['R02'] * CRSSavg); + cosups = Cnt['COSUPSMX'] + i*COSSTP + alpha = 1 / (2-cosups) + KNtmp = ((0.5 * Cnt['R02']) * alpha * alpha * (alpha + 1/alpha - (1 - cosups*cosups))) + knlut[i, 0] = KNtmp / (2 * pi * Cnt['R02'] * CRSSavg) knlut[i,1] = ( (1+alpha)/(alpha*alpha)*(2*(1+alpha)/(1+2*alpha)-1/alpha*np.log(1+2*alpha)) + \ np.log(1+2*alpha)/(2*alpha)-(1+3*alpha)/((1+2*alpha)*(1+2*alpha)) ) / CRSSavg # Add energy resolution: - if Cnt['ER']>0: + if Cnt['ER'] > 0: log.info('using energy resolution for scatter simulation, ER = {}'.format(Cnt['ER'])) - knlut[i,0] *= .5*erfc( (Cnt['LLD']-alpha*Cnt['E511'])/(SIG511*np.sqrt(2*alpha)) ) + knlut[i, 0] *= .5 * erfc( + (Cnt['LLD'] - alpha * Cnt['E511']) / (SIG511 * np.sqrt(2 * alpha))) #knlut[i,0] *= .5*erfc( (Cnt['LLD']-alpha*Cnt['E511'])/(SIG511) ); # for large angles (small cosups) when the angle in GPU calculations is greater than COSUPSMX - if (i==0): - knlut[0,0] = 0; + if (i == 0): + knlut[0, 0] = 0 return knlut -#======================================================================= +#======================================================================= + #================================================================================================== # GET SCATTER LUTs #-------------------------------------------------------------------------------------------------- def rd2sni(offseg, r1, r0): 
- rd = np.abs(r1-r0) - rdi = (2*rd - 1*(r1>r0)) - sni = offseg[rdi] + np.minimum(r0,r1) + rd = np.abs(r1 - r0) + rdi = (2*rd - 1 * (r1 > r0)) + sni = offseg[rdi] + np.minimum(r0, r1) return sni + + #-------------------------------------------------------------------------------------------------- + def get_sctLUT(scanner_params): #> decompose constants, transaxial and axial LUTs are extracted - Cnt = scanner_params['Cnt'] + Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] - #> get the Klein-Nishina LUT: KN = get_knlut(Cnt) @@ -213,92 +214,91 @@ def get_sctLUT(scanner_params): # NRNG = Cnt['RNG_END']-Cnt['RNG_STRT'] #-span-1 LUT (rings to sino index) - seg = np.append( [Cnt['NRNG']], np.ceil( np.arange(Cnt['NRNG']-1,0,-.5) ).astype(np.int16) ) - offseg = np.int16( np.append( [0], np.cumsum(seg)) ) + seg = np.append([Cnt['NRNG']], np.ceil(np.arange(Cnt['NRNG'] - 1, 0, -.5)).astype(np.int16)) + offseg = np.int16(np.append([0], np.cumsum(seg))) #-3D scatter sino LUT. axial component based on michelogram. 
sctaxR = np.zeros((Cnt['NRNG']**2, 4), dtype=np.int32) sctaxW = np.zeros((Cnt['NRNG']**2, 4), dtype=np.float32) #-just for local check and display of the interpolation at work - mich = np.zeros((Cnt['NRNG'], Cnt['NRNG']), dtype=np.float32) + mich = np.zeros((Cnt['NRNG'], Cnt['NRNG']), dtype=np.float32) mich2 = np.zeros((Cnt['NRNG'], Cnt['NRNG']), dtype=np.float32) - - J, I = np.meshgrid(irng, irng) - mich[J,I] = np.reshape(np.arange(scrs_def['NSRNG']**2), (scrs_def['NSRNG'], scrs_def['NSRNG'])) + J, I = np.meshgrid(irng, irng) + mich[J, I] = np.reshape(np.arange(scrs_def['NSRNG']**2), + (scrs_def['NSRNG'], scrs_def['NSRNG'])) # plt.figure(64), plt.imshow(mich, interpolation='none') for r1 in range(Cnt['RNG_STRT'], Cnt['RNG_END']): #border up and down - bd = next(idx for idx in irng if idx>=r1) - bu = next(idx for idx in irng[::-1] if idx<=r1) + bd = next(idx for idx in irng if idx >= r1) + bu = next(idx for idx in irng[::-1] if idx <= r1) for r0 in range(Cnt['RNG_STRT'], Cnt['RNG_END']): # if (np.abs(r1-r0)>MRD): # continue #border left and right - br = next(idx for idx in irng if idx>=r0) - bl = next(idx for idx in irng[::-1] if idx<=r0) + br = next(idx for idx in irng if idx >= r0) + bl = next(idx for idx in irng[::-1] if idx <= r0) #print '(r0,r1)=', r0,r1, '(bl,br,bu,bd)', bl,br,bu,bd #span-1 sino index (sni) creation: sni = rd2sni(offseg, r1, r0) #see: https://en.wikipedia.org/wiki/Bilinear_interpolation - if (br==bl)and(bu!=bd): + if (br == bl) and (bu != bd): - sctaxR[sni,0] = rd2sni(offseg, bd, r0) - sctaxW[sni,0] = (r1-bu)/float(bd-bu) - sctaxR[sni,1] = rd2sni(offseg, bu, r0) - sctaxW[sni,1] = (bd-r1)/float(bd-bu) + sctaxR[sni, 0] = rd2sni(offseg, bd, r0) + sctaxW[sni, 0] = (r1-bu) / float(bd - bu) + sctaxR[sni, 1] = rd2sni(offseg, bu, r0) + sctaxW[sni, 1] = (bd-r1) / float(bd - bu) - mich2[r1,r0] = mich[bd,r0]*sctaxW[sni,0] + mich[bu,r0]*sctaxW[sni,1] + mich2[r1, r0] = mich[bd, r0] * sctaxW[sni, 0] + mich[bu, r0] * sctaxW[sni, 1] - elif 
(bu==bd)and(br!=bl): + elif (bu == bd) and (br != bl): - sctaxR[sni,0] = rd2sni(offseg, r1, bl) - sctaxW[sni,0] = (br-r0)/float(br-bl) - sctaxR[sni,1] = rd2sni(offseg, r1, br) - sctaxW[sni,1] = (r0-bl)/float(br-bl) + sctaxR[sni, 0] = rd2sni(offseg, r1, bl) + sctaxW[sni, 0] = (br-r0) / float(br - bl) + sctaxR[sni, 1] = rd2sni(offseg, r1, br) + sctaxW[sni, 1] = (r0-bl) / float(br - bl) - mich2[r1,r0] = mich[r1,bl]*sctaxW[sni,0] + mich[r1,br]*sctaxW[sni,1] + mich2[r1, r0] = mich[r1, bl] * sctaxW[sni, 0] + mich[r1, br] * sctaxW[sni, 1] - elif (bu==bd)and(br==bl): + elif (bu == bd) and (br == bl): - mich2[r1,r0] = mich[r1,r0] - sctaxR[sni,0] = rd2sni(offseg, r1, r0) - sctaxW[sni,0] = 1 + mich2[r1, r0] = mich[r1, r0] + sctaxR[sni, 0] = rd2sni(offseg, r1, r0) + sctaxW[sni, 0] = 1 continue else: - cf = float(((br-bl)*(bd-bu))) + cf = float(((br-bl) * (bd-bu))) - sctaxR[sni,0] = rd2sni(offseg, bd, bl) - sctaxW[sni,0] = (br-r0)*(r1-bu)/cf - sctaxR[sni,1] = rd2sni(offseg, bd, br) - sctaxW[sni,1] = (r0-bl)*(r1-bu)/cf + sctaxR[sni, 0] = rd2sni(offseg, bd, bl) + sctaxW[sni, 0] = (br-r0) * (r1-bu) / cf + sctaxR[sni, 1] = rd2sni(offseg, bd, br) + sctaxW[sni, 1] = (r0-bl) * (r1-bu) / cf - sctaxR[sni,2] = rd2sni(offseg, bu, bl) - sctaxW[sni,2] = (br-r0)*(bd-r1)/cf - sctaxR[sni,3] = rd2sni(offseg, bu, br) - sctaxW[sni,3] = (r0-bl)*(bd-r1)/cf + sctaxR[sni, 2] = rd2sni(offseg, bu, bl) + sctaxW[sni, 2] = (br-r0) * (bd-r1) / cf + sctaxR[sni, 3] = rd2sni(offseg, bu, br) + sctaxW[sni, 3] = (r0-bl) * (bd-r1) / cf - mich2[r1,r0] = mich[bd,bl]*sctaxW[sni,0]+ mich[bd,br]*sctaxW[sni,1] + mich[bu,bl]*sctaxW[sni,2] + mich[bu,br]*sctaxW[sni,3] + mich2[r1, r0] = mich[bd, bl] * sctaxW[sni, 0] + mich[bd, br] * sctaxW[ + sni, 1] + mich[bu, bl] * sctaxW[sni, 2] + mich[bu, br] * sctaxW[sni, 3] # plt.figure(65), plt.imshow(mich2, interpolation='none') - sctLUT = { - 'sctaxR':sctaxR, - 'sctaxW':sctaxW, - 'offseg':offseg, - 'KN':KN, - 'mich_chck':[mich, mich2], + 'sctaxR': sctaxR, + 'sctaxW': sctaxW, + 
'offseg': offseg, + 'KN': KN, + 'mich_chck': [mich, mich2], **scrs_def, - **sctlut2d, - } + **sctlut2d,} return sctLUT @@ -315,11 +315,10 @@ def intrp_bsct(sct3d, Cnt, sctLUT, ssrlut, dtype=np.float32): transferred into the scatter sinograms. ''' - #> number of sinograms snno = sct3d.shape[1] - i_scrs = sctLUT['scrs'][:,0].astype(int) + i_scrs = sctLUT['scrs'][:, 0].astype(int) x = i_scrs y = np.append([-1], i_scrs) @@ -331,18 +330,17 @@ def intrp_bsct(sct3d, Cnt, sctLUT, ssrlut, dtype=np.float32): #> roll each row according to the position for i in range(sctLUT['NSCRS']): - ii[i,:] = np.roll(ii[i,:], -1*i) + ii[i, :] = np.roll(ii[i, :], -1 * i) jjnew, iinew = np.mgrid[0:Cnt['NCRS'], 0:Cnt['NCRS']] for i in range(Cnt['NCRS']): - iinew[i,:] = np.roll(iinew[i,:], i) - - ssn = np.zeros((Cnt['TOFBINN'], snno, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=dtype); - sssr = np.zeros((Cnt['TOFBINN'], Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=dtype); + iinew[i, :] = np.roll(iinew[i, :], i) + ssn = np.zeros((Cnt['TOFBINN'], snno, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=dtype) + sssr = np.zeros((Cnt['TOFBINN'], Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=dtype) for ti in range(Cnt['TOFBINN']): - sn2d = np.zeros(Cnt['NSANGLES']*Cnt['NSBINS'], dtype=dtype) + sn2d = np.zeros(Cnt['NSANGLES'] * Cnt['NSBINS'], dtype=dtype) for si in range(snno): @@ -350,7 +348,7 @@ def intrp_bsct(sct3d, Cnt, sctLUT, ssrlut, dtype=np.float32): sct2d = sct3d[0, si, jj, ii] - z = np.vstack([sct2d[-1,:], sct2d]) + z = np.vstack([sct2d[-1, :], sct2d]) f = interp2d(x, y, z, kind='cubic') znew = f(xnew, ynew) @@ -359,48 +357,48 @@ def intrp_bsct(sct3d, Cnt, sctLUT, ssrlut, dtype=np.float32): #> upper triangle #> add '1' to include index zero (distinguished from after triangulation) - qi = np.triu(sctLUT['c2sFw']+1)>0 + qi = np.triu(sctLUT['c2sFw'] + 1) > 0 sidx = sctLUT['c2sFw'][qi] s = znew[qi] sn2d[sidx] = s #> lower triangle - qi = np.tril(sctLUT['c2sFw']+1)>0 + qi = np.tril(sctLUT['c2sFw'] 
+ 1) > 0 sidx = sctLUT['c2sFw'][qi] s = znew[qi] sn2d[sidx] += s - ssn [ti, si, ...] = np.reshape(sn2d, (Cnt['NSANGLES'],Cnt['NSBINS'])) - sssr[ti, ssrlut[si], ...] += ssn[ti, si,:,:] - + ssn[ti, si, ...] = np.reshape(sn2d, (Cnt['NSANGLES'], Cnt['NSBINS'])) + sssr[ti, ssrlut[si], ...] += ssn[ti, si, :, :] return np.squeeze(ssn), np.squeeze(sssr) #------------------------------------------------- + #==================================================================================================== def vsm( - datain, - mumaps, - em, - scanner_params, - histo = None, - rsino = None, - prcnt_scl = 0.1, - fwhm_input=0.42, - mask_threshlod = 0.999, - snmsk=None, - emmsk=False, - interpolate=True, - return_uninterp=False, - return_ssrb=False, - return_mask=False, - return_scaling=False, - scaling=True, - self_scaling=False, - save_sax=False, - ): + datain, + mumaps, + em, + scanner_params, + histo=None, + rsino=None, + prcnt_scl=0.1, + fwhm_input=0.42, + mask_threshlod=0.999, + snmsk=None, + emmsk=False, + interpolate=True, + return_uninterp=False, + return_ssrb=False, + return_mask=False, + return_scaling=False, + scaling=True, + self_scaling=False, + save_sax=False, +): ''' Voxel-driven scatter modelling (VSM). Obtain a scatter sinogram using the mu-maps (hardware and object mu-maps) @@ -432,7 +430,7 @@ def vsm( ''' #> decompose constants, transaxial and axial LUTs are extracted - Cnt = scanner_params['Cnt'] + Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] @@ -443,8 +441,11 @@ def vsm( muh, muo = mumaps if emmsk and not os.path.isfile(datain['em_nocrr']): - log.info('reconstructing emission data without scatter and attenuation corrections for mask generation...') - recnac = mmrrec.osemone(datain, mumaps, histo, scanner_params, recmod=0, itr=3, fwhm=2.0, store_img=True) + log.info( + 'reconstructing emission data without scatter and attenuation corrections for mask generation...' 
+ ) + recnac = mmrrec.osemone(datain, mumaps, histo, scanner_params, recmod=0, itr=3, fwhm=2.0, + store_img=True) datain['em_nocrr'] = recnac.fpet # if rsino is None and not histo is None and 'rsino' in histo: @@ -458,60 +459,55 @@ def vsm( nrmcmp, nhdr = mmrnorm.get_components(datain, Cnt) #-smooth for defining the sino scatter only regions - if fwhm_input>0.: - mu_sctonly = ndi.filters.gaussian_filter( - mmrimg.convert2dev(muo, Cnt), - fwhm2sig(fwhm_input, Cnt), - mode='mirror' - ) + if fwhm_input > 0.: + mu_sctonly = ndi.filters.gaussian_filter(mmrimg.convert2dev(muo, Cnt), + fwhm2sig(fwhm_input, Cnt), mode='mirror') else: mu_sctonly = muo - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: snno = Cnt['NSN1'] - snno_= Cnt['NSN64'] + snno_ = Cnt['NSN64'] ssrlut = axLUT['sn1_ssrb'] saxnrm = nrmcmp['sax_f1'] - elif Cnt['SPN']==11: + elif Cnt['SPN'] == 11: snno = Cnt['NSN11'] - snno_= snno + snno_ = snno ssrlut = axLUT['sn11_ssrb'] saxnrm = nrmcmp['sax_f11'] #LUTs for scatter sctLUT = get_sctLUT(scanner_params) - #> smooth before scaling/down-sampling the mu-map and emission images - if fwhm_input>0.: - muim = ndi.filters.gaussian_filter(muo+muh, fwhm2sig(fwhm_input, Cnt), mode='mirror') + if fwhm_input > 0.: + muim = ndi.filters.gaussian_filter(muo + muh, fwhm2sig(fwhm_input, Cnt), mode='mirror') emim = ndi.filters.gaussian_filter(em, fwhm2sig(fwhm_input, Cnt), mode='mirror') else: - muim = muo+muh + muim = muo + muh emim = em - muim = ndi.interpolation.zoom( muim, Cnt['SCTSCLMU'], order=3 ) #(0.499, 0.5, 0.5) - emim = ndi.interpolation.zoom( emim, Cnt['SCTSCLEM'], order=3 ) #(0.34, 0.33, 0.33) + muim = ndi.interpolation.zoom(muim, Cnt['SCTSCLMU'], order=3) #(0.499, 0.5, 0.5) + emim = ndi.interpolation.zoom(emim, Cnt['SCTSCLEM'], order=3) #(0.34, 0.33, 0.33) #-smooth the mu-map for mask creation. the mask contains voxels for which attenuation ray LUT is found. 
- if fwhm_input>0.: + if fwhm_input > 0.: smomu = ndi.filters.gaussian_filter(muim, fwhm2sig(fwhm_input, Cnt), mode='mirror') - mumsk = np.int8(smomu>0.003) + mumsk = np.int8(smomu > 0.003) else: - mumsk = np.int8(muim>0.001) + mumsk = np.int8(muim > 0.001) #CORE SCATTER ESTIMATION NSCRS, NSRNG = sctLUT['NSCRS'], sctLUT['NSRNG'] - sctout ={ - 'sct_3d' :np.zeros((Cnt['TOFBINN'], snno_, NSCRS, NSCRS), dtype=np.float32), - 'sct_val' :np.zeros((Cnt['TOFBINN'], NSRNG, NSCRS, NSRNG, NSCRS), dtype=np.float32), - } + sctout = { + 'sct_3d': np.zeros((Cnt['TOFBINN'], snno_, NSCRS, NSCRS), dtype=np.float32), + 'sct_val': np.zeros((Cnt['TOFBINN'], NSRNG, NSCRS, NSRNG, NSCRS), dtype=np.float32),} #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> nifty_scatter.vsm(sctout, muim, mumsk, emim, sctLUT, axLUT, Cnt) #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> - sct3d = sctout['sct_3d'] + sct3d = sctout['sct_3d'] sctind = sctLUT['sct2aw'] log.debug('total scatter sum: {}'.format(np.sum(sct3d))) @@ -525,15 +521,13 @@ def vsm( out['indexes'] = sctind #------------------------------------------------------------------- - - if np.sum(sct3d)<1e-04: + if np.sum(sct3d) < 1e-04: log.warning('total scatter below threshold: {}'.format(np.sum(sct3d))) - sss = np.zeros((snno, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) + sss = np.zeros((snno, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) asnmsk = np.zeros((snno, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) - sssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) + sssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) return sss, sssr, asnmsk - # import pdb; pdb.set_trace() #------------------------------------------------------------------- @@ -545,7 +539,7 @@ def vsm( start = time.time() ssn, sssr = intrp_bsct(sct3d, Cnt, sctLUT, ssrlut) stop = time.time() - log.debug('scatter 
interpolation done in {} sec.'.format(stop-start)) + log.debug('scatter interpolation done in {} sec.'.format(stop - start)) if not scaling: out['ssrb'] = sssr @@ -555,10 +549,8 @@ def vsm( return out #------------------------------------------------------------------- - #------------------------------------------------------------------- # import pdb; pdb.set_trace() - ''' debugging scatter: import matplotlib.pyplot as plt @@ -582,12 +574,11 @@ def vsm( ''' #------------------------------------------------------------------- - #> get SSR for randoms from span-1 or span-11 rssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) if scaling: for i in range(snno): - rssr[ssrlut[i],:,:] += rsino[i,:,:] + rssr[ssrlut[i], :, :] += rsino[i, :, :] #ATTENUATION FRACTIONS for scatter only regions, and NORMALISATION for all SCATTER #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> @@ -606,17 +597,16 @@ def vsm( nrm = mmraux.putgaps(nrmg, txLUT, Cnt) #-------------------------------------------------------------- - #> get attenuation + norm in (span-11) and SSR attossr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) nrmsssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) for i in range(Cnt['NSN1']): si = axLUT['sn1_ssrb'][i] - attossr[si,:,:] += atto[i,:,:] / float(axLUT['sn1_ssrno'][si]) - nrmsssr[si,:,:] += nrm[i,:,:] / float(axLUT['sn1_ssrno'][si]) - if currentspan==11: - Cnt['SPN']=11 + attossr[si, :, :] += atto[i, :, :] / float(axLUT['sn1_ssrno'][si]) + nrmsssr[si, :, :] += nrm[i, :, :] / float(axLUT['sn1_ssrno'][si]) + if currentspan == 11: + Cnt['SPN'] = 11 nrmg = np.zeros((txLUT['Naw'], snno), dtype=np.float32) mmr_auxe.norm(nrmg, nrmcmp, histo['buckets'], axLUT, txLUT['aw2ali'], Cnt) nrm = mmraux.putgaps(nrmg, txLUT, Cnt) @@ -627,61 +617,57 @@ def vsm( #get the mask for the object from uncorrected emission image if emmsk and 
os.path.isfile(datain['em_nocrr']): nim = nib.load(datain['em_nocrr']) - A = nim.get_sform() + A = nim.get_sform() eim = nim.get_fdata(dtype=np.float32) - eim = eim[:,::-1,::-1] + eim = eim[:, ::-1, ::-1] eim = np.transpose(eim, (2, 1, 0)) em_sctonly = ndi.filters.gaussian_filter(eim, fwhm2sig(.6, Cnt), mode='mirror') - msk = np.float32(em_sctonly>0.07*np.max(em_sctonly)) + msk = np.float32(em_sctonly > 0.07 * np.max(em_sctonly)) msk = ndi.filters.gaussian_filter(msk, fwhm2sig(.6, Cnt), mode='mirror') - msk = np.float32(msk>0.01) + msk = np.float32(msk > 0.01) msksn = mmrprj.frwd_prj(msk, txLUT, axLUT, Cnt) mssr = mmraux.sino2ssr(msksn, axLUT, Cnt) - mssr = mssr>0 + mssr = mssr > 0 else: - mssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.bool); + mssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.bool) #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> - #======== SCALING ======== #> scale scatter using non-TOF SSRB sinograms #> gap mask - rmsk = (txLUT['msino']>0).T - rmsk.shape = (1,Cnt['NSANGLES'],Cnt['NSBINS']) + rmsk = (txLUT['msino'] > 0).T + rmsk.shape = (1, Cnt['NSANGLES'], Cnt['NSBINS']) rmsk = np.repeat(rmsk, Cnt['NSEG0'], axis=0) #> include attenuating object into the mask (and the emission if selected) - amsksn = np.logical_and( attossr>=mask_threshlod, rmsk) * ~mssr + amsksn = np.logical_and(attossr >= mask_threshlod, rmsk) * ~mssr #> scaling factors for SSRB scatter - scl_ssr = np.zeros( (Cnt['NSEG0']), dtype=np.float32) + scl_ssr = np.zeros((Cnt['NSEG0']), dtype=np.float32) for sni in range(Cnt['NSEG0']): #> region for scaling defined by the percentage of lowest #> but usable/significant scatter - thrshld = prcnt_scl * np.max(sssr[sni,:,:]) - amsksn[sni,:,:] *= (sssr[sni,:,:]>thrshld) - amsk = amsksn[sni,:,:] + thrshld = prcnt_scl * np.max(sssr[sni, :, :]) + amsksn[sni, :, :] *= (sssr[sni, :, :] > thrshld) + amsk = amsksn[sni, :, :] #> normalised estimated scatter 
- mssn = sssr[sni,:,:] * nrmsssr[sni,:,:] + mssn = sssr[sni, :, :] * nrmsssr[sni, :, :] vpsn = histo['pssr'][sni, amsk] - rssr[sni, amsk] scl_ssr[sni] = np.sum(vpsn) / np.sum(mssn[amsk]) #> scatter SSRB sinogram output - sssr[sni,:,:] *= nrmsssr[sni,:,:]*scl_ssr[sni] - + sssr[sni, :, :] *= nrmsssr[sni, :, :] * scl_ssr[sni] #=== scale scatter for the full-size sinogram === - sss = np.zeros((snno, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32); + sss = np.zeros((snno, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) for i in range(snno): - sss[i,:,:] = ssn[i,:,:]*scl_ssr[ssrlut[i]]*saxnrm[i] * nrm[i,:,:] - - + sss[i, :, :] = ssn[i, :, :] * scl_ssr[ssrlut[i]] * saxnrm[i] * nrm[i, :, :] ''' #> debug si = 60 @@ -696,7 +682,6 @@ def vsm( plot(np.sum(rssr+sssr,axis=(0,1))) ''' - #=== OUTPUT === if return_uninterp: out['uninterp'] = sct3d @@ -715,7 +700,6 @@ def vsm( # if self_scaling: # out['scl_sn1'] = scl_ssn - if not out: return sss else: diff --git a/setup.py b/setup.py index 0d882c5b..e6c2b29c 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,8 @@ def chck_vox_h(Cnt): i1 = def_h.find("//## end ##//") defh = def_h[i0:i1] # list of constants which will be kept in synch from Python - cnt_list = ["SZ_IMX", "SZ_IMY", "SZ_IMZ", "TFOV2", "SZ_VOXY", "SZ_VOXZ", "SZ_VOXZi", "RSZ_PSF_KRNL"] + cnt_list = [ + "SZ_IMX", "SZ_IMY", "SZ_IMZ", "TFOV2", "SZ_VOXY", "SZ_VOXZ", "SZ_VOXZi", "RSZ_PSF_KRNL"] flg = False for s in cnt_list: m = re.search("(?<=#define " + s + r")\s*\d*\.*\d*", defh) @@ -61,13 +62,9 @@ def chck_vox_h(Cnt): break # if flag is set then redefine the constants in the sct.h file if flg: - strNew = ( - "//## start ##// constants definitions in synch with Python. DON" - "T MODIFY MANUALLY HERE!\n" - + "// IMAGE SIZE\n" - + "// SZ_I* are image sizes\n" - + "// SZ_V* are voxel sizes\n" - ) + strNew = ("//## start ##// constants definitions in synch with Python. 
DON" + "T MODIFY MANUALLY HERE!\n" + "// IMAGE SIZE\n" + "// SZ_I* are image sizes\n" + + "// SZ_V* are voxel sizes\n") strDef = "#define " for s in cnt_list: strNew += strDef + s + " " + str(Cnt[s]) + (s[3] == "V") * "f" + "\n" @@ -108,8 +105,7 @@ def chck_sct_h(Cnt): "R_RING", "R_2", "IR_RING", - "SRFCRS", - ] + "SRFCRS",] flg = False for i, s in enumerate(cnt_list): m = re.search("(?<=#define " + s + r")\s*\d*\.*\d*", scth) @@ -127,16 +123,14 @@ def chck_sct_h(Cnt): # if flag is set then redefine the constants in the sct.h file if flg: - strNew = dedent( - """\ + strNew = dedent("""\ //## start ##// constants definitions in synch with Python. DO NOT MODIFY!\n // SCATTER IMAGE SIZE AND PROPERTIES // SS_* are used for the mu-map in scatter calculations // SSE_* are used for the emission image in scatter calculations // R_RING, R_2, IR_RING are ring radius, squared radius and inverse of the radius, respectively. // NCOS is the number of samples for scatter angular sampling - """ - ) + """) strDef = "#define " for i, s in enumerate(cnt_list): @@ -166,35 +160,29 @@ def check_constants(): txt = "- - . 
- -" log.info( - dedent( - """\ + dedent("""\ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ changed sct.h: {} changed def.h: {} ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ {} - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - ).format(sct_compile, def_compile, txt) - ) + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""").format( + sct_compile, def_compile, txt)) -cs.resources_setup(gpu=False) # install resources.py -# check and update the constants in C headers according to resources.py +cs.resources_setup(gpu=False) # install resources.py + # check and update the constants in C headers according to resources.py check_constants() try: gpuarch = cs.dev_setup() # update resources.py with a supported GPU device except Exception as exc: log.error("could not set up CUDA:\n%s", exc) - log.info( - dedent( - """\ + dedent("""\ -------------------------------------------------------------- Finding hardware mu-maps - --------------------------------------------------------------""" - ) -) + --------------------------------------------------------------""")) # get the local path to NiftyPET resources.py path_resources = cs.path_niftypet_local() # if exists, import the resources and get the constants @@ -213,9 +201,7 @@ def check_constants(): break # prompt for installation path if hmu_dir is None: - Cnt["HMUDIR"] = tls.askdirectory( - title="Folder for hardware mu-maps: ", name="HMUDIR" - ) + Cnt["HMUDIR"] = tls.askdirectory(title="Folder for hardware mu-maps: ", name="HMUDIR") # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # update the path in resources.py tls.update_resources(Cnt) @@ -232,12 +218,8 @@ def check_constants(): for i in (Path(__file__).resolve().parent / "_skbuild").rglob("CMakeCache.txt"): i.write_text(re.sub("^//.*$\n^[^#].*pip-build-env.*$", "", i.read_text(), flags=re.M)) setup( - use_scm_version=True, - packages=find_packages(exclude=["examples", "tests"]), - 
package_data={"niftypet": ["nipet/auxdata/*"]}, - cmake_source_dir="niftypet", - cmake_languages=("C", "CXX", "CUDA"), - cmake_minimum_required_version="3.18", - cmake_args=[ + use_scm_version=True, packages=find_packages(exclude=["examples", "tests"]), + package_data={"niftypet": ["nipet/auxdata/*"]}, cmake_source_dir="niftypet", + cmake_languages=("C", "CXX", "CUDA"), cmake_minimum_required_version="3.18", cmake_args=[ f"-DNIPET_BUILD_VERSION={build_ver}", f"-DPython3_ROOT_DIR={sys.prefix}", "-DCMAKE_CUDA_ARCHITECTURES=" + " ".join(sorted(nvcc_arches))]) diff --git a/tests/conftest.py b/tests/conftest.py index 5e13a2c2..be9e03a0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,15 +5,14 @@ HOME = Path(getenv("DATA_ROOT", "~")).expanduser() + @pytest.fixture(scope="session") def folder_in(): Ab_PET_mMR_test = HOME / "Ab_PET_mMR_test" if not Ab_PET_mMR_test.is_dir(): - pytest.skip( - f"""Cannot find Ab_PET_mMR_test in ${{DATA_ROOT:-~}} ({HOME}). + pytest.skip(f"""Cannot find Ab_PET_mMR_test in ${{DATA_ROOT:-~}} ({HOME}). Try running `python -m tests` to download it. -""" - ) +""") return Ab_PET_mMR_test @@ -21,10 +20,8 @@ def folder_in(): def folder_ref(folder_in): Ab_PET_mMR_ref = folder_in / "testing_reference" / "Ab_PET_mMR_ref" if not Ab_PET_mMR_ref.is_dir(): - pytest.skip( - f"""Cannot find Ab_PET_mMR_ref in + pytest.skip(f"""Cannot find Ab_PET_mMR_ref in ${{DATA_ROOT:-~}}/testing_reference ({HOME}/testing_reference). Try running `python -m tests` to download it. 
-""" - ) +""") return Ab_PET_mMR_ref diff --git a/tests/test_amyloid_pvc.py b/tests/test_amyloid_pvc.py index 7c24ca1a..af4556e7 100644 --- a/tests/test_amyloid_pvc.py +++ b/tests/test_amyloid_pvc.py @@ -12,39 +12,31 @@ # segmentation/parcellation for PVC, with unique regions numbered from 0 onwards pvcroi = [] -pvcroi.append([66, 67] + list(range(81, 95))) # white matter -pvcroi.append([36]) # brain stem -pvcroi.append([35]) # pons -pvcroi.append([39, 40, 72, 73, 74]) # cerebellum GM -pvcroi.append([41, 42]) # cerebellum WM -pvcroi.append([48, 49]) # hippocampus -pvcroi.append([167, 168]) # posterior cingulate gyrus -pvcroi.append([139, 140]) # middle cingulate gyrus -pvcroi.append([101, 102]) # anterior cingulate gyrus -pvcroi.append([169, 170]) # precuneus -pvcroi.append([32, 33]) # amygdala -pvcroi.append([37, 38]) # caudate -pvcroi.append([56, 57]) # pallidum -pvcroi.append([58, 59]) # putamen -pvcroi.append([60, 61]) # thalamus -pvcroi.append([175, 176, 199, 200]) # parietal without precuneus -pvcroi.append([133, 134, 155, 156, 201, 202, 203, 204]) # temporal -pvcroi.append([4, 5, 12, 16, 43, 44, 47, 50, 51, 52, 53]) # CSF -pvcroi.append([24, 31, 62, 63, 70, 76, 77, 96, 97]) # basal ganglia + optic chiasm +pvcroi.append([66, 67] + list(range(81, 95))) # white matter +pvcroi.append([36]) # brain stem +pvcroi.append([35]) # pons +pvcroi.append([39, 40, 72, 73, 74]) # cerebellum GM +pvcroi.append([41, 42]) # cerebellum WM +pvcroi.append([48, 49]) # hippocampus +pvcroi.append([167, 168]) # posterior cingulate gyrus +pvcroi.append([139, 140]) # middle cingulate gyrus +pvcroi.append([101, 102]) # anterior cingulate gyrus +pvcroi.append([169, 170]) # precuneus +pvcroi.append([32, 33]) # amygdala +pvcroi.append([37, 38]) # caudate +pvcroi.append([56, 57]) # pallidum +pvcroi.append([58, 59]) # putamen +pvcroi.append([60, 61]) # thalamus +pvcroi.append([175, 176, 199, 200]) # parietal without precuneus +pvcroi.append([133, 134, 155, 156, 201, 202, 203, 204]) # temporal 
+pvcroi.append([4, 5, 12, 16, 43, 44, 47, 50, 51, 52, 53]) # CSF +pvcroi.append([24, 31, 62, 63, 70, 76, 77, 96, 97]) # basal ganglia + optic chiasm pvcroi.append( - list(range(103, 110 + 1)) - + list(range(113, 126 + 1)) - + list(range(129, 130 + 1)) - + list(range(135, 138 + 1)) - + list(range(141, 154 + 1)) - + list(range(157, 158 + 1)) - + list(range(161, 166 + 1)) - + list(range(171, 174 + 1)) - + list(range(177, 188 + 1)) - + list(range(191, 198 + 1)) - + list(range(205, 208 + 1)) -) # remaining neocortex -# expected %error for static (SUVr) and PVC reconstructions + list(range(103, 110 + 1)) + list(range(113, 126 + 1)) + list(range(129, 130 + 1)) + + list(range(135, 138 + 1)) + list(range(141, 154 + 1)) + list(range(157, 158 + 1)) + + list(range(161, 166 + 1)) + list(range(171, 174 + 1)) + list(range(177, 188 + 1)) + + list(range(191, 198 + 1)) + list(range(205, 208 + 1))) # remaining neocortex + # expected %error for static (SUVr) and PVC reconstructions emape_basic = 0.1 emape_algnd = { "pet": 3.0, @@ -52,8 +44,7 @@ "trm": 3.0, "pvc": 3.0, "hmu": 0.01, - "omu": 3.0, -} + "omu": 3.0,} @pytest.fixture(scope="session") @@ -73,18 +64,14 @@ def datain(mMRpars, folder_in): def muhdct(mMRpars, datain, tmp_path_factory, worker_id): tmp_path = tmp_path_factory.getbasetemp() - if worker_id == "master": # not xdist, auto-reuse + if worker_id == "master": # not xdist, auto-reuse opth = str(tmp_path / "muhdct") - return nipet.hdw_mumap( - datain, [1, 2, 4], mMRpars, outpath=opth, use_stored=True - ) + return nipet.hdw_mumap(datain, [1, 2, 4], mMRpars, outpath=opth, use_stored=True) opth = str(tmp_path.parent / "muhdct") flock = FileLock(opth + ".lock") - with flock.acquire(poll_intervall=0.5): # xdist, force auto-reuse via flock - return nipet.hdw_mumap( - datain, [1, 2, 4], mMRpars, outpath=opth, use_stored=True - ) + with flock.acquire(poll_intervall=0.5): # xdist, force auto-reuse via flock + return nipet.hdw_mumap(datain, [1, 2, 4], mMRpars, outpath=opth, 
use_stored=True) @pytest.fixture(scope="session") @@ -98,8 +85,7 @@ def refimg(folder_ref): "basic": { "pet": basic / "17598013_t-3000-3600sec_itr-4_suvr.nii.gz", "omu": basic / "mumap-from-DICOM_no-alignment.nii.gz", - "hmu": basic / "hardware_umap.nii.gz", - }, + "hmu": basic / "hardware_umap.nii.gz",}, "aligned": { "spm": { "hmu": spm / "hardware_umap.nii.gz", @@ -107,34 +93,28 @@ def refimg(folder_ref): "pos": spm / "17598013_t0-3600sec_itr2_AC-UTE.nii.gz", "pet": spm / "17598013_nfrm-2_itr-4.nii.gz", "trm": spm / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2.nii.gz", - "pvc": spm / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2_PVC.nii.gz", - }, + "pvc": spm / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2_PVC.nii.gz",}, "niftyreg": { "hmu": niftyreg / "hardware_umap.nii.gz", "omu": niftyreg / "mumap-PCT-aligned-to_t0-3600_AC.nii.gz", "pos": niftyreg / "17598013_t0-3600sec_itr2_AC-UTE.nii.gz", "pet": niftyreg / "17598013_nfrm-2_itr-4.nii.gz", "trm": niftyreg / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2.nii.gz", - "pvc": niftyreg / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2_PVC.nii.gz", - }, - }, + "pvc": niftyreg / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2_PVC.nii.gz",},}, } testext = { "basic": { "pet": "static reconstruction with unaligned UTE mu-map", "hmu": "hardware mu-map for the static unaligned reconstruction", - "omu": "object mu-map for the static unaligned reconstruction", - }, + "omu": "object mu-map for the static unaligned reconstruction",}, "aligned": { "hmu": "hardware mu-map for the 2-frame aligned reconstruction", "omu": "object mu-map for the 2-frame aligned reconstruction", "pos": "AC reconstruction for positioning (full acquisition used)", "pet": "2-frame scan with aligned UTE mu-map", "trm": "trimming post reconstruction", - "pvc": "PVC post reconstruction", - }, - } + "pvc": "PVC post reconstruction",},} # check basic files frefs = refpaths["basic"] @@ -233,11 +213,9 @@ def test_aligned_reconstruction(reg_tool, 
mMRpars, datain, muhdct, refimg, tmp_p "omu": muopct["im"], "pos": muopct["fpet"], "trm": recon["trimmed"]["fpet"], - "pvc": recon["trimmed"]["fpvc"], - } + "pvc": recon["trimmed"]["fpvc"],} for k in testext["aligned"]: - diff = nimpa.imdiff( - fspath(refpaths["aligned"][reg_tool][k]), testout[k], verbose=True, plot=False - ) + diff = nimpa.imdiff(fspath(refpaths["aligned"][reg_tool][k]), testout[k], verbose=True, + plot=False) err = diff["mape"] <= emape_algnd[k] assert (all(err) if isinstance(err, Iterable) else err), testext["aligned"][k] From 2f61d0a2e9d0928215bb03a968154821c624d5fb Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 7 Jan 2021 02:26:42 +0000 Subject: [PATCH 07/64] some manual cleanup --- niftypet/nipet/__init__.py | 31 ++++++++++++++++++++----------- niftypet/nipet/img/__init__.py | 1 + niftypet/nipet/img/auximg.py | 3 --- niftypet/nipet/img/mmrimg.py | 2 -- niftypet/nipet/img/pipe.py | 3 --- niftypet/nipet/lm/__init__.py | 4 ++++ niftypet/nipet/lm/mmrhist.py | 2 -- niftypet/nipet/lm/pviews.py | 2 -- niftypet/nipet/mmraux.py | 2 -- niftypet/nipet/mmrnorm.py | 3 --- niftypet/nipet/prj/mmrprj.py | 2 -- niftypet/nipet/prj/mmrrec.py | 2 -- niftypet/nipet/prj/mmrsim.py | 2 -- niftypet/nipet/sct/mmrsct.py | 2 -- setup.py | 4 ---- 15 files changed, 25 insertions(+), 40 deletions(-) diff --git a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index 68ba3be8..059bf1a5 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """initialise the NiftyPET NIPET package""" -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" +__author__ = "Pawel J. Markiewicz", "Casper O. da Costa-Luis" +__copyright__ = "Copyright 2021" # version detector. 
Precedence: installed dist, git, 'UNKNOWN' try: from ._dist_ver import __version__ @@ -12,16 +12,25 @@ __version__ = get_version(root="../..", relative_to=__file__) except (ImportError, LookupError): __version__ = "UNKNOWN" - -import logging -import os -import platform -import re -import sys -from textwrap import dedent - +__all__ = [ + # GPU utils + 'resource_filename', 'cs', 'dev_info', 'gpuinfo', + # utils + 'LOG_FORMAT', 'LogHandler', 'path_resources', 'resources' + # package + 'img', 'lm', 'mmr_auxe', 'mmraux', 'mmrnorm', 'prj' + # img + 'align_mumap', 'im_e72dev', 'im_dev2e7', 'hdw_mumap', 'obj_mumap', + 'pct_mumap', 'mmrchain', + # lm + 'dynamic_timings', 'mmrhist', 'randoms', + # mmraux + 'classify_input', 'get_mmrparams', + # prj + 'back_prj', 'frwd_prj', 'simulate_recon', 'simulate_sino', + # sct + 'vsm'] # yapf: disable from pkg_resources import resource_filename -from tqdm.auto import tqdm from niftypet.ninst import cudasetup as cs from niftypet.ninst.dinf import dev_info, gpuinfo diff --git a/niftypet/nipet/img/__init__.py b/niftypet/nipet/img/__init__.py index a04effd6..9b46b42c 100644 --- a/niftypet/nipet/img/__init__.py +++ b/niftypet/nipet/img/__init__.py @@ -1,4 +1,5 @@ # init the package folder +__all__ = ['auximg', 'mmrimg', 'obtain_image'] # from . import pipe from . import auximg, mmrimg from .auximg import obtain_image diff --git a/niftypet/nipet/img/auximg.py b/niftypet/nipet/img/auximg.py index 05efafbe..67fb3a46 100644 --- a/niftypet/nipet/img/auximg.py +++ b/niftypet/nipet/img/auximg.py @@ -1,7 +1,4 @@ """auxilary imaging functions for PET image reconstruction and analysis.""" -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" - import logging import os diff --git a/niftypet/nipet/img/mmrimg.py b/niftypet/nipet/img/mmrimg.py index c3a399aa..ff597edf 100644 --- a/niftypet/nipet/img/mmrimg.py +++ b/niftypet/nipet/img/mmrimg.py @@ -23,8 +23,6 @@ from .. import mmraux from .. 
import resources as rs -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) ct_nans = -1024 diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index a056da9f..ef92e605 100644 --- a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -1,7 +1,4 @@ """module for pipelined image reconstruction and analysis""" -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" - import logging import os import sys diff --git a/niftypet/nipet/lm/__init__.py b/niftypet/nipet/lm/__init__.py index 9dc6c6ff..894ecbb2 100644 --- a/niftypet/nipet/lm/__init__.py +++ b/niftypet/nipet/lm/__init__.py @@ -1,4 +1,8 @@ # init the package folder +__all__ = [ + 'auxilary_frames', 'draw_frames', 'dynamic_timings', 'frame_position', 'get_time_offset', + 'mmrhist', 'randoms', 'split_frames'] + from .mmrhist import ( auxilary_frames, draw_frames, diff --git a/niftypet/nipet/lm/mmrhist.py b/niftypet/nipet/lm/mmrhist.py index 4378cc91..1afeea1d 100644 --- a/niftypet/nipet/lm/mmrhist.py +++ b/niftypet/nipet/lm/mmrhist.py @@ -14,8 +14,6 @@ from .. import mmraux from . import mmr_lmproc # CUDA extension module -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) #================================================================================ diff --git a/niftypet/nipet/lm/pviews.py b/niftypet/nipet/lm/pviews.py index edae64f4..0d895e0d 100644 --- a/niftypet/nipet/lm/pviews.py +++ b/niftypet/nipet/lm/pviews.py @@ -1,6 +1,4 @@ #!/usr/bin/python -__author__ = 'pawel' - import os import sys diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index 7e57b60f..9ba5b88e 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -20,8 +20,6 @@ from . import mmr_auxe, resources -__author__ = ("Pawel J. Markiewicz", "Casper O. 
da Costa-Luis") -__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) diff --git a/niftypet/nipet/mmrnorm.py b/niftypet/nipet/mmrnorm.py index 5f554721..02b27b5a 100644 --- a/niftypet/nipet/mmrnorm.py +++ b/niftypet/nipet/mmrnorm.py @@ -10,9 +10,6 @@ from . import mmr_auxe # auxiliary functions through Python extensions in CUDA -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" - #================================================================================================= # GET NORM COMPONENTS #================================================================================================= diff --git a/niftypet/nipet/prj/mmrprj.py b/niftypet/nipet/prj/mmrprj.py index 9e5a7e10..bf3eef83 100644 --- a/niftypet/nipet/prj/mmrprj.py +++ b/niftypet/nipet/prj/mmrprj.py @@ -9,8 +9,6 @@ from ..img import mmrimg from . import petprj -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) #========================================================================= diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 896f7a4b..1d88fc23 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -23,8 +23,6 @@ from ..sct import vsm from . import petprj -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) #reconstruction mode: diff --git a/niftypet/nipet/prj/mmrsim.py b/niftypet/nipet/prj/mmrsim.py index 619fff0c..b033153b 100644 --- a/niftypet/nipet/prj/mmrsim.py +++ b/niftypet/nipet/prj/mmrsim.py @@ -11,8 +11,6 @@ from ..img import mmrimg from . import mmrprj, mmrrec, petprj -__author__ = ("Pawel J. Markiewicz", "Casper O. 
da Costa-Luis") -__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) diff --git a/niftypet/nipet/sct/mmrsct.py b/niftypet/nipet/sct/mmrsct.py index 4acfb309..d8a6908a 100644 --- a/niftypet/nipet/sct/mmrsct.py +++ b/niftypet/nipet/sct/mmrsct.py @@ -20,8 +20,6 @@ from ..prj import mmrprj, mmrrec, petprj from . import nifty_scatter -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" log = logging.getLogger(__name__) diff --git a/setup.py b/setup.py index e6c2b29c..9dc3e69e 100644 --- a/setup.py +++ b/setup.py @@ -18,10 +18,6 @@ from niftypet.ninst import cudasetup as cs from niftypet.ninst import dinf from niftypet.ninst import install_tools as tls - -__author__ = ("Pawel J. Markiewicz", "Casper O. da Costa-Luis") -__copyright__ = "Copyright 2020" -__licence__ = __license__ = "Apache 2.0" __version__ = get_version(root=".", relative_to=__file__) logging.basicConfig(level=logging.INFO, format=tls.LOG_FORMAT) From 9cbbf640c1a720ef3f13c575d65f8b34c427f88e Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 7 Jan 2021 02:27:10 +0000 Subject: [PATCH 08/64] format: some regex --- niftypet/nipet/__init__.py | 10 +- niftypet/nipet/img/auximg.py | 10 +- niftypet/nipet/img/mmrimg.py | 252 ++++++++++++++++----------------- niftypet/nipet/img/pipe.py | 18 +-- niftypet/nipet/lm/mmrhist.py | 92 ++++++------ niftypet/nipet/lm/pviews.py | 24 ++-- niftypet/nipet/mmraux.py | 213 ++++++++++++++-------------- niftypet/nipet/mmrnorm.py | 60 ++++---- niftypet/nipet/prj/mmrprj.py | 32 ++--- niftypet/nipet/prj/mmrrec.py | 124 ++++++++-------- niftypet/nipet/prj/mmrsim.py | 92 ++++++------ niftypet/nipet/sct/__init__.py | 1 + niftypet/nipet/sct/mmrsct.py | 176 +++++++++++------------ setup.py | 1 + 14 files changed, 554 insertions(+), 551 deletions(-) diff --git a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index 059bf1a5..a5f8feae 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ 
-37,7 +37,7 @@ from niftypet.ninst.tools import LOG_FORMAT, LogHandler, path_resources, resources # shared CUDA C library for extended auxiliary functions for the mMR -#> Siemens Biograph mMR +# > Siemens Biograph mMR from . import img, lm, mmr_auxe, mmraux, mmrnorm, prj from .img.mmrimg import align_mumap from .img.mmrimg import convert2dev as im_e72dev @@ -62,11 +62,11 @@ if resources.ENBLXNAT: from xnat import xnat -#> GE Signa -#from . import aux_sig +# > GE Signa +# from . import aux_sig -#from . import lm_sig -#from .lm_sig.hst_sig import lminfo_sig +# from . import lm_sig +# from .lm_sig.hst_sig import lminfo_sig # for use in `cmake -DCMAKE_PREFIX_PATH=...` cmake_prefix = resource_filename(__name__, "cmake") diff --git a/niftypet/nipet/img/auximg.py b/niftypet/nipet/img/auximg.py index 67fb3a46..05070679 100644 --- a/niftypet/nipet/img/auximg.py +++ b/niftypet/nipet/img/auximg.py @@ -15,8 +15,8 @@ def obtain_image(img, Cnt=None, imtype=''): numpy array, dictionary or empty list (assuming blank then). The image has to have the dimensions of the PET image used as in Cnt['SO_IM[X-Z]']. 
''' - #> establishing what and if the image object has been provided - #> all findings go to the output dictionary + # > establishing what and if the image object has been provided + # > all findings go to the output dictionary output = {} if isinstance(img, dict): if Cnt is not None and img['im'].shape != (Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']): @@ -64,7 +64,7 @@ def obtain_image(img, Cnt=None, imtype=''): log.info(imtype + ' has not been provided -> using blank.') output['fim'] = '' output['exists'] = False - #------------------------------------------------------------------------ + # ------------------------------------------------------------------------ return output @@ -111,9 +111,9 @@ def dynamic_timings(flist, offset=0): nfrm = np.sum(farray[:, 0]) # list of frame duration frms = np.zeros(nfrm, dtype=np.uint16) - #frame iterator + # frame iterator fi = 0 - #time sum of frames + # time sum of frames tsum = 0 # list of frame timings t_frames = [] diff --git a/niftypet/nipet/img/mmrimg.py b/niftypet/nipet/img/mmrimg.py index ff597edf..42895252 100644 --- a/niftypet/nipet/img/mmrimg.py +++ b/niftypet/nipet/img/mmrimg.py @@ -27,9 +27,9 @@ ct_nans = -1024 -#=================================================================================== +# ================================================================================== # IMAGE ROUTINES -#=================================================================================== +# ================================================================================== def convert2e7(img, Cnt): @@ -37,16 +37,16 @@ def convert2e7(img, Cnt): margin = (Cnt['SO_IMX'] - Cnt['SZ_IMX']) // 2 - #permute the dims first + # permute the dims first imo = np.transpose(img, (2, 0, 1)) nvz = img.shape[2] - #> get the x-axis filler and apply it + # > get the x-axis filler and apply it filler = np.zeros((nvz, Cnt['SZ_IMY'], margin), dtype=np.float32) imo = np.concatenate((filler, imo, filler), axis=2) - #> get the y-axis filler and 
apply it + # > get the y-axis filler and apply it filler = np.zeros((nvz, margin, Cnt['SO_IMX']), dtype=np.float32) imo = np.concatenate((filler, imo, filler), axis=1) return imo @@ -103,7 +103,7 @@ def cropxy(im, imsize, datain, Cnt, store_pth=''): def image_affine(datain, Cnt, gantry_offset=False): '''Creates a blank reference image, to which another image will be resampled''' - #------get necessary data for ----- + # ------get necessary data for ----- # gantry offset if gantry_offset: goff, tpo = mmraux.lm_pos(datain, Cnt) @@ -120,31 +120,31 @@ def image_affine(datain, Cnt, gantry_offset=False): def getmu_off(mu, Cnt, Offst=np.array([0., 0., 0.])): - #number of voxels + # pumber of voxels nvx = mu.shape[0] - #change the shape to 3D + # phange the shape to 3D mu.shape = (Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']) - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- # CORRECT THE MU-MAP for GANTRY OFFSET - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- Cim = { 'VXSOx': 0.208626, 'VXSOy': 0.208626, 'VXSOz': 0.203125, 'VXNOx': 344, 'VXNOy': 344, 'VXNOz': 127, 'VXSRx': 0.208626, 'VXSRy': 0.208626, 'VXSRz': 0.203125, 'VXNRx': 344, 'VXNRy': 344, 'VXNRz': 127} - #original image offset + # priginal image offset Cim['OFFOx'] = -0.5 * Cim['VXNOx'] * Cim['VXSOx'] Cim['OFFOy'] = -0.5 * Cim['VXNOy'] * Cim['VXSOy'] Cim['OFFOz'] = -0.5 * Cim['VXNOz'] * Cim['VXSOz'] - #resampled image offset + # pesampled image offset Cim['OFFRx'] = -0.5 * Cim['VXNRx'] * Cim['VXSRx'] Cim['OFFRy'] = -0.5 * Cim['VXNRy'] * Cim['VXSRy'] Cim['OFFRz'] = -0.5 * Cim['VXNRz'] * Cim['VXSRz'] - #transformation matrix + # pransformation matrix A = np.array( [[1., 0., 0., Offst[0]], [0., 1., 0., Offst[1]], [0., 0., 1., Offst[2]], [0., 0., 0., 1.]], dtype=np.float32) - #apply the gantry offset to the 
mu-map + # ppply the gantry offset to the mu-map mur = nimpa.prc.improc.resample(mu, A, Cim) return mur @@ -153,57 +153,57 @@ def getinterfile_off(fmu, Cnt, Offst=np.array([0., 0., 0.])): ''' Return the floating point mu-map in an array from Interfile, accounting for image offset (does slow interpolation). ''' - #read the image file + # pead the image file f = open(fmu, 'rb') mu = np.fromfile(f, np.float32) f.close() # save_im(mur, Cnt, os.path.dirname(fmu) + '/mur.nii') - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- mur = getmu_off(mu, Cnt) - #> create GPU version of the mu-map + # > create GPU version of the mu-map murs = convert2dev(mur, Cnt) - #> number of voxels + # > number of voxels nvx = im.shape[0] - #> get the basic stats + # > get the basic stats mumax = np.max(mur) mumin = np.min(mur) - #> number of voxels greater than 10% of max image value + # > number of voxels greater than 10% of max image value n10mx = np.sum(mur > 0.1 * mumax) - #> return image dictionary with the image itself and some other stats + # > return image dictionary with the image itself and some other stats mu_dct = {'im': mur, 'ims': murs, 'max': mumax, 'min': mumin, 'nvx': nvx, 'n10mx': n10mx} return mu_dct def getinterfile(fim, Cnt): '''Return the floating point image file in an array from an Interfile file.''' - #read the image file + # pead the image file f = open(fim, 'rb') im = np.fromfile(f, np.float32) f.close() - #number of voxels + # pumber of voxels nvx = im.shape[0] - #change the shape to 3D + # phange the shape to 3D im.shape = (Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']) - #get the basic stats + # pet the basic stats immax = np.max(im) immin = np.min(im) - #number of voxels greater than 10% of max image value + # pumber of voxels greater than 10% of max image value n10mx = np.sum(im > 0.1 * immax) - #reorganise the image for optimal gpu execution + # peorganise 
the image for optimal gpu execution im_sqzd = convert2dev(im, Cnt) - #return image dictionary with the image itself and some other stats + # peturn image dictionary with the image itself and some other stats im_dct = {'im': im, 'ims': im_sqzd, 'max': immax, 'min': immin, 'nvx': nvx, 'n10mx': n10mx} return im_dct -#-define uniform cylinder +# define uniform cylinder def get_cylinder(Cnt, rad=25, xo=0, yo=0, unival=1, gpu_dim=False): @@ -257,11 +257,11 @@ def mudcm2nii(datain, Cnt): nimpa.array2nii(mu[:, ::-1, :], A, os.path.join(os.path.dirname(datain['mumapDCM']), 'mu.nii.gz')) - #------get necessary data for creating a blank reference image (to which resample)----- + # ------get necessary data for creating a blank reference image (to which resample)----- # gantry offset goff, tpo = mmraux.lm_pos(datain, Cnt) ihdr, csainfo = mmraux.hdr_lm(datain) - #start horizontal bed position + # ptart horizontal bed position p = re.compile(r'start horizontal bed position.*\d{1,3}\.*\d*') m = p.search(ihdr) fi = ihdr[m.start():m.end()].find('=') @@ -317,16 +317,16 @@ def obj_mumap( fmudir = os.path.join(outpath, 'mumap-obj') nimpa.create_dir(fmudir) - #> ref file name + # > ref file name fmuref = os.path.join(fmudir, 'muref.nii.gz') - #> ref affine + # > ref affine B = image_affine(datain, Cnt, gantry_offset=gantry_offset) - #> ref image (blank) + # > ref image (blank) im = np.zeros((Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']), dtype=np.float32) - #> store ref image + # > store ref image nimpa.array2nii(im, B, fmuref) # check if the object dicom files for MR-based mu-map exists @@ -344,7 +344,7 @@ def obj_mumap( # convert the DICOM mu-map images to nii run([Cnt['DCM2NIIX'], '-f', fnii + tstmp, '-o', fmudir, datain['mumapDCM']]) - #files for the T1w, pick one: + # piles for the T1w, pick one: fmunii = glob.glob(os.path.join(fmudir, '*' + fnii + tstmp + '*.nii*'))[0] # fmunii = glob.glob( os.path.join(datain['mumapDCM'], '*converted*.nii*') ) # fmunii = fmunii[0] @@ -369,12 
+369,12 @@ def obj_mumap( mu = np.float32(mu) / 1e4 mu[mu < 0] = 0 - #> return image dictionary with the image itself and some other stats + # > return image dictionary with the image itself and some other stats mu_dct = dict(im=mu, affine=A) if not del_auxilary: mu_dct['fmuref'] = fmuref - #> store the mu-map if requested + # > store the mu-map if requested if store_npy: # to numpy array fnp = os.path.join(fmudir, "mumap-from-DICOM.npz") @@ -398,9 +398,9 @@ def obj_mumap( return mu_dct -#================================================================================= +# ================================================================================ # pCT/UTE MU-MAP ALIGNED -#--------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------- def align_mumap( @@ -433,32 +433,32 @@ def align_mumap( if scanner_params is None: scanner_params = {} - #> output folder + # > output folder if outpath == '': opth = os.path.join(datain['corepath'], 'mumap-obj') else: opth = os.path.join(outpath, 'mumap-obj') - #> create the folder, if not existent + # > create the folder, if not existent nimpa.create_dir(opth) - #> tmp folder for not aligned mu-maps + # > tmp folder for not aligned mu-maps tmpdir = os.path.join(opth, 'tmp') nimpa.create_dir(tmpdir) - #> get the timing of PET if affine not given + # > get the timing of PET if affine not given if faff == '' and not hst is None and isinstance(hst, dict) and 't0' in hst: t0 = hst['t0'] t1 = hst['t1'] - #> file name for the output mu-map + # > file name for the output mu-map fnm = 'mumap-' + musrc.upper() - #> output dictionary + # > output dictionary mu_dct = {} - #--------------------------------------------------------------------------- - #> used stored if requested + # --------------------------------------------------------------------------- + # > used stored if requested if use_stored: fmu_stored = fnm + '-aligned-to_t'\ + 
str(hst['t0'])+'-'+str(hst['t1'])+'_'+petopt.upper()\ @@ -466,14 +466,14 @@ def align_mumap( fmupath = os.path.join(opth, fmu_stored) if os.path.isfile(fmupath): mudct_stored = nimpa.getnii(fmupath, output='all') - #> create output dictionary + # > create output dictionary mu_dct['im'] = mudct_stored['im'] mu_dct['affine'] = mudct_stored['affine'] - #mu_dct['faff'] = faff + # pu_dct['faff'] = faff return mu_dct - #--------------------------------------------------------------------------- + # --------------------------------------------------------------------------- - #> three ways of passing scanner constants are here decoded + # > three ways of passing scanner constants are here decoded if 'Cnt' in scanner_params: Cnt = scanner_params['Cnt'] elif 'SO_IMZ' in scanner_params: @@ -481,11 +481,11 @@ def align_mumap( else: Cnt = rs.get_mmr_constants() - #> if affine not provided histogram the LM data for recon and registration + # > if affine not provided histogram the LM data for recon and registration if not os.path.isfile(faff): from niftypet.nipet.prj import mmrrec - #-histogram the list data if needed + # -histogram the list data if needed if hst is None: from niftypet.nipet import mmrhist if 'txLUT' in scanner_params: @@ -495,7 +495,7 @@ def align_mumap( but are required for histogramming.') #========================================================= - #-get hardware mu-map + # -get hardware mu-map if 'hmumap' in datain and os.path.isfile(datain['hmumap']): muh = np.load(datain['hmumap'], allow_pickle=True)["hmu"] (log.info if verbose else log.debug)('loaded hardware mu-map from file:\n{}'.format( @@ -511,14 +511,14 @@ def align_mumap( log.error('the hardware mu-map is required first.') raise IOError('Could not find the hardware mu-map!') #========================================================= - #-check if T1w image is available + # -check if T1w image is available if not {'MRT1W#', 'T1nii', 'T1bc', 'T1N4'}.intersection(datain): log.error('no MR T1w images 
required for co-registration!') raise IOError('T1w image could not be obtained!') #========================================================= - #-if the affine is not given, - #-it will be generated by reconstructing PET image, with some or no corrections + # -if the affine is not given, + # -it will be generated by reconstructing PET image, with some or no corrections if not os.path.isfile(faff): # first recon pet to get the T1 aligned to it if petopt == 'qnt': @@ -556,7 +556,7 @@ def align_mumap( fpet = recout.fpet mu_dct['fpet'] = fpet - #------------------------------ + # ------------------------------ if musrc == 'ute' and ute_name in datain and os.path.exists(datain[ute_name]): # change to NIfTI if the UTE sequence is in DICOM files (folder) if os.path.isdir(datain[ute_name]): @@ -575,7 +575,7 @@ def align_mumap( fpet, fute, outpath=os.path.join(outpath, 'PET', 'positioning'), - #fcomment=fcomment, + # pcomment=fcomment, executable=Cnt['REGPATH'], omp=multiprocessing.cpu_count() / 2, rigOnly=True, @@ -588,9 +588,9 @@ def align_mumap( smor=0, rmsk=True, fmsk=True, - rfwhm=15., #millilitres + rfwhm=15., # pillilitres rthrsh=0.05, - ffwhm=15., #millilitres + ffwhm=15., # pillilitres fthrsh=0.05, verbose=verbose) else: @@ -622,9 +622,9 @@ def align_mumap( smor=0, rmsk=True, fmsk=True, - rfwhm=15., #millilitres + rfwhm=15., # pillilitres rthrsh=0.05, - ffwhm=15., #millilitres + ffwhm=15., # pillilitres fthrsh=0.05, verbose=verbose) else: @@ -641,10 +641,10 @@ def align_mumap( if not os.path.isfile(fpet): raise IOError('e> the reference PET should be supplied with the affine.') - #> output file name for the aligned mu-maps + # > output file name for the aligned mu-maps if musrc == 'pct': - #> convert to mu-values before resampling to avoid artefacts with negative values + # > convert to mu-values before resampling to avoid artefacts with negative values nii = nib.load(datain['pCT']) img = nii.get_fdata(dtype=np.float32) img_mu = hu2mu(img) @@ -663,7 +663,7 @@ def 
align_mumap( if 'mumapDCM' not in datain: raise IOError('DICOM with the UTE mu-map are not given.') run([Cnt['DCM2NIIX'], '-f', fnii + tstmp, '-o', opth, datain['mumapDCM']]) - #files for the T1w, pick one: + # piles for the T1w, pick one: fflo = glob.glob(os.path.join(opth, '*' + fnii + tstmp + '*.nii*'))[0] else: if os.path.isfile(datain['UTE']): @@ -671,7 +671,7 @@ def align_mumap( else: raise IOError('The provided NIfTI UTE path is not valid.') - #> call the resampling routine to get the pCT/UTE in place + # > call the resampling routine to get the pCT/UTE in place if reg_tool == "spm": nimpa.resample_spm(fpet, fflo, faff_mrpet, fimout=freg, del_ref_uncmpr=True, del_flo_uncmpr=True, del_out_uncmpr=True) @@ -679,28 +679,28 @@ def align_mumap( nimpa.resample_niftyreg(fpet, fflo, faff_mrpet, fimout=freg, executable=Cnt['RESPATH'], verbose=verbose) - #-get the NIfTI of registered image + # -get the NIfTI of registered image nim = nib.load(freg) A = nim.affine imreg = nim.get_fdata(dtype=np.float32) imreg = imreg[:, ::-1, ::-1] imreg = np.transpose(imreg, (2, 1, 0)) - #-convert to mu-values; sort out the file name too. + # -convert to mu-values; sort out the file name too. 
if musrc == 'pct': mu = imreg elif musrc == 'ute': mu = np.float32(imreg) / 1e4 - #-remove the converted file from DICOMs + # -remove the converted file from DICOMs os.remove(fflo) else: raise NameError('Confused o_O') - #> get rid of negatives and nans + # > get rid of negatives and nans mu[mu < 0] = 0 mu[np.isnan(mu)] = 0 - #> return image dictionary with the image itself and other parameters + # > return image dictionary with the image itself and other parameters mu_dct['im'] = mu mu_dct['affine'] = A mu_dct['faff'] = faff_mrpet @@ -714,12 +714,12 @@ def align_mumap( else: fname = fnm + '-aligned-to-given-affine' + fcomment if store_npy: - #> Numpy + # > Numpy if store_to_npy: fnp = os.path.join(opth, fname + ".npz") np.savez(fnp, mu=mu, A=A) if store: - #> NIfTI + # > NIfTI fmu = os.path.join(opth, fname + '.nii.gz') nimpa.array2nii(mu[::-1, ::-1, :], A, fmu) mu_dct['fim'] = fmu @@ -734,9 +734,9 @@ def align_mumap( return mu_dct -#================================================================================= +# ================================================================================ # PSEUDO CT MU-MAP -#--------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------- def pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', faff='', fpet='', @@ -822,7 +822,7 @@ def pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', fpet = recout.fpet mu_dct['fpet'] = fpet - #------------------------------ + # ------------------------------ # get the affine transformation ft1w = nimpa.pick_t1w(datain) try: @@ -833,7 +833,7 @@ def pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', fpet, ft1w, outpath=os.path.join(outpath, 'PET', 'positioning'), - #fcomment=fcomment, + # pcomment=fcomment, executable=Cnt['REGPATH'], omp=multiprocessing.cpu_count() / 2, rigOnly=True, @@ -846,14 +846,14 @@ def 
pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', smor=0, rmsk=True, fmsk=True, - rfwhm=15., #millilitres + rfwhm=15., # pillilitres rthrsh=0.05, - ffwhm=15., #millilitres + ffwhm=15., # pillilitres fthrsh=0.05, verbose=verbose) faff = regdct['faff'] - #------------------------------ + # ------------------------------ # pCT file name if outpath == '': @@ -863,7 +863,7 @@ def pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', mmraux.create_dir(pctdir) fpct = os.path.join(pctdir, 'pCT_r_tmp' + fcomment + '.nii.gz') - #> call the resampling routine to get the pCT in place + # > call the resampling routine to get the pCT in place if os.path.isfile(Cnt['RESPATH']): cmd = [ Cnt['RESPATH'], '-ref', fpet, '-flo', datain['pCT'], '-trans', faff, '-res', fpct, @@ -898,12 +898,12 @@ def pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', else: pctumapdir = os.path.join(outpath, 'mumap-obj') mmraux.create_dir(pctumapdir) - #> Numpy + # > Numpy if store_npy: fnp = os.path.join(pctumapdir, "mumap-pCT.npz") np.savez(fnp, mu=mu, A=A) - #> NIfTI + # > NIfTI fmu = os.path.join(pctumapdir, 'mumap-pCT' + fcomment + '.nii.gz') nimpa.array2nii(mu[::-1, ::-1, :], A, fmu) mu_dct['fim'] = fmu @@ -914,12 +914,12 @@ def pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', #********************************************************************************* #GET HARDWARE MU-MAPS with positions and offsets -#--------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------- def hdr_mu(datain, Cnt): '''Get the headers from DICOM data file''' - #get one of the DICOM files of the mu-map + # pet one of the DICOM files of the mu-map if 'mumapDCM' in datain: files = glob.glob(os.path.join(datain['mumapDCM'], '*.dcm')) files.extend(glob.glob(os.path.join(datain['mumapDCM'], '*.DCM'))) @@ -941,7 +941,7 @@ def 
hdr_mu(datain, Cnt): def hmu_shape(hdr): - #regular expression to find the shape + # pegular expression to find the shape p = re.compile(r'(?<=:=)\s*\d{1,4}') # x: dim [1] i0 = hdr.find('matrix size[1]') @@ -959,7 +959,7 @@ def hmu_shape(hdr): def hmu_voxsize(hdr): - #regular expression to find the shape + # pegular expression to find the shape p = re.compile(r'(?<=:=)\s*\d{1,2}[.]\d{1,10}') # x: dim [1] i0 = hdr.find('scale factor (mm/pixel) [1]') @@ -977,7 +977,7 @@ def hmu_voxsize(hdr): def hmu_origin(hdr): - #regular expression to find the origin + # pegular expression to find the origin p = re.compile(r'(?<=:=)\s*\d{1,5}[.]\d{1,10}') # x: dim [1] i0 = hdr.find('$umap origin (pixels) [1]') @@ -995,7 +995,7 @@ def hmu_origin(hdr): def hmu_offset(hdr): - #regular expression to find the origin + # pegular expression to find the origin p = re.compile(r'(?<=:=)\s*\d{1,5}[.]\d{1,10}') if hdr.find('$origin offset') > 0: # x: dim [1] @@ -1016,21 +1016,21 @@ def hmu_offset(hdr): def rd_hmu(fh): - #--read hdr file-- + # --read hdr file-- f = open(fh, 'r') hdr = f.read() f.close() - #----------------- - #regular expression to find the file name + # ----------------- + # pegular expression to find the file name p = re.compile(r'(?<=:=)\s*\w*[.]\w*') i0 = hdr.find('!name of data file') i1 = i0 + hdr[i0:].find('\n') fbin = p.findall(hdr[i0:i1])[0] - #--read img file-- + # --read img file-- f = open(os.path.join(os.path.dirname(fh), fbin.strip()), 'rb') im = np.fromfile(f, np.float32) f.close() - #----------------- + # ----------------- return hdr, im @@ -1038,21 +1038,21 @@ def get_hmupos(datain, parts, Cnt, outpath=''): # check if registration executable exists if not os.path.isfile(Cnt['RESPATH']): raise IOError('No registration executable found!') - #----- get positions from the DICOM list-mode file ----- + # ----- get positions from the DICOM list-mode file ----- ihdr, csainfo = mmraux.hdr_lm(datain, Cnt) - #table position origin + # pable position origin fi = 
csainfo.find(b'TablePositionOrigin') tpostr = csainfo[fi:fi + 200] tpo = re.sub(b'[^a-zA-Z0-9.\\-]', b'', tpostr).split(b'M') tpozyx = np.array([float(tpo[-1]), float(tpo[-2]), float(tpo[-3])]) / 10 log.info('table position (z,y,x) (cm): {}'.format(tpozyx)) - #-------------------------------------------------------- + # -------------------------------------------------------- - #------- get positions from the DICOM mu-map file ------- + # ------- get positions from the DICOM mu-map file ------- csamu, dhdr = hdr_mu(datain, Cnt) - #> get the indices where the table offset may reside: + # > get the indices where the table offset may reside: idxs = [m.start() for m in re.finditer(b'GantryTableHomeOffset(?!_)', csamu)] - #> loop over the indices and find those which are correct + # > loop over the indices and find those which are correct found_off = False for i in idxs: gtostr1 = csamu[i:i + 300] @@ -1071,7 +1071,7 @@ def get_hmupos(datain, parts, Cnt, outpath=''): log.info('gantry table offset (z,y,x) (cm): {}'.format(gtozyx)) else: raise ValueError('Could not find the gantry table offset or the offset is unusual.') - #-------------------------------------------------------- + # -------------------------------------------------------- # create the folder for hardware mu-maps if outpath == '': @@ -1082,13 +1082,13 @@ def get_hmupos(datain, parts, Cnt, outpath=''): # get the reference nii image fref = os.path.join(dirhmu, 'hmuref.nii.gz') - #start horizontal bed position + # ptart horizontal bed position p = re.compile(r'start horizontal bed position.*\d{1,3}\.*\d*') m = p.search(ihdr) fi = ihdr[m.start():m.end()].find('=') hbedpos = 0.1 * float(ihdr[m.start() + fi + 1:m.end()]) - #start vertical bed position + # ptart vertical bed position p = re.compile(r'start vertical bed position.*\d{1,3}\.*\d*') m = p.search(ihdr) fi = ihdr[m.start():m.end()].find('=') @@ -1102,22 +1102,22 @@ def get_hmupos(datain, parts, Cnt, outpath=''): nimpa.array2nii(np.zeros((Cnt['SO_IMZ'], 
Cnt['SO_IMY'], Cnt['SO_IMX']), dtype=np.float32), B, fref) - #define a dictionary of all positions/offsets of hardware mu-maps + # pefine a dictionary of all positions/offsets of hardware mu-maps hmupos = [None] * 5 hmupos[0] = { - 'TabPosOrg': tpozyx, #from DICOM of LM file - 'GanTabOff': gtozyx, #from DICOM of mMR mu-map file - 'HBedPos': hbedpos, #from Interfile of LM file [cm] - 'VBedPos': vbedpos, #from Interfile of LM file [cm] + 'TabPosOrg': tpozyx, # prom DICOM of LM file + 'GanTabOff': gtozyx, # prom DICOM of mMR mu-map file + 'HBedPos': hbedpos, # prom Interfile of LM file [cm] + 'VBedPos': vbedpos, # prom Interfile of LM file [cm] 'niipath': fref} - #-------------------------------------------------------------------------- + # -------------------------------------------------------------------------- # iteratively go through the mu-maps and add them as needed for i in parts: fh = os.path.join(Cnt['HMUDIR'], Cnt['HMULIST'][i - 1]) # get the interfile header and binary data hdr, im = rd_hmu(fh) - #get shape, origin, offset and voxel size + # pet shape, origin, offset and voxel size s = hmu_shape(hdr) im.shape = s # get the origin, offset and voxel size for the mu-map interfile data @@ -1126,16 +1126,16 @@ def get_hmupos(datain, parts, Cnt, outpath=''): vs = hmu_voxsize(hdr) # corner voxel position for the interfile image data vpos = (-org * vs + off + gtozyx - tpozyx) - #add to the dictionary + # pdd to the dictionary hmupos[i] = { 'vpos': vpos, - 'shape': s, #from interfile - 'iorg': org, #from interfile - 'ioff': off, #from interfile - 'ivs': vs, #from interfile - 'img': im, #from interfile + 'shape': s, # prom interfile + 'iorg': org, # prom interfile + 'ioff': off, # prom interfile + 'ivs': vs, # prom interfile + 'img': im, # prom interfile 'niipath': os.path.join(dirhmu, '_' + Cnt['HMULIST'][i - 1].split('.')[0] + '.nii.gz')} - #save to NIfTI + # pave to NIfTI log.info('creating mu-map for: {}'.format(Cnt['HMULIST'][i - 1])) A = np.diag(np.append(10 
* vs[::-1], 1)) A[0, 0] *= -1 @@ -1220,7 +1220,7 @@ def hdw_mumap(datain, hparts, params, outpath='', use_stored=False, del_interm=T # save the objects to numpy arrays fnp = os.path.join(fmudir, "hmumap.npz") np.savez(fnp, hmu=hmu, A=A, fmu=fmu) - #update the datain dictionary (assuming it is mutable) + # ppdate the datain dictionary (assuming it is mutable) datain['hmumap'] = fnp if del_interm: @@ -1229,7 +1229,7 @@ def hdw_mumap(datain, hparts, params, outpath='', use_stored=False, del_interm=T for fname in glob.glob(os.path.join(fmudir, 'r_*.nii*')): os.remove(fname) - #return image dictionary with the image itself and some other stats + # peturn image dictionary with the image itself and some other stats hmu_dct = {'im': hmu, 'fim': fmu, 'affine': A} if 'fnp' in locals(): hmu_dct['fnp'] = fnp @@ -1293,12 +1293,12 @@ def rmumaps(datain, Cnt, t0=0, t1=0, use_stored=False): else: raise IOError('Disaster: no T1w image!') - #output for the T1w in register with PET + # putput for the T1w in register with PET ft1out = os.path.join(os.path.dirname(ft1w), 'T1w_r' + '.nii.gz') - #text file fo rthe affine transform T1w->PET + # pext file fo rthe affine transform T1w->PET faff = os.path.join(os.path.dirname(ft1w), fcomment + 'mr2pet_affine' + - '.txt') #time.strftime('%d%b%y_%H.%M',time.gmtime()) - #> call the registration routine + '.txt') # pime.strftime('%d%b%y_%H.%M',time.gmtime()) + # > call the registration routine if os.path.isfile(Cnt['REGPATH']): cmd = [ Cnt['REGPATH'], '-ref', recute.fpet, '-flo', ft1w, '-rigOnly', '-speeeeed', '-aff', @@ -1309,7 +1309,7 @@ def rmumaps(datain, Cnt, t0=0, t1=0, use_stored=False): else: raise IOError('Path to registration executable is incorrect!') - #get the pCT mu-map with the above faff + # pet the pCT mu-map with the above faff pmudic = pct_mumap(datain, txLUT, axLUT, Cnt, faff=faff, fpet=recute.fpet, fcomment=fcomment) mup = pmudic['im'] diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index 
ef92e605..f2838380 100644 --- a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -178,7 +178,7 @@ def mmrchain( 'no mu-map provided: scatter and attenuation corrections are switched off.') # ------------------------------------------------------------------------- - #import pdb; pdb.set_trace() + # import pdb; pdb.set_trace() # output dictionary output = {} @@ -259,14 +259,14 @@ def mmrchain( # dynamic images in one numpy array dynim = np.zeros((nfrm, Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMY']), dtype=np.float32) - #if asked, output only scatter+randoms sinogram for each frame + # if asked, output only scatter+randoms sinogram for each frame if ret_sinos and itr > 1 and recmod > 2: dynmsk = np.zeros((nfrm, Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) dynrsn = np.zeros((nfrm, Cnt['NSN11'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) dynssn = np.zeros((nfrm, Cnt['NSN11'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) dynpsn = np.zeros((nfrm, Cnt['NSN11'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) - #> returning dictionary of histograms if requested + # > returning dictionary of histograms if requested if ret_histo: hsts = {} @@ -330,7 +330,7 @@ def mmrchain( output['fmureg'].append(fmu) else: muo = muod['im'] - #--------------------- + # --------------------- # output image file name if nfrm > 1: @@ -393,7 +393,7 @@ def mmrchain( # ---------------------------------------------------------------------- # ---------------------------------------------------------------------- - #run PVC if requested and required input given + # run PVC if requested and required input given if pvcroi: if not os.path.isfile(datain['T1lbl']): raise Exception('No labels and/or ROIs image definitions found!') @@ -409,11 +409,11 @@ def mmrchain( 'the PSF kernel has to be an numpy array with the shape of ({},{})'.format( 3, 2 * Cnt['RSZ_PSF_KRNL'] + 1)) - #> file names for NIfTI images of PVC ROIs and PVC corrected PET + # > file names 
for NIfTI images of PVC ROIs and PVC corrected PET froi = [] fpvc = [] - #> perform PVC for each time frame + # > perform PVC for each time frame dynpvc = np.zeros(petu['im'].shape, dtype=np.float32) for i in range(ifrmP, nfrm): # transform the parcellations (ROIs) if given the affine transformation for each frame @@ -421,7 +421,7 @@ def mmrchain( log.warning( 'affine transformation are not provided: will generate for the time frame.') faffpvc = None - #raise StandardError('No affine transformation') + # raise StandardError('No affine transformation') else: faffpvc = faff_frms[i] @@ -445,7 +445,7 @@ def mmrchain( if store_rois: froi.append(petpvc_dic['froi']) - #> update output dictionary + # > update output dictionary output.update({'impvc': dynpvc}) output['fprc'] = petpvc_dic['fprc'] output['imprc'] = petpvc_dic['imprc'] diff --git a/niftypet/nipet/lm/mmrhist.py b/niftypet/nipet/lm/mmrhist.py index 1afeea1d..2e8ce198 100644 --- a/niftypet/nipet/lm/mmrhist.py +++ b/niftypet/nipet/lm/mmrhist.py @@ -16,9 +16,9 @@ log = logging.getLogger(__name__) -#================================================================================ +# =============================================================================== # HISTOGRAM THE LIST-MODE DATA -#-------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- def mmrhist(datain, scanner_params, t0=0, t1=0, outpath='', frms=None, use_stored=False, @@ -83,7 +83,7 @@ def hist( # --------------------------------------- # preallocate all the output arrays VTIME = 2 - MXNITAG = 5400 #limit to 1hr and 30mins + MXNITAG = 5400 # limit to 1hr and 30mins if (nitag > MXNITAG): tn = int(MXNITAG / (1 << VTIME)) else: @@ -97,11 +97,11 @@ def hist( bck = np.zeros((2, nitag, Cnt['NBCKT']), dtype=np.uint32) fan = np.zeros((Cnt['NRNG'], Cnt['NCRS']), dtype=np.uint32) - #> prompt and delayed sinograms + # > prompt and delayed sinograms psino 
= np.zeros((nsinos, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.uint16) dsino = np.zeros((nsinos, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.uint16) - #> single slice rebinned prompots + # > single slice rebinned prompots ssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.uint32) hstout = { @@ -134,7 +134,7 @@ def hist( log.error('input list-mode data is not defined.') return - #short (interval) projection views + # short (interval) projection views pvs_sgtl = np.right_shift(hstout['pvs'], 8).astype(np.float32) pvs_crnl = np.bitwise_and(hstout['pvs'], 255).astype(np.float32) @@ -144,22 +144,22 @@ def hist( .format(cmass_sig)) #========================== BUCKET SINGLES ========================= - #> number of single rates reported for the given second - #> the last two bits are used for the number of reports + # > number of single rates reported for the given second + # > the last two bits are used for the number of reports nsr = (hstout['bck'][1, :, :] >> 30) - #> average in a second period + # > average in a second period hstout['bck'][0, nsr > 0] = hstout['bck'][0, nsr > 0] / nsr[nsr > 0] - #> time indeces when single rates given + # > time indeces when single rates given tmsk = np.sum(nsr, axis=1) > 0 single_rate = np.copy(hstout['bck'][0, tmsk, :]) - #> time + # > time t = np.arange(nitag) t = t[tmsk] - #> get the average bucket singles: + # > get the average bucket singles: buckets = np.int32(np.sum(single_rate, axis=0) / single_rate.shape[0]) log.debug('dynamic and static buckets single rates: COMPLETED.') #=================================================================== @@ -170,28 +170,28 @@ def hist( pdata = { 't0': t0, 't1': t1, - 'dur': t1 - t0, #duration - 'phc': hstout['phc'], #prompts head curve - 'dhc': hstout['dhc'], #delayeds head curve - 'cmass': cmass, #centre of mass of the radiodistribution in axial direction - 'pvs_sgtl': pvs_sgtl, #sagittal projection views in short intervals - 'pvs_crnl': pvs_crnl, #coronal projection 
views in short intervals + 'dur': t1 - t0, # duration + 'phc': hstout['phc'], # prompts head curve + 'dhc': hstout['dhc'], # delayeds head curve + 'cmass': cmass, # centre of mass of the radiodistribution in axial direction + 'pvs_sgtl': pvs_sgtl, # sagittal projection views in short intervals + 'pvs_crnl': pvs_crnl, # coronal projection views in short intervals 'fansums': hstout[ - 'fan'], #fan sums of delayeds for variance reduction of random event sinograms - 'sngl_rate': single_rate, #bucket singles over time - 'tsngl': t, #time points of singles measurements in list-mode data - 'buckets': buckets, #average bucket singles - 'psino': hstout['psn'].astype(np.uint16), #prompt sinogram - 'dsino': hstout['dsn'].astype(np.uint16), #delayeds sinogram - 'pssr': hstout['ssr'] #single-slice rebinned sinogram of prompts + 'fan'], # fan sums of delayeds for variance reduction of random event sinograms + 'sngl_rate': single_rate, # bucket singles over time + 'tsngl': t, # time points of singles measurements in list-mode data + 'buckets': buckets, # average bucket singles + 'psino': hstout['psn'].astype(np.uint16), # prompt sinogram + 'dsino': hstout['dsn'].astype(np.uint16), # delayeds sinogram + 'pssr': hstout['ssr'] # single-slice rebinned sinogram of prompts } return pdata -#=============================================================================== +# ============================================================================== # GET REDUCED VARIANCE RANDOMS -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------ def randoms(hst, scanner_params, gpu_dim=False): @@ -220,7 +220,7 @@ def rand(fansums, txLUT, axLUT, Cnt): elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] elif Cnt['SPN'] == 0: nsinos = Cnt['NSEG0'] - #random sino and estimated crystal map of singles put into a dictionary + # random sino and estimated crystal map of singles put into a dictionary rsn = 
np.zeros((nsinos, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) cmap = np.zeros((Cnt['NCRS'], Cnt['NRNG']), dtype=np.float32) rndout = { @@ -232,9 +232,9 @@ def rand(fansums, txLUT, axLUT, Cnt): return rndout['rsn'], rndout['cmap'] -#================================================================================ +# =============================================================================== # NEW!! GET REDUCED VARIANCE RANDOMS (BASED ON PROMPTS) -#-------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- def prand(fansums, pmsk, txLUT, axLUT, Cnt): @@ -242,18 +242,18 @@ def prand(fansums, pmsk, txLUT, axLUT, Cnt): elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] elif Cnt['SPN'] == 0: nsinos = Cnt['NSEG0'] - #number of frames + # number of frames nfrm = fansums.shape[0] log.debug('# of dynamic frames: {}.'.format(nfrm)) - #random sino and estimated crystal map of singles put into a dictionary + # random sino and estimated crystal map of singles put into a dictionary rsn = np.zeros((nsinos, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) cmap = np.zeros((Cnt['NCRS'], Cnt['NRNG']), dtype=np.float32) rndout = { 'rsn': rsn, 'cmap': cmap,} - #save results for each frame + # save results for each frame rsino = np.zeros((nfrm, nsinos, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) crmap = np.zeros((nfrm, Cnt['NCRS'], Cnt['NRNG']), dtype=np.float32) @@ -274,16 +274,16 @@ def prand(fansums, pmsk, txLUT, axLUT, Cnt): def sino2nii(sino, Cnt, fpth): '''save sinogram in span-11 into NIfTI file''' - #number of segments + # number of segments segn = len(Cnt['SEG']) cumseg = np.cumsum(Cnt['SEG']) cumseg = np.append([0], cumseg) - #plane offset (relative to 127 planes of seg 0) for each segment + # plane offset (relative to 127 planes of seg 0) for each segment OFF = np.min(abs(np.append([Cnt['MNRD']], [Cnt['MXRD']], axis=0)), axis=0) niisn = np.zeros((Cnt['SEG'][0], 
Cnt['NSANGLES'], Cnt['NSBINS'], segn), dtype=sino.dtype) - #first segment (with direct planes) + # first segment (with direct planes) # tmp = niisn[:, :, :, 0] = sino[Cnt['SEG'][0] - 1::-1, ::-1, ::-1] @@ -297,24 +297,24 @@ def sino2nii(sino, Cnt, fpth): nib.save(nim, fpth) -#================================================================================= +# ================================================================================ # create michelogram map for emission data, only when the input sino in in span-1 def get_michem(sino, axLUT, Cnt): # span: spn = -1 if Cnt['SPN'] == 1: - slut = np.arange(Cnt['NSN1']) #for span 1, one-to-one mapping + slut = np.arange(Cnt['NSN1']) # for span 1, one-to-one mapping elif Cnt['SPN'] == 11: slut = axLUT['sn1_sn11'] else: raise ValueError('sino is neither in span-1 or span-11') - #acitivity michelogram + # acitivity michelogram Mem = np.zeros((Cnt['NRNG'], Cnt['NRNG']), dtype=np.float32) - #sino to ring number & sino-1 to sino-11 index: + # sino to ring number & sino-1 to sino-11 index: sn1_rno = axLUT['sn1_rno'] - #sum all the sinograms inside + # sum all the sinograms inside ssm = np.sum(sino, axis=(1, 2)) for sni in range(len(sn1_rno)): @@ -325,9 +325,9 @@ def get_michem(sino, axLUT, Cnt): return Mem -#================================================================================= -#--------------------------------------------------------------------------------- -#================================================================================= +# ================================================================================ +# -------------------------------------------------------------------------------- +# ================================================================================ def draw_frames(hst, tfrms, plot_diff=True): @@ -550,9 +550,9 @@ def dynamic_timings(flist, offset=0): nfrm = np.sum(farray[:, 0]) # list of frame duration frms = np.zeros(nfrm, dtype=np.uint16) - #frame iterator + # frame 
iterator fi = 0 - #time sum of frames + # time sum of frames tsum = 0 # list of frame timings t_frames = ['timings'] diff --git a/niftypet/nipet/lm/pviews.py b/niftypet/nipet/lm/pviews.py index 0d895e0d..f9b50294 100644 --- a/niftypet/nipet/lm/pviews.py +++ b/niftypet/nipet/lm/pviews.py @@ -28,11 +28,11 @@ def video_frm(hst, outpth): mfrm = hst['pvs_sgtl'].shape[0] - #--for movie + # --for movie FFMpegWriter = manimation.writers['ffmpeg'] metadata = dict(title='GPU Sino Views', artist='Pawel', comment=':)') writer = FFMpegWriter(fps=25, bitrate=30000, metadata=metadata) - #-- + # -- fig3 = plt.figure() @@ -53,13 +53,13 @@ def video_frm(hst, outpth): ax3 = plt.subplot(313) plt.title('Axial Centre of Mass') t = np.arange(0., hst['dur'], 1.) - #plt.plot(t, rprmt, 'k', t, rdlyd, 'r') + # plt.plot(t, rprmt, 'k', t, rdlyd, 'r') plt.plot(t, mvavg(hst['cmass'][:], 5), 'k') plt.ylim([ymin, ymax]) plt.xlabel('Time [s]') l2, = plt.plot(np.array([1000, 1000]), np.array([0, ymax]), 'b') - #how many gpu frames per movie (controls the time resolution) + # how many gpu frames per movie (controls the time resolution) mf = 6 mmfrm = mfrm / mf @@ -82,9 +82,9 @@ def video_frm(hst, outpth): return fnm -#=================================================================================== +# ================================================================================== # Dynamic Frames to Projection Views -#----------------------------------------------------------------------------------- +# ---------------------------------------------------------------------------------- def video_dyn(hst, frms, outpth, axLUT, Cnt): @@ -126,19 +126,19 @@ def video_dyn(hst, frms, outpth, axLUT, Cnt): print('-----------') print('GPUtot =', gpu_totsum) - #---additional constants + # ---additional constants saggital_angle = 127 coronal_angle = 0 i_mxfrm = gsum.argmax() frmrep = 5 mfrm = frmrep * nfrm - #--- + # --- - #--for movie + # --for movie FFMpegWriter = manimation.writers['ffmpeg'] metadata = 
dict(title='Axial View', artist='Pawel', comment='--') writer = FFMpegWriter(fps=10, bitrate=30000, metadata=metadata) - #-- + # -- fig1 = plt.figure() @@ -148,7 +148,7 @@ def video_dyn(hst, frms, outpth, axLUT, Cnt): plt.tick_params(axis='both', which='both', bottom='off', top='off', labelbottom='off') l1 = plt.imshow(np.array(ddsino[i_mxfrm, :, coronal_angle, :], dtype=np.float64), cmap='jet', interpolation='nearest') - #plt.clim([0, 70]) + # plt.clim([0, 70]) ax2 = plt.subplot(312) plt.title('Sagittal View') @@ -156,7 +156,7 @@ def video_dyn(hst, frms, outpth, axLUT, Cnt): plt.tick_params(axis='both', which='both', bottom='off', top='off', labelbottom='off') l2 = plt.imshow(np.array(ddsino[i_mxfrm, :, saggital_angle, :], dtype=np.float64), cmap='jet', interpolation='nearest') - #plt.clim([0, 70]) + # plt.clim([0, 70]) ax3 = plt.subplot(313) plt.title('Axial Centre of Mass') diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index 9ba5b88e..564ab746 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -43,7 +43,7 @@ def lm_pos(datain, Cnt): log.error('DICOM list-mode data not found!') return None - #---find GantryOffset + # ---find GantryOffset if dhdr[0x0018, 0x1020].value == 'syngo MR B20P' or dhdr[0x0018, 0x1020].value == 'syngo MR E11': flip = 1 @@ -65,15 +65,15 @@ def lm_pos(datain, Cnt): else: raise ValueError('unknown scanner software version!') - fi = re.search(b'GantryOffset(?!_)', csainfo).start() #csainfo.find('GantryOffset') - #regular expression for the needed three numbers + fi = re.search(b'GantryOffset(?!_)', csainfo).start() # csainfo.find('GantryOffset') + # regular expression for the needed three numbers p = re.compile(b'-?\\d.\\d{4,10}') xyz = p.findall(csainfo[fi:fi + 200]) - #offset in cm + # offset in cm # xoff = float(xyz[0])/10 # yoff = float(xyz[1])/10 # zoff = float(xyz[2])/10 - #> hack to avoid other numbers (counting from the back) + # > hack to avoid other numbers (counting from the back) xoff = 
float(xyz[-3]) / 10 yoff = float(xyz[-2]) / 10 zoff = float(xyz[-1]) / 10 @@ -82,7 +82,7 @@ def lm_pos(datain, Cnt): log.info('gantry offset from DICOM:\n{}'.format(goff)) fi = csainfo.find(b'TablePositionOrigin') - #regular expression for the needed three numbers + # regular expression for the needed three numbers tpostr = csainfo[fi:fi + 200] tpo = re.sub(b'[^a-zA-Z0-9\\-]', b'', tpostr).split(b'M') tpozyx = np.array([float(tpo[-1]), float(tpo[-2]), float(tpo[-3])]) @@ -115,7 +115,7 @@ def hdr_lm(datain, Cnt): log.warning('DICOM field [0x29,0x1010] not found!') lmhdr = None - #CSA Series Header Info + # CSA Series Header Info if [0x29, 0x1120] in dhdr: csahdr = dhdr[0x29, 0x1120].value log.info('got CSA info.') @@ -142,7 +142,7 @@ def hdr_lm(datain, Cnt): log.warning('DICOM field with LM interfile header has not been found!') lmhdr = None - #CSA Series Header Info + # CSA Series Header Info if [0x29, 0x1020] in dhdr: csahdr = dhdr[0x29, 0x1020].value log.info('got CSA info.') @@ -159,13 +159,13 @@ def hdr_lm(datain, Cnt): def vh_bedpos(datain, Cnt): ihdr, csainfo = hdr_lm(datain, Cnt) - #start horizontal bed position + # start horizontal bed position p = re.compile(r'start horizontal bed position.*\d{1,3}\.*\d*') m = p.search(ihdr) fi = ihdr[m.start():m.end()].find('=') hbedpos = 0.1 * float(ihdr[m.start() + fi + 1:m.end()]) - #start vertical bed position + # start vertical bed position p = re.compile(r'start vertical bed position.*\d{1,3}\.*\d*') m = p.search(ihdr) fi = ihdr[m.start():m.end()].find('=') @@ -176,17 +176,17 @@ def vh_bedpos(datain, Cnt): def hmu_resample0(hmupos, parts, Cnt): - #output image sampling + # output image sampling Cim = { 'VXSRx': Cnt['SO_VXX'], 'VXSRy': Cnt['SO_VXY'], 'VXSRz': Cnt['SO_VXZ'], 'VXNRx': Cnt['SO_IMX'], 'VXNRy': Cnt['SO_IMY'], 'VXNRz': Cnt['SO_IMZ']} - #voxel position/offset - Cim['OFFRx'] = -0.5 * Cim['VXNRx'] * Cim['VXSRx'] #-0.5*Cim['VXSRx'] - Cim['OFFRy'] = -0.5 * Cim['VXNRy'] * Cim['VXSRy'] #-0.5*Cim['VXSRy'] + # 
voxel position/offset + Cim['OFFRx'] = -0.5 * Cim['VXNRx'] * Cim['VXSRx'] # -0.5*Cim['VXSRx'] + Cim['OFFRy'] = -0.5 * Cim['VXNRy'] * Cim['VXSRy'] # -0.5*Cim['VXSRy'] Cim['OFFRz'] = -0.5 * Cim['VXNRz'] * Cim['VXSRz'] - hmupos[0]['HBedPos'] Trnsl = (0.0, 0.0, 0.0) - #transformation matrix + # transformation matrix A = np.array( [[1., 0., 0., Trnsl[0]], [0., 1., 0., Trnsl[1]], [0., 0., 1., Trnsl[2]], [0., 0., 0., 1.]], dtype=np.float32) @@ -200,24 +200,24 @@ def hmu_resample0(hmupos, parts, Cnt): Cim['VXNOx'] = hmupos[i]['shape'][2] Cim['VXNOy'] = hmupos[i]['shape'][1] Cim['VXNOz'] = hmupos[i]['shape'][0] - #original image offset + # original image offset Cim['OFFOx'] = hmupos[i]['vpos'][2] Cim['OFFOy'] = hmupos[i]['vpos'][1] Cim['OFFOz'] = -hmupos[i]['vpos'][0] - #resample! + # resample! if i == 4: - #does the bed just partly (no point doing all the long bed) + # does the bed just partly (no point doing all the long bed) offresZ = (-.5 * Cnt['SO_IMZ'] * Cnt['SO_VXZ'] - hmupos[0]['HBedPos']) - #excess of the hrdwr mu-map axially + # excess of the hrdwr mu-map axially excemuZ = offresZ - (-hmupos[4]['vpos'][0]) excevox = int(excemuZ / hmupos[4]['ivs'][0]) - 5 # with extra margin of 5 newoffZ = -hmupos[4]['vpos'][0] + excevox * hmupos[4]['ivs'][0] - #number of voxels included axially - inclvox = Cnt['SO_IMZ'] * Cnt['SO_VXZ'] / hmupos[4]['ivs'][0] + 10 #with extra margin... - #truncate the image + # number of voxels included axially + inclvox = Cnt['SO_IMZ'] * Cnt['SO_VXZ'] / hmupos[4]['ivs'][0] + 10 # with extra margin... 
+ # truncate the image im = hmupos[i]['img'][excevox:excevox + inclvox, :, :] - #update dictionary Cim + # update dictionary Cim Cim['OFFOz'] = newoffZ Cim['VXNOz'] = im.shape[0] imr += nimpa.prc.improc.resample(im, A, Cim) @@ -317,9 +317,9 @@ def timings_from_list(flist, offset=0): nfrm = np.sum(farray[:, 0]) # list of frame duration frms = np.zeros(nfrm, dtype=np.uint16) - #frame iterator + # frame iterator fi = 0 - #time sum of frames + # time sum of frames tsum = 0 # list of frame timings t_frames = [] @@ -363,7 +363,7 @@ def axial_lut(Cnt): log.error('the reduced axial FOV only works in span-1!') return None - #ring dimensions + # ring dimensions rng = np.zeros((NRNG, 2), dtype=np.float32) z = -.5 * NRNG * Cnt['AXR'] for i in range(NRNG): @@ -371,10 +371,10 @@ def axial_lut(Cnt): z += Cnt['AXR'] rng[i, 1] = z - #--create mapping from ring difference to segment number - #ring difference range + # --create mapping from ring difference to segment number + # ring difference range rd = list(range(-Cnt['MRD'], Cnt['MRD'] + 1)) - #ring difference to segment + # ring difference to segment rd2sg = -1 * np.ones(( len(rd), 2, @@ -384,26 +384,26 @@ def axial_lut(Cnt): if (rd[i] >= Cnt['MNRD'][iseg]) and (rd[i] <= Cnt['MXRD'][iseg]): rd2sg[i, :] = np.array([rd[i], iseg]) - #create two Michelograms for segments (Mseg) - #and absolute axial position for individual sinos (Mssrb) which is single slice rebinning + # create two Michelograms for segments (Mseg) + # and absolute axial position for individual sinos (Mssrb) which is single slice rebinning Mssrb = -1 * np.ones((NRNG, NRNG), dtype=np.int32) Mseg = -1 * np.ones((NRNG, NRNG), dtype=np.int32) for r1 in range(Cnt['RNG_STRT'], Cnt['RNG_END']): for r0 in range(Cnt['RNG_STRT'], Cnt['RNG_END']): if abs(r1 - r0) > Cnt['MRD']: continue - ssp = r0 + r1 #segment sino position (axially: 0-126) + ssp = r0 + r1 # segment sino position (axially: 0-126) rd = r1 - r0 jseg = rd2sg[rd2sg[:, 0] == rd, 1] Mssrb[r1, r0] = ssp - Mseg[r1, r0] 
= jseg #negative segments are on top diagonals + Mseg[r1, r0] = jseg # negative segments are on top diagonals # np.savetxt("Mssrb.csv", Mssrb, delimiter=",", fmt='%d') # np.savetxt("Mseg.csv", Mseg, delimiter=",", fmt='%d') - #create a Michelogram map from rings to sino number in span-11 (1..837) + # create a Michelogram map from rings to sino number in span-11 (1..837) Msn = -1 * np.ones((NRNG, NRNG), dtype=np.int32) - #number of span-1 sinos per sino in span-11 + # number of span-1 sinos per sino in span-11 Mnos = -1 * np.ones((NRNG, NRNG), dtype=np.int32) i = 0 for iseg in range(0, len(Cnt['SEG'])): @@ -412,7 +412,7 @@ def axial_lut(Cnt): Mtmp[~msk] = -1 uq = np.unique(Mtmp[msk]) for u in range(0, len(uq)): - #print(i) + # print(i) Msn[Mtmp == uq[u]] = i Mnos[Mtmp == uq[u]] = np.sum(Mtmp == uq[u]) i += 1 @@ -424,8 +424,8 @@ def axial_lut(Cnt): sn1_ssrb = np.zeros((NSN1_c), dtype=np.int16) sn1_sn11 = np.zeros((NSN1_c), dtype=np.int16) sn1_sn11no = np.zeros((NSN1_c), dtype=np.int8) - sni = 0 #full linear index, upto 4084 - Msn1 = -1 * np.ones((NRNG, NRNG), dtype=np.int16) #michelogram of sino numbers for spn-1 + sni = 0 # full linear index, upto 4084 + Msn1 = -1 * np.ones((NRNG, NRNG), dtype=np.int16) # michelogram of sino numbers for spn-1 for ro in range(0, NRNG): if ro == 0: oblique = 1 @@ -435,15 +435,16 @@ def axial_lut(Cnt): strt = NRNG * (ro + Cnt['RNG_STRT']) + Cnt['RNG_STRT'] stop = (Cnt['RNG_STRT'] + NRNG_c) * NRNG step = NRNG + 1 - for li in range(strt, stop, step): #goes along a diagonal started in the first row at r1 - #linear indecies of michelogram --> subscript indecies for positive and negative RDs + for li in range(strt, stop, + step): # goes along a diagonal started in the first row at r1 + # linear indecies of michelogram --> subscript indecies for positive and negative RDs if m == 0: r1 = int(li / NRNG) r0 = int(li - r1*NRNG) - else: #for positive now (? or vice versa) + else: # for positive now (? 
or vice versa) r0 = int(li / NRNG) r1 = int(li - r0*NRNG) - #avoid case when RD>MRD + # avoid case when RD>MRD if (Msn[r1, r0]) < 0: continue @@ -456,10 +457,10 @@ def axial_lut(Cnt): sn1_sn11no[sni] = Mnos[r0, r1] Msn1[r0, r1] = sni - #-- + # -- sni += 1 - #span-11 sino to SSRB + # span-11 sino to SSRB sn11_ssrb = np.zeros(Cnt['NSN11'], dtype=np.int32) sn11_ssrb[:] -= 1 sn1_ssrno = np.zeros(Cnt['NSEG0'], dtype=np.int8) @@ -475,8 +476,8 @@ def axial_lut(Cnt): sn11_ssrno = sn11_ssrno[np.unique(sn1_ssrb)] sn11_ssrb = sn11_ssrb[sn11_ssrb >= 0] - #--------------------------------------------------------------------- - #linear index (along diagonals of Michelogram) to rings + # --------------------------------------------------------------------- + # linear index (along diagonals of Michelogram) to rings # the number of Michelogram elements considered in projection calculations NLI2R_c = int(NRNG_c**2 / 2. + NRNG_c/2.) # if the whole scanner is used then account for the MRD and subtract 6 ring permutations @@ -484,7 +485,7 @@ def axial_lut(Cnt): NLI2R_c -= 6 li2r = np.zeros((NLI2R_c, 2), dtype=np.int8) - #the same as above but to sinos in span-11 + # the same as above but to sinos in span-11 li2sn = np.zeros((NLI2R_c, 2), dtype=np.int16) li2sn1 = np.zeros((NLI2R_c, 2), dtype=np.int16) li2rng = np.zeros((NLI2R_c, 2), dtype=np.float32) @@ -498,28 +499,28 @@ def axial_lut(Cnt): stop = (Cnt['RNG_STRT'] + NRNG_c) * NRNG step = NRNG + 1 - for li in range(strt, stop, step): #goes along a diagonal started in the first row at r2o - #from the linear indexes of Michelogram get the subscript indexes + for li in range(strt, stop, step): # goes along a diagonal started in the first row at r2o + # from the linear indexes of Michelogram get the subscript indexes r1 = int(li / NRNG) r0 = int(li - r1*NRNG) - #avoid case when RD>MRD + # avoid case when RD>MRD if (Msn[r1, r0]) < 0: continue # li2r[0, dli] = r0 # li2r[1, dli] = r1 - # #-- + # # -- # li2rng[0, dli] = rng[r0,0]; # li2rng[1, 
dli] = rng[r1,0]; - # #-- + # # -- # li2sn[0, dli] = Msn[r0,r1] # li2sn[1, dli] = Msn[r1,r0] li2r[dli, 0] = r0 li2r[dli, 1] = r1 - #-- + # -- li2rng[dli, 0] = rng[r0, 0] li2rng[dli, 1] = rng[r1, 0] - #-- + # -- li2sn[dli, 0] = Msn[r0, r1] li2sn[dli, 1] = Msn[r1, r0] @@ -528,12 +529,12 @@ def axial_lut(Cnt): # li2sn[0, dli] = Msn[r1,r0] # li2sn[1, dli] = Msn[r0,r1] - #-- + # -- li2nos[dli] = Mnos[r1, r0] - #-- + # -- dli += 1 # log.info('number of diagonal indexes (in Michelogram) accounted for: {}'.format(dli)) - #--------------------------------------------------------------------- + # --------------------------------------------------------------------- axLUT = { 'li2rno': li2r, 'li2sn': li2sn, 'li2sn1': li2sn1, 'li2nos': li2nos, 'li2rng': li2rng, @@ -624,39 +625,39 @@ def transaxial_lut(Cnt, visualisation=False): ''' if visualisation: - #---visualisation of the crystal ring in transaxial view - p = 8 #pixel density of the visualisation + # ---visualisation of the crystal ring in transaxial view + p = 8 # pixel density of the visualisation VISXY = Cnt['SO_IMX'] * p T = np.zeros((VISXY, VISXY), dtype=np.float32) - #--- + # --- - #--- crystal coordinates transaxially - #> block width + # --- crystal coordinates transaxially + # > block width bw = 3.209 - #> block gap [cm] + # > block gap [cm] dg = 0.474 NTBLK = 56 - alpha = 0.1122 #2*pi/NTBLK + alpha = 0.1122 # 2*pi/NTBLK crs = np.zeros((Cnt['NCRS'], 4), dtype=np.float32) - #> phi angle points in the middle and is used for obtaining the normal of detector block + # > phi angle points in the middle and is used for obtaining the normal of detector block phi = 0.5*pi - alpha/2 - 0.001 for bi in range(NTBLK): - #> tangent point (ring against detector block) + # > tangent point (ring against detector block) # ye = RE*np.sin(phi) # xe = RE*np.cos(phi) y = Cnt['R_RING'] * np.sin(phi) x = Cnt['R_RING'] * np.cos(phi) - #> vector for the face of crystals + # > vector for the face of crystals pv = np.array([-y, x]) pv /= 
np.sum(pv**2)**.5 - #> update phi for next block + # > update phi for next block phi -= alpha - #> end block points + # > end block points xcp = x + (bw/2) * pv[0] ycp = y + (bw/2) * pv[1] @@ -686,7 +687,7 @@ def transaxial_lut(Cnt, visualisation=False): if visualisation: out['visual'] = T - #> crystals reduced by the gaps (dead crystals) + # > crystals reduced by the gaps (dead crystals) crsr = -1 * np.ones(Cnt['NCRS'], dtype=np.int16) ci = 0 for i in range(Cnt['NCRS']): @@ -698,20 +699,20 @@ def transaxial_lut(Cnt, visualisation=False): out['crsri'] = crsr - #---------------------------------- + # ---------------------------------- # sinogram definitions - #> sinogram mask for dead crystals (gaps) + # > sinogram mask for dead crystals (gaps) msino = np.zeros((Cnt['NSBINS'], Cnt['NSANGLES']), dtype=np.int8) # LUT: sino -> crystal and crystal -> sino s2cF = np.zeros((Cnt['NSBINS'] * Cnt['NSANGLES'], 2), dtype=np.int16) c2sF = -1 * np.ones((Cnt['NCRS'], Cnt['NCRS']), dtype=np.int32) - #> with projection bin fast changing (c2s has angle changing fast). - #> this is used in scatter estimation + # > with projection bin fast changing (c2s has angle changing fast). 
+ # > this is used in scatter estimation c2sFw = -1 * np.ones((Cnt['NCRS'], Cnt['NCRS']), dtype=np.int32) - #> global sinogram index (linear) of live crystals (excludes gaps) + # > global sinogram index (linear) of live crystals (excludes gaps) awi = 0 for iw in range(Cnt['NSBINS']): @@ -730,7 +731,7 @@ def transaxial_lut(Cnt, visualisation=False): if (((((c0 + Cnt['OFFGAP']) % Cnt['TGAP']) * ((c1 + Cnt['OFFGAP']) % Cnt['TGAP'])) > 0)): - #> masking gaps in 2D sinogram + # > masking gaps in 2D sinogram msino[iw, ia] = 1 awi += 1 @@ -742,7 +743,7 @@ def transaxial_lut(Cnt, visualisation=False): out['c2sFw'] = c2sFw out['msino'] = msino - #> number of total transaxial live crystals (excludes gaps) + # > number of total transaxial live crystals (excludes gaps) out['Naw'] = awi s2c = np.zeros((out['Naw'], 2), dtype=np.int16) @@ -751,7 +752,7 @@ def transaxial_lut(Cnt, visualisation=False): aw2sn = np.zeros((out['Naw'], 2), dtype=np.int16) aw2ali = np.zeros(out['Naw'], dtype=np.int32) - #> live crystals which are in coincidence + # > live crystals which are in coincidence cij = np.zeros((Cnt['NCRSR'], Cnt['NCRSR']), dtype=np.int8) awi = 0 @@ -769,7 +770,7 @@ def transaxial_lut(Cnt, visualisation=False): s2cr[awi, 0] = crsr[c0] s2cr[awi, 1] = crsr[c1] - #> reduced crystal index (after getting rid of crystal gaps) + # > reduced crystal index (after getting rid of crystal gaps) cr2s[crsr[c1], crsr[c0]] = awi cr2s[crsr[c0], crsr[c1]] = awi @@ -778,7 +779,7 @@ def transaxial_lut(Cnt, visualisation=False): aw2ali[awi] = iw + Cnt['NSBINS'] * ia - #> square matrix of crystals in coincidence + # > square matrix of crystals in coincidence cij[crsr[c0], crsr[c1]] = 1 cij[crsr[c1], crsr[c0]] = 1 @@ -790,7 +791,7 @@ def transaxial_lut(Cnt, visualisation=False): out['aw2sn'] = aw2sn out['aw2ali'] = aw2ali out['cij'] = cij - #---------------------------------- + # ---------------------------------- # # cij - a square matrix of crystals in coincidence (transaxially) # # crsri - indexes 
of crystals with the gap crystals taken out (therefore reduced) @@ -812,9 +813,9 @@ def transaxial_lut(Cnt, visualisation=False): return out -#================================================================================================= +# ================================================================================================ # Explore files in folder with raw PET/MR data -#------------------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------------------------ def get_npfiles(dfile, datain, v=False): @@ -858,12 +859,12 @@ def get_niifiles(dfile, datain, v=False): ------------------------------------------------------------------ ''').format(dfile)) - #> NIfTI file of converted MR-based mu-map from DICOMs + # > NIfTI file of converted MR-based mu-map from DICOMs if os.path.basename(dfile).split('.nii')[0] == 'mumap-from-DICOM': datain['mumapNII'] = dfile logger('mu-map for the object.') - #> NIfTI file of pseudo CT + # > NIfTI file of pseudo CT fpct = glob.glob(os.path.join(os.path.dirname(dfile), '*_synth.nii*')) if len(fpct) > 0: datain['pCT'] = fpct[0] @@ -874,7 +875,7 @@ def get_niifiles(dfile, datain, v=False): datain['pCT'] = fpct[0] logger('pseudoCT of the object.') - #MR T1 + # MR T1 fmri = glob.glob(os.path.join(os.path.dirname(dfile), '[tT]1*.nii*')) if len(fmri) == 1: bnm = os.path.basename(fmri[0]).lower() @@ -890,7 +891,7 @@ def get_niifiles(dfile, datain, v=False): elif 'usable' in bnm: datain['T1nii_2'] = fg - #MR T1 N4bias-corrected + # MR T1 N4bias-corrected fmri = glob.glob(os.path.join(os.path.dirname(dfile), '[tT]1*[nN]4bias*.nii*')) if len(fmri) == 1: bnm = os.path.basename(fmri[0]).lower() @@ -906,7 +907,7 @@ def get_niifiles(dfile, datain, v=False): elif 'usable' in bnm: datain['T1N4_2'] = fg - #T1w corrected + # T1w corrected fbc = glob.glob(os.path.join(os.path.dirname(dfile), '*gifbc.nii*')) if len(fbc) == 1: 
datain['T1bc'] = fbc[0] @@ -916,7 +917,7 @@ def get_niifiles(dfile, datain, v=False): datain['T1bc'] = fbc[0] logger('NIfTI for bias corrected T1w of the object:\n{}'.format(fbc[0])) - #T1-based labels after parcellation + # T1-based labels after parcellation flbl = glob.glob(os.path.join(os.path.dirname(dfile), '*giflabels.nii*')) if len(flbl) == 1: datain['T1lbl'] = flbl[0] @@ -926,13 +927,13 @@ def get_niifiles(dfile, datain, v=False): datain['T1lbl'] = flbl[0] logger('NIfTI for regional parcellations of the object:\n{}'.format(flbl[0])) - #reconstructed emission data without corrections, minimum 2 osem iter + # reconstructed emission data without corrections, minimum 2 osem iter fpct = glob.glob(os.path.join(os.path.dirname(dfile), '*__ACbed.nii*')) if len(fpct) > 0: datain['em_nocrr'] = fpct[0] logger('pseudoCT of the object.') - #reconstructed emission data with corrections, minimum 3 osem iter + # reconstructed emission data with corrections, minimum 3 osem iter fpct = glob.glob(os.path.join(os.path.dirname(dfile), '*QNT*.nii*')) if len(fpct) > 0: datain['em_crr'] = fpct[0] @@ -951,12 +952,12 @@ def get_dicoms(dfile, datain, Cnt): d = dcm.dcmread(dfile) dcmtype = nimpa.dcminfo(d, verbose=Cnt['VERBOSE']) - #> check if it is norm file + # > check if it is norm file if 'mmr' in dcmtype and 'norm' in dcmtype: if os.path.splitext(dfile)[-1].lower() == '.dcm': datain['nrm_dcm'] = dfile - #> check if the binary file exists + # > check if the binary file exists if os.path.isfile(dfile[:-4] + '.bf'): datain['nrm_bf'] = dfile[:-4] + '.bf' else: @@ -975,11 +976,11 @@ def get_dicoms(dfile, datain, Cnt): datain['nrm_bf'] = bf log.debug('saved component norm data to binary file: \n{}'.format(bf)) - #--- check if it is list-mode file + # --- check if it is list-mode file elif 'mmr' in dcmtype and 'list' in dcmtype: if os.path.splitext(dfile)[-1] == '.dcm': datain['lm_dcm'] = dfile - #check if the binary file exists + # check if the binary file exists if 
os.path.isfile(dfile[:-4] + '.bf'): datain['lm_bf'] = dfile[:-4] + '.bf' else: @@ -1003,10 +1004,10 @@ def get_dicoms(dfile, datain, Cnt): log.error('could not find binary list-mode data in the IMA DICOM file.') return None - #> get info about the PET tracer being used + # > get info about the PET tracer being used lmhdr, csahdr = hdr_lm(datain, Cnt) - #> if there is interfile header get the info from there + # > if there is interfile header get the info from there if lmhdr is not None: f0 = lmhdr.find('isotope name') else: @@ -1014,14 +1015,14 @@ def get_dicoms(dfile, datain, Cnt): if f0 >= 0: f1 = f0 + lmhdr[f0:].find('\n') - #regular expression for the isotope symbol + # regular expression for the isotope symbol p = re.compile(r'(?<=:=)\s*\S*') # the name of isotope: istp = p.findall(lmhdr[f0:f1])[0] istp = istp.replace('-', '') Cnt['ISOTOPE'] = istp.strip() - #> if no info in interfile header than look in the CSA header + # > if no info in interfile header than look in the CSA header else: f0 = csahdr.find('RadionuclideCodeSequence') if f0 < 0: @@ -1036,7 +1037,7 @@ def get_dicoms(dfile, datain, Cnt): else: print('w> could not find isotope name. enter manually into Cnt[' 'ISOTOPE' ']') return None - #--- + # --- # check if MR-based mu-map elif 'mumap' in dcmtype: @@ -1091,7 +1092,7 @@ def explore_input(fldr, params, print_paths=False, recurse=1): log.error('provide a valid folder path for the data.') return - #check for the availble data: list mode data, component-based norm and mu-maps + # check for the availble data: list mode data, component-based norm and mu-maps # [dcm + bf] is one format of DICOM raw data; [ima] is another one used. # mu-map can be given from the scanner as an e.g., UTE-based, or pseudoCT through synthesis. 
datain = {'corepath': fldr} @@ -1123,7 +1124,7 @@ def explore_input(fldr, params, print_paths=False, recurse=1): def putgaps(s, txLUT, Cnt, sino_no=0): - #number of sino planes (2D sinos) depends on the span used + # number of sino planes (2D sinos) depends on the span used if Cnt['SPN'] == 1: # number of rings calculated for the given ring range (optionally we can use only part of the axial FOV) NRNG_c = Cnt['RNG_END'] - Cnt['RNG_STRT'] @@ -1136,9 +1137,9 @@ def putgaps(s, txLUT, Cnt, sino_no=0): elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] - #preallocate sino with gaps + # preallocate sino with gaps sino = np.zeros((Cnt['NSANGLES'], Cnt['NSBINS'], nsinos), dtype=np.float32) - #fill the sino with gaps + # fill the sino with gaps mmr_auxe.pgaps(sino, s.astype(np.float32), txLUT, Cnt, sino_no) sino = np.transpose(sino, (2, 0, 1)) @@ -1150,9 +1151,9 @@ def remgaps(sino, txLUT, Cnt): # number of sino planes (2D sinos) depends on the span used nsinos = sino.shape[0] - #preallocate output sino without gaps, always in float + # preallocate output sino without gaps, always in float s = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) - #fill the sino with gaps + # fill the sino with gaps mmr_auxe.rgaps(s, sino.astype(np.float32), txLUT, Cnt) # return in the same data type as the input sino diff --git a/niftypet/nipet/mmrnorm.py b/niftypet/nipet/mmrnorm.py index 02b27b5a..6f5f69c0 100644 --- a/niftypet/nipet/mmrnorm.py +++ b/niftypet/nipet/mmrnorm.py @@ -10,9 +10,9 @@ from . 
import mmr_auxe # auxiliary functions through Python extensions in CUDA -#================================================================================================= +# ================================================================================================ # GET NORM COMPONENTS -#================================================================================================= +# ================================================================================================ def get_components(datain, Cnt): @@ -28,28 +28,28 @@ def get_components(datain, Cnt): raise NameError('norm file does not exist or it is incomplete') with open(fnrm_dat, 'rb') as f: - #geometric effects + # geometric effects geo = np.fromfile(f, np.float32, Cnt['NSBINS'] * Cnt['NSEG0']) geo.shape = (Cnt['NSEG0'], Cnt['NSBINS']) - #crystal interference + # crystal interference crs_intf = np.fromfile(f, np.float32, 9 * Cnt['NSBINS']) crs_intf.shape = (Cnt['NSBINS'], 9) - #crystal efficiencies + # crystal efficiencies crs_eff = np.fromfile(f, np.float32, Cnt['NCRS'] * Cnt['NRNG']) crs_eff.shape = (Cnt['NRNG'], Cnt['NCRS']) - #axial effects + # axial effects ax_eff1 = np.fromfile(f, np.float32, Cnt['NSN11']) - #paralyzing ring DT parameters + # paralyzing ring DT parameters rng_dtp = np.fromfile(f, np.float32, Cnt['NRNG']) - #non-paralyzing ring DT parameters + # non-paralyzing ring DT parameters rng_dtnp = np.fromfile(f, np.float32, Cnt['NRNG']) - #TX crystal DT parameter + # TX crystal DT parameter crs_dt = np.fromfile(f, np.float32, 9) - #additional axial effects + # additional axial effects ax_eff2 = np.fromfile(f, np.float32, Cnt['NSN11']) - #------------------------------------------------- - #the files below are found based on a 24hr scan of germanium-68 phantom + # ------------------------------------------------- + # the files below are found based on a 24hr scan of germanium-68 phantom auxdata = Path(resource_filename("niftypet.nipet", "auxdata")) # axial effects for span-1 
ax_f1 = np.load(fspath(auxdata / "AxialFactorForSpan1.npy")) @@ -59,9 +59,9 @@ def get_components(datain, Cnt): # relative scale factors for axial scatter deriving span-1 scale factors from SSR scale factors sax_f1 = np.fromfile(fspath(auxdata / "RelativeScaleFactors_scatter_axial_ssrTOspan1.f32"), np.float32, Cnt['NSN1']) - #------------------------------------------------- + # ------------------------------------------------- - #------------------------------------------------- + # ------------------------------------------------- # HEADER FILE # possible DICOM locations for the Interfile header nhdr_locations = [[0x29, 0x1010], [0x29, 0x1110]] @@ -90,13 +90,13 @@ def get_components(datain, Cnt): f0 = nhdr.find('scanner quantification factor') f1 = f0 + nhdr[f0:].find('\n') - #regular expression for the needed three numbers + # regular expression for the needed three numbers p = re.compile(r'(?<=:=)\s*\d{1,5}[.]\d{3,10}[e][+-]\d{1,4}') - #-quantification factor: + # -quantification factor: qf = float(p.findall(nhdr[f0:f1])[0]) - #-local quantification correction factor + # -local quantification correction factor qf_loc = 0.205 - #------------------------------------------------- + # ------------------------------------------------- nrmcmp = { 'qf': qf, 'qf_loc': qf_loc, 'geo': geo, 'cinf': crs_intf, 'ceff': crs_eff, 'axe1': ax_eff1, @@ -108,20 +108,20 @@ def get_components(datain, Cnt): def get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=None): - #get the normalisation components + # get the normalisation components if normcomp is None: normcomp, _ = get_components(datain, Cnt) - #number of sino planes (2D sinos) depends on the span used + # number of sino planes (2D sinos) depends on the span used if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] - #predefine the sinogram + # predefine the sinogram sinog = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) - #get the sino in the GPU-optimised shape + # get the sino in the 
GPU-optimised shape mmr_auxe.norm(sinog, normcomp, hst['buckets'], axLUT, txLUT['aw2ali'], Cnt) return sinog @@ -129,17 +129,17 @@ def get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=None): def get_sino(datain, hst, axLUT, txLUT, Cnt): - #number of sino planes (2D sinos) depends on the span used + # number of sino planes (2D sinos) depends on the span used if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] - #get sino with no gaps + # get sino with no gaps s = get_sinog(datain, hst, axLUT, txLUT, Cnt) - #preallocate sino with gaps + # preallocate sino with gaps sino = np.zeros((Cnt['NSANGLES'], Cnt['NSBINS'], nsinos), dtype=np.float32) - #fill the sino with gaps + # fill the sino with gaps mmr_auxe.pgaps(sino, s, txLUT, Cnt, 0) sino = np.transpose(sino, (2, 0, 1)) @@ -155,17 +155,17 @@ def get_norm_sino(datain, scanner_params, hst): # if not hst: # hst = mmrhist.mmrhist(datain, scanner_params) - #number of sino planes (2D sinos) depends on the span used + # number of sino planes (2D sinos) depends on the span used if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] - #get sino with no gaps + # get sino with no gaps s = get_sinog(datain, hst, axLUT, txLUT, Cnt) - #preallocate sino with gaps + # preallocate sino with gaps sino = np.zeros((Cnt['NSANGLES'], Cnt['NSBINS'], nsinos), dtype=np.float32) - #fill the sino with gaps + # fill the sino with gaps mmr_auxe.pgaps(sino, s, txLUT, Cnt, 0) sino = np.transpose(sino, (2, 0, 1)) diff --git a/niftypet/nipet/prj/mmrprj.py b/niftypet/nipet/prj/mmrprj.py index bf3eef83..e818f625 100644 --- a/niftypet/nipet/prj/mmrprj.py +++ b/niftypet/nipet/prj/mmrprj.py @@ -11,9 +11,9 @@ log = logging.getLogger(__name__) -#========================================================================= +# ======================================================================== # transaxial (one-slice) projector 
-#------------------------------------------------------------------------- +# ------------------------------------------------------------------------ def trnx_prj(scanner_params, sino=None, im=None): @@ -41,9 +41,9 @@ def trnx_prj(scanner_params, sino=None, im=None): return {'tv': tv, 'tt': tt} -#========================================================================= +# ======================================================================== # forward projector -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------ def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=False, @@ -67,8 +67,8 @@ def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=Fa txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] - #>choose between attenuation forward projection (mu-map is the input) - #>or the default for emission image forward projection + # >choose between attenuation forward projection (mu-map is the input) + # >or the default for emission image forward projection if attenuation: att = 1 else: @@ -106,7 +106,7 @@ def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=Fa log.debug('number of sinos:%d' % nsinos) - #predefine the sinogram. if subsets are used then only preallocate those bins which will be used. + # predefine the sinogram. if subsets are used then only preallocate those bins which will be used. 
if isub[0] < 0: sinog = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) else: @@ -127,9 +127,9 @@ def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=Fa return sino -#========================================================================= +# ======================================================================== # back projector -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------ def back_prj(sino, scanner_params, isub=np.array([-1], dtype=np.int32)): @@ -160,8 +160,8 @@ def back_prj(sino, scanner_params, isub=np.array([-1], dtype=np.int32)): elif Cnt['SPN'] == 0: nsinos = Cnt['NSEG0'] - #> check first the Siemens default sinogram; - #> for this default shape only full sinograms are expected--no subsets. + # > check first the Siemens default sinogram; + # > for this default shape only full sinograms are expected--no subsets. if len(sino.shape) == 3: if sino.shape[0] != nsinos or sino.shape[1] != Cnt['NSANGLES'] or sino.shape[2] != Cnt[ 'NSBINS']: @@ -173,25 +173,25 @@ def back_prj(sino, scanner_params, isub=np.array([-1], dtype=np.int32)): raise ValueError('Unexpected number of transaxial elements in the full sinogram.') elif isub[0] >= 0 and sino.shape[0] != len(isub): raise ValueError('Unexpected number of transaxial elements in the subset sinogram.') - #> check if the number of sinograms is correct + # > check if the number of sinograms is correct if sino.shape[1] != nsinos: raise ValueError('Inconsistent number of sinograms in the array.') - #> when found the dimensions/shape are fine: + # > when found the dimensions/shape are fine: sinog = sino else: raise ValueError('Unexpected shape of the input sinogram.') - #predefine the output image depending on the number of rings used + # predefine the output image depending on the number of rings used if Cnt['SPN'] == 1 and 'rSZ_IMZ' in Cnt: nvz = Cnt['rSZ_IMZ'] else: nvz = Cnt['SZ_IMZ'] 
bimg = np.zeros((Cnt['SZ_IMX'], Cnt['SZ_IMY'], nvz), dtype=np.float32) - #> run back-projection + # > run back-projection petprj.bprj(bimg, sinog, txLUT, axLUT, isub, Cnt) - #> change from GPU optimised image dimensions to the standard Siemens shape + # > change from GPU optimised image dimensions to the standard Siemens shape bimg = mmrimg.convert2e7(bimg, Cnt) return bimg diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 1d88fc23..2bb929d6 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -38,9 +38,9 @@ def fwhm2sig(fwhm, voxsize=1.): return (fwhm/voxsize) / (2 * (2 * np.log(2))**.5) -#========================================================================= +# ======================================================================== # OSEM RECON -#------------------------------------------------------------------------- +# ------------------------------------------------------------------------ def get_subsets14(n, params): @@ -69,9 +69,9 @@ def get_subsets14(n, params): si = [] #::::: iterate sino blocks. 
This bit may be unnecessary, it can be taken directly from sp array for b in range(N): - #--angle index within a sino block depending on subset s + # --angle index within a sino block depending on subset s ai = (s+b) % N - #--angle index for whole sino + # --angle index for whole sino sai = sp[ai, b] si.append(sai) totsum[s] += aisum[sai] @@ -115,7 +115,7 @@ def _config(fwhm3, check_len=True): kernel = np.empty((3, 2 * Cnt['RSZ_PSF_KRNL'] + 1), dtype=np.float32) for i, psf in enumerate(fwhm3): - #> FWHM -> sigma conversion for all dimensions separately + # > FWHM -> sigma conversion for all dimensions separately if i == 2: sig = fwhm2sig(psf, voxsize=Cnt['SZ_VOXZ'] * 10) else: @@ -168,13 +168,13 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N psf: Reconstruction with PSF, passed to `psf_config` ''' - #> Get particular scanner parameters: Constants, transaxial and axial LUTs + # > Get particular scanner parameters: Constants, transaxial and axial LUTs Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] - #---------- sort out OUTPUT ------------ - #-output file name for the reconstructed image + # ---------- sort out OUTPUT ------------ + # -output file name for the reconstructed image if outpath is None: opth = os.path.join(datain['corepath'], 'reconstructed') else: @@ -190,7 +190,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N return_ssrb = False return_mask = False - #---------- + # ---------- log.info('reconstruction in mode:%d' % recmod) @@ -210,7 +210,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N #========================================================================= # GET NORM - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- if normcomp is None: ncmp, _ = mmrnorm.get_components(datain, Cnt) else: @@ 
-221,12 +221,12 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N #========================================================================= # ATTENUATION FACTORS FOR COMBINED OBJECT AND BED MU-MAP - #------------------------------------------------------------------------- - #> combine attenuation and norm together depending on reconstruction mode + # ------------------------------------------------------------------------- + # > combine attenuation and norm together depending on reconstruction mode if recmod == 0: asng = np.ones(psng.shape, dtype=np.float32) else: - #> check if the attenuation sino is given as an array + # > check if the attenuation sino is given as an array if isinstance(attnsino, np.ndarray) \ and attnsino.shape==(Cnt['NSN11'], Cnt['NSANGLES'], Cnt['NSBINS']): asng = mmraux.remgaps(attnsino, txLUT, Cnt) @@ -238,13 +238,13 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N else: asng = np.zeros(psng.shape, dtype=np.float32) petprj.fprj(asng, mus, txLUT, axLUT, np.array([-1], dtype=np.int32), Cnt, 1) - #> combine attenuation and normalisation + # > combine attenuation and normalisation ansng = asng * nsng #========================================================================= #========================================================================= # Randoms - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- if isinstance(randsino, np.ndarray): rsino = randsino rsng = mmraux.remgaps(randsino, txLUT, Cnt) @@ -255,7 +255,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N #========================================================================= # SCAT - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- if recmod == 2: if not sctsino is None: 
ssng = mmraux.remgaps(sctsino, txLUT, Cnt) @@ -280,34 +280,34 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N #========================================================================= log.info('------ OSEM (%d) -------' % itr) - #------------------------------------ + # ------------------------------------ Sn = 14 # number of subsets - #-get one subset to get number of projection bins in a subset + # -get one subset to get number of projection bins in a subset Sprj, s = get_subsets14(0, scanner_params) Nprj = len(Sprj) - #-init subset array and sensitivity image for a given subset + # -init subset array and sensitivity image for a given subset sinoTIdx = np.zeros((Sn, Nprj + 1), dtype=np.int32) - #-init sensitivity images for each subset + # -init sensitivity images for each subset imgsens = np.zeros((Sn, Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) for n in range(Sn): - sinoTIdx[n, 0] = Nprj #first number of projection for the given subset + sinoTIdx[n, 0] = Nprj # first number of projection for the given subset sinoTIdx[n, 1:], s = get_subsets14(n, scanner_params) # sensitivity image petprj.bprj(imgsens[n, :, :, :], ansng[sinoTIdx[n, 1:], :], txLUT, axLUT, sinoTIdx[n, 1:], Cnt) - #------------------------------------- + # ------------------------------------- - #-mask for reconstructed image. anything outside it is set to zero + # -mask for reconstructed image. 
anything outside it is set to zero msk = mmrimg.get_cylinder(Cnt, rad=mask_radius, xo=0, yo=0, unival=1, gpu_dim=True) > 0.9 - #-init image + # -init image img = np.ones((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) - #-decay correction + # -decay correction lmbd = np.log(2) / resources.riLUT[Cnt['ISOTOPE']]['thalf'] if Cnt['DCYCRR'] and 't0' in hst and 'dur' in hst: - #> decay correct to the reference time (e.g., injection time) if provided - #> otherwise correct in reference to the scan start time + # > decay correct to the reference time (e.g., injection time) if provided + # > otherwise correct in reference to the scan start time if not decay_ref_time is None: tref = decay_ref_time else: @@ -329,20 +329,20 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N qf = 1. qf_loc = 1. - #-affine matrix for the reconstructed images + # -affine matrix for the reconstructed images B = mmrimg.image_affine(datain, Cnt) # resolution modelling psfkernel = psf_config(psf, Cnt) - #-time it + # -time it stime = time.time() # import pdb; pdb.set_trace() #========================================================================= # OSEM RECONSTRUCTION - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- with trange(itr, desc="OSEM", disable=log.getEffectiveLevel() > logging.INFO, leave=log.getEffectiveLevel() <= logging.INFO) as pbar: @@ -353,7 +353,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N if np.nansum(img) < 0.1: log.warning('it seems there is not enough true data to render reasonable image') - #img[:]=0 + # img[:]=0 itr = k break if recmod >= 3 and (((k < itr - 1) and (itr > 1))): # or (itr==1) @@ -384,14 +384,14 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N log.info('applying quantification factor:%r to the whole image' % qf) log.info('for the 
frame duration of :%r' % hst['dur']) - img *= dcycrr * qf * qf_loc #additional factor for making it quantitative in absolute terms (derived from measurements) + img *= dcycrr * qf * qf_loc # additional factor for making it quantitative in absolute terms (derived from measurements) - #---- save images ----- - #-first convert to standard mMR image size + # ---- save images ----- + # -first convert to standard mMR image size im = mmrimg.convert2e7(img, Cnt) - #-description text to NIfTI - #-attenuation number: if only bed present then it is 0.5 + # -description text to NIfTI + # -attenuation number: if only bed present then it is 0.5 attnum = (1 * (np.sum(muh) > 0.5) + 1 * (np.sum(muo) > 0.5)) / 2. descrip = 'alg=osem'+ \ ';sub=14'+ \ @@ -405,8 +405,8 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N ';dur='+str(hst['dur']) +\ ';qf='+str(qf) - #> file name of the output reconstructed image - #> (maybe used later even if not stored now) + # > file name of the output reconstructed image + # > (maybe used later even if not stored now) fpet = os.path.join(opth, os.path.basename(datain['lm_bf']).split('.')[0] \ + frmno +'_t'+str(hst['t0'])+'-'+str(hst['t1'])+'sec' \ +'_itr'+str(itr)+fcomment+'.nii.gz') @@ -457,12 +457,12 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N return recout -#=============================================================================== +# ============================================================================== # EMML # def emml( datain, mumaps, hst, txLUT, axLUT, Cnt, # recmod=3, itr=10, fwhm=0., mask_radius=29., store_img=True, ret_sinos=False, sctsino = None, randsino = None, normcomp = None): -# #subsets (when not used) +# # subsets (when not used) # sbs = np.array([-1], dtype=np.int32) # # get object and hardware mu-maps @@ -476,7 +476,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # 
#========================================================================= # # GET NORM -# #------------------------------------------------------------------------- +# # ------------------------------------------------------------------------- # if normcomp == None: # ncmp, _ = mmrnorm.get_components(datain, Cnt) # else: @@ -487,7 +487,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # #========================================================================= # # Randoms -# #------------------------------------------------------------------------- +# # ------------------------------------------------------------------------- # if randsino == None: # rsino, snglmap = mmrhist.rand(hst['fansums'], txLUT, axLUT, Cnt) # rsng = mmraux.remgaps(rsino, txLUT, Cnt) @@ -498,7 +498,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # #========================================================================= # # ATTENUATION FACTORS FOR COMBINED OBJECT AND BED MU-MAP -# #------------------------------------------------------------------------- +# # ------------------------------------------------------------------------- # # combine attenuation and norm together depending on reconstruction mode # if recmod==0: # asng = np.ones(psng.shape, dtype=np.float32) @@ -510,7 +510,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # #========================================================================= # # SCATTER and the additive term -# #------------------------------------------------------------------------- +# # ------------------------------------------------------------------------- # if recmod==2: # if sctsino != None: # # remove the gaps from the provided scatter sinogram @@ -529,7 +529,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # rssng = (rsng + ssng) / attnrmsng # 
#========================================================================= -# #mask for reconstructed image +# # mask for reconstructed image # msk = mmrimg.get_cylinder(Cnt, rad=mask_radius, xo=0, yo=0, unival=1, gpu_dim=True)>0.9 # # estimated image # imrec = np.ones((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) @@ -538,7 +538,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # # Get sensitivity image by backprojection # sim = np.zeros((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) # petprj.bprj(sim, attnrmsng, txLUT, axLUT, sbs, Cnt) -# #init estimate sino +# # init estimate sino # esng = np.zeros((Cnt['Naw'], Cnt['NSN11']), dtype=np.float32) # for k in range(itr): @@ -567,7 +567,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # # apply quantitative correction to the image # qf = ncmp['qf'] / resources.riLUT[Cnt['ISOTOPE']]['BF'] / float(hst['dur']) # log.debug('applying quantification factor:%r to the whole image for the frame duration of:%r' % (qf, hst['dur'])) -# imrec *= dcycrr * qf * 0.205 #additional factor for making it quantitative in absolute terms (derived from measurements) +# imrec *= dcycrr * qf * 0.205 # additional factor for making it quantitative in absolute terms (derived from measurements) # # convert to standard mMR image size # im = mmrimg.convert2e7(imrec, Cnt) @@ -575,7 +575,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # if fwhm>0: # im = ndi.filters.gaussian_filter(im, fwhm2sig(fwhm, Cnt), mode='mirror') -# #save images +# # save images # B = mmrimg.image_affine(datain, Cnt) # fout = '' @@ -606,7 +606,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # return recout -#============================================================================= +# ============================================================================ # OSEM # def osem14(datain, 
mumaps, hst, txLUT, axLUT, Cnt, @@ -620,7 +620,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # elif Cnt['SPN']==11: # snno = Cnt['NSN11'] -# #subsets (when not used) +# # subsets (when not used) # sbs = np.array([-1], dtype=np.int32) # # remove gaps from the prompt sino @@ -628,20 +628,20 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # #========================================================================= # # GET NORM -# #------------------------------------------------------------------------- +# # ------------------------------------------------------------------------- # nrmsng = mmrnorm.get_sinog(datain, hst, axLUT, txLUT, Cnt) # #========================================================================= # #========================================================================= # # RANDOMS ESTIMATION -# #------------------------------------------------------------------------- +# # ------------------------------------------------------------------------- # rsino, snglmap = mmrhist.rand(hst['fansums'], txLUT, axLUT, Cnt) # rndsng = mmraux.remgaps(rsino, txLUT, Cnt) # #========================================================================= # #========================================================================= # # FORM THE ADDITIVE TERM -# #------------------------------------------------------------------------- +# # ------------------------------------------------------------------------- # if recmod==0 or recmod==1 or recmod==3 or recmod==4: # rssng = rndsng # elif recmod==2: @@ -656,7 +656,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # #========================================================================= # # ATTENUATION FACTORS FOR COMBINED OBJECT AND BED MU-MAP -# #------------------------------------------------------------------------- +# # ------------------------------------------------------------------------- # # 
combine attenuation and norm together depending on reconstruction mode # if recmod==0 or recmod==2: # attnrmsng = nrmsng @@ -666,9 +666,9 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # attnrmsng *= nrmsng # #========================================================================= -# #mask for reconstructed image +# # mask for reconstructed image # rcnmsk = mmrimg.get_cylinder(Cnt, rad=mask_radius, xo=0, yo=0, unival=1, gpu_dim=True) -# #------------------------------------------------------------------------- +# # ------------------------------------------------------------------------- # # number of subsets # Sn = 14 # # get one subset to get number of projection bins in a subset @@ -679,7 +679,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # for n in range(Sn): # sinoTIdx[n,:], s = get_subsets14(n,txLUT,Cnt) # petprj.bprj(sim[n,:,:,:], attnrmsng, txLUT, axLUT, sinoTIdx[n,:], Cnt) -# #-------------------------------------------------------------------------- +# # -------------------------------------------------------------------------- # # estimated image # xim = np.ones((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) @@ -725,19 +725,19 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # # plt.figure(); plt.imshow(xim[:,:,70], interpolation='none', cmap='gray'); plt.show() -# #plt.figure(); plt.imshow(xim[:,:,70], interpolation='none', cmap='gray'); plt.show() +# # plt.figure(); plt.imshow(xim[:,:,70], interpolation='none', cmap='gray'); plt.show() # if (recmod==3 or recmod==4) and k0: # imsmo = ndi.filters.gaussian_filter(im, fwhm2sig(fwhm, Cnt), mode='mirror') # nimpa.array2nii( imsmo[::-1,::-1,:], B, diff --git a/niftypet/nipet/prj/mmrsim.py b/niftypet/nipet/prj/mmrsim.py index b033153b..e864706a 100644 --- a/niftypet/nipet/prj/mmrsim.py +++ b/niftypet/nipet/prj/mmrsim.py @@ -35,7 +35,7 @@ def simulate_sino( mu_input : if True, the 
values are representative of a mu-map in [1/cm], otherwise it represents the CT in [HU]. ''' - #> decompose the scanner constants and LUTs for easier access + # > decompose the scanner constants and LUTs for easier access Cnt = scanner_params['Cnt'] if petim.shape != ctim.shape: @@ -48,7 +48,7 @@ def simulate_sino( if petim.max() > 200: log.warning('the PET image may have too large intensities for robust simulation.') else: - #> 2D case with reduced rings + # > 2D case with reduced rings if len(petim.shape) == 3: # make sure that the shape of the input image matches the image size of the scanner if petim.shape[1:] != (Cnt['SO_IMY'], Cnt['SO_IMX']): @@ -75,43 +75,43 @@ def simulate_sino( # import pdb; pdb.set_trace() - #-------------------- + # -------------------- if mu_input: mui = ctim else: - #> get the mu-map [1/cm] from CT [HU] + # > get the mu-map [1/cm] from CT [HU] mui = nimpa.ct2mu(ctim) - #> get rid of negative values + # > get rid of negative values mui[mui < 0] = 0 - #-------------------- + # -------------------- if simulate_3d: rmu = mui rpet = petim else: - #> 2D case with reduced rings - #-------------------- - #> create a number of slices of the same chosen image slice for reduced (fast) 3D simulation + # > 2D case with reduced rings + # -------------------- + # > create a number of slices of the same chosen image slice for reduced (fast) 3D simulation rmu = mui[slice_idx, :, :] rmu.shape = (1,) + rmu.shape rmu = np.repeat(rmu, Cnt['rSZ_IMZ'], axis=0) - #-------------------- + # -------------------- - #-------------------- - #> form a short 3D image of the same emission image slice + # -------------------- + # > form a short 3D image of the same emission image slice rpet = petim[slice_idx, :, :].copy() rpet.shape = (1,) + rpet.shape rpet = np.repeat(rpet, Cnt['rSZ_IMZ'], axis=0) - #-------------------- + # -------------------- - #> forward project the mu-map to obtain attenuation factors + # > forward project the mu-map to obtain attenuation factors 
attsino = mmrprj.frwd_prj(rmu, scanner_params, attenuation=True) - #> forward project the PET image to obtain non-attenuated emission sino + # > forward project the PET image to obtain non-attenuated emission sino emisino = mmrprj.frwd_prj(rpet, scanner_params, attenuation=False) - #> return the simulated emission sino with photon attenuation + # > return the simulated emission sino with photon attenuation return attsino * emisino @@ -143,7 +143,7 @@ def simulate_recon( axial and transaxial look up tables (LUTs) randoms : randoms and scatter events (optional) ''' - #> decompose the scanner constants and LUTs for easier access + # > decompose the scanner constants and LUTs for easier access Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] @@ -154,7 +154,7 @@ def simulate_recon( or ctim.shape!=(Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']): raise ValueError('The CT/mu-map image does not match the scanner image shape.') else: - #> 2D case with reduced rings + # > 2D case with reduced rings if len(ctim.shape) == 3: # make sure that the shape of the input image matches the image size of the scanner if ctim.shape[1:] != (Cnt['SO_IMY'], Cnt['SO_IMX']): @@ -178,39 +178,39 @@ def simulate_recon( if 'rSZ_IMZ' not in Cnt: raise ValueError('Missing reduced axial FOV parameters.') - #-------------------- + # -------------------- if mu_input: mui = ctim else: - #> get the mu-map [1/cm] from CT [HU] + # > get the mu-map [1/cm] from CT [HU] mui = nimpa.ct2mu(ctim) - #> get rid of negative values + # > get rid of negative values mui[mui < 0] = 0 - #-------------------- + # -------------------- if simulate_3d: rmu = mui - #> number of axial sinograms + # > number of axial sinograms nsinos = Cnt['NSN11'] else: - #-------------------- - #> create a number of slides of the same chosen image slice for reduced (fast) 3D simulation + # -------------------- + # > create a number of slides of the same chosen image slice for reduced (fast) 3D 
simulation rmu = mui[slice_idx, :, :] rmu.shape = (1,) + rmu.shape rmu = np.repeat(rmu, Cnt['rSZ_IMZ'], axis=0) - #-------------------- - #> number of axial sinograms + # -------------------- + # > number of axial sinograms nsinos = Cnt['rNSN1'] # import pdb; pdb.set_trace() - #> attenuation factor sinogram + # > attenuation factor sinogram attsino = mmrprj.frwd_prj(rmu, scanner_params, attenuation=True, dev_out=True) nrmsino = np.ones(attsino.shape, dtype=np.float32) - #> randoms and scatter put together + # > randoms and scatter put together if isinstance(randoms, np.ndarray) and measured_sino.shape == randoms.shape: rsng = mmraux.remgaps(randoms, txLUT, Cnt) else: @@ -230,31 +230,31 @@ def simulate_recon( # measured sinogram in GPU-enabled shape psng = mmraux.remgaps(measured_sino.astype(np.uint16), txLUT, Cnt) - #> mask for reconstructed image. anything outside it is set to zero + # > mask for reconstructed image. anything outside it is set to zero msk = mmrimg.get_cylinder(Cnt, rad=msk_radius, xo=0, yo=0, unival=1, gpu_dim=True) > 0.9 - #> init image + # > init image eimg = np.ones((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) - #------------------------------------ + # ------------------------------------ Sn = 14 # number of subsets - #-get one subset to get number of projection bins in a subset + # -get one subset to get number of projection bins in a subset Sprj, s = mmrrec.get_subsets14(0, scanner_params) Nprj = len(Sprj) - #> init subset array and sensitivity image for a given subset + # > init subset array and sensitivity image for a given subset sinoTIdx = np.zeros((Sn, Nprj + 1), dtype=np.int32) - #> init sensitivity images for each subset + # > init sensitivity images for each subset sim = np.zeros((Sn, Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) for n in trange(Sn, desc="sensitivity", leave=log.getEffectiveLevel() < logging.INFO): - sinoTIdx[n, 0] = Nprj #first number of projection for the given subset + 
sinoTIdx[n, 0] = Nprj # first number of projection for the given subset sinoTIdx[n, 1:], s = mmrrec.get_subsets14(n, scanner_params) - #> sensitivity image + # > sensitivity image petprj.bprj(sim[n, :, :, :], attsino[sinoTIdx[n, 1:], :], txLUT, axLUT, sinoTIdx[n, 1:], Cnt) - #------------------------------------- + # ------------------------------------- for k in trange(nitr, desc="OSEM", disable=log.getEffectiveLevel() > logging.INFO, leave=log.getEffectiveLevel() < logging.INFO): @@ -269,12 +269,12 @@ def psf(x, output=None): x = ndi.gaussian_filter(x, sigma=Cnt['SIGMA_RM'], mode='constant', output=None) return x - #> estimated image, initialised to ones + # > estimated image, initialised to ones eim = np.ones(rmu.shape, dtype=np.float32) msk = mmrimg.get_cylinder(Cnt, rad=msk_radius, xo=0, yo=0, unival=1, gpu_dim=False) > 0.9 - #> sensitivity image for the EM-ML reconstruction + # > sensitivity image for the EM-ML reconstruction sim = mmrprj.back_prj(attsino, scanner_params) sim_inv = 1 / psf(sim) sim_inv[~msk] = 0 @@ -282,18 +282,18 @@ def psf(x, output=None): rndsct = rsng + ssng for i in trange(nitr, desc="MLEM", disable=log.getEffectiveLevel() > logging.INFO, leave=log.getEffectiveLevel() < logging.INFO): - #> remove gaps from the measured sinogram - #> then forward project the estimated image - #> after which divide the measured sinogram by the estimated sinogram (forward projected) + # > remove gaps from the measured sinogram + # > then forward project the estimated image + # > after which divide the measured sinogram by the estimated sinogram (forward projected) crrsino = mmraux.remgaps(measured_sino, txLUT, Cnt) / \ (mmrprj.frwd_prj(psf(eim), scanner_params, dev_out=True) + rndsct) - #> back project the correction factors sinogram + # > back project the correction factors sinogram bim = mmrprj.back_prj(crrsino, scanner_params) bim = psf(bim, output=bim) - #> divide the back-projected image by the sensitivity image - #> update the estimated image and 
remove NaNs + # > divide the back-projected image by the sensitivity image + # > update the estimated image and remove NaNs eim *= bim * sim_inv eim[np.isnan(eim)] = 0 diff --git a/niftypet/nipet/sct/__init__.py b/niftypet/nipet/sct/__init__.py index 2bdc2820..889048e4 100644 --- a/niftypet/nipet/sct/__init__.py +++ b/niftypet/nipet/sct/__init__.py @@ -1,3 +1,4 @@ # init the package folder +__all__ = ['mmrsct', 'get_knlut', 'vsm'] from . import mmrsct from .mmrsct import get_knlut, vsm diff --git a/niftypet/nipet/sct/mmrsct.py b/niftypet/nipet/sct/mmrsct.py index d8a6908a..9c1d0762 100644 --- a/niftypet/nipet/sct/mmrsct.py +++ b/niftypet/nipet/sct/mmrsct.py @@ -30,9 +30,9 @@ def fwhm2sig(fwhm, Cnt): return (fwhm / Cnt['SO_VXY']) / (2 * (2 * np.log(2))**.5) -#======================================================================= +# ====================================================================== # S C A T T E R -#----------------------------------------------------------------------- +# ---------------------------------------------------------------------- def get_scrystals(scanner_params): @@ -40,28 +40,28 @@ def get_scrystals(scanner_params): Get table of selected transaxial and axial (ring) crystals used for scatter modelling ''' - #> decompose constants, transaxial and axial LUTs are extracted + # > decompose constants, transaxial and axial LUTs are extracted Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] - #------------------------------------------------------ - #> transaxial crystals definitions + # ------------------------------------------------------ + # > transaxial crystals definitions crs = txLUT['crs'] - #> period of scatter crystals (needed for definition) + # > period of scatter crystals (needed for definition) SCRS_T = 7 - #> counter for crystal period, SCRS_T + # > counter for crystal period, SCRS_T cntr = 0 - #> scatter crystal index + # > scatter crystal index iscrs = 0 - #> initialise list of 
transaxial scatter crystal table + # > initialise list of transaxial scatter crystal table scrs = [] - #> transaxial scatter crystal selection for modelling + # > transaxial scatter crystal selection for modelling for c in range(Cnt['NCRS']): if (((c+1) % 9) == 0): continue @@ -71,16 +71,16 @@ def get_scrystals(scanner_params): scrs.append([c, 0.5 * (crs[c, 0] + crs[c, 2]), 0.5 * (crs[c, 1] + crs[c, 3])]) iscrs += 1 - #> convert the scatter crystal table to Numpy array + # > convert the scatter crystal table to Numpy array scrs = np.array(scrs, dtype=np.float32) - #------------------------------------------------------ + # ------------------------------------------------------ - #------------------------------------------------------ - #> scatter ring definition (axially) + # ------------------------------------------------------ + # > scatter ring definition (axially) sct_irng = np.int16([0, 10, 19, 28, 35, 44, 53, 63]) # number of scatter rings (used for scatter estimation) NSRNG = len(sct_irng) - #------------------------------------------------------ + # ------------------------------------------------------ logtxt = '' @@ -96,30 +96,30 @@ def get_scrystals(scanner_params): return dict(scrs=scrs, srng=srng, sirng=sct_irng, NSCRS=scrs.shape[0], NSRNG=NSRNG) -#======================================================================= +# ====================================================================== def get_sctlut2d(txLUT, scrs_def): - #> scatter to sinogram bin index LUT + # > scatter to sinogram bin index LUT sct2aw = np.zeros(scrs_def['NSCRS'] * scrs_def['NSCRS'], dtype=np.int32) # scatter/unscattered crystal x-coordinate (used for determining +/- sino segments) xsxu = np.zeros((scrs_def['NSCRS'], scrs_def['NSCRS']), dtype=np.int8) - #> loop over unscattered crystals + # > loop over unscattered crystals for uc in range(scrs_def['NSCRS']): - #> loop over scatter crystals + # > loop over scatter crystals for sc in range(scrs_def['NSCRS']): - #> sino linear 
index (full including any gaps) - #> scrs_def['scrs'] is a 2D array of rows [sct_crs_idx, mid_x, mid_y] + # > sino linear index (full including any gaps) + # > scrs_def['scrs'] is a 2D array of rows [sct_crs_idx, mid_x, mid_y] sct2aw[scrs_def['NSCRS']*uc + sc] = \ txLUT['c2sFw'][ int(scrs_def['scrs'][uc,0]), int(scrs_def['scrs'][sc,0]) ] - #> scattered and unscattered crystal positions (used for determining +/- sino segments) + # > scattered and unscattered crystal positions (used for determining +/- sino segments) xs = scrs_def['scrs'][sc, 1] xu = scrs_def['scrs'][uc, 1] @@ -131,10 +131,10 @@ def get_sctlut2d(txLUT, scrs_def): return dict(sct2aw=sct2aw, xsxu=xsxu, c2sFw=txLUT['c2sFw']) -#======================================================================= +# ====================================================================== -#======================================================================= +# ====================================================================== def get_knlut(Cnt): ''' get Klein-Nishina LUTs @@ -164,7 +164,7 @@ def get_knlut(Cnt): log.info('using energy resolution for scatter simulation, ER = {}'.format(Cnt['ER'])) knlut[i, 0] *= .5 * erfc( (Cnt['LLD'] - alpha * Cnt['E511']) / (SIG511 * np.sqrt(2 * alpha))) - #knlut[i,0] *= .5*erfc( (Cnt['LLD']-alpha*Cnt['E511'])/(SIG511) ); + # knlut[i,0] *= .5*erfc( (Cnt['LLD']-alpha*Cnt['E511'])/(SIG511) ); # for large angles (small cosups) when the angle in GPU calculations is greater than COSUPSMX if (i == 0): @@ -173,12 +173,12 @@ def get_knlut(Cnt): return knlut -#======================================================================= +# ====================================================================== -#================================================================================================== +# ================================================================================================= # GET SCATTER LUTs 
-#-------------------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------------------------- def rd2sni(offseg, r1, r0): rd = np.abs(r1 - r0) rdi = (2*rd - 1 * (r1 > r0)) @@ -186,23 +186,23 @@ def rd2sni(offseg, r1, r0): return sni -#-------------------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------------------------- def get_sctLUT(scanner_params): - #> decompose constants, transaxial and axial LUTs are extracted + # > decompose constants, transaxial and axial LUTs are extracted Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] - #> get the Klein-Nishina LUT: + # > get the Klein-Nishina LUT: KN = get_knlut(Cnt) - #> get scatter crystal tables: + # > get scatter crystal tables: scrs_def = get_scrystals(scanner_params) - #> get 2D scatter LUT (for transaxial sinograms) + # > get 2D scatter LUT (for transaxial sinograms) sctlut2d = get_sctlut2d(txLUT, scrs_def) # get the indexes of rings used for scatter estimation @@ -211,15 +211,15 @@ def get_sctLUT(scanner_params): # get number of ring accounting for the possible ring reduction (to save computation time) # NRNG = Cnt['RNG_END']-Cnt['RNG_STRT'] - #-span-1 LUT (rings to sino index) + # -span-1 LUT (rings to sino index) seg = np.append([Cnt['NRNG']], np.ceil(np.arange(Cnt['NRNG'] - 1, 0, -.5)).astype(np.int16)) offseg = np.int16(np.append([0], np.cumsum(seg))) - #-3D scatter sino LUT. axial component based on michelogram. + # -3D scatter sino LUT. axial component based on michelogram. 
sctaxR = np.zeros((Cnt['NRNG']**2, 4), dtype=np.int32) sctaxW = np.zeros((Cnt['NRNG']**2, 4), dtype=np.float32) - #-just for local check and display of the interpolation at work + # -just for local check and display of the interpolation at work mich = np.zeros((Cnt['NRNG'], Cnt['NRNG']), dtype=np.float32) mich2 = np.zeros((Cnt['NRNG'], Cnt['NRNG']), dtype=np.float32) @@ -229,22 +229,22 @@ def get_sctLUT(scanner_params): # plt.figure(64), plt.imshow(mich, interpolation='none') for r1 in range(Cnt['RNG_STRT'], Cnt['RNG_END']): - #border up and down + # border up and down bd = next(idx for idx in irng if idx >= r1) bu = next(idx for idx in irng[::-1] if idx <= r1) for r0 in range(Cnt['RNG_STRT'], Cnt['RNG_END']): # if (np.abs(r1-r0)>MRD): # continue - #border left and right + # border left and right br = next(idx for idx in irng if idx >= r0) bl = next(idx for idx in irng[::-1] if idx <= r0) - #print '(r0,r1)=', r0,r1, '(bl,br,bu,bd)', bl,br,bu,bd + # print '(r0,r1)=', r0,r1, '(bl,br,bu,bd)', bl,br,bu,bd - #span-1 sino index (sni) creation: + # span-1 sino index (sni) creation: sni = rd2sni(offseg, r1, r0) - #see: https://en.wikipedia.org/wiki/Bilinear_interpolation + # see: https://en.wikipedia.org/wiki/Bilinear_interpolation if (br == bl) and (bu != bd): sctaxR[sni, 0] = rd2sni(offseg, bd, r0) @@ -301,19 +301,19 @@ def get_sctLUT(scanner_params): return sctLUT -#------------------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------------------------ # S C A T T E R I N T E R P O L A T I O N -#------------------------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------------------------ -#============================================================================== +# ============================================================================= def 
intrp_bsct(sct3d, Cnt, sctLUT, ssrlut, dtype=np.float32): ''' interpolate the basic scatter distributions which are then transferred into the scatter sinograms. ''' - #> number of sinograms + # > number of sinograms snno = sct3d.shape[1] i_scrs = sctLUT['scrs'][:, 0].astype(int) @@ -323,10 +323,10 @@ def intrp_bsct(sct3d, Cnt, sctLUT, ssrlut, dtype=np.float32): xnew = np.arange(Cnt['NCRS']) ynew = np.arange(Cnt['NCRS']) - #> advanced indexing matrix for rolling the non-interpolated results + # > advanced indexing matrix for rolling the non-interpolated results jj, ii = np.mgrid[0:sctLUT['NSCRS'], 0:sctLUT['NSCRS']] - #> roll each row according to the position + # > roll each row according to the position for i in range(sctLUT['NSCRS']): ii[i, :] = np.roll(ii[i, :], -1 * i) @@ -353,14 +353,14 @@ def intrp_bsct(sct3d, Cnt, sctLUT, ssrlut, dtype=np.float32): # unroll znew = znew[jjnew, iinew] - #> upper triangle - #> add '1' to include index zero (distinguished from after triangulation) + # > upper triangle + # > add '1' to include index zero (distinguished from after triangulation) qi = np.triu(sctLUT['c2sFw'] + 1) > 0 sidx = sctLUT['c2sFw'][qi] s = znew[qi] sn2d[sidx] = s - #> lower triangle + # > lower triangle qi = np.tril(sctLUT['c2sFw'] + 1) > 0 sidx = sctLUT['c2sFw'][qi] s = znew[qi] @@ -370,10 +370,10 @@ def intrp_bsct(sct3d, Cnt, sctLUT, ssrlut, dtype=np.float32): sssr[ti, ssrlut[si], ...] 
+= ssn[ti, si, :, :] return np.squeeze(ssn), np.squeeze(sssr) - #------------------------------------------------- + # ------------------------------------------------- -#==================================================================================================== +# =================================================================================================== def vsm( @@ -427,7 +427,7 @@ def vsm( ''' - #> decompose constants, transaxial and axial LUTs are extracted + # > decompose constants, transaxial and axial LUTs are extracted Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] @@ -435,7 +435,7 @@ def vsm( if self_scaling: scaling = True - #> decompose mu-maps + # > decompose mu-maps muh, muo = mumaps if emmsk and not os.path.isfile(datain['em_nocrr']): @@ -449,14 +449,14 @@ def vsm( # if rsino is None and not histo is None and 'rsino' in histo: # rsino = histo['rsino'] - #> if histogram data or randoms sinogram not given, then no scaling or normalisation + # > if histogram data or randoms sinogram not given, then no scaling or normalisation if (histo is None) or (rsino is None): scaling = False - #-get the normalisation components + # -get the normalisation components nrmcmp, nhdr = mmrnorm.get_components(datain, Cnt) - #-smooth for defining the sino scatter only regions + # -smooth for defining the sino scatter only regions if fwhm_input > 0.: mu_sctonly = ndi.filters.gaussian_filter(mmrimg.convert2dev(muo, Cnt), fwhm2sig(fwhm_input, Cnt), mode='mirror') @@ -474,10 +474,10 @@ def vsm( ssrlut = axLUT['sn11_ssrb'] saxnrm = nrmcmp['sax_f11'] - #LUTs for scatter + # LUTs for scatter sctLUT = get_sctLUT(scanner_params) - #> smooth before scaling/down-sampling the mu-map and emission images + # > smooth before scaling/down-sampling the mu-map and emission images if fwhm_input > 0.: muim = ndi.filters.gaussian_filter(muo + muh, fwhm2sig(fwhm_input, Cnt), mode='mirror') emim = ndi.filters.gaussian_filter(em, 
fwhm2sig(fwhm_input, Cnt), mode='mirror') @@ -488,14 +488,14 @@ def vsm( muim = ndi.interpolation.zoom(muim, Cnt['SCTSCLMU'], order=3) #(0.499, 0.5, 0.5) emim = ndi.interpolation.zoom(emim, Cnt['SCTSCLEM'], order=3) #(0.34, 0.33, 0.33) - #-smooth the mu-map for mask creation. the mask contains voxels for which attenuation ray LUT is found. + # -smooth the mu-map for mask creation. the mask contains voxels for which attenuation ray LUT is found. if fwhm_input > 0.: smomu = ndi.filters.gaussian_filter(muim, fwhm2sig(fwhm_input, Cnt), mode='mirror') mumsk = np.int8(smomu > 0.003) else: mumsk = np.int8(muim > 0.001) - #CORE SCATTER ESTIMATION + # CORE SCATTER ESTIMATION NSCRS, NSRNG = sctLUT['NSCRS'], sctLUT['NSRNG'] sctout = { 'sct_3d': np.zeros((Cnt['TOFBINN'], snno_, NSCRS, NSCRS), dtype=np.float32), @@ -510,14 +510,14 @@ def vsm( log.debug('total scatter sum: {}'.format(np.sum(sct3d))) - #------------------------------------------------------------------- - #> initialise output dictionary + # ------------------------------------------------------------------- + # > initialise output dictionary out = {} if return_uninterp: out['uninterp'] = sct3d out['indexes'] = sctind - #------------------------------------------------------------------- + # ------------------------------------------------------------------- if np.sum(sct3d) < 1e-04: log.warning('total scatter below threshold: {}'.format(np.sum(sct3d))) @@ -528,10 +528,10 @@ def vsm( # import pdb; pdb.set_trace() - #------------------------------------------------------------------- + # ------------------------------------------------------------------- if interpolate: - #> interpolate basic scatter distributions into full size and - #> transfer them to sinograms + # > interpolate basic scatter distributions into full size and + # > transfer them to sinograms log.debug('transaxial scatter interpolation...') start = time.time() @@ -545,9 +545,9 @@ def vsm( return out else: return out - 
#------------------------------------------------------------------- + # ------------------------------------------------------------------- - #------------------------------------------------------------------- + # ------------------------------------------------------------------- # import pdb; pdb.set_trace() ''' debugging scatter: @@ -570,32 +570,32 @@ def vsm( plt.matshow(sssr[0,70,...]) plt.matshow(sssr[0,50,...]) ''' - #------------------------------------------------------------------- + # ------------------------------------------------------------------- - #> get SSR for randoms from span-1 or span-11 + # > get SSR for randoms from span-1 or span-11 rssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) if scaling: for i in range(snno): rssr[ssrlut[i], :, :] += rsino[i, :, :] - #ATTENUATION FRACTIONS for scatter only regions, and NORMALISATION for all SCATTER + # ATTENUATION FRACTIONS for scatter only regions, and NORMALISATION for all SCATTER #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> currentspan = Cnt['SPN'] Cnt['SPN'] = 1 atto = np.zeros((txLUT['Naw'], Cnt['NSN1']), dtype=np.float32) petprj.fprj(atto, mu_sctonly, txLUT, axLUT, np.array([-1], dtype=np.int32), Cnt, 1) atto = mmraux.putgaps(atto, txLUT, Cnt) - #-------------------------------------------------------------- - #> get norm components setting the geometry and axial to ones as they are accounted for differently + # -------------------------------------------------------------- + # > get norm components setting the geometry and axial to ones as they are accounted for differently nrmcmp['geo'][:] = 1 nrmcmp['axe1'][:] = 1 - #get sino with no gaps + # get sino with no gaps nrmg = np.zeros((txLUT['Naw'], Cnt['NSN1']), dtype=np.float32) mmr_auxe.norm(nrmg, nrmcmp, histo['buckets'], axLUT, txLUT['aw2ali'], Cnt) nrm = mmraux.putgaps(nrmg, txLUT, Cnt) - #-------------------------------------------------------------- + # 
-------------------------------------------------------------- - #> get attenuation + norm in (span-11) and SSR + # > get attenuation + norm in (span-11) and SSR attossr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) nrmsssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) @@ -608,11 +608,11 @@ def vsm( nrmg = np.zeros((txLUT['Naw'], snno), dtype=np.float32) mmr_auxe.norm(nrmg, nrmcmp, histo['buckets'], axLUT, txLUT['aw2ali'], Cnt) nrm = mmraux.putgaps(nrmg, txLUT, Cnt) - #-------------------------------------------------------------- + # -------------------------------------------------------------- #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> - #get the mask for the object from uncorrected emission image + # get the mask for the object from uncorrected emission image if emmsk and os.path.isfile(datain['em_nocrr']): nim = nib.load(datain['em_nocrr']) A = nim.get_sform() @@ -634,32 +634,32 @@ def vsm( #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> #======== SCALING ======== - #> scale scatter using non-TOF SSRB sinograms + # > scale scatter using non-TOF SSRB sinograms - #> gap mask + # > gap mask rmsk = (txLUT['msino'] > 0).T rmsk.shape = (1, Cnt['NSANGLES'], Cnt['NSBINS']) rmsk = np.repeat(rmsk, Cnt['NSEG0'], axis=0) - #> include attenuating object into the mask (and the emission if selected) + # > include attenuating object into the mask (and the emission if selected) amsksn = np.logical_and(attossr >= mask_threshlod, rmsk) * ~mssr - #> scaling factors for SSRB scatter + # > scaling factors for SSRB scatter scl_ssr = np.zeros((Cnt['NSEG0']), dtype=np.float32) for sni in range(Cnt['NSEG0']): - #> region for scaling defined by the percentage of lowest - #> but usable/significant scatter + # > region for scaling defined by the percentage of lowest + # > but usable/significant scatter thrshld = prcnt_scl * 
np.max(sssr[sni, :, :]) amsksn[sni, :, :] *= (sssr[sni, :, :] > thrshld) amsk = amsksn[sni, :, :] - #> normalised estimated scatter + # > normalised estimated scatter mssn = sssr[sni, :, :] * nrmsssr[sni, :, :] vpsn = histo['pssr'][sni, amsk] - rssr[sni, amsk] scl_ssr[sni] = np.sum(vpsn) / np.sum(mssn[amsk]) - #> scatter SSRB sinogram output + # > scatter SSRB sinogram output sssr[sni, :, :] *= nrmsssr[sni, :, :] * scl_ssr[sni] #=== scale scatter for the full-size sinogram === @@ -667,7 +667,7 @@ def vsm( for i in range(snno): sss[i, :, :] = ssn[i, :, :] * scl_ssr[ssrlut[i]] * saxnrm[i] * nrm[i, :, :] ''' - #> debug + # > debug si = 60 ai = 60 matshow(sssr[si,...]) diff --git a/setup.py b/setup.py index 9dc3e69e..6166b291 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ from niftypet.ninst import cudasetup as cs from niftypet.ninst import dinf from niftypet.ninst import install_tools as tls + __version__ = get_version(root=".", relative_to=__file__) logging.basicConfig(level=logging.INFO, format=tls.LOG_FORMAT) From c3b23c4f854c945a788df634eaf291292cd4dd84 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 7 Jan 2021 02:40:53 +0000 Subject: [PATCH 09/64] bugfixes --- niftypet/nipet/img/mmrimg.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/niftypet/nipet/img/mmrimg.py b/niftypet/nipet/img/mmrimg.py index 42895252..f4fc06f9 100644 --- a/niftypet/nipet/img/mmrimg.py +++ b/niftypet/nipet/img/mmrimg.py @@ -164,7 +164,7 @@ def getinterfile_off(fmu, Cnt, Offst=np.array([0., 0., 0.])): # > create GPU version of the mu-map murs = convert2dev(mur, Cnt) # > number of voxels - nvx = im.shape[0] + nvx = mu.shape[0] # > get the basic stats mumax = np.max(mur) mumin = np.min(mur) @@ -447,7 +447,7 @@ def align_mumap( nimpa.create_dir(tmpdir) # > get the timing of PET if affine not given - if faff == '' and not hst is None and isinstance(hst, dict) and 't0' in hst: + if faff == '' and hst is not None and isinstance(hst, dict) 
and 't0' in hst: t0 = hst['t0'] t1 = hst['t1'] @@ -714,10 +714,8 @@ def align_mumap( else: fname = fnm + '-aligned-to-given-affine' + fcomment if store_npy: - # > Numpy - if store_to_npy: - fnp = os.path.join(opth, fname + ".npz") - np.savez(fnp, mu=mu, A=A) + fnp = os.path.join(opth, fname + ".npz") + np.savez(fnp, mu=mu, A=A) if store: # > NIfTI fmu = os.path.join(opth, fname + '.nii.gz') @@ -828,7 +826,7 @@ def pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', try: regdct = nimpa.coreg_spm(fpet, ft1w, outpath=os.path.join(outpath, 'PET', 'positioning')) - except: + except Exception: regdct = nimpa.affine_niftyreg( fpet, ft1w, @@ -1310,7 +1308,7 @@ def rmumaps(datain, Cnt, t0=0, t1=0, use_stored=False): raise IOError('Path to registration executable is incorrect!') # pet the pCT mu-map with the above faff - pmudic = pct_mumap(datain, txLUT, axLUT, Cnt, faff=faff, fpet=recute.fpet, + pmudic = pct_mumap(datain, txLUT_, axLUT_, Cnt, faff=faff, fpet=recute.fpet, fcomment=fcomment) mup = pmudic['im'] From 6fa96617945b8be8163b3f055af8d26c3cafdaf4 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 7 Jan 2021 21:09:05 +0000 Subject: [PATCH 10/64] more bugfixes --- niftypet/nipet/__init__.py | 12 +++-- niftypet/nipet/img/pipe.py | 101 ++++++++++++++++++++--------------- niftypet/nipet/mmrnorm.py | 2 +- niftypet/nipet/prj/mmrprj.py | 13 ++--- niftypet/nipet/prj/mmrrec.py | 4 +- niftypet/nipet/sct/mmrsct.py | 1 + 6 files changed, 77 insertions(+), 56 deletions(-) diff --git a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index a5f8feae..0976bbc9 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ -16,9 +16,9 @@ # GPU utils 'resource_filename', 'cs', 'dev_info', 'gpuinfo', # utils - 'LOG_FORMAT', 'LogHandler', 'path_resources', 'resources' + 'LOG_FORMAT', 'LogHandler', 'path_resources', 'resources', # package - 'img', 'lm', 'mmr_auxe', 'mmraux', 'mmrnorm', 'prj' + 'img', 'lm', 'mmr_auxe', 'mmraux', 
'mmrnorm', 'prj', # img 'align_mumap', 'im_e72dev', 'im_dev2e7', 'hdw_mumap', 'obj_mumap', 'pct_mumap', 'mmrchain', @@ -29,7 +29,9 @@ # prj 'back_prj', 'frwd_prj', 'simulate_recon', 'simulate_sino', # sct - 'vsm'] # yapf: disable + 'vsm', + # optional + 'video_dyn', 'video_frm', 'xnat'] # yapf: disable from pkg_resources import resource_filename from niftypet.ninst import cudasetup as cs @@ -58,9 +60,13 @@ if resources.ENBLAGG: from .lm.pviews import video_dyn, video_frm +else: + video_dyn, video_frm = None, None if resources.ENBLXNAT: from xnat import xnat +else: + xnat = None # > GE Signa # from . import aux_sig diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index f2838380..d8115724 100644 --- a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -21,60 +21,73 @@ def mmrchain( - datain, # all input data in a dictionary - scanner_params, # all scanner parameters in one dictionary - # containing constants, transaxial and axial - # LUTs. - outpath='', # output path for results - frames=['fluid', [0, 0]], # definition of time frames. - mu_h=[], # hardware mu-map. - mu_o=[], # object mu-map. - tAffine=None, # affine transformations for the mu-map for - # each time frame separately. - itr=4, # number of OSEM iterations - fwhm=0., # Gaussian Post-Smoothing FWHM - psf=None, # Resolution Modelling - recmod=-1, # reconstruction mode: -1: undefined, chosen - # automatically. 3: attenuation and scatter - # correction, 1: attenuation correction - # only, 0: no correction (randoms only). - histo=None, # input histogram (from list-mode data); - # if not given, it will be performed. - decay_ref_time=None, # decay corrects relative to the reference - # time provided; otherwise corrects to the scan - # start time. + datain, # all input data in a dictionary + scanner_params, # all scanner parameters in one dictionary + # containing constants, transaxial and axial + # LUTs. 
+ outpath='', # output path for results + frames=None, # definition of time frames, default: ['fluid', [0, 0]] + mu_h=None, # hardware mu-map. + mu_o=None, # object mu-map. + tAffine=None, # affine transformations for the mu-map for + # each time frame separately. + itr=4, # number of OSEM iterations + fwhm=0., # Gaussian Post-Smoothing FWHM + psf=None, # Resolution Modelling + recmod=-1, # reconstruction mode: -1: undefined, chosen + # automatically. 3: attenuation and scatter + # correction, 1: attenuation correction + # only, 0: no correction (randoms only). + histo=None, # input histogram (from list-mode data); + # if not given, it will be performed. + decay_ref_time=None, # decay corrects relative to the reference + # time provided; otherwise corrects to the scan + # start time. trim=False, trim_scale=2, - trim_interp=0, # interpolation for upsampling used in PVC - trim_memlim=True, # reduced use of memory for machines - # with limited memory (slow though) - pvcroi=[], # ROI used for PVC. If undefined no PVC - # is performed. - pvcreg_tool='niftyreg', # the registration tool used in PVC - store_rois=False, # stores the image of PVC ROIs - # as defined in pvcroi. - pvcpsf=[], + trim_interp=0, # interpolation for upsampling used in PVC + trim_memlim=True, # reduced use of memory for machines + # with limited memory (slow though) + pvcroi=None, # ROI used for PVC. If undefined no PVC + # is performed. + pvcreg_tool='niftyreg', # the registration tool used in PVC + store_rois=False, # stores the image of PVC ROIs + # as defined in pvcroi. 
+ pvcpsf=None, pvcitr=5, - fcomment='', # text comment used in the file name of - # generated image files - ret_sinos=False, # return prompt, scatter and randoms - # sinograms for each reconstruction - ret_histo=False, # return histogram (LM processing output) for - # each image frame + fcomment='', # text comment used in the file name of + # generated image files + ret_sinos=False, # return prompt, scatter and randoms + # sinograms for each reconstruction + ret_histo=False, # return histogram (LM processing output) for + # each image frame store_img=True, store_img_intrmd=False, - store_itr=[], # store any reconstruction iteration in - # the list. ignored if the list is empty. + store_itr=None, # store any reconstruction iteration in + # the list. ignored if the list is empty. del_img_intrmd=False, ): - # decompose all the scanner parameters and constants + if frames is None: + frames = ['fluid', [0, 0]] + if mu_h is None: + mu_h = [] + if mu_o is None: + mu_o = [] + if pvcroi is None: + pvcroi = [] + if pvcpsf is None: + pvcpsf = [] + if store_itr is None: + store_itr = [] + + # decompose all the scanner parameters and constants Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] # ------------------------------------------------------------------------- # HISOTGRAM PRECEEDS FRAMES - if not histo == None and 'psino' in histo: + if histo is not None and 'psino' in histo: frames = ['fluid', [histo['t0'], histo['t1']]] else: histo = None @@ -157,7 +170,7 @@ def mmrchain( # ------------------------------------------------------------------------- # MU-MAPS # get the mu-maps, if given; otherwise will use blank mu-maps. 
- if not tAffine is None: + if tAffine is not None: muod = obtain_image(mu_o, imtype='object mu-map') else: muod = obtain_image(mu_o, Cnt=Cnt, imtype='object mu-map') @@ -284,7 +297,7 @@ def mmrchain( # check if there is enough prompt data to do a reconstruction # -------------- log.info('dynamic frame times t0={}, t1={}:'.format(t0, t1)) - if histo == None: + if histo is None: hst = mmrhist(datain, scanner_params, t0=t0, t1=t1) else: hst = histo @@ -307,7 +320,7 @@ def mmrchain( continue # -------------------- # transform the mu-map if given the affine transformation for each frame - if not tAffine is None: + if tAffine is not None: # create the folder for aligned (registered for motion compensation) mu-maps nimpa.create_dir(fmureg) # the converted nii image resample to the reference size diff --git a/niftypet/nipet/mmrnorm.py b/niftypet/nipet/mmrnorm.py index 6f5f69c0..8a71016a 100644 --- a/niftypet/nipet/mmrnorm.py +++ b/niftypet/nipet/mmrnorm.py @@ -77,7 +77,7 @@ def get_components(datain, Cnt): if loc in d: try: nhdr = d[loc].value.decode() - except: + except Exception: continue if '!INTERFILE' in nhdr and 'scanner quantification factor' in nhdr: if Cnt['VERBOSE']: diff --git a/niftypet/nipet/prj/mmrprj.py b/niftypet/nipet/prj/mmrprj.py index e818f625..34e62b71 100644 --- a/niftypet/nipet/prj/mmrprj.py +++ b/niftypet/nipet/prj/mmrprj.py @@ -10,6 +10,7 @@ from . import petprj log = logging.getLogger(__name__) +ISUB_DEFAULT = np.array([-1], dtype=np.int32) # ======================================================================== # transaxial (one-slice) projector @@ -46,10 +47,10 @@ def trnx_prj(scanner_params, sino=None, im=None): # ------------------------------------------------------------------------ -def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=False, - attenuation=False): - ''' Calculate forward projection (a set of sinograms) for the provided input image. 
- Arguments: +def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=False): + """ + Calculate forward projection (a set of sinograms) for the provided input image. + Arguments: im -- input image (can be emission or mu-map image). scanner_params -- dictionary of all scanner parameters, containing scanner constants, transaxial and axial look up tables (LUT). @@ -61,7 +62,7 @@ def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=Fa is calculated; the default is False, meaning emission sinogram; for attenuation calculations (attenuation=True), the exponential of the negative of the integrated mu-values along LOR path is taken at the end. - ''' + """ # Get particular scanner parameters: Constants, transaxial and axial LUTs Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] @@ -132,7 +133,7 @@ def frwd_prj(im, scanner_params, isub=np.array([-1], dtype=np.int32), dev_out=Fa # ------------------------------------------------------------------------ -def back_prj(sino, scanner_params, isub=np.array([-1], dtype=np.int32)): +def back_prj(sino, scanner_params, isub=ISUB_DEFAULT): ''' Calculate forward projection for the provided input image. 
Arguments: diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 2bb929d6..527146dc 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -257,7 +257,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # SCAT # ------------------------------------------------------------------------- if recmod == 2: - if not sctsino is None: + if sctsino is not None: ssng = mmraux.remgaps(sctsino, txLUT, Cnt) elif sctsino is None and os.path.isfile(datain['em_crr']): emd = nimpa.getnii(datain['em_crr']) @@ -308,7 +308,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N if Cnt['DCYCRR'] and 't0' in hst and 'dur' in hst: # > decay correct to the reference time (e.g., injection time) if provided # > otherwise correct in reference to the scan start time - if not decay_ref_time is None: + if decay_ref_time is not None: tref = decay_ref_time else: tref = hst['t0'] diff --git a/niftypet/nipet/sct/mmrsct.py b/niftypet/nipet/sct/mmrsct.py index 9c1d0762..b71635f0 100644 --- a/niftypet/nipet/sct/mmrsct.py +++ b/niftypet/nipet/sct/mmrsct.py @@ -14,6 +14,7 @@ from scipy.interpolate import CloughTocher2DInterpolator, interp2d from scipy.spatial import qhull from scipy.special import erfc +from scipy.interpolate import interp2d from .. 
import mmr_auxe, mmraux, mmrnorm from ..img import mmrimg From 901ccf53203f7f140f47d5819cca19a0c6649269 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 7 Jan 2021 21:09:28 +0000 Subject: [PATCH 11/64] manual formatting & tidy --- niftypet/nipet/img/auximg.py | 36 ++-- niftypet/nipet/img/mmrimg.py | 50 ++--- niftypet/nipet/img/pipe.py | 100 ++++----- niftypet/nipet/lm/__init__.py | 2 +- niftypet/nipet/lm/mmrhist.py | 98 ++++----- niftypet/nipet/lm/pviews.py | 28 ++- niftypet/nipet/mmraux.py | 166 +++++++------- niftypet/nipet/mmrnorm.py | 14 +- niftypet/nipet/prj/__init__.py | 1 + niftypet/nipet/prj/mmrprj.py | 19 +- niftypet/nipet/prj/mmrrec.py | 384 ++++----------------------------- niftypet/nipet/prj/mmrsim.py | 28 ++- niftypet/nipet/sct/mmrsct.py | 95 ++++---- setup.py | 27 +-- tests/conftest.py | 2 +- tests/test_amyloid_pvc.py | 100 ++++----- 16 files changed, 391 insertions(+), 759 deletions(-) diff --git a/niftypet/nipet/img/auximg.py b/niftypet/nipet/img/auximg.py index 05070679..3cfbce4b 100644 --- a/niftypet/nipet/img/auximg.py +++ b/niftypet/nipet/img/auximg.py @@ -1,6 +1,8 @@ """auxilary imaging functions for PET image reconstruction and analysis.""" import logging import os +from collections.abc import Collection +from numbers import Integral import numpy as np @@ -71,21 +73,21 @@ def obtain_image(img, Cnt=None, imtype=''): def dynamic_timings(flist, offset=0): ''' Get start and end frame timings from a list of dynamic PET frame definitions. - flist can be 1D list of time duration for each dynamic frame, e.g.: flist = [15, 15, 15, 15, 30, 30, 30, ...] - or a 2D list of lists having 2 entries: first for the number of repetitions and the other for the frame duration, - e.g.: flist = [[4,15], [3,15], ...]. - offset adjusts for the start time (usually when prompts are strong enough over randoms) - The output is a dictionary: - out['timings'] = [[0, 15], [15, 30], [30, 45], [45, 60], [60, 90], [90, 120], [120, 150], ...] 
- out['total'] = total time - out['frames'] = array([ 15, 15, 15, 15, 30, 30, 30, 30, ...]) - + Arguments: + flist: can be 1D list of time duration for each dynamic frame, e.g.: + flist = [15, 15, 15, 15, 30, 30, 30, ...] + or a 2D list of lists having 2 entries: + first for the number of repetitions and the other for the frame duration, e.g.: + flist = [[4,15], [3,15], ...]. + offset: adjusts for the start time (usually when prompts are strong enough over randoms) + Returns (dict): + 'timings': [[0, 15], [15, 30], [30, 45], [45, 60], [60, 90], [90, 120], [120, 150], ...] + 'total': total time + 'frames': array([ 15, 15, 15, 15, 30, 30, 30, 30, ...]) ''' - if not isinstance(flist, list): + if not isinstance(flist, Collection) or isinstance(flist, str): raise TypeError('Wrong type of frame data input') - if all([ - isinstance(t, (int, np.int32, np.int16, np.int8, np.uint8, np.uint16, np.uint32)) - for t in flist]): + if all(isinstance(t, Integral) for t in flist): tsum = offset # list of frame timings if offset > 0: @@ -101,7 +103,7 @@ def dynamic_timings(flist, offset=0): # append the timings to the list t_frames.append([t0, t1]) frms = np.uint16(flist) - elif all([isinstance(t, list) and len(t) == 2 for t in flist]): + elif all(isinstance(t, Collection) and len(t) == 2 for t in flist): if offset > 0: flist.insert(0, [1, offset]) farray = np.asarray(flist, dtype=np.uint16) @@ -118,7 +120,7 @@ def dynamic_timings(flist, offset=0): # list of frame timings t_frames = [] for i in range(0, farray.shape[0]): - for t in range(0, farray[i, 0]): + for _ in range(0, farray[i, 0]): # frame start time t0 = tsum tsum += farray[i, 1] @@ -130,6 +132,4 @@ def dynamic_timings(flist, offset=0): fi += 1 else: raise TypeError('Unrecognised data input.') - # prepare the output dictionary - out = {'total': tsum, 'frames': frms, 'timings': t_frames} - return out + return {'total': tsum, 'frames': frms, 'timings': t_frames} diff --git a/niftypet/nipet/img/mmrimg.py 
b/niftypet/nipet/img/mmrimg.py index f4fc06f9..4f21bf50 100644 --- a/niftypet/nipet/img/mmrimg.py +++ b/niftypet/nipet/img/mmrimg.py @@ -5,18 +5,13 @@ import math import multiprocessing import os -import random import re import shutil -import sys -import time -from math import pi from subprocess import run import nibabel as nib import numpy as np import pydicom as dcm -import scipy.ndimage as ndi from niftypet import nimpa @@ -24,7 +19,7 @@ from .. import resources as rs log = logging.getLogger(__name__) - +OFFSET_DEFAULT = np.array([0., 0., 0.]) ct_nans = -1024 # ================================================================================== @@ -119,7 +114,7 @@ def image_affine(datain, Cnt, gantry_offset=False): return B -def getmu_off(mu, Cnt, Offst=np.array([0., 0., 0.])): +def getmu_off(mu, Cnt, Offst=OFFSET_DEFAULT): # pumber of voxels nvx = mu.shape[0] # phange the shape to 3D @@ -149,9 +144,10 @@ def getmu_off(mu, Cnt, Offst=np.array([0., 0., 0.])): return mur -def getinterfile_off(fmu, Cnt, Offst=np.array([0., 0., 0.])): +def getinterfile_off(fmu, Cnt, Offst=OFFSET_DEFAULT): ''' - Return the floating point mu-map in an array from Interfile, accounting for image offset (does slow interpolation). + Return the floating point mu-map in an array from Interfile, + accounting for image offset (does slow interpolation). 
''' # pead the image file f = open(fmu, 'rb') @@ -207,7 +203,10 @@ def getinterfile(fim, Cnt): def get_cylinder(Cnt, rad=25, xo=0, yo=0, unival=1, gpu_dim=False): - '''Outputs image with a uniform cylinder of intensity = unival, radius = rad, and transaxial centre (xo, yo)''' + """ + Outputs image with a uniform cylinder of + intensity = unival, radius = rad, and transaxial centre (xo, yo) + """ imdsk = np.zeros((1, Cnt['SO_IMX'], Cnt['SO_IMY']), dtype=np.float32) for t in np.arange(0, math.pi, math.pi / (2*360)): x = xo + rad * math.cos(t) @@ -236,8 +235,8 @@ def hu2mu(im): rhobone = 0.326 uim = np.zeros(im.shape, dtype=np.float32) uim[im <= 0] = muwater * (1 + im[im <= 0] * 1e-3) - uim[im> 0] = muwater * \ - ( 1+im[im>0]*1e-3 * rhowater/muwater*(mubone-muwater)/(rhobone-rhowater) ) + uim[im > 0] = muwater * (1 + im[im > 0] * 1e-3 * rhowater / muwater * (mubone-muwater) / + (rhobone-rhowater)) # remove negative values uim[uim < 0] = 0 return uim @@ -370,7 +369,7 @@ def obj_mumap( mu[mu < 0] = 0 # > return image dictionary with the image itself and some other stats - mu_dct = dict(im=mu, affine=A) + mu_dct = {'im': mu, 'affine': A} if not del_auxilary: mu_dct['fmuref'] = fmuref @@ -494,7 +493,7 @@ def align_mumap( raise ValueError('Full scanner are parameters not provided\ but are required for histogramming.') - #========================================================= + # ======================================================== # -get hardware mu-map if 'hmumap' in datain and os.path.isfile(datain['hmumap']): muh = np.load(datain['hmumap'], allow_pickle=True)["hmu"] @@ -510,12 +509,12 @@ def align_mumap( else: log.error('the hardware mu-map is required first.') raise IOError('Could not find the hardware mu-map!') - #========================================================= + # ======================================================== # -check if T1w image is available if not {'MRT1W#', 'T1nii', 'T1bc', 'T1N4'}.intersection(datain): log.error('no MR T1w images 
required for co-registration!') raise IOError('T1w image could not be obtained!') - #========================================================= + # ======================================================== # -if the affine is not given, # -it will be generated by reconstructing PET image, with some or no corrections @@ -575,9 +574,8 @@ def align_mumap( fpet, fute, outpath=os.path.join(outpath, 'PET', 'positioning'), - # pcomment=fcomment, executable=Cnt['REGPATH'], - omp=multiprocessing.cpu_count() / 2, + omp=multiprocessing.cpu_count() / 2, # pcomment=fcomment, rigOnly=True, affDirect=False, maxit=5, @@ -750,8 +748,6 @@ def pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', # constants, transaxial and axial LUTs are extracted Cnt = scanner_params['Cnt'] - txLUT = scanner_params['txLUT'] - axLUT = scanner_params['axLUT'] if not os.path.isfile(faff): from niftypet.nipet.prj import mmrrec @@ -830,8 +826,7 @@ def pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', regdct = nimpa.affine_niftyreg( fpet, ft1w, - outpath=os.path.join(outpath, 'PET', 'positioning'), - # pcomment=fcomment, + outpath=os.path.join(outpath, 'PET', 'positioning'), # pcomment=fcomment, executable=Cnt['REGPATH'], omp=multiprocessing.cpu_count() / 2, rigOnly=True, @@ -910,8 +905,8 @@ def pct_mumap(datain, scanner_params, hst=None, t0=0, t1=0, itr=2, petopt='ac', return mu_dct -#********************************************************************************* -#GET HARDWARE MU-MAPS with positions and offsets +# ******************************************************************************** +# GET HARDWARE MU-MAPS with positions and offsets # -------------------------------------------------------------------------------- @@ -1133,7 +1128,6 @@ def get_hmupos(datain, parts, Cnt, outpath=''): 'ivs': vs, # prom interfile 'img': im, # prom interfile 'niipath': os.path.join(dirhmu, '_' + Cnt['HMULIST'][i - 1].split('.')[0] + '.nii.gz')} - # pave to NIfTI 
log.info('creating mu-map for: {}'.format(Cnt['HMULIST'][i - 1])) A = np.diag(np.append(10 * vs[::-1], 1)) A[0, 0] *= -1 @@ -1294,9 +1288,9 @@ def rmumaps(datain, Cnt, t0=0, t1=0, use_stored=False): # putput for the T1w in register with PET ft1out = os.path.join(os.path.dirname(ft1w), 'T1w_r' + '.nii.gz') # pext file fo rthe affine transform T1w->PET - faff = os.path.join(os.path.dirname(ft1w), fcomment + 'mr2pet_affine' + - '.txt') # pime.strftime('%d%b%y_%H.%M',time.gmtime()) - # > call the registration routine + faff = os.path.join(os.path.dirname(ft1w), fcomment + 'mr2pet_affine' + '.txt') + # time.strftime('%d%b%y_%H.%M',time.gmtime()) + # > call the registration routine if os.path.isfile(Cnt['REGPATH']): cmd = [ Cnt['REGPATH'], '-ref', recute.fpet, '-flo', ft1w, '-rigOnly', '-speeeeed', '-aff', diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index d8115724..adcae76a 100644 --- a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -1,13 +1,11 @@ """module for pipelined image reconstruction and analysis""" import logging import os -import sys from numbers import Integral from subprocess import call from textwrap import dedent import numpy as np -import scipy.ndimage as ndi from niftypet import nimpa @@ -109,17 +107,17 @@ def mmrchain( # 2D starting with entry 'fluid' or 'timings' if (isinstance(frames[0], str) and frames[0] in ('fluid', 'timings') - and all([isinstance(t, list) and len(t) == 2 for t in frames[1:]])): + and all(isinstance(t, list) and len(t) == 2 for t in frames[1:])): t_frms = frames[1:] # if 2D definitions, starting with entry 'def': elif (isinstance(frames[0], str) and frames[0] == 'def' - and all([isinstance(t, list) and len(t) == 2 for t in frames[1:]])): + and all(isinstance(t, list) and len(t) == 2 for t in frames[1:])): # get total time and list of all time frames dfrms = dynamic_timings(frames) t_frms = dfrms[1:] # if 1D: - elif all([isinstance(t, Integral) for t in frames]): + elif all(isinstance(t, 
Integral) for t in frames): # get total time and list of all time frames dfrms = dynamic_timings(frames) t_frms = dfrms[1:] @@ -199,34 +197,31 @@ def mmrchain( output['frames'] = t_frms output['#frames'] = nfrm - # if affine transformation is given the baseline mu-map in NIfTI file or dictionary has to be given + # if affine transformation is given + # the baseline mu-map in NIfTI file or dictionary has to be given if tAffine is None: log.info('using the provided mu-map the same way for all frames.') else: if len(tAffine) != nfrm: - log.error('the number of affine transformations in the list\ - has to be the same as the number of dynamic frames!') - raise ValueError('Inconsistent number of frames.') + raise ValueError("the number of affine transformations in the list" + " has to be the same as the number of dynamic frames") elif not isinstance(tAffine, list): - log.error('tAffine has to be a list of either 4x4 numpy arrays\ - of affine transformations or a list of file path strings!') - raise ValueError('Expecting a list.') + raise ValueError("tAffine has to be a list of either 4x4 numpy arrays" + " of affine transformations or a list of file path strings") elif 'fim' not in muod: - log.error('when tAffine is given, the object mu-map has to be\ - provided either as a dictionary or NIfTI file!') - raise NameError('No path to object mu-map.') + raise NameError("when tAffine is given, the object mu-map has to be" + " provided either as a dictionary or NIfTI file") # check if all are file path strings to the existing files - if all([isinstance(t, str) for t in tAffine]): - if all([os.path.isfile(t) for t in tAffine]): + if all(isinstance(t, str) for t in tAffine): + if all(os.path.isfile(t) for t in tAffine): # the internal list of affine transformations faff_frms = tAffine log.info('using provided paths to affine transformations for each dynamic frame.') else: - log.error('not all provided paths are valid!') - raise IOError('Wrong paths.') + raise IOError('not all 
provided paths are valid!') # check if all are numpy arrays - elif all([isinstance(t, (np.ndarray, np.generic)) for t in tAffine]): + elif all(isinstance(t, (np.ndarray, np.generic)) for t in tAffine): # create the folder for dynamic affine transformations nimpa.create_dir(petaff) faff_frms = [] @@ -262,8 +257,9 @@ def mmrchain( output['fmuref'] = fmuref output['faffine'] = faff_frms - # output list of intermediate file names for mu-maps and PET images (useful for dynamic imaging) - if not tAffine is None: output['fmureg'] = [] + # output list of intermediate file names for mu-maps and PET images + # (useful for dynamic imaging) + if tAffine is not None: output['fmureg'] = [] if store_img_intrmd: output['fpeti'] = [] @@ -415,9 +411,10 @@ def mmrchain( if not pvcpsf: pvcpsf = nimpa.psf_measured(scanner='mmr', scale=trim_scale) else: - if isinstance( - pvcpsf, - (np.ndarray, np.generic)) and pvcpsf.shape != (3, 2 * Cnt['RSZ_PSF_KRNL'] + 1): + if ( + isinstance(pvcpsf, (np.ndarray, np.generic)) and + pvcpsf.shape != (3, 2 * Cnt['RSZ_PSF_KRNL'] + 1) + ): # yapf: disable raise ValueError( 'the PSF kernel has to be an numpy array with the shape of ({},{})'.format( 3, 2 * Cnt['RSZ_PSF_KRNL'] + 1)) @@ -443,13 +440,13 @@ def mmrchain( fcomment_pvc = '_frm' + str(i) + fcomment else: fcomment_pvc = fcomment - #============================ + # =========================== # perform PVC petpvc_dic = nimpa.pvc_iyang(petu['fimi'][i], datain, Cnt, pvcroi, pvcpsf, tool=pvcreg_tool, itr=pvcitr, faff=faffpvc, fcomment=fcomment_pvc, outpath=pvcdir, store_rois=store_rois, store_img=store_img_intrmd) - #============================ + # =========================== if nfrm > 1: dynpvc[i, :, :, :] = petpvc_dic['im'] else: @@ -471,15 +468,15 @@ def mmrchain( # description for saving NIFTI image # attenuation number: if only bed present then it is 0.5 attnum = (1 * muhd['exists'] + 1 * muod['exists']) / 2. 
- descrip = 'alg=osem' \ - +';att='+str(attnum*(recmod>0)) \ - +';sct='+str(1*(recmod>1)) \ - +';spn='+str(Cnt['SPN']) \ - +';sub=14' \ - +';itr='+str(itr) \ - +';fwhm='+str(fwhm) \ - +';psf='+str(psf) \ - +';nfrm='+str(nfrm) + descrip = (f"alg=osem" + f";att={attnum*(recmod>0)}" + f";sct={1*(recmod>1)}" + f";spn={Cnt['SPN']}" + f";sub=14" + f";itr={itr}" + f";fwhm={fwhm}" + f";psf={psf}" + f";nfrm={nfrm}") # squeeze the not needed dimensions dynim = np.squeeze(dynim) @@ -493,20 +490,14 @@ def mmrchain( if t1 == t0: t0 = 0 t1 = hst['dur'] - fpet = os.path.join( - petimg, - os.path.basename(recimg.fpet)[:8] \ - +'_t-'+str(t0)+'-'+str(t1)+'sec' \ - +'_itr-'+str(itr) ) - fpeto = fpet + fcomment + '.nii.gz' + fpet = os.path.join(petimg, + os.path.basename(recimg.fpet)[:8] + f'_t-{t0}-{t1}sec_itr-{itr}') + fpeto = f"{fpet}{fcomment}.nii.gz" nimpa.prc.array2nii(dynim[::-1, ::-1, :], recimg.affine, fpeto, descrip=descrip) else: - fpet = os.path.join( - petimg, - os.path.basename(recimg.fpet)[:8]\ - +'_nfrm-'+str(nfrm)+'_itr-'+str(itr) - ) - fpeto = fpet + fcomment + '.nii.gz' + fpet = os.path.join(petimg, + os.path.basename(recimg.fpet)[:8] + f'_nfrm-{nfrm}_itr-{itr}') + fpeto = f"{fpet}{fcomment}.nii.gz" nimpa.prc.array2nii(dynim[:, ::-1, ::-1, :], recimg.affine, fpeto, descrip=descrip) # get output file names for trimmed/PVC images @@ -516,21 +507,20 @@ def mmrchain( # make folder nimpa.create_dir(pettrim) # trimming scale added to NIfTI descritoption - descrip_trim = descrip + ';trim_scale=' + str(trim_scale) + descrip_trim = f'{descrip};trim_scale={trim_scale}' # file name for saving the trimmed image - fpetu = os.path.join( - pettrim, - os.path.basename(fpet) + '_trimmed-upsampled-scale-' + str(trim_scale)) + fpetu = os.path.join(pettrim, + os.path.basename(fpet) + f'_trimmed-upsampled-scale-{trim_scale}') # in case of PVC if pvcroi: # itertive Yang (iY) added to NIfTI descritoption - descrip_pvc = descrip_trim + ';pvc=iY' + descrip_pvc = f'{descrip_trim};pvc=iY' # 
file name for saving the PVC NIfTI image - fpvc = fpetu + '_PVC' + fcomment + '.nii.gz' + fpvc = f"{fpetu}_PVC{fcomment}.nii.gz" output['trimmed']['fpvc'] = fpvc # update the trimmed image file name - fpetu += fcomment + '.nii.gz' + fpetu += f'{fcomment}.nii.gz' # store the file name in the output dictionary output['trimmed']['fpet'] = fpetu diff --git a/niftypet/nipet/lm/__init__.py b/niftypet/nipet/lm/__init__.py index 894ecbb2..957f4d61 100644 --- a/niftypet/nipet/lm/__init__.py +++ b/niftypet/nipet/lm/__init__.py @@ -14,4 +14,4 @@ split_frames, ) -#from .pviews import video_frm, video_dyn +# from .pviews import video_frm, video_dyn diff --git a/niftypet/nipet/lm/mmrhist.py b/niftypet/nipet/lm/mmrhist.py index 2e8ce198..550e4a6e 100644 --- a/niftypet/nipet/lm/mmrhist.py +++ b/niftypet/nipet/lm/mmrhist.py @@ -1,9 +1,8 @@ """hist.py: processing of PET list-mode data: histogramming and randoms estimation.""" import logging import os -import pickle -import sys -from math import pi +from collections.abc import Collection +from numbers import Integral import nibabel as nib import numpy as np @@ -105,15 +104,8 @@ def hist( ssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.uint32) hstout = { - 'phc': phc, - 'dhc': dhc, - 'mss': mss, - 'pvs': pvs, - 'bck': bck, - 'fan': fan, - 'psn': psino, - 'dsn': dsino, - 'ssr': ssr,} + 'phc': phc, 'dhc': dhc, 'mss': mss, 'pvs': pvs, 'bck': bck, 'fan': fan, 'psn': psino, + 'dsn': dsino, 'ssr': ssr} # --------------------------------------- # do the histogramming and processing @@ -143,7 +135,7 @@ def hist( 'centre of mass of axial radiodistribution (filtered with Gaussian of SD ={}): COMPLETED.' 
.format(cmass_sig)) - #========================== BUCKET SINGLES ========================= + # ========================= BUCKET SINGLES ========================= # > number of single rates reported for the given second # > the last two bits are used for the number of reports nsr = (hstout['bck'][1, :, :] >> 30) @@ -162,31 +154,28 @@ def hist( # > get the average bucket singles: buckets = np.int32(np.sum(single_rate, axis=0) / single_rate.shape[0]) log.debug('dynamic and static buckets single rates: COMPLETED.') - #=================================================================== + # ================================================================== # account for the fact that when t0==t1 that means that full dataset is processed if t0 == t1: t1 = t0 + nitag - pdata = { + return { 't0': t0, 't1': t1, - 'dur': t1 - t0, # duration - 'phc': hstout['phc'], # prompts head curve - 'dhc': hstout['dhc'], # delayeds head curve - 'cmass': cmass, # centre of mass of the radiodistribution in axial direction - 'pvs_sgtl': pvs_sgtl, # sagittal projection views in short intervals - 'pvs_crnl': pvs_crnl, # coronal projection views in short intervals - 'fansums': hstout[ - 'fan'], # fan sums of delayeds for variance reduction of random event sinograms - 'sngl_rate': single_rate, # bucket singles over time - 'tsngl': t, # time points of singles measurements in list-mode data - 'buckets': buckets, # average bucket singles + 'dur': t1 - t0, # duration + 'phc': hstout['phc'], # prompts head curve + 'dhc': hstout['dhc'], # delayeds head curve + 'cmass': cmass, # centre of mass of the radiodistribution in axial direction + 'pvs_sgtl': pvs_sgtl, # sagittal projection views in short intervals + 'pvs_crnl': pvs_crnl, # coronal projection views in short intervals + 'fansums': hstout['fan'], # fan sums of delayeds for variance reduction of randoms + 'sngl_rate': single_rate, # bucket singles over time + 'tsngl': t, # time points of singles measurements in list-mode data + 'buckets': buckets, 
# average bucket singles 'psino': hstout['psn'].astype(np.uint16), # prompt sinogram 'dsino': hstout['dsn'].astype(np.uint16), # delayeds sinogram - 'pssr': hstout['ssr'] # single-slice rebinned sinogram of prompts - } - - return pdata + 'pssr': hstout['ssr'] # single-slice rebinned sinogram of prompts + } # yapf: disable # ============================================================================== @@ -223,9 +212,7 @@ def rand(fansums, txLUT, axLUT, Cnt): # random sino and estimated crystal map of singles put into a dictionary rsn = np.zeros((nsinos, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) cmap = np.zeros((Cnt['NCRS'], Cnt['NRNG']), dtype=np.float32) - rndout = { - 'rsn': rsn, - 'cmap': cmap,} + rndout = {'rsn': rsn, 'cmap': cmap} mmr_lmproc.rand(rndout, fansums, txLUT, axLUT, Cnt) @@ -249,9 +236,7 @@ def prand(fansums, pmsk, txLUT, axLUT, Cnt): # random sino and estimated crystal map of singles put into a dictionary rsn = np.zeros((nsinos, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) cmap = np.zeros((Cnt['NCRS'], Cnt['NRNG']), dtype=np.float32) - rndout = { - 'rsn': rsn, - 'cmap': cmap,} + rndout = {'rsn': rsn, 'cmap': cmap} # save results for each frame @@ -300,9 +285,6 @@ def sino2nii(sino, Cnt, fpth): # ================================================================================ # create michelogram map for emission data, only when the input sino in in span-1 def get_michem(sino, axLUT, Cnt): - # span: - spn = -1 - if Cnt['SPN'] == 1: slut = np.arange(Cnt['NSN1']) # for span 1, one-to-one mapping elif Cnt['SPN'] == 11: @@ -500,7 +482,7 @@ def auxilary_frames(hst, t_frms, Cref=0, tr0=0, tr1=15, verbose=True): if verbose: print('t[{}, {}]; tp={}, tcm={} => frm id:{}, timings:{}'.format( t_frms[i][0], t_frms[i][1], tp, tcm, fi2afi[-1], timings[-1])) - # form the list of auxilary dynamic frames of equivalent count level (as in Cref) for reconstruction + # form the list of auxilary dynamic frames of equivalent count level (as in Cref) mfrm = 
['fluid'] + timings return {'timings': mfrm, 'frame_idx': fi2afi} @@ -509,20 +491,21 @@ def dynamic_timings(flist, offset=0): ''' Get start and end frame timings from a list of dynamic PET frame definitions. Arguments: - flist can be 1D list of time duration for each dynamic frame, e.g.: flist = [15, 15, 15, 15, 30, 30, 30, ...] - or a 2D list of lists having 2 entries per definition: first for the number of repetitions and the other - for the frame duration, e.g.: flist = ['def', [4, 15], [8, 30], ...], meaning 4x15s, then 8x30s, etc. - offset adjusts for the start time (usually when prompts are strong enough over randoms) - The output is a dictionary: - out['timings'] = [[0, 15], [15, 30], [30, 45], [45, 60], [60, 90], [90, 120], [120, 150], ...] - out['total'] = total time - out['frames'] = array([ 15, 15, 15, 15, 30, 30, 30, 30, ...]) + flist: can be 1D list of time duration for each dynamic frame, e.g.: + flist = [15, 15, 15, 15, 30, 30, 30, ...] + or a 2D list of lists having 2 entries per definition: + first for the number of repetitions and the other for the frame duration, e.g.: + flist = ['def', [4, 15], [8, 30], ...], + meaning 4x15s, then 8x30s, etc. + offset: adjusts for the start time (usually when prompts are strong enough over randoms) + Returns (dict): + 'timings': [[0, 15], [15, 30], [30, 45], [45, 60], [60, 90], [90, 120], [120, 150], ...] 
+ 'total': total time + 'frames': array([ 15, 15, 15, 15, 30, 30, 30, 30, ...]) ''' - if not isinstance(flist, list): + if not isinstance(flist, Collection) or isinstance(flist, str): raise TypeError('Wrong type of frame data input') - if all([ - isinstance(t, (int, np.int32, np.int16, np.int8, np.uint8, np.uint16, np.uint32)) - for t in flist]): + if all(isinstance(t, Integral) for t in flist): tsum = offset # list of frame timings if offset > 0: @@ -538,8 +521,7 @@ def dynamic_timings(flist, offset=0): # append the timings to the list t_frames.append([t0, t1]) frms = np.uint16(flist) - - elif all([isinstance(t, list) and len(t) == 2 for t in flist[1:]]) and flist[0] == 'def': + elif flist[0] == 'def' and all(isinstance(t, Collection) and len(t) == 2 for t in flist[1:]): flist = flist[1:] if offset > 0: flist.insert(0, [0, offset]) @@ -556,8 +538,8 @@ def dynamic_timings(flist, offset=0): tsum = 0 # list of frame timings t_frames = ['timings'] - for i in range(0, farray.shape[0]): - for t in range(0, farray[i, 0]): + for i in range(farray.shape[0]): + for _ in range(farray[i, 0]): # frame start time t0 = tsum tsum += farray[i, 1] @@ -569,6 +551,4 @@ def dynamic_timings(flist, offset=0): fi += 1 else: raise TypeError('Unrecognised time frame definitions.') - # prepare the output dictionary - out = {'total': tsum, 'frames': frms, 'timings': t_frames} - return out + return {'total': tsum, 'frames': frms, 'timings': t_frames} diff --git a/niftypet/nipet/lm/pviews.py b/niftypet/nipet/lm/pviews.py index f9b50294..76fa731a 100644 --- a/niftypet/nipet/lm/pviews.py +++ b/niftypet/nipet/lm/pviews.py @@ -1,13 +1,9 @@ #!/usr/bin/python import os -import sys -import matplotlib -import matplotlib.animation as manimation - -# matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np +from matplotlib import animation as manimation def mvavg(interval, window_size): @@ -18,9 +14,9 @@ def mvavg(interval, window_size): def video_frm(hst, outpth): plt.close('all') - 
#=============== CONSTANTS ================== + # ============== CONSTANTS ================== VTIME = 4 - #============================================ + # =========================================== i = np.argmax(hst['phc']) ymin = np.floor(min(hst['cmass'][i:i + 300])) @@ -30,7 +26,7 @@ def video_frm(hst, outpth): # --for movie FFMpegWriter = manimation.writers['ffmpeg'] - metadata = dict(title='GPU Sino Views', artist='Pawel', comment=':)') + metadata = {'title': 'GPU Sino Views', 'artist': 'Pawel', 'comment': ':)'} writer = FFMpegWriter(fps=25, bitrate=30000, metadata=metadata) # -- @@ -47,10 +43,10 @@ def video_frm(hst, outpth): plt.title('Sagittal View') plt.setp(ax2.get_xticklabels(), visible=False) plt.tick_params(axis='both', which='both', bottom='off', top='off', labelbottom='off') - l = plt.imshow(hst['pvs_sgtl'][100, :, :] / np.mean(hst['pvs_sgtl'][100, :, :]), cmap='jet', - interpolation='nearest') + l0 = plt.imshow(hst['pvs_sgtl'][100, :, :] / np.mean(hst['pvs_sgtl'][100, :, :]), cmap='jet', + interpolation='nearest') - ax3 = plt.subplot(313) + plt.subplot(313) plt.title('Axial Centre of Mass') t = np.arange(0., hst['dur'], 1.) 
# plt.plot(t, rprmt, 'k', t, rdlyd, 'r') @@ -72,7 +68,7 @@ def video_frm(hst, outpth): tmp2 = np.sum(hst['pvs_crnl'][mf * i:mf * (i+1), :, :], axis=0) tmp = tmp / np.mean(tmp) tmp2 = tmp2 / np.mean(tmp2) - l.set_data(tmp) + l0.set_data(tmp) l1.set_data(tmp2) # l2.set_data(VTIME*mf*i*np.ones(2), np.array([0, np.max(hst['phc'])])) l2.set_data(VTIME * mf * i * np.ones(2), np.array([0, ymax])) @@ -90,7 +86,7 @@ def video_frm(hst, outpth): def video_dyn(hst, frms, outpth, axLUT, Cnt): plt.close('all') - #=============== CONSTANTS ================== + # ============== CONSTANTS ================== VTIME = 4 NRINGS = Cnt['NRNG'] NSN11 = Cnt['NSN11'] @@ -100,7 +96,7 @@ def video_dyn(hst, frms, outpth, axLUT, Cnt): voxz = Cnt['SO_VXZ'] nsinos = NSN11 - #============================================ + # =========================================== # for scaling of the mass centre i = np.argmax(hst['phc']) @@ -136,7 +132,7 @@ def video_dyn(hst, frms, outpth, axLUT, Cnt): # --for movie FFMpegWriter = manimation.writers['ffmpeg'] - metadata = dict(title='Axial View', artist='Pawel', comment='--') + metadata = {'title': 'Axial View', 'artist': 'Pawel', 'comment': '--'} writer = FFMpegWriter(fps=10, bitrate=30000, metadata=metadata) # -- @@ -158,7 +154,7 @@ def video_dyn(hst, frms, outpth, axLUT, Cnt): interpolation='nearest') # plt.clim([0, 70]) - ax3 = plt.subplot(313) + plt.subplot(313) plt.title('Axial Centre of Mass') plt.plot(range(hst['dur']), voxz * mvavg(hst['cmass'][:], 5), 'k') plt.ylim([voxz * ymin, voxz * ymax]) diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index 564ab746..6c6c8d0a 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -3,17 +3,15 @@ import logging import os import re -import sys +from collections.abc import Collection from math import pi +from numbers import Integral from os import fspath -from os.path import join as pjoin from pathlib import Path from textwrap import dedent -import nibabel as nib import numpy as np 
import pydicom as dcm -import scipy.ndimage as ndi from miutil.fdio import hasext from niftypet import nimpa @@ -65,15 +63,12 @@ def lm_pos(datain, Cnt): else: raise ValueError('unknown scanner software version!') - fi = re.search(b'GantryOffset(?!_)', csainfo).start() # csainfo.find('GantryOffset') - # regular expression for the needed three numbers - p = re.compile(b'-?\\d.\\d{4,10}') - xyz = p.findall(csainfo[fi:fi + 200]) - # offset in cm - # xoff = float(xyz[0])/10 - # yoff = float(xyz[1])/10 - # zoff = float(xyz[2])/10 - # > hack to avoid other numbers (counting from the back) + # csainfo.find('GantryOffset') + fi = re.search(b'GantryOffset(?!_)', csainfo).start() + # regular expression for the needed three numbers + xyz = re.findall(b'-?\\d.\\d{4,10}', csainfo[fi:fi + 200]) + # offset in cm + # > hack to avoid other numbers (counting from the back) xoff = float(xyz[-3]) / 10 yoff = float(xyz[-2]) / 10 zoff = float(xyz[-1]) / 10 @@ -160,14 +155,12 @@ def vh_bedpos(datain, Cnt): ihdr, csainfo = hdr_lm(datain, Cnt) # start horizontal bed position - p = re.compile(r'start horizontal bed position.*\d{1,3}\.*\d*') - m = p.search(ihdr) + m = re.search(r'start horizontal bed position.*\d{1,3}\.*\d*', ihdr) fi = ihdr[m.start():m.end()].find('=') hbedpos = 0.1 * float(ihdr[m.start() + fi + 1:m.end()]) # start vertical bed position - p = re.compile(r'start vertical bed position.*\d{1,3}\.*\d*') - m = p.search(ihdr) + m = re.search(r'start vertical bed position.*\d{1,3}\.*\d*', ihdr) fi = ihdr[m.start():m.end()].find('=') vbedpos = 0.1 * float(ihdr[m.start() + fi + 1:m.end()]) @@ -192,7 +185,7 @@ def hmu_resample0(hmupos, parts, Cnt): dtype=np.float32) imr = np.zeros((Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']), dtype=np.float32) - #===== Go through the hardware mu-map parts ===== + # ===== Go through the hardware mu-map parts ===== for i in parts: Cim['VXSOx'] = hmupos[i]['ivs'][2] Cim['VXSOy'] = hmupos[i]['ivs'][1] @@ -211,17 +204,18 @@ def hmu_resample0(hmupos, 
parts, Cnt): offresZ = (-.5 * Cnt['SO_IMZ'] * Cnt['SO_VXZ'] - hmupos[0]['HBedPos']) # excess of the hrdwr mu-map axially excemuZ = offresZ - (-hmupos[4]['vpos'][0]) - excevox = int(excemuZ / hmupos[4]['ivs'][0]) - 5 # with extra margin of 5 + # with extra margin of 5 + excevox = int(excemuZ / hmupos[4]['ivs'][0]) - 5 newoffZ = -hmupos[4]['vpos'][0] + excevox * hmupos[4]['ivs'][0] - # number of voxels included axially - inclvox = Cnt['SO_IMZ'] * Cnt['SO_VXZ'] / hmupos[4]['ivs'][0] + 10 # with extra margin... - # truncate the image + # number of voxels included axially + # with extra margin... + inclvox = Cnt['SO_IMZ'] * Cnt['SO_VXZ'] / hmupos[4]['ivs'][0] + 10 + # truncate the image im = hmupos[i]['img'][excevox:excevox + inclvox, :, :] - # update dictionary Cim + # update dictionary Cim Cim['OFFOz'] = newoffZ Cim['VXNOz'] = im.shape[0] imr += nimpa.prc.improc.resample(im, A, Cim) - else: imr += nimpa.prc.improc.resample(hmupos[i]['img'], A, Cim) @@ -230,31 +224,31 @@ def hmu_resample0(hmupos, parts, Cnt): def time_diff_norm_acq(datain): if 'lm_dcm' in datain and os.path.isfile(datain['lm_dcm']): - l = dcm.read_file(datain['lm_dcm']) + dcm_lm = dcm.read_file(datain['lm_dcm']) elif 'lm_ima' in datain and os.path.isfile(datain['lm_ima']): - l = dcm.read_file(datain['lm_ima']) + dcm_lm = dcm.read_file(datain['lm_ima']) else: log.error('dicom header of list-mode data does not exist.') return None # acq date - s = l[0x08, 0x21].value + s = dcm_lm[0x08, 0x21].value y = int(s[:4]) m = int(s[4:6]) d = int(s[6:8]) # acq time - s = l[0x08, 0x32].value + s = dcm_lm[0x08, 0x32].value hrs = int(s[:2]) mns = int(s[2:4]) sec = int(s[4:6]) # calib date - s = l[0x18, 0x1200].value + s = dcm_lm[0x18, 0x1200].value cy = int(s[:4]) cm = int(s[4:6]) cd = int(s[6:8]) # calib time - s = l[0x18, 0x1201].value + s = dcm_lm[0x18, 0x1201].value chrs = int(s[:2]) cmns = int(s[2:4]) csec = int(s[4:6]) @@ -276,22 +270,23 @@ def time_diff_norm_acq(datain): def timings_from_list(flist, offset=0): 
- ''' + """ Get start and end frame timings from a list of dynamic PET frame definitions. - flist can be 1D list of time duration for each dynamic frame, e.g.: flist = [15, 15, 15, 15, 30, 30, 30, ...] - or a 2D list of lists having 2 entries: first for the number of repetitions and the other for the frame duration, - e.g.: flist = [[4,15], [3,15], ...]. - offset adjusts for the start time (usually when prompts are strong enough over randoms) - The output is a dictionary: - out['timings'] = [[0, 15], [15, 30], [30, 45], [45, 60], [60, 90], [90, 120], [120, 150], ...] - out['total'] = total time - out['frames'] = array([ 15, 15, 15, 15, 30, 30, 30, 30, ...]) - ''' - if not isinstance(flist, list): + Args: + flist: can be 1D list of time duration for each dynamic frame, e.g.: + flist = [15, 15, 15, 15, 30, 30, 30, ...] + or a 2D list of lists having 2 entries: + first for the number of repetitions and the other for the frame duration, e.g.: + flist = [[4,15], [3,15], ...]. + offset: adjusts for the start time (usually when prompts are strong enough over randoms) + Returns (dict): + 'timings': [[0, 15], [15, 30], [30, 45], [45, 60], [60, 90], [90, 120], [120, 150], ...] 
+ 'total': total time + 'frames': array([ 15, 15, 15, 15, 30, 30, 30, 30, ...]) + """ + if not isinstance(flist, Collection) or isinstance(flist, str): raise TypeError('Wrong type of frame data input') - if all([ - isinstance(t, (int, np.int32, np.int16, np.int8, np.uint8, np.uint16, np.uint32)) - for t in flist]): + if all(isinstance(t, Integral) for t in flist): tsum = offset # list of frame timings if offset > 0: @@ -307,7 +302,7 @@ def timings_from_list(flist, offset=0): # append the timings to the list t_frames.append([t0, t1]) frms = np.uint16(flist) - elif all([isinstance(t, list) and len(t) == 2 for t in flist]): + elif all(isinstance(t, Collection) and len(t) == 2 for t in flist): if offset > 0: flist.insert(0, [1, offset]) farray = np.asarray(flist, dtype=np.uint16) @@ -324,7 +319,7 @@ def timings_from_list(flist, offset=0): # list of frame timings t_frames = [] for i in range(0, farray.shape[0]): - for t in range(0, farray[i, 0]): + for _ in range(0, farray[i, 0]): # frame start time t0 = tsum tsum += farray[i, 1] @@ -336,9 +331,7 @@ def timings_from_list(flist, offset=0): fi += 1 else: raise TypeError('Unrecognised data input.') - # prepare the output dictionary - out = {'total': tsum, 'frames': frms, 'timings': t_frames} - return out + return {'total': tsum, 'frames': frms, 'timings': t_frames} def axial_lut(Cnt): @@ -348,11 +341,13 @@ def axial_lut(Cnt): NRNG = Cnt['NRNG'] if Cnt['SPN'] == 1: - # number of rings calculated for the given ring range (optionally we can use only part of the axial FOV) + # number of rings calculated for the given ring range + # (optionally we can use only part of the axial FOV) NRNG_c = Cnt['RNG_END'] - Cnt['RNG_STRT'] # number of sinos in span-1 NSN1_c = NRNG_c**2 - # correct for the max. ring difference in the full axial extent (don't use ring range (1,63) as for this case no correction) + # correct for the max. 
ring difference in the full axial extent + # (don't use ring range (1,63) as for this case no correction) if NRNG_c == 64: NSN1_c -= 12 SEG0_c = 2*NRNG_c - 1 @@ -375,10 +370,7 @@ def axial_lut(Cnt): # ring difference range rd = list(range(-Cnt['MRD'], Cnt['MRD'] + 1)) # ring difference to segment - rd2sg = -1 * np.ones(( - len(rd), - 2, - ), dtype=np.int32) + rd2sg = -1 * np.ones((len(rd), 2), dtype=np.int32) for i in range(len(rd)): for iseg in range(len(Cnt['MNRD'])): if (rd[i] >= Cnt['MNRD'][iseg]) and (rd[i] <= Cnt['MXRD'][iseg]): @@ -419,7 +411,7 @@ def axial_lut(Cnt): # np.savetxt("Mnos.csv", Mnos, delimiter=",", fmt='%d') # np.savetxt("Msn.csv", Msn, delimiter=",", fmt='%d') - #====full LUT + # ===full LUT sn1_rno = np.zeros((NSN1_c, 2), dtype=np.int16) sn1_ssrb = np.zeros((NSN1_c), dtype=np.int16) sn1_sn11 = np.zeros((NSN1_c), dtype=np.int16) @@ -435,17 +427,19 @@ def axial_lut(Cnt): strt = NRNG * (ro + Cnt['RNG_STRT']) + Cnt['RNG_STRT'] stop = (Cnt['RNG_STRT'] + NRNG_c) * NRNG step = NRNG + 1 - for li in range(strt, stop, - step): # goes along a diagonal started in the first row at r1 - # linear indecies of michelogram --> subscript indecies for positive and negative RDs + + # goes along a diagonal started in the first row at r1 + for li in range(strt, stop, step): + # linear indicies of michelogram + # --> subscript indecies for positive and negative RDs + if m == 0: r1 = int(li / NRNG) r0 = int(li - r1*NRNG) - else: # for positive now (? or vice versa) + else: # for positive now (? 
or vice versa) r0 = int(li / NRNG) r1 = int(li - r0*NRNG) - # avoid case when RD>MRD - if (Msn[r1, r0]) < 0: + if Msn[r1, r0] < 0: # avoid case when RD>MRD continue sn1_rno[sni, 0] = r0 @@ -489,7 +483,7 @@ def axial_lut(Cnt): li2sn = np.zeros((NLI2R_c, 2), dtype=np.int16) li2sn1 = np.zeros((NLI2R_c, 2), dtype=np.int16) li2rng = np.zeros((NLI2R_c, 2), dtype=np.float32) - #...to number of sinos (nos) + # ...to number of sinos (nos) li2nos = np.zeros((NLI2R_c), dtype=np.int8) dli = 0 @@ -499,21 +493,23 @@ def axial_lut(Cnt): stop = (Cnt['RNG_STRT'] + NRNG_c) * NRNG step = NRNG + 1 - for li in range(strt, stop, step): # goes along a diagonal started in the first row at r2o - # from the linear indexes of Michelogram get the subscript indexes + # goes along a diagonal started in the first row at r2o + for li in range(strt, stop, step): + # from the linear indexes of Michelogram get the subscript indexes r1 = int(li / NRNG) r0 = int(li - r1*NRNG) - # avoid case when RD>MRD - if (Msn[r1, r0]) < 0: + if Msn[r1, r0] < 0: + # avoid case when RD>MRD continue - # li2r[0, dli] = r0 - # li2r[1, dli] = r1 - # # -- - # li2rng[0, dli] = rng[r0,0]; - # li2rng[1, dli] = rng[r1,0]; - # # -- - # li2sn[0, dli] = Msn[r0,r1] - # li2sn[1, dli] = Msn[r1,r0] + + # li2r[0, dli] = r0 + # li2r[1, dli] = r1 + # # -- + # li2rng[0, dli] = rng[r0,0]; + # li2rng[1, dli] = rng[r1,0]; + # # -- + # li2sn[0, dli] = Msn[r0,r1] + # li2sn[1, dli] = Msn[r1,r0] li2r[dli, 0] = r0 li2r[dli, 1] = r1 @@ -576,7 +572,6 @@ def reduce_rings(pars, rs=0, re=64): rs -- start ring re -- end ring (not included in the resulting reduced rings) ''' - Cnt = pars['Cnt'] axLUT = pars['axLUT'] @@ -629,7 +624,6 @@ def transaxial_lut(Cnt, visualisation=False): p = 8 # pixel density of the visualisation VISXY = Cnt['SO_IMX'] * p T = np.zeros((VISXY, VISXY), dtype=np.float32) - # --- # --- crystal coordinates transaxially # > block width @@ -682,7 +676,7 @@ def transaxial_lut(Cnt, visualisation=False): v = int(.5*VISXY - np.ceil(ycp 
/ (Cnt['SO_VXY'] / p))) T[v, u] = 2.5 - out = dict(crs=crs) + out = {'crs': crs} if visualisation: out['visual'] = T @@ -795,7 +789,8 @@ def transaxial_lut(Cnt, visualisation=False): # # cij - a square matrix of crystals in coincidence (transaxially) # # crsri - indexes of crystals with the gap crystals taken out (therefore reduced) - # # aw2sn - LUT array [AW x 2] translating linear index into a 2D sinogram with dead LOR (gaps) + # # aw2sn - LUT array [AW x 2] translating linear index into + # # a 2D sinogram with dead LOR (gaps) # # aw2ali - LUT from linear index of 2D full sinogram with gaps and bin-driven to # # linear index without gaps and angle driven # # msino - 2D sinogram with gaps marked (0). like a mask. @@ -1016,9 +1011,8 @@ def get_dicoms(dfile, datain, Cnt): if f0 >= 0: f1 = f0 + lmhdr[f0:].find('\n') # regular expression for the isotope symbol - p = re.compile(r'(?<=:=)\s*\S*') # the name of isotope: - istp = p.findall(lmhdr[f0:f1])[0] + istp = re.findall(r'(?<=:=)\s*\S*', lmhdr[f0:f1])[0] istp = istp.replace('-', '') Cnt['ISOTOPE'] = istp.strip() @@ -1126,11 +1120,13 @@ def putgaps(s, txLUT, Cnt, sino_no=0): # number of sino planes (2D sinos) depends on the span used if Cnt['SPN'] == 1: - # number of rings calculated for the given ring range (optionally we can use only part of the axial FOV) + # number of rings calculated for the given ring range + # (optionally we can use only part of the axial FOV) NRNG_c = Cnt['RNG_END'] - Cnt['RNG_STRT'] # number of sinos in span-1 nsinos = NRNG_c**2 - # correct for the max. ring difference in the full axial extent (don't use ring range (1,63) as for this case no correction) + # correct for the max. 
ring difference in the full axial extent + # (don't use ring range (1,63) as for this case no correction) if NRNG_c == 64: nsinos -= 12 @@ -1175,8 +1171,6 @@ def mmrinit(): def mMR_params(): - ''' - get all scanner parameters in one dictionary - ''' + """get all scanner parameters in one dictionary""" Cnt, txLUT, axLUT = mmrinit() return {'Cnt': Cnt, 'txLUT': txLUT, 'axLUT': axLUT} diff --git a/niftypet/nipet/mmrnorm.py b/niftypet/nipet/mmrnorm.py index 8a71016a..a8795f16 100644 --- a/niftypet/nipet/mmrnorm.py +++ b/niftypet/nipet/mmrnorm.py @@ -1,6 +1,6 @@ """mmraux.py: auxilary functions for PET list-mode data processing.""" +import logging import re -import sys from os import fspath, path from pathlib import Path @@ -10,6 +10,7 @@ from . import mmr_auxe # auxiliary functions through Python extensions in CUDA +log = logging.getLogger(__name__) # ================================================================================================ # GET NORM COMPONENTS # ================================================================================================ @@ -53,10 +54,12 @@ def get_components(datain, Cnt): auxdata = Path(resource_filename("niftypet.nipet", "auxdata")) # axial effects for span-1 ax_f1 = np.load(fspath(auxdata / "AxialFactorForSpan1.npy")) - # relative scale factors for axial scatter deriving span-11 scale factors from SSR scale factors + # relative scale factors for axial scatter + # deriving span-11 scale factors from SSR scale factors sax_f11 = np.fromfile(fspath(auxdata / "RelativeScaleFactors_scatter_axial_ssrTOspan11.f32"), np.float32, Cnt['NSN11']) - # relative scale factors for axial scatter deriving span-1 scale factors from SSR scale factors + # relative scale factors for axial scatter + # deriving span-1 scale factors from SSR scale factors sax_f1 = np.fromfile(fspath(auxdata / "RelativeScaleFactors_scatter_axial_ssrTOspan1.f32"), np.float32, Cnt['NSN1']) # ------------------------------------------------- @@ -80,9 +83,8 @@ def 
get_components(datain, Cnt): except Exception: continue if '!INTERFILE' in nhdr and 'scanner quantification factor' in nhdr: - if Cnt['VERBOSE']: - print('i> got the normalisation interfile header from [', hex(loc[0]), ',', - hex(loc[1]), ']') + log.debug('got the normalisation interfile header from [0x%x, 0x%x]', loc[0], + loc[1]) found_nhdr = True break if not found_nhdr: diff --git a/niftypet/nipet/prj/__init__.py b/niftypet/nipet/prj/__init__.py index 56d2df52..f4c4ce04 100644 --- a/niftypet/nipet/prj/__init__.py +++ b/niftypet/nipet/prj/__init__.py @@ -1,2 +1,3 @@ # init the package folder +__all__ = ['mmrprj', 'mmrrec', 'mmrsim'] from . import mmrprj, mmrrec, mmrsim diff --git a/niftypet/nipet/prj/mmrprj.py b/niftypet/nipet/prj/mmrprj.py index 34e62b71..4947e42f 100644 --- a/niftypet/nipet/prj/mmrprj.py +++ b/niftypet/nipet/prj/mmrprj.py @@ -1,7 +1,5 @@ """Forward and back projector for PET data reconstruction""" import logging -import os -import sys import numpy as np @@ -18,8 +16,6 @@ def trnx_prj(scanner_params, sino=None, im=None): - - # Get particular scanner parameters: Constants, transaxial and axial LUTs Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] @@ -76,11 +72,13 @@ def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=F att = 0 if Cnt['SPN'] == 1: - # number of rings calculated for the given ring range (optionally we can use only part of the axial FOV) + # number of rings calculated for the given ring range + # (optionally we can use only part of the axial FOV) NRNG_c = Cnt['RNG_END'] - Cnt['RNG_STRT'] # number of sinos in span-1 nsinos = NRNG_c**2 - # correct for the max. ring difference in the full axial extent (don't use ring range (1,63) as for this case no correction) + # correct for the max. 
ring difference in the full axial extent + # (don't use ring range (1,63) as for this case no correction) if NRNG_c == 64: nsinos -= 12 elif Cnt['SPN'] == 11: @@ -107,7 +105,8 @@ def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=F log.debug('number of sinos:%d' % nsinos) - # predefine the sinogram. if subsets are used then only preallocate those bins which will be used. + # predefine the sinogram. + # if subsets are used then only preallocate those bins which will be used. if isub[0] < 0: sinog = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) else: @@ -149,11 +148,13 @@ def back_prj(sino, scanner_params, isub=ISUB_DEFAULT): axLUT = scanner_params['axLUT'] if Cnt['SPN'] == 1: - # number of rings calculated for the given ring range (optionally we can use only part of the axial FOV) + # number of rings calculated for the given ring range + # (optionally we can use only part of the axial FOV) NRNG_c = Cnt['RNG_END'] - Cnt['RNG_STRT'] # number of sinos in span-1 nsinos = NRNG_c**2 - # correct for the max. ring difference in the full axial extent (don't use ring range (1,63) as for this case no correction) + # correct for the max. ring difference in the full axial extent + # (don't use ring range (1,63) as for this case no correction) if NRNG_c == 64: nsinos -= 12 elif Cnt['SPN'] == 11: diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 527146dc..d5349d2f 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -1,8 +1,6 @@ """Image reconstruction from raw PET data""" import logging import os -import random -import sys import time from collections import namedtuple from collections.abc import Iterable @@ -17,15 +15,13 @@ from .. import resources # for isotope info from .. import mmraux, mmrnorm from ..img import mmrimg - -#from ..lm import mmrhist from ..lm.mmrhist import randoms from ..sct import vsm from . 
import petprj log = logging.getLogger(__name__) -#reconstruction mode: +# reconstruction mode: # 0 - no attenuation and no scatter # 1 - attenuation and no scatter # 2 - attenuation and scatter given as input parameter @@ -67,7 +63,8 @@ def get_subsets14(n, params): for s in range(N): # list of sino angular indexes for a given subset si = [] - #::::: iterate sino blocks. This bit may be unnecessary, it can be taken directly from sp array + # ::::: iterate sino blocks. + # This bit may be unnecessary, it can be taken directly from sp array for b in range(N): # --angle index within a sino block depending on subset s ai = (s+b) % N @@ -75,7 +72,7 @@ def get_subsets14(n, params): sai = sp[ai, b] si.append(sai) totsum[s] += aisum[sai] - #::::: + # ::::: # deal with the remaining part, ie, P-N per block rai = np.int16(np.floor(np.arange(s, 2 * N, fs)[:4] % N)) for i in range(P - N): @@ -110,7 +107,7 @@ def psf_config(psf, Cnt): def _config(fwhm3, check_len=True): # resolution modelling by custom kernels if check_len: - if len(fwhm3) != 3 or any([f < 0 for f in fwhm3]): + if len(fwhm3) != 3 or any(f < 0 for f in fwhm3): raise ValueError('Incorrect separable kernel FWHM definition') kernel = np.empty((3, 2 * Cnt['RSZ_PSF_KRNL'] + 1), dtype=np.float32) @@ -180,7 +177,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N else: opth = outpath - if ((store_img is True) or (not store_itr is None)): + if (store_img is True) or (store_itr is not None): mmraux.create_dir(opth) if ret_sinos: @@ -208,7 +205,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # remove gaps from the prompt sino psng = mmraux.remgaps(hst['psino'], txLUT, Cnt) - #========================================================================= + # ======================================================================== # GET NORM # ------------------------------------------------------------------------- if normcomp is None: @@ -217,9 +214,9 @@ 
def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N ncmp = normcomp log.warning('using user-defined normalisation components') nsng = mmrnorm.get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=ncmp) - #========================================================================= + # ======================================================================== - #========================================================================= + # ======================================================================== # ATTENUATION FACTORS FOR COMBINED OBJECT AND BED MU-MAP # ------------------------------------------------------------------------- # > combine attenuation and norm together depending on reconstruction mode @@ -240,9 +237,9 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N petprj.fprj(asng, mus, txLUT, axLUT, np.array([-1], dtype=np.int32), Cnt, 1) # > combine attenuation and normalisation ansng = asng * nsng - #========================================================================= + # ======================================================================== - #========================================================================= + # ======================================================================== # Randoms # ------------------------------------------------------------------------- if isinstance(randsino, np.ndarray): @@ -251,9 +248,9 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N else: rsino, snglmap = randoms(hst, scanner_params) rsng = mmraux.remgaps(rsino, txLUT, Cnt) - #========================================================================= + # ======================================================================== - #========================================================================= + # ======================================================================== # SCAT # ------------------------------------------------------------------------- 
if recmod == 2: @@ -277,25 +274,27 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N " Check if it's present or the path is correct.") else: ssng = np.zeros(rsng.shape, dtype=rsng.dtype) - #========================================================================= + # ======================================================================== log.info('------ OSEM (%d) -------' % itr) # ------------------------------------ - Sn = 14 # number of subsets - # -get one subset to get number of projection bins in a subset + Sn = 14 # number of subsets + + # -get one subset to get number of projection bins in a subset Sprj, s = get_subsets14(0, scanner_params) Nprj = len(Sprj) - # -init subset array and sensitivity image for a given subset + # -init subset array and sensitivity image for a given subset sinoTIdx = np.zeros((Sn, Nprj + 1), dtype=np.int32) - # -init sensitivity images for each subset + # -init sensitivity images for each subset imgsens = np.zeros((Sn, Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) for n in range(Sn): - sinoTIdx[n, 0] = Nprj # first number of projection for the given subset + # first number of projection for the given subset + sinoTIdx[n, 0] = Nprj sinoTIdx[n, 1:], s = get_subsets14(n, scanner_params) - # sensitivity image + # sensitivity image petprj.bprj(imgsens[n, :, :, :], ansng[sinoTIdx[n, 1:], :], txLUT, axLUT, sinoTIdx[n, 1:], Cnt) - # ------------------------------------- + # ------------------------------------- # -mask for reconstructed image. 
anything outside it is set to zero msk = mmrimg.get_cylinder(Cnt, rad=mask_radius, xo=0, yo=0, unival=1, gpu_dim=True) > 0.9 @@ -340,7 +339,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # import pdb; pdb.set_trace() - #========================================================================= + # ======================================================================== # OSEM RECONSTRUCTION # ------------------------------------------------------------------------- with trange(itr, desc="OSEM", disable=log.getEffectiveLevel() > logging.INFO, @@ -356,7 +355,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # img[:]=0 itr = k break - if recmod >= 3 and (((k < itr - 1) and (itr > 1))): # or (itr==1) + if recmod >= 3 and k < itr - 1 and itr > 1: sct_time = time.time() sct = vsm(datain, mumaps, mmrimg.convert2e7(img, Cnt), scanner_params, histo=hst, rsino=rsino, emmsk=emmskS, return_ssrb=return_ssrb, @@ -372,19 +371,21 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # save images during reconstruction if requested if store_itr and k in store_itr: im = mmrimg.convert2e7(img * (dcycrr*qf*qf_loc), Cnt) - fout = os.path.join(opth, os.path.basename(datain['lm_bf'])[:8] \ - + frmno +'_t'+str(hst['t0'])+'-'+str(hst['t1'])+'sec' \ - +'_itr'+str(k)+fcomment+'_inrecon.nii.gz') + + fout = os.path.join( + opth, (os.path.basename(datain['lm_bf'])[:8] + + f"{frmno}_t{hst['t0']}-{hst['t1']}sec_itr{k}{fcomment}_inrecon.nii.gz")) nimpa.array2nii(im[::-1, ::-1, :], B, fout) log.info('recon time:%.3g' % (time.time() - stime)) - #========================================================================= + # ======================================================================== log.info('applying decay correction of %r' % dcycrr) log.info('applying quantification factor:%r to the whole image' % qf) log.info('for the frame duration of :%r' % hst['dur']) - img *= dcycrr * qf * qf_loc 
# additional factor for making it quantitative in absolute terms (derived from measurements) + # additional factor for making it quantitative in absolute terms (derived from measurements) + img *= dcycrr * qf * qf_loc # ---- save images ----- # -first convert to standard mMR image size @@ -393,26 +394,25 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # -description text to NIfTI # -attenuation number: if only bed present then it is 0.5 attnum = (1 * (np.sum(muh) > 0.5) + 1 * (np.sum(muo) > 0.5)) / 2. - descrip = 'alg=osem'+ \ - ';sub=14'+ \ - ';att='+str(attnum*(recmod>0))+ \ - ';sct='+str(1*(recmod>1))+ \ - ';spn='+str(Cnt['SPN'])+ \ - ';itr='+str(itr) +\ - ';fwhm=0' +\ - ';t0='+str(hst['t0']) +\ - ';t1='+str(hst['t1']) +\ - ';dur='+str(hst['dur']) +\ - ';qf='+str(qf) + descrip = (f"alg=osem" + f";sub=14" + f";att={attnum*(recmod>0)}" + f";sct={1*(recmod>1)}" + f";spn={Cnt['SPN']}" + f";itr={itr}" + f";fwhm=0" + f";t0={hst['t0']}" + f";t1={hst['t1']}" + f";dur={hst['dur']}" + f";qf={qf}") # > file name of the output reconstructed image # > (maybe used later even if not stored now) - fpet = os.path.join(opth, os.path.basename(datain['lm_bf']).split('.')[0] \ - + frmno +'_t'+str(hst['t0'])+'-'+str(hst['t1'])+'sec' \ - +'_itr'+str(itr)+fcomment+'.nii.gz') + fpet = os.path.join(opth, (os.path.basename(datain['lm_bf']).split('.')[0] + + f"{frmno}_t{hst['t0']}-{hst['t1']}sec_itr{itr}{fcomment}.nii.gz")) if store_img: - log.info('saving image to: ' + fpet) + log.info('saving image to: %s', fpet) nimpa.array2nii(im[::-1, ::-1, :], B, fpet, descrip=descrip) im_smo = None @@ -455,295 +455,3 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N recout = RecOut(im, fpet, im_smo, fsmo, B) return recout - - -# ============================================================================== -# EMML -# def emml( datain, mumaps, hst, txLUT, axLUT, Cnt, -# recmod=3, itr=10, fwhm=0., mask_radius=29., store_img=True, 
ret_sinos=False, sctsino = None, randsino = None, normcomp = None): - -# # subsets (when not used) -# sbs = np.array([-1], dtype=np.int32) - -# # get object and hardware mu-maps -# muh, muo = mumaps - -# # get the GPU version of the image dims -# mus = mmrimg.convert2dev(muo+muh, Cnt) - -# # remove gaps from the prompt sinogram -# psng = mmraux.remgaps(hst['psino'], txLUT, Cnt) - -# #========================================================================= -# # GET NORM -# # ------------------------------------------------------------------------- -# if normcomp == None: -# ncmp, _ = mmrnorm.get_components(datain, Cnt) -# else: -# ncmp = normcomp -# print 'w> using user-defined normalisation components' -# nrmsng = mmrnorm.get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=ncmp) -# #========================================================================= - -# #========================================================================= -# # Randoms -# # ------------------------------------------------------------------------- -# if randsino == None: -# rsino, snglmap = mmrhist.rand(hst['fansums'], txLUT, axLUT, Cnt) -# rsng = mmraux.remgaps(rsino, txLUT, Cnt) -# else: -# rsino = randsino -# rsng = mmraux.remgaps(randsino, txLUT, Cnt) -# #========================================================================= - -# #========================================================================= -# # ATTENUATION FACTORS FOR COMBINED OBJECT AND BED MU-MAP -# # ------------------------------------------------------------------------- -# # combine attenuation and norm together depending on reconstruction mode -# if recmod==0: -# asng = np.ones(psng.shape, dtype=np.float32) -# else: -# asng = np.zeros(psng.shape, dtype=np.float32) -# petprj.fprj(asng, mus, txLUT, axLUT, sbs, Cnt, 1) -# attnrmsng = asng*nrmsng -# #========================================================================= - -# #========================================================================= -# # SCATTER 
and the additive term -# # ------------------------------------------------------------------------- -# if recmod==2: -# if sctsino != None: -# # remove the gaps from the provided scatter sinogram -# ssng = mmraux.remgaps(sctsino, txLUT, Cnt) -# elif sctsino == None and os.path.isfile(datain['em_crr']): -# # estimate scatter from already reconstructed and corrected emission image -# emd = nimpa.prc.getnii(datain['em_crr'], Cnt) -# ssn, sssr, amsk = mmrsct.vsm(mumaps, emd['im'], datain, hst, rsn, 0.1, txLUT, axLUT, Cnt) -# ssng = mmraux.remgaps(ssn, txLUT, Cnt) -# else: -# print 'e> no emission image available for scatter estimation! check if it''s present or the path is correct.' -# sys.exit() -# else: -# ssng = np.zeros(rsng.shape, dtype=rsng.dtype) -# # form the additive term -# rssng = (rsng + ssng) / attnrmsng -# #========================================================================= - -# # mask for reconstructed image -# msk = mmrimg.get_cylinder(Cnt, rad=mask_radius, xo=0, yo=0, unival=1, gpu_dim=True)>0.9 -# # estimated image -# imrec = np.ones((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) -# # backprj image -# bim = np.zeros((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) -# # Get sensitivity image by backprojection -# sim = np.zeros((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) -# petprj.bprj(sim, attnrmsng, txLUT, axLUT, sbs, Cnt) -# # init estimate sino -# esng = np.zeros((Cnt['Naw'], Cnt['NSN11']), dtype=np.float32) - -# for k in range(itr): -# print '>--------- ITERATION', k, '-----------<' -# esng[:] = 0 -# petprj.fprj(esng, imrec, txLUT, axLUT, sbs, Cnt, 0) -# # esng *= attnrmsng -# esng += (rssng+ssng) -# # crr = attnrmsng*(psng/esng) -# crr = psng/esng -# bim[:] = 0 -# petprj.bprj(bim, crr, txLUT, axLUT, sbs, Cnt) -# bim /= sim -# imrec *= msk*bim -# imrec[np.isnan(imrec)] = 0 - -# if recmod>=3 and ( ((k1))): -# sct_time = time.time() -# ssn, sssr, amsk = mmrsct.vsm(mumaps, 
mmrimg.convert2e7(img, Cnt), datain, hst, rsn, scanner_params, prcntScl=0.1, emmsk=emmskS) -# ssng = mmraux.remgaps(ssn, txLUT, Cnt) / attnrmsng -# log.debug('scatter time:%.3g' % (time.time() - sct_time)) - -# # decay correction -# lmbd = np.log(2)/resources.riLUT[Cnt['ISOTOPE']]['thalf'] -# dcycrr = np.exp(lmbd*hst['t0'])*lmbd*hst['dur'] / (1-np.exp(-lmbd*hst['dur'])) -# # apply quantitative correction to the image -# qf = ncmp['qf'] / resources.riLUT[Cnt['ISOTOPE']]['BF'] / float(hst['dur']) -# log.debug('applying quantification factor:%r to the whole image for the frame duration of:%r' % (qf, hst['dur'])) -# imrec *= dcycrr * qf * 0.205 # additional factor for making it quantitative in absolute terms (derived from measurements) - -# # convert to standard mMR image size -# im = mmrimg.convert2e7(imrec, Cnt) - -# if fwhm>0: -# im = ndi.filters.gaussian_filter(im, fwhm2sig(fwhm, Cnt), mode='mirror') - -# # save images -# B = mmrimg.image_affine(datain, Cnt) -# fout = '' - -# if store_img: -# # description text to NIfTI -# # attenuation number: if only bed present then it is 0.5 -# attnum = ( 1*(np.sum(muh)>0.5)+1*(np.sum(muo)>0.5) ) / 2. 
-# descrip = 'alg=emml'+ \ -# ';sub=0'+ \ -# ';att='+str(attnum*(recmod>0))+ \ -# ';sct='+str(1*(recmod>1))+ \ -# ';spn='+str(Cnt['SPN'])+ \ -# ';itr='+str(itr)+ \ -# ';fwhm='+str(fwhm) +\ -# ';t0='+str(hst['t0']) +\ -# ';t1='+str(hst['t1']) +\ -# ';dur='+str(hst['dur']) +\ -# ';qf='+str(qf) -# fout = os.path.join(datain['corepath'], os.path.basename(datain['lm_dcm'])[:8]+'_emml_'+str(itr)+'.nii.gz') -# nimpa.array2nii( im[::-1,::-1,:], B, fout, descrip=descrip) - -# if ret_sinos and recmod>=3 and itr>1: -# RecOut = namedtuple('RecOut', 'im, fpet, affine, ssn, sssr, amsk, rsn') -# recout = RecOut(im, fout, B, ssn, sssr, amsk, rsn) -# else: -# RecOut = namedtuple('RecOut', 'im, fpet, affine') -# recout = RecOut(im, fout, B) - -# return recout - -# ============================================================================ -# OSEM - -# def osem14(datain, mumaps, hst, txLUT, axLUT, Cnt, -# recmod=3, itr=4, fwhm=0., mask_radius=29.): - -# muh, muo = mumaps -# mus = mmrimg.convert2dev(muo+muh, Cnt) - -# if Cnt['SPN']==1: -# snno = Cnt['NSN1'] -# elif Cnt['SPN']==11: -# snno = Cnt['NSN11'] - -# # subsets (when not used) -# sbs = np.array([-1], dtype=np.int32) - -# # remove gaps from the prompt sino -# psng = mmraux.remgaps(hst['psino'], txLUT, Cnt) - -# #========================================================================= -# # GET NORM -# # ------------------------------------------------------------------------- -# nrmsng = mmrnorm.get_sinog(datain, hst, axLUT, txLUT, Cnt) -# #========================================================================= - -# #========================================================================= -# # RANDOMS ESTIMATION -# # ------------------------------------------------------------------------- -# rsino, snglmap = mmrhist.rand(hst['fansums'], txLUT, axLUT, Cnt) -# rndsng = mmraux.remgaps(rsino, txLUT, Cnt) -# #========================================================================= - -# 
#========================================================================= -# # FORM THE ADDITIVE TERM -# # ------------------------------------------------------------------------- -# if recmod==0 or recmod==1 or recmod==3 or recmod==4: -# rssng = rndsng -# elif recmod==2: -# if os.path.isfile(datain['em_crr']): -# emd = nimpa.getnii(datain['em_crr']) -# ssn, sssr, amsk = mmrsct.vsm(mumaps, emd['im'], datain, hst, rsino, 0.1, txLUT, axLUT, Cnt) -# rssng = rndsng + mmraux.remgaps(ssn, txLUT, Cnt) -# else: -# print 'e> no emission image availble for scatter estimation! check if it''s present or the path is correct.' -# sys.exit() -# #========================================================================= - -# #========================================================================= -# # ATTENUATION FACTORS FOR COMBINED OBJECT AND BED MU-MAP -# # ------------------------------------------------------------------------- -# # combine attenuation and norm together depending on reconstruction mode -# if recmod==0 or recmod==2: -# attnrmsng = nrmsng -# else: -# attnrmsng = np.zeros(psng.shape, dtype=np.float32) -# petprj.fprj(attnrmsng, mus, txLUT, axLUT, sbs, Cnt, 1) -# attnrmsng *= nrmsng -# #========================================================================= - -# # mask for reconstructed image -# rcnmsk = mmrimg.get_cylinder(Cnt, rad=mask_radius, xo=0, yo=0, unival=1, gpu_dim=True) -# # ------------------------------------------------------------------------- -# # number of subsets -# Sn = 14 -# # get one subset to get number of projection bins in a subset -# Sprj, s = get_subsets14(0,txLUT,Cnt) -# # init subset array and sensitivity image for a given subset -# sinoTIdx = np.zeros((Sn, len(Sprj)), dtype=np.int32) -# sim = np.zeros((Sn, Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) -# for n in range(Sn): -# sinoTIdx[n,:], s = get_subsets14(n,txLUT,Cnt) -# petprj.bprj(sim[n,:,:,:], attnrmsng, txLUT, axLUT, sinoTIdx[n,:], Cnt) -# # 
-------------------------------------------------------------------------- - -# # estimated image -# xim = np.ones((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) -# # backprj image -# bim = np.ones((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) -# # init scatter sino (zeros) -# ssng = np.zeros((Cnt['Naw'], snno), dtype=np.float32) -# # sinogram subset mask -# sbmsk = np.zeros((txLUT['Naw'], snno), dtype=np.bool) -# # estimated sinogram (forward model) -# esng = np.zeros((txLUT['Naw'], snno), dtype=np.float32) - -# for k in range(itr): -# # randomly go through subsets + ssng -# sn = range(Sn) -# random.shuffle(sn) -# s = 0 -# for n in sn: -# print ' ' -# print k, '>--------- SUBSET', s, n, '-----------' -# s+=1 -# sbmsk[:] = False -# sbmsk[sinoTIdx[n,:],:] = True -# esng[:] = 0 -# petprj.fprj(esng, xim, txLUT, axLUT, sinoTIdx[n,:], Cnt, 0) -# esng *= attnrmsng -# if (recmod==3 or recmod==4): -# esng += (rssng+ssng)*sbmsk -# else: -# esng += rssng*sbmsk - -# # corrections to be backprojected to the image space -# crr = attnrmsng*(np.float32(psng)/esng) -# crr[np.isnan(crr)] = 0 -# crr[np.isinf(crr)] = 0 -# petprj.bprj(bim, crr, txLUT, axLUT, sinoTIdx[n,:], Cnt) -# # devide the backprojected image by the corresponding subset sensitivity image -# bim /= sim[n,:,:,:] -# # apply the reconstruction mask -# xim *= rcnmsk*bim -# # get rid of any NaN values, if any -# xim[np.isnan(xim)]=0 - -# # plt.figure(); plt.imshow(xim[:,:,70], interpolation='none', cmap='gray'); plt.show() - -# # plt.figure(); plt.imshow(xim[:,:,70], interpolation='none', cmap='gray'); plt.show() -# if (recmod==3 or recmod==4) and k0: -# imsmo = ndi.filters.gaussian_filter(im, fwhm2sig(fwhm, Cnt), mode='mirror') -# nimpa.array2nii( imsmo[::-1,::-1,:], B, -# os.path.dirname(datain['lm_dcm'])+'/'+os.path.basename(datain['lm_dcm'])[:8]+'_osem14_i'+str(itr)+'_s'+str(Cnt['SPN'])+'_r'+str(recmod)+'_smo'+str(fwhm)+'.nii') - -# if recmod==3: -# datain['em_crr'] = fout - -# 
return im, fout diff --git a/niftypet/nipet/prj/mmrsim.py b/niftypet/nipet/prj/mmrsim.py index e864706a..decacf01 100644 --- a/niftypet/nipet/prj/mmrsim.py +++ b/niftypet/nipet/prj/mmrsim.py @@ -92,7 +92,8 @@ def simulate_sino( else: # > 2D case with reduced rings # -------------------- - # > create a number of slices of the same chosen image slice for reduced (fast) 3D simulation + # > create a number of slices of the same chosen image slice + # for reduced (fast) 3D simulation rmu = mui[slice_idx, :, :] rmu.shape = (1,) + rmu.shape rmu = np.repeat(rmu, Cnt['rSZ_IMZ'], axis=0) @@ -195,7 +196,8 @@ def simulate_recon( nsinos = Cnt['NSN11'] else: # -------------------- - # > create a number of slides of the same chosen image slice for reduced (fast) 3D simulation + # > create a number of slides of the same chosen image slice + # for reduced (fast) 3D simulation rmu = mui[slice_idx, :, :] rmu.shape = (1,) + rmu.shape rmu = np.repeat(rmu, Cnt['rSZ_IMZ'], axis=0) @@ -238,7 +240,8 @@ def simulate_recon( # ------------------------------------ Sn = 14 # number of subsets - # -get one subset to get number of projection bins in a subset + + # -get one subset to get number of projection bins in a subset Sprj, s = mmrrec.get_subsets14(0, scanner_params) Nprj = len(Sprj) @@ -249,14 +252,16 @@ def simulate_recon( sim = np.zeros((Sn, Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) for n in trange(Sn, desc="sensitivity", leave=log.getEffectiveLevel() < logging.INFO): - sinoTIdx[n, 0] = Nprj # first number of projection for the given subset + # first number of projection for the given subset + sinoTIdx[n, 0] = Nprj sinoTIdx[n, 1:], s = mmrrec.get_subsets14(n, scanner_params) - # > sensitivity image + + # > sensitivity image petprj.bprj(sim[n, :, :, :], attsino[sinoTIdx[n, 1:], :], txLUT, axLUT, sinoTIdx[n, 1:], Cnt) - # ------------------------------------- + # ------------------------------------- - for k in trange(nitr, desc="OSEM", disable=log.getEffectiveLevel() 
> logging.INFO, + for _ in trange(nitr, desc="OSEM", disable=log.getEffectiveLevel() > logging.INFO, leave=log.getEffectiveLevel() < logging.INFO): petprj.osem(eimg, psng, rsng, ssng, nrmsino, attsino, sinoTIdx, sim, msk, psfkernel, txLUT, axLUT, Cnt) @@ -280,13 +285,14 @@ def psf(x, output=None): sim_inv[~msk] = 0 rndsct = rsng + ssng - for i in trange(nitr, desc="MLEM", disable=log.getEffectiveLevel() > logging.INFO, + for _ in trange(nitr, desc="MLEM", disable=log.getEffectiveLevel() > logging.INFO, leave=log.getEffectiveLevel() < logging.INFO): # > remove gaps from the measured sinogram # > then forward project the estimated image - # > after which divide the measured sinogram by the estimated sinogram (forward projected) - crrsino = mmraux.remgaps(measured_sino, txLUT, Cnt) / \ - (mmrprj.frwd_prj(psf(eim), scanner_params, dev_out=True) + rndsct) + # > after which divide the measured sinogram + # by the estimated sinogram (forward projected) + crrsino = (mmraux.remgaps(measured_sino, txLUT, Cnt) / + (mmrprj.frwd_prj(psf(eim), scanner_params, dev_out=True) + rndsct)) # > back project the correction factors sinogram bim = mmrprj.back_prj(crrsino, scanner_params) diff --git a/niftypet/nipet/sct/mmrsct.py b/niftypet/nipet/sct/mmrsct.py index b71635f0..7090dd7b 100644 --- a/niftypet/nipet/sct/mmrsct.py +++ b/niftypet/nipet/sct/mmrsct.py @@ -3,18 +3,14 @@ ''' import logging import os -import sys import time -from concurrent.futures import ThreadPoolExecutor from math import pi import nibabel as nib import numpy as np import scipy.ndimage as ndi -from scipy.interpolate import CloughTocher2DInterpolator, interp2d -from scipy.spatial import qhull -from scipy.special import erfc from scipy.interpolate import interp2d +from scipy.special import erfc from .. 
import mmr_auxe, mmraux, mmrnorm from ..img import mmrimg @@ -93,8 +89,7 @@ def get_scrystals(scanner_params): logtxt += '> [{}]: ring_i={}, ring_z={}\n'.format(ir, int(srng[ir, 0]), srng[ir, 1]) log.debug(logtxt) - - return dict(scrs=scrs, srng=srng, sirng=sct_irng, NSCRS=scrs.shape[0], NSRNG=NSRNG) + return {'scrs': scrs, 'srng': srng, 'sirng': sct_irng, 'NSCRS': scrs.shape[0], 'NSRNG': NSRNG} # ====================================================================== @@ -106,36 +101,29 @@ def get_sctlut2d(txLUT, scrs_def): # scatter/unscattered crystal x-coordinate (used for determining +/- sino segments) xsxu = np.zeros((scrs_def['NSCRS'], scrs_def['NSCRS']), dtype=np.int8) + scrs = scrs_def['scrs'] # > loop over unscattered crystals for uc in range(scrs_def['NSCRS']): - # > loop over scatter crystals for sc in range(scrs_def['NSCRS']): - # > sino linear index (full including any gaps) - # > scrs_def['scrs'] is a 2D array of rows [sct_crs_idx, mid_x, mid_y] - sct2aw[scrs_def['NSCRS']*uc + sc] = \ - txLUT['c2sFw'][ - int(scrs_def['scrs'][uc,0]), - int(scrs_def['scrs'][sc,0]) - ] - - # > scattered and unscattered crystal positions (used for determining +/- sino segments) - xs = scrs_def['scrs'][sc, 1] - xu = scrs_def['scrs'][uc, 1] - - if (xs > xu): + # > scrs is a 2D array of rows [sct_crs_idx, mid_x, mid_y] + sct2aw[scrs_def['NSCRS'] * uc + sc] = txLUT['c2sFw'][int(scrs[uc, 0]), + int(scrs[sc, 0])] + # > scattered and unscattered crystal positions + # (used for determining +/- sino segments) + if scrs[sc, 1] > scrs[uc, 1]: xsxu[uc, sc] = 1 - sct2aw.shape = (scrs_def['NSCRS'], scrs_def['NSCRS']) + # TODO: was sct2aw.shape = (scrs_def['NSCRS'], scrs_def['NSCRS']) + sct2aw.resize((scrs_def['NSCRS'], scrs_def['NSCRS'])) - return dict(sct2aw=sct2aw, xsxu=xsxu, c2sFw=txLUT['c2sFw']) + return {'sct2aw': sct2aw, 'xsxu': xsxu, 'c2sFw': txLUT['c2sFw']} # ====================================================================== -# 
====================================================================== def get_knlut(Cnt): ''' get Klein-Nishina LUTs @@ -157,8 +145,10 @@ def get_knlut(Cnt): alpha = 1 / (2-cosups) KNtmp = ((0.5 * Cnt['R02']) * alpha * alpha * (alpha + 1/alpha - (1 - cosups*cosups))) knlut[i, 0] = KNtmp / (2 * pi * Cnt['R02'] * CRSSavg) - knlut[i,1] = ( (1+alpha)/(alpha*alpha)*(2*(1+alpha)/(1+2*alpha)-1/alpha*np.log(1+2*alpha)) + \ - np.log(1+2*alpha)/(2*alpha)-(1+3*alpha)/((1+2*alpha)*(1+2*alpha)) ) / CRSSavg + knlut[i, 1] = ((1+alpha) / (alpha*alpha) * + (2 * (1+alpha) / + (1 + 2*alpha) - 1 / alpha * np.log(1 + 2*alpha)) + np.log(1 + 2*alpha) / + (2*alpha) - (1 + 3*alpha) / ((1 + 2*alpha) * (1 + 2*alpha))) / CRSSavg # Add energy resolution: if Cnt['ER'] > 0: @@ -167,8 +157,9 @@ def get_knlut(Cnt): (Cnt['LLD'] - alpha * Cnt['E511']) / (SIG511 * np.sqrt(2 * alpha))) # knlut[i,0] *= .5*erfc( (Cnt['LLD']-alpha*Cnt['E511'])/(SIG511) ); - # for large angles (small cosups) when the angle in GPU calculations is greater than COSUPSMX - if (i == 0): + # for large angles (small cosups) + # when the angle in GPU calculations is greater than COSUPSMX + if i == 0: knlut[0, 0] = 0 return knlut @@ -224,10 +215,12 @@ def get_sctLUT(scanner_params): mich = np.zeros((Cnt['NRNG'], Cnt['NRNG']), dtype=np.float32) mich2 = np.zeros((Cnt['NRNG'], Cnt['NRNG']), dtype=np.float32) - J, I = np.meshgrid(irng, irng) + J, I = np.meshgrid(irng, irng) # NOQA: E741 mich[J, I] = np.reshape(np.arange(scrs_def['NSRNG']**2), (scrs_def['NSRNG'], scrs_def['NSRNG'])) - # plt.figure(64), plt.imshow(mich, interpolation='none') + + # plt.figure(64) + # plt.imshow(mich, interpolation='none') for r1 in range(Cnt['RNG_STRT'], Cnt['RNG_END']): # border up and down @@ -291,13 +284,8 @@ def get_sctLUT(scanner_params): # plt.figure(65), plt.imshow(mich2, interpolation='none') sctLUT = { - 'sctaxR': sctaxR, - 'sctaxW': sctaxW, - 'offseg': offseg, - 'KN': KN, - 'mich_chck': [mich, mich2], - **scrs_def, - **sctlut2d,} + 'sctaxR': 
sctaxR, 'sctaxW': sctaxW, 'offseg': offseg, 'KN': KN, 'mich_chck': [mich, mich2], + **scrs_def, **sctlut2d} return sctLUT @@ -440,9 +428,8 @@ def vsm( muh, muo = mumaps if emmsk and not os.path.isfile(datain['em_nocrr']): - log.info( - 'reconstructing emission data without scatter and attenuation corrections for mask generation...' - ) + log.info('reconstructing emission data without scatter and attenuation corrections' + ' for mask generation...') recnac = mmrrec.osemone(datain, mumaps, histo, scanner_params, recmod=0, itr=3, fwhm=2.0, store_img=True) datain['em_nocrr'] = recnac.fpet @@ -486,10 +473,11 @@ def vsm( muim = muo + muh emim = em - muim = ndi.interpolation.zoom(muim, Cnt['SCTSCLMU'], order=3) #(0.499, 0.5, 0.5) - emim = ndi.interpolation.zoom(emim, Cnt['SCTSCLEM'], order=3) #(0.34, 0.33, 0.33) + muim = ndi.interpolation.zoom(muim, Cnt['SCTSCLMU'], order=3) # (0.499, 0.5, 0.5) + emim = ndi.interpolation.zoom(emim, Cnt['SCTSCLEM'], order=3) # (0.34, 0.33, 0.33) - # -smooth the mu-map for mask creation. the mask contains voxels for which attenuation ray LUT is found. + # -smooth the mu-map for mask creation. + # the mask contains voxels for which attenuation ray LUT is found. 
if fwhm_input > 0.: smomu = ndi.filters.gaussian_filter(muim, fwhm2sig(fwhm_input, Cnt), mode='mirror') mumsk = np.int8(smomu > 0.003) @@ -500,11 +488,11 @@ def vsm( NSCRS, NSRNG = sctLUT['NSCRS'], sctLUT['NSRNG'] sctout = { 'sct_3d': np.zeros((Cnt['TOFBINN'], snno_, NSCRS, NSCRS), dtype=np.float32), - 'sct_val': np.zeros((Cnt['TOFBINN'], NSRNG, NSCRS, NSRNG, NSCRS), dtype=np.float32),} + 'sct_val': np.zeros((Cnt['TOFBINN'], NSRNG, NSCRS, NSRNG, NSCRS), dtype=np.float32)} - #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> + # <<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> nifty_scatter.vsm(sctout, muim, mumsk, emim, sctLUT, axLUT, Cnt) - #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> + # <<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> sct3d = sctout['sct_3d'] sctind = sctLUT['sct2aw'] @@ -580,14 +568,15 @@ def vsm( rssr[ssrlut[i], :, :] += rsino[i, :, :] # ATTENUATION FRACTIONS for scatter only regions, and NORMALISATION for all SCATTER - #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> + # <<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> currentspan = Cnt['SPN'] Cnt['SPN'] = 1 atto = np.zeros((txLUT['Naw'], Cnt['NSN1']), dtype=np.float32) petprj.fprj(atto, mu_sctonly, txLUT, axLUT, np.array([-1], dtype=np.int32), Cnt, 1) atto = mmraux.putgaps(atto, txLUT, Cnt) # -------------------------------------------------------------- - # > get norm components setting the geometry and axial to ones as they are accounted for differently + # > get norm components setting the geometry and axial to ones + # as they are accounted for differently nrmcmp['geo'][:] = 1 nrmcmp['axe1'][:] = 1 # get sino with no gaps @@ -611,7 +600,7 @@ def vsm( nrm = mmraux.putgaps(nrmg, txLUT, Cnt) # 
-------------------------------------------------------------- - #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> + # <<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> # get the mask for the object from uncorrected emission image if emmsk and os.path.isfile(datain['em_nocrr']): @@ -632,9 +621,9 @@ def vsm( else: mssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.bool) - #<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> + # <<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> - #======== SCALING ======== + # ======= SCALING ======== # > scale scatter using non-TOF SSRB sinograms # > gap mask @@ -663,7 +652,7 @@ def vsm( # > scatter SSRB sinogram output sssr[sni, :, :] *= nrmsssr[sni, :, :] * scl_ssr[sni] - #=== scale scatter for the full-size sinogram === + # === scale scatter for the full-size sinogram === sss = np.zeros((snno, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) for i in range(snno): sss[i, :, :] = ssn[i, :, :] * scl_ssr[ssrlut[i]] * saxnrm[i] * nrm[i, :, :] @@ -681,7 +670,7 @@ def vsm( plot(np.sum(rssr+sssr,axis=(0,1))) ''' - #=== OUTPUT === + # === OUTPUT === if return_uninterp: out['uninterp'] = sct3d out['indexes'] = sctind diff --git a/setup.py b/setup.py index 6166b291..bb61981b 100644 --- a/setup.py +++ b/setup.py @@ -4,8 +4,6 @@ for namespace 'niftypet'. 
""" import logging -import os -import platform import re import sys from pathlib import Path @@ -87,22 +85,8 @@ def chck_sct_h(Cnt): scth = sct_h[i0:i1] # list of constants which will be kept in sych from Python cnt_list = [ - "SS_IMX", - "SS_IMY", - "SS_IMZ", - "SSE_IMX", - "SSE_IMY", - "SSE_IMZ", - "NCOS", - "SS_VXY", - "SS_VXZ", - "IS_VXZ", - "SSE_VXY", - "SSE_VXZ", - "R_RING", - "R_2", - "IR_RING", - "SRFCRS",] + "SS_IMX", "SS_IMY", "SS_IMZ", "SSE_IMX", "SSE_IMY", "SSE_IMZ", "NCOS", "SS_VXY", "SS_VXZ", + "IS_VXZ", "SSE_VXY", "SSE_VXZ", "R_RING", "R_2", "IR_RING", "SRFCRS"] flg = False for i, s in enumerate(cnt_list): m = re.search("(?<=#define " + s + r")\s*\d*\.*\d*", scth) @@ -125,7 +109,7 @@ def chck_sct_h(Cnt): // SCATTER IMAGE SIZE AND PROPERTIES // SS_* are used for the mu-map in scatter calculations // SSE_* are used for the emission image in scatter calculations - // R_RING, R_2, IR_RING are ring radius, squared radius and inverse of the radius, respectively. + // R_RING, R_2, IR_RING: ring radius, squared radius, inverse radius // NCOS is the number of samples for scatter angular sampling """) @@ -168,10 +152,11 @@ def check_constants(): cs.resources_setup(gpu=False) # install resources.py - # check and update the constants in C headers according to resources.py + +# check and update the constants in C headers according to resources.py check_constants() try: - gpuarch = cs.dev_setup() # update resources.py with a supported GPU device + gpuarch = cs.dev_setup() # update resources.py with a supported GPU device except Exception as exc: log.error("could not set up CUDA:\n%s", exc) diff --git a/tests/conftest.py b/tests/conftest.py index be9e03a0..2e3390ee 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,4 @@ -from os import getenv, path +from os import getenv from pathlib import Path import pytest diff --git a/tests/test_amyloid_pvc.py b/tests/test_amyloid_pvc.py index af4556e7..a7bd979f 100644 --- a/tests/test_amyloid_pvc.py +++ 
b/tests/test_amyloid_pvc.py @@ -1,8 +1,7 @@ import errno import logging from collections.abc import Iterable -from os import fspath, path -from textwrap import dedent +from os import fspath import numpy as np import pytest @@ -12,39 +11,36 @@ # segmentation/parcellation for PVC, with unique regions numbered from 0 onwards pvcroi = [] -pvcroi.append([66, 67] + list(range(81, 95))) # white matter -pvcroi.append([36]) # brain stem -pvcroi.append([35]) # pons -pvcroi.append([39, 40, 72, 73, 74]) # cerebellum GM -pvcroi.append([41, 42]) # cerebellum WM -pvcroi.append([48, 49]) # hippocampus -pvcroi.append([167, 168]) # posterior cingulate gyrus -pvcroi.append([139, 140]) # middle cingulate gyrus -pvcroi.append([101, 102]) # anterior cingulate gyrus -pvcroi.append([169, 170]) # precuneus -pvcroi.append([32, 33]) # amygdala -pvcroi.append([37, 38]) # caudate -pvcroi.append([56, 57]) # pallidum -pvcroi.append([58, 59]) # putamen -pvcroi.append([60, 61]) # thalamus -pvcroi.append([175, 176, 199, 200]) # parietal without precuneus -pvcroi.append([133, 134, 155, 156, 201, 202, 203, 204]) # temporal -pvcroi.append([4, 5, 12, 16, 43, 44, 47, 50, 51, 52, 53]) # CSF -pvcroi.append([24, 31, 62, 63, 70, 76, 77, 96, 97]) # basal ganglia + optic chiasm +pvcroi.append([66, 67] + list(range(81, 95))) # white matter +pvcroi.append([36]) # brain stem +pvcroi.append([35]) # pons +pvcroi.append([39, 40, 72, 73, 74]) # cerebellum GM +pvcroi.append([41, 42]) # cerebellum WM +pvcroi.append([48, 49]) # hippocampus +pvcroi.append([167, 168]) # posterior cingulate gyrus +pvcroi.append([139, 140]) # middle cingulate gyrus +pvcroi.append([101, 102]) # anterior cingulate gyrus +pvcroi.append([169, 170]) # precuneus +pvcroi.append([32, 33]) # amygdala +pvcroi.append([37, 38]) # caudate +pvcroi.append([56, 57]) # pallidum +pvcroi.append([58, 59]) # putamen +pvcroi.append([60, 61]) # thalamus +pvcroi.append([175, 176, 199, 200]) # parietal without precuneus +pvcroi.append([133, 134, 155, 156, 201, 
202, 203, 204]) # temporal +pvcroi.append([4, 5, 12, 16, 43, 44, 47, 50, 51, 52, 53]) # CSF +pvcroi.append([24, 31, 62, 63, 70, 76, 77, 96, 97]) # basal ganglia + optic chiasm + +# remaining neocortex pvcroi.append( list(range(103, 110 + 1)) + list(range(113, 126 + 1)) + list(range(129, 130 + 1)) + list(range(135, 138 + 1)) + list(range(141, 154 + 1)) + list(range(157, 158 + 1)) + list(range(161, 166 + 1)) + list(range(171, 174 + 1)) + list(range(177, 188 + 1)) + - list(range(191, 198 + 1)) + list(range(205, 208 + 1))) # remaining neocortex - # expected %error for static (SUVr) and PVC reconstructions + list(range(191, 198 + 1)) + list(range(205, 208 + 1))) + +# expected %error for static (SUVr) and PVC reconstructions emape_basic = 0.1 -emape_algnd = { - "pet": 3.0, - "pos": 0.1, - "trm": 3.0, - "pvc": 3.0, - "hmu": 0.01, - "omu": 3.0,} +emape_algnd = {"pet": 3.0, "pos": 0.1, "trm": 3.0, "pvc": 3.0, "hmu": 0.01, "omu": 3.0} @pytest.fixture(scope="session") @@ -81,11 +77,10 @@ def refimg(folder_ref): spm = folder_ref / "dyn_aligned" / "spm" niftyreg = folder_ref / "dyn_aligned" / "niftyreg" refpaths = { - "histo": {"p": 1570707830, "d": 817785422}, - "basic": { + "histo": {"p": 1570707830, "d": 817785422}, "basic": { "pet": basic / "17598013_t-3000-3600sec_itr-4_suvr.nii.gz", "omu": basic / "mumap-from-DICOM_no-alignment.nii.gz", - "hmu": basic / "hardware_umap.nii.gz",}, + "hmu": basic / "hardware_umap.nii.gz"}, "aligned": { "spm": { "hmu": spm / "hardware_umap.nii.gz", @@ -93,41 +88,36 @@ def refimg(folder_ref): "pos": spm / "17598013_t0-3600sec_itr2_AC-UTE.nii.gz", "pet": spm / "17598013_nfrm-2_itr-4.nii.gz", "trm": spm / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2.nii.gz", - "pvc": spm / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2_PVC.nii.gz",}, + "pvc": spm / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2_PVC.nii.gz"}, "niftyreg": { "hmu": niftyreg / "hardware_umap.nii.gz", "omu": niftyreg / "mumap-PCT-aligned-to_t0-3600_AC.nii.gz", "pos": niftyreg 
/ "17598013_t0-3600sec_itr2_AC-UTE.nii.gz", "pet": niftyreg / "17598013_nfrm-2_itr-4.nii.gz", "trm": niftyreg / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2.nii.gz", - "pvc": niftyreg / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2_PVC.nii.gz",},}, - } + "pvc": niftyreg / "17598013_nfrm-2_itr-4_trimmed-upsampled-scale-2_PVC.nii.gz"}}} testext = { "basic": { "pet": "static reconstruction with unaligned UTE mu-map", "hmu": "hardware mu-map for the static unaligned reconstruction", - "omu": "object mu-map for the static unaligned reconstruction",}, - "aligned": { - "hmu": "hardware mu-map for the 2-frame aligned reconstruction", - "omu": "object mu-map for the 2-frame aligned reconstruction", - "pos": "AC reconstruction for positioning (full acquisition used)", - "pet": "2-frame scan with aligned UTE mu-map", - "trm": "trimming post reconstruction", - "pvc": "PVC post reconstruction",},} + "omu": "object mu-map for the static unaligned reconstruction"}, "aligned": { + "hmu": "hardware mu-map for the 2-frame aligned reconstruction", + "omu": "object mu-map for the 2-frame aligned reconstruction", + "pos": "AC reconstruction for positioning (full acquisition used)", + "pet": "2-frame scan with aligned UTE mu-map", + "trm": "trimming post reconstruction", "pvc": "PVC post reconstruction"}} # check basic files - frefs = refpaths["basic"] - for k, v in frefs.items(): + for k, v in refpaths["basic"].items(): if not v.is_file(): - raise FileNotFoundError(errno.ENOENT, v) + raise FileNotFoundError(errno.ENOENT, f"{k}: {v}") # check reg tools: niftyreg and spm - frefs = refpaths["aligned"] - for r in frefs: - for k, v in frefs[r].items(): + for r, frefs in refpaths["aligned"].items(): + for k, v in frefs.items(): if not v.is_file(): - raise FileNotFoundError(errno.ENOENT, v) + raise FileNotFoundError(errno.ENOENT, f"{k}[{r}]: {v}") return refpaths, testext @@ -208,12 +198,8 @@ def test_aligned_reconstruction(reg_tool, mMRpars, datain, muhdct, refimg, tmp_p ) testout = { 
- "pet": recon["fpet"], - "hmu": muhdct["im"], - "omu": muopct["im"], - "pos": muopct["fpet"], - "trm": recon["trimmed"]["fpet"], - "pvc": recon["trimmed"]["fpvc"],} + "pet": recon["fpet"], "hmu": muhdct["im"], "omu": muopct["im"], "pos": muopct["fpet"], + "trm": recon["trimmed"]["fpet"], "pvc": recon["trimmed"]["fpvc"]} for k in testext["aligned"]: diff = nimpa.imdiff(fspath(refpaths["aligned"][reg_tool][k]), testout[k], verbose=True, plot=False) From 6901f67e7b0c66b2f652d1d8d3266699fc3bf00d Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Fri, 8 Jan 2021 00:57:39 +0000 Subject: [PATCH 12/64] manual review --- niftypet/nipet/img/mmrimg.py | 3 +-- niftypet/nipet/img/pipe.py | 3 --- niftypet/nipet/lm/pviews.py | 4 ++-- niftypet/nipet/mmraux.py | 10 +++------- niftypet/nipet/prj/mmrprj.py | 1 - niftypet/nipet/sct/mmrsct.py | 7 +------ 6 files changed, 7 insertions(+), 21 deletions(-) diff --git a/niftypet/nipet/img/mmrimg.py b/niftypet/nipet/img/mmrimg.py index 4f21bf50..c55a085b 100644 --- a/niftypet/nipet/img/mmrimg.py +++ b/niftypet/nipet/img/mmrimg.py @@ -115,8 +115,7 @@ def image_affine(datain, Cnt, gantry_offset=False): def getmu_off(mu, Cnt, Offst=OFFSET_DEFAULT): - # pumber of voxels - nvx = mu.shape[0] + # phange the shape to 3D mu.shape = (Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMX']) diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index adcae76a..d181c9ae 100644 --- a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -80,8 +80,6 @@ def mmrchain( # decompose all the scanner parameters and constants Cnt = scanner_params['Cnt'] - txLUT = scanner_params['txLUT'] - axLUT = scanner_params['axLUT'] # ------------------------------------------------------------------------- # HISOTGRAM PRECEEDS FRAMES @@ -334,7 +332,6 @@ def mmrchain( # get the new mu-map from the just resampled file muodct = nimpa.getnii(fmu, output='all') muo = muodct['im'] - A = muodct['affine'] muo[muo < 0] = 0 output['fmureg'].append(fmu) else: diff 
--git a/niftypet/nipet/lm/pviews.py b/niftypet/nipet/lm/pviews.py index 76fa731a..069683cc 100644 --- a/niftypet/nipet/lm/pviews.py +++ b/niftypet/nipet/lm/pviews.py @@ -87,8 +87,8 @@ def video_dyn(hst, frms, outpth, axLUT, Cnt): plt.close('all') # ============== CONSTANTS ================== - VTIME = 4 - NRINGS = Cnt['NRNG'] + # VTIME = 4 + # NRINGS = Cnt['NRNG'] NSN11 = Cnt['NSN11'] NDSN = Cnt['NSEG0'] A = Cnt['NSANGLES'] diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index 6c6c8d0a..42e4a5bb 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -350,7 +350,6 @@ def axial_lut(Cnt): # (don't use ring range (1,63) as for this case no correction) if NRNG_c == 64: NSN1_c -= 12 - SEG0_c = 2*NRNG_c - 1 else: NRNG_c = NRNG NSN1_c = Cnt['NSN1'] @@ -563,7 +562,7 @@ def sino2ssr(sino, axLUT, Cnt): def reduce_rings(pars, rs=0, re=64): - ''' + """ Reduce the axial rings for faster reconstructions, particularly simulations. This function customises axial FOV for reduced rings in range(rs,re). Note it only works in span-1 and ring re is not included in the reduced rings. 
@@ -571,10 +570,7 @@ def reduce_rings(pars, rs=0, re=64): pars -- scanner parameters: constants, LUTs rs -- start ring re -- end ring (not included in the resulting reduced rings) - ''' - Cnt = pars['Cnt'] - axLUT = pars['axLUT'] - + """ pars['Cnt']['SPN'] = 1 # select the number of sinograms for the number of rings # RNG_STRT is included in detection @@ -630,7 +626,7 @@ def transaxial_lut(Cnt, visualisation=False): bw = 3.209 # > block gap [cm] - dg = 0.474 + # dg = 0.474 NTBLK = 56 alpha = 0.1122 # 2*pi/NTBLK crs = np.zeros((Cnt['NCRS'], 4), dtype=np.float32) diff --git a/niftypet/nipet/prj/mmrprj.py b/niftypet/nipet/prj/mmrprj.py index 4947e42f..e4a68a72 100644 --- a/niftypet/nipet/prj/mmrprj.py +++ b/niftypet/nipet/prj/mmrprj.py @@ -18,7 +18,6 @@ def trnx_prj(scanner_params, sino=None, im=None): Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] - axLUT = scanner_params['axLUT'] # if sino==None and im==None: # raise ValueError('Input sinogram or image has to be given.') diff --git a/niftypet/nipet/sct/mmrsct.py b/niftypet/nipet/sct/mmrsct.py index 7090dd7b..ef78750b 100644 --- a/niftypet/nipet/sct/mmrsct.py +++ b/niftypet/nipet/sct/mmrsct.py @@ -82,7 +82,6 @@ def get_scrystals(scanner_params): logtxt = '' srng = np.zeros((NSRNG, 2), dtype=np.float32) - z = 0.5 * (-Cnt['NRNG'] * Cnt['AXR'] + Cnt['AXR']) for ir in range(NSRNG): srng[ir, 0] = float(sct_irng[ir]) srng[ir, 1] = axLUT['rng'][sct_irng[ir], :].mean() @@ -115,9 +114,7 @@ def get_sctlut2d(txLUT, scrs_def): if scrs[sc, 1] > scrs[uc, 1]: xsxu[uc, sc] = 1 - # TODO: was sct2aw.shape = (scrs_def['NSCRS'], scrs_def['NSCRS']) - sct2aw.resize((scrs_def['NSCRS'], scrs_def['NSCRS'])) - + sct2aw.shape = scrs_def['NSCRS'], scrs_def['NSCRS'] return {'sct2aw': sct2aw, 'xsxu': xsxu, 'c2sFw': txLUT['c2sFw']} @@ -186,7 +183,6 @@ def get_sctLUT(scanner_params): # > decompose constants, transaxial and axial LUTs are extracted Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] - axLUT = 
scanner_params['axLUT'] # > get the Klein-Nishina LUT: KN = get_knlut(Cnt) @@ -605,7 +601,6 @@ def vsm( # get the mask for the object from uncorrected emission image if emmsk and os.path.isfile(datain['em_nocrr']): nim = nib.load(datain['em_nocrr']) - A = nim.get_sform() eim = nim.get_fdata(dtype=np.float32) eim = eim[:, ::-1, ::-1] eim = np.transpose(eim, (2, 1, 0)) From c042f52bdee53691c135ea90c62720358f33d9e3 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Fri, 8 Jan 2021 01:25:29 +0000 Subject: [PATCH 13/64] format: minor tidy --- niftypet/nipet/lm/mmrhist.py | 4 +++- niftypet/nipet/mmrnorm.py | 3 ++- niftypet/nipet/prj/mmrrec.py | 13 ++++--------- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/niftypet/nipet/lm/mmrhist.py b/niftypet/nipet/lm/mmrhist.py index 550e4a6e..20a99f3d 100644 --- a/niftypet/nipet/lm/mmrhist.py +++ b/niftypet/nipet/lm/mmrhist.py @@ -11,7 +11,9 @@ from niftypet import nimpa from .. import mmraux -from . import mmr_lmproc # CUDA extension module + +# CUDA extension module +from . import mmr_lmproc log = logging.getLogger(__name__) diff --git a/niftypet/nipet/mmrnorm.py b/niftypet/nipet/mmrnorm.py index a8795f16..95dc0321 100644 --- a/niftypet/nipet/mmrnorm.py +++ b/niftypet/nipet/mmrnorm.py @@ -8,7 +8,8 @@ import pydicom as dcm from pkg_resources import resource_filename -from . import mmr_auxe # auxiliary functions through Python extensions in CUDA +# auxiliary functions through Python extensions in CUDA +from . import mmr_auxe log = logging.getLogger(__name__) # ================================================================================================ diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index d5349d2f..33d81fd4 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -12,8 +12,8 @@ from niftypet import nimpa -from .. import resources # for isotope info -from .. import mmraux, mmrnorm +# resources contain isotope info +from .. 
import mmraux, mmrnorm, resources from ..img import mmrimg from ..lm.mmrhist import randoms from ..sct import vsm @@ -177,15 +177,10 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N else: opth = outpath - if (store_img is True) or (store_itr is not None): + if store_img is True or store_itr is not None: mmraux.create_dir(opth) - if ret_sinos: - return_ssrb = True - return_mask = True - else: - return_ssrb = False - return_mask = False + return_ssrb, return_mask = ret_sinos, ret_sinos # ---------- From 302d4250ab955b56bf804da3122fb69599a4cc32 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Fri, 8 Jan 2021 20:13:55 +0000 Subject: [PATCH 14/64] tests: hide failed stdout --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index b8da5c57..f8c36e6d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -89,4 +89,4 @@ exclude=.git,__pycache__,build,dist,.eggs timeout=3600 log_level=INFO python_files=tests/test_*.py -addopts=-v --tb=short -rxs -W=error -n=auto --durations=0 --durations-min=2 --cov=niftypet --cov-report=term-missing --cov-report=xml +addopts=-v --tb=short -rxs -W=error --show-capture=stderr --show-capture=log -n=auto --durations=0 --durations-min=2 --cov=niftypet --cov-report=term-missing --cov-report=xml From b92db043103c1f2e26641c659c161dc3262f18c8 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Fri, 8 Jan 2021 20:32:51 +0000 Subject: [PATCH 15/64] tests: force nimpa build --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1448c30f..8fb99009 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,7 +48,7 @@ jobs: fetch-depth: 0 - name: Run setup-python run: setup-python -p3.7 - - run: pip install -U -e .[dev] + - run: pip install -U --no-binary nimpa -e .[dev] - run: pytest - run: codecov env: From 
12156047536ebb0d77c8f17d6bb1b4fea8fb5328 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 10 Jan 2021 01:36:30 +0000 Subject: [PATCH 16/64] cmake: allow linking shared libs for external builds --- niftypet/CMakeLists.txt | 3 +++ niftypet/nipet/lm/CMakeLists.txt | 2 +- niftypet/nipet/prj/CMakeLists.txt | 2 +- niftypet/nipet/sct/CMakeLists.txt | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/niftypet/CMakeLists.txt b/niftypet/CMakeLists.txt index ebda8438..c74ad427 100644 --- a/niftypet/CMakeLists.txt +++ b/niftypet/CMakeLists.txt @@ -11,6 +11,9 @@ find_package(Python3 COMPONENTS Interpreter Development NumPy REQUIRED) find_package(CUDAToolkit REQUIRED) if(SKBUILD) find_package(PythonExtensions REQUIRED) +set(LIB_TYPE "MODULE") +else() +set(LIB_TYPE "SHARED") endif() cmake_policy(POP) diff --git a/niftypet/nipet/lm/CMakeLists.txt b/niftypet/nipet/lm/CMakeLists.txt index bdea7821..7eb12c82 100644 --- a/niftypet/nipet/lm/CMakeLists.txt +++ b/niftypet/nipet/lm/CMakeLists.txt @@ -5,7 +5,7 @@ include_directories(src) include_directories(${Python3_INCLUDE_DIRS}) include_directories(${Python3_NumPy_INCLUDE_DIRS}) -add_library(${PROJECT_NAME} MODULE ${SRC}) +add_library(${PROJECT_NAME} ${LIB_TYPE} ${SRC}) add_library(NiftyPET::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) target_include_directories(${PROJECT_NAME} PUBLIC "$" diff --git a/niftypet/nipet/prj/CMakeLists.txt b/niftypet/nipet/prj/CMakeLists.txt index 426c11e0..5c747c3b 100644 --- a/niftypet/nipet/prj/CMakeLists.txt +++ b/niftypet/nipet/prj/CMakeLists.txt @@ -5,7 +5,7 @@ include_directories(src) include_directories(${Python3_INCLUDE_DIRS}) include_directories(${Python3_NumPy_INCLUDE_DIRS}) -add_library(${PROJECT_NAME} MODULE ${SRC}) +add_library(${PROJECT_NAME} ${LIB_TYPE} ${SRC}) add_library(NiftyPET::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) target_include_directories(${PROJECT_NAME} PUBLIC "$" diff --git a/niftypet/nipet/sct/CMakeLists.txt b/niftypet/nipet/sct/CMakeLists.txt index 
b737ce0b..2dc879e9 100644 --- a/niftypet/nipet/sct/CMakeLists.txt +++ b/niftypet/nipet/sct/CMakeLists.txt @@ -5,7 +5,7 @@ include_directories(src) include_directories(${Python3_INCLUDE_DIRS}) include_directories(${Python3_NumPy_INCLUDE_DIRS}) -add_library(${PROJECT_NAME} MODULE ${SRC}) +add_library(${PROJECT_NAME} ${LIB_TYPE} ${SRC}) add_library(NiftyPET::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) target_include_directories(${PROJECT_NAME} PUBLIC "$" From 2cd1efd841fa35dbee45c19bba92ede11bf43fbd Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 10 Jan 2021 01:44:37 +0000 Subject: [PATCH 17/64] tests: misc minor framework updates --- .github/workflows/test.yml | 35 +++++++++++++++-------------------- .gitignore | 2 +- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8fb99009..c3ca4982 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,8 +9,6 @@ jobs: name: Check py${{ matrix.python }} steps: - uses: actions/checkout@v2 - with: - fetch-depth: 0 - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python }} @@ -25,7 +23,8 @@ jobs: pip install -U pre-commit sudo apt-get install -yqq clang-format - uses: reviewdog/action-setup@v1 - - name: comment + - if: github.event_name != 'schedule' + name: comment run: | if [[ $EVENT == pull_request ]]; then REPORTER=github-pr-review @@ -65,22 +64,18 @@ jobs: with: fetch-depth: 0 - uses: actions/setup-python@v2 - - run: pip install -U twine setuptools wheel setuptools_scm[toml] ninst scikit-build - - run: PATHTOOLS=$HOME/NiftyPET_tools HMUDIR=$HOME python setup.py sdist - - run: twine check dist/* - - if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') - run: twine upload dist/* + - id: dist + uses: casperdcl/deploy-pypi@v2 + with: + requirements: twine setuptools wheel setuptools_scm[toml] ninst scikit-build + build: sdist + password: ${{ secrets.PYPI_TOKEN }} + upload: ${{ 
github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') }} env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} - skip_existing: true - - if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') - id: collect_assets - name: Collect assets - run: | - echo "::set-output name=asset_path::$(ls dist/*.tar.gz)" - echo "::set-output name=asset_name::$(basename dist/*.tar.gz)" - git log --pretty='format:%d%n- %s%n%b---' $(git tag --sort=v:refname | tail -n2 | head -n1)..HEAD > _CHANGES.md + PATHTOOLS: ${{ github.workspace }}/NiftyPET_tools + HMUDIR: ${{ github.workspace }} + - name: Changelog + run: git log --pretty='format:%d%n- %s%n%b---' $(git tag --sort=v:refname | tail -n2 | head -n1)..HEAD > _CHANGES.md - if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') id: create_release uses: actions/create-release@v1 @@ -97,6 +92,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} with: upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ${{ steps.collect_assets.outputs.asset_path }} - asset_name: ${{ steps.collect_assets.outputs.asset_name }} + asset_path: dist/${{ steps.dist.outputs.targz }} + asset_name: ${{ steps.dist.outputs.targz }} asset_content_type: application/gzip diff --git a/.gitignore b/.gitignore index f927fcea..1457f1b8 100644 --- a/.gitignore +++ b/.gitignore @@ -13,5 +13,5 @@ MANIFEST /*.egg*/ /.eggs/ -/.coverage +/.coverage* /coverage.xml From d45f775ea09aed740478cc33c30b3946080b787b Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 12 Jan 2021 15:41:33 +0000 Subject: [PATCH 18/64] tests: add runner deps --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c3ca4982..6d4de98f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,7 +39,7 @@ jobs: - run: pre-commit run -a --show-diff-on-failure test: if: 
github.event_name != 'pull_request' || github.head_ref != 'devel' - runs-on: [self-hosted, cuda, python] + runs-on: [self-hosted, python, cuda, matlab] name: Test steps: - uses: actions/checkout@v2 From 2eb5fb69029d4ac325f5fa068a70542fffb5e940 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 12 Jan 2021 16:53:30 +0000 Subject: [PATCH 19/64] logging: purge VERBOSE --- niftypet/nipet/mmraux.py | 51 ++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index 42e4a5bb..546e000a 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -809,9 +809,8 @@ def transaxial_lut(Cnt, visualisation=False): # ------------------------------------------------------------------------------------------------ -def get_npfiles(dfile, datain, v=False): - logger = log.info if v else log.debug - logger( +def get_npfiles(dfile, datain): + log.debug( dedent('''\ ------------------------------------------------------------------ file: {} @@ -821,29 +820,28 @@ def get_npfiles(dfile, datain, v=False): # pCT mu-map if os.path.basename(dfile) == 'mumap_pCT.npz': datain['mumapCT'] = dfile - logger('mu-map for the object.') + log.debug('mu-map for the object.') # DICOM UTE/Dixon mu-map if os.path.basename(dfile) == 'mumap-from-DICOM.npz': datain['mumapNPY'] = dfile - logger('mu-map for the object.') + log.debug('mu-map for the object.') if os.path.basename(dfile) == 'hmumap.npz': datain['hmumap'] = dfile - logger('mu-map for hardware.') + log.debug('mu-map for hardware.') if os.path.basename(dfile)[:8] == 'sinos_s1': datain['sinos'] = dfile - logger('prompt sinogram data.') + log.debug('prompt sinogram data.') # if os.path.basename(dfile)[:9]=='sinos_s11': # datain['sinos11'] = dfile - # logger('prompt sinogram data in span-11.') + # log.debug('prompt sinogram data in span-11.') -def get_niifiles(dfile, datain, v=False): - logger = log.info if v else log.debug - logger( 
+def get_niifiles(dfile, datain): + log.debug( dedent('''\ ------------------------------------------------------------------ file: {} @@ -853,18 +851,18 @@ def get_niifiles(dfile, datain, v=False): # > NIfTI file of converted MR-based mu-map from DICOMs if os.path.basename(dfile).split('.nii')[0] == 'mumap-from-DICOM': datain['mumapNII'] = dfile - logger('mu-map for the object.') + log.debug('mu-map for the object.') # > NIfTI file of pseudo CT fpct = glob.glob(os.path.join(os.path.dirname(dfile), '*_synth.nii*')) if len(fpct) > 0: datain['pCT'] = fpct[0] - logger('pseudoCT of the object.') + log.debug('pseudoCT of the object.') fpct = glob.glob(os.path.join(os.path.dirname(dfile), '*_p[cC][tT].nii*')) if len(fpct) > 0: datain['pCT'] = fpct[0] - logger('pseudoCT of the object.') + log.debug('pseudoCT of the object.') # MR T1 fmri = glob.glob(os.path.join(os.path.dirname(dfile), '[tT]1*.nii*')) @@ -872,7 +870,7 @@ def get_niifiles(dfile, datain, v=False): bnm = os.path.basename(fmri[0]).lower() if not {'giflabels', 'parcellation', 'pct', 'n4bias'}.intersection(bnm): datain['T1nii'] = fmri[0] - logger('NIfTI for T1w of the object.') + log.debug('NIfTI for T1w of the object.') elif len(fmri) > 1: for fg in fmri: bnm = os.path.basename(fg).lower() @@ -888,7 +886,7 @@ def get_niifiles(dfile, datain, v=False): bnm = os.path.basename(fmri[0]).lower() if not {'giflabels', 'parcellation', 'pct'}.intersection(bnm): datain['T1N4'] = fmri[0] - logger('NIfTI for T1w of the object.') + log.debug('NIfTI for T1w of the object.') elif len(fmri) > 1: for fg in fmri: bnm = os.path.basename(fg).lower() @@ -902,37 +900,36 @@ def get_niifiles(dfile, datain, v=False): fbc = glob.glob(os.path.join(os.path.dirname(dfile), '*gifbc.nii*')) if len(fbc) == 1: datain['T1bc'] = fbc[0] - logger('NIfTI for bias corrected T1w of the object:\n{}'.format(fbc[0])) + log.debug('NIfTI for bias corrected T1w of the object:\n{}'.format(fbc[0])) fbc = glob.glob(os.path.join(os.path.dirname(dfile), 
'*[tT]1*BiasCorrected.nii*')) if len(fbc) == 1: datain['T1bc'] = fbc[0] - logger('NIfTI for bias corrected T1w of the object:\n{}'.format(fbc[0])) + log.debug('NIfTI for bias corrected T1w of the object:\n{}'.format(fbc[0])) # T1-based labels after parcellation flbl = glob.glob(os.path.join(os.path.dirname(dfile), '*giflabels.nii*')) if len(flbl) == 1: datain['T1lbl'] = flbl[0] - logger('NIfTI for regional parcellations of the object:\n{}'.format(flbl[0])) + log.debug('NIfTI for regional parcellations of the object:\n{}'.format(flbl[0])) flbl = glob.glob(os.path.join(os.path.dirname(dfile), '*[tT]1*[Pp]arcellation.nii*')) if len(flbl) == 1: datain['T1lbl'] = flbl[0] - logger('NIfTI for regional parcellations of the object:\n{}'.format(flbl[0])) + log.debug('NIfTI for regional parcellations of the object:\n{}'.format(flbl[0])) # reconstructed emission data without corrections, minimum 2 osem iter fpct = glob.glob(os.path.join(os.path.dirname(dfile), '*__ACbed.nii*')) if len(fpct) > 0: datain['em_nocrr'] = fpct[0] - logger('pseudoCT of the object.') + log.debug('pseudoCT of the object.') # reconstructed emission data with corrections, minimum 3 osem iter fpct = glob.glob(os.path.join(os.path.dirname(dfile), '*QNT*.nii*')) if len(fpct) > 0: datain['em_crr'] = fpct[0] - logger('pseudoCT of the object.') + log.debug('pseudoCT of the object.') def get_dicoms(dfile, datain, Cnt): - # v = Cnt['VERBOSE'] log.debug( dedent('''\ ------------------------------------------------------------------ @@ -941,7 +938,7 @@ def get_dicoms(dfile, datain, Cnt): ''').format(dfile)) d = dcm.dcmread(dfile) - dcmtype = nimpa.dcminfo(d, verbose=Cnt['VERBOSE']) + dcmtype = nimpa.dcminfo(d) # > check if it is norm file if 'mmr' in dcmtype and 'norm' in dcmtype: @@ -1067,8 +1064,6 @@ def get_dicoms(dfile, datain, Cnt): else: datain['#UTE1'] += 1 - if Cnt['VERBOSE']: print('') - def explore_input(fldr, params, print_paths=False, recurse=1): """ @@ -1094,9 +1089,9 @@ def explore_input(fldr, 
params, print_paths=False, recurse=1): # elif hasext(f, "bf"): # get_bf(f, datain, Cnt) elif hasext(f, ("npy", "npz", "dic")): - get_npfiles(fspath(f), datain, Cnt['VERBOSE']) + get_npfiles(fspath(f), datain) elif hasext(f, ("nii", "nii.gz")): - get_niifiles(fspath(f), datain, Cnt['VERBOSE']) + get_niifiles(fspath(f), datain) elif f.is_dir() and recurse: # go one level into subfolder extra = explore_input(f, params, recurse=recurse - 1) From 182db64f372dd0cf040376e31d19c2156f4fa968 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 19 Jan 2021 23:38:27 +0000 Subject: [PATCH 20/64] tests: skip check on devel PR --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6d4de98f..fa63580d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,6 +2,7 @@ name: Test on: [push, pull_request] jobs: check: + if: github.event_name != 'push' || github.ref != 'refs/heads/devel' runs-on: ubuntu-latest strategy: matrix: From 1c33a8fe0c1398213682b1147b6d2e36ccc97615 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 19 Jan 2021 23:38:44 +0000 Subject: [PATCH 21/64] tests: minor tidy --- tests/conftest.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2e3390ee..d977ce42 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,26 +1,26 @@ from os import getenv from pathlib import Path -import pytest +from pytest import fixture, skip HOME = Path(getenv("DATA_ROOT", "~")).expanduser() -@pytest.fixture(scope="session") +@fixture(scope="session") def folder_in(): Ab_PET_mMR_test = HOME / "Ab_PET_mMR_test" if not Ab_PET_mMR_test.is_dir(): - pytest.skip(f"""Cannot find Ab_PET_mMR_test in ${{DATA_ROOT:-~}} ({HOME}). + skip(f"""Cannot find Ab_PET_mMR_test in ${{DATA_ROOT:-~}} ({HOME}). Try running `python -m tests` to download it.
""") return Ab_PET_mMR_test -@pytest.fixture(scope="session") +@fixture(scope="session") def folder_ref(folder_in): Ab_PET_mMR_ref = folder_in / "testing_reference" / "Ab_PET_mMR_ref" if not Ab_PET_mMR_ref.is_dir(): - pytest.skip(f"""Cannot find Ab_PET_mMR_ref in + skip(f"""Cannot find Ab_PET_mMR_ref in ${{DATA_ROOT:-~}}/testing_reference ({HOME}/testing_reference). Try running `python -m tests` to download it. """) From 23b262df605ed27592cf872ff10c4b6e819b674c Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 19 Jan 2021 23:41:29 +0000 Subject: [PATCH 22/64] remove unneeded var - possible bug possibly indicative of bug in logic https://github.com/NiftyPET/NIPET/pull/32/files/901ccf53203f7f140f47d5819cca19a0c6649269#r553686258 --- niftypet/nipet/prj/mmrrec.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 33d81fd4..8233e896 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -192,11 +192,6 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # get the GPU version of the image dims mus = mmrimg.convert2dev(muo + muh, Cnt) - if Cnt['SPN'] == 1: - snno = Cnt['NSN1'] - elif Cnt['SPN'] == 11: - snno = Cnt['NSN11'] - # remove gaps from the prompt sino psng = mmraux.remgaps(hst['psino'], txLUT, Cnt) From 1d58843bec25484ea7c1cbecf243f8a2ac26629f Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 20 Jan 2021 02:47:54 +0000 Subject: [PATCH 23/64] format: update C style --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e746c633..a5eded12 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,4 +48,4 @@ repos: hooks: - id: clang-format files: \.(cc?|cuh?|cxx|cpp|h|hpp|hxx|java|js)$ - args: ['-fallback-style=none', '-style={BasedOnStyle: LLVM, ColumnLimit: 99}'] + args: ['-fallback-style=none',
'-style={BasedOnStyle: LLVM, ColumnLimit: 99, AllowShortBlocksOnASingleLine: true, AllowShortIfStatementsOnASingleLine: true, AllowShortLoopsOnASingleLine: true}'] From 7c6c2521cb9631acef160ffeffb868e3cf552d5a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 20 Jan 2021 02:48:33 +0000 Subject: [PATCH 24/64] format: clang-format --- niftypet/nipet/lm/src/hst.cu | 46 +++++++++------------------- niftypet/nipet/lm/src/lm_module.cu | 3 +- niftypet/nipet/lm/src/lmaux.cu | 33 +++++++------------- niftypet/nipet/lm/src/lmproc.cu | 25 +++++---------- niftypet/nipet/lm/src/rnd.cu | 45 +++++++++------------------ niftypet/nipet/prj/src/prj_module.cu | 26 +++++----------- niftypet/nipet/prj/src/prjb.cu | 21 +++++-------- niftypet/nipet/prj/src/prjf.cu | 18 ++++------- niftypet/nipet/prj/src/recon.cu | 42 +++++++++---------------- niftypet/nipet/sct/src/ray.cu | 18 ++++------- niftypet/nipet/sct/src/sct.cu | 31 ++++++------------- niftypet/nipet/sct/src/sct_module.cu | 24 +++++---------- niftypet/nipet/sct/src/sctaux.cu | 24 +++++---------- niftypet/nipet/src/aux_module.cu | 13 +++----- niftypet/nipet/src/norm.cu | 9 ++---- niftypet/nipet/src/scanner_0.cu | 31 ++++++------------- 16 files changed, 134 insertions(+), 275 deletions(-) diff --git a/niftypet/nipet/lm/src/hst.cu b/niftypet/nipet/lm/src/hst.cu index 5eb40fb9..b87b00fe 100644 --- a/niftypet/nipet/lm/src/hst.cu +++ b/niftypet/nipet/lm/src/hst.cu @@ -150,8 +150,7 @@ __global__ void hst(int *lm, unsigned int *psino, si_ssrb = c_ssrb[si]; // span-1 - if (span == 1) - addr = val; + if (span == 1) addr = val; // span-11 else if (span == 11) addr = si11 * NSBINANG + aw; @@ -256,8 +255,7 @@ curandGenerator_t h_rndgen; curandState *setup_curand() { // Setup RANDOM NUMBERS even when bootstrapping was not requested - if (LOG <= LOGINFO) - printf("\ni> setting up CUDA pseudorandom number generator... 
"); + if (LOG <= LOGINFO) printf("\ni> setting up CUDA pseudorandom number generator... "); curandState *d_prng_states; // cudaMalloc((void **)&d_prng_states, MIN(NSTREAMS, lmprop.nchnk)*BTHREADS*NTHREADS * @@ -267,8 +265,7 @@ curandState *setup_curand() { cudaMalloc((void **)&d_prng_states, BTHREADS * NTHREADS * sizeof(curandState)); setup_rand<<>>(d_prng_states); - if (LOG <= LOGINFO) - printf("DONE.\n"); + if (LOG <= LOGINFO) printf("DONE.\n"); return d_prng_states; } @@ -300,8 +297,7 @@ void seek_lm(FILE *f) { _fseeki64(f, seek_offset, SEEK_SET); //<<<<------------------- IMPORTANT!!! #endif - if (LOG <= LOGDEBUG) - printf("ic> fseek adrress: %zd\n", lmprop.lmoff + lmprop.atag[nchnkrd]); + if (LOG <= LOGDEBUG) printf("ic> fseek adrress: %zd\n", lmprop.lmoff + lmprop.atag[nchnkrd]); } void get_lm_chunk(FILE *f, int stream_idx) { @@ -325,8 +321,7 @@ void get_lm_chunk(FILE *f, int stream_idx) { // Set a flag: stream[i] is free now and the new data is ready. dataready[stream_idx] = true; - if (LOG <= LOGDEBUG) - printf("[%4d / %4d] chunks read\n\n", nchnkrd, lmprop.nchnk); + if (LOG <= LOGDEBUG) printf("[%4d / %4d] chunks read\n\n", nchnkrd, lmprop.nchnk); } //================================================================================================ @@ -345,8 +340,7 @@ void CUDART_CB MyCallback(cudaStream_t stream, cudaError_t status, void *data) { get_lm_chunk(fr, stream_idx); fclose(fr); } - if (LOG <= LOGDEBUG) - printf("\n"); + if (LOG <= LOGDEBUG) printf("\n"); } //================================================================================ @@ -370,8 +364,7 @@ void gpu_hst(unsigned int *d_psino, // check which device is going to be used int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); //--- INITIALISE GPU RANDOM GENERATOR if (Cnt.BTP > 0) { @@ -386,8 +379,7 @@ void gpu_hst(unsigned int *d_psino, 
curandDiscreteDistribution_t poisson_hst; // normally instead of Cnt.BTPRT I would have 1.0 if expecting the same // number of resampled events as in the original file (or close to) - if (Cnt.BTP == 2) - curandCreatePoissonDistribution(Cnt.BTPRT, &poisson_hst); + if (Cnt.BTP == 2) curandCreatePoissonDistribution(Cnt.BTPRT, &poisson_hst); //--- // single slice rebinning LUT to constant memory @@ -413,8 +405,7 @@ void gpu_hst(unsigned int *d_psino, // cumulative sum of the above segment def int cumSeg[nSEG]; cumSeg[0] = 0; - for (int i = 1; i < nSEG; i++) - cumSeg[i] = cumSeg[i - 1] + sinoSeg[i - 1]; + for (int i = 1; i < nSEG; i++) cumSeg[i] = cumSeg[i - 1] + sinoSeg[i - 1]; cudaMemcpyToSymbol(c_cumSeg, cumSeg, nSEG * sizeof(int)); @@ -428,14 +419,11 @@ void gpu_hst(unsigned int *d_psino, // Get the number of streams to be used int nstreams = MIN(NSTREAMS, lmprop.nchnk); - if (Cnt.LOG <= LOGINFO) - printf("\ni> creating %d CUDA streams... ", nstreams); + if (Cnt.LOG <= LOGINFO) printf("\ni> creating %d CUDA streams... 
", nstreams); cudaStream_t *stream = new cudaStream_t[nstreams]; // cudaStream_t stream[nstreams]; - for (int i = 0; i < nstreams; ++i) - HANDLE_ERROR(cudaStreamCreate(&stream[i])); - if (Cnt.LOG <= LOGINFO) - printf("DONE.\n"); + for (int i = 0; i < nstreams; ++i) HANDLE_ERROR(cudaStreamCreate(&stream[i])); + if (Cnt.LOG <= LOGINFO) printf("DONE.\n"); // ****** check memory usage getMemUse(Cnt); @@ -453,9 +441,7 @@ void gpu_hst(unsigned int *d_psino, // Jump the any LM headers seek_lm(fr); - for (int i = 0; i < nstreams; i++) { - get_lm_chunk(fr, i); - } + for (int i = 0; i < nstreams; i++) { get_lm_chunk(fr, i); } fclose(fr); if (Cnt.LOG <= LOGINFO) { @@ -512,8 +498,7 @@ void gpu_hst(unsigned int *d_psino, cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGDEBUG) - printf("+> histogramming DONE in %fs.\n\n", 0.001 * elapsedTime); + if (Cnt.LOG <= LOGDEBUG) printf("+> histogramming DONE in %fs.\n\n", 0.001 * elapsedTime); for (int i = 0; i < nstreams; ++i) { cudaError_t err = cudaStreamSynchronize(stream[i]); @@ -536,8 +521,7 @@ void gpu_hst(unsigned int *d_psino, cudaFree(d_sn1_rno); // destroy the histogram for parametric bootstrap - if (Cnt.BTP == 2) - curandDestroyDistribution(poisson_hst); + if (Cnt.BTP == 2) curandDestroyDistribution(poisson_hst); //***** return; diff --git a/niftypet/nipet/lm/src/lm_module.cu b/niftypet/nipet/lm/src/lm_module.cu index a9f0f4f5..23491321 100644 --- a/niftypet/nipet/lm/src/lm_module.cu +++ b/niftypet/nipet/lm/src/lm_module.cu @@ -75,8 +75,7 @@ static PyObject *mmr_lminfo(PyObject *self, PyObject *args) { //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "s", &flm)) - return NULL; + if (!PyArg_ParseTuple(args, "s", &flm)) return NULL; //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FILE *fr; diff --git a/niftypet/nipet/lm/src/lmaux.cu 
b/niftypet/nipet/lm/src/lmaux.cu index 33c1c15a..8715d20a 100644 --- a/niftypet/nipet/lm/src/lmaux.cu +++ b/niftypet/nipet/lm/src/lmaux.cu @@ -31,8 +31,7 @@ void getLMinfo(char *flm, const Cnst Cnt) { fseek(fr, 0, SEEK_END); size_t nbytes = ftell(fr); size_t ele = nbytes / sizeof(int); - if (Cnt.LOG <= LOGINFO) - printf("i> number of elements in the list mode file: %lu\n", ele); + if (Cnt.LOG <= LOGINFO) printf("i> number of elements in the list mode file: %lu\n", ele); rewind(fr); #endif @@ -42,14 +41,12 @@ void getLMinfo(char *flm, const Cnst Cnt) { _stati64(flm, &bufStat); size_t nbytes = bufStat.st_size; size_t ele = nbytes / sizeof(int); - if (Cnt.LOG <= LOGINFO) - printf("i> number of elements in the list mode file: %lu\n", ele); + if (Cnt.LOG <= LOGINFO) printf("i> number of elements in the list mode file: %lu\n", ele); #endif //--try reading the whole lot to memory #if RD2MEM - if (Cnt.LOG <= LOGINFO) - printf("i> reading the whole file..."); + if (Cnt.LOG <= LOGINFO) printf("i> reading the whole file..."); if (NULL == (lm = (int *)malloc(ele * sizeof(int)))) { printf("malloc failed\n"); return; @@ -59,8 +56,7 @@ void getLMinfo(char *flm, const Cnst Cnt) { fprintf(stderr, "Reading error: r = %lu and ele = %lu\n", r, ele); exit(3); } - if (Cnt.LOG <= LOGINFO) - printf("DONE.\n\n"); + if (Cnt.LOG <= LOGINFO) printf("DONE.\n\n"); rewind(fr); #endif @@ -118,8 +114,7 @@ void getLMinfo(char *flm, const Cnst Cnt) { // first time tag is also the time offset used later on. if (first_ttag < last_ttag) { toff = first_ttag; - if (Cnt.LOG <= LOGINFO) - printf("i> using time offset: %d\n", toff); + if (Cnt.LOG <= LOGINFO) printf("i> using time offset: %d\n", toff); } else { fprintf(stderr, "Weird time stamps. The first and last time tags are: %d and %d\n", first_ttag, last_ttag); @@ -129,17 +124,14 @@ void getLMinfo(char *flm, const Cnst Cnt) { int nitag = ((last_ttag - toff) + ITIME - 1) / ITIME; // # integration time tags (+1 for the end). 
- if (Cnt.LOG <= LOGINFO) - printf("i> number of report itags is: %d\n", nitag); + if (Cnt.LOG <= LOGINFO) printf("i> number of report itags is: %d\n", nitag); // divide the data into data chunks // the default is to read 1GB to be dealt with all streams (default: 32) int nchnk = 10 + (ele + ELECHNK - 1) / ELECHNK; // plus ten extra... - if (Cnt.LOG <= LOGINFO) - printf("i> # chunks of data (initial): %d\n\n", nchnk); + if (Cnt.LOG <= LOGINFO) printf("i> # chunks of data (initial): %d\n\n", nchnk); - if (Cnt.LOG <= LOGINFO) - printf("i> # elechnk: %d\n\n", ELECHNK); + if (Cnt.LOG <= LOGINFO) printf("i> # elechnk: %d\n\n", ELECHNK); // divide the list mode data (1GB) into chunks in terms of addresses of selected time tags // break time tag @@ -159,8 +151,7 @@ void getLMinfo(char *flm, const Cnst Cnt) { atag[0] = 0; //------------------------------------------------------------------------------------------------ - if (Cnt.LOG <= LOGINFO) - printf("i> setting up data chunks:\n"); + if (Cnt.LOG <= LOGINFO) printf("i> setting up data chunks:\n"); int i = 0; while ((ele - atag[i]) > (size_t)ELECHNK) { // printf(">>>>>>>>>>>>>>>>>>> ele=%lu, atag=%lu, ELE=%d\n", ele, atag[i], ELECHNK); @@ -240,8 +231,7 @@ void modifyLMinfo(int tstart, int tstop, const Cnst Cnt) { int ntag[2] = {-1, -1}; // new start and end time/address break tag for (int n = 0; n < lmprop.nchnk; n++) { if ((tstart <= (lmprop.btag[n + 1] / ITIME)) && ((lmprop.btag[n] / ITIME) < tstop)) { - if (ntag[0] == -1) - ntag[0] = n; + if (ntag[0] == -1) ntag[0] = n; ntag[1] = n; if (Cnt.LOG <= LOGDEBUG) printf(" > time break [%d] <%lu, %lu> is in. 
ele={%d, %d}.\n", n + 1, lmprop.btag[n], @@ -259,8 +249,7 @@ void modifyLMinfo(int tstart, int tstop, const Cnst Cnt) { int nn = 0; // new indexing tmp_btag[0] = lmprop.btag[ntag[0]]; tmp_atag[0] = lmprop.atag[ntag[0]]; - if (Cnt.LOG <= LOGDEBUG) - printf("> leaving only those chunks for histogramming:\n"); + if (Cnt.LOG <= LOGDEBUG) printf("> leaving only those chunks for histogramming:\n"); for (int n = ntag[0]; n <= ntag[1]; n++) { tmp_btag[nn + 1] = lmprop.btag[n + 1]; diff --git a/niftypet/nipet/lm/src/lmproc.cu b/niftypet/nipet/lm/src/lmproc.cu index 29d584e1..96c87656 100644 --- a/niftypet/nipet/lm/src/lmproc.cu +++ b/niftypet/nipet/lm/src/lmproc.cu @@ -18,8 +18,7 @@ execution. { // list mode data file (binary) - if (Cnt.LOG <= LOGINFO) - printf("i> the list-mode file: %s\n", flm); + if (Cnt.LOG <= LOGINFO) printf("i> the list-mode file: %s\n", flm); //------------ file and path names #ifdef WIN32 @@ -125,14 +124,10 @@ execution. //> list mode data offset, start of events lmprop.lmoff = Cnt.LMOFF; - if (Cnt.LOG <= LOGDEBUG) - printf("i> LM offset in bytes: %d\n", lmprop.lmoff); - if (Cnt.LOG <= LOGDEBUG) - printf("i> bytes per LM event: %d\n", lmprop.bpe); - if (Cnt.LOG <= LOGINFO) - printf("i> frame start time: %d\n", tstart); - if (Cnt.LOG <= LOGINFO) - printf("i> frame stop time: %d\n", tstop); + if (Cnt.LOG <= LOGDEBUG) printf("i> LM offset in bytes: %d\n", lmprop.lmoff); + if (Cnt.LOG <= LOGDEBUG) printf("i> bytes per LM event: %d\n", lmprop.bpe); + if (Cnt.LOG <= LOGINFO) printf("i> frame start time: %d\n", tstart); + if (Cnt.LOG <= LOGINFO) printf("i> frame stop time: %d\n", tstop); //--- //======= get only the chunks which have the time frame data @@ -154,9 +149,7 @@ execution. 
HANDLE_ERROR(cudaMemcpy(dicout.ssr, d_ssrb, SEG0 * NSBINANG * sizeof(unsigned int), cudaMemcpyDeviceToHost)); unsigned long long psum_ssrb = 0; - for (int i = 0; i < SEG0 * NSBINANG; i++) { - psum_ssrb += dicout.ssr[i]; - } + for (int i = 0; i < SEG0 * NSBINANG; i++) { psum_ssrb += dicout.ssr[i]; } //--- //> copy to host the compressed prompt and delayed sinograms @@ -171,8 +164,7 @@ execution. dicout.dsn[i] = sino[i] >> 16; dicout.psm += dicout.psn[i]; dicout.dsm += dicout.dsn[i]; - if (mxbin < dicout.psn[i]) - mxbin = dicout.psn[i]; + if (mxbin < dicout.psn[i]) mxbin = dicout.psn[i]; } //--- output data to Python @@ -207,8 +199,7 @@ execution. dicout.dsm); if (Cnt.LOG <= LOGINFO) printf("\nic> total prompt and delayeds head-curve events: P = %llu, D = %llu\n", sphc, sdhc); - if (Cnt.LOG <= LOGINFO) - printf("\nic> maximum prompt sino value: %u \n", mxbin); + if (Cnt.LOG <= LOGINFO) printf("\nic> maximum prompt sino value: %u \n", mxbin); //-fansums and bucket singles HANDLE_ERROR(cudaMemcpy(dicout.fan, d_fansums, NRINGS * nCRS * sizeof(unsigned int), diff --git a/niftypet/nipet/lm/src/rnd.cu b/niftypet/nipet/lm/src/rnd.cu index a06cb71c..46e9814b 100644 --- a/niftypet/nipet/lm/src/rnd.cu +++ b/niftypet/nipet/lm/src/rnd.cu @@ -38,15 +38,13 @@ __inline__ __device__ float crystal_sum(float cval) { cval = warpsum(cval); // write the sum to shared memory and then sync (wait) - if (lane == 0) - shared[warpid] = cval; + if (lane == 0) shared[warpid] = cval; __syncthreads(); // read from shared memory only if that warp existed cval = (cidx < (blockDim.x * blockDim.y) / warpSize) ? 
shared[lane] : 0; - if (warpid == 0) - cval = warpsum(cval); // Final reduce within first warp + if (warpid == 0) cval = warpsum(cval); // Final reduce within first warp return cval; } @@ -141,8 +139,7 @@ __global__ void rnd(float *res, const float *crs) { // first see the order of the range; since it is on a circle the other end can be of lower // number if (c_crange[iby + 2 * nCRSR] == 0) { - if (ic <= c_crange[iby + nCRSR]) - crystal_val = crs[itx + NRINGS * ic]; + if (ic <= c_crange[iby + nCRSR]) crystal_val = crs[itx + NRINGS * ic]; } else { if (ic <= (c_crange[iby + nCRSR] + nCRSR)) { ic -= nCRSR * (ic >= nCRSR); @@ -174,8 +171,7 @@ void gpu_randoms(float *rsn, float *cmap, unsigned int *fansums, txLUTs txlut, s int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); //--- the sino for estimated random events float *d_rsino; @@ -215,8 +211,7 @@ void gpu_randoms(float *rsn, float *cmap, unsigned int *fansums, txLUTs txlut, s for (int c2 = 0; c2 < Cnt.NCRSR; c2 += 1) { wsum += txlut.cij[c2 + Cnt.NCRSR * c1]; - if (txlut.cij[c2 + Cnt.NCRSR * c1] > prv) - crange[c1] = c2; + if (txlut.cij[c2 + Cnt.NCRSR * c1] > prv) crange[c1] = c2; if (txlut.cij[c2 + Cnt.NCRSR * c1] < prv) crange[c1 + Cnt.NCRSR] = c2 - 1 + Cnt.NCRSR * (c2 == 0); prv = txlut.cij[c2 + Cnt.NCRSR * c1]; @@ -245,8 +240,7 @@ void gpu_randoms(float *rsn, float *cmap, unsigned int *fansums, txLUTs txlut, s for (int rq = (ri - Cnt.MRD); rq < (ri + Cnt.MRD + 1); rq++) { if ((rq >= 0) && (rq < Cnt.NRNG)) { wsum += 1; - if (rrange[ri] == 257) - rrange[ri] = rq; + if (rrange[ri] == 257) rrange[ri] = rq; rrange[ri + Cnt.NRNG] = rq; } rrange[ri + 2 * Cnt.NRNG] = wsum; @@ -291,8 +285,7 @@ void gpu_randoms(float *rsn, float *cmap, unsigned int *fansums, txLUTs txlut, s // crystal 'ones' for init and number of crystal in coincidence for each opposing crystal float *ones = (float 
*)malloc(Cnt.NRNG * Cnt.NCRSR * sizeof(float)); - for (int i = 0; i < Cnt.NRNG * Cnt.NCRSR; i++) - ones[i] = 1; + for (int i = 0; i < Cnt.NRNG * Cnt.NCRSR; i++) ones[i] = 1; float *d_ones; HANDLE_ERROR(cudaMalloc(&d_ones, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); HANDLE_ERROR( @@ -303,8 +296,7 @@ void gpu_randoms(float *rsn, float *cmap, unsigned int *fansums, txLUTs txlut, s HANDLE_ERROR(cudaMalloc(&d_ncrs, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); //=============================================<<<<<<<< - if (Cnt.LOG <= LOGINFO) - printf("\ni> estimating random events (variance reduction)... "); + if (Cnt.LOG <= LOGINFO) printf("\ni> estimating random events (variance reduction)... "); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); @@ -349,8 +341,7 @@ void gpu_randoms(float *rsn, float *cmap, unsigned int *fansums, txLUTs txlut, s cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) - printf(" DONE in %fs.\n", 0.001 * elapsedTime); + if (Cnt.LOG <= LOGINFO) printf(" DONE in %fs.\n", 0.001 * elapsedTime); //=============================================<<<<<<<< //--- results to CPU @@ -472,8 +463,7 @@ void p_randoms(float *rsn, float *cmap, int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); //--- the sino for estimated random events float *d_rsino; @@ -527,8 +517,7 @@ void p_randoms(float *rsn, float *cmap, for (int c2 = 0; c2 < Cnt.NCRSR; c2 += 1) { wsum += txlut.cij[c2 + Cnt.NCRSR * c1]; - if (txlut.cij[c2 + Cnt.NCRSR * c1] > prv) - crange[c1] = c2; + if (txlut.cij[c2 + Cnt.NCRSR * c1] > prv) crange[c1] = c2; if (txlut.cij[c2 + Cnt.NCRSR * c1] < prv) crange[c1 + Cnt.NCRSR] = c2 - 1 + Cnt.NCRSR * (c2 == 0); prv = txlut.cij[c2 + Cnt.NCRSR * c1]; @@ -557,8 +546,7 @@ void p_randoms(float *rsn, float *cmap, for (int rq = (ri - Cnt.MRD); rq < (ri + 
Cnt.MRD + 1); rq++) { if ((rq >= 0) && (rq < Cnt.NRNG)) { wsum += 1; - if (rrange[ri] == 257) - rrange[ri] = rq; + if (rrange[ri] == 257) rrange[ri] = rq; rrange[ri + Cnt.NRNG] = rq; } rrange[ri + 2 * Cnt.NRNG] = wsum; @@ -603,8 +591,7 @@ void p_randoms(float *rsn, float *cmap, // crystal 'ones' for init and number of crystal in coincidence for each opposing crystal float *ones = (float *)malloc(Cnt.NRNG * Cnt.NCRSR * sizeof(float)); - for (int i = 0; i < Cnt.NRNG * Cnt.NCRSR; i++) - ones[i] = 1; + for (int i = 0; i < Cnt.NRNG * Cnt.NCRSR; i++) ones[i] = 1; float *d_ones; HANDLE_ERROR(cudaMalloc(&d_ones, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); HANDLE_ERROR( @@ -615,8 +602,7 @@ void p_randoms(float *rsn, float *cmap, HANDLE_ERROR(cudaMalloc(&d_ncrs, Cnt.NRNG * Cnt.NCRSR * sizeof(float))); //=============================================<<<<<<<< - if (Cnt.LOG <= LOGINFO) - printf("\ni> estimating random events from prompts... "); + if (Cnt.LOG <= LOGINFO) printf("\ni> estimating random events from prompts... 
"); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); @@ -661,8 +647,7 @@ void p_randoms(float *rsn, float *cmap, cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) - printf(" DONE in %fs.\n", 0.001 * elapsedTime); + if (Cnt.LOG <= LOGINFO) printf(" DONE in %fs.\n", 0.001 * elapsedTime); //=============================================<<<<<<<< //--- results to CPU diff --git a/niftypet/nipet/prj/src/prj_module.cu b/niftypet/nipet/prj/src/prj_module.cu index 71e38a93..1f47daef 100644 --- a/niftypet/nipet/prj/src/prj_module.cu +++ b/niftypet/nipet/prj/src/prj_module.cu @@ -154,8 +154,7 @@ static PyObject *trnx_prj(PyObject *self, PyObject *args) { int N0crs = PyArray_DIM(p_crs, 0); int N1crs = PyArray_DIM(p_crs, 1); - if (Cnt.LOG <= LOGDEBUG) - printf("\ni> N0crs=%d, N1crs=%d\n", N0crs, N1crs); + if (Cnt.LOG <= LOGDEBUG) printf("\ni> N0crs=%d, N1crs=%d\n", N0crs, N1crs); float *im = (float *)PyArray_DATA(p_im); if (Cnt.LOG <= LOGDEBUG) @@ -176,8 +175,7 @@ static PyObject *trnx_prj(PyObject *self, PyObject *args) { int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); //--- TRANSAXIAL COMPONENTS float4 *d_crs; @@ -377,9 +375,7 @@ static PyObject *frwd_prj(PyObject *self, PyObject *args) { printf("i> no subsets defined. number of projection bins in 2D: %d\n", Nprj); // all projections in subs = (int *)malloc(Nprj * sizeof(int)); - for (int i = 0; i < Nprj; i++) { - subs[i] = i; - } + for (int i = 0; i < Nprj; i++) { subs[i] = i; } } else { if (Cnt.LOG <= LOGDEBUG) printf("i> subsets defined. 
number of subset projection bins in 2D: %d\n", Nprj); @@ -411,8 +407,7 @@ static PyObject *frwd_prj(PyObject *self, PyObject *args) { PyArray_ResolveWritebackIfCopy(p_prjout); Py_DECREF(p_prjout); - if (subs_[0] == -1) - free(subs); + if (subs_[0] == -1) free(subs); Py_INCREF(Py_None); return Py_None; @@ -558,9 +553,7 @@ static PyObject *back_prj(PyObject *self, PyObject *args) { printf("\ni> no subsets defined. number of projection bins in 2D: %d\n", Nprj); // all projections in subs = (int *)malloc(Nprj * sizeof(int)); - for (int i = 0; i < Nprj; i++) { - subs[i] = i; - } + for (int i = 0; i < Nprj; i++) { subs[i] = i; } } else { if (Cnt.LOG <= LOGDEBUG) printf("\ni> subsets defined. number of subset projection bins in 2D: %d\n", Nprj); @@ -595,8 +588,7 @@ static PyObject *back_prj(PyObject *self, PyObject *args) { PyArray_ResolveWritebackIfCopy(p_bim); Py_DECREF(p_bim); - if (subs_[0] == -1) - free(subs); + if (subs_[0] == -1) free(subs); Py_INCREF(Py_None); return Py_None; @@ -769,12 +761,10 @@ static PyObject *osem_rec(PyObject *self, PyObject *args) { //>--- PSF KERNEL --- float *krnl; int SZ_KRNL = (int)PyArray_DIM(p_krnl, 1); - if (Cnt.LOG <= LOGINFO) - printf("i> kernel size [voxels]: %d\n", SZ_KRNL); + if (Cnt.LOG <= LOGINFO) printf("i> kernel size [voxels]: %d\n", SZ_KRNL); if (SZ_KRNL != KERNEL_LENGTH) { - if (Cnt.LOG <= LOGWARNING) - printf("w> wrong kernel size.\n"); + if (Cnt.LOG <= LOGWARNING) printf("w> wrong kernel size.\n"); krnl = (float *)malloc(KERNEL_LENGTH * sizeof(float)); krnl[0] = -1; } else { diff --git a/niftypet/nipet/prj/src/prjb.cu b/niftypet/nipet/prj/src/prjb.cu index 5f722c3e..63369dab 100644 --- a/niftypet/nipet/prj/src/prjb.cu +++ b/niftypet/nipet/prj/src/prjb.cu @@ -189,8 +189,7 @@ void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2no int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device 
#%d\n", dev_id); //--- TRANSAXIAL COMPONENT float4 *d_crs; @@ -269,8 +268,7 @@ void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2no cudaEventCreate(&stop); cudaEventRecord(start, 0); - if (Cnt.LOG <= LOGDEBUG) - printf("i> calculating image through back projection... "); + if (Cnt.LOG <= LOGDEBUG) printf("i> calculating image through back projection... "); //------------DO TRANSAXIAL CALCULATIONS--------------------------------- gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); @@ -314,8 +312,7 @@ void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2no cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGDEBUG) - printf("DONE in %fs.\n", 0.001 * elapsedTime); + if (Cnt.LOG <= LOGDEBUG) printf("DONE in %fs.\n", 0.001 * elapsedTime); cudaDeviceSynchronize(); @@ -341,8 +338,7 @@ void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2no cudaMemcpy(bimg, d_imr, SZ_IMX * SZ_IMY * nvz * sizeof(float), cudaMemcpyDeviceToHost)); cudaFree(d_im); cudaFree(d_imr); - if (Cnt.LOG <= LOGDEBUG) - printf("i> reduced the axial (z) image size to %d\n", nvz); + if (Cnt.LOG <= LOGDEBUG) printf("i> reduced the axial (z) image size to %d\n", nvz); } else { // copy to host memory HANDLE_ERROR( @@ -368,8 +364,7 @@ void rec_bprj(float *d_bimg, float *d_sino, int *d_sub, int Nprj, float *d_tt, u int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); // get the axial LUTs in constant memory cudaMemcpyToSymbol(c_li2rng, li2rng, NLI2R * sizeof(float2)); @@ -387,8 +382,7 @@ void rec_bprj(float *d_bimg, float *d_sino, int *d_sub, int Nprj, float *d_tt, u cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); - if (Cnt.LOG <= LOGDEBUG) - printf("i> subset back projection (Nprj=%d)... 
", Nprj); + if (Cnt.LOG <= LOGDEBUG) printf("i> subset back projection (Nprj=%d)... ", Nprj); //============================================================================ bprj_drct<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno); @@ -413,8 +407,7 @@ void rec_bprj(float *d_bimg, float *d_sino, int *d_sub, int Nprj, float *d_tt, u cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGDEBUG) - printf("DONE in %fs.\n", 0.001 * elapsedTime); + if (Cnt.LOG <= LOGDEBUG) printf("DONE in %fs.\n", 0.001 * elapsedTime); cudaDeviceSynchronize(); diff --git a/niftypet/nipet/prj/src/prjf.cu b/niftypet/nipet/prj/src/prjf.cu index 83ab6bb0..bdfe68a3 100644 --- a/niftypet/nipet/prj/src/prjf.cu +++ b/niftypet/nipet/prj/src/prjf.cu @@ -206,8 +206,7 @@ void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2no char att) { int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); //--- TRANSAXIAL COMPONENT float4 *d_crs; @@ -318,8 +317,7 @@ void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2no cudaEventCreate(&stop); cudaEventRecord(start, 0); - if (Cnt.LOG <= LOGDEBUG) - printf("i> calculating sinograms via forward projection..."); + if (Cnt.LOG <= LOGDEBUG) printf("i> calculating sinograms via forward projection..."); //------------DO TRANSAXIAL CALCULATIONS--------------------------------- gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); @@ -354,8 +352,7 @@ void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2no cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGDEBUG) - printf("DONE in %fs.\n", 0.001 * elapsedTime); + if (Cnt.LOG <= LOGDEBUG) printf("DONE in %fs.\n", 0.001 * elapsedTime); cudaDeviceSynchronize(); @@ -385,8 +382,7 @@ void rec_fprj(float *d_sino, 
float *d_img, int *d_sub, int Nprj, int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); // get the axial LUTs in constant memory cudaMemcpyToSymbol(c_li2rng, li2rng, NLI2R * sizeof(float2)); @@ -404,8 +400,7 @@ void rec_fprj(float *d_sino, float *d_img, int *d_sub, int Nprj, cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); - if (Cnt.LOG <= LOGDEBUG) - printf("i> subset forward projection (Nprj=%d)... ", Nprj); + if (Cnt.LOG <= LOGDEBUG) printf("i> subset forward projection (Nprj=%d)... ", Nprj); //============================================================================ fprj_drct<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0); @@ -430,8 +425,7 @@ void rec_fprj(float *d_sino, float *d_img, int *d_sub, int Nprj, cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGDEBUG) - printf("DONE in %fs.\n", 0.001 * elapsedTime); + if (Cnt.LOG <= LOGDEBUG) printf("DONE in %fs.\n", 0.001 * elapsedTime); cudaDeviceSynchronize(); diff --git a/niftypet/nipet/prj/src/recon.cu b/niftypet/nipet/prj/src/recon.cu index da539101..0b87696f 100644 --- a/niftypet/nipet/prj/src/recon.cu +++ b/niftypet/nipet/prj/src/recon.cu @@ -16,15 +16,12 @@ Copyrights: /// z: how many Z-slices to add __global__ void pad(float *dst, float *src, const int z) { int i = threadIdx.x + blockDim.x * blockIdx.x; - if (i >= SZ_IMX) - return; + if (i >= SZ_IMX) return; int j = threadIdx.y + blockDim.y * blockIdx.y; - if (j >= SZ_IMY) - return; + if (j >= SZ_IMY) return; src += i * SZ_IMY * SZ_IMZ + j * SZ_IMZ; dst += i * SZ_IMY * (SZ_IMZ + z) + j * (SZ_IMZ + z); - for (int k = 0; k < SZ_IMZ; ++k) - dst[k] = src[k]; + for (int k = 0; k < SZ_IMZ; ++k) dst[k] = src[k]; } void d_pad(float *dst, float *src, const int z = COLUMNS_BLOCKDIM_X - SZ_IMZ % COLUMNS_BLOCKDIM_X) { @@ -37,15 +34,12 
@@ void d_pad(float *dst, float *src, /// z: how many Z-slices to remove __global__ void unpad(float *dst, float *src, const int z) { int i = threadIdx.x + blockDim.x * blockIdx.x; - if (i >= SZ_IMX) - return; + if (i >= SZ_IMX) return; int j = threadIdx.y + blockDim.y * blockIdx.y; - if (j >= SZ_IMY) - return; + if (j >= SZ_IMY) return; dst += i * SZ_IMY * SZ_IMZ + j * SZ_IMZ; src += i * SZ_IMY * (SZ_IMZ + z) + j * (SZ_IMZ + z); - for (int k = 0; k < SZ_IMZ; ++k) - dst[k] = src[k]; + for (int k = 0; k < SZ_IMZ; ++k) dst[k] = src[k]; } void d_unpad(float *dst, float *src, const int z = COLUMNS_BLOCKDIM_X - SZ_IMZ % COLUMNS_BLOCKDIM_X) { @@ -65,12 +59,10 @@ void setConvolutionKernel(float *krnl) { void setKernelGaussian(float sigma) { float knlRM[KERNEL_LENGTH * 3]; const double tmpE = -1.0 / (2 * sigma * sigma); - for (int i = 0; i < KERNEL_LENGTH; ++i) - knlRM[i] = (float)exp(tmpE * pow(RSZ_PSF_KRNL - i, 2)); + for (int i = 0; i < KERNEL_LENGTH; ++i) knlRM[i] = (float)exp(tmpE * pow(RSZ_PSF_KRNL - i, 2)); // normalise double knlSum = 0; - for (size_t i = 0; i < KERNEL_LENGTH; ++i) - knlSum += knlRM[i]; + for (size_t i = 0; i < KERNEL_LENGTH; ++i) knlSum += knlRM[i]; for (size_t i = 0; i < KERNEL_LENGTH; ++i) { knlRM[i] /= knlSum; // also fill in other dimensions @@ -229,8 +221,7 @@ void d_conv(float *d_buff, float *d_imgout, float *d_imgint, int Nvk, int Nvj, i // Element-wise multiplication __global__ void elmult(float *inA, float *inB, int length) { int idx = threadIdx.x + blockDim.x * blockIdx.x; - if (idx < length) - inA[idx] *= inB[idx]; + if (idx < length) inA[idx] *= inB[idx]; } void d_elmult(float *d_inA, float *d_inB, int length) { @@ -244,8 +235,7 @@ void d_elmult(float *d_inA, float *d_inB, int length) { // Element-wise division with result stored in first input variable __global__ void eldiv0(float *inA, float *inB, int length) { int idx = threadIdx.x + blockDim.x * blockIdx.x; - if (idx >= length) - return; + if (idx >= length) return; if 
(FLOAT_WITHIN_EPS(inB[idx])) inA[idx] = 0; else @@ -263,8 +253,7 @@ void d_eldiv(float *d_inA, float *d_inB, int length) { __global__ void sneldiv(float *inA, unsigned short *inB, int *sub, int Nprj, int snno) { int idz = threadIdx.x + blockDim.x * blockIdx.x; - if (!(blockIdx.y < Nprj && idz < snno)) - return; + if (!(blockIdx.y < Nprj && idz < snno)) return; // inA > only active bins of the subset // inB > all sinogram bins float b = (float)inB[snno * sub[blockIdx.y] + idz]; @@ -299,8 +288,7 @@ void d_sneladd(float *d_inA, float *d_inB, int *d_sub, int Nprj, int snno) { //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - __global__ void eladd(float *inA, float *inB, int length) { int idx = threadIdx.x + blockDim.x * blockIdx.x; - if (idx < length) - inA[idx] += inB[idx]; + if (idx < length) inA[idx] += inB[idx]; } void d_eladd(float *d_inA, float *d_inB, int length) { @@ -342,8 +330,7 @@ void osem(float *imgout, bool *rncmsk, unsigned short *psng, float *rsng, float int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGDEBUG) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); //--- TRANSAXIAL COMPONENT float4 *d_crs; @@ -472,8 +459,7 @@ void osem(float *imgout, bool *rncmsk, unsigned short *psng, float *rsng, float getMemUse(Cnt); for (int i = 0; i < Nsub; i++) { - if (Cnt.LOG <= LOGDEBUG) - printf("<> subset %d-th <>\n", i); + if (Cnt.LOG <= LOGDEBUG) printf("<> subset %d-th <>\n", i); // resolution modelling current image if (krnl[0] >= 0) { diff --git a/niftypet/nipet/sct/src/ray.cu b/niftypet/nipet/sct/src/ray.cu index 02a3eff9..eb0a4609 100644 --- a/niftypet/nipet/sct/src/ray.cu +++ b/niftypet/nipet/sct/src/ray.cu @@ -9,14 +9,12 @@ Copyrights: 2018 #include "sct.h" __inline__ __device__ float warpsum(float uval) { - for (int off = 16; off > 0; off /= 2) - uval += __shfl_down_sync(0xffffffff, uval, off); + for (int off = 16; off > 0; off /= 2) uval += __shfl_down_sync(0xffffffff, 
uval, off); return uval; } __inline__ __device__ float warpsum_xor(float val) { - for (int mask = 16; mask > 0; mask /= 2) - val += __shfl_xor_sync(0xffffffff, val, mask); + for (int mask = 16; mask > 0; mask /= 2) val += __shfl_xor_sync(0xffffffff, val, mask); return val; } @@ -94,8 +92,7 @@ __global__ void satt(short *output, cudaTextureObject_t texo, const int *i2v, //<><><><><><><><><><><><><><><><><><><><><> uval = warpsum(uval); - if (idx == 0) - ray_sum += uval; + if (idx == 0) ray_sum += uval; } if (idx == 0) @@ -114,8 +111,7 @@ short *raysLUT(cudaTextureObject_t texo_mu3d, iMSK d_mu_msk, scrsDEF d_scrsdef, // check which device is going to be used int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); // Allocate result of transformation in device memory short *d_LUTout; @@ -130,8 +126,7 @@ short *raysLUT(cudaTextureObject_t texo_mu3d, iMSK d_mu_msk, scrsDEF d_scrsdef, // return d_LUTout; - if (Cnt.LOG <= LOGINFO) - printf("i> precalculating attenuation paths into LUT..."); + if (Cnt.LOG <= LOGINFO) printf("i> precalculating attenuation paths into LUT..."); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); @@ -150,8 +145,7 @@ short *raysLUT(cudaTextureObject_t texo_mu3d, iMSK d_mu_msk, scrsDEF d_scrsdef, cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) - printf("DONE in %fs.\n", 0.001 * elapsedTime); + if (Cnt.LOG <= LOGINFO) printf("DONE in %fs.\n", 0.001 * elapsedTime); cudaDeviceSynchronize(); diff --git a/niftypet/nipet/sct/src/sct.cu b/niftypet/nipet/sct/src/sct.cu index 4f3e140d..6c36e832 100644 --- a/niftypet/nipet/sct/src/sct.cu +++ b/niftypet/nipet/sct/src/sct.cu @@ -22,15 +22,13 @@ __device__ char sgn(float x) { return x > 0 ? 1 : (x < 0 ? 
-1 : 0); } //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ __inline__ __device__ float warpsum(float val) { - for (int off = 16; off > 0; off /= 2) - val += __shfl_down_sync(0xffffffff, val, off); + for (int off = 16; off > 0; off /= 2) val += __shfl_down_sync(0xffffffff, val, off); return val; } //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ __inline__ __device__ float warpsum_xor(float val) { - for (int mask = SS_WRP / 2; mask > 0; mask /= 2) - val += __shfl_xor_sync(0xffffffff, val, mask); + for (int mask = SS_WRP / 2; mask > 0; mask /= 2) val += __shfl_xor_sync(0xffffffff, val, mask); return val; } @@ -83,8 +81,7 @@ __global__ void Psct(float *rslt, cudaTextureObject_t texo, const short *rays, // size) int mvxi = mu_msk.v2i[(int)(u + SS_IMX * v + SS_IMX * SS_IMY * w)]; - if (mvxi < 0) - return; + if (mvxi < 0) return; // if ((mvxi>393674)||(mvxi<0)) printf(">>>>DISASTER: mvxi=%d, u=%d,v=%d,w=%d\n", mvxi, u, v, w // ); @@ -206,8 +203,7 @@ __global__ void Psct(float *rslt, cudaTextureObject_t texo, const short *rays, float uval = tex3D(texo, u, v, w); uval = warpsum_xor(uval); - if (uval > 0) - Nw = k; + if (uval > 0) Nw = k; } //--- @@ -417,8 +413,7 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em // check which device is going to be used int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); getMemUse(Cnt); @@ -437,8 +432,7 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em if (Cnt.LOG <= LOGINFO) { printf("i> time of flight properties for scatter estimation:\n"); - for (int i = 0; i < 4; i++) - printf(" tofbin[%d]=%f\n", i, tofbin[i]); + for (int i = 0; i < 4; i++) printf(" tofbin[%d]=%f\n", i, tofbin[i]); } //--------------- K-N LUTs --------------------------- @@ -530,8 +524,7 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em 
cudaTextureObject_t texo_mu3d = 0; cudaCreateTextureObject(&texo_mu3d, &resDesc, &texDesc, NULL); - if (Cnt.LOG <= LOGINFO) - printf("i> 3D CUDA texture for the mu-map has been initialised.\n"); + if (Cnt.LOG <= LOGINFO) printf("i> 3D CUDA texture for the mu-map has been initialised.\n"); //==================================================================== //============================================================ @@ -571,8 +564,7 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) - printf("DONE in %fs.\n\n", 0.001 * elapsedTime); + if (Cnt.LOG <= LOGINFO) printf("DONE in %fs.\n\n", 0.001 * elapsedTime); cudaFree(d_rays); cudaDeviceSynchronize(); HANDLE_ERROR(cudaGetLastError()); @@ -585,9 +577,7 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em } else if (Cnt.SPN == 11) { tbins = Cnt.NSN11 * d_scrsdef.nscrs * d_scrsdef.nscrs; } else { - if (Cnt.LOG <= LOGWARNING) { - printf("e> Unrecognised span definition.\n"); - } + if (Cnt.LOG <= LOGWARNING) { printf("e> Unrecognised span definition.\n"); } } // 3D scatter pre-sino out @@ -626,8 +616,7 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em end = clock(); time_spent = (double)(end - begin) / CLOCKS_PER_SEC; - if (Cnt.LOG <= LOGINFO) - printf("\ni> TOTAL SCATTER TIME: %f\n", time_spent); + if (Cnt.LOG <= LOGINFO) printf("\ni> TOTAL SCATTER TIME: %f\n", time_spent); return sctout; } diff --git a/niftypet/nipet/sct/src/sct_module.cu b/niftypet/nipet/sct/src/sct_module.cu index 48c48ab7..326c3346 100644 --- a/niftypet/nipet/sct/src/sct_module.cu +++ b/niftypet/nipet/sct/src/sct_module.cu @@ -272,16 +272,12 @@ static PyObject *vsm_scatter(PyObject *self, PyObject *args) { // get the stats in the image structure float mumx = -1e12, emmx = -1e12, mumn = 1e12, emmn = 1e12; for (int i = 0; i < 
muIMG.nvx; i++) { - if (mumap[i] > mumx) - mumx = mumap[i]; - if (mumap[i] < mumn) - mumn = mumap[i]; + if (mumap[i] > mumx) mumx = mumap[i]; + if (mumap[i] < mumn) mumn = mumap[i]; } for (int i = 0; i < emIMG.nvx; i++) { - if (emimg[i] > emmx) - emmx = emimg[i]; - if (emimg[i] < emmn) - emmn = emimg[i]; + if (emimg[i] > emmx) emmx = emimg[i]; + if (emimg[i] < emmn) emmn = emimg[i]; } muIMG.im = mumap; @@ -293,12 +289,10 @@ static PyObject *vsm_scatter(PyObject *self, PyObject *args) { muIMG.n10mx = 0; emIMG.n10mx = 0; for (int i = 0; i < muIMG.nvx; i++) - if (mumap[i] > 0.1 * mumx) - muIMG.n10mx += 1; + if (mumap[i] > 0.1 * mumx) muIMG.n10mx += 1; for (int i = 0; i < emIMG.nvx; i++) - if (emimg[i] > 0.1 * emmx) - emIMG.n10mx += 1; + if (emimg[i] > 0.1 * emmx) emIMG.n10mx += 1; if (Cnt.LOG <= LOGDEBUG) printf("i> mumx = %f, mumin = %f, emmx = %f, emmn = %f\n", mumx, mumn, emmx, emmn); @@ -315,8 +309,7 @@ static PyObject *vsm_scatter(PyObject *self, PyObject *args) { //<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> // Clean up - if (Cnt.LOG <= LOGDEBUG) - printf("i> cleaning scatter variables..."); + if (Cnt.LOG <= LOGDEBUG) printf("i> cleaning scatter variables..."); Py_DECREF(p_mumap); Py_DECREF(p_mumsk); Py_DECREF(p_emimg); @@ -337,7 +330,6 @@ static PyObject *vsm_scatter(PyObject *self, PyObject *args) { Py_DECREF(p_sval); Py_INCREF(Py_None); - if (Cnt.LOG <= LOGDEBUG) - printf("DONE.\n"); + if (Cnt.LOG <= LOGDEBUG) printf("DONE.\n"); return Py_None; } diff --git a/niftypet/nipet/sct/src/sctaux.cu b/niftypet/nipet/sct/src/sctaux.cu index 0dc8e7e1..197d788d 100644 --- a/niftypet/nipet/sct/src/sctaux.cu +++ b/niftypet/nipet/sct/src/sctaux.cu @@ -87,8 +87,7 @@ float *srslt2sino(float *d_srslt, char *d_xsxu, scrsDEF d_scrsdef, int *sctaxR, // axially interpolated scatter pre-sino; full span-1 without MRD limit or span-11 with MRD=60 float *d_sct3di; int tbins = 0; - if (Cnt.SPN == 1) - tbins = 
Cnt.NSN64 * d_scrsdef.nscrs * d_scrsdef.nscrs; + if (Cnt.SPN == 1) tbins = Cnt.NSN64 * d_scrsdef.nscrs * d_scrsdef.nscrs; // scatter pre-sino, span-11 else if (Cnt.SPN == 11) tbins = Cnt.NSN11 * d_scrsdef.nscrs * d_scrsdef.nscrs; @@ -160,11 +159,9 @@ float *srslt2sino(float *d_srslt, char *d_xsxu, scrsDEF d_scrsdef, int *sctaxR, cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) - printf("DONE in %fs.\n", 1e-3 * elapsedTime); + if (Cnt.LOG <= LOGINFO) printf("DONE in %fs.\n", 1e-3 * elapsedTime); - if (Cnt.LOG <= LOGINFO) - printf("i> 3D scatter axial interpolation..."); + if (Cnt.LOG <= LOGINFO) printf("i> 3D scatter axial interpolation..."); cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); @@ -185,8 +182,7 @@ float *srslt2sino(float *d_srslt, char *d_xsxu, scrsDEF d_scrsdef, int *sctaxR, cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) - printf("DONE in %fs.\n", 1e-3 * elapsedTime); + if (Cnt.LOG <= LOGINFO) printf("DONE in %fs.\n", 1e-3 * elapsedTime); } cudaFree(d_scts1); @@ -203,15 +199,13 @@ iMSK get_imskEm(IMflt imvol, float thrshld, Cnst Cnt) { // check which device is going to be used int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); iMSK msk; int nvx = 0; for (int i = 0; i < (SSE_IMX * SSE_IMY * SSE_IMZ); i++) { - if (imvol.im[i] > thrshld) - nvx++; + if (imvol.im[i] > thrshld) nvx++; } //------------------------------------------------------------------ // create the mask thru indexes @@ -280,13 +274,11 @@ iMSK get_imskMu(IMflt imvol, char *msk, Cnst Cnt) { // check which device is going to be used int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGINFO) printf("i> using CUDA 
device #%d\n", dev_id); int nvx = 0; for (int i = 0; i < (SS_IMX * SS_IMY * SS_IMZ); i++) { - if (msk[i] > 0) - nvx++; + if (msk[i] > 0) nvx++; } //------------------------------------------------------------------ // create the mask thru indecies diff --git a/niftypet/nipet/src/aux_module.cu b/niftypet/nipet/src/aux_module.cu index e21a9979..7f493e08 100644 --- a/niftypet/nipet/src/aux_module.cu +++ b/niftypet/nipet/src/aux_module.cu @@ -404,8 +404,7 @@ static PyObject *mmr_rgaps(PyObject *self, PyObject *args) { //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOO", &o_sng, &o_sino, &o_txLUT, &o_mmrcnst)) - return NULL; + if (!PyArg_ParseTuple(args, "OOOO", &o_sng, &o_sino, &o_txLUT, &o_mmrcnst)) return NULL; //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ /* Interpret the input objects as... PyLong_AsLong*/ @@ -482,8 +481,7 @@ static PyObject *mmr_span11LUT(PyObject *self, PyObject *args) { //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "O", &o_mmrcnst)) - return NULL; + if (!PyArg_ParseTuple(args, "O", &o_mmrcnst)) return NULL; //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ /* Interpret the input objects as... 
*/ @@ -532,8 +530,7 @@ static PyObject *aux_varon(PyObject *self, PyObject *args) { //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOiO", &o_m1, &o_m2, &o_x, &b, &o_mmrcnst)) - return NULL; + if (!PyArg_ParseTuple(args, "OOOiO", &o_m1, &o_m2, &o_x, &b, &o_mmrcnst)) return NULL; //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PyObject *pd_log = PyDict_GetItemString(o_mmrcnst, "LOG"); @@ -562,9 +559,7 @@ static PyObject *aux_varon(PyObject *self, PyObject *args) { float *x = (float *)PyArray_DATA(p_x); int ndim = PyArray_NDIM(p_x); size_t nele = 1; - for (int i = 0; i < ndim; i++) { - nele *= PyArray_DIM(p_x, i); - } + for (int i = 0; i < ndim; i++) { nele *= PyArray_DIM(p_x, i); } printf("i> number of elements in data array: %lu\n", nele); diff --git a/niftypet/nipet/src/norm.cu b/niftypet/nipet/src/norm.cu index dc4d76fd..977e5c82 100644 --- a/niftypet/nipet/src/norm.cu +++ b/niftypet/nipet/src/norm.cu @@ -63,8 +63,7 @@ void norm_from_components(float *sino, // output norm sino int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); int snno = -1; if (Cnt.SPN == 1) @@ -184,8 +183,7 @@ void norm_from_components(float *sino, // output norm sino // CUDA grid size (in blocks) int blcks = ceil(AW / (float)NTHREADS); - if (Cnt.LOG <= LOGINFO) - printf("i> calculating normalisation sino from norm components..."); + if (Cnt.LOG <= LOGINFO) printf("i> calculating normalisation sino from norm components..."); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); @@ -204,8 +202,7 @@ void norm_from_components(float *sino, // output norm sino cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) - printf(" DONE in %fs.\n", 0.001 * elapsedTime); + if 
(Cnt.LOG <= LOGINFO) printf(" DONE in %fs.\n", 0.001 * elapsedTime); //===================================== // copy the GPU norm array to the output normalisation sinogram diff --git a/niftypet/nipet/src/scanner_0.cu b/niftypet/nipet/src/scanner_0.cu index a619a564..5049a200 100644 --- a/niftypet/nipet/src/scanner_0.cu +++ b/niftypet/nipet/src/scanner_0.cu @@ -25,8 +25,7 @@ int *lm; //************ CHECK DEVICE MEMORY USAGE ********************* void getMemUse(const Cnst Cnt) { - if (Cnt.LOG > LOGDEBUG) - return; + if (Cnt.LOG > LOGDEBUG) return; size_t free_mem; size_t total_mem; HANDLE_ERROR(cudaMemGetInfo(&free_mem, &total_mem)); @@ -52,8 +51,7 @@ span11LUT span1_span11(const Cnst Cnt) { // cumulative sum of the above segment def int cumSeg[SPAN]; cumSeg[0] = 0; - for (int i = 1; i < SPAN; i++) - cumSeg[i] = cumSeg[i - 1] + sinoSeg[i - 1]; + for (int i = 1; i < SPAN; i++) cumSeg[i] = cumSeg[i - 1] + sinoSeg[i - 1]; int segsum = Cnt.NRNG; int rd = 0; @@ -111,8 +109,7 @@ void remove_gaps(float *sng, float *sino, int snno, int *aw2ali, Cnst Cnt) { // check which device is going to be used int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); int nthreads = 256; int blcks = ceil(AW / (float)nthreads); @@ -130,8 +127,7 @@ void remove_gaps(float *sng, float *sino, int snno, int *aw2ali, Cnst Cnt) { HANDLE_ERROR(cudaMalloc(&d_aw2ali, AW * sizeof(int))); HANDLE_ERROR(cudaMemcpy(d_aw2ali, aw2ali, AW * sizeof(int), cudaMemcpyHostToDevice)); - if (Cnt.LOG <= LOGINFO) - printf("i> and removing the gaps and reordering sino for GPU..."); + if (Cnt.LOG <= LOGINFO) printf("i> and removing the gaps and reordering sino for GPU..."); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); @@ -147,8 +143,7 @@ void remove_gaps(float *sng, float *sino, int snno, int *aw2ali, Cnst Cnt) { cudaEventElapsedTime(&elapsedTime, start, stop); 
cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) - printf(" DONE in %fs\n", 0.001 * elapsedTime); + if (Cnt.LOG <= LOGINFO) printf(" DONE in %fs\n", 0.001 * elapsedTime); HANDLE_ERROR(cudaMemcpy(sng, d_sng, AW * snno * sizeof(float), cudaMemcpyDeviceToHost)); @@ -167,9 +162,7 @@ __global__ void d_putgaps(float *sne7, float *snaw, int *aw2ali, const int snno) // sino bin index int awi = blockIdx.x; - if (sni < snno) { - sne7[aw2ali[awi] * snno + sni] = snaw[awi * snno + sni]; - } + if (sni < snno) { sne7[aw2ali[awi] * snno + sni] = snaw[awi * snno + sni]; } } //============================================================================= @@ -178,8 +171,7 @@ void put_gaps(float *sino, float *sng, int *aw2ali, int sino_no, Cnst Cnt) { // check which device is going to be used int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) - printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); // number of sinos int snno = -1; @@ -200,8 +192,7 @@ void put_gaps(float *sino, float *sng, int *aw2ali, int sino_no, Cnst Cnt) { snno = nrng_c * nrng_c; // correct for the max. 
ring difference in the full axial extent (don't use ring range (1,63) // as for this case no correction) - if (nrng_c == 64) - snno -= 12; + if (nrng_c == 64) snno -= 12; } else { printf("e> not span-1, span-11 nor user defined.\n"); return; @@ -222,8 +213,7 @@ void put_gaps(float *sino, float *sng, int *aw2ali, int sino_no, Cnst Cnt) { HANDLE_ERROR(cudaMalloc(&d_aw2ali, AW * sizeof(int))); HANDLE_ERROR(cudaMemcpy(d_aw2ali, aw2ali, AW * sizeof(int), cudaMemcpyHostToDevice)); - if (Cnt.LOG <= LOGINFO) - printf("i> put gaps in and reorder sino..."); + if (Cnt.LOG <= LOGINFO) printf("i> put gaps in and reorder sino..."); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); @@ -239,8 +229,7 @@ void put_gaps(float *sino, float *sng, int *aw2ali, int sino_no, Cnst Cnt) { cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) - printf("DONE in %fs.\n", 0.001 * elapsedTime); + if (Cnt.LOG <= LOGINFO) printf("DONE in %fs.\n", 0.001 * elapsedTime); HANDLE_ERROR( cudaMemcpy(sino, d_sino, NSBINS * NSANGLES * snno * sizeof(float), cudaMemcpyDeviceToHost)); From 02b86642bab11f8cc292554524d2516a5c37eccd Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 20 Jan 2021 03:00:57 +0000 Subject: [PATCH 25/64] build: fix cuda compute capability>=3.5 auto-detection --- .github/workflows/test.yml | 2 -- setup.py | 18 +++++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fa63580d..95967ff1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -51,8 +51,6 @@ jobs: - run: pip install -U --no-binary nimpa -e .[dev] - run: pytest - run: codecov - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - name: Post Run setup-python run: setup-python -p3.7 -Dr if: ${{ always() }} diff --git a/setup.py b/setup.py index bb61981b..602a2be8 100644 --- a/setup.py +++ b/setup.py @@ -191,17 +191,17 
@@ def check_constants(): log.info("hardware mu-maps have been located") build_ver = ".".join(__version__.split('.')[:3]).split(".dev")[0] +cmake_args = [f"-DNIPET_BUILD_VERSION={build_ver}", f"-DPython3_ROOT_DIR={sys.prefix}"] try: - nvcc_arches = {"{2:d}{3:d}".format(*i) for i in dinf.gpuinfo()} + nvcc_arches = {"{2:d}{3:d}".format(*i) for i in dinf.gpuinfo() if i[2:4] >= (3, 5)} + if nvcc_arches: + cmake_args.append("-DCMAKE_CUDA_ARCHITECTURES=" + " ".join(sorted(nvcc_arches))) except Exception as exc: if "sdist" not in sys.argv or any(i in sys.argv for i in ["build", "bdist", "wheel"]): - log.warning("could not detect CUDA architectures:\n%s", exc) - nvcc_arches = [] + log.warning("Import or CUDA device detection error:\n%s", exc) for i in (Path(__file__).resolve().parent / "_skbuild").rglob("CMakeCache.txt"): i.write_text(re.sub("^//.*$\n^[^#].*pip-build-env.*$", "", i.read_text(), flags=re.M)) -setup( - use_scm_version=True, packages=find_packages(exclude=["examples", "tests"]), - package_data={"niftypet": ["nipet/auxdata/*"]}, cmake_source_dir="niftypet", - cmake_languages=("C", "CXX", "CUDA"), cmake_minimum_required_version="3.18", cmake_args=[ - f"-DNIPET_BUILD_VERSION={build_ver}", f"-DPython3_ROOT_DIR={sys.prefix}", - "-DCMAKE_CUDA_ARCHITECTURES=" + " ".join(sorted(nvcc_arches))]) +setup(use_scm_version=True, packages=find_packages(exclude=["examples", "tests"]), + package_data={"niftypet": ["nipet/auxdata/*"]}, cmake_source_dir="niftypet", + cmake_languages=("C", "CXX", "CUDA"), cmake_minimum_required_version="3.18", + cmake_args=cmake_args) From 7b70f3363fb5eb0e90879aa5089dd86be9ed8ff7 Mon Sep 17 00:00:00 2001 From: Pawel Date: Wed, 20 Jan 2021 21:16:22 +0000 Subject: [PATCH 26/64] improving basic forward and back projection for image reconstruction --- niftypet/nipet/__init__.py | 1 + niftypet/nipet/prj/mmrprj.py | 26 +++++++++++++++++--------- niftypet/nipet/prj/src/recon.cu | 3 +++ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git 
a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index 0976bbc9..fae871f5 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ -42,6 +42,7 @@ # > Siemens Biograph mMR from . import img, lm, mmr_auxe, mmraux, mmrnorm, prj from .img.mmrimg import align_mumap +from .img.mmrimg import get_cylinder from .img.mmrimg import convert2dev as im_e72dev from .img.mmrimg import convert2e7 as im_dev2e7 from .img.mmrimg import hdw_mumap, obj_mumap, pct_mumap diff --git a/niftypet/nipet/prj/mmrprj.py b/niftypet/nipet/prj/mmrprj.py index e4a68a72..ed46d3e9 100644 --- a/niftypet/nipet/prj/mmrprj.py +++ b/niftypet/nipet/prj/mmrprj.py @@ -42,7 +42,7 @@ def trnx_prj(scanner_params, sino=None, im=None): # ------------------------------------------------------------------------ -def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=False): +def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=False, fullsino_out=True): """ Calculate forward projection (a set of sinograms) for the provided input image. 
Arguments: @@ -114,13 +114,20 @@ def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=F # -------------------- petprj.fprj(sinog, ims, txLUT, axLUT, isub, Cnt, att) # -------------------- - # get the sinogram bins in a proper sinogram - sino = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) - if isub[0] >= 0: sino[isub, :] = sinog - else: sino = sinog + + + # get the sinogram bins in a full sinogram if requested + if fullsino_out: + sino = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) + if isub[0] >= 0: + sino[isub, :] = sinog + else: + sino = sinog + else: + sino = sinog # put the gaps back to form displayable sinogram - if not dev_out: + if not dev_out and fullsino_out: sino = mmraux.putgaps(sino, txLUT, Cnt) return sino @@ -131,7 +138,7 @@ def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=F # ------------------------------------------------------------------------ -def back_prj(sino, scanner_params, isub=ISUB_DEFAULT): +def back_prj(sino, scanner_params, isub=ISUB_DEFAULT, dev_out=False): ''' Calculate forward projection for the provided input image. 
Arguments: @@ -192,7 +199,8 @@ def back_prj(sino, scanner_params, isub=ISUB_DEFAULT): # > run back-projection petprj.bprj(bimg, sinog, txLUT, axLUT, isub, Cnt) - # > change from GPU optimised image dimensions to the standard Siemens shape - bimg = mmrimg.convert2e7(bimg, Cnt) + if not dev_out: + # > change from GPU optimised image dimensions to the standard Siemens shape + bimg = mmrimg.convert2e7(bimg, Cnt) return bimg diff --git a/niftypet/nipet/prj/src/recon.cu b/niftypet/nipet/prj/src/recon.cu index 0b87696f..63a6b325 100644 --- a/niftypet/nipet/prj/src/recon.cu +++ b/niftypet/nipet/prj/src/recon.cu @@ -441,6 +441,7 @@ void osem(float *imgout, bool *rncmsk, unsigned short *psng, float *rsng, float // resolution modelling sensitivity image for (int i = 0; i < Nsub && krnl[0] >= 0; i++) { + HANDLE_ERROR(cudaMemset(d_convDst, 0, SZ_IMX * SZ_IMY * (SZ_IMZ + 1) * sizeof(float))); d_pad(d_convSrc, &d_sensim[i * SZ_IMZ * SZ_IMX * SZ_IMY]); d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); d_unpad(&d_sensim[i * SZ_IMZ * SZ_IMX * SZ_IMY], d_convDst); @@ -463,6 +464,7 @@ void osem(float *imgout, bool *rncmsk, unsigned short *psng, float *rsng, float // resolution modelling current image if (krnl[0] >= 0) { + HANDLE_ERROR(cudaMemset(d_convDst, 0, SZ_IMX * SZ_IMY * (SZ_IMZ + 1) * sizeof(float))); d_pad(d_convSrc, d_imgout); d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); d_unpad(d_imgout_rm, d_convDst); @@ -486,6 +488,7 @@ void osem(float *imgout, bool *rncmsk, unsigned short *psng, float *rsng, float // resolution modelling backprojection if (krnl[0] >= 0) { + HANDLE_ERROR(cudaMemset(d_convDst, 0, SZ_IMX * SZ_IMY * (SZ_IMZ + 1) * sizeof(float))); d_pad(d_convSrc, d_bimg); d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); d_unpad(d_bimg, d_convDst); From a8921321322f46068b721423f72afb02ee0d9042 Mon Sep 17 00:00:00 2001 From: Pawel Date: Thu, 21 Jan 2021 02:07:56 +0000 Subject: [PATCH 27/64] fixed PSF bug --- 
niftypet/nipet/prj/src/recon.cu | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/niftypet/nipet/prj/src/recon.cu b/niftypet/nipet/prj/src/recon.cu index 63a6b325..896acda0 100644 --- a/niftypet/nipet/prj/src/recon.cu +++ b/niftypet/nipet/prj/src/recon.cu @@ -441,7 +441,6 @@ void osem(float *imgout, bool *rncmsk, unsigned short *psng, float *rsng, float // resolution modelling sensitivity image for (int i = 0; i < Nsub && krnl[0] >= 0; i++) { - HANDLE_ERROR(cudaMemset(d_convDst, 0, SZ_IMX * SZ_IMY * (SZ_IMZ + 1) * sizeof(float))); d_pad(d_convSrc, &d_sensim[i * SZ_IMZ * SZ_IMX * SZ_IMY]); d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); d_unpad(&d_sensim[i * SZ_IMZ * SZ_IMX * SZ_IMY], d_convDst); @@ -464,7 +463,6 @@ void osem(float *imgout, bool *rncmsk, unsigned short *psng, float *rsng, float // resolution modelling current image if (krnl[0] >= 0) { - HANDLE_ERROR(cudaMemset(d_convDst, 0, SZ_IMX * SZ_IMY * (SZ_IMZ + 1) * sizeof(float))); d_pad(d_convSrc, d_imgout); d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); d_unpad(d_imgout_rm, d_convDst); @@ -472,7 +470,7 @@ void osem(float *imgout, bool *rncmsk, unsigned short *psng, float *rsng, float // forward project cudaMemset(d_esng, 0, Nprj * snno * sizeof(float)); - rec_fprj(d_esng, Cnt.SIGMA_RM > 0 ? d_imgout_rm : d_imgout, &d_subs[i * Nprj + 1], + rec_fprj(d_esng, krnl[0]>=0 ? 
d_imgout_rm : d_imgout, &d_subs[i * Nprj + 1], subs[i * Nprj], d_tt, d_tv, li2rng, li2sn, li2nos, Cnt); // add the randoms+scatter @@ -488,7 +486,6 @@ void osem(float *imgout, bool *rncmsk, unsigned short *psng, float *rsng, float // resolution modelling backprojection if (krnl[0] >= 0) { - HANDLE_ERROR(cudaMemset(d_convDst, 0, SZ_IMX * SZ_IMY * (SZ_IMZ + 1) * sizeof(float))); d_pad(d_convSrc, d_bimg); d_conv(d_convTmp, d_convDst, d_convSrc, SZ_IMX, SZ_IMY, SZ_IMZ + 1); d_unpad(d_bimg, d_convDst); From 61841d3eb565fb40281a132227fc8ac4800f47b0 Mon Sep 17 00:00:00 2001 From: Pawel Date: Fri, 29 Jan 2021 00:11:58 +0000 Subject: [PATCH 28/64] improvments of the output generation of mmrchain function for image reconstruction --- niftypet/nipet/__init__.py | 2 ++ niftypet/nipet/img/pipe.py | 50 +++++++++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 12 deletions(-) diff --git a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index fae871f5..d6cea930 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ -50,6 +50,8 @@ from .lm.mmrhist import dynamic_timings, mmrhist, randoms from .mmraux import explore_input as classify_input from .mmraux import mMR_params as get_mmrparams +from .mmraux import sino2ssr + from .prj.mmrprj import back_prj, frwd_prj from .prj.mmrsim import simulate_recon, simulate_sino from .sct.mmrsct import vsm diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index d181c9ae..27344951 100644 --- a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -23,7 +23,8 @@ def mmrchain( scanner_params, # all scanner parameters in one dictionary # containing constants, transaxial and axial # LUTs. - outpath='', # output path for results + outpath=None, # output path for results + fout=None, # full file name (any folders and extensions are disregarded) frames=None, # definition of time frames, default: ['fluid', [0, 0]] mu_h=None, # hardware mu-map. mu_o=None, # object mu-map. 
@@ -134,7 +135,7 @@ def mmrchain( # ------------------------------------------------------------------------- # create folders for results - if outpath == '': + if outpath is None: petdir = os.path.join(datain['corepath'], 'reconstructed') fmudir = os.path.join(datain['corepath'], 'mumap-obj') pvcdir = os.path.join(datain['corepath'], 'PRCL') @@ -143,6 +144,12 @@ def mmrchain( fmudir = os.path.join(outpath, 'mumap-obj') pvcdir = os.path.join(outpath, 'PRCL') + if fout is not None: + #> get rid of folders + fout = os.path.basename(fout) + #> get rid of extension + fout = fout.split('.')[0] + # folder for co-registered mu-maps (for motion compensation) fmureg = os.path.join(fmudir, 'registered') # folder for affine transformation MR/CT->PET @@ -373,7 +380,11 @@ def mmrchain( output['im'] = np.squeeze(dynim) if ret_sinos and itr > 1 and recmod > 2: - output['sinos'] = {'psino': dynpsn, 'ssino': dynssn, 'rsino': dynrsn, 'amask': dynmsk} + output['sinos'] = dict( + psino=np.squeeze(dynpsn), + ssino=np.squeeze(dynssn), + rsino=np.squeeze(dynrsn), + amask=np.squeeze(dynmsk)) if ret_histo: output['hst'] = hsts @@ -487,16 +498,28 @@ def mmrchain( if t1 == t0: t0 = 0 t1 = hst['dur'] - fpet = os.path.join(petimg, - os.path.basename(recimg.fpet)[:8] + f'_t-{t0}-{t1}sec_itr-{itr}') - fpeto = f"{fpet}{fcomment}.nii.gz" + # > --- file naming and saving --- + if fout is None: + fpet = os.path.join(petimg, + os.path.basename(recimg.fpet)[:8] + f'_t-{t0}-{t1}sec_itr-{itr}') + fpeto = f"{fpet}{fcomment}.nii.gz" + else: + fpeto = os.path.join(petimg, os.path.basename(fout)+'.nii.gz') + nimpa.prc.array2nii(dynim[::-1, ::-1, :], recimg.affine, fpeto, descrip=descrip) + # > --- --- else: - fpet = os.path.join(petimg, - os.path.basename(recimg.fpet)[:8] + f'_nfrm-{nfrm}_itr-{itr}') - fpeto = f"{fpet}{fcomment}.nii.gz" + if fout is None: + fpet = os.path.join(petimg, + os.path.basename(recimg.fpet)[:8] + f'_nfrm-{nfrm}_itr-{itr}') + fpeto = f"{fpet}{fcomment}.nii.gz" + else: + fpeto = 
os.path.join(petimg, os.path.basename(fout) + f'_nfrm-{nfrm}.nii.gz') + nimpa.prc.array2nii(dynim[:, ::-1, ::-1, :], recimg.affine, fpeto, descrip=descrip) + output['fpet'] = fpeto + # get output file names for trimmed/PVC images if trim: # folder for trimmed and dynamic @@ -506,8 +529,12 @@ def mmrchain( # trimming scale added to NIfTI descritoption descrip_trim = f'{descrip};trim_scale={trim_scale}' # file name for saving the trimmed image - fpetu = os.path.join(pettrim, - os.path.basename(fpet) + f'_trimmed-upsampled-scale-{trim_scale}') + if fout is None: + fpetu = os.path.join(pettrim, + os.path.basename(fpet) + f'_trimmed-upsampled-scale-{trim_scale}') + else: + fpetu = os.path.join(pettrim, + os.path.basename(fout) + f'_trimmed-upsampled-scale-{trim_scale}') # in case of PVC if pvcroi: # itertive Yang (iY) added to NIfTI descritoption @@ -521,7 +548,6 @@ def mmrchain( # store the file name in the output dictionary output['trimmed']['fpet'] = fpetu - output['fpet'] = fpeto # save images if nfrm == 1: From 93edc15dc12f98a1580d2b0b2590c07e1bd4731a Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 Jan 2021 00:09:14 +0000 Subject: [PATCH 29/64] fix style --- niftypet/nipet/__init__.py | 4 +--- niftypet/nipet/img/pipe.py | 26 +++++++++++++------------- niftypet/nipet/prj/mmrprj.py | 8 ++++---- niftypet/nipet/prj/src/recon.cu | 4 ++-- 4 files changed, 20 insertions(+), 22 deletions(-) diff --git a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index d6cea930..18161910 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ -42,16 +42,14 @@ # > Siemens Biograph mMR from . 
import img, lm, mmr_auxe, mmraux, mmrnorm, prj from .img.mmrimg import align_mumap -from .img.mmrimg import get_cylinder from .img.mmrimg import convert2dev as im_e72dev from .img.mmrimg import convert2e7 as im_dev2e7 -from .img.mmrimg import hdw_mumap, obj_mumap, pct_mumap +from .img.mmrimg import get_cylinder, hdw_mumap, obj_mumap, pct_mumap from .img.pipe import mmrchain from .lm.mmrhist import dynamic_timings, mmrhist, randoms from .mmraux import explore_input as classify_input from .mmraux import mMR_params as get_mmrparams from .mmraux import sino2ssr - from .prj.mmrprj import back_prj, frwd_prj from .prj.mmrsim import simulate_recon, simulate_sino from .sct.mmrsct import vsm diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index 27344951..6f426c2f 100644 --- a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -380,11 +380,9 @@ def mmrchain( output['im'] = np.squeeze(dynim) if ret_sinos and itr > 1 and recmod > 2: - output['sinos'] = dict( - psino=np.squeeze(dynpsn), - ssino=np.squeeze(dynssn), - rsino=np.squeeze(dynrsn), - amask=np.squeeze(dynmsk)) + output['sinos'] = { + 'psino': np.squeeze(dynpsn), 'ssino': np.squeeze(dynssn), 'rsino': np.squeeze(dynrsn), + 'amask': np.squeeze(dynmsk)} if ret_histo: output['hst'] = hsts @@ -500,11 +498,12 @@ def mmrchain( t1 = hst['dur'] # > --- file naming and saving --- if fout is None: - fpet = os.path.join(petimg, - os.path.basename(recimg.fpet)[:8] + f'_t-{t0}-{t1}sec_itr-{itr}') + fpet = os.path.join( + petimg, + os.path.basename(recimg.fpet)[:8] + f'_t-{t0}-{t1}sec_itr-{itr}') fpeto = f"{fpet}{fcomment}.nii.gz" else: - fpeto = os.path.join(petimg, os.path.basename(fout)+'.nii.gz') + fpeto = os.path.join(petimg, os.path.basename(fout) + '.nii.gz') nimpa.prc.array2nii(dynim[::-1, ::-1, :], recimg.affine, fpeto, descrip=descrip) # > --- --- @@ -530,11 +529,13 @@ def mmrchain( descrip_trim = f'{descrip};trim_scale={trim_scale}' # file name for saving the trimmed image if fout is None: - 
fpetu = os.path.join(pettrim, - os.path.basename(fpet) + f'_trimmed-upsampled-scale-{trim_scale}') + fpetu = os.path.join( + pettrim, + os.path.basename(fpet) + f'_trimmed-upsampled-scale-{trim_scale}') else: - fpetu = os.path.join(pettrim, - os.path.basename(fout) + f'_trimmed-upsampled-scale-{trim_scale}') + fpetu = os.path.join( + pettrim, + os.path.basename(fout) + f'_trimmed-upsampled-scale-{trim_scale}') # in case of PVC if pvcroi: # itertive Yang (iY) added to NIfTI descritoption @@ -548,7 +549,6 @@ def mmrchain( # store the file name in the output dictionary output['trimmed']['fpet'] = fpetu - # save images if nfrm == 1: if trim: diff --git a/niftypet/nipet/prj/mmrprj.py b/niftypet/nipet/prj/mmrprj.py index ed46d3e9..19300760 100644 --- a/niftypet/nipet/prj/mmrprj.py +++ b/niftypet/nipet/prj/mmrprj.py @@ -42,7 +42,8 @@ def trnx_prj(scanner_params, sino=None, im=None): # ------------------------------------------------------------------------ -def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=False, fullsino_out=True): +def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=False, + fullsino_out=True): """ Calculate forward projection (a set of sinograms) for the provided input image. 
Arguments: @@ -115,13 +116,12 @@ def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=F petprj.fprj(sinog, ims, txLUT, axLUT, isub, Cnt, att) # -------------------- - # get the sinogram bins in a full sinogram if requested if fullsino_out: sino = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) - if isub[0] >= 0: + if isub[0] >= 0: sino[isub, :] = sinog - else: + else: sino = sinog else: sino = sinog diff --git a/niftypet/nipet/prj/src/recon.cu b/niftypet/nipet/prj/src/recon.cu index 896acda0..decf9797 100644 --- a/niftypet/nipet/prj/src/recon.cu +++ b/niftypet/nipet/prj/src/recon.cu @@ -470,8 +470,8 @@ void osem(float *imgout, bool *rncmsk, unsigned short *psng, float *rsng, float // forward project cudaMemset(d_esng, 0, Nprj * snno * sizeof(float)); - rec_fprj(d_esng, krnl[0]>=0 ? d_imgout_rm : d_imgout, &d_subs[i * Nprj + 1], - subs[i * Nprj], d_tt, d_tv, li2rng, li2sn, li2nos, Cnt); + rec_fprj(d_esng, krnl[0] >= 0 ? d_imgout_rm : d_imgout, &d_subs[i * Nprj + 1], subs[i * Nprj], + d_tt, d_tv, li2rng, li2sn, li2nos, Cnt); // add the randoms+scatter d_sneladd(d_esng, d_rsng, &d_subs[i * Nprj + 1], subs[i * Nprj], snno); From b8c5fd383de263ce584dee61f67f7fe9910b111b Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 Jan 2021 00:16:56 +0000 Subject: [PATCH 30/64] minor tidy --- niftypet/nipet/prj/src/recon.cu | 4 +++- niftypet/nipet/prj/src/recon.h | 8 ++++---- setup.cfg | 2 ++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/niftypet/nipet/prj/src/recon.cu b/niftypet/nipet/prj/src/recon.cu index decf9797..381f6c19 100644 --- a/niftypet/nipet/prj/src/recon.cu +++ b/niftypet/nipet/prj/src/recon.cu @@ -7,10 +7,12 @@ Copyrights: 2020 Casper da Costa-Luis ------------------------------------------------------------------------*/ #include "recon.h" -#include +#include // number of threads used for element-wise GPU calculations +#ifndef NTHRDS #define NTHRDS 1024 +#endif NTHRDS #define FLOAT_WITHIN_EPS(x) (-0.000001f 
< x && x < 0.000001f) /// z: how many Z-slices to add diff --git a/niftypet/nipet/prj/src/recon.h b/niftypet/nipet/prj/src/recon.h index e3e3f2d1..b2eb8aad 100644 --- a/niftypet/nipet/prj/src/recon.h +++ b/niftypet/nipet/prj/src/recon.h @@ -3,10 +3,10 @@ #include "prjf.h" #include "scanner_0.h" #include "tprj.h" -#include +#include -#ifndef RECON_H -#define RECON_H +#ifndef _NIPET_RECON_H_ +#define _NIPET_RECON_H_ /* separable convolution */ #define KERNEL_LENGTH (2 * RSZ_PSF_KRNL + 1) @@ -34,4 +34,4 @@ void osem(float *imgout, bool *rcnmsk, unsigned short *psng, float *rsng, float int Nsub, int Nprj, int N0crs, Cnst Cnt); -#endif +#endif // _NIPET_RECON_H_ diff --git a/setup.cfg b/setup.cfg index f8c36e6d..3f530fbd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,6 +16,8 @@ maintainer_email=casper.dcl@physics.org keywords=PET, image reconstruction, analysis classifiers= Development Status :: 5 - Production/Stable + Environment :: GPU + Environment :: GPU :: NVIDIA CUDA Intended Audience :: Education Intended Audience :: Healthcare Industry Intended Audience :: Science/Research From 4709b5a22c05a416efcbcd8781d09217b7146b5b Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 1 Feb 2021 18:22:49 +0000 Subject: [PATCH 31/64] tests: attempt using CUDA 10.2 --- .github/workflows/test.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 95967ff1..dc4f5646 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,7 +48,12 @@ jobs: fetch-depth: 0 - name: Run setup-python run: setup-python -p3.7 - - run: pip install -U --no-binary nimpa -e .[dev] + - name: pip install -e . 
+ run: | + export PATH="$CUDAToolkit_ROOT/bin:$PATH" + pip install -U --no-binary nimpa -e .[dev] + env: + CUDAToolkit_ROOT: /usr/local/cuda-10.2 - run: pytest - run: codecov - name: Post Run setup-python From 749a04d54ee7759a68252c78d6963cae41b65a32 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 1 Feb 2021 21:09:41 +0000 Subject: [PATCH 32/64] tests: use latest NInst for consistent device ID --- .github/workflows/test.yml | 7 +------ pyproject.toml | 2 +- setup.cfg | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dc4f5646..95967ff1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,12 +48,7 @@ jobs: fetch-depth: 0 - name: Run setup-python run: setup-python -p3.7 - - name: pip install -e . - run: | - export PATH="$CUDAToolkit_ROOT/bin:$PATH" - pip install -U --no-binary nimpa -e .[dev] - env: - CUDAToolkit_ROOT: /usr/local/cuda-10.2 + - run: pip install -U --no-binary nimpa -e .[dev] - run: pytest - run: codecov - name: Post Run setup-python diff --git a/pyproject.toml b/pyproject.toml index 2e4331aa..a1e18c51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = ["setuptools>=42", "wheel", "setuptools_scm[toml]>=3.4", - "ninst>=0.8.0", "numpy>=1.14", "miutil[cuda]>=0.4.0", + "ninst>=0.10.0", "numpy>=1.14", "miutil[cuda]>=0.4.0", "scikit-build>=0.11.0", "cmake>=3.18", "ninja"] [tool.setuptools_scm] diff --git a/setup.cfg b/setup.cfg index 3f530fbd..e74a381a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,7 +39,7 @@ setup_requires= setuptools>=42 wheel setuptools_scm[toml] - ninst>=0.8.0 + ninst>=0.10.0 numpy>=1.14 miutil[cuda]>=0.4.0 scikit-build>=0.11.0 From 5450cb2470db88937ad5d3737d2fe00e9df3d13e Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 1 Feb 2021 21:32:37 +0000 Subject: [PATCH 33/64] minor: use NInst CC --- setup.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff 
--git a/setup.py b/setup.py index 602a2be8..ec63899d 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,6 @@ from skbuild import setup from niftypet.ninst import cudasetup as cs -from niftypet.ninst import dinf from niftypet.ninst import install_tools as tls __version__ = get_version(root=".", relative_to=__file__) @@ -156,8 +155,9 @@ def check_constants(): # check and update the constants in C headers according to resources.py check_constants() try: - gpuarch = cs.dev_setup() # update resources.py with a supported GPU device + nvcc_arches = cs.dev_setup() # update resources.py with a supported GPU device except Exception as exc: + nvcc_arches = [] log.error("could not set up CUDA:\n%s", exc) log.info( @@ -193,7 +193,6 @@ def check_constants(): build_ver = ".".join(__version__.split('.')[:3]).split(".dev")[0] cmake_args = [f"-DNIPET_BUILD_VERSION={build_ver}", f"-DPython3_ROOT_DIR={sys.prefix}"] try: - nvcc_arches = {"{2:d}{3:d}".format(*i) for i in dinf.gpuinfo() if i[2:4] >= (3, 5)} if nvcc_arches: cmake_args.append("-DCMAKE_CUDA_ARCHITECTURES=" + " ".join(sorted(nvcc_arches))) except Exception as exc: From 11af607dbb7dc971b89a3afe05fc53a9e17e075a Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 1 Feb 2021 22:19:20 +0000 Subject: [PATCH 34/64] fix np.bool deprecation --- niftypet/nipet/sct/mmrsct.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/niftypet/nipet/sct/mmrsct.py b/niftypet/nipet/sct/mmrsct.py index ef78750b..2c054730 100644 --- a/niftypet/nipet/sct/mmrsct.py +++ b/niftypet/nipet/sct/mmrsct.py @@ -614,7 +614,7 @@ def vsm( mssr = mmraux.sino2ssr(msksn, axLUT, Cnt) mssr = mssr > 0 else: - mssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.bool) + mssr = np.zeros((Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=bool) # <<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> From 089308c24b6a2351acc11e91d864c5d1430c0ecf Mon Sep 17 00:00:00 2001 From: Casper da 
Costa-Luis Date: Mon, 1 Feb 2021 23:27:17 +0000 Subject: [PATCH 35/64] fix minor formatting --- niftypet/nipet/img/pipe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index 6f426c2f..e84b1520 100644 --- a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -145,10 +145,10 @@ def mmrchain( pvcdir = os.path.join(outpath, 'PRCL') if fout is not None: - #> get rid of folders + # > get rid of folders fout = os.path.basename(fout) - #> get rid of extension - fout = fout.split('.')[0] + # > get rid of extension + fout = fout.rsplit('.', 1)[0] # folder for co-registered mu-maps (for motion compensation) fmureg = os.path.join(fmudir, 'registered') From e5eb84c6d1345bd5f596749bb25f97c6e54b6da7 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 3 Feb 2021 23:44:53 +0000 Subject: [PATCH 36/64] CMake: static CUDA lib linking, indentation --- niftypet/CMakeLists.txt | 10 +++++----- niftypet/nipet/CMakeLists.txt | 2 +- niftypet/nipet/lm/CMakeLists.txt | 2 +- niftypet/nipet/prj/CMakeLists.txt | 2 +- niftypet/nipet/sct/CMakeLists.txt | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/niftypet/CMakeLists.txt b/niftypet/CMakeLists.txt index c74ad427..702a6a12 100644 --- a/niftypet/CMakeLists.txt +++ b/niftypet/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR) if("${NIPET_BUILD_VERSION}" STREQUAL "") -set(NIPET_BUILD_VERSION 2 CACHE STRING "version" FORCE) + set(NIPET_BUILD_VERSION 2 CACHE STRING "version" FORCE) endif() project(nipet LANGUAGES C CXX CUDA VERSION "${NIPET_BUILD_VERSION}") @@ -10,16 +10,16 @@ cmake_policy(SET CMP0104 NEW) # CMAKE_CUDA_ARCHITECTURES find_package(Python3 COMPONENTS Interpreter Development NumPy REQUIRED) find_package(CUDAToolkit REQUIRED) if(SKBUILD) -find_package(PythonExtensions REQUIRED) -set(LIB_TYPE "MODULE") + find_package(PythonExtensions REQUIRED) + set(LIB_TYPE "MODULE") else() -set(LIB_TYPE 
"SHARED") + set(LIB_TYPE "SHARED") endif() cmake_policy(POP) message(STATUS "CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") if("${CMAKE_BUILD_TYPE}" STREQUAL "") -set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) endif() message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") diff --git a/niftypet/nipet/CMakeLists.txt b/niftypet/nipet/CMakeLists.txt index 625ee7e2..3dac1042 100644 --- a/niftypet/nipet/CMakeLists.txt +++ b/niftypet/nipet/CMakeLists.txt @@ -13,7 +13,7 @@ add_library(NiftyPET::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) target_include_directories(${PROJECT_NAME} PUBLIC "$" "$") -target_link_libraries(${PROJECT_NAME} ${Python3_LIBRARIES} ${CUDA_LIBRARIES}) +target_link_libraries(${PROJECT_NAME} ${Python3_LIBRARIES} CUDA::cudart_static) if(SKBUILD) python_extension_module(${PROJECT_NAME}) diff --git a/niftypet/nipet/lm/CMakeLists.txt b/niftypet/nipet/lm/CMakeLists.txt index 7eb12c82..a5f4b335 100644 --- a/niftypet/nipet/lm/CMakeLists.txt +++ b/niftypet/nipet/lm/CMakeLists.txt @@ -10,7 +10,7 @@ add_library(NiftyPET::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) target_include_directories(${PROJECT_NAME} PUBLIC "$" "$") -target_link_libraries(${PROJECT_NAME} mmr_auxe ${Python3_LIBRARIES} ${CUDA_LIBRARIES} ${CUDA_curand_LIBRARY}) +target_link_libraries(${PROJECT_NAME} mmr_auxe ${Python3_LIBRARIES} CUDA::cudart_static CUDA::curand_static) if(SKBUILD) python_extension_module(${PROJECT_NAME}) diff --git a/niftypet/nipet/prj/CMakeLists.txt b/niftypet/nipet/prj/CMakeLists.txt index 5c747c3b..63e15dce 100644 --- a/niftypet/nipet/prj/CMakeLists.txt +++ b/niftypet/nipet/prj/CMakeLists.txt @@ -10,7 +10,7 @@ add_library(NiftyPET::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) target_include_directories(${PROJECT_NAME} PUBLIC "$" "$") -target_link_libraries(${PROJECT_NAME} mmr_auxe ${Python3_LIBRARIES} ${CUDA_LIBRARIES}) +target_link_libraries(${PROJECT_NAME} mmr_auxe ${Python3_LIBRARIES} CUDA::cudart_static) 
if(SKBUILD) python_extension_module(${PROJECT_NAME}) diff --git a/niftypet/nipet/sct/CMakeLists.txt b/niftypet/nipet/sct/CMakeLists.txt index 2dc879e9..69aee884 100644 --- a/niftypet/nipet/sct/CMakeLists.txt +++ b/niftypet/nipet/sct/CMakeLists.txt @@ -10,7 +10,7 @@ add_library(NiftyPET::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) target_include_directories(${PROJECT_NAME} PUBLIC "$" "$") -target_link_libraries(${PROJECT_NAME} mmr_auxe ${Python3_LIBRARIES} ${CUDA_LIBRARIES}) +target_link_libraries(${PROJECT_NAME} mmr_auxe ${Python3_LIBRARIES} CUDA::cudart_static) if(SKBUILD) python_extension_module(${PROJECT_NAME}) From b2c9be80119f5689cfcf0835a362ff24e991c481 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 4 Feb 2021 13:05:40 +0000 Subject: [PATCH 37/64] logging: defer formatting --- niftypet/nipet/prj/mmrprj.py | 2 +- niftypet/nipet/prj/mmrrec.py | 12 ++++++------ niftypet/nipet/prj/mmrsim.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/niftypet/nipet/prj/mmrprj.py b/niftypet/nipet/prj/mmrprj.py index 19300760..1dd7d50a 100644 --- a/niftypet/nipet/prj/mmrprj.py +++ b/niftypet/nipet/prj/mmrprj.py @@ -103,7 +103,7 @@ def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=F ' it has to be one of these: (z,y,x) = (127,344,344)' ' or (y,x,z) = (320,320,128)') - log.debug('number of sinos:%d' % nsinos) + log.debug('number of sinos: %d', nsinos) # predefine the sinogram. # if subsets are used then only preallocate those bins which will be used. 
diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 8233e896..2cb4bd16 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -184,7 +184,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # ---------- - log.info('reconstruction in mode:%d' % recmod) + log.info('reconstruction in mode: %d', recmod) # get object and hardware mu-maps muh, muo = mumaps @@ -266,7 +266,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N ssng = np.zeros(rsng.shape, dtype=rsng.dtype) # ======================================================================== - log.info('------ OSEM (%d) -------' % itr) + log.info('------ OSEM (%d) -------', itr) # ------------------------------------ Sn = 14 # number of subsets @@ -367,12 +367,12 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N f"{frmno}_t{hst['t0']}-{hst['t1']}sec_itr{k}{fcomment}_inrecon.nii.gz")) nimpa.array2nii(im[::-1, ::-1, :], B, fout) - log.info('recon time:%.3g' % (time.time() - stime)) + log.info('recon time: %.3g', time.time() - stime) # ======================================================================== - log.info('applying decay correction of %r' % dcycrr) - log.info('applying quantification factor:%r to the whole image' % qf) - log.info('for the frame duration of :%r' % hst['dur']) + log.info('applying decay correction of: %r', dcycrr) + log.info('applying quantification factor: %r to the whole image', qf) + log.info('for the frame duration of: %r', hst['dur']) # additional factor for making it quantitative in absolute terms (derived from measurements) img *= dcycrr * qf * qf_loc diff --git a/niftypet/nipet/prj/mmrsim.py b/niftypet/nipet/prj/mmrsim.py index decacf01..e1635f55 100644 --- a/niftypet/nipet/prj/mmrsim.py +++ b/niftypet/nipet/prj/mmrsim.py @@ -227,7 +227,7 @@ def simulate_recon( Cnt['SIGMA_RM'] = mmrrec.fwhm2sig(fwhm_rm, voxsize=Cnt['SZ_VOXZ'] * 10) if 
fwhm_rm else 0 if simulate_3d: - log.debug('------ OSEM (%d) -------' % nitr) + log.debug('------ OSEM (%d) -------', nitr) # measured sinogram in GPU-enabled shape psng = mmraux.remgaps(measured_sino.astype(np.uint16), txLUT, Cnt) From e7e4ff685016b231e78a917fe14f3825b45566a6 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 4 Feb 2021 13:06:13 +0000 Subject: [PATCH 38/64] CMake: expose NIPET_CU_THREADS=1024 --- niftypet/CMakeLists.txt | 5 +++++ niftypet/nipet/include/def.h | 4 +++- niftypet/nipet/prj/src/prjb.cu | 2 +- niftypet/nipet/prj/src/prjf.cu | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/niftypet/CMakeLists.txt b/niftypet/CMakeLists.txt index 702a6a12..0e40a43c 100644 --- a/niftypet/CMakeLists.txt +++ b/niftypet/CMakeLists.txt @@ -23,6 +23,11 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "") endif() message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") +if("${NIPET_CU_THREADS}" STREQUAL "") + set(NIPET_CU_THREADS 1024 CACHE STRING + "Maximum number of CUDA threads per block (should be less than cudaDeviceProp::maxThreadsDim)" FORCE) +endif() +add_compile_definitions(NIPET_CU_THREADS=${NIPET_CU_THREADS}) add_subdirectory(nipet) include(CMakePackageConfigHelpers) diff --git a/niftypet/nipet/include/def.h b/niftypet/nipet/include/def.h index 43c13660..d5c38b2f 100644 --- a/niftypet/nipet/include/def.h +++ b/niftypet/nipet/include/def.h @@ -28,7 +28,9 @@ #define MXNITAG 5400 // max number of time tags to avoid out of memory errors // maximum threads for device -#define MXTHRD 1024 +#ifndef NIPET_CU_THREADS +#define NIPET_CU_THREADS 1024 +#endif #define TOT_BINS_S1 354033792 // 344*252*4084 diff --git a/niftypet/nipet/prj/src/prjb.cu b/niftypet/nipet/prj/src/prjb.cu index 63369dab..7cc6e813 100644 --- a/niftypet/nipet/prj/src/prjb.cu +++ b/niftypet/nipet/prj/src/prjb.cu @@ -328,7 +328,7 @@ void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2no HANDLE_ERROR(cudaMalloc(&d_imr, SZ_IMX * SZ_IMY * nvz * 
sizeof(float))); HANDLE_ERROR(cudaMemset(d_imr, 0, SZ_IMX * SZ_IMY * nvz * sizeof(float))); // number of axial row for max threads - int nar = MXTHRD / nvz; + int nar = NIPET_CU_THREADS / nvz; dim3 THRD(nvz, nar, 1); dim3 BLCK((SZ_IMY + nar - 1) / nar, SZ_IMX, 1); imReduce<<>>(d_imr, d_im, vz0, nvz); diff --git a/niftypet/nipet/prj/src/prjf.cu b/niftypet/nipet/prj/src/prjf.cu index bdfe68a3..530f3fbe 100644 --- a/niftypet/nipet/prj/src/prjf.cu +++ b/niftypet/nipet/prj/src/prjf.cu @@ -286,7 +286,7 @@ void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2no // put zeros in the gaps of unused voxels HANDLE_ERROR(cudaMemset(d_im, 0, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); // number of axial row for max threads - int nar = MXTHRD / nvz; + int nar = NIPET_CU_THREADS / nvz; dim3 THRD(nvz, nar, 1); dim3 BLCK((SZ_IMY + nar - 1) / nar, SZ_IMX, 1); imExpand<<>>(d_im, d_imr, vz0, nvz); From 343bae9aa2233301d96933c631611682b1f1007f Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 4 Feb 2021 15:55:30 +0000 Subject: [PATCH 39/64] more threads --- niftypet/nipet/prj/src/recon.cu | 36 +++++++++++++++------------------ 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/niftypet/nipet/prj/src/recon.cu b/niftypet/nipet/prj/src/recon.cu index 381f6c19..004f20e7 100644 --- a/niftypet/nipet/prj/src/recon.cu +++ b/niftypet/nipet/prj/src/recon.cu @@ -9,10 +9,6 @@ Copyrights: #include "recon.h" #include -// number of threads used for element-wise GPU calculations -#ifndef NTHRDS -#define NTHRDS 1024 -#endif NTHRDS #define FLOAT_WITHIN_EPS(x) (-0.000001f < x && x < 0.000001f) /// z: how many Z-slices to add @@ -28,8 +24,8 @@ __global__ void pad(float *dst, float *src, const int z) { void d_pad(float *dst, float *src, const int z = COLUMNS_BLOCKDIM_X - SZ_IMZ % COLUMNS_BLOCKDIM_X) { HANDLE_ERROR(cudaMemset(dst, 0, SZ_IMX * SZ_IMY * (SZ_IMZ + z) * sizeof(float))); - dim3 BpG((SZ_IMX + NTHRDS / 32 - 1) / (NTHRDS / 32), (SZ_IMY + 31) / 32); - 
dim3 TpB(NTHRDS / 32, 32); + dim3 BpG((SZ_IMX + NIPET_CU_THREADS / 32 - 1) / (NIPET_CU_THREADS / 32), (SZ_IMY + 31) / 32); + dim3 TpB(NIPET_CU_THREADS / 32, 32); pad<<>>(dst, src, z); } @@ -45,8 +41,8 @@ __global__ void unpad(float *dst, float *src, const int z) { } void d_unpad(float *dst, float *src, const int z = COLUMNS_BLOCKDIM_X - SZ_IMZ % COLUMNS_BLOCKDIM_X) { - dim3 BpG((SZ_IMX + NTHRDS / 32 - 1) / (NTHRDS / 32), (SZ_IMY + 31) / 32); - dim3 TpB(NTHRDS / 32, 32); + dim3 BpG((SZ_IMX + NIPET_CU_THREADS / 32 - 1) / (NIPET_CU_THREADS / 32), (SZ_IMY + 31) / 32); + dim3 TpB(NIPET_CU_THREADS / 32, 32); unpad<<>>(dst, src, z); } @@ -227,8 +223,8 @@ __global__ void elmult(float *inA, float *inB, int length) { } void d_elmult(float *d_inA, float *d_inB, int length) { - dim3 BpG(ceil(length / (float)NTHRDS), 1, 1); - dim3 TpB(NTHRDS, 1, 1); + dim3 BpG(ceil(length / (float)NIPET_CU_THREADS), 1, 1); + dim3 TpB(NIPET_CU_THREADS, 1, 1); elmult<<>>(d_inA, d_inB, length); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -245,8 +241,8 @@ __global__ void eldiv0(float *inA, float *inB, int length) { } void d_eldiv(float *d_inA, float *d_inB, int length) { - dim3 BpG(ceil(length / (float)NTHRDS), 1, 1); - dim3 TpB(NTHRDS, 1, 1); + dim3 BpG(ceil(length / (float)NIPET_CU_THREADS), 1, 1); + dim3 TpB(NIPET_CU_THREADS, 1, 1); eldiv0<<>>(d_inA, d_inB, length); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -267,8 +263,8 @@ __global__ void sneldiv(float *inA, unsigned short *inB, int *sub, int Nprj, int } void d_sneldiv(float *d_inA, unsigned short *d_inB, int *d_sub, int Nprj, int snno) { - dim3 BpG(ceil(snno / (float)NTHRDS), Nprj, 1); - dim3 TpB(NTHRDS, 1, 1); + dim3 BpG(ceil(snno / (float)NIPET_CU_THREADS), Nprj, 1); + dim3 TpB(NIPET_CU_THREADS, 1, 1); sneldiv<<>>(d_inA, d_inB, d_sub, Nprj, snno); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -281,8 +277,8 @@ __global__ void sneladd(float *inA, float *inB, int *sub, int Nprj, int 
snno) { } void d_sneladd(float *d_inA, float *d_inB, int *d_sub, int Nprj, int snno) { - dim3 BpG(ceil(snno / (float)NTHRDS), Nprj, 1); - dim3 TpB(NTHRDS, 1, 1); + dim3 BpG(ceil(snno / (float)NIPET_CU_THREADS), Nprj, 1); + dim3 TpB(NIPET_CU_THREADS, 1, 1); sneladd<<>>(d_inA, d_inB, d_sub, Nprj, snno); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -294,8 +290,8 @@ __global__ void eladd(float *inA, float *inB, int length) { } void d_eladd(float *d_inA, float *d_inB, int length) { - dim3 BpG(ceil(length / (float)NTHRDS), 1, 1); - dim3 TpB(NTHRDS, 1, 1); + dim3 BpG(ceil(length / (float)NIPET_CU_THREADS), 1, 1); + dim3 TpB(NIPET_CU_THREADS, 1, 1); eladd<<>>(d_inA, d_inB, length); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -313,8 +309,8 @@ __global__ void elmsk(float *inA, float *inB, bool *msk, int length) { } void d_elmsk(float *d_inA, float *d_inB, bool *d_msk, int length) { - dim3 BpG(ceil(length / (float)NTHRDS), 1, 1); - dim3 TpB(NTHRDS, 1, 1); + dim3 BpG(ceil(length / (float)NIPET_CU_THREADS), 1, 1); + dim3 TpB(NIPET_CU_THREADS, 1, 1); elmsk<<>>(d_inA, d_inB, d_msk, length); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - From 7acaca307d3f1e3e3810c67035d2d217f57c054a Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 4 Feb 2021 16:34:12 +0000 Subject: [PATCH 40/64] examples: fix & update MLEM demo arguments --- examples/demo.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/demo.ipynb b/examples/demo.ipynb index 3ae223d7..afde8439 100644 --- a/examples/demo.ipynb +++ b/examples/demo.ipynb @@ -234,9 +234,9 @@ "## Scatter\n", "\n", "# One OSEM iteration estimate (implicitly using voxel-driven scatter model)\n", - "eim = nipet.mmrchain(datain, mMRpars, mu_h=mu_h, mu_o=mu_o, itr=1, outpath=opth)['im']\n", + "eim = nipet.mmrchain(datain, mMRpars, mu_h=mu_h, mu_o=mu_o, itr=1, histo=m, outpath=opth)['im']\n", "# Recalculate scatter\n", - "s = nipet.vsm(datain, 
(mu_h['im'], mu_o['im']), eim, m, r, mMRpars)\n", + "s = nipet.vsm(datain, (mu_h['im'], mu_o['im']), eim, mMRpars, histo=m, rsino=r)\n", "print(\"Scatter: %.3g%%\" % (s.sum() / m['psino'].sum() * 100))\n", "\n", "## Attenuation, Normalisation & Sensitivity\n", From 9d51c5828dfaa8b687b1668422d3290e75344dd7 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 4 Feb 2021 17:46:33 +0000 Subject: [PATCH 41/64] more thread tidy --- niftypet/nipet/prj/src/recon.cu | 12 ++++++------ niftypet/nipet/prj/src/tprj.cu | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/niftypet/nipet/prj/src/recon.cu b/niftypet/nipet/prj/src/recon.cu index 004f20e7..fccf2ac7 100644 --- a/niftypet/nipet/prj/src/recon.cu +++ b/niftypet/nipet/prj/src/recon.cu @@ -223,7 +223,7 @@ __global__ void elmult(float *inA, float *inB, int length) { } void d_elmult(float *d_inA, float *d_inB, int length) { - dim3 BpG(ceil(length / (float)NIPET_CU_THREADS), 1, 1); + dim3 BpG((length + NIPET_CU_THREADS - 1) / NIPET_CU_THREADS, 1, 1); dim3 TpB(NIPET_CU_THREADS, 1, 1); elmult<<>>(d_inA, d_inB, length); } @@ -241,7 +241,7 @@ __global__ void eldiv0(float *inA, float *inB, int length) { } void d_eldiv(float *d_inA, float *d_inB, int length) { - dim3 BpG(ceil(length / (float)NIPET_CU_THREADS), 1, 1); + dim3 BpG((length + NIPET_CU_THREADS - 1) / NIPET_CU_THREADS, 1, 1); dim3 TpB(NIPET_CU_THREADS, 1, 1); eldiv0<<>>(d_inA, d_inB, length); } @@ -263,7 +263,7 @@ __global__ void sneldiv(float *inA, unsigned short *inB, int *sub, int Nprj, int } void d_sneldiv(float *d_inA, unsigned short *d_inB, int *d_sub, int Nprj, int snno) { - dim3 BpG(ceil(snno / (float)NIPET_CU_THREADS), Nprj, 1); + dim3 BpG((snno + NIPET_CU_THREADS - 1) / NIPET_CU_THREADS, Nprj, 1); dim3 TpB(NIPET_CU_THREADS, 1, 1); sneldiv<<>>(d_inA, d_inB, d_sub, Nprj, snno); } @@ -277,7 +277,7 @@ __global__ void sneladd(float *inA, float *inB, int *sub, int Nprj, int snno) { } void d_sneladd(float *d_inA, float *d_inB, int *d_sub, 
int Nprj, int snno) { - dim3 BpG(ceil(snno / (float)NIPET_CU_THREADS), Nprj, 1); + dim3 BpG((snno + NIPET_CU_THREADS - 1) / NIPET_CU_THREADS, Nprj, 1); dim3 TpB(NIPET_CU_THREADS, 1, 1); sneladd<<>>(d_inA, d_inB, d_sub, Nprj, snno); } @@ -290,7 +290,7 @@ __global__ void eladd(float *inA, float *inB, int length) { } void d_eladd(float *d_inA, float *d_inB, int length) { - dim3 BpG(ceil(length / (float)NIPET_CU_THREADS), 1, 1); + dim3 BpG((length + NIPET_CU_THREADS - 1) / NIPET_CU_THREADS, 1, 1); dim3 TpB(NIPET_CU_THREADS, 1, 1); eladd<<>>(d_inA, d_inB, length); } @@ -309,7 +309,7 @@ __global__ void elmsk(float *inA, float *inB, bool *msk, int length) { } void d_elmsk(float *d_inA, float *d_inB, bool *d_msk, int length) { - dim3 BpG(ceil(length / (float)NIPET_CU_THREADS), 1, 1); + dim3 BpG((length + NIPET_CU_THREADS - 1) / NIPET_CU_THREADS, 1, 1); dim3 TpB(NIPET_CU_THREADS, 1, 1); elmsk<<>>(d_inA, d_inB, d_msk, length); } diff --git a/niftypet/nipet/prj/src/tprj.cu b/niftypet/nipet/prj/src/tprj.cu index 09cd3f77..284a8cb0 100644 --- a/niftypet/nipet/prj/src/tprj.cu +++ b/niftypet/nipet/prj/src/tprj.cu @@ -180,8 +180,8 @@ void gpu_siddon_tx(float4 *d_crs, short2 *d_s2c, float *d_tt, unsigned char *d_t cudaEventRecord(start, 0); //----- - dim3 BpG(ceil(AW / (float)NTHREADS), 1, 1); - dim3 TpB(NTHREADS, 1, 1); + dim3 BpG((AW + NIPET_CU_THREADS - 1) / NIPET_CU_THREADS, 1, 1); + dim3 TpB(NIPET_CU_THREADS, 1, 1); sddn_tx<<>>(d_crs, d_s2c, d_tt, d_tv); HANDLE_ERROR(cudaGetLastError()); //----- From 2a1f38510d7cb0b917f822273462a2fd3ac3f6f1 Mon Sep 17 00:00:00 2001 From: Pawel Markiewicz Date: Sun, 7 Feb 2021 19:25:00 +0000 Subject: [PATCH 42/64] minor changing with reporting/logging --- niftypet/nipet/prj/mmrrec.py | 5 +++-- niftypet/nipet/prj/src/prj_module.cu | 4 ++-- niftypet/nipet/sct/src/sct.cu | 10 +++++----- niftypet/nipet/sct/src/sct_module.cu | 4 ++-- niftypet/nipet/sct/src/sctaux.cu | 22 +++++++++++----------- niftypet/nipet/src/norm.cu | 4 ++-- 6 files changed, 25 
insertions(+), 24 deletions(-) diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 8233e896..f2060e25 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -296,7 +296,8 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N lmbd = np.log(2) / resources.riLUT[Cnt['ISOTOPE']]['thalf'] if Cnt['DCYCRR'] and 't0' in hst and 'dur' in hst: # > decay correct to the reference time (e.g., injection time) if provided - # > otherwise correct in reference to the scan start time + # > otherwise correct in reference to the scan start time (using the time + # > past from the start to the start time frame) if decay_ref_time is not None: tref = decay_ref_time else: @@ -363,7 +364,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N im = mmrimg.convert2e7(img * (dcycrr*qf*qf_loc), Cnt) fout = os.path.join( - opth, (os.path.basename(datain['lm_bf'])[:8] + + opth, (os.path.basename(datain['lm_bf'])[:16].replace('.','-') + f"{frmno}_t{hst['t0']}-{hst['t1']}sec_itr{k}{fcomment}_inrecon.nii.gz")) nimpa.array2nii(im[::-1, ::-1, :], B, fout) diff --git a/niftypet/nipet/prj/src/prj_module.cu b/niftypet/nipet/prj/src/prj_module.cu index 1f47daef..5a67268e 100644 --- a/niftypet/nipet/prj/src/prj_module.cu +++ b/niftypet/nipet/prj/src/prj_module.cu @@ -371,7 +371,7 @@ static PyObject *frwd_prj(PyObject *self, PyObject *args) { int *subs; if (subs_[0] == -1) { Nprj = AW; - if (Cnt.LOG <= LOGWARNING) + if (Cnt.LOG <= LOGDEBUG) printf("i> no subsets defined. 
number of projection bins in 2D: %d\n", Nprj); // all projections in subs = (int *)malloc(Nprj * sizeof(int)); @@ -761,7 +761,7 @@ static PyObject *osem_rec(PyObject *self, PyObject *args) { //>--- PSF KERNEL --- float *krnl; int SZ_KRNL = (int)PyArray_DIM(p_krnl, 1); - if (Cnt.LOG <= LOGINFO) printf("i> kernel size [voxels]: %d\n", SZ_KRNL); + if (Cnt.LOG <= LOGDEBUG) printf("d> kernel size [voxels]: %d\n", SZ_KRNL); if (SZ_KRNL != KERNEL_LENGTH) { if (Cnt.LOG <= LOGWARNING) printf("w> wrong kernel size.\n"); diff --git a/niftypet/nipet/sct/src/sct.cu b/niftypet/nipet/sct/src/sct.cu index 6c36e832..cb31c2c8 100644 --- a/niftypet/nipet/sct/src/sct.cu +++ b/niftypet/nipet/sct/src/sct.cu @@ -413,7 +413,7 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em // check which device is going to be used int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGDEBUG) printf("i> using CUDA device #%d\n", dev_id); getMemUse(Cnt); @@ -430,7 +430,7 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em tofbin[3] = Cnt.ITOFBIND; cudaMemcpyToSymbol(c_TOFBIN, tofbin, 4 * sizeof(float)); - if (Cnt.LOG <= LOGINFO) { + if (Cnt.LOG <= LOGDEBUG) { printf("i> time of flight properties for scatter estimation:\n"); for (int i = 0; i < 4; i++) printf(" tofbin[%d]=%f\n", i, tofbin[i]); } @@ -452,7 +452,7 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em d_scrsdef.nscrs = Cnt.NSCRS; d_scrsdef.nsrng = Cnt.NSRNG; - if (Cnt.LOG <= LOGINFO) + if (Cnt.LOG <= LOGDEBUG) printf("i> number of scatter crystals used:\n >transaxially: %d\n >axially: %d\n", d_scrsdef.nscrs, d_scrsdef.nsrng); @@ -524,7 +524,7 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em cudaTextureObject_t texo_mu3d = 0; cudaCreateTextureObject(&texo_mu3d, &resDesc, &texDesc, NULL); - if (Cnt.LOG <= LOGINFO) printf("i> 3D CUDA texture for 
the mu-map has been initialised.\n"); + if (Cnt.LOG <= LOGDEBUG) printf("d> 3D CUDA texture for the mu-map has been initialised.\n"); //==================================================================== //============================================================ @@ -541,7 +541,7 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em //============================================================ if (Cnt.LOG <= LOGINFO) - printf("i> calculating scatter probabilities for %d emission voxels...", d_em_msk.nvx); + printf("i> calculating scatter probabilities for %d emission voxels using device #%d...", d_em_msk.nvx, dev_id); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); diff --git a/niftypet/nipet/sct/src/sct_module.cu b/niftypet/nipet/sct/src/sct_module.cu index 326c3346..d273bda4 100644 --- a/niftypet/nipet/sct/src/sct_module.cu +++ b/niftypet/nipet/sct/src/sct_module.cu @@ -265,8 +265,8 @@ static PyObject *vsm_scatter(PyObject *self, PyObject *args) { emIMG.nvx = (size_t)(PyArray_DIM(p_emimg, 0) * PyArray_DIM(p_emimg, 1) * PyArray_DIM(p_emimg, 2)); - if ((muIMG.nvx != emIMG.nvx) && (Cnt.LOG <= LOGWARNING)) - printf("\nw> mu-map and emission image have different dims: mu.nvx = %lu, em.nvx = %lu\n", + if ((muIMG.nvx != emIMG.nvx) && (Cnt.LOG <= LOGDEBUG)) + printf("\nd> mu-map and emission image have different dims: mu.nvx = %lu, em.nvx = %lu\n", muIMG.nvx, emIMG.nvx); // get the stats in the image structure diff --git a/niftypet/nipet/sct/src/sctaux.cu b/niftypet/nipet/sct/src/sctaux.cu index 197d788d..8b1196db 100644 --- a/niftypet/nipet/sct/src/sctaux.cu +++ b/niftypet/nipet/sct/src/sctaux.cu @@ -138,8 +138,8 @@ float *srslt2sino(float *d_srslt, char *d_xsxu, scrsDEF d_scrsdef, int *sctaxR, HANDLE_ERROR( cudaMemset(d_scts1, 0, Cnt.NSN64 * d_scrsdef.nscrs * d_scrsdef.nscrs * sizeof(float))); - if (Cnt.LOG <= LOGINFO) - printf("i> 3D scatter results into span-1 pre-sino for TOF bin %d...", i); + if (Cnt.LOG 
<= LOGDEBUG) + printf("d> 3D scatter results into span-1 pre-sino for TOF bin %d...", i); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); @@ -159,9 +159,9 @@ float *srslt2sino(float *d_srslt, char *d_xsxu, scrsDEF d_scrsdef, int *sctaxR, cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) printf("DONE in %fs.\n", 1e-3 * elapsedTime); + if (Cnt.LOG <= LOGDEBUG) printf("DONE in %fs.\n", 1e-3 * elapsedTime); - if (Cnt.LOG <= LOGINFO) printf("i> 3D scatter axial interpolation..."); + if (Cnt.LOG <= LOGDEBUG) printf("d> 3D scatter axial interpolation..."); cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); @@ -182,7 +182,7 @@ float *srslt2sino(float *d_srslt, char *d_xsxu, scrsDEF d_scrsdef, int *sctaxR, cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); - if (Cnt.LOG <= LOGINFO) printf("DONE in %fs.\n", 1e-3 * elapsedTime); + if (Cnt.LOG <= LOGDEBUG) printf("DONE in %fs.\n", 1e-3 * elapsedTime); } cudaFree(d_scts1); @@ -199,7 +199,7 @@ iMSK get_imskEm(IMflt imvol, float thrshld, Cnst Cnt) { // check which device is going to be used int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGDEBUG) printf("d> emission data masking using CUDA device #%d\n", dev_id); iMSK msk; int nvx = 0; @@ -257,8 +257,8 @@ iMSK get_imskEm(IMflt imvol, float thrshld, Cnst Cnt) { #endif - if (Cnt.LOG <= LOGINFO) - printf("i> number of voxel values greater than %3.2f is %d out of %d (ratio: %3.2f)\n", + if (Cnt.LOG <= LOGDEBUG) + printf("d> number of voxel values greater than %3.2f is %d out of %d (ratio: %3.2f)\n", thrshld, nvx, SSE_IMX * SSE_IMY * SSE_IMZ, nvx / (float)(SSE_IMX * SSE_IMY * SSE_IMZ)); msk.nvx = nvx; msk.i2v = d_i2v; @@ -274,7 +274,7 @@ iMSK get_imskMu(IMflt imvol, char *msk, Cnst Cnt) { // check which device is going to be used int 
dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGDEBUG) printf("d> masking using CUDA device #%d\n", dev_id); int nvx = 0; for (int i = 0; i < (SS_IMX * SS_IMY * SS_IMZ); i++) { @@ -329,8 +329,8 @@ iMSK get_imskMu(IMflt imvol, char *msk, Cnst Cnt) { } #endif - if (Cnt.LOG <= LOGINFO) - printf("i> number of voxels within the mu-mask is %d out of %d (ratio: %3.2f)\n", nvx, + if (Cnt.LOG <= LOGDEBUG) + printf("d> number of voxels within the mu-mask is %d out of %d (ratio: %3.2f)\n", nvx, SS_IMX * SS_IMY * SS_IMZ, nvx / (float)(SS_IMX * SS_IMY * SS_IMZ)); iMSK mlut; mlut.nvx = nvx; diff --git a/niftypet/nipet/src/norm.cu b/niftypet/nipet/src/norm.cu index 977e5c82..21fbc4ab 100644 --- a/niftypet/nipet/src/norm.cu +++ b/niftypet/nipet/src/norm.cu @@ -63,7 +63,7 @@ void norm_from_components(float *sino, // output norm sino int dev_id; cudaGetDevice(&dev_id); - if (Cnt.LOG <= LOGINFO) printf("i> using CUDA device #%d\n", dev_id); + if (Cnt.LOG <= LOGDEBUG) printf("d> using CUDA device #%d\n", dev_id); int snno = -1; if (Cnt.SPN == 1) @@ -183,7 +183,7 @@ void norm_from_components(float *sino, // output norm sino // CUDA grid size (in blocks) int blcks = ceil(AW / (float)NTHREADS); - if (Cnt.LOG <= LOGINFO) printf("i> calculating normalisation sino from norm components..."); + if (Cnt.LOG <= LOGINFO) printf("i> calculating normalisation sinogram using device #%d...", dev_id); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); From 087f84245eca9c239a7ef3743e5063f207e9060b Mon Sep 17 00:00:00 2001 From: Pawel Markiewicz Date: Sun, 11 Apr 2021 23:08:32 +0100 Subject: [PATCH 43/64] modified recon image output --- niftypet/nipet/img/pipe.py | 10 +++++----- niftypet/nipet/prj/mmrrec.py | 31 +++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index e84b1520..7913f3f8 100644 --- 
a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -148,7 +148,7 @@ def mmrchain( # > get rid of folders fout = os.path.basename(fout) # > get rid of extension - fout = fout.rsplit('.', 1)[0] + fout = fout.split('.')[0] # folder for co-registered mu-maps (for motion compensation) fmureg = os.path.join(fmudir, 'registered') @@ -231,9 +231,9 @@ def mmrchain( nimpa.create_dir(petaff) faff_frms = [] for i in range(nfrm): - fout = os.path.join(petaff, 'affine_frame(' + str(i) + ').txt') - np.savetxt(fout, tAffine[i], fmt='%3.9f') - faff_frms.append(fout) + fout_ = os.path.join(petaff, 'affine_frame(' + str(i) + ').txt') + np.savetxt(fout_, tAffine[i], fmt='%3.9f') + faff_frms.append(fout_) log.info('using provided numpy arrays affine transformations for each dynamic frame.') else: raise ValueError( @@ -355,7 +355,7 @@ def mmrchain( recimg = mmrrec.osemone(datain, [muhd['im'], muo], hst, scanner_params, decay_ref_time=decay_ref_time, recmod=recmod, itr=itr, fwhm=fwhm, psf=psf, outpath=petimg, frmno=frmno, fcomment=fcomment + '_i', - store_img=store_img_intrmd, store_itr=store_itr, + store_img=store_img_intrmd, store_itr=store_itr, fout=fout, ret_sinos=ret_sinos) # form dynamic Numpy array diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 85468bbe..d61b32f4 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -155,8 +155,8 @@ def _config(fwhm3, check_len=True): def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=None, mask_radius=29., decay_ref_time=None, attnsino=None, sctsino=None, randsino=None, - normcomp=None, emmskS=False, frmno='', fcomment='', outpath=None, store_img=False, - store_itr=None, ret_sinos=False): + normcomp=None, emmskS=False, frmno='', fcomment='', outpath=None, fout=None, + store_img=False, store_itr=None, ret_sinos=False): ''' OSEM image reconstruction with several modes (with/without scatter and/or attenuation correction) @@ -177,6 +177,13 @@ def 
osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N else: opth = outpath + #> file output name (the path is ignored if given) + if fout is not None: + # > get rid of folders + fout = os.path.basename(fout) + # > get rid of extension + fout = fout.split('.')[0] + if store_img is True or store_itr is not None: mmraux.create_dir(opth) @@ -360,13 +367,18 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N ssng = mmraux.remgaps(ssn, txLUT, Cnt) pbar.set_postfix(scatter="%.3gs" % (time.time() - sct_time)) # save images during reconstruction if requested - if store_itr and k in store_itr: + if store_itr and (k+1) in store_itr: im = mmrimg.convert2e7(img * (dcycrr*qf*qf_loc), Cnt) - fout = os.path.join( - opth, (os.path.basename(datain['lm_bf'])[:16].replace('.','-') + - f"{frmno}_t{hst['t0']}-{hst['t1']}sec_itr{k}{fcomment}_inrecon.nii.gz")) - nimpa.array2nii(im[::-1, ::-1, :], B, fout) + if fout is None: + fpet = os.path.join( + opth, (os.path.basename(datain['lm_bf'])[:16].replace('.','-') + + f"{frmno}_t{hst['t0']}-{hst['t1']}sec_itr{k+1}{fcomment}_inrecon.nii.gz")) + else: + fpet = os.path.join( + opth, fout+f'_itr{k+1}{fcomment}_inrecon.nii.gz') + + nimpa.array2nii(im[::-1, ::-1, :], B, fpet) log.info('recon time: %.3g', time.time() - stime) # ======================================================================== @@ -399,8 +411,11 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # > file name of the output reconstructed image # > (maybe used later even if not stored now) - fpet = os.path.join(opth, (os.path.basename(datain['lm_bf']).split('.')[0] + + if fout is None: + fpet = os.path.join(opth, (os.path.basename(datain['lm_bf']).split('.')[0] + f"{frmno}_t{hst['t0']}-{hst['t1']}sec_itr{itr}{fcomment}.nii.gz")) + else: + fpet = os.path.join(opth, fout+f'_itr{itr}{fcomment}.nii.gz') if store_img: log.info('saving image to: %s', fpet) From 
946cc021f6710ca190ad72d2e84da0856abf3a6c Mon Sep 17 00:00:00 2001 From: Pawel Date: Wed, 14 Apr 2021 23:42:24 +0100 Subject: [PATCH 44/64] fixes in generating aligned mu-maps when loading existing ones; also moved the definitions of scatter rings to resources.py --- niftypet/nipet/img/mmrimg.py | 15 ++++++++------- niftypet/nipet/mmraux.py | 3 +++ niftypet/nipet/sct/mmrsct.py | 20 ++++++++------------ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/niftypet/nipet/img/mmrimg.py b/niftypet/nipet/img/mmrimg.py index c55a085b..3b2c1f50 100644 --- a/niftypet/nipet/img/mmrimg.py +++ b/niftypet/nipet/img/mmrimg.py @@ -440,10 +440,6 @@ def align_mumap( # > create the folder, if not existent nimpa.create_dir(opth) - # > tmp folder for not aligned mu-maps - tmpdir = os.path.join(opth, 'tmp') - nimpa.create_dir(tmpdir) - # > get the timing of PET if affine not given if faff == '' and hst is not None and isinstance(hst, dict) and 't0' in hst: t0 = hst['t0'] @@ -459,9 +455,10 @@ def align_mumap( # > used stored if requested if use_stored: fmu_stored = fnm + '-aligned-to_t'\ - + str(hst['t0'])+'-'+str(hst['t1'])+'_'+petopt.upper()\ + + str(t0)+'-'+str(t1)+'_'+petopt.upper()\ + fcomment - fmupath = os.path.join(opth, fmu_stored) + fmupath = os.path.join(opth, fmu_stored+'.nii.gz') + if os.path.isfile(fmupath): mudct_stored = nimpa.getnii(fmupath, output='all') # > create output dictionary @@ -471,6 +468,10 @@ def align_mumap( return mu_dct # --------------------------------------------------------------------------- + # > tmp folder for not aligned mu-maps + tmpdir = os.path.join(opth, 'tmp') + nimpa.create_dir(tmpdir) + # > three ways of passing scanner constants are here decoded if 'Cnt' in scanner_params: Cnt = scanner_params['Cnt'] @@ -706,7 +707,7 @@ def align_mumap( nimpa.create_dir(opth) if faff == '': fname = fnm + '-aligned-to_t'\ - + str(hst['t0'])+'-'+str(hst['t1'])+'_'+petopt.upper()\ + + str(t0)+'-'+str(t1)+'_'+petopt.upper()\ + fcomment else: fname = 
fnm + '-aligned-to-given-affine' + fcomment diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index 546e000a..b617d487 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -571,7 +571,10 @@ def reduce_rings(pars, rs=0, re=64): rs -- start ring re -- end ring (not included in the resulting reduced rings) """ + + #> reduced rings work in span-1 only pars['Cnt']['SPN'] = 1 + # select the number of sinograms for the number of rings # RNG_STRT is included in detection # RNG_END is not included in detection process diff --git a/niftypet/nipet/sct/mmrsct.py b/niftypet/nipet/sct/mmrsct.py index 2c054730..04e188e4 100644 --- a/niftypet/nipet/sct/mmrsct.py +++ b/niftypet/nipet/sct/mmrsct.py @@ -72,23 +72,19 @@ def get_scrystals(scanner_params): scrs = np.array(scrs, dtype=np.float32) # ------------------------------------------------------ - # ------------------------------------------------------ - # > scatter ring definition (axially) - sct_irng = np.int16([0, 10, 19, 28, 35, 44, 53, 63]) - # number of scatter rings (used for scatter estimation) - NSRNG = len(sct_irng) - # ------------------------------------------------------ - logtxt = '' - srng = np.zeros((NSRNG, 2), dtype=np.float32) - for ir in range(NSRNG): - srng[ir, 0] = float(sct_irng[ir]) - srng[ir, 1] = axLUT['rng'][sct_irng[ir], :].mean() + sirng = np.int16(Cnt['SIRNG']) + + #> axial scatter ring positions in cm + srng = np.zeros((Cnt['NSRNG'], 2), dtype=np.float32) + for ir in range(Cnt['NSRNG']): + srng[ir, 0] = float(sirng[ir]) + srng[ir, 1] = axLUT['rng'][sirng[ir], :].mean() logtxt += '> [{}]: ring_i={}, ring_z={}\n'.format(ir, int(srng[ir, 0]), srng[ir, 1]) log.debug(logtxt) - return {'scrs': scrs, 'srng': srng, 'sirng': sct_irng, 'NSCRS': scrs.shape[0], 'NSRNG': NSRNG} + return {'scrs': scrs, 'srng': srng, 'sirng': sirng, 'NSCRS': scrs.shape[0], 'NSRNG': Cnt['NSRNG']} # ====================================================================== From 
2a4e32198473c4ace8d8955b39a4ab4e09d6a58e Mon Sep 17 00:00:00 2001 From: Pawel Markiewicz Date: Thu, 15 Apr 2021 15:19:22 +0100 Subject: [PATCH 45/64] fixed bugs with span-1 sinogram numbers in scatter and pipe recon --- niftypet/nipet/img/pipe.py | 14 +++++++++++--- niftypet/nipet/sct/mmrsct.py | 7 ++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index 7913f3f8..76874fbc 100644 --- a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -271,14 +271,22 @@ def mmrchain( if fwhm > 0: output['fsmoi'] = [] + # > number of3D sinograms + if Cnt['SPN']==1: + snno = Cnt['NSN1'] + elif Cnt['SPN']==11: + snno = Cnt['NSN11'] + else: + raise ValueError('unrecognised span: {}'.format(Cnt['SPN'])) + # dynamic images in one numpy array dynim = np.zeros((nfrm, Cnt['SO_IMZ'], Cnt['SO_IMY'], Cnt['SO_IMY']), dtype=np.float32) # if asked, output only scatter+randoms sinogram for each frame if ret_sinos and itr > 1 and recmod > 2: dynmsk = np.zeros((nfrm, Cnt['NSEG0'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) - dynrsn = np.zeros((nfrm, Cnt['NSN11'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) - dynssn = np.zeros((nfrm, Cnt['NSN11'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) - dynpsn = np.zeros((nfrm, Cnt['NSN11'], Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) + dynrsn = np.zeros((nfrm, snno, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) + dynssn = np.zeros((nfrm, snno, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) + dynpsn = np.zeros((nfrm, snno, Cnt['NSANGLES'], Cnt['NSBINS']), dtype=np.float32) # > returning dictionary of histograms if requested if ret_histo: diff --git a/niftypet/nipet/sct/mmrsct.py b/niftypet/nipet/sct/mmrsct.py index 04e188e4..256af813 100644 --- a/niftypet/nipet/sct/mmrsct.py +++ b/niftypet/nipet/sct/mmrsct.py @@ -295,7 +295,12 @@ def intrp_bsct(sct3d, Cnt, sctLUT, ssrlut, dtype=np.float32): ''' # > number of sinograms - snno = 
sct3d.shape[1] + if Cnt['SPN']==1: + snno = Cnt['NSN1'] + elif Cnt['SPN']==11: + snno = Cnt['NSN11'] + else: + raise ValueError('unrecognised span!') i_scrs = sctLUT['scrs'][:, 0].astype(int) From ebb86f6cc3fc70a22098dae6a8fe277b0e13aa2e Mon Sep 17 00:00:00 2001 From: Pawel Markiewicz Date: Tue, 20 Apr 2021 23:37:13 +0100 Subject: [PATCH 46/64] fixing one get_norm() function and deprecating two old ones; adding condition for reduced rings (reduced axial fov) in mmraux.py --- niftypet/nipet/__init__.py | 2 ++ niftypet/nipet/mmraux.py | 5 ++++ niftypet/nipet/mmrnorm.py | 52 ++++++++++++++++++++++++++++---------- 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index 18161910..2a8354a0 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ -54,6 +54,8 @@ from .prj.mmrsim import simulate_recon, simulate_sino from .sct.mmrsct import vsm +from .mmrnorm import get_norm_sino + # log = logging.getLogger(__name__) # technically bad practice to add handlers # https://docs.python.org/3/howto/logging.html#library-config diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index b617d487..b0e2366f 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -566,12 +566,17 @@ def reduce_rings(pars, rs=0, re=64): Reduce the axial rings for faster reconstructions, particularly simulations. This function customises axial FOV for reduced rings in range(rs,re). Note it only works in span-1 and ring re is not included in the reduced rings. + Total number of used rings has to be even at all times.
Arguments: pars -- scanner parameters: constants, LUTs rs -- start ring re -- end ring (not included in the resulting reduced rings) """ + + if (re-rs)<0 or ((re-rs)%2)!=0: + raise ValueError('The resulting number of rings has to be even and start ring (rs) smaller than end ring (re)') + #> reduced rings work in span-1 only pars['Cnt']['SPN'] = 1 diff --git a/niftypet/nipet/mmrnorm.py b/niftypet/nipet/mmrnorm.py index 95dc0321..6de59f2c 100644 --- a/niftypet/nipet/mmrnorm.py +++ b/niftypet/nipet/mmrnorm.py @@ -110,7 +110,8 @@ def get_components(datain, Cnt): def get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=None): - + ''' to be depreciated + ''' # get the normalisation components if normcomp is None: normcomp, _ = get_components(datain, Cnt) @@ -131,8 +132,10 @@ def get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=None): def get_sino(datain, hst, axLUT, txLUT, Cnt): + ''' to be depreciated + ''' - # gumber of sino planes (2D sinos) depends on the span used + # number of sino planes (2D sinos) depends on the span used if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] elif Cnt['SPN'] == 11: @@ -149,27 +152,48 @@ def get_sino(datain, hst, axLUT, txLUT, Cnt): return sino -def get_norm_sino(datain, scanner_params, hst): +def get_norm_sino( + datain, + scanner_params, + hst, + normcomp=None, + gpu_dim=False): Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] - # if not hst: - # hst = mmrhist.mmrhist(datain, scanner_params) + #> get the normalisation components + if normcomp is None: + normcomp, _ = get_components(datain, Cnt) - # gumber of sino planes (2D sinos) depends on the span used + #> number of sinogram planes, depends on the span used if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] elif Cnt['SPN'] == 11: nsinos = Cnt['NSN11'] + else: + raise ValueError('unrecognised span {}'.format(Cnt['SPN'])) - # get sino with no gaps - s = get_sinog(datain, hst, axLUT, txLUT, Cnt) - # greallocate sino with gaps - sino = 
np.zeros((Cnt['NSANGLES'], Cnt['NSBINS'], nsinos), dtype=np.float32) - # gill the sino with gaps - mmr_auxe.pgaps(sino, s, txLUT, Cnt, 0) - sino = np.transpose(sino, (2, 0, 1)) + #------------------------------------------------------------------------- + #> initialise the sinogram + sng = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) - return sino + #> get the norm + mmr_auxe.norm(sng, normcomp, hst['buckets'], axLUT, txLUT['aw2ali'], Cnt) + #------------------------------------------------------------------------- + + #> check if needed reduction of axial FOV (reducing the number of rings) + if 'rNSN1' in Cnt and 'rLUT' in axLUT: + sng = sng[:, axLUT['rLUT']] + + if gpu_dim: + return sng + + else: + # initialise sinogram with gaps + sino = np.zeros((Cnt['NSANGLES'], Cnt['NSBINS'], nsinos), dtype=np.float32) + # fill the sinogram + mmr_auxe.pgaps(sino, sng, txLUT, Cnt, 0) + sino = np.transpose(sino, (2, 0, 1)) + return sino From 27edbe596f0ef6e9e36c25f8444e69746f6f2477 Mon Sep 17 00:00:00 2001 From: Pawel Markiewicz Date: Tue, 20 Apr 2021 23:37:49 +0100 Subject: [PATCH 47/64] fixing bugs with reduced rings projectors --- niftypet/nipet/prj/mmrrec.py | 2 +- niftypet/nipet/prj/src/prjb.cu | 56 ++++++++++++++++---------------- niftypet/nipet/prj/src/prjf.cu | 58 ++++++++++++++++++++-------------- 3 files changed, 63 insertions(+), 53 deletions(-) diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index d61b32f4..7a041820 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -210,7 +210,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N else: ncmp = normcomp log.warning('using user-defined normalisation components') - nsng = mmrnorm.get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=ncmp) + nsng = mmrnorm.get_norm_sino(datain, scanner_params, hst, normcomp=ncmp, gpu_dim=True) # ======================================================================== # 
======================================================================== diff --git a/niftypet/nipet/prj/src/prjb.cu b/niftypet/nipet/prj/src/prjb.cu index 7cc6e813..904ed6b2 100644 --- a/niftypet/nipet/prj/src/prjb.cu +++ b/niftypet/nipet/prj/src/prjb.cu @@ -84,9 +84,12 @@ __global__ void bprj_drct(const float *sino, float *im, const float *tt, const u //************** OBLIQUE ************************************************** __global__ void bprj_oblq(const float *sino, float *im, const float *tt, const unsigned char *tv, - const int *subs, const short snno, const int zoff) { + const int *subs, const short snno, const int zoff, const short nil2r_c) { + int ixz = threadIdx.x + zoff; // axial (z) - if (ixz < NLI2R) { + + if (ixz < nil2r_c) { + int ixt = subs[blockIdx.x]; // blockIdx.x is the transaxial bin index // bin values to be back projected float bin = sino[c_li2sn[ixz].x + snno * blockIdx.x]; @@ -280,30 +283,22 @@ void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2no //============================================================================ int zoff = nrng_c; - // number of oblique sinograms + //> number of oblique sinograms int Noblq = (nrng_c - 1) * nrng_c / 2; + int Nz = ((Noblq+127)/128)*128; + + //============================================================================ + bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff, nil2r_c); + HANDLE_ERROR(cudaGetLastError()); + + zoff += Nz/2; + bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff, nil2r_c); + HANDLE_ERROR(cudaGetLastError()); + //============================================================================ + - // cudaGetDeviceCount(&nDevices); - // for (int i = 0; i < nDevices; i++) { - // cudaDeviceProp prop; - // cudaGetDeviceProperties(&prop, i); - // printf("Device Number: %d\n", i); - // printf(" Device name: %s\n", prop.name); - // printf(" Device supports concurrentManagedAccess?: %s\n", prop.concurrentManagedAccess); - //} - // 
cudaMemPrefetchAsync(d_sino, Nprj*snno * sizeof(float), nDevices, NULL); - if (Cnt.SPN == 1 && Noblq <= 1024) { - bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff); - HANDLE_ERROR(cudaGetLastError()); - } else { - bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff); - HANDLE_ERROR(cudaGetLastError()); - zoff += NSINOS / 4; - bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff); - HANDLE_ERROR(cudaGetLastError()); - } //============================================================================ cudaEventRecord(stop, 0); @@ -378,6 +373,11 @@ void rec_bprj(float *d_bimg, float *d_sino, int *d_sub, int Nprj, float *d_tt, u else if (Cnt.SPN == 11) snno = NSINOS11; + //> number of oblique sinograms + int Noblq = (NRINGS*(NRINGS-1)-12)/2; + //> number of threads (in the axial direction) + int Nz = ((Noblq+127)/128)*128; + cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); @@ -386,19 +386,19 @@ void rec_bprj(float *d_bimg, float *d_sino, int *d_sub, int Nprj, float *d_tt, u //============================================================================ bprj_drct<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno); - // HANDLE_ERROR(cudaGetLastError()); + HANDLE_ERROR(cudaGetLastError()); //============================================================================ int zoff = NRINGS; //============================================================================ - bprj_oblq<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff); - // HANDLE_ERROR(cudaGetLastError()); + bprj_oblq<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff, NLI2R); + HANDLE_ERROR(cudaGetLastError()); //============================================================================ - zoff += NSINOS / 4; + zoff += Nz/2; //============================================================================ - bprj_oblq<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff); - // HANDLE_ERROR(cudaGetLastError()); + bprj_oblq<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff, NLI2R); + 
HANDLE_ERROR(cudaGetLastError()); //============================================================================ cudaEventRecord(stop, 0); diff --git a/niftypet/nipet/prj/src/prjf.cu b/niftypet/nipet/prj/src/prjf.cu index 530f3fbe..e3c9e14d 100644 --- a/niftypet/nipet/prj/src/prjf.cu +++ b/niftypet/nipet/prj/src/prjf.cu @@ -97,10 +97,15 @@ __global__ void fprj_drct(float *sino, const float *im, const float *tt, const u //************** OBLIQUE ************************************************** __global__ void fprj_oblq(float *sino, const float *im, const float *tt, const unsigned char *tv, const int *subs, const short snno, const char span, const char att, - const int zoff) { + const int zoff, const short nil2r_c) { int ixz = threadIdx.x + zoff; // axial (z) - if (ixz < NLI2R) { - int ixt = subs[blockIdx.x]; // transaxial indx + + //if (ixz < NLI2R) { + + //> get the number of linear indices of direct and oblique sinograms + if (ixz < nil2r_c) { + + int ixt = subs[blockIdx.x]; // transaxial index //------------------------------------------------- /*** accumulation ***/ @@ -234,7 +239,7 @@ void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2no // RINGS: either all or a subset of rings can be used (span-1 feature only) //----------------------------------------------------------------- // number of rings customised and the resulting size of LUTs and voxels - int nrng_c, nil2r_c, vz0, vz1, nvz; + short nrng_c, nil2r_c, vz0, vz1, nvz; // number of sinos short snno = -1; if (Cnt.SPN == 1) { @@ -323,28 +328,26 @@ void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2no gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); //----------------------------------------------------------------------- + //============================================================================ fprj_drct<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att); HANDLE_ERROR(cudaGetLastError()); - // 
============================================================================ + //============================================================================ + int zoff = nrng_c; - // number of oblique sinograms + //> number of oblique sinograms int Noblq = (nrng_c - 1) * nrng_c / 2; + int Nz = ((Noblq+127)/128)*128; - // first for reduced number of detector rings - if (Cnt.SPN == 1 && Noblq <= 1024 && Noblq > 0) { - fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff); - HANDLE_ERROR(cudaGetLastError()); - - } else { - fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff); - HANDLE_ERROR(cudaGetLastError()); + //============================================================================ + fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff, nil2r_c); + HANDLE_ERROR(cudaGetLastError()); - zoff += NSINOS / 4; - fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff); - HANDLE_ERROR(cudaGetLastError()); - } + zoff += Nz/2; + fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff, nil2r_c); + HANDLE_ERROR(cudaGetLastError()); + //============================================================================ cudaEventRecord(stop, 0); cudaEventSynchronize(stop); @@ -396,29 +399,36 @@ void rec_fprj(float *d_sino, float *d_img, int *d_sub, int Nprj, else if (Cnt.SPN == 11) snno = NSINOS11; + //> number of oblique sinograms + int Noblq = (NRINGS*(NRINGS-1)-12)/2; + //> number of threads (in the axial direction) + int Nz = ((Noblq+127)/128)*128; + cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); cudaEventRecord(start, 0); if (Cnt.LOG <= LOGDEBUG) printf("i> subset forward projection (Nprj=%d)... 
", Nprj); + //============================================================================ fprj_drct<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0); - // HANDLE_ERROR(cudaGetLastError()); + HANDLE_ERROR(cudaGetLastError()); //============================================================================ int zoff = NRINGS; //============================================================================ - fprj_oblq<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff); - // HANDLE_ERROR(cudaGetLastError()); + fprj_oblq<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff, NLI2R); + HANDLE_ERROR(cudaGetLastError()); //============================================================================ - zoff += NSINOS / 4; + zoff += Nz/2; //============================================================================ - fprj_oblq<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff); - // HANDLE_ERROR(cudaGetLastError()); + fprj_oblq<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff, NLI2R); + HANDLE_ERROR(cudaGetLastError()); //============================================================================ + cudaEventRecord(stop, 0); cudaEventSynchronize(stop); float elapsedTime; From 90a06ca0c91c6015c882ae32c62353bd68e6db9e Mon Sep 17 00:00:00 2001 From: Pawel Markiewicz Date: Mon, 7 Jun 2021 15:36:11 +0100 Subject: [PATCH 48/64] updates for reduced axial FOV recon --- niftypet/nipet/img/mmrimg.py | 8 +++++++- niftypet/nipet/mmrnorm.py | 9 +++++---- niftypet/nipet/prj/mmrrec.py | 3 ++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/niftypet/nipet/img/mmrimg.py b/niftypet/nipet/img/mmrimg.py index 3b2c1f50..c2a5c6f5 100644 --- a/niftypet/nipet/img/mmrimg.py +++ b/niftypet/nipet/img/mmrimg.py @@ -105,11 +105,17 @@ def image_affine(datain, Cnt, gantry_offset=False): else: goff = np.zeros((3)) vbed, hbed = mmraux.vh_bedpos(datain, Cnt) + + if 'rNRNG' in Cnt and 'rSO_IMZ' in Cnt: + imz = Cnt['rSO_IMZ'] + else: + imz = 
Cnt['SO_IMZ'] + # create a reference empty mu-map image B = np.diag(np.array([-10 * Cnt['SO_VXX'], 10 * Cnt['SO_VXY'], 10 * Cnt['SO_VXZ'], 1])) B[0, 3] = 10 * (.5 * Cnt['SO_IMX'] * Cnt['SO_VXX'] + goff[0]) B[1, 3] = 10 * ((-.5 * Cnt['SO_IMY'] + 1) * Cnt['SO_VXY'] - goff[1]) - B[2, 3] = 10 * ((-.5 * Cnt['SO_IMZ'] + 1) * Cnt['SO_VXZ'] - goff[2] + hbed) + B[2, 3] = 10 * ((-.5 * imz + 1) * Cnt['SO_VXZ'] - goff[2] + hbed) # ------------------------------------------------------------------------------------- return B diff --git a/niftypet/nipet/mmrnorm.py b/niftypet/nipet/mmrnorm.py index 6de59f2c..3488e370 100644 --- a/niftypet/nipet/mmrnorm.py +++ b/niftypet/nipet/mmrnorm.py @@ -163,6 +163,11 @@ def get_norm_sino( txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] + #> check if reduction of axial FOV (reducing the number of rings) is off + if 'rNSN1' in Cnt and 'rLUT' in axLUT: + raise ValueError('Full FOV has to be used for normalisation - switch off reduced rings mode.') + + #> get the normalisation components if normcomp is None: normcomp, _ = get_components(datain, Cnt) @@ -183,10 +188,6 @@ def get_norm_sino( mmr_auxe.norm(sng, normcomp, hst['buckets'], axLUT, txLUT['aw2ali'], Cnt) #------------------------------------------------------------------------- - #> check if needed reduction of axial FOV (reducing the number of rings) - if 'rNSN1' in Cnt and 'rLUT' in axLUT: - sng = sng[:, axLUT['rLUT']] - if gpu_dim: return sng diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 7a041820..9798974f 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -239,7 +239,8 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # ======================================================================== # Randoms # ------------------------------------------------------------------------- - if isinstance(randsino, np.ndarray): + if isinstance(randsino, np.ndarray) \ + and 
randsino.shape==(Cnt['NSN11'], Cnt['NSANGLES'], Cnt['NSBINS']): rsino = randsino rsng = mmraux.remgaps(randsino, txLUT, Cnt) else: From 03762669ee2886b66f566931b23814dadbfd44a7 Mon Sep 17 00:00:00 2001 From: Pawel Date: Thu, 17 Jun 2021 00:58:57 +0100 Subject: [PATCH 49/64] fixing weird rtx bug --- niftypet/nipet/prj/src/tprj.cu | 73 +++++++++++++++++----------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/niftypet/nipet/prj/src/tprj.cu b/niftypet/nipet/prj/src/tprj.cu index 284a8cb0..f7981e69 100644 --- a/niftypet/nipet/prj/src/tprj.cu +++ b/niftypet/nipet/prj/src/tprj.cu @@ -1,5 +1,5 @@ /*------------------------------------------------------------------------ -CUDA C extention for Python +CUDA C extension for Python Provides functionality for forward and back projection in transaxial dimension. @@ -21,50 +21,51 @@ __global__ void sddn_tx(const float4 *crs, const short2 *s2c, float *tt, unsigne short c1 = s2c[idx].x; short c2 = s2c[idx].y; - float cc1[3]; - float cc2[3]; - cc1[0] = .5 * (crs[c1].x + crs[c1].z); - cc2[0] = .5 * (crs[c2].x + crs[c2].z); + float2 cc1; + float2 cc2; + cc1.x = .5 * (crs[c1].x + crs[c1].z); + cc2.x = .5 * (crs[c2].x + crs[c2].z); - cc1[1] = .5 * (crs[c1].y + crs[c1].w); - cc2[1] = .5 * (crs[c2].y + crs[c2].w); + cc1.y = .5 * (crs[c1].y + crs[c1].w); + cc2.y = .5 * (crs[c2].y + crs[c2].w); // crystal edge vector - float e[2]; - e[0] = crs[c1].z - crs[c1].x; - e[1] = crs[c1].w - crs[c1].y; + float2 e; + e.x = crs[c1].z - crs[c1].x; + e.y = crs[c1].w - crs[c1].y; float px, py; - px = crs[c1].x + 0.5 * e[0]; - py = crs[c1].y + 0.5 * e[1]; - - float at[3], atn; - for (int i = 0; i < 2; i++) { - at[i] = cc2[i] - cc1[i]; - atn += at[i] * at[i]; - } + px = crs[c1].x + 0.5 * e.x; + py = crs[c1].y + 0.5 * e.y; + + float2 at; + float atn; + + at.x = cc2.x - cc1.x; + at.y = cc2.y - cc1.y; + atn = at.x*at.x + at.y*at.y; atn = sqrtf(atn); - at[0] = at[0] / atn; - at[1] = at[1] / atn; + at.x = at.x / atn; + at.y = at.y / atn; 
//--ring tfov - float Br = 2 * (px * at[0] + py * at[1]); + float Br = 2 * (px * at.x + py * at.y); float Cr = 4 * (-TFOV2 + px * px + py * py); float t1 = .5 * (-Br - sqrtf(Br * Br - Cr)); float t2 = .5 * (-Br + sqrtf(Br * Br - Cr)); //-- //-rows - float y1 = py + at[1] * t1; - float lr1 = SZ_VOXY * (ceilf(y1 / SZ_VOXY) - signbit(at[1])); // line of the first row + float y1 = py + at.y * t1; + float lr1 = SZ_VOXY * (ceilf(y1 / SZ_VOXY) - signbit(at.y)); // line of the first row int v = 0.5 * SZ_IMY - ceil(y1 / SZ_VOXY); - float y2 = py + at[1] * t2; - float lr2 = SZ_VOXY * (floorf(y2 / SZ_VOXY) + signbit(at[1])); // line of the last row + float y2 = py + at.y * t2; + float lr2 = SZ_VOXY * (floorf(y2 / SZ_VOXY) + signbit(at.y)); // line of the last row - float tr1 = (lr1 - py) / at[1]; // first ray interaction with a row - float tr2 = (lr2 - py) / at[1]; // last ray interaction with a row + float tr1 = (lr1 - py) / at.y; // first ray interaction with a row + float tr2 = (lr2 - py) / at.y; // last ray interaction with a row // boolean bool y21 = (fabsf(y2 - y1) >= SZ_VOXY); bool lr21 = (fabsf(lr1 - lr2) < L21); @@ -76,15 +77,15 @@ __global__ void sddn_tx(const float4 *crs, const short2 *s2c, float *tt, unsigne dtr = t2; //-columns - double x1 = px + at[0] * t1; - float lc1 = SZ_VOXY * (ceil(x1 / SZ_VOXY) - signbit(at[0])); + double x1 = px + at.x * t1; + float lc1 = SZ_VOXY * (ceil(x1 / SZ_VOXY) - signbit(at.x)); int u = 0.5 * SZ_IMX + floor(x1 / SZ_VOXY); // starting voxel column - float x2 = px + at[0] * t2; - float lc2 = SZ_VOXY * (floor(x2 / SZ_VOXY) + signbit(at[0])); + float x2 = px + at.x * t2; + float lc2 = SZ_VOXY * (floor(x2 / SZ_VOXY) + signbit(at.x)); - float tc1 = (lc1 - px) / at[0]; - float tc2 = (lc2 - px) / at[0]; + float tc1 = (lc1 - px) / at.x; + float tc2 = (lc2 - px) / at.x; bool x21 = (fabsf(x2 - x1) >= SZ_VOXY); bool lc21 = (fabsf(lc1 - lc2) < L21); @@ -101,17 +102,17 @@ __global__ void sddn_tx(const float4 *crs, const short2 *s2c, float *tt, 
unsigne // } /***************************************************************/ - float ang = atanf(at[1] / at[0]); // angle of the ray + float ang = atanf(at.y / at.x); // angle of the ray bool tsin; // condition for the slower changing to be in // save the sign of vector at components. used for image indx increments. // since it is saved in unsigned format use offset of 1; - if (at[0] >= 0) + if (at.x >= 0) tv[N_TV * idx] = 2; else tv[N_TV * idx] = 0; - if (at[1] >= 0) + if (at.y >= 0) tv[N_TV * idx + 1] = 2; else tv[N_TV * idx + 1] = 0; From 8039b76eaedb749f7c8d22cf3ca1c0fa98e5a018 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Fri, 18 Jun 2021 15:05:32 +0100 Subject: [PATCH 50/64] fix formatting --- niftypet/nipet/__init__.py | 9 +++++---- niftypet/nipet/img/mmrimg.py | 2 +- niftypet/nipet/img/pipe.py | 4 ++-- niftypet/nipet/mmraux.py | 10 +++++----- niftypet/nipet/mmrnorm.py | 29 ++++++++++++----------------- niftypet/nipet/prj/mmrrec.py | 19 ++++++++++--------- niftypet/nipet/prj/src/prjb.cu | 23 ++++++++++------------- niftypet/nipet/prj/src/prjf.cu | 28 ++++++++++++---------------- niftypet/nipet/prj/src/tprj.cu | 8 ++++---- niftypet/nipet/sct/mmrsct.py | 9 +++++---- niftypet/nipet/sct/src/sct.cu | 3 ++- niftypet/nipet/src/norm.cu | 3 ++- 12 files changed, 70 insertions(+), 77 deletions(-) diff --git a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index 2a8354a0..1fb9d805 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ -20,12 +20,14 @@ # package 'img', 'lm', 'mmr_auxe', 'mmraux', 'mmrnorm', 'prj', # img - 'align_mumap', 'im_e72dev', 'im_dev2e7', 'hdw_mumap', 'obj_mumap', + 'align_mumap', 'get_cylinder', 'im_e72dev', 'im_dev2e7', 'hdw_mumap', 'obj_mumap', 'pct_mumap', 'mmrchain', # lm 'dynamic_timings', 'mmrhist', 'randoms', # mmraux - 'classify_input', 'get_mmrparams', + 'classify_input', 'get_mmrparams', 'sino2ssr', + # mmrnorm + 'get_norm_sino', # prj 'back_prj', 'frwd_prj', 'simulate_recon', 
'simulate_sino', # sct @@ -50,12 +52,11 @@ from .mmraux import explore_input as classify_input from .mmraux import mMR_params as get_mmrparams from .mmraux import sino2ssr +from .mmrnorm import get_norm_sino from .prj.mmrprj import back_prj, frwd_prj from .prj.mmrsim import simulate_recon, simulate_sino from .sct.mmrsct import vsm -from .mmrnorm import get_norm_sino - # log = logging.getLogger(__name__) # technically bad practice to add handlers # https://docs.python.org/3/howto/logging.html#library-config diff --git a/niftypet/nipet/img/mmrimg.py b/niftypet/nipet/img/mmrimg.py index c2a5c6f5..d5665e41 100644 --- a/niftypet/nipet/img/mmrimg.py +++ b/niftypet/nipet/img/mmrimg.py @@ -463,7 +463,7 @@ def align_mumap( fmu_stored = fnm + '-aligned-to_t'\ + str(t0)+'-'+str(t1)+'_'+petopt.upper()\ + fcomment - fmupath = os.path.join(opth, fmu_stored+'.nii.gz') + fmupath = os.path.join(opth, fmu_stored + '.nii.gz') if os.path.isfile(fmupath): mudct_stored = nimpa.getnii(fmupath, output='all') diff --git a/niftypet/nipet/img/pipe.py b/niftypet/nipet/img/pipe.py index 76874fbc..df94e994 100644 --- a/niftypet/nipet/img/pipe.py +++ b/niftypet/nipet/img/pipe.py @@ -272,9 +272,9 @@ def mmrchain( output['fsmoi'] = [] # > number of3D sinograms - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: snno = Cnt['NSN1'] - elif Cnt['SPN']==11: + elif Cnt['SPN'] == 11: snno = Cnt['NSN11'] else: raise ValueError('unrecognised span: {}'.format(Cnt['SPN'])) diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index b0e2366f..30099fc7 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -573,13 +573,13 @@ def reduce_rings(pars, rs=0, re=64): re -- end ring (not included in the resulting reduced rings) """ + if (re - rs) < 0 or ((re-rs) % 2) != 0: + raise ValueError('The resulting number of rings has to be even and start ring (rs)' + ' smaller than end ring (re)') - if (re-rs)<0 or ((re-rs)%2)!=0: - raise ValueError('The resulting number of rings has to be even and start 
ring (rs) smaller than end ring (re)') - - #> reduced rings work in span-1 only + # > reduced rings work in span-1 only pars['Cnt']['SPN'] = 1 - + # select the number of sinograms for the number of rings # RNG_STRT is included in detection # RNG_END is not included in detection process diff --git a/niftypet/nipet/mmrnorm.py b/niftypet/nipet/mmrnorm.py index 3488e370..9ced14c2 100644 --- a/niftypet/nipet/mmrnorm.py +++ b/niftypet/nipet/mmrnorm.py @@ -110,7 +110,7 @@ def get_components(datain, Cnt): def get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=None): - ''' to be depreciated + ''' to be depreciated ''' # get the normalisation components if normcomp is None: @@ -132,7 +132,7 @@ def get_sinog(datain, hst, axLUT, txLUT, Cnt, normcomp=None): def get_sino(datain, hst, axLUT, txLUT, Cnt): - ''' to be depreciated + ''' to be depreciated ''' # number of sino planes (2D sinos) depends on the span used @@ -152,27 +152,22 @@ def get_sino(datain, hst, axLUT, txLUT, Cnt): return sino -def get_norm_sino( - datain, - scanner_params, - hst, - normcomp=None, - gpu_dim=False): +def get_norm_sino(datain, scanner_params, hst, normcomp=None, gpu_dim=False): Cnt = scanner_params['Cnt'] txLUT = scanner_params['txLUT'] axLUT = scanner_params['axLUT'] - #> check if reduction of axial FOV (reducing the number of rings) is off + # > check if reduction of axial FOV (reducing the number of rings) is off if 'rNSN1' in Cnt and 'rLUT' in axLUT: - raise ValueError('Full FOV has to be used for normalisation - switch off reduced rings mode.') + raise ValueError( + 'Full FOV has to be used for normalisation - switch off reduced rings mode.') - - #> get the normalisation components + # > get the normalisation components if normcomp is None: normcomp, _ = get_components(datain, Cnt) - #> number of sinogram planes, depends on the span used + # > number of sinogram planes, depends on the span used if Cnt['SPN'] == 1: nsinos = Cnt['NSN1'] elif Cnt['SPN'] == 11: @@ -180,13 +175,13 @@ def 
get_norm_sino( else: raise ValueError('unrecognised span {}'.format(Cnt['SPN'])) - #------------------------------------------------------------------------- - #> initialise the sinogram + # ------------------------------------------------------------------------- + # > initialise the sinogram sng = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) - #> get the norm + # > get the norm mmr_auxe.norm(sng, normcomp, hst['buckets'], axLUT, txLUT['aw2ali'], Cnt) - #------------------------------------------------------------------------- + # ------------------------------------------------------------------------- if gpu_dim: return sng diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 9798974f..7a8dc4bb 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -177,7 +177,7 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N else: opth = outpath - #> file output name (the path is ignored if given) + # > file output name (the path is ignored if given) if fout is not None: # > get rid of folders fout = os.path.basename(fout) @@ -368,16 +368,16 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N ssng = mmraux.remgaps(ssn, txLUT, Cnt) pbar.set_postfix(scatter="%.3gs" % (time.time() - sct_time)) # save images during reconstruction if requested - if store_itr and (k+1) in store_itr: + if store_itr and (k + 1) in store_itr: im = mmrimg.convert2e7(img * (dcycrr*qf*qf_loc), Cnt) if fout is None: fpet = os.path.join( - opth, (os.path.basename(datain['lm_bf'])[:16].replace('.','-') + - f"{frmno}_t{hst['t0']}-{hst['t1']}sec_itr{k+1}{fcomment}_inrecon.nii.gz")) + opth, + (os.path.basename(datain['lm_bf'])[:16].replace('.', '-') + + f"{frmno}_t{hst['t0']}-{hst['t1']}sec_itr{k+1}{fcomment}_inrecon.nii.gz")) else: - fpet = os.path.join( - opth, fout+f'_itr{k+1}{fcomment}_inrecon.nii.gz') + fpet = os.path.join(opth, fout + f'_itr{k+1}{fcomment}_inrecon.nii.gz') 
nimpa.array2nii(im[::-1, ::-1, :], B, fpet) @@ -413,10 +413,11 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N # > file name of the output reconstructed image # > (maybe used later even if not stored now) if fout is None: - fpet = os.path.join(opth, (os.path.basename(datain['lm_bf']).split('.')[0] + - f"{frmno}_t{hst['t0']}-{hst['t1']}sec_itr{itr}{fcomment}.nii.gz")) + fpet = os.path.join(opth, + (os.path.basename(datain['lm_bf']).split('.')[0] + + f"{frmno}_t{hst['t0']}-{hst['t1']}sec_itr{itr}{fcomment}.nii.gz")) else: - fpet = os.path.join(opth, fout+f'_itr{itr}{fcomment}.nii.gz') + fpet = os.path.join(opth, fout + f'_itr{itr}{fcomment}.nii.gz') if store_img: log.info('saving image to: %s', fpet) diff --git a/niftypet/nipet/prj/src/prjb.cu b/niftypet/nipet/prj/src/prjb.cu index 904ed6b2..91e9168b 100644 --- a/niftypet/nipet/prj/src/prjb.cu +++ b/niftypet/nipet/prj/src/prjb.cu @@ -85,7 +85,7 @@ __global__ void bprj_drct(const float *sino, float *im, const float *tt, const u //************** OBLIQUE ************************************************** __global__ void bprj_oblq(const float *sino, float *im, const float *tt, const unsigned char *tv, const int *subs, const short snno, const int zoff, const short nil2r_c) { - + int ixz = threadIdx.x + zoff; // axial (z) if (ixz < nil2r_c) { @@ -285,20 +285,17 @@ void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2no int zoff = nrng_c; //> number of oblique sinograms int Noblq = (nrng_c - 1) * nrng_c / 2; - int Nz = ((Noblq+127)/128)*128; + int Nz = ((Noblq + 127) / 128) * 128; //============================================================================ - bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff, nil2r_c); + bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff, nil2r_c); HANDLE_ERROR(cudaGetLastError()); - zoff += Nz/2; - bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff, nil2r_c); + zoff += Nz / 2; + bprj_oblq<<>>(d_sino, 
d_im, d_tt, d_tv, d_subs, snno, zoff, nil2r_c); HANDLE_ERROR(cudaGetLastError()); //============================================================================ - - - //============================================================================ cudaEventRecord(stop, 0); @@ -374,9 +371,9 @@ void rec_bprj(float *d_bimg, float *d_sino, int *d_sub, int Nprj, float *d_tt, u snno = NSINOS11; //> number of oblique sinograms - int Noblq = (NRINGS*(NRINGS-1)-12)/2; + int Noblq = (NRINGS * (NRINGS - 1) - 12) / 2; //> number of threads (in the axial direction) - int Nz = ((Noblq+127)/128)*128; + int Nz = ((Noblq + 127) / 128) * 128; cudaEvent_t start, stop; cudaEventCreate(&start); @@ -391,13 +388,13 @@ void rec_bprj(float *d_bimg, float *d_sino, int *d_sub, int Nprj, float *d_tt, u int zoff = NRINGS; //============================================================================ - bprj_oblq<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff, NLI2R); + bprj_oblq<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff, NLI2R); HANDLE_ERROR(cudaGetLastError()); //============================================================================ - zoff += Nz/2; + zoff += Nz / 2; //============================================================================ - bprj_oblq<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff, NLI2R); + bprj_oblq<<>>(d_sino, d_bimg, d_tt, d_tv, d_sub, snno, zoff, NLI2R); HANDLE_ERROR(cudaGetLastError()); //============================================================================ diff --git a/niftypet/nipet/prj/src/prjf.cu b/niftypet/nipet/prj/src/prjf.cu index e3c9e14d..2ae6d347 100644 --- a/niftypet/nipet/prj/src/prjf.cu +++ b/niftypet/nipet/prj/src/prjf.cu @@ -100,11 +100,11 @@ __global__ void fprj_oblq(float *sino, const float *im, const float *tt, const u const int zoff, const short nil2r_c) { int ixz = threadIdx.x + zoff; // axial (z) - //if (ixz < NLI2R) { + // if (ixz < NLI2R) { - //> get the number of linear indices of direct and oblique sinograms + //> 
get the number of linear indices of direct and oblique sinograms if (ixz < nil2r_c) { - + int ixt = subs[blockIdx.x]; // transaxial index //------------------------------------------------- @@ -328,24 +328,22 @@ void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2no gpu_siddon_tx(d_crs, d_s2c, d_tt, d_tv); //----------------------------------------------------------------------- - //============================================================================ fprj_drct<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att); HANDLE_ERROR(cudaGetLastError()); //============================================================================ - int zoff = nrng_c; //> number of oblique sinograms int Noblq = (nrng_c - 1) * nrng_c / 2; - int Nz = ((Noblq+127)/128)*128; + int Nz = ((Noblq + 127) / 128) * 128; //============================================================================ - fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff, nil2r_c); + fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff, nil2r_c); HANDLE_ERROR(cudaGetLastError()); - zoff += Nz/2; - fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff, nil2r_c); + zoff += Nz / 2; + fprj_oblq<<>>(d_sn, d_im, d_tt, d_tv, d_subs, snno, Cnt.SPN, att, zoff, nil2r_c); HANDLE_ERROR(cudaGetLastError()); //============================================================================ @@ -400,9 +398,9 @@ void rec_fprj(float *d_sino, float *d_img, int *d_sub, int Nprj, snno = NSINOS11; //> number of oblique sinograms - int Noblq = (NRINGS*(NRINGS-1)-12)/2; + int Noblq = (NRINGS * (NRINGS - 1) - 12) / 2; //> number of threads (in the axial direction) - int Nz = ((Noblq+127)/128)*128; + int Nz = ((Noblq + 127) / 128) * 128; cudaEvent_t start, stop; cudaEventCreate(&start); @@ -410,7 +408,6 @@ void rec_fprj(float *d_sino, float *d_img, int *d_sub, int Nprj, cudaEventRecord(start, 0); if (Cnt.LOG <= LOGDEBUG) printf("i> subset forward projection 
(Nprj=%d)... ", Nprj); - //============================================================================ fprj_drct<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0); HANDLE_ERROR(cudaGetLastError()); @@ -418,17 +415,16 @@ void rec_fprj(float *d_sino, float *d_img, int *d_sub, int Nprj, int zoff = NRINGS; //============================================================================ - fprj_oblq<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff, NLI2R); + fprj_oblq<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff, NLI2R); HANDLE_ERROR(cudaGetLastError()); //============================================================================ - zoff += Nz/2; + zoff += Nz / 2; //============================================================================ - fprj_oblq<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff, NLI2R); + fprj_oblq<<>>(d_sino, d_img, d_tt, d_tv, d_sub, snno, Cnt.SPN, 0, zoff, NLI2R); HANDLE_ERROR(cudaGetLastError()); //============================================================================ - cudaEventRecord(stop, 0); cudaEventSynchronize(stop); float elapsedTime; diff --git a/niftypet/nipet/prj/src/tprj.cu b/niftypet/nipet/prj/src/tprj.cu index f7981e69..251ab972 100644 --- a/niftypet/nipet/prj/src/tprj.cu +++ b/niftypet/nipet/prj/src/tprj.cu @@ -40,10 +40,10 @@ __global__ void sddn_tx(const float4 *crs, const short2 *s2c, float *tt, unsigne float2 at; float atn; - + at.x = cc2.x - cc1.x; at.y = cc2.y - cc1.y; - atn = at.x*at.x + at.y*at.y; + atn = at.x * at.x + at.y * at.y; atn = sqrtf(atn); at.x = at.x / atn; @@ -66,7 +66,7 @@ __global__ void sddn_tx(const float4 *crs, const short2 *s2c, float *tt, unsigne float tr1 = (lr1 - py) / at.y; // first ray interaction with a row float tr2 = (lr2 - py) / at.y; // last ray interaction with a row - // boolean + // boolean bool y21 = (fabsf(y2 - y1) >= SZ_VOXY); bool lr21 = (fabsf(lr1 - lr2) < L21); int nr = y21 * roundf(abs(lr2 - lr1) / SZ_VOXY) + lr21; // number of rows 
on the way *_SZVXY @@ -103,7 +103,7 @@ __global__ void sddn_tx(const float4 *crs, const short2 *s2c, float *tt, unsigne /***************************************************************/ float ang = atanf(at.y / at.x); // angle of the ray - bool tsin; // condition for the slower changing to be in + bool tsin; // condition for the slower changing to be in // save the sign of vector at components. used for image indx increments. // since it is saved in unsigned format use offset of 1; diff --git a/niftypet/nipet/sct/mmrsct.py b/niftypet/nipet/sct/mmrsct.py index 256af813..4283ef8d 100644 --- a/niftypet/nipet/sct/mmrsct.py +++ b/niftypet/nipet/sct/mmrsct.py @@ -76,7 +76,7 @@ def get_scrystals(scanner_params): sirng = np.int16(Cnt['SIRNG']) - #> axial scatter ring positions in cm + # > axial scatter ring positions in cm srng = np.zeros((Cnt['NSRNG'], 2), dtype=np.float32) for ir in range(Cnt['NSRNG']): srng[ir, 0] = float(sirng[ir]) @@ -84,7 +84,8 @@ def get_scrystals(scanner_params): logtxt += '> [{}]: ring_i={}, ring_z={}\n'.format(ir, int(srng[ir, 0]), srng[ir, 1]) log.debug(logtxt) - return {'scrs': scrs, 'srng': srng, 'sirng': sirng, 'NSCRS': scrs.shape[0], 'NSRNG': Cnt['NSRNG']} + return { + 'scrs': scrs, 'srng': srng, 'sirng': sirng, 'NSCRS': scrs.shape[0], 'NSRNG': Cnt['NSRNG']} # ====================================================================== @@ -295,9 +296,9 @@ def intrp_bsct(sct3d, Cnt, sctLUT, ssrlut, dtype=np.float32): ''' # > number of sinograms - if Cnt['SPN']==1: + if Cnt['SPN'] == 1: snno = Cnt['NSN1'] - elif Cnt['SPN']==11: + elif Cnt['SPN'] == 11: snno = Cnt['NSN11'] else: raise ValueError('unrecognised span!') diff --git a/niftypet/nipet/sct/src/sct.cu b/niftypet/nipet/sct/src/sct.cu index cb31c2c8..f10ee7dd 100644 --- a/niftypet/nipet/sct/src/sct.cu +++ b/niftypet/nipet/sct/src/sct.cu @@ -541,7 +541,8 @@ scatOUT prob_scatt(scatOUT sctout, float *KNlut, char *mumsk, IMflt mu, IMflt em 
//============================================================ if (Cnt.LOG <= LOGINFO) - printf("i> calculating scatter probabilities for %d emission voxels using device #%d...", d_em_msk.nvx, dev_id); + printf("i> calculating scatter probabilities for %d emission voxels using device #%d...", + d_em_msk.nvx, dev_id); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); diff --git a/niftypet/nipet/src/norm.cu b/niftypet/nipet/src/norm.cu index 21fbc4ab..ec49f569 100644 --- a/niftypet/nipet/src/norm.cu +++ b/niftypet/nipet/src/norm.cu @@ -183,7 +183,8 @@ void norm_from_components(float *sino, // output norm sino // CUDA grid size (in blocks) int blcks = ceil(AW / (float)NTHREADS); - if (Cnt.LOG <= LOGINFO) printf("i> calculating normalisation sinogram using device #%d...", dev_id); + if (Cnt.LOG <= LOGINFO) + printf("i> calculating normalisation sinogram using device #%d...", dev_id); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); From 1d9138826c574e683f9aa04ffb0ac310f848368a Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 3 Feb 2021 23:43:52 +0000 Subject: [PATCH 51/64] fprj using cuvec>=2.5.0 --- niftypet/CMakeLists.txt | 14 ++++++++++ niftypet/nipet/prj/CMakeLists.txt | 2 ++ niftypet/nipet/prj/mmrprj.py | 25 ++++++++++------- niftypet/nipet/prj/mmrrec.py | 7 +++-- niftypet/nipet/prj/src/prj_module.cu | 42 ++++++++-------------------- niftypet/nipet/prj/src/prjf.cu | 26 ++--------------- niftypet/nipet/prj/src/prjf.h | 2 +- niftypet/nipet/sct/mmrsct.py | 6 ++-- pyproject.toml | 2 +- setup.cfg | 6 ++-- 10 files changed, 60 insertions(+), 72 deletions(-) diff --git a/niftypet/CMakeLists.txt b/niftypet/CMakeLists.txt index 0e40a43c..9fb77efd 100644 --- a/niftypet/CMakeLists.txt +++ b/niftypet/CMakeLists.txt @@ -9,6 +9,15 @@ cmake_policy(SET CMP0074 NEW) # _ROOT hints for find_package cmake_policy(SET CMP0104 NEW) # CMAKE_CUDA_ARCHITECTURES find_package(Python3 COMPONENTS Interpreter Development NumPy 
REQUIRED) find_package(CUDAToolkit REQUIRED) +execute_process( + COMMAND "${Python3_EXECUTABLE}" -c "import cuvec; print(cuvec.include_path)" + OUTPUT_VARIABLE CUVEC_INCLUDE_DIRS + OUTPUT_STRIP_TRAILING_WHITESPACE) +if("${CUVEC_INCLUDE_DIRS}" STREQUAL "") + message(WARNING "Could not find cuvec includes") +else() + message(STATUS "Found cuvec includes: ${CUVEC_INCLUDE_DIRS}") +endif() if(SKBUILD) find_package(PythonExtensions REQUIRED) set(LIB_TYPE "MODULE") @@ -23,6 +32,11 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "") endif() message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") +option(CUVEC_DEBUG "Print out CUDA malloc & free operations" OFF) +if(CUVEC_DEBUG) + add_compile_definitions(CUVEC_DEBUG) +endif(CUVEC_DEBUG) +message(STATUS "cuvec debugging: ${CUVEC_DEBUG}") if("${NIPET_CU_THREADS}" STREQUAL "") set(NIPET_CU_THREADS 1024 CACHE STRING "Maximum number of CUDA threads per block (should be less than cudaDeviceProp::maxThreadsDim)" FORCE) diff --git a/niftypet/nipet/prj/CMakeLists.txt b/niftypet/nipet/prj/CMakeLists.txt index 63e15dce..5e11ca32 100644 --- a/niftypet/nipet/prj/CMakeLists.txt +++ b/niftypet/nipet/prj/CMakeLists.txt @@ -3,6 +3,7 @@ project(petprj) file(GLOB SRC LIST_DIRECTORIES false "src/*.cu") include_directories(src) include_directories(${Python3_INCLUDE_DIRS}) +include_directories(${CUVEC_INCLUDE_DIRS}) include_directories(${Python3_NumPy_INCLUDE_DIRS}) add_library(${PROJECT_NAME} ${LIB_TYPE} ${SRC}) @@ -16,6 +17,7 @@ if(SKBUILD) python_extension_module(${PROJECT_NAME}) endif() set_target_properties(${PROJECT_NAME} PROPERTIES + CXX_STANDARD 11 VERSION ${CMAKE_PROJECT_VERSION} SOVERSION ${CMAKE_PROJECT_VERSION_MAJOR} INTERFACE_${PROJECT_NAME}_MAJOR_VERSION ${CMAKE_PROJECT_VERSION_MAJOR}) diff --git a/niftypet/nipet/prj/mmrprj.py b/niftypet/nipet/prj/mmrprj.py index 1dd7d50a..f7607f1d 100644 --- a/niftypet/nipet/prj/mmrprj.py +++ b/niftypet/nipet/prj/mmrprj.py @@ -1,6 +1,7 @@ """Forward and back projector for PET data reconstruction""" import logging 
+import cuvec as cu import numpy as np from .. import mmraux @@ -43,7 +44,7 @@ def trnx_prj(scanner_params, sino=None, im=None): def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=False, - fullsino_out=True): + fullsino_out=True, output=None): """ Calculate forward projection (a set of sinograms) for the provided input image. Arguments: @@ -58,6 +59,7 @@ def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=F is calculated; the default is False, meaning emission sinogram; for attenuation calculations (attenuation=True), the exponential of the negative of the integrated mu-values along LOR path is taken at the end. + output(CuVec, optional) -- output sinogram. """ # Get particular scanner parameters: Constants, transaxial and axial LUTs Cnt = scanner_params['Cnt'] @@ -108,21 +110,24 @@ def frwd_prj(im, scanner_params, isub=ISUB_DEFAULT, dev_out=False, attenuation=F # predefine the sinogram. # if subsets are used then only preallocate those bins which will be used. 
if isub[0] < 0: - sinog = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) + out_shape = txLUT['Naw'], nsinos else: - sinog = np.zeros((len(isub), nsinos), dtype=np.float32) + out_shape = len(isub), nsinos + if output is None: + sinog = cu.zeros(out_shape, dtype=np.float32) + else: + sinog = cu.asarray(output) + assert sinog.shape == out_shape + assert sinog.dtype == np.dtype('float32') # -------------------- - petprj.fprj(sinog, ims, txLUT, axLUT, isub, Cnt, att) + petprj.fprj(sinog.cuvec, cu.asarray(ims).cuvec, txLUT, axLUT, isub, Cnt, att) # -------------------- # get the sinogram bins in a full sinogram if requested - if fullsino_out: - sino = np.zeros((txLUT['Naw'], nsinos), dtype=np.float32) - if isub[0] >= 0: - sino[isub, :] = sinog - else: - sino = sinog + if fullsino_out and isub[0] >= 0: + sino = cu.zeros((txLUT['Naw'], nsinos), dtype=np.float32) + sino[isub, :] = sinog else: sino = sinog diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 7a8dc4bb..6b4bd2ba 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -6,6 +6,7 @@ from collections.abc import Iterable from numbers import Real +import cuvec as cu import numpy as np import scipy.ndimage as ndi from tqdm.auto import trange @@ -230,8 +231,10 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N asng = attnsino log.info('using provided attenuation factor sinogram') else: - asng = np.zeros(psng.shape, dtype=np.float32) - petprj.fprj(asng, mus, txLUT, axLUT, np.array([-1], dtype=np.int32), Cnt, 1) + asng = cu.zeros(psng.shape, dtype=np.float32) + petprj.fprj(asng.cuvec, + cu.asarray(mus).cuvec, txLUT, axLUT, np.array([-1], dtype=np.int32), Cnt, + 1) # > combine attenuation and normalisation ansng = asng * nsng # ======================================================================== diff --git a/niftypet/nipet/prj/src/prj_module.cu b/niftypet/nipet/prj/src/prj_module.cu index 5a67268e..95dd4d10 100644 --- 
a/niftypet/nipet/prj/src/prj_module.cu +++ b/niftypet/nipet/prj/src/prj_module.cu @@ -11,6 +11,7 @@ Copyrights: 2019 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION // NPY_API_VERSION #include "def.h" +#include "pycuvec.cuh" #include #include #include @@ -242,21 +243,21 @@ static PyObject *frwd_prj(PyObject *self, PyObject *args) { PyObject *o_txLUT; // input image to be forward projected (reshaped for GPU execution) - PyObject *o_im; + PyCuVec *o_im; // subsets for OSEM, first the default PyObject *o_subs; // output projection sino - PyObject *o_prjout; + PyCuVec *o_prjout; // flag for attenuation factors to be found based on mu-map; if 0 normal emission projection is // used int att; //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOOOOi", &o_prjout, &o_im, &o_txLUT, &o_axLUT, &o_subs, &o_mmrcnst, - &att)) + if (!PyArg_ParseTuple(args, "OOOOOOi", (PyObject **)&o_prjout, (PyObject **)&o_im, &o_txLUT, + &o_axLUT, &o_subs, &o_mmrcnst, &att)) return NULL; //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -301,23 +302,16 @@ static PyObject *frwd_prj(PyObject *self, PyObject *args) { p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); - // image object - PyArrayObject *p_im = NULL; - p_im = (PyArrayObject *)PyArray_FROM_OTF(o_im, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - // subsets if using e.g., OSEM PyArrayObject *p_subs = NULL; p_subs = (PyArrayObject *)PyArray_FROM_OTF(o_subs, NPY_INT32, NPY_ARRAY_IN_ARRAY); - // output sino object - PyArrayObject *p_prjout = NULL; - p_prjout = (PyArrayObject *)PyArray_FROM_OTF(o_prjout, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); //-- /* If that didn't work, throw an exception. 
*/ if (p_li2rno == NULL || p_li2sn == NULL || p_li2sn1 == NULL || p_li2nos == NULL || - p_aw2ali == NULL || p_s2c == NULL || p_im == NULL || p_crs == NULL || p_subs == NULL || - p_prjout == NULL || p_li2rng == NULL) { + p_aw2ali == NULL || p_s2c == NULL || !o_im || p_crs == NULL || p_subs == NULL || !o_prjout || + p_li2rng == NULL) { // axLUTs Py_XDECREF(p_li2rno); Py_XDECREF(p_li2sn); @@ -330,15 +324,8 @@ static PyObject *frwd_prj(PyObject *self, PyObject *args) { // sino 2 crystals Py_XDECREF(p_s2c); Py_XDECREF(p_crs); - // image object - Py_XDECREF(p_im); // subset definition object Py_XDECREF(p_subs); - - // output sino object - PyArray_DiscardWritebackIfCopy(p_prjout); - Py_XDECREF(p_prjout); - return NULL; } @@ -354,11 +341,10 @@ static PyObject *frwd_prj(PyObject *self, PyObject *args) { char *li2nos = (char *)PyArray_DATA(p_li2nos); float *li2rng = (float *)PyArray_DATA(p_li2rng); float *crs = (float *)PyArray_DATA(p_crs); - float *im = (float *)PyArray_DATA(p_im); if (Cnt.LOG <= LOGDEBUG) - printf("i> forward-projection image dimensions: %ld, %ld, %ld\n", PyArray_DIM(p_im, 0), - PyArray_DIM(p_im, 1), PyArray_DIM(p_im, 2)); + printf("i> forward-projection image dimensions: %ld, %ld, %ld\n", o_im->shape[0], + o_im->shape[1], o_im->shape[2]); int Nprj = PyArray_DIM(p_subs, 0); int N0crs = PyArray_DIM(p_crs, 0); @@ -382,14 +368,12 @@ static PyObject *frwd_prj(PyObject *self, PyObject *args) { subs = subs_; } - // output projection sinogram - float *prjout = (float *)PyArray_DATA(p_prjout); - // sets the device on which to calculate HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); //<><><><><><><<><><><><><><><><><><><><><><><><><<><><><><><><><><><><><><><><><><><><<><><><><><><><><><><> - gpu_fprj(prjout, im, li2rng, li2sn, li2nos, s2c, aw2ali, crs, subs, Nprj, Naw, N0crs, Cnt, att); + gpu_fprj(o_prjout->vec.data(), o_im->vec.data(), li2rng, li2sn, li2nos, s2c, aw2ali, crs, subs, + Nprj, Naw, N0crs, Cnt, att); 
//<><><><><><><><<><><><><><><><><><><><><><><><><<><><><><><><><><><><><><><><><><><><<><><><><><><><><><><> // Clean up @@ -401,12 +385,8 @@ static PyObject *frwd_prj(PyObject *self, PyObject *args) { Py_DECREF(p_aw2ali); Py_DECREF(p_s2c); Py_DECREF(p_crs); - Py_DECREF(p_im); Py_DECREF(p_subs); - PyArray_ResolveWritebackIfCopy(p_prjout); - Py_DECREF(p_prjout); - if (subs_[0] == -1) free(subs); Py_INCREF(Py_None); diff --git a/niftypet/nipet/prj/src/prjf.cu b/niftypet/nipet/prj/src/prjf.cu index 2ae6d347..51307629 100644 --- a/niftypet/nipet/prj/src/prjf.cu +++ b/niftypet/nipet/prj/src/prjf.cu @@ -206,7 +206,7 @@ __global__ void fprj_oblq(float *sino, const float *im, const float *tt, const u } //-------------------------------------------------------------------------------------------------- -void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2nos, short *s2c, +void gpu_fprj(float *d_sn, float *d_im, float *li2rng, short *li2sn, char *li2nos, short *s2c, int *aw2ali, float *crs, int *subs, int Nprj, int Naw, int N0crs, Cnst Cnt, char att) { int dev_id; @@ -271,23 +271,13 @@ void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2no } //----------------------------------------------------------------- - - //--- FULLY 3D - float *d_sn; - HANDLE_ERROR(cudaMalloc(&d_sn, Nprj * snno * sizeof(float))); - HANDLE_ERROR(cudaMemset(d_sn, 0, Nprj * snno * sizeof(float))); - - // allocate for image to be forward projected on the device - float *d_im; - HANDLE_ERROR(cudaMalloc(&d_im, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); - // when rings are reduced expand the image to account for whole axial FOV if (nvz < SZ_IMZ) { - // first the reduced image into the device + // copy the reduced image float *d_imr; HANDLE_ERROR(cudaMalloc(&d_imr, SZ_IMX * SZ_IMY * nvz * sizeof(float))); HANDLE_ERROR( - cudaMemcpy(d_imr, im, SZ_IMX * SZ_IMY * nvz * sizeof(float), cudaMemcpyHostToDevice)); + cudaMemcpy(d_imr, d_im, SZ_IMX * SZ_IMY * nvz * 
sizeof(float), cudaMemcpyDeviceToDevice)); // put zeros in the gaps of unused voxels HANDLE_ERROR(cudaMemset(d_im, 0, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); // number of axial row for max threads @@ -297,10 +287,6 @@ void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2no imExpand<<>>(d_im, d_imr, vz0, nvz); HANDLE_ERROR(cudaGetLastError()); cudaFree(d_imr); - } else { - // copy to GPU memory - HANDLE_ERROR( - cudaMemcpy(d_im, im, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float), cudaMemcpyHostToDevice)); } // float *d_li2rng; HANDLE_ERROR( cudaMalloc(&d_li2rng, N0li*N1li*sizeof(float)) ); @@ -357,17 +343,11 @@ void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2no cudaDeviceSynchronize(); - HANDLE_ERROR(cudaMemcpy(prjout, d_sn, Nprj * snno * sizeof(float), cudaMemcpyDeviceToHost)); - - cudaFree(d_sn); - cudaFree(d_im); cudaFree(d_tt); cudaFree(d_tv); cudaFree(d_subs); HANDLE_ERROR(cudaFree(d_crs)); HANDLE_ERROR(cudaFree(d_s2c)); - - return; } //======================================================================= diff --git a/niftypet/nipet/prj/src/prjf.h b/niftypet/nipet/prj/src/prjf.h index b37d16ee..a11512cb 100644 --- a/niftypet/nipet/prj/src/prjf.h +++ b/niftypet/nipet/prj/src/prjf.h @@ -6,7 +6,7 @@ #ifndef PRJF_H #define PRJF_H -void gpu_fprj(float *prjout, float *im, float *li2rng, short *li2sn, char *li2nos, short *s2c, +void gpu_fprj(float *d_sn, float *d_im, float *li2rng, short *li2sn, char *li2nos, short *s2c, int *aw2ali, float *crs, int *subs, int Nprj, int Naw, int N0crs, Cnst Cnt, char att); diff --git a/niftypet/nipet/sct/mmrsct.py b/niftypet/nipet/sct/mmrsct.py index 4283ef8d..3245b3bc 100644 --- a/niftypet/nipet/sct/mmrsct.py +++ b/niftypet/nipet/sct/mmrsct.py @@ -6,6 +6,7 @@ import time from math import pi +import cuvec as cu import nibabel as nib import numpy as np import scipy.ndimage as ndi @@ -569,8 +570,9 @@ def vsm( # 
<<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>><<+>> currentspan = Cnt['SPN'] Cnt['SPN'] = 1 - atto = np.zeros((txLUT['Naw'], Cnt['NSN1']), dtype=np.float32) - petprj.fprj(atto, mu_sctonly, txLUT, axLUT, np.array([-1], dtype=np.int32), Cnt, 1) + atto = cu.zeros((txLUT['Naw'], Cnt['NSN1']), dtype=np.float32) + petprj.fprj(atto.cuvec, + cu.asarray(mu_sctonly).cuvec, txLUT, axLUT, np.array([-1], dtype=np.int32), Cnt, 1) atto = mmraux.putgaps(atto, txLUT, Cnt) # -------------------------------------------------------------- # > get norm components setting the geometry and axial to ones diff --git a/pyproject.toml b/pyproject.toml index a1e18c51..786e72f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = ["setuptools>=42", "wheel", "setuptools_scm[toml]>=3.4", - "ninst>=0.10.0", "numpy>=1.14", "miutil[cuda]>=0.4.0", + "cuvec>=2.5.0", "ninst>=0.10.0", "numpy>=1.14", "miutil[cuda]>=0.4.0", "scikit-build>=0.11.0", "cmake>=3.18", "ninja"] [tool.setuptools_scm] diff --git a/setup.cfg b/setup.cfg index e74a381a..1bcbefe6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,18 +39,20 @@ setup_requires= setuptools>=42 wheel setuptools_scm[toml] + cuvec>=2.5.0 + miutil[cuda]>=0.4.0 ninst>=0.10.0 numpy>=1.14 - miutil[cuda]>=0.4.0 scikit-build>=0.11.0 cmake>=3.18 ninja install_requires= + cuvec>=2.5.0 miutil>=0.6.0 nibabel>=2.4.0 nimpa>=2.0.0 - numpy>=1.14 ninst>=0.7.0 + numpy>=1.14 pydicom>=1.0.2 setuptools tqdm>=4.27 From 115142e4f0888c2a2e1ba6edc975a9f1640ca459 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 4 Feb 2021 14:58:07 +0000 Subject: [PATCH 52/64] fprj: fix for reduced dims, memset output safety --- niftypet/nipet/prj/src/prjf.cu | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/niftypet/nipet/prj/src/prjf.cu b/niftypet/nipet/prj/src/prjf.cu index 51307629..11434bc8 100644 --- a/niftypet/nipet/prj/src/prjf.cu +++ 
b/niftypet/nipet/prj/src/prjf.cu @@ -271,22 +271,22 @@ void gpu_fprj(float *d_sn, float *d_im, float *li2rng, short *li2sn, char *li2no } //----------------------------------------------------------------- + + //--- FULLY 3D + HANDLE_ERROR(cudaMemset(d_sn, 0, Nprj * snno * sizeof(float))); + // when rings are reduced expand the image to account for whole axial FOV if (nvz < SZ_IMZ) { - // copy the reduced image - float *d_imr; - HANDLE_ERROR(cudaMalloc(&d_imr, SZ_IMX * SZ_IMY * nvz * sizeof(float))); - HANDLE_ERROR( - cudaMemcpy(d_imr, d_im, SZ_IMX * SZ_IMY * nvz * sizeof(float), cudaMemcpyDeviceToDevice)); + float *d_imr = d_im; // save old pointer to reduced image input + // reallocate full size + HANDLE_ERROR(cudaMalloc(&d_im, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); // put zeros in the gaps of unused voxels HANDLE_ERROR(cudaMemset(d_im, 0, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); - // number of axial row for max threads int nar = NIPET_CU_THREADS / nvz; dim3 THRD(nvz, nar, 1); dim3 BLCK((SZ_IMY + nar - 1) / nar, SZ_IMX, 1); imExpand<<>>(d_im, d_imr, vz0, nvz); HANDLE_ERROR(cudaGetLastError()); - cudaFree(d_imr); } // float *d_li2rng; HANDLE_ERROR( cudaMalloc(&d_li2rng, N0li*N1li*sizeof(float)) ); @@ -335,17 +335,17 @@ void gpu_fprj(float *d_sn, float *d_im, float *li2rng, short *li2sn, char *li2no cudaEventRecord(stop, 0); cudaEventSynchronize(stop); + // cudaDeviceSynchronize(); float elapsedTime; cudaEventElapsedTime(&elapsedTime, start, stop); cudaEventDestroy(start); cudaEventDestroy(stop); if (Cnt.LOG <= LOGDEBUG) printf("DONE in %fs.\n", 0.001 * elapsedTime); - cudaDeviceSynchronize(); - - cudaFree(d_tt); - cudaFree(d_tv); - cudaFree(d_subs); + if (nvz < SZ_IMZ) HANDLE_ERROR(cudaFree(d_im)); + HANDLE_ERROR(cudaFree(d_tt)); + HANDLE_ERROR(cudaFree(d_tv)); + HANDLE_ERROR(cudaFree(d_subs)); HANDLE_ERROR(cudaFree(d_crs)); HANDLE_ERROR(cudaFree(d_s2c)); } From de24dd452d5001a9819aff4e128de0b2aab84a5b Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis 
Date: Thu, 4 Feb 2021 15:36:30 +0000 Subject: [PATCH 53/64] bprj: use cuvec --- niftypet/nipet/prj/mmrprj.py | 4 +- niftypet/nipet/prj/mmrrec.py | 6 ++- niftypet/nipet/prj/mmrsim.py | 9 +++- niftypet/nipet/prj/src/prj_module.cu | 39 ++++----------- niftypet/nipet/prj/src/prjb.cu | 73 ++++++++++------------------ niftypet/nipet/prj/src/prjb.h | 2 +- 6 files changed, 52 insertions(+), 81 deletions(-) diff --git a/niftypet/nipet/prj/mmrprj.py b/niftypet/nipet/prj/mmrprj.py index f7607f1d..675edd8f 100644 --- a/niftypet/nipet/prj/mmrprj.py +++ b/niftypet/nipet/prj/mmrprj.py @@ -199,10 +199,10 @@ def back_prj(sino, scanner_params, isub=ISUB_DEFAULT, dev_out=False): nvz = Cnt['rSZ_IMZ'] else: nvz = Cnt['SZ_IMZ'] - bimg = np.zeros((Cnt['SZ_IMX'], Cnt['SZ_IMY'], nvz), dtype=np.float32) + bimg = cu.zeros((Cnt['SZ_IMX'], Cnt['SZ_IMY'], nvz), dtype=np.float32) # > run back-projection - petprj.bprj(bimg, sinog, txLUT, axLUT, isub, Cnt) + petprj.bprj(bimg.cuvec, cu.asarray(sinog).cuvec, txLUT, axLUT, isub, Cnt) if not dev_out: # > change from GPU optimised image dimensions to the standard Siemens shape diff --git a/niftypet/nipet/prj/mmrrec.py b/niftypet/nipet/prj/mmrrec.py index 6b4bd2ba..0f9d0d57 100644 --- a/niftypet/nipet/prj/mmrrec.py +++ b/niftypet/nipet/prj/mmrrec.py @@ -288,13 +288,17 @@ def osemone(datain, mumaps, hst, scanner_params, recmod=3, itr=4, fwhm=0., psf=N sinoTIdx = np.zeros((Sn, Nprj + 1), dtype=np.int32) # -init sensitivity images for each subset imgsens = np.zeros((Sn, Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) + tmpsens = cu.zeros((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) for n in range(Sn): # first number of projection for the given subset sinoTIdx[n, 0] = Nprj sinoTIdx[n, 1:], s = get_subsets14(n, scanner_params) # sensitivity image - petprj.bprj(imgsens[n, :, :, :], ansng[sinoTIdx[n, 1:], :], txLUT, axLUT, sinoTIdx[n, 1:], + petprj.bprj(tmpsens.cuvec, + cu.asarray(ansng[sinoTIdx[n, 1:], :]).cuvec, txLUT, 
axLUT, sinoTIdx[n, 1:], Cnt) + imgsens[n] = tmpsens + del tmpsens # ------------------------------------- # -mask for reconstructed image. anything outside it is set to zero diff --git a/niftypet/nipet/prj/mmrsim.py b/niftypet/nipet/prj/mmrsim.py index e1635f55..2332fa85 100644 --- a/niftypet/nipet/prj/mmrsim.py +++ b/niftypet/nipet/prj/mmrsim.py @@ -1,6 +1,7 @@ """Simulations for image reconstruction with recommended reduced axial field of view""" import logging +import cuvec as cu import numpy as np from scipy import ndimage as ndi from tqdm.auto import trange @@ -250,6 +251,7 @@ def simulate_recon( # > init sensitivity images for each subset sim = np.zeros((Sn, Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) + tmpsim = cu.zeros((Cnt['SZ_IMY'], Cnt['SZ_IMX'], Cnt['SZ_IMZ']), dtype=np.float32) for n in trange(Sn, desc="sensitivity", leave=log.getEffectiveLevel() < logging.INFO): # first number of projection for the given subset @@ -257,9 +259,12 @@ def simulate_recon( sinoTIdx[n, 1:], s = mmrrec.get_subsets14(n, scanner_params) # > sensitivity image - petprj.bprj(sim[n, :, :, :], attsino[sinoTIdx[n, 1:], :], txLUT, axLUT, + petprj.bprj(tmpsim.cuvec, + cu.asarray(attsino[sinoTIdx[n, 1:], :]).cuvec, txLUT, axLUT, sinoTIdx[n, 1:], Cnt) - # ------------------------------------- + sim[n] = tmpsim + del tmpsim + # ------------------------------------- for _ in trange(nitr, desc="OSEM", disable=log.getEffectiveLevel() > logging.INFO, leave=log.getEffectiveLevel() < logging.INFO): diff --git a/niftypet/nipet/prj/src/prj_module.cu b/niftypet/nipet/prj/src/prj_module.cu index 95dd4d10..9309fea7 100644 --- a/niftypet/nipet/prj/src/prj_module.cu +++ b/niftypet/nipet/prj/src/prj_module.cu @@ -411,17 +411,18 @@ static PyObject *back_prj(PyObject *self, PyObject *args) { PyObject *o_txLUT; // sino to be back projected to image (both reshaped for GPU execution) - PyObject *o_sino; + PyCuVec *o_sino; // subsets for OSEM, first the default PyObject *o_subs; // 
output backprojected image - PyObject *o_bimg; + PyCuVec *o_bimg; //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ /* Parse the input tuple */ - if (!PyArg_ParseTuple(args, "OOOOOO", &o_bimg, &o_sino, &o_txLUT, &o_axLUT, &o_subs, &o_mmrcnst)) + if (!PyArg_ParseTuple(args, "OOOOOO", (PyObject **)&o_bimg, (PyObject **)&o_sino, &o_txLUT, + &o_axLUT, &o_subs, &o_mmrcnst)) return NULL; //^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -466,23 +467,15 @@ static PyObject *back_prj(PyObject *self, PyObject *args) { p_aw2ali = (PyArrayObject *)PyArray_FROM_OTF(pd_aw2ali, NPY_INT32, NPY_ARRAY_IN_ARRAY); - // sino object - PyArrayObject *p_sino = NULL; - p_sino = (PyArrayObject *)PyArray_FROM_OTF(o_sino, NPY_FLOAT32, NPY_ARRAY_IN_ARRAY); - // subsets if using e.g., OSEM PyArrayObject *p_subs = NULL; p_subs = (PyArrayObject *)PyArray_FROM_OTF(o_subs, NPY_INT32, NPY_ARRAY_IN_ARRAY); - - // output back-projection image - PyArrayObject *p_bim = NULL; - p_bim = (PyArrayObject *)PyArray_FROM_OTF(o_bimg, NPY_FLOAT32, NPY_ARRAY_INOUT_ARRAY2); //-- /* If that didn't work, throw an exception. 
*/ if (p_li2rno == NULL || p_li2sn == NULL || p_li2sn1 == NULL || p_li2nos == NULL || - p_aw2ali == NULL || p_s2c == NULL || p_sino == NULL || p_crs == NULL || p_subs == NULL || - p_li2rng == NULL || p_bim == NULL) { + p_aw2ali == NULL || p_s2c == NULL || !o_sino || p_crs == NULL || p_subs == NULL || + p_li2rng == NULL || !o_bimg) { // axLUTs Py_XDECREF(p_li2rno); Py_XDECREF(p_li2sn); @@ -495,15 +488,9 @@ static PyObject *back_prj(PyObject *self, PyObject *args) { // sino 2 crystals Py_XDECREF(p_s2c); Py_XDECREF(p_crs); - // sino object - Py_XDECREF(p_sino); // subset definition object Py_XDECREF(p_subs); - // back-projection image - PyArray_DiscardWritebackIfCopy(p_bim); - Py_XDECREF(p_bim); - return NULL; } @@ -519,7 +506,6 @@ static PyObject *back_prj(PyObject *self, PyObject *args) { char *li2nos = (char *)PyArray_DATA(p_li2nos); float *li2rng = (float *)PyArray_DATA(p_li2rng); float *crs = (float *)PyArray_DATA(p_crs); - float *sino = (float *)PyArray_DATA(p_sino); int Nprj = PyArray_DIM(p_subs, 0); int N0crs = PyArray_DIM(p_crs, 0); @@ -540,17 +526,16 @@ static PyObject *back_prj(PyObject *self, PyObject *args) { subs = subs_; } - float *bimg = (float *)PyArray_DATA(p_bim); - if (Cnt.LOG <= LOGDEBUG) - printf("i> back-projection image dimensions: %ld, %ld, %ld\n", PyArray_DIM(p_bim, 0), - PyArray_DIM(p_bim, 1), PyArray_DIM(p_bim, 2)); + printf("i> back-projection image dimensions: %ld, %ld, %ld\n", o_bimg->shape[0], + o_bimg->shape[1], o_bimg->shape[2]); // sets the device on which to calculate HANDLE_ERROR(cudaSetDevice(Cnt.DEVID)); //<><><<><><><><><><><><><><><><><><><><><<><><><><<><><><><><><><><><><><><><><><><><<><><><><><><> - gpu_bprj(bimg, sino, li2rng, li2sn, li2nos, s2c, aw2ali, crs, subs, Nprj, Naw, N0crs, Cnt); + gpu_bprj(o_bimg->vec.data(), o_sino->vec.data(), li2rng, li2sn, li2nos, s2c, aw2ali, crs, subs, + Nprj, Naw, N0crs, Cnt); //<><><><><><><><><><><>><><><><><><><><><<><><><><<><><><><><><><><><><><><><><><><><<><><><><><><> // Clean up 
@@ -562,12 +547,8 @@ static PyObject *back_prj(PyObject *self, PyObject *args) { Py_DECREF(p_aw2ali); Py_DECREF(p_s2c); Py_DECREF(p_crs); - Py_DECREF(p_sino); Py_DECREF(p_subs); - PyArray_ResolveWritebackIfCopy(p_bim); - Py_DECREF(p_bim); - if (subs_[0] == -1) free(subs); Py_INCREF(Py_None); diff --git a/niftypet/nipet/prj/src/prjb.cu b/niftypet/nipet/prj/src/prjb.cu index 91e9168b..8511427b 100644 --- a/niftypet/nipet/prj/src/prjb.cu +++ b/niftypet/nipet/prj/src/prjb.cu @@ -187,7 +187,7 @@ __global__ void bprj_oblq(const float *sino, float *im, const float *tt, const u } //-------------------------------------------------------------------------------------------------- -void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2nos, short *s2c, +void gpu_bprj(float *d_im, float *d_sino, float *li2rng, short *li2sn, char *li2nos, short *s2c, int *aw2ali, float *crs, int *subs, int Nprj, int Naw, int N0crs, Cnst Cnt) { int dev_id; @@ -252,14 +252,13 @@ void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2no } //----------------------------------------------------------------- - //--- FULLY 3D sino to be back-projected to image - float *d_sino; - HANDLE_ERROR(cudaMalloc(&d_sino, Nprj * snno * sizeof(float))); - HANDLE_ERROR(cudaMemcpy(d_sino, sino, Nprj * snno * sizeof(float), cudaMemcpyHostToDevice)); - - float *d_im; - HANDLE_ERROR(cudaMalloc(&d_im, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); - HANDLE_ERROR(cudaMemset(d_im, 0, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); + float *d_imf; + // when rings are reduced + if (nvz < SZ_IMZ) + HANDLE_ERROR(cudaMalloc(&d_imf, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); + else + d_imf = d_im; + HANDLE_ERROR(cudaMemset(d_imf, 0, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float))); //--- cudaMemcpyToSymbol(c_li2rng, li2rng, nil2r_c * sizeof(float2)); @@ -278,36 +277,24 @@ void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2no 
//----------------------------------------------------------------------- //============================================================================ - bprj_drct<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno); + bprj_drct<<>>(d_sino, d_imf, d_tt, d_tv, d_subs, snno); HANDLE_ERROR(cudaGetLastError()); //============================================================================ int zoff = nrng_c; - //> number of oblique sinograms + // number of oblique sinograms int Noblq = (nrng_c - 1) * nrng_c / 2; int Nz = ((Noblq + 127) / 128) * 128; //============================================================================ - bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff, nil2r_c); + bprj_oblq<<>>(d_sino, d_imf, d_tt, d_tv, d_subs, snno, zoff, nil2r_c); HANDLE_ERROR(cudaGetLastError()); zoff += Nz / 2; - bprj_oblq<<>>(d_sino, d_im, d_tt, d_tv, d_subs, snno, zoff, nil2r_c); + bprj_oblq<<>>(d_sino, d_imf, d_tt, d_tv, d_subs, snno, zoff, nil2r_c); HANDLE_ERROR(cudaGetLastError()); //============================================================================ - //============================================================================ - - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - float elapsedTime; - cudaEventElapsedTime(&elapsedTime, start, stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - if (Cnt.LOG <= LOGDEBUG) printf("DONE in %fs.\n", 0.001 * elapsedTime); - - cudaDeviceSynchronize(); - // // the actual axial size used (due to the customised ring subset used) // int vz0 = 2*Cnt.RNG_STRT; // int vz1 = 2*(Cnt.RNG_END-1); @@ -316,36 +303,30 @@ void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2no // when rings are reduced if (nvz < SZ_IMZ) { - float *d_imr; - HANDLE_ERROR(cudaMalloc(&d_imr, SZ_IMX * SZ_IMY * nvz * sizeof(float))); - HANDLE_ERROR(cudaMemset(d_imr, 0, SZ_IMX * SZ_IMY * nvz * sizeof(float))); // number of axial row for max threads int nar = NIPET_CU_THREADS / nvz; dim3 THRD(nvz, nar, 1); 
dim3 BLCK((SZ_IMY + nar - 1) / nar, SZ_IMX, 1); - imReduce<<>>(d_imr, d_im, vz0, nvz); + imReduce<<>>(d_im, d_imf, vz0, nvz); HANDLE_ERROR(cudaGetLastError()); - // copy to host memory - HANDLE_ERROR( - cudaMemcpy(bimg, d_imr, SZ_IMX * SZ_IMY * nvz * sizeof(float), cudaMemcpyDeviceToHost)); - cudaFree(d_im); - cudaFree(d_imr); + HANDLE_ERROR(cudaFree(d_imf)); if (Cnt.LOG <= LOGDEBUG) printf("i> reduced the axial (z) image size to %d\n", nvz); - } else { - // copy to host memory - HANDLE_ERROR( - cudaMemcpy(bimg, d_im, SZ_IMX * SZ_IMY * SZ_IMZ * sizeof(float), cudaMemcpyDeviceToHost)); - cudaFree(d_im); } - cudaFree(d_sino); - cudaFree(d_tt); - cudaFree(d_tv); - cudaFree(d_subs); - cudaFree(d_crs); - cudaFree(d_s2c); + cudaEventRecord(stop, 0); + cudaEventSynchronize(stop); + // cudaDeviceSynchronize(); + float elapsedTime; + cudaEventElapsedTime(&elapsedTime, start, stop); + cudaEventDestroy(start); + cudaEventDestroy(stop); + if (Cnt.LOG <= LOGDEBUG) printf("DONE in %fs.\n", 0.001 * elapsedTime); - return; + HANDLE_ERROR(cudaFree(d_tt)); + HANDLE_ERROR(cudaFree(d_tv)); + HANDLE_ERROR(cudaFree(d_subs)); + HANDLE_ERROR(cudaFree(d_crs)); + HANDLE_ERROR(cudaFree(d_s2c)); } //======================================================================= diff --git a/niftypet/nipet/prj/src/prjb.h b/niftypet/nipet/prj/src/prjb.h index 98da6422..d03b4e19 100644 --- a/niftypet/nipet/prj/src/prjb.h +++ b/niftypet/nipet/prj/src/prjb.h @@ -7,7 +7,7 @@ #define PRJB_H // used from Python -void gpu_bprj(float *bimg, float *sino, float *li2rng, short *li2sn, char *li2nos, short *s2c, +void gpu_bprj(float *d_im, float *d_sino, float *li2rng, short *li2sn, char *li2nos, short *s2c, int *aw2ali, float *crs, int *subs, int Nprj, int Naw, int N0crs, Cnst Cnt); // to be used within CUDA C reconstruction From 3b2e9c7d341aad6f2da4bcb10780719cd6a407cf Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Fri, 18 Jun 2021 17:35:51 +0100 Subject: [PATCH 54/64] update framework --- 
.github/workflows/comment-bot.yml | 2 ++ .github/workflows/test.yml | 18 ++++++------------ .gitignore | 15 ++++++--------- .pre-commit-config.yaml | 7 ++++--- 4 files changed, 18 insertions(+), 24 deletions(-) diff --git a/.github/workflows/comment-bot.yml b/.github/workflows/comment-bot.yml index 4451632e..b44ee7ba 100644 --- a/.github/workflows/comment-bot.yml +++ b/.github/workflows/comment-bot.yml @@ -29,6 +29,7 @@ jobs: post({ owner: context.repo.owner, repo: context.repo.repo, comment_id: context.payload.comment.id, content: "eyes"}) + github-token: ${{ secrets.GH_TOKEN }} - name: Tag Commit run: | git clone https://${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY} repo @@ -48,3 +49,4 @@ jobs: post({ owner: context.repo.owner, repo: context.repo.repo, comment_id: context.payload.comment.id, content: "rocket"}) + github-token: ${{ secrets.GH_TOKEN }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 95967ff1..ea647f68 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,7 +2,7 @@ name: Test on: [push, pull_request] jobs: check: - if: github.event_name != 'push' || github.ref != 'refs/heads/devel' + if: github.event_name != 'pull_request' || github.head_ref != 'devel' runs-on: ubuntu-latest strategy: matrix: @@ -68,6 +68,7 @@ jobs: with: requirements: twine setuptools wheel setuptools_scm[toml] ninst scikit-build build: sdist + gpg_key: ${{ secrets.GPG_KEY }} password: ${{ secrets.PYPI_TOKEN }} upload: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') }} env: @@ -81,16 +82,9 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} with: - tag_name: ${{ github.ref }} - release_name: nipet ${{ github.ref }} stable + name: nipet ${{ github.ref }} stable body_path: _CHANGES.md draft: true - - if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} - with: - upload_url: ${{ 
steps.create_release.outputs.upload_url }} - asset_path: dist/${{ steps.dist.outputs.targz }} - asset_name: ${{ steps.dist.outputs.targz }} - asset_content_type: application/gzip + files: | + dist/${{ steps.dist.outputs.targz }} + dist/${{ steps.dist.outputs.targz_asc }} diff --git a/.gitignore b/.gitignore index 1457f1b8..bd92b233 100644 --- a/.gitignore +++ b/.gitignore @@ -1,17 +1,14 @@ *.py[co] -__pycache__/ - -# build -MANIFEST *.so +__pycache__/ +/_skbuild/ +/_cmake_test_compile/ /niftypet/nipet/cmake/ /niftypet/nipet/_dist_ver.py +MANIFEST +/*.egg*/ /build/ /dist/ -/_skbuild/ -/_cmake_test_compile/ -/*.egg*/ -/.eggs/ - /.coverage* /coverage.xml +/.pytest_cache/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a5eded12..b0cdf49e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ default_language_version: python: python3 repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.0.1 hooks: - id: check-added-large-files - id: check-case-conflict @@ -26,9 +26,10 @@ repos: exclude: ^(.pre-commit-config.yaml|.github/workflows/test.yml)$ args: [-i] - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 + rev: 3.9.2 hooks: - id: flake8 + args: [-j8] additional_dependencies: - flake8-bugbear - flake8-comprehensions @@ -40,7 +41,7 @@ repos: - id: yapf args: [-i] - repo: https://github.com/PyCQA/isort - rev: 5.7.0 + rev: 5.8.0 hooks: - id: isort - repo: https://github.com/doublify/pre-commit-clang-format From 811943ea6ad3a66f953bfb96403a8480d3cc266e Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 11 Aug 2021 11:53:05 +0100 Subject: [PATCH 55/64] build: misc minor updates --- .github/workflows/test.yml | 16 +++++++++------- .pre-commit-config.yaml | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ea647f68..adc802df 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -70,19 
+70,21 @@ jobs: build: sdist gpg_key: ${{ secrets.GPG_KEY }} password: ${{ secrets.PYPI_TOKEN }} - upload: ${{ github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') }} + upload: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags') }} env: PATHTOOLS: ${{ github.workspace }}/NiftyPET_tools HMUDIR: ${{ github.workspace }} - - name: Changelog - run: git log --pretty='format:%d%n- %s%n%b---' $(git tag --sort=v:refname | tail -n2 | head -n1)..HEAD > _CHANGES.md - - if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') - id: create_release - uses: actions/create-release@v1 + - id: meta + name: Changelog + run: | + echo ::set-output name=tag::${GITHUB_REF#refs/tags/} + git log --pretty='format:%d%n- %s%n%b---' $(git tag --sort=v:refname | tail -n2 | head -n1)..HEAD > _CHANGES.md + - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') + uses: softprops/action-gh-release@v1 env: GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} with: - name: nipet ${{ github.ref }} stable + name: nipet ${{ steps.meta.outputs.tag }} stable body_path: _CHANGES.md draft: true files: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0cdf49e..f5733d03 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -41,7 +41,7 @@ repos: - id: yapf args: [-i] - repo: https://github.com/PyCQA/isort - rev: 5.8.0 + rev: 5.9.3 hooks: - id: isort - repo: https://github.com/doublify/pre-commit-clang-format From c27b4641243b026565d4a3a12e97177b7ff52307 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 14 Sep 2021 03:47:15 +0100 Subject: [PATCH 56/64] fix multiarch CUDA --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ec63899d..3db05fb9 100644 --- a/setup.py +++ b/setup.py @@ -194,7 +194,7 @@ def check_constants(): cmake_args = [f"-DNIPET_BUILD_VERSION={build_ver}", f"-DPython3_ROOT_DIR={sys.prefix}"] try: if nvcc_arches: - 
cmake_args.append("-DCMAKE_CUDA_ARCHITECTURES=" + " ".join(sorted(nvcc_arches))) + cmake_args.append("-DCMAKE_CUDA_ARCHITECTURES=" + ";".join(sorted(nvcc_arches))) except Exception as exc: if "sdist" not in sys.argv or any(i in sys.argv for i in ["build", "bdist", "wheel"]): log.warning("Import or CUDA device detection error:\n%s", exc) From ad9dd0acf5af9662595274e2300f5176534c10e8 Mon Sep 17 00:00:00 2001 From: Pawel Date: Mon, 20 Sep 2021 21:26:05 +0100 Subject: [PATCH 57/64] accounting for significant changes in resources.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ec63899d..4746fddc 100644 --- a/setup.py +++ b/setup.py @@ -170,7 +170,7 @@ def check_constants(): # if exists, import the resources and get the constants resources = cs.get_resources() # get the current setup, if any -Cnt = resources.get_setup() +Cnt = resources.get_mmr_constants() # hardware mu-maps hmu_dir = None From 7bc25bf3ecf741f0e7303698420afb06fc58aedb Mon Sep 17 00:00:00 2001 From: Pawel Date: Mon, 20 Sep 2021 21:56:10 +0100 Subject: [PATCH 58/64] changed mmr init function name --- niftypet/nipet/__init__.py | 2 +- niftypet/nipet/mmraux.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index 1fb9d805..69e241da 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ -50,7 +50,7 @@ from .img.pipe import mmrchain from .lm.mmrhist import dynamic_timings, mmrhist, randoms from .mmraux import explore_input as classify_input -from .mmraux import mMR_params as get_mmrparams +from .mmraux import get_mmrparams from .mmraux import sino2ssr from .mmrnorm import get_norm_sino from .prj.mmrprj import back_prj, frwd_prj diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index 30099fc7..db191400 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -1169,7 +1169,7 @@ def mmrinit(): return Cnt, txLUT, axLUT -def 
mMR_params(): +def get_mmrparams(): """get all scanner parameters in one dictionary""" Cnt, txLUT, axLUT = mmrinit() return {'Cnt': Cnt, 'txLUT': txLUT, 'axLUT': axLUT} From fb7e6d4aedd1578c383d348daf553f99a325f1ca Mon Sep 17 00:00:00 2001 From: Pawel Date: Mon, 20 Sep 2021 22:12:51 +0100 Subject: [PATCH 59/64] moved mMR hardware mu-maps setup after NiftyPET installation --- niftypet/nipet/mmraux.py | 42 ++++++++++++++++++++++++++++++++++++++-- setup.py | 30 ---------------------------- 2 files changed, 40 insertions(+), 32 deletions(-) diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index db191400..84a3930d 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -16,6 +16,9 @@ from niftypet import nimpa +from niftypet.ninst import cudasetup as cs +from niftypet.ninst import install_tools as tls + from . import mmr_auxe, resources log = logging.getLogger(__name__) @@ -1169,7 +1172,42 @@ def mmrinit(): return Cnt, txLUT, axLUT -def get_mmrparams(): - """get all scanner parameters in one dictionary""" +def get_mmrparams(hmu_dir=None): + """ get all scanner parameters in one dictionary. + hmudir: folder with the mMR hardware mu-maps if known; + they will be stored in resources.py for the future use. 
+ """ + + log.info( + dedent("""\ + -------------------------------------------------------------- + Finding hardware mu-maps + --------------------------------------------------------------""")) + + # get the local path to NiftyPET resources.py + path_resources = cs.path_niftypet_local() + # if exists, import the resources and get the constants + resources = cs.get_resources() + # get the current setup, if any + Cnt = resources.get_mmr_constants() + + # > hardware mu-maps + if Cnt.get("HMUDIR", None): + hmu_dir = Path(Cnt["HMUDIR"]) + # check each piece of the hardware components + for i in Cnt["HMULIST"]: + if not (hmu_dir / i).is_file(): + hmu_dir = None + break + # prompt for installation path + if hmu_dir is None: + Cnt["HMUDIR"] = tls.askdirectory(title="Folder for hardware mu-maps: ", name="HMUDIR") + # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + # update the path in resources.py + tls.update_resources(Cnt) + # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + log.info("hardware mu-maps have been located") + + Cnt, txLUT, axLUT = mmrinit() return {'Cnt': Cnt, 'txLUT': txLUT, 'axLUT': axLUT} diff --git a/setup.py b/setup.py index 825862ec..a9df8ccd 100644 --- a/setup.py +++ b/setup.py @@ -160,36 +160,6 @@ def check_constants(): nvcc_arches = [] log.error("could not set up CUDA:\n%s", exc) -log.info( - dedent("""\ - -------------------------------------------------------------- - Finding hardware mu-maps - --------------------------------------------------------------""")) -# get the local path to NiftyPET resources.py -path_resources = cs.path_niftypet_local() -# if exists, import the resources and get the constants -resources = cs.get_resources() -# get the current setup, if any -Cnt = resources.get_mmr_constants() - -# hardware mu-maps -hmu_dir = None -if Cnt.get("HMUDIR", None): - hmu_dir = Path(Cnt["HMUDIR"]) - # check each piece of the hardware components - for i in Cnt["HMULIST"]: - if not (hmu_dir / i).is_file(): - hmu_dir 
= None - break -# prompt for installation path -if hmu_dir is None: - Cnt["HMUDIR"] = tls.askdirectory(title="Folder for hardware mu-maps: ", name="HMUDIR") -# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# update the path in resources.py -tls.update_resources(Cnt) -# +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -log.info("hardware mu-maps have been located") - build_ver = ".".join(__version__.split('.')[:3]).split(".dev")[0] cmake_args = [f"-DNIPET_BUILD_VERSION={build_ver}", f"-DPython3_ROOT_DIR={sys.prefix}"] try: From 4a05c8295316b35d174783fd16346497b4d44cd2 Mon Sep 17 00:00:00 2001 From: Pawel Date: Mon, 20 Sep 2021 22:58:27 +0100 Subject: [PATCH 60/64] imporved the init of mMR with hardware mumaps at the same time --- niftypet/nipet/mmraux.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index 84a3930d..f50c7491 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -16,7 +16,6 @@ from niftypet import nimpa -from niftypet.ninst import cudasetup as cs from niftypet.ninst import install_tools as tls from . 
import mmr_auxe, resources @@ -1184,12 +1183,7 @@ def get_mmrparams(hmu_dir=None): Finding hardware mu-maps --------------------------------------------------------------""")) - # get the local path to NiftyPET resources.py - path_resources = cs.path_niftypet_local() - # if exists, import the resources and get the constants - resources = cs.get_resources() - # get the current setup, if any - Cnt = resources.get_mmr_constants() + Cnt, txLUT, axLUT = mmrinit() # > hardware mu-maps if Cnt.get("HMUDIR", None): @@ -1208,6 +1202,5 @@ def get_mmrparams(hmu_dir=None): # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ log.info("hardware mu-maps have been located") - - Cnt, txLUT, axLUT = mmrinit() + return {'Cnt': Cnt, 'txLUT': txLUT, 'axLUT': axLUT} From 616146784715e4cf5fbab5d4b34d7e9397e01c6a Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 21 Sep 2021 22:09:45 +0100 Subject: [PATCH 61/64] fix memleak --- niftypet/nipet/__init__.py | 3 +-- niftypet/nipet/mmraux.py | 2 -- niftypet/nipet/src/aux_module.cu | 9 +++++++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/niftypet/nipet/__init__.py b/niftypet/nipet/__init__.py index 69e241da..25c7e116 100644 --- a/niftypet/nipet/__init__.py +++ b/niftypet/nipet/__init__.py @@ -50,8 +50,7 @@ from .img.pipe import mmrchain from .lm.mmrhist import dynamic_timings, mmrhist, randoms from .mmraux import explore_input as classify_input -from .mmraux import get_mmrparams -from .mmraux import sino2ssr +from .mmraux import get_mmrparams, sino2ssr from .mmrnorm import get_norm_sino from .prj.mmrprj import back_prj, frwd_prj from .prj.mmrsim import simulate_recon, simulate_sino diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index f50c7491..70052766 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -15,7 +15,6 @@ from miutil.fdio import hasext from niftypet import nimpa - from niftypet.ninst import install_tools as tls from . 
import mmr_auxe, resources @@ -1202,5 +1201,4 @@ def get_mmrparams(hmu_dir=None): # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ log.info("hardware mu-maps have been located") - return {'Cnt': Cnt, 'txLUT': txLUT, 'axLUT': axLUT} diff --git a/niftypet/nipet/src/aux_module.cu b/niftypet/nipet/src/aux_module.cu index 7f493e08..3af8972d 100644 --- a/niftypet/nipet/src/aux_module.cu +++ b/niftypet/nipet/src/aux_module.cu @@ -471,6 +471,11 @@ static PyObject *mmr_rgaps(PyObject *self, PyObject *args) { return Py_None; } +void free_capsule(PyObject *capsule) { + void *data = PyCapsule_GetPointer(capsule, NULL); + free(data); +} + //==================================================================================================== static PyObject *mmr_span11LUT(PyObject *self, PyObject *args) { // Dictionary of scanner constants @@ -500,9 +505,13 @@ static PyObject *mmr_span11LUT(PyObject *self, PyObject *args) { dims[0] = Cnt.NSN1; PyArrayObject *s1s11_out = (PyArrayObject *)PyArray_SimpleNewFromData(1, dims, NPY_INT16, span11.li2s11); + PyObject *capsule = PyCapsule_New(span11.li2s11, NULL, free_capsule); + PyArray_SetBaseObject(s1s11_out, capsule); dims[0] = Cnt.NSN11; PyArrayObject *s1nos_out = (PyArrayObject *)PyArray_SimpleNewFromData(1, dims, NPY_INT8, span11.NSinos); + capsule = PyCapsule_New(span11.NSinos, NULL, free_capsule); + PyArray_SetBaseObject(s1nos_out, capsule); PyObject *o_out = PyTuple_New(2); PyTuple_SetItem(o_out, 0, PyArray_Return(s1s11_out)); From da5262fef9b96d1f2b51e205a501ef70d5c93be0 Mon Sep 17 00:00:00 2001 From: Pawel Date: Thu, 23 Sep 2021 17:56:13 +0100 Subject: [PATCH 62/64] slight change of recognising GIF parcellation image files --- niftypet/nipet/mmraux.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/niftypet/nipet/mmraux.py b/niftypet/nipet/mmraux.py index 70052766..60b45d0d 100644 --- a/niftypet/nipet/mmraux.py +++ b/niftypet/nipet/mmraux.py @@ -916,7 +916,7 @@ def get_niifiles(dfile, datain): 
log.debug('NIfTI for bias corrected T1w of the object:\n{}'.format(fbc[0])) # T1-based labels after parcellation - flbl = glob.glob(os.path.join(os.path.dirname(dfile), '*giflabels.nii*')) + flbl = glob.glob(os.path.join(os.path.dirname(dfile), '*gif*labels.nii*')) if len(flbl) == 1: datain['T1lbl'] = flbl[0] log.debug('NIfTI for regional parcellations of the object:\n{}'.format(flbl[0])) From 88669e6ac90aaae8d26f4615e6e695d0a9f8a590 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 30 Sep 2021 12:49:51 +0100 Subject: [PATCH 63/64] fix rename mMR_params => get_mmrparams --- niftypet/nipet/img/mmrimg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/niftypet/nipet/img/mmrimg.py b/niftypet/nipet/img/mmrimg.py index d5665e41..cff4e6c0 100644 --- a/niftypet/nipet/img/mmrimg.py +++ b/niftypet/nipet/img/mmrimg.py @@ -1267,7 +1267,7 @@ def rmumaps(datain, Cnt, t0=0, t1=0, use_stored=False): if os.path.isfile(datain['pCT']): # reconstruct PET image with default settings to be used to alight pCT mu-map - params = mmraux.mMR_params() + params = mmraux.get_mmrparams() Cnt_ = params['Cnt'] txLUT_ = params['txLUT'] axLUT_ = params['axLUT'] From e7d9cfdbf7029e09cdd83fb8025c5a6985459af8 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Thu, 30 Sep 2021 13:11:09 +0100 Subject: [PATCH 64/64] misc framework updates --- .github/workflows/test.yml | 24 +++++++++++------------- .pre-commit-config.yaml | 6 +++--- setup.cfg | 12 +++++------- setup.py | 0 4 files changed, 19 insertions(+), 23 deletions(-) mode change 100644 => 100755 setup.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index adc802df..4e93ce37 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -2,17 +2,12 @@ name: Test on: [push, pull_request] jobs: check: - if: github.event_name != 'pull_request' || github.head_ref != 'devel' + if: github.event_name != 'pull_request' || github.repository_owner != 'NiftyPET' runs-on: ubuntu-latest - 
strategy: - matrix: - python: [3.6, 3.9] - name: Check py${{ matrix.python }} + name: Check steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python }} - name: set PYSHA run: echo "PYSHA=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV - uses: actions/cache@v1 @@ -24,7 +19,7 @@ jobs: pip install -U pre-commit sudo apt-get install -yqq clang-format - uses: reviewdog/action-setup@v1 - - if: github.event_name != 'schedule' + - if: github.event_name == 'push' || github.event_name == 'pull_request' name: comment run: | if [[ $EVENT == pull_request ]]; then @@ -39,20 +34,23 @@ jobs: EVENT: ${{ github.event_name }} - run: pre-commit run -a --show-diff-on-failure test: - if: github.event_name != 'pull_request' || github.head_ref != 'devel' + if: github.event_name != 'pull_request' || github.repository_owner != 'NiftyPET' + name: Test py${{ matrix.python }} runs-on: [self-hosted, python, cuda, matlab] - name: Test + strategy: + matrix: + python: [3.6, 3.9] steps: - uses: actions/checkout@v2 with: fetch-depth: 0 - name: Run setup-python - run: setup-python -p3.7 + run: setup-python -p${{ matrix.python }} - run: pip install -U --no-binary nimpa -e .[dev] - run: pytest - - run: codecov + - uses: codecov/codecov-action@v1 - name: Post Run setup-python - run: setup-python -p3.7 -Dr + run: setup-python -p${{ matrix.python }} -Dr if: ${{ always() }} deploy: needs: [check, test] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f5733d03..b5956c1f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,10 +21,10 @@ repos: - id: todo name: Check TODO language: pygrep + args: [-i] entry: TODO types: [text] exclude: ^(.pre-commit-config.yaml|.github/workflows/test.yml)$ - args: [-i] - repo: https://gitlab.com/pycqa/flake8 rev: 3.9.2 hooks: @@ -36,7 +36,7 @@ repos: - flake8-debugger - flake8-string-format - repo: https://github.com/google/yapf - rev: 6db9374 + rev: v0.31.0 hooks: - id: 
yapf args: [-i] @@ -45,7 +45,7 @@ repos: hooks: - id: isort - repo: https://github.com/doublify/pre-commit-clang-format - rev: master + rev: '6230247' hooks: - id: clang-format files: \.(cc?|cuh?|cxx|cpp|h|hpp|hxx|java|js)$ diff --git a/setup.cfg b/setup.cfg index 1bcbefe6..06f66b3e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -64,9 +64,13 @@ dev= pytest-cov pytest-timeout pytest-xdist - codecov examples=jupyter; ipywidgets; matplotlib; brainweb +[flake8] +max_line_length=99 +extend-ignore=W504,E225,E261,E701,P1 +exclude=.git,__pycache__,build,dist,.eggs + [yapf] spaces_before_comment=15, 20 arithmetic_precedence_indication=true @@ -83,12 +87,6 @@ profile=black line_length=99 known_first_party=niftypet,tests -[flake8] -statistics=True -max_line_length=99 -extend-ignore=W504,E225,E261,E701,P1 -exclude=.git,__pycache__,build,dist,.eggs - [tool:pytest] timeout=3600 log_level=INFO diff --git a/setup.py b/setup.py old mode 100644 new mode 100755