diff --git a/.appveyor.yml b/.appveyor.yml
index 326e4515a..a613b1993 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -14,7 +14,6 @@ version: '{build}'
 os:
   - Visual Studio 2022
   - Visual Studio 2019
-  - Visual Studio 2015
 
 platform:
   - x64
diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index c7d1576e0..a39f59282 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -79,7 +79,7 @@ jobs:
           compiler: gcc
           compiler_version: 12
           cuda_version: "12.1.0"
-          BUILD_FLAGS: "-DSTIR_OPENMP=ON -DCMAKE_CXX_STANDARD=14"
+          BUILD_FLAGS: "-DSTIR_OPENMP=ON -DCMAKE_CXX_STANDARD=17"
           BUILD_TYPE: "Release"
           parallelproj: "ON"
           ROOT: "OFF"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2511aa596..47275ae2d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,7 +33,7 @@ include(src/cmake/SetC++Version.cmake)
 
 # minimum C++ version required (you can still ask for a more recent one
 # by setting CMAKE_CXX_STANDARD)
-UseCXX(14)
+UseCXX(17)
 
 # set default build-type to Release
 if(NOT CMAKE_BUILD_TYPE)
@@ -118,18 +118,12 @@ if(NOT DISABLE_LLN_MATRIX)
 endif()
 
 if(NOT DISABLE_CERN_ROOT)
-  find_package(CERN_ROOT)
+  find_package(CERN_ROOT 6.28.00) # required for C++17
   if (CERN_ROOT_FOUND)
     message(STATUS "ROOT Version: ${CERN_ROOT_VERSION}")
     if (${CERN_ROOT_VERSION} VERSION_GREATER 6.29.99)
         message(STATUS "ROOT Version is >= 6.30. Setting the minimum CXX version to 20.")
         UseCXX(20)
-    elseif (${CERN_ROOT_VERSION} VERSION_GREATER 6.27.99)
-        message(STATUS "ROOT Version is >= 6.28. Setting the minimum CXX version to 17.")
-        UseCXX(17)
-    elseif (${CERN_ROOT_VERSION} VERSION_GREATER 6.23.99)
-        message(STATUS "ROOT Version is >= 6.24. Setting the minimum CXX version to 14.")
-        UseCXX(14)
     endif()
   endif()
 endif()
diff --git a/documentation/release_6.2.htm b/documentation/release_6.2.htm
new file mode 100644
index 000000000..b73467460
--- /dev/null
+++ b/documentation/release_6.2.htm
@@ -0,0 +1,125 @@
+<!DOCTYPE HTML>
+<html lang="en">
+  <head>
+    <title>Summary of changes in STIR release 6.2</title>
+  </head>
+
+  <body>
+    <h1>Summary of changes in STIR release 6.2</h1>
+
+    <p>
+      This version is 100% backwards compatible with STIR 6.1. However, C++-17 is now required.
+    </p>
+
+<h2>Overall summary</h2>
+    <p>
+     
+    </p>
+
+    <p>
+      Of course, there is also the usual code-cleanup and improvements to the documentation.
+    </p>
+
+    <p>
+      This release contains mainly code written by Nicole Jurjew (UCL) and Kris Thielemans (UCL).
+    </p>
+
+<h2>Patch release info</h2>
+<ul>
+  <li>
+    6.2.0 released ??/2024<br>
+    <a href="https://github.com/UCL/STIR/milestone/11">GitHub Milestone 6.2</a>
+  </li>
+  <!--
+      <li> 4.0.1 released 28/04/2020
+        <ul>
+          <li><a href=https://github.com/UCL/STIR/pull/513>PR 513</a>  (suppress warnings with clang)</li>
+        </ul>
+        -->
+</ul>
+
+<h2> Summary for end users (also to be read by developers)</h2>
+
+
+<h3>New functionality</h3>
+<ul>
+</ul>
+
+
+<h3>Changed functionality</h3>
+<ul>
+</ul>
+
+
+<h3>Bug fixes</h3>
+<ul>
+</ul>
+
+<h3>Build system</h3>
+<ul
+  <li>
+    C++-17 is now required.
+  </li>
+</ul>
+
+<h3>Known problems</h3>
+    <p>See <a href=https://github.com/UCL/STIR/labels/bug>our issue tracker</a>.</p>
+
+
+<H2>What's new for developers (aside from what should be obvious
+from the above):</H2>
+
+<h3>Changed functionality</h3>
+<ul>
+    <li>
+      <code>Array</code> classes by default use contiguous memory allocation (as opposed to a sequence of 1D vectors).
+      This could speed up memory allocation and destruction of arrays with a high number of underlying 1D vectors. It also allows reading/writing
+      data in one call to the C++ library, as opposed to many small calls. Also added move constructors to the <code>Array</code>,
+      <code>VectorWithOffset</code> classes.
+      <br>
+      <a href=https://github.com/UCL/STIR/pull/1236>issue #1236</a>.
+    </li>
+</ul>
+
+<h3>Bug fixes</h3>
+<ul>
+  <li>
+    <code>PoissonLogLikelihoodWithLinearModelForMeanAndProjData</code> had a (minor?) problem with TOF data
+    that when computing the gradient, the normalisation object was not set-up with the TOF data,
+    but non-TOF instead. This did not happen in our normal reconstruction code, and would have thrown an error
+    if it occured.
+    <br>
+    Fixed in <a href=https://github.com/UCL/STIR/pull/1427>issue #1427</a>.
+  </li>
+</ul>
+
+
+<h3>Other code changes</h3>
+<ul>
+  <li>
+    Fixes an incompatibility with C++20.
+  </li>
+</ul>
+
+<h3>Build system</h3>
+<ul>
+  <li>
+    Force C++ version according to CERN ROOT versions: ROOT 6.28.10 needs C++17 and 6.30.2 needs C++20.
+    Also some fixes when relying on <code>root-config</code>.
+  </li>
+</ul>
+
+<h3>Test changes</h3>
+
+<h4>C++ tests</h4>
+<ul>
+  <li>
+    Objective functions (both projection-data and list-mode) and priors now have a numerical test for <code>accumulate_Hessian_times_input</code>
+    <br>
+    <a href=https://github.com/UCL/STIR/pull/1418> PR #1418</a>
+  </li>
+</ul>
+
+</body>
+
+</html>
diff --git a/src/buildblock/ProjDataInfoCylindrical.cxx b/src/buildblock/ProjDataInfoCylindrical.cxx
index 8878214a0..8cc5942cb 100644
--- a/src/buildblock/ProjDataInfoCylindrical.cxx
+++ b/src/buildblock/ProjDataInfoCylindrical.cxx
@@ -43,7 +43,6 @@ using std::min_element;
 using std::max_element;
 using std::min;
 using std::max;
-using std::swap;
 using std::endl;
 using std::string;
 using std::pair;
@@ -113,7 +112,7 @@ ProjDataInfoCylindrical::ProjDataInfoCylindrical(const shared_ptr<Scanner>& scan
                   min_ring_diff[segment_num],
                   max_ring_diff[segment_num],
                   segment_num);
-          swap(min_ring_diff[segment_num], max_ring_diff[segment_num]);
+          std::swap(min_ring_diff[segment_num], max_ring_diff[segment_num]);
         }
   }
 
diff --git a/src/cmake/FindCERN_ROOT.cmake b/src/cmake/FindCERN_ROOT.cmake
index 92c07159b..487977964 100644
--- a/src/cmake/FindCERN_ROOT.cmake
+++ b/src/cmake/FindCERN_ROOT.cmake
@@ -168,4 +168,6 @@ if (CERN_ROOT_DEBUG)
 endif()
 
 INCLUDE(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(CERN_ROOT "CERN ROOT not found. If you do have it, set ROOT_DIR (preferred), ROOTSYS or add root-config to your path" CERN_ROOT_VERSION CERN_ROOT_LIBRARIES CERN_ROOT_INCLUDE_DIRS)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(CERN_ROOT FAIL_MESSAGE "CERN ROOT not found. If you do have it, set ROOT_DIR (preferred), ROOTSYS or add root-config to your path"
+  VERSION_VAR CERN_ROOT_VERSION
+  REQUIRED_VARS CERN_ROOT_LIBRARIES CERN_ROOT_INCLUDE_DIRS)
diff --git a/src/include/stir/Array.h b/src/include/stir/Array.h
index 837ae7be4..83ca9e9f2 100644
--- a/src/include/stir/Array.h
+++ b/src/include/stir/Array.h
@@ -3,6 +3,7 @@
     Copyright (C) 2000 PARAPET partners
     Copyright (C) 2000 - 2011-10-14, Hammersmith Imanet Ltd
     Copyright (C) 2011-07-01 - 2012, Kris Thielemans
+    Copyright (C) 2023 - 2024, University College London
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license
@@ -34,6 +35,7 @@
 #include "stir/ByteOrder.h"
 #include "stir/IndexRange.h"
 #include "stir/deprecated.h"
+#include "stir/shared_ptr.h"
 
 START_NAMESPACE_STIR
 class NumericType;
@@ -134,20 +136,52 @@ class Array : public NumericVectorWithOffset<Array<num_dimensions - 1, elemT>, e
   //! Construct an Array of given range of indices, elements are initialised to 0
   inline explicit Array(const IndexRange<num_dimensions>&);
 
+  //! Construct an Array pointing to existing contiguous data
+  /*!
+    \arg data_sptr should point to a contiguous block of correct size.
+    The constructed Array will essentially be a "view" of the
+       \c data_sptr.get() block. Therefore, any modifications to the array will modify the data at \c data_sptr.get().
+    This will be true until the Array is resized.
+
+    The C-array \data_ptr will be accessed with the last dimension running fastest
+    ("row-major" order).
+  */
+  inline Array(const IndexRange<num_dimensions>& range, shared_ptr<elemT[]> data_sptr);
+
 #ifndef SWIG
-  //! Construct an Array from an object of its base_type
-  inline Array(const base_type& t);
-#else
   // swig 2.0.4 gets confused by base_type (due to numeric template arguments)
   // therefore, we declare this constructor using the "self" type,
   // i.e. it's just a copy-constructor.
   // This is less powerful as in C++, but swig-generated interfaces don't need to know about the base_type anyway
-  inline Array(const self& t);
+  //! Construct an Array from an object of its base_type
+  inline Array(const base_type& t);
 #endif
 
+  //! Copy constructor
+  // implementation needed as the above doesn't disable the auto-generated copy-constructor
+  inline Array(const self& t);
+
   //! virtual destructor, frees up any allocated memory
   inline ~Array() override;
 
+  //! Swap content/members of 2 objects
+  // implementation in .h because of templates/friends/whatever, see https://stackoverflow.com/a/61020224
+  friend inline void swap(Array& first, Array& second) // nothrow
+  {
+    using std::swap;
+    //  swap the members of two objects
+    swap(static_cast<base_type&>(first), static_cast<base_type&>(second));
+    swap(first._allocated_full_data_ptr, second._allocated_full_data_ptr);
+  }
+
+  //! move constructor
+  /*! implementation uses the copy-and-swap idiom, see e.g. https://stackoverflow.com/a/3279550 */
+  Array(Array&& other) noexcept;
+
+  //! assignment operator
+  /*! implementation uses the copy-and-swap idiom, see e.g. https://stackoverflow.com/a/3279550 */
+  Array& operator=(Array other);
+
   /*! @name functions returning full_iterators*/
   //@{
   //! start value for iterating through all elements in the array, see full_iterator
@@ -169,14 +203,18 @@ class Array : public NumericVectorWithOffset<Array<num_dimensions - 1, elemT>, e
   //! return the total number of elements in this array
   inline size_t size_all() const;
 
-  /* Implementation note: grow() and resize() are inline such that they are
-     defined for any type you happen to use for elemT. Otherwise, we would
-     need instantiation in Array.cxx.
-  */
   //! change the array to a new range of indices, new elements are set to 0
+  /*! Current behaviour is that when resizing to a smaller array, the same memory
+    will be used. However, when growing any of the dimensions, a new Array
+    will be allocated and the data copied.
+
+    If the array points to an existing block of data, resizing is therefore problematic.
+    When growing the array, the resized array will no longer point to the original block
+    of data.
+  */
   inline virtual void resize(const IndexRange<num_dimensions>& range);
 
-  //! grow the array to a new range of indices, new elements are set to 0
+  //! alias for resize()
   virtual inline void grow(const IndexRange<num_dimensions>& range);
 
   //! return sum of all elements
@@ -250,6 +288,44 @@ class Array : public NumericVectorWithOffset<Array<num_dimensions - 1, elemT>, e
   //! set values of the array to self*a+y*b where a and b are scalar or arrays
   template <class T>
   inline void sapyb(const T& a, const Array& y, const T& b);
+
+  //! \name access to the data via a pointer
+  //@{
+  //! return if the array is contiguous in memory
+  bool is_contiguous() const;
+
+  //! member function for access to the data via a elemT*
+  inline elemT* get_full_data_ptr();
+
+  //! member function for access to the data via a const elemT*
+  inline const elemT* get_const_full_data_ptr() const;
+
+  //! signal end of access to elemT*
+  inline void release_full_data_ptr();
+
+  //! signal end of access to const elemT*
+  inline void release_const_full_data_ptr() const;
+  //@}
+
+private:
+  //! boolean to test if get_full_data_ptr is called
+  // This variable is declared mutable such that get_const_full_data_ptr() can change it.
+  mutable bool _full_pointer_access;
+
+  //! A pointer to the allocated chunk if the array is constructed that way, zero otherwise
+  shared_ptr<elemT[]> _allocated_full_data_ptr;
+
+  //! change the array to a new range of indices, pointing to \c data_ptr
+  /*!
+    \arg data_ptr should point to a contiguous block of correct size
+
+    The C-array \data_ptr will be accessed with the last dimension running fastest
+    ("row-major" order).
+  */
+  inline void init(const IndexRange<num_dimensions>& range, elemT* const data_ptr, bool copy_data);
+  // Make sure that we can access init() recursively
+  template <int num_dimensions2, class elemT2>
+  friend class Array;
 };
 
 /**************************************************
@@ -307,12 +383,46 @@ class Array<1, elemT> : public NumericVectorWithOffset<elemT, elemT>
   //! constructor given first and last indices, initialising elements to 0
   inline Array(const int min_index, const int max_index);
 
+  //! constructor given an IndexRange<1>, pointing to existing contiguous data
+  /*!
+    \arg data_ptr should point to a contiguous block of correct size.
+    The constructed Array will essentially be a "view" of the
+    \c data_sptr block. Therefore, any modifications to the array will modify the data at \a data_sptr.
+    This will be the case until the Array is resized.
+  */
+  inline Array(const IndexRange<1>& range, shared_ptr<elemT[]> data_sptr);
+
+  //! constructor given an IndexRange<1> from existing contiguous data (will copy)
+  /*!
+    \arg data_ptr should point to a contiguous block of correct size.
+  */
+  inline Array(const IndexRange<1>& range, const elemT* const data_ptr);
+
   //! constructor from basetype
   inline Array(const NumericVectorWithOffset<elemT, elemT>& il);
 
+  //! Copy constructor
+  // implementation needed as the above doesn't replace the normal copy-constructor
+  // and the auto-generated is disabled because of the move constructor
+  inline Array(const self& t);
+
+  //! move constructor
+  /*! implementation uses the copy-and-swap idiom, see e.g. https://stackoverflow.com/a/3279550 */
+  Array(Array&& other) noexcept;
+
   //! virtual destructor
   inline ~Array() override;
 
+  //! Swap content/members of 2 objects
+  // implementation in .h because of templates/friends/whatever, see https://stackoverflow.com/a/61020224
+  friend inline void swap(Array& first, Array& second) // nothrow
+  {
+    swap(static_cast<base_type&>(first), static_cast<base_type&>(second));
+  }
+
+  //! assignment
+  inline Array& operator=(const Array& other);
+
   /*! @name functions returning full_iterators*/
   //@{
   //! start value for iterating through all elements in the array, see full_iterator
@@ -347,6 +457,38 @@ class Array<1, elemT> : public NumericVectorWithOffset<elemT, elemT>
   // Array::resize initialises new elements to 0
   inline void resize(const int min_index, const int max_index) override;
 
+  //! \name access to the data via a pointer
+  //@{
+  //! return if the array is contiguous in memory (always \c true)
+  bool is_contiguous() const
+  {
+    return true;
+  }
+  //! member function for access to the data via a elemT*
+  inline elemT* get_full_data_ptr()
+  {
+    return this->get_data_ptr();
+  }
+
+  //! member function for access to the data via a const elemT*
+  inline const elemT* get_const_full_data_ptr() const
+  {
+    return this->get_const_data_ptr();
+  }
+
+  //! signal end of access to elemT*
+  inline void release_full_data_ptr()
+  {
+    this->release_data_ptr();
+  }
+
+  //! signal end of access to const elemT*
+  inline void release_const_full_data_ptr() const
+  {
+    this->release_const_data_ptr();
+  }
+  //@}
+
   //! return sum of all elements
   inline elemT sum() const;
 
@@ -424,6 +566,17 @@ class Array<1, elemT> : public NumericVectorWithOffset<elemT, elemT>
 
   inline const elemT& at(const BasicCoordinate<1, int>& c) const;
   //@}
+
+private:
+  // Make sure we can call init() recursively.
+  template <int num_dimensions2, class elemT2>
+  friend class Array;
+
+  //! change vector with new index range and point to \c data_ptr
+  /*!
+    \arg data_ptr should start to a contiguous block of correct size
+  */
+  inline void init(const IndexRange<1>& range, elemT* const data_ptr, bool copy_data);
 };
 
 END_NAMESPACE_STIR
diff --git a/src/include/stir/Array.inl b/src/include/stir/Array.inl
index 98fce7eb1..aa3c11dce 100644
--- a/src/include/stir/Array.inl
+++ b/src/include/stir/Array.inl
@@ -4,6 +4,7 @@
     Copyright (C) 2000 PARAPET partners
     Copyright (C) 2000 - 2011-01-11, Hammersmith Imanet Ltd
     Copyright (C) 2011-07-01 - 2012, Kris Thielemans
+    Copyright (C) 2023 - 2024, University College London
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license
@@ -24,12 +25,29 @@
 #include <algorithm>
 #include "stir/assign.h"
 #include "stir/error.h"
+//#include "stir/info.h"
+//#include <string>
 
 START_NAMESPACE_STIR
 
 /**********************************************
  inlines for Array<num_dimensions, elemT>
  **********************************************/
+template <int num_dimensions, typename elemT>
+bool
+Array<num_dimensions, elemT>::is_contiguous() const
+{
+  auto mem = &(*this->begin_all());
+  for (auto i = this->get_min_index(); i < this->get_max_index(); ++i)
+    {
+      if (!(*this)[i].is_contiguous())
+        return false;
+      mem += (*this)[i].size_all();
+      if (mem != &(*(*this)[i + 1].begin_all()))
+        return false;
+    }
+  return true;
+}
 
 template <int num_dimensions, typename elemT>
 void
@@ -42,6 +60,21 @@ Array<num_dimensions, elemT>::resize(const IndexRange<num_dimensions>& range)
     (*iter).resize(*range_iter);
 }
 
+template <int num_dimensions, typename elemT>
+void
+Array<num_dimensions, elemT>::init(const IndexRange<num_dimensions>& range, elemT* const data_ptr, bool copy_data)
+{
+  base_type::resize(range.get_min_index(), range.get_max_index());
+  auto iter = this->begin();
+  auto range_iter = range.begin();
+  auto ptr = data_ptr;
+  for (; iter != this->end(); ++iter, ++range_iter)
+    {
+      (*iter).init(*range_iter, ptr, copy_data);
+      ptr += range_iter->size_all();
+    }
+}
+
 template <int num_dimensions, typename elemT>
 void
 Array<num_dimensions, elemT>::grow(const IndexRange<num_dimensions>& range)
@@ -51,29 +84,75 @@ Array<num_dimensions, elemT>::grow(const IndexRange<num_dimensions>& range)
 
 template <int num_dimensions, typename elemT>
 Array<num_dimensions, elemT>::Array()
-    : base_type()
+    : base_type(),
+      _allocated_full_data_ptr(nullptr)
 {}
 
 template <int num_dimensions, typename elemT>
 Array<num_dimensions, elemT>::Array(const IndexRange<num_dimensions>& range)
-    : base_type()
+    : base_type(),
+      _allocated_full_data_ptr(new elemT[range.size_all()])
 {
-  grow(range);
+  // info("Array constructor range " + std::to_string(reinterpret_cast<std::size_t>(this->_allocated_full_data_ptr)) + " of size "
+  // + std::to_string(range.size_all())); set elements to zero
+  std::for_each(this->_allocated_full_data_ptr.get(), this->_allocated_full_data_ptr.get() + range.size_all(), [](elemT& e) {
+    assign(e, 0);
+  });
+  this->init(range, this->_allocated_full_data_ptr.get(), false);
+}
+
+template <int num_dimensions, typename elemT>
+Array<num_dimensions, elemT>::Array(const IndexRange<num_dimensions>& range, shared_ptr<elemT[]> data_sptr)
+{
+  this->_allocated_full_data_ptr = data_sptr;
+  this->init(range, this->_allocated_full_data_ptr.get(), false);
 }
 
 template <int num_dimensions, typename elemT>
-#ifdef SWIG
-// swig-specific work-around (see Array.h)
 Array<num_dimensions, elemT>::Array(const self& t)
-#else
+    : base_type(t),
+      _allocated_full_data_ptr(nullptr)
+{
+  // info("constructor " + std::to_string(num_dimensions) + "copy of size " + std::to_string(this->size_all()));
+}
+
+#ifndef SWIG
+// swig cannot parse this ATM, but we don't need it anyway in the wrappers
+template <int num_dimensions, typename elemT>
 Array<num_dimensions, elemT>::Array(const base_type& t)
+    : base_type(t),
+      _allocated_full_data_ptr(nullptr)
+{
+  // info("constructor basetype " + std::to_string(num_dimensions) + " of size " + std::to_string(this->size_all()));
+}
 #endif
-: base_type(t)
-{}
 
 template <int num_dimensions, typename elemT>
 Array<num_dimensions, elemT>::~Array()
-{}
+{
+  if (this->_allocated_full_data_ptr)
+    {
+      // info("Array destructor full_data_ptr " + std::to_string(reinterpret_cast<std::size_t>(this->_allocated_full_data_ptr)) +
+      // " of size " + std::to_string(this->size_all())); delete [] this->_allocated_full_data_ptr;
+    }
+}
+
+template <int num_dimensions, typename elemT>
+Array<num_dimensions, elemT>::Array(Array<num_dimensions, elemT>&& other) noexcept
+    : Array()
+{
+  swap(*this, other);
+  // info("move constructor " + std::to_string(num_dimensions) + "copy of size " + std::to_string(this->size_all()));
+}
+
+template <int num_dimensions, typename elemT>
+Array<num_dimensions, elemT>&
+Array<num_dimensions, elemT>::operator=(Array<num_dimensions, elemT> other)
+{
+  swap(*this, other);
+  // info("Array= " + std::to_string(num_dimensions) + "copy of size " + std::to_string(this->size_all()));
+  return *this;
+}
 
 template <int num_dimensions, typename elemT>
 typename Array<num_dimensions, elemT>::full_iterator
@@ -151,6 +230,7 @@ Array<num_dimensions, elemT>::get_index_range() const
     }
   return IndexRange<num_dimensions>(range);
 }
+
 template <int num_dimensions, typename elemT>
 size_t
 Array<num_dimensions, elemT>::size_all() const
@@ -162,6 +242,81 @@ Array<num_dimensions, elemT>::size_all() const
   return acc;
 }
 
+/*!
+  If is_contiguous() is \c false, calls error(). Otherwise, return a
+  \c elemT* to the first element of the array.
+
+  Use only in emergency cases...
+
+  To prevent invalidating the safety checks (and making
+  reimplementation more difficult), NO manipulation with
+  the array is allowed between the pairs
+      get_full_data_ptr() and release_full_data_ptr()
+  and
+      get_const_full_data_ptr() and release_const_full_data_ptr().
+  (This is checked with assert() in DEBUG mode.)
+*/
+template <int num_dimensions, typename elemT>
+elemT*
+Array<num_dimensions, elemT>::get_full_data_ptr()
+{
+  this->_full_pointer_access = true;
+  if (!this->is_contiguous())
+    error("Array::get_full_data_ptr() called for non-contiguous array.");
+  return &(*this->begin_all());
+};
+
+/*!
+  If is_contiguous() is \c false, calls error(). Otherwise, return a
+  \c const \c elemT* to the first element of the array.
+
+  Use get_const_full_data_ptr() when you are not going to modify
+  the data.
+
+  \see get_full_data_ptr()
+*/
+template <int num_dimensions, typename elemT>
+const elemT*
+Array<num_dimensions, elemT>::get_const_full_data_ptr() const
+{
+  this->_full_pointer_access = true;
+  if (!this->is_contiguous())
+    error("Array::get_const_full_data_ptr() called for non-contiguous array.");
+  return &(*this->begin_all_const());
+};
+
+/*!
+  This has to be used when access to the elemT* returned by get_full_data_ptr() is
+  finished. It updates
+  the Array with any changes you made, and allows access to
+  the other member functions again.
+
+  \see get_full_data_ptr()
+*/
+template <int num_dimensions, typename elemT>
+void
+Array<num_dimensions, elemT>::release_full_data_ptr()
+{
+  assert(this->_full_pointer_access);
+
+  this->_full_pointer_access = false;
+}
+
+/*!
+  This has to be used when access to the const elemT* returned by get_const_full_data_ptr() is
+  finished. It allows access to the other member functions again.
+
+  \see get_const_full_data_ptr()
+*/
+
+template <int num_dimensions, typename elemT>
+void
+Array<num_dimensions, elemT>::release_const_full_data_ptr() const
+{
+  assert(this->_full_pointer_access);
+  this->_full_pointer_access = false;
+}
+
 template <int num_dimensions, typename elemT>
 elemT
 Array<num_dimensions, elemT>::sum() const
@@ -384,6 +539,12 @@ Array<num_dimensions, elemT>::sapyb(const T& a, const Array& y, const T& b)
 /**********************************************
  inlines for Array<1, elemT>
  **********************************************/
+template <class elemT>
+void
+Array<1, elemT>::init(const IndexRange<1>& range, elemT* const data_ptr, bool copy_data)
+{
+  base_type::init(range.get_min_index(), range.get_max_index(), data_ptr, copy_data);
+}
 
 template <class elemT>
 void
@@ -449,15 +610,46 @@ Array<1, elemT>::Array(const int min_index, const int max_index)
   grow(min_index, max_index);
 }
 
+template <class elemT>
+Array<1, elemT>::Array(const IndexRange<1>& range, shared_ptr<elemT[]> data_sptr)
+    : base_type(range.get_min_index(), range.get_max_index(), data_sptr)
+{}
+
+template <class elemT>
+Array<1, elemT>::Array(const IndexRange<1>& range, const elemT* const data_ptr)
+    : base_type(range.get_min_index(), range.get_max_index(), data_ptr)
+{}
+
 template <class elemT>
 Array<1, elemT>::Array(const base_type& il)
     : base_type(il)
 {}
 
+template <typename elemT>
+Array<1, elemT>::Array(const Array<1, elemT>& other)
+    : base_type(other)
+{}
+
 template <typename elemT>
 Array<1, elemT>::~Array()
 {}
 
+template <typename elemT>
+Array<1, elemT>::Array(Array<1, elemT>&& other) noexcept
+    : Array()
+{
+  swap(*this, other);
+}
+
+template <typename elemT>
+Array<1, elemT>&
+Array<1, elemT>::operator=(const Array<1, elemT>& other)
+{
+  // use the base_type assignment, as this tries to avoid reallocating memory
+  base_type::operator=(other);
+  return *this;
+}
+
 template <typename elemT>
 typename Array<1, elemT>::full_iterator
 Array<1, elemT>::begin_all()
diff --git a/src/include/stir/IO/read_data.h b/src/include/stir/IO/read_data.h
index a898eeb5e..1baefc818 100644
--- a/src/include/stir/IO/read_data.h
+++ b/src/include/stir/IO/read_data.h
@@ -2,6 +2,7 @@
 #define __stir_IO_read_data_H__
 /*
     Copyright (C) 2004- 2007, Hammersmith Imanet Ltd
+    Copyright (C) 2024, University College London
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0
diff --git a/src/include/stir/IO/read_data.inl b/src/include/stir/IO/read_data.inl
index 7351576c8..e8c6b3081 100644
--- a/src/include/stir/IO/read_data.inl
+++ b/src/include/stir/IO/read_data.inl
@@ -1,5 +1,6 @@
 /*
     Copyright (C) 2004- 2007, Hammersmith Imanet Ltd
+    Copyright (C) 2024, University College London
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0
@@ -34,6 +35,10 @@ template <int num_dimensions, class IStreamT, class elemT>
 inline Succeeded
 read_data_help(is_not_1d, IStreamT& s, Array<num_dimensions, elemT>& data, const ByteOrder byte_order)
 {
+  if (data.is_contiguous())
+    return read_data_1d(s, data, byte_order);
+
+  // otherwise, recurse
   for (typename Array<num_dimensions, elemT>::iterator iter = data.begin(); iter != data.end(); ++iter)
     {
       if (read_data(s, *iter, byte_order) == Succeeded::no)
diff --git a/src/include/stir/IO/read_data_1d.h b/src/include/stir/IO/read_data_1d.h
index 0bd70136e..1784df7ea 100644
--- a/src/include/stir/IO/read_data_1d.h
+++ b/src/include/stir/IO/read_data_1d.h
@@ -34,15 +34,15 @@ namespace detail
 
   This function might propagate any exceptions by std::istream::read.
  */
-template <class elemT>
-inline Succeeded read_data_1d(std::istream& s, Array<1, elemT>& data, const ByteOrder byte_order);
+template <int num_dimensions, class elemT>
+inline Succeeded read_data_1d(std::istream& s, Array<num_dimensions, elemT>& data, const ByteOrder byte_order);
 
 /* \ingroup Array_IO_detail
   \brief  This is the (internal) function that does the actual reading from a FILE*.
   \internal
  */
-template <class elemT>
-inline Succeeded read_data_1d(FILE*&, Array<1, elemT>& data, const ByteOrder byte_order);
+template <int num_dimensions, class elemT>
+inline Succeeded read_data_1d(FILE*&, Array<num_dimensions, elemT>& data, const ByteOrder byte_order);
 
 } // end namespace detail
 END_NAMESPACE_STIR
diff --git a/src/include/stir/IO/read_data_1d.inl b/src/include/stir/IO/read_data_1d.inl
index d0c8a2d71..0c841cfda 100644
--- a/src/include/stir/IO/read_data_1d.inl
+++ b/src/include/stir/IO/read_data_1d.inl
@@ -8,6 +8,7 @@
 */
 /*
     Copyright (C) 2004- 2009, Hammersmith Imanet Ltd
+    Copyright (C) 2024, University College London
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0
@@ -27,9 +28,9 @@ namespace detail
 
 /***************** version for istream *******************************/
 
-template <class elemT>
+template <int num_dimensions, class elemT>
 Succeeded
-read_data_1d(std::istream& s, Array<1, elemT>& data, const ByteOrder byte_order)
+read_data_1d(std::istream& s, Array<num_dimensions, elemT>& data, const ByteOrder byte_order)
 {
   if (!s || (dynamic_cast<std::ifstream*>(&s) != 0 && !dynamic_cast<std::ifstream*>(&s)->is_open())
       || (dynamic_cast<std::fstream*>(&s) != 0 && !dynamic_cast<std::fstream*>(&s)->is_open()))
@@ -41,9 +42,9 @@ read_data_1d(std::istream& s, Array<1, elemT>& data, const ByteOrder byte_order)
   // note: find num_to_read (using size()) outside of s.read() function call
   // otherwise Array::check_state() in size() might abort if
   // get_data_ptr() is called before size() (which is compiler dependent)
-  const std::streamsize num_to_read = static_cast<std::streamsize>(data.size()) * sizeof(elemT);
-  s.read(reinterpret_cast<char*>(data.get_data_ptr()), num_to_read);
-  data.release_data_ptr();
+  const std::streamsize num_to_read = static_cast<std::streamsize>(data.size_all()) * sizeof(elemT);
+  s.read(reinterpret_cast<char*>(data.get_full_data_ptr()), num_to_read);
+  data.release_full_data_ptr();
 
   if (!s)
     {
@@ -53,8 +54,8 @@ read_data_1d(std::istream& s, Array<1, elemT>& data, const ByteOrder byte_order)
 
   if (!byte_order.is_native_order())
     {
-      for (int i = data.get_min_index(); i <= data.get_max_index(); ++i)
-        ByteOrder::swap_order(data[i]);
+      for (auto iter = data.begin_all(); iter != data.end_all(); ++iter)
+        ByteOrder::swap_order(*iter);
     }
 
   return Succeeded::yes;
@@ -63,9 +64,9 @@ read_data_1d(std::istream& s, Array<1, elemT>& data, const ByteOrder byte_order)
 /***************** version for FILE *******************************/
 // largely a copy of above, but with calls to stdio function
 
-template <class elemT>
+template <int num_dimensions, class elemT>
 Succeeded
-read_data_1d(FILE*& fptr_ref, Array<1, elemT>& data, const ByteOrder byte_order)
+read_data_1d(FILE*& fptr_ref, Array<num_dimensions, elemT>& data, const ByteOrder byte_order)
 {
   FILE* fptr = fptr_ref;
   if (fptr == NULL || ferror(fptr))
@@ -77,9 +78,9 @@ read_data_1d(FILE*& fptr_ref, Array<1, elemT>& data, const ByteOrder byte_order)
   // note: find num_to_read (using size()) outside of s.read() function call
   // otherwise Array::check_state() in size() might abort if
   // get_data_ptr() is called before size() (which is compiler dependent)
-  const std::size_t num_to_read = static_cast<std::size_t>(data.size());
-  const std::size_t num_read = fread(reinterpret_cast<char*>(data.get_data_ptr()), sizeof(elemT), num_to_read, fptr);
-  data.release_data_ptr();
+  const std::size_t num_to_read = static_cast<std::size_t>(data.size_all());
+  const std::size_t num_read = fread(reinterpret_cast<char*>(data.get_full_data_ptr()), sizeof(elemT), num_to_read, fptr);
+  data.release_full_data_ptr();
 
   if (ferror(fptr) || num_to_read != num_read)
     {
@@ -89,8 +90,8 @@ read_data_1d(FILE*& fptr_ref, Array<1, elemT>& data, const ByteOrder byte_order)
 
   if (!byte_order.is_native_order())
     {
-      for (int i = data.get_min_index(); i <= data.get_max_index(); ++i)
-        ByteOrder::swap_order(data[i]);
+      for (auto iter = data.begin_all(); iter != data.end_all(); ++iter)
+        ByteOrder::swap_order(*iter);
     }
 
   return Succeeded::yes;
diff --git a/src/include/stir/IO/write_data_1d.h b/src/include/stir/IO/write_data_1d.h
index e37621036..e96c8c05b 100644
--- a/src/include/stir/IO/write_data_1d.h
+++ b/src/include/stir/IO/write_data_1d.h
@@ -10,6 +10,7 @@
 */
 /*
     Copyright (C) 2004- 2009, Hammersmith Imanet Ltd
+    Copyright (C) 2024, University College London
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0
@@ -35,9 +36,9 @@ namespace detail
   This function does not throw any exceptions. Exceptions thrown by std::ostream::write
   are caught.
  */
-template <class elemT>
+template <int num_dimensions, class elemT>
 inline Succeeded
-write_data_1d(std::ostream& s, const Array<1, elemT>& data, const ByteOrder byte_order, const bool can_corrupt_data);
+write_data_1d(std::ostream& s, const Array<num_dimensions, elemT>& data, const ByteOrder byte_order, const bool can_corrupt_data);
 /*! \ingroup Array_IO_detail
   \brief This is an internal function called by \c write_data(). It does the actual writing
    to \c FILE* using stdio functions.
@@ -45,9 +46,9 @@ write_data_1d(std::ostream& s, const Array<1, elemT>& data, const ByteOrder byte
   This function does not throw any exceptions.
 
  */
-template <class elemT>
+template <int num_dimensions, class elemT>
 inline Succeeded
-write_data_1d(FILE*& fptr_ref, const Array<1, elemT>& data, const ByteOrder byte_order, const bool can_corrupt_data);
+write_data_1d(FILE*& fptr_ref, const Array<num_dimensions, elemT>& data, const ByteOrder byte_order, const bool can_corrupt_data);
 } // namespace detail
 
 END_NAMESPACE_STIR
diff --git a/src/include/stir/IO/write_data_1d.inl b/src/include/stir/IO/write_data_1d.inl
index 8a704d985..b1a0a9c29 100644
--- a/src/include/stir/IO/write_data_1d.inl
+++ b/src/include/stir/IO/write_data_1d.inl
@@ -8,6 +8,7 @@
 */
 /*
     Copyright (C) 2004- 2009, Hammersmith Imanet Ltd
+    Copyright (C) 2024, University College London
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0
@@ -27,9 +28,9 @@ namespace detail
 
 /***************** version for ostream *******************************/
 
-template <class elemT>
+template <int num_dimensions, class elemT>
 inline Succeeded
-write_data_1d(std::ostream& s, const Array<1, elemT>& data, const ByteOrder byte_order, const bool can_corrupt_data)
+write_data_1d(std::ostream& s, const Array<num_dimensions, elemT>& data, const ByteOrder byte_order, const bool can_corrupt_data)
 {
   if (!s || (dynamic_cast<std::ofstream*>(&s) != 0 && !dynamic_cast<std::ofstream*>(&s)->is_open())
       || (dynamic_cast<std::fstream*>(&s) != 0 && !dynamic_cast<std::fstream*>(&s)->is_open()))
@@ -44,7 +45,7 @@ write_data_1d(std::ostream& s, const Array<1, elemT>& data, const ByteOrder byte
   /*
   if (!byte_order.is_native_order())
   {
-  Array<1, elemT> a_copy(data);
+  Array<num_dimensions, elemT> a_copy(data);
   for(int i=data.get_min_index(); i<=data.get_max_index(); i++)
   ByteOrder::swap_order(a_copy[i]);
   return write_data(s, a_copy, ByteOrder::native, true);
@@ -52,32 +53,32 @@ write_data_1d(std::ostream& s, const Array<1, elemT>& data, const ByteOrder byte
   */
   if (!byte_order.is_native_order())
     {
-      Array<1, elemT>& data_ref = const_cast<Array<1, elemT>&>(data);
-      for (int i = data.get_min_index(); i <= data.get_max_index(); ++i)
-        ByteOrder::swap_order(data_ref[i]);
+      Array<num_dimensions, elemT>& data_ref = const_cast<Array<num_dimensions, elemT>&>(data);
+      for (auto iter = data_ref.begin_all(); iter != data_ref.end_all(); ++iter)
+        ByteOrder::swap_order(*iter);
     }
 
   // note: find num_to_write (using size()) outside of s.write() function call
   // otherwise Array::check_state() in size() might abort if
   // get_const_data_ptr() is called before size() (which is compiler dependent)
-  const std::streamsize num_to_write = static_cast<std::streamsize>(data.size()) * sizeof(elemT);
+  const std::streamsize num_to_write = static_cast<std::streamsize>(data.size_all()) * sizeof(elemT);
   bool writing_ok = true;
   try
     {
-      s.write(reinterpret_cast<const char*>(data.get_const_data_ptr()), num_to_write);
+      s.write(reinterpret_cast<const char*>(data.get_const_full_data_ptr()), num_to_write);
     }
   catch (...)
     {
       writing_ok = false;
     }
 
-  data.release_const_data_ptr();
+  data.release_const_full_data_ptr();
 
   if (!can_corrupt_data && !byte_order.is_native_order())
     {
-      Array<1, elemT>& data_ref = const_cast<Array<1, elemT>&>(data);
-      for (int i = data.get_min_index(); i <= data.get_max_index(); ++i)
-        ByteOrder::swap_order(data_ref[i]);
+      Array<num_dimensions, elemT>& data_ref = const_cast<Array<num_dimensions, elemT>&>(data);
+      for (auto iter = data_ref.begin_all(); iter != data_ref.end_all(); ++iter)
+        ByteOrder::swap_order(*iter);
     }
 
   if (!writing_ok || !s)
@@ -92,9 +93,9 @@ write_data_1d(std::ostream& s, const Array<1, elemT>& data, const ByteOrder byte
 /***************** version for FILE *******************************/
 // largely a copy of above, but with calls to stdio function
 
-template <class elemT>
+template <int num_dimensions, class elemT>
 inline Succeeded
-write_data_1d(FILE*& fptr_ref, const Array<1, elemT>& data, const ByteOrder byte_order, const bool can_corrupt_data)
+write_data_1d(FILE*& fptr_ref, const Array<num_dimensions, elemT>& data, const ByteOrder byte_order, const bool can_corrupt_data)
 {
   FILE* fptr = fptr_ref;
   if (fptr == 0 || ferror(fptr))
@@ -109,7 +110,7 @@ write_data_1d(FILE*& fptr_ref, const Array<1, elemT>& data, const ByteOrder byte
   /*
   if (!byte_order.is_native_order())
   {
-  Array<1, elemT> a_copy(data);
+  Array<num_dimensions, elemT> a_copy(data);
   for(int i=data.get_min_index(); i<=data.get_max_index(); i++)
   ByteOrder::swap_order(a_copy[i]);
   return write_data(s, a_copy, ByteOrder::native, true);
@@ -117,25 +118,25 @@ write_data_1d(FILE*& fptr_ref, const Array<1, elemT>& data, const ByteOrder byte
   */
   if (!byte_order.is_native_order())
     {
-      Array<1, elemT>& data_ref = const_cast<Array<1, elemT>&>(data);
-      for (int i = data.get_min_index(); i <= data.get_max_index(); ++i)
-        ByteOrder::swap_order(data_ref[i]);
+      Array<num_dimensions, elemT>& data_ref = const_cast<Array<num_dimensions, elemT>&>(data);
+      for (auto iter = data_ref.begin_all(); iter != data_ref.end_all(); ++iter)
+        ByteOrder::swap_order(*iter);
     }
 
   // note: find num_to_write (using size()) outside of s.write() function call
   // otherwise Array::check_state() in size() might abort if
   // get_const_data_ptr() is called before size() (which is compiler dependent)
-  const std::size_t num_to_write = static_cast<std::size_t>(data.size());
+  const std::size_t num_to_write = static_cast<std::size_t>(data.size_all());
   const std::size_t num_written
-      = fwrite(reinterpret_cast<const char*>(data.get_const_data_ptr()), sizeof(elemT), num_to_write, fptr);
+      = fwrite(reinterpret_cast<const char*>(data.get_const_full_data_ptr()), sizeof(elemT), num_to_write, fptr);
 
-  data.release_const_data_ptr();
+  data.release_const_full_data_ptr();
 
   if (!can_corrupt_data && !byte_order.is_native_order())
     {
-      Array<1, elemT>& data_ref = const_cast<Array<1, elemT>&>(data);
-      for (int i = data.get_min_index(); i <= data.get_max_index(); ++i)
-        ByteOrder::swap_order(data_ref[i]);
+      Array<num_dimensions, elemT>& data_ref = const_cast<Array<num_dimensions, elemT>&>(data);
+      for (auto iter = data_ref.begin_all(); iter != data_ref.end_all(); ++iter)
+        ByteOrder::swap_order(*iter);
     }
 
   if (num_written != num_to_write || ferror(fptr))
diff --git a/src/include/stir/IndexRange.h b/src/include/stir/IndexRange.h
index e16bbc957..882cfb33c 100644
--- a/src/include/stir/IndexRange.h
+++ b/src/include/stir/IndexRange.h
@@ -90,6 +90,9 @@ class IndexRange : public VectorWithOffset<IndexRange<num_dimensions - 1>>
   //! Construct a regular range given by sizes (minimum indices will be 0)
   inline IndexRange(const BasicCoordinate<num_dimensions, int>& sizes);
 
+  //! return the total number of elements in this range
+  inline size_t size_all() const;
+
   // these are derived from VectorWithOffset
   //  TODO these should be overloaded, to set regular_range as well.
   /*
@@ -139,6 +142,8 @@ class IndexRange<1>
   inline int get_min_index() const;
   inline int get_max_index() const;
   inline int get_length() const;
+  //! return the total number of elements in this range
+  inline size_t size_all() const;
 
   inline bool operator==(const IndexRange<1>& range2) const;
 
diff --git a/src/include/stir/IndexRange.inl b/src/include/stir/IndexRange.inl
index 2e981cbd4..8e02a65a2 100644
--- a/src/include/stir/IndexRange.inl
+++ b/src/include/stir/IndexRange.inl
@@ -66,6 +66,20 @@ IndexRange<num_dimensions>::IndexRange(const BasicCoordinate<num_dimensions, int
   this->fill(lower_dims);
 }
 
+template <int num_dimensions>
+std::size_t
+IndexRange<num_dimensions>::size_all() const
+{
+  this->check_state();
+  if (this->is_regular_range == regular_true && this->get_length() > 0)
+    return this->get_length() * this->begin()->size_all();
+  // else
+  size_t acc = 0;
+  for (int i = this->get_min_index(); i <= this->get_max_index(); i++)
+    acc += this->num[i].size_all();
+  return acc;
+}
+
 template <int num_dimensions>
 bool
 IndexRange<num_dimensions>::operator==(const IndexRange<num_dimensions>& range2) const
@@ -150,6 +164,12 @@ IndexRange<1>::get_length() const
   return max - min + 1;
 }
 
+std::size_t
+IndexRange<1>::size_all() const
+{
+  return std::size_t(this->get_length());
+}
+
 bool
 IndexRange<1>::operator==(const IndexRange<1>& range2) const
 {
diff --git a/src/include/stir/NumericVectorWithOffset.h b/src/include/stir/NumericVectorWithOffset.h
index 0d1383668..fae4620f0 100644
--- a/src/include/stir/NumericVectorWithOffset.h
+++ b/src/include/stir/NumericVectorWithOffset.h
@@ -4,7 +4,7 @@
     Copyright (C) 2000 PARAPET partners
     Copyright (C) 2000 - 2005-06-03, Hammersmith Imanet Ltd
     Copyright (C) 2011-07-01 - 2012, Kris Thielemans
-    Copyright (C) 2020, UCL
+    Copyright (C) 2020, 2023 University College London
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license
@@ -50,18 +50,30 @@ class NumericVectorWithOffset : public VectorWithOffset<T>
   typedef VectorWithOffset<T> base_type;
 
 public:
-  //! Construct an empty NumericVectorWithOffset
-  inline NumericVectorWithOffset();
-
-  //! Construct a NumericVectorWithOffset of given length
-  inline explicit NumericVectorWithOffset(const int hsz);
-
-  //! Construct a NumericVectorWithOffset of elements with offset \c min_index
-  inline NumericVectorWithOffset(const int min_index, const int max_index);
+  using base_type::base_type;
 
   //! Constructor from an object of this class' base_type
   inline NumericVectorWithOffset(const VectorWithOffset<T>& t);
 
+  //! Constructor from an object of this class' base_type
+  inline NumericVectorWithOffset(const NumericVectorWithOffset& t)
+      : NumericVectorWithOffset(static_cast<const base_type&>(t))
+  {}
+
+  //! Swap content/members of 2 objects
+  // implementation in .h because of templates/friends/whatever, see https://stackoverflow.com/a/61020224
+  friend inline void swap(NumericVectorWithOffset& first, NumericVectorWithOffset& second) // nothrow
+  {
+    swap(static_cast<base_type&>(first), static_cast<base_type&>(second));
+  }
+
+  //! move constructor
+  /*! implementation uses the copy-and-swap idiom, see e.g. https://stackoverflow.com/a/3279550 */
+  NumericVectorWithOffset(NumericVectorWithOffset&& other) noexcept;
+
+  //! assignment
+  NumericVectorWithOffset& operator=(const NumericVectorWithOffset& other);
+
   // arithmetic operations with a vector, combining element by element
 
   //! adding vectors, element by element
diff --git a/src/include/stir/NumericVectorWithOffset.inl b/src/include/stir/NumericVectorWithOffset.inl
index 0371ac205..dba3a7a97 100644
--- a/src/include/stir/NumericVectorWithOffset.inl
+++ b/src/include/stir/NumericVectorWithOffset.inl
@@ -4,7 +4,7 @@
     Copyright (C) 2000 PARAPET partners
     Copyright (C) 2000 - 2005-06-03, Hammersmith Imanet Ltd
     Copyright (C) 2011-07-01 - 2012, Kris Thielemans
-    Copyright (C) 2013, 2020 University College London
+    Copyright (C) 2013, 2020, 2023 University College London
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license
@@ -14,7 +14,7 @@
 /*!
   \file
   \ingroup Array
-  \brief inline implementations for NumericVectorWithOffset
+  \brief inline implementations for stir::NumericVectorWithOffset
 
   \author Kris Thielemans
   \author PARAPET project
@@ -28,24 +28,25 @@
 START_NAMESPACE_STIR
 
 template <class T, class NUMBER>
-inline NumericVectorWithOffset<T, NUMBER>::NumericVectorWithOffset()
-    : base_type()
-{}
-
-template <class T, class NUMBER>
-inline NumericVectorWithOffset<T, NUMBER>::NumericVectorWithOffset(const int hsz)
-    : base_type(hsz)
+NumericVectorWithOffset<T, NUMBER>::NumericVectorWithOffset(const VectorWithOffset<T>& t)
+    : base_type(t)
 {}
 
 template <class T, class NUMBER>
-inline NumericVectorWithOffset<T, NUMBER>::NumericVectorWithOffset(const int min_index, const int max_index)
-    : base_type(min_index, max_index)
-{}
+NumericVectorWithOffset<T, NUMBER>::NumericVectorWithOffset(NumericVectorWithOffset<T, NUMBER>&& other) noexcept
+    : NumericVectorWithOffset()
+{
+  swap(*this, other);
+}
 
+// assignment
 template <class T, class NUMBER>
-NumericVectorWithOffset<T, NUMBER>::NumericVectorWithOffset(const VectorWithOffset<T>& t)
-    : base_type(t)
-{}
+NumericVectorWithOffset<T, NUMBER>&
+NumericVectorWithOffset<T, NUMBER>::operator=(const NumericVectorWithOffset<T, NUMBER>& other)
+{
+  base_type::operator=(other);
+  return *this;
+}
 
 // addition
 template <class T, class NUMBER>
diff --git a/src/include/stir/VectorWithOffset.h b/src/include/stir/VectorWithOffset.h
index 81b8c85d8..78043ea10 100644
--- a/src/include/stir/VectorWithOffset.h
+++ b/src/include/stir/VectorWithOffset.h
@@ -4,10 +4,10 @@
     Copyright (C) 2000 PARAPET partners
     Copyright (C) 2000 - 2007-10-08, Hammersmith Imanet Ltd
     Copyright (C) 2012-06-01 - 2012, Kris Thielemans
+    Copyright (C) 2023 - 2024, University College London
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license
-    Copyright (C) 2000- 2012, Hammersmith Imanet Ltd
     See STIR/LICENSE.txt for details
 */
 #ifndef __stir_VectorWithOffset_H__
@@ -23,7 +23,8 @@
 
 */
 
-#include "stir/common.h"
+#include "stir/shared_ptr.h"
+#include "stir/deprecated.h"
 #include "boost/iterator/iterator_adaptor.hpp"
 #include "boost/iterator/reverse_iterator.hpp"
 
@@ -138,11 +139,47 @@ class VectorWithOffset
   //! Construct a VectorWithOffset with offset \c min_index (initialised with \c  T())
   inline VectorWithOffset(const int min_index, const int max_index);
 
-  //! Construct a VectorWithOffset of given length using existing data (no initialisation)
-  inline explicit VectorWithOffset(const int hsz, T* const data_ptr, T* const end_of_data_ptr);
+#if STIR_VERSION < 070000
+  //! Construct a VectorWithOffset of given length pointing to existing data
+  /*!
+    \warning This refers to the original memory range, so any modifications to this object will modify
+    the original data as well.
 
-  //! Construct a VectorWithOffset with offset \c min_index using existing data (no initialisation)
-  inline VectorWithOffset(const int min_index, const int max_index, T* const data_ptr, T* const end_of_data_ptr);
+    \deprecated
+  */
+  STIR_DEPRECATED VectorWithOffset(const int hsz, T* const data_ptr, T* const end_of_data_ptr);
+
+  //! Construct a VectorWithOffset with offset \c min_index pointing to existing data
+  /*!
+    \warning This refers to the original memory range, so any modifications to this object will modify
+    the original data as well.
+
+    \deprecated
+  */
+  STIR_DEPRECATED inline VectorWithOffset(const int min_index, const int max_index, T* const data_ptr, T* const end_of_data_ptr);
+#endif
+
+  //! Construct a VectorWithOffset of given length from a bare pointer (copying data)
+  VectorWithOffset(const int hsz, const T* const data_ptr);
+
+  //! Construct a VectorWithOffset with offset \c min_index from a bare pointer (copying data)
+  inline VectorWithOffset(const int min_index, const int max_index, const T* const data_ptr);
+
+  //! Construct a VectorWithOffset sharing existing data
+  /*!
+    \warning This refers to the original memory range, so any modifications to this object will modify
+    the original data as well.
+  */
+  inline VectorWithOffset(const int min_index, const int max_index, shared_ptr<T[]> data_sptr);
+
+  //! Construct a VectorWithOffset sharing existing data
+  /*!
+    \warning This refers to the original memory range, so any modifications to this object will modify
+    the original data as well.
+  */
+  inline VectorWithOffset(const int sz, shared_ptr<T[]> data_sptr)
+      : VectorWithOffset(0, sz - 1, data_sptr)
+  {}
 
   //! copy constructor
   inline VectorWithOffset(const VectorWithOffset& il);
@@ -150,6 +187,25 @@ class VectorWithOffset
   //! Destructor
   inline virtual ~VectorWithOffset();
 
+  //! Swap content/members of 2 objects
+  // implementation in .h because of templates/friends/whatever, see https://stackoverflow.com/a/61020224
+  friend inline void swap(VectorWithOffset& first, VectorWithOffset& second) // nothrow
+  {
+    using std::swap;
+    //  swap the members of two objects
+    swap(first.num, second.num);
+    swap(first.length, second.length);
+    swap(first.start, second.start);
+    swap(first.begin_allocated_memory, second.begin_allocated_memory);
+    swap(first.end_allocated_memory, second.end_allocated_memory);
+    swap(first.pointer_access, second.pointer_access);
+    swap(first.allocated_memory_sptr, second.allocated_memory_sptr);
+  }
+
+  //! move constructor
+  /*! implementation uses the copy-and-swap idiom, see e.g. https://stackoverflow.com/a/3279550 */
+  VectorWithOffset(VectorWithOffset&& other) noexcept;
+
   //! Free all memory and make object as if default-constructed
   /*! This is not the same as resize(0), as the latter does not
     deallocate the memory (i.e. does not change the capacity()).
@@ -157,6 +213,7 @@ class VectorWithOffset
   inline void recycle();
 
   //! assignment operator with another vector
+  /*! implementation avoids reallocating if sufficient memory already exists. */
   inline VectorWithOffset& operator=(const VectorWithOffset& il);
 
   //! \name index range operations
@@ -196,7 +253,7 @@ class VectorWithOffset
   //! change the range of the vector, new elements are set to \c T()
   /*! New memory is allocated if the range grows outside the range specified by
       get_capacity_min_index() till get_capacity_max_index(). Data is then copied
-      and old memory deallocated (unless owns_memory_for_data() is false).
+      and old memory deallocated (unless it is shared).
 
       \todo in principle reallocation could be avoided when the new range would fit in the
       old one by shifting.
@@ -361,10 +418,15 @@ class VectorWithOffset
 
 protected:
   //! pointer to (*this)[0] (taking get_min_index() into account that is).
-  T* num;
+  T* num; // TODO make private
 
   //! Called internally to see if all variables are consistent
   inline void check_state() const;
+  //! change vector with new index range and point to \c data_ptr
+  /*!
+    \arg data_ptr should start to a contiguous block of correct size
+  */
+  inline void init(const int min_index, const int max_index, T* const data_ptr, bool copy_data);
 
 private:
   //! length of vector
@@ -375,6 +437,9 @@ class VectorWithOffset
   T* begin_allocated_memory;
   T* end_allocated_memory;
 
+  //! shared_ptr to memory
+  shared_ptr<T[]> allocated_memory_sptr;
+
   //! Default member settings for all constructors
   inline void init();
 
@@ -384,7 +449,6 @@ class VectorWithOffset
   //! boolean to test if get_data_ptr is called
   // This variable is declared mutable such that get_const_data_ptr() can change it.
   mutable bool pointer_access;
-  bool _owns_memory_for_data;
 };
 
 END_NAMESPACE_STIR
diff --git a/src/include/stir/VectorWithOffset.inl b/src/include/stir/VectorWithOffset.inl
index 71193eaa3..ea003af39 100644
--- a/src/include/stir/VectorWithOffset.inl
+++ b/src/include/stir/VectorWithOffset.inl
@@ -4,6 +4,7 @@
     Copyright (C) 2000 PARAPET partners
     Copyright (C) 2000 - 2010-07-01, Hammersmith Imanet Ltd
     Copyright (C) 2012-06-01 - 2012, Kris Thielemans
+    Copyright (C) 2023 - 2024, University College London
     This file is part of STIR.
 
     SPDX-License-Identifier: Apache-2.0 AND License-ref-PARAPET-license
@@ -21,6 +22,7 @@
 
 */
 
+#include "stir/IndexRange.h"
 #include <algorithm>
 #include <stdexcept>
 #include "thresholding.h"
@@ -32,18 +34,40 @@ template <class T>
 void
 VectorWithOffset<T>::init()
 {
-  length = 0; // i.e. an empty row of zero length,
-  start = 0;  // no offsets
-  num = 0;    // and no data.
-  begin_allocated_memory = 0;
-  end_allocated_memory = 0;
+  length = 0;    // i.e. an empty row of zero length,
+  start = 0;     // no offsets
+  num = nullptr; // and no data.
+  begin_allocated_memory = nullptr;
+  end_allocated_memory = nullptr;
+  allocated_memory_sptr = nullptr;
+}
+
+template <class T>
+void
+VectorWithOffset<T>::init(const int min_index, const int max_index, T* const data_ptr, bool copy_data)
+{
+  this->pointer_access = false;
+  if (copy_data)
+    {
+      this->resize(min_index, max_index);
+      std::copy(data_ptr, data_ptr + this->length, this->begin());
+    }
+  else
+    {
+      this->length = static_cast<unsigned>(max_index - min_index) + 1;
+      this->start = min_index;
+      this->begin_allocated_memory = data_ptr;
+      this->end_allocated_memory = data_ptr + this->length;
+      this->num = this->begin_allocated_memory - this->start;
+      this->check_state();
+    }
 }
 
 template <class T>
 bool
 VectorWithOffset<T>::owns_memory_for_data() const
 {
-  return this->_owns_memory_for_data;
+  return this->allocated_memory_sptr ? true : false;
 }
 
 /*!
@@ -62,6 +86,7 @@ VectorWithOffset<T>::check_state() const
   assert(begin_allocated_memory <= num + start);
   assert(end_allocated_memory >= begin_allocated_memory);
   assert(static_cast<unsigned>(end_allocated_memory - begin_allocated_memory) >= length);
+  assert(!allocated_memory_sptr || (allocated_memory_sptr.get() == begin_allocated_memory));
 }
 
 template <class T>
@@ -74,11 +99,7 @@ VectorWithOffset<T>::_destruct_and_deallocate()
   // we'll have to be careful to delete only initialised elements
   // and just de-allocate the rest
 
-  // Check on capacity probably not really necessary
-  // as begin_allocated_memory is == 0 in that case, and delete[] 0 doesn't do anything
-  // (I think). Anyway, we're on the safe side now...
-  if (this->owns_memory_for_data() && this->capacity() != 0)
-    delete[] this->begin_allocated_memory;
+  this->allocated_memory_sptr = nullptr;
 }
 
 template <class T>
@@ -222,55 +243,41 @@ VectorWithOffset<T>::rend() const
 
 template <class T>
 VectorWithOffset<T>::VectorWithOffset()
-    : _owns_memory_for_data(true)
+    : pointer_access(false)
 {
-  pointer_access = false;
   this->init();
 }
 
 template <class T>
 VectorWithOffset<T>::VectorWithOffset(const int hsz)
-    : length(hsz),
-      start(0),
-      pointer_access(false),
-      _owns_memory_for_data(true)
-{
-  if ((hsz > 0))
-    {
-      num = new T[hsz];
-      begin_allocated_memory = num;
-      end_allocated_memory = num + length;
-    }
-  else
-    this->init();
-  this->check_state();
-}
+    : VectorWithOffset(0, hsz - 1)
+{}
 
 template <class T>
 VectorWithOffset<T>::VectorWithOffset(const int min_index, const int max_index)
     : length(static_cast<unsigned>(max_index - min_index) + 1),
       start(min_index),
-      pointer_access(false),
-      _owns_memory_for_data(true)
+      pointer_access(false)
 {
   if (max_index >= min_index)
     {
-      num = new T[length];
-      begin_allocated_memory = num;
-      end_allocated_memory = num + length;
-      num -= min_index;
+      allocated_memory_sptr = shared_ptr<T[]>(new T[length]);
+      begin_allocated_memory = allocated_memory_sptr.get();
+      end_allocated_memory = begin_allocated_memory + length;
+      num = begin_allocated_memory - min_index;
     }
   else
     this->init();
   this->check_state();
 }
 
+#if STIR_VERSION < 070000
 template <class T>
-VectorWithOffset<T>::VectorWithOffset(const int sz, T* const data_ptr, T* const end_of_data_ptr)
-    : length(static_cast<unsigned>(sz)),
-      start(0),
+VectorWithOffset<T>::VectorWithOffset(const int min_index, const int max_index, T* const data_ptr, T* const end_of_data_ptr)
+    : length(static_cast<unsigned>(max_index - min_index) + 1),
+      start(min_index),
       pointer_access(false),
-      _owns_memory_for_data(false)
+      allocated_memory_sptr(nullptr) // we don't own the data
 {
   this->begin_allocated_memory = data_ptr;
   this->end_allocated_memory = end_of_data_ptr;
@@ -279,16 +286,37 @@ VectorWithOffset<T>::VectorWithOffset(const int sz, T* const data_ptr, T* const
 }
 
 template <class T>
-VectorWithOffset<T>::VectorWithOffset(const int min_index, const int max_index, T* const data_ptr, T* const end_of_data_ptr)
-    : length(static_cast<unsigned>(max_index - min_index) + 1),
-      start(min_index),
-      pointer_access(false),
-      _owns_memory_for_data(false)
+VectorWithOffset<T>::VectorWithOffset(const int sz, T* const data_ptr, T* const end_of_data_ptr)
+    : VectorWithOffset(0, sz - 1, data_ptr, end_of_data_ptr)
+{}
+#endif // STIR_VERSION < 070000
+
+template <class T>
+VectorWithOffset<T>::VectorWithOffset(const int min_index, const int max_index, const T* const data_ptr)
 {
-  this->begin_allocated_memory = data_ptr;
-  this->end_allocated_memory = end_of_data_ptr;
-  this->num = this->begin_allocated_memory - this->start;
-  this->check_state();
+  // first set empty, such that resize() will work ok
+  this->init();
+  // note: need a const_cast, but it's safe because we will copy the data
+  this->init(min_index, max_index, const_cast<T*>(data_ptr), /* copy_data = */ true);
+}
+
+template <class T>
+VectorWithOffset<T>::VectorWithOffset(const int sz, const T* const data_ptr)
+    : VectorWithOffset(0, sz - 1, data_ptr)
+{}
+
+template <class T>
+VectorWithOffset<T>::VectorWithOffset(const int min_index, const int max_index, shared_ptr<T[]> data_sptr)
+{
+  this->allocated_memory_sptr = data_sptr;
+  this->init(min_index, max_index, data_sptr.get(), /* copy_data = */ false);
+}
+
+template <class T>
+VectorWithOffset<T>::VectorWithOffset(VectorWithOffset<T>&& other) noexcept
+    : VectorWithOffset()
+{
+  swap(*this, other);
 }
 
 template <class T>
@@ -364,13 +392,13 @@ VectorWithOffset<T>::reserve(const int new_capacity_min_index, const int new_cap
   // check if data is being accessed via a pointer (see get_data_ptr())
   assert(pointer_access == false);
   // TODO use allocator here instead of new
-  T* newmem = new T[new_capacity];
+  shared_ptr<T[]> new_allocated_memory_sptr(new T[new_capacity]);
   const unsigned extra_at_the_left = length == 0 ? 0U : std::max(0, this->get_min_index() - actual_capacity_min_index);
-  std::copy(this->begin(), this->end(), newmem + extra_at_the_left);
+  std::copy(this->begin(), this->end(), new_allocated_memory_sptr.get() + extra_at_the_left);
   this->_destruct_and_deallocate();
-  begin_allocated_memory = newmem;
+  allocated_memory_sptr = std::move(new_allocated_memory_sptr);
+  begin_allocated_memory = allocated_memory_sptr.get();
   end_allocated_memory = begin_allocated_memory + new_capacity;
-  _owns_memory_for_data = true;
   num = begin_allocated_memory + extra_at_the_left - (length > 0 ? start : 0);
   this->check_state();
 }
@@ -495,8 +523,7 @@ VectorWithOffset<T>::operator=(const VectorWithOffset& il)
 
 template <class T>
 VectorWithOffset<T>::VectorWithOffset(const VectorWithOffset& il)
-    : pointer_access(false),
-      _owns_memory_for_data(true)
+    : pointer_access(false)
 {
   this->init();
   *this = il; // Uses assignment operator (above)
diff --git a/src/include/stir/shared_ptr.h b/src/include/stir/shared_ptr.h
index 1b7fd14ff..a24edfe3c 100644
--- a/src/include/stir/shared_ptr.h
+++ b/src/include/stir/shared_ptr.h
@@ -35,7 +35,9 @@ using boost::static_pointer_cast;
 #  define MAKE_SHARED boost::make_shared
 } // namespace stir
 #else
+
 #  include <memory>
+
 namespace stir
 {
 using std::shared_ptr;
diff --git a/src/recon_buildblock/Parallelproj_projector/BackProjectorByBinParallelproj.cxx b/src/recon_buildblock/Parallelproj_projector/BackProjectorByBinParallelproj.cxx
index 2589c62b4..474372018 100644
--- a/src/recon_buildblock/Parallelproj_projector/BackProjectorByBinParallelproj.cxx
+++ b/src/recon_buildblock/Parallelproj_projector/BackProjectorByBinParallelproj.cxx
@@ -133,8 +133,19 @@ TOF_transpose(std::vector<float>& mem_for_PP_back,
 void
 BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density) const
 {
+  std::vector<float> image_vec;
+  float* image_ptr;
+  if (_density_sptr->is_contiguous())
+    {
+      image_ptr = _density_sptr->get_full_data_ptr();
+    }
+  else
+    {
+      image_vec.resize(density.size_all());
+      std::copy(_density_sptr->begin_all(), _density_sptr->end_all(), image_vec.begin());
+      image_ptr = image_vec.data();
+    }
 
-  std::vector<float> image_vec(density.size_all());
   // create an alias for the projection data
   const ProjDataInMemory& p(*_proj_data_to_backproject_sptr);
 
@@ -149,7 +160,7 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density
   long long offset = 0;
 
   // send image to all visible CUDA devices
-  float** image_on_cuda_devices = copy_float_array_to_all_devices(image_vec.data(), _helper->num_image_voxel);
+  float** image_on_cuda_devices = copy_float_array_to_all_devices(image_ptr, _helper->num_image_voxel);
 
   // do (chuck-wise) back projection on the CUDA devices
   for (int chunk_num = 0; chunk_num < _num_gpu_chunks; chunk_num++)
@@ -211,7 +222,7 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density
   sum_float_arrays_on_first_device(image_on_cuda_devices, _helper->num_image_voxel);
 
   // copy summed image back to host
-  get_float_array_from_device(image_on_cuda_devices, _helper->num_image_voxel, 0, image_vec.data());
+  get_float_array_from_device(image_on_cuda_devices, _helper->num_image_voxel, 0, image_ptr);
 
   // free image array from CUDA devices
   free_float_array_on_all_devices(image_on_cuda_devices);
@@ -226,7 +237,7 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density
 
       joseph3d_back_tof_sino(_helper->xend.data(),
                              _helper->xstart.data(),
-                             image_vec.data(),
+                             image_ptr,
                              _helper->origin.data(),
                              _helper->voxsize.data(),
                              mem_for_PP_back.data(),
@@ -245,7 +256,7 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density
     {
       joseph3d_back(_helper->xstart.data(),
                     _helper->xend.data(),
-                    image_vec.data(),
+                    image_ptr,
                     _helper->origin.data(),
                     _helper->voxsize.data(),
                     p.get_const_data_ptr(),
@@ -260,7 +271,14 @@ BackProjectorByBinParallelproj::get_output(DiscretisedDensity<3, float>& density
   // --------------------------------------------------------------- //
   //   Parallelproj -> STIR image conversion
   // --------------------------------------------------------------- //
-  std::copy(image_vec.begin(), image_vec.end(), density.begin_all());
+  if (_density_sptr->is_contiguous())
+    {
+      _density_sptr->release_full_data_ptr();
+    }
+  else
+    {
+      std::copy(image_vec.begin(), image_vec.end(), density.begin_all());
+    }
 
   // After the back projection, we enforce a truncation outside of the FOV.
   // This is because the parallelproj projector seems to have some trouble at the edges and this
diff --git a/src/recon_buildblock/Parallelproj_projector/ForwardProjectorByBinParallelproj.cxx b/src/recon_buildblock/Parallelproj_projector/ForwardProjectorByBinParallelproj.cxx
index 9c445a9d1..ac4a3704c 100644
--- a/src/recon_buildblock/Parallelproj_projector/ForwardProjectorByBinParallelproj.cxx
+++ b/src/recon_buildblock/Parallelproj_projector/ForwardProjectorByBinParallelproj.cxx
@@ -154,8 +154,18 @@ ForwardProjectorByBinParallelproj::set_input(const DiscretisedDensity<3, float>&
     truncate_rim(*_density_sptr, static_cast<int>(std::max((image_radius - radius) / _helper->voxsize[2], 0.F)));
   }
 
-  std::vector<float> image_vec(density.size_all());
-  std::copy(_density_sptr->begin_all(), _density_sptr->end_all(), image_vec.begin());
+  std::vector<float> image_vec;
+  float* image_ptr;
+  if (_density_sptr->is_contiguous())
+    {
+      image_ptr = _density_sptr->get_full_data_ptr();
+    }
+  else
+    {
+      image_vec.resize(density.size_all());
+      std::copy(_density_sptr->begin_all(), _density_sptr->end_all(), image_vec.begin());
+      image_ptr = image_vec.data();
+    }
 
 #if 0
     // needed to set output to zero as parallelproj accumulates but is no longer the case
@@ -174,7 +184,7 @@ ForwardProjectorByBinParallelproj::set_input(const DiscretisedDensity<3, float>&
   long long offset = 0;
 
   // send image to all visible CUDA devices
-  float** image_on_cuda_devices = copy_float_array_to_all_devices(image_vec.data(), _helper->num_image_voxel);
+  float** image_on_cuda_devices = copy_float_array_to_all_devices(image_ptr, _helper->num_image_voxel);
 
   // do (chuck-wise) projection on the CUDA devices
   for (int chunk_num = 0; chunk_num < _num_gpu_chunks; chunk_num++)
@@ -246,7 +256,7 @@ ForwardProjectorByBinParallelproj::set_input(const DiscretisedDensity<3, float>&
       std::vector<float> mem_for_PP(_helper->num_lors * _helper->num_tof_bins);
       joseph3d_fwd_tof_sino(_helper->xend.data(),
                             _helper->xstart.data(),
-                            image_vec.data(),
+                            image_ptr,
                             _helper->origin.data(),
                             _helper->voxsize.data(),
                             mem_for_PP.data(),
@@ -268,7 +278,7 @@ ForwardProjectorByBinParallelproj::set_input(const DiscretisedDensity<3, float>&
     {
       joseph3d_fwd(_helper->xstart.data(),
                    _helper->xend.data(),
-                   image_vec.data(),
+                   image_ptr,
                    _helper->origin.data(),
                    _helper->voxsize.data(),
                    _projected_data_sptr->get_data_ptr(),
@@ -278,6 +288,10 @@ ForwardProjectorByBinParallelproj::set_input(const DiscretisedDensity<3, float>&
 #endif
   info("done", 2);
 
+  if (_density_sptr->is_contiguous())
+    {
+      _density_sptr->release_full_data_ptr();
+    }
   _projected_data_sptr->release_data_ptr();
 }
 
diff --git a/src/swig/stir.i b/src/swig/stir.i
index 9ac439079..a90824e9b 100644
--- a/src/swig/stir.i
+++ b/src/swig/stir.i
@@ -576,6 +576,10 @@
 %newobject *::get_empty_copy;
 %newobject *::read_from_file;
 
+// SWIG complains "Warning 503: Can't wrap 'stir::swap' unless renamed to a valid identifier."
+// But it's probably dangerous to expose swap anyway, so let's ignore it.
+%ignore **::swap;
+%ignore stir::swap;
 %ignore *::ask_parameters;
 %ignore *::create_shared_clone;
 %ignore *::read_from_stream;
@@ -583,6 +587,10 @@
 %ignore *::get_const_data_ptr;
 %ignore *::release_data_ptr;
 %ignore *::release_const_data_ptr;
+%ignore *::get_full_data_ptr;
+%ignore *::get_const_full_data_ptr;
+%ignore *::release_full_data_ptr;
+%ignore *::release_const_full_data_ptr;
 
 #if defined(SWIGPYTHON)
 %rename(__assign__) *::operator=; 
diff --git a/src/test/test_Array.cxx b/src/test/test_Array.cxx
index 0baebf827..8371e4825 100644
--- a/src/test/test_Array.cxx
+++ b/src/test/test_Array.cxx
@@ -2,7 +2,7 @@
     Copyright (C) 2000 PARAPET partners
     Copyright (C) 2000-2011, Hammersmith Imanet Ltd
     Copyright (C) 2013 Kris Thielemans
-    Copyright (C) 2013, 2020 University College London
+    Copyright (C) 2013, 2020, 2023, 2024 University College London
 
     This file is part of STIR.
 
@@ -47,6 +47,10 @@
 
 // for open_read/write_binary
 #include "stir/utilities.h"
+#include "stir/info.h"
+#include "stir/error.h"
+
+#include "stir/HighResWallClockTimer.h"
 
 #include <stdio.h>
 #include <fstream>
@@ -275,6 +279,15 @@ class ArrayTests : public RunTests
   void run_tests() override;
 };
 
+// helper function to create a shared_ptr that doesn't delete the data (as it's still owned by the vector)
+template <typename T>
+shared_ptr<T[]>
+vec_to_shared(std::vector<T>& v)
+{
+  shared_ptr<T[]> sptr(v.data(), [](auto) {});
+  return sptr;
+}
+
 void
 ArrayTests::run_tests()
 {
@@ -344,6 +357,60 @@ ArrayTests::run_tests()
         Array<1, float> test3 = test2 + test;
         check_if_zero(test3[-2], "test growing during operator+");
       }
+
+      // using preallocated memory
+      {
+        std::vector<float> mem(test.get_index_range().size_all());
+        std::copy(test.begin_all_const(), test.end_all_const(), mem.begin());
+        Array<1, float> preallocated(test.get_index_range(), vec_to_shared(mem));
+        // shared_ptr<float[]> mem_sptr(new float [test.get_index_range().size_all()]);
+        // auto mem = mem_sptr.get();
+        // std::copy(test.begin_all_const(), test.end_all_const(), mem);
+        // Array<1,float> preallocated(test.get_index_range(), mem_sptr, false);
+        check_if_equal(test, preallocated, "test preallocated: equality");
+        std::copy(test.begin_all_const(), test.end_all_const(), preallocated.begin_all());
+        check_if_equal(test, preallocated, "test preallocated: copy with full_iterator");
+        check(test.is_contiguous(), "test Array1D is contiguous");
+        check(preallocated.is_contiguous(), "test Array1D is contiguous (preallocated)");
+        check(preallocated.get_full_data_ptr() == &mem[0], "test Array1D preallocated pointer access");
+        preallocated.release_full_data_ptr();
+        check(preallocated.get_const_full_data_ptr() == &mem[0], "test Array1D preallocated const pointer access");
+        preallocated.release_const_full_data_ptr();
+        // test memory is shared between the Array and mem
+        mem[0] = *test.begin() + 345;
+        check_if_equal(*preallocated.begin(), mem[0], "test preallocated: direct buffer mod");
+        *(preallocated.begin() + 1) += 4;
+        check_if_equal(*(preallocated.begin() + 1), mem[1], "test preallocated: indirect buffer mod");
+        // test resize
+        {
+          const auto min = preallocated.get_min_index();
+          const auto max = preallocated.get_max_index();
+          // resizing to smaller range will keep pointing to the same memory
+          preallocated.resize(min + 1, max - 1);
+          std::fill(mem.begin(), mem.end(), 12345.F);
+          check_if_equal(preallocated[min + 1], 12345.F, "test preallocated: resize smaller uses same memory");
+          // resizing to non-overlapping range will reallocate
+          preallocated.resize(min - 1, max - 1);
+          std::fill(mem.begin(), mem.end(), 123456.F);
+          check_if_equal(preallocated[min + 1], 12345.F, "test preallocated: grow uses different memory");
+        }
+      }
+
+      // copying from existing memory
+      {
+        std::vector<float> mem(test.get_index_range().size_all());
+        std::copy(test.begin_all_const(), test.end_all_const(), mem.begin());
+        Array<1, float> test_from_mem(test.get_index_range(), reinterpret_cast<const float*>(mem.data()));
+        check(test_from_mem.owns_memory_for_data(), "test preallocated with copy: should own memory");
+        check_if_equal(test, test_from_mem, "test construct from mem: equality");
+        std::copy(test.begin_all_const(), test.end_all_const(), test_from_mem.begin_all());
+        check_if_equal(test, test_from_mem, "test construct from mem: copy with full_iterator");
+        // test memory is not shared between the Array and mem
+        mem[0] = *test.begin() + 345;
+        check_if_equal(*test_from_mem.begin(), *test.begin(), "test construct from mem: direct buffer mod");
+        *(test_from_mem.begin() + 1) += 4;
+        check_if_equal(*(test_from_mem.begin() + 1), mem[1] + 4, "test construct from mem: indirect buffer mod");
+      }
     }
 #if 1
     {
@@ -451,11 +518,55 @@ ArrayTests::run_tests()
       {
         // copy constructor;
         Array<2, float> t21(t2);
-        check_if_equal(t21, t2, "test Array2D copy constructor");
+        check_if_equal(t21[3][2], 6.F, "test Array2D copy consructor element");
+        check_if_equal(t21, t2, "test Array2D copy constructor all elements");
         // 'assignment constructor' (this simply calls copy constructor)
         Array<2, float> t22 = t2;
         check_if_equal(t22, t2, "test Array2D copy constructor");
       }
+
+      // using preallocated memory
+      {
+        std::vector<float> mem(t2.get_index_range().size_all());
+        std::copy(t2.begin_all_const(), t2.end_all_const(), mem.begin());
+        {
+          // test iterator access is row-major
+          auto first_min_idx = t2.get_min_index();
+          check_if_equal(t2[3][2],
+                         mem[(3 - first_min_idx) * t2[first_min_idx].size_all() + 2 - t2[first_min_idx].get_min_index()],
+                         "check row-major order in 2D");
+        }
+        // Array<2,float> preallocated(t2.get_index_range(), &mem[0], false);
+        Array<2, float> preallocated(t2.get_index_range(), vec_to_shared(mem));
+        // check(!preallocated.owns_memory_for_data(), "test preallocated without copy: should not own memory");
+        check_if_equal(t2, preallocated, "test preallocated: equality");
+        std::copy(t2.begin_all_const(), t2.end_all_const(), preallocated.begin_all());
+        check_if_equal(t2, preallocated, "test preallocated: copy with full_iterator");
+
+        check(preallocated.is_contiguous(), "test Array2D is contiguous (preallocated)");
+        check(preallocated.get_full_data_ptr() == &mem[0], "test Array2D preallocated pointer access");
+        preallocated.release_full_data_ptr();
+        check(preallocated.get_const_full_data_ptr() == &mem[0], "test Array2D preallocated const pointer access");
+        preallocated.release_const_full_data_ptr();
+        // test memory is shared between the Array and mem
+        mem[0] = *t2.begin_all() + 345;
+        check_if_equal(*preallocated.begin_all(), mem[0], "test preallocated: direct buffer mod");
+        *(preallocated.begin_all()) += 4;
+        check_if_equal(*(preallocated.begin_all()), mem[0], "test preallocated: indirect buffer mod");
+        // test resize
+        {
+          BasicCoordinate<2, int> min, max;
+          preallocated.get_regular_range(min, max);
+          // resizing to smaller range will keep pointing to the same memory
+          preallocated.resize(IndexRange<2>(min + 1, max - 1));
+          std::fill(mem.begin(), mem.end(), 12345.F);
+          check_if_equal(preallocated[min + 1], 12345.F, "test preallocated: resize smaller uses same memory");
+          check(!preallocated.is_contiguous(), "test preallocated: no longer contiguous after resize");
+          preallocated.resize(IndexRange<2>(min - 1, max - 1));
+          std::fill(mem.begin(), mem.end(), 123456.F);
+          check_if_equal(preallocated[min + 1], 12345.F, "test preallocated: grow uses different memory");
+        }
+      }
     }
     // size_all with irregular range
     {
@@ -889,6 +1000,43 @@ ArrayTests::run_tests()
     check_if_equal(arr1, arr3, "make_array inline vs function with assignment");
     check_if_equal(arr1, arr4, "make_array inline constructor from function");
   }
+  std::cerr << "timings\n";
+  {
+    HighResWallClockTimer t;
+    IndexRange<4> range(Coordinate4D<int>(20, 100, 400, 600));
+    t.start();
+    double create_duration;
+    {
+      Array<4, int> a1(range);
+      t.stop();
+      std::cerr << "creation of non-contiguous 4D Array " << t.value() * 1000 << "ms\n";
+      create_duration = t.value();
+      t.start();
+    }
+    t.stop();
+    std::cerr << "deletion " << (t.value() - create_duration) * 1000 << " ms\n";
+    t.reset();
+    t.start();
+    {
+      const auto s = range.size_all();
+      t.stop();
+      std::cerr << "range size_all " << t.value() * 1000 << "ms\n";
+      t.start();
+      std::vector<int> v(s, 0);
+      t.stop();
+      std::cerr << "vector creation " << t.value() * 1000 << "ms\n";
+      t.start();
+      // Array<4,int> a1(range, v.data(), false);
+      Array<4, int> a1(range, vec_to_shared(v));
+      t.stop();
+      // check(!a1.owns_memory_for_data(), "test preallocated without copy: should not own memory");
+      create_duration = t.value();
+      std::cerr << "contiguous array creation (total) " << t.value() * 1000 << "ms\n";
+      t.start();
+    }
+    t.stop();
+    std::cerr << "deletion " << (t.value() - create_duration) * 1000 << " ms\n";
+  }
 }
 
 END_NAMESPACE_STIR