Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CUDARelativeDifferencePrior #1408

Merged
merged 51 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
5745b8c
first attempt at integration with stir
Imraj-Singh Apr 25, 2024
2c4fa32
attempting to add to registry
Imraj-Singh Apr 25, 2024
52c4943
registry add and cmake cuda package/include dirs
Imraj-Singh Apr 25, 2024
f41728e
changes to cudarelativedifference prior const etc
Imraj-Singh Apr 25, 2024
d3f3732
updated Cmakelists
Imraj-Singh Apr 25, 2024
063633e
attempting to get the registries to work...
Imraj-Singh Apr 25, 2024
483cc13
update cuda error
Imraj-Singh Apr 25, 2024
7e26bd4
Merge branch 'UCL:master' into cuda_stir
Imraj-Singh May 16, 2024
0249e93
Register errors
Imraj-Singh May 17, 2024
843cd13
Merge branch 'UCL:master' into cuda_stir
Imraj-Singh Jun 4, 2024
5eec80b
error handling in cmake
Imraj-Singh Jun 4, 2024
2e90b17
Merge branch 'UCL:master' into cuda_stir
Imraj-Singh Jun 30, 2024
13def4f
Not working, but added the functionality and the registery works than…
Imraj-Singh Jul 4, 2024
5ddce33
Added small test, constructors not inheriting correctly causing an error
Imraj-Singh Jul 4, 2024
1a7d4d4
Merge branch 'UCL:master' into cuda_stir
Imraj-Singh Jul 4, 2024
1c61543
using parent constructors in RegisteredParsingObject
KrisThielemans Jul 4, 2024
5b3f768
fix CUDARDP constructors and set_up etc
KrisThielemans Jul 4, 2024
8cc358f
fix set_up() signature for both RDP versions
KrisThielemans Jul 4, 2024
08d0f81
run clang-format on .cu files
KrisThielemans Jul 4, 2024
227c48e
[recon_test_pack] add a test for parallelproj and CUDARDP
KrisThielemans Jul 4, 2024
95e8428
minor clean-up of CUDA RDP
KrisThielemans Jul 4, 2024
aed4a1d
[CMake] use check_language(CUDA)
KrisThielemans Jul 4, 2024
f163578
run dos2unix
KrisThielemans Jul 4, 2024
f79f95b
[CMake] only compile cuda_rdptest if CUDA is enabled
KrisThielemans Jul 4, 2024
35937dd
disable using constructors for SWIG
KrisThielemans Jul 5, 2024
9e8daae
[recon_test_pack] remove set -e to avoid early exit
KrisThielemans Jul 6, 2024
2e45fdc
[SWIG] reduce some SWIG warnings
KrisThielemans Jul 6, 2024
304f121
fix CUDA error handling
KrisThielemans Jul 6, 2024
8825485
create helper functions for Arrays <-> CUDA device
KrisThielemans Jul 6, 2024
50ae299
more *Prior::set_* functions require set_up()
KrisThielemans Jul 6, 2024
51ef4fc
CudaRDP: move weights and kappa to set_up
KrisThielemans Jul 6, 2024
06ec3de
CudaRDP: use float in kernel for value
KrisThielemans Jul 6, 2024
3e04184
don't run CUDA tests on GHA
KrisThielemans Jul 6, 2024
a5ccc4c
CudaRDP: minor clean-up
KrisThielemans Jul 6, 2024
d2f6d6d
correct check for kappa
KrisThielemans Jul 6, 2024
c31953e
CudaRDP: more checks and document 3x3x3 limitation
KrisThielemans Jul 7, 2024
a0114ab
CudaRDP: put cppdim3 type in the class definition
KrisThielemans Jul 7, 2024
ef66303
add CudaRDP to test_priors (but fails)
KrisThielemans Jul 7, 2024
c634a0d
Cuda RDP fixes
KrisThielemans Jul 7, 2024
165da28
decrease verbosity of SeperableGaussianFilter
KrisThielemans Jul 7, 2024
7ac03fd
correct array_to_device for non-contiguous case
KrisThielemans Jul 7, 2024
fa4d7fe
correct test on kappa characteristics in all priors
KrisThielemans Jul 7, 2024
ccc962a
add and correct tests for priors with kappa (still fails on RDP)
KrisThielemans Jul 7, 2024
dcd3b86
reduce eps for numerical Hessian test
KrisThielemans Jul 7, 2024
3e43ca7
make running of CUDA tests optional
KrisThielemans Jul 7, 2024
66c082b
skip CUDA test if nvidia-smi not found
KrisThielemans Jul 8, 2024
7a2cf76
CudaRDP: honour elemT template argument
KrisThielemans Jul 8, 2024
44dad5a
update release notes on the CUDA RDP [ci skip]
KrisThielemans Jul 8, 2024
d4f2c9b
added ctest for CUDA vs non-CUDA RDP
KrisThielemans Jul 8, 2024
05cdfaf
added RDP timings to stir_timings
KrisThielemans Jul 8, 2024
1f0d692
disable CUDA RDP tests if no CUDA found
KrisThielemans Jul 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ repos:
rev: '62302476'
hooks:
- id: clang-format
files: \.(c|cc|cxx|cpp|h|hpp|hxx|inl|txx)$
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|inl|txx)$
32 changes: 32 additions & 0 deletions CMakeLists.txt
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,38 @@ else()
message(STATUS "NiftyPET projector support disabled.")
endif()

if(NOT DISABLE_STIR_CUDA)
include(CheckLanguage)
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
enable_language(CUDA)
find_package(CUDAToolkit QUIET)
if (NOT CUDAToolkit_FOUND)
message(WARNING "CUDAToolkit not detected. Setting DISABLE_STIR_CUDA to ON.")
set(DISABLE_STIR_CUDA ON)
set(STIR_WITH_CUDA OFF)
else()
if("${CMAKE_VERSION}" VERSION_LESS "3.23")
message(FATAL_ERROR "CMake 3.23 or newer is required to use CMAKE_CUDA_ARCHITECTURES set to 'all'. Upgrade your CMake or set DISABLE_STIR_CUDA to ON.")
else()
set(CMAKE_CUDA_ARCHITECTURES "all")
endif()
message(STATUS "STIR CUDA support enabled. Using CUDA version ${CUDAToolkit_VERSION}.")
set(STIR_WITH_CUDA ON)
endif()
# check if run-time is available for ctests
find_program(NVIDIA_SMI nvidia-smi)
if (NOT NVIDIA_SMI)
message(WARNING "nvidia-smi not found. Disabling the ctests using CUDA")
set(SKIP_CUDA_TESTS ON)
endif()
else()
message(WARNING "No CUDA compiler found. Setting DISABLE_STIR_CUDA to ON.")
set(DISABLE_STIR_CUDA ON)
set(STIR_WITH_CUDA OFF)
endif()
endif()

# Parallelproj
if(NOT DISABLE_Parallelproj_PROJECTOR)
find_package(parallelproj 1.3.4 CONFIG)
Expand Down
16 changes: 14 additions & 2 deletions documentation/release_6.2.htm
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ <h2>Overall summary</h2>
</p>

<p>
This release contains mainly code written by Nicole Jurjew (UCL) and Kris Thielemans (UCL).
This release contains mainly code written by Nicole Jurjew (UCL) (SSRB for TOF),
Imraj Singh (UCL) (CUDA version of the Relative Difference Prior) and Kris Thielemans (UCL).
</p>

<h2>Patch release info</h2>
Expand Down Expand Up @@ -51,6 +52,13 @@ <h3>New functionality</h3>
in <code>ProjData</code>).<br>
<a href=https://github.com/UCL/STIR/pull/1439>PR #1439</a> and <a href=https://github.com/UCL/STIR/pull/1448>PR #1448</a>
</li>
<li>
New prior <code>CudaRelativeDifferencePrior</code> (use <tt>Cuda Relative Difference Prior</tt> in <tt>.par</tt> files), only
available if the CUDA toolkit is found during building. Results are identical to <code>RelativeDifferencePrior</code>
up to numerical rounding issues. However, the code is currently limited to 3x3x3 weights.<br>
Added timings for the RDP (both non-CUDA and CUDA) tothe <tt>stir_timings</tt> utility.<br>
<a href=https://github.com/UCL/STIR/pull/1408>PR #1408</a>
</li>
</ul>


Expand Down Expand Up @@ -106,7 +114,7 @@ <h3>Known problems</h3>
<p>See <a href=https://github.com/UCL/STIR/labels/bug>our issue tracker</a>.</p>


<H2>What's new for developers (aside from what should be obvious
<H2>What is new for developers (aside from what should be obvious
from the above):</H2>

<h3>Changed functionality</h3>
Expand Down Expand Up @@ -155,6 +163,10 @@ <h3>Build system</h3>
Force C++ version according to CERN ROOT versions: ROOT 6.28.10 needs C++17 and 6.30.2 needs C++20.
Also some fixes when relying on <code>root-config</code>.
</li>
<li>
Optionally enable CUDA as a CMake language (for the CUDA RDP). <strong>You should use CMake 3.23 or later</strong>
if you use CUDA.
</li>
</ul>

<h3>Test changes</h3>
Expand Down
46 changes: 46 additions & 0 deletions recon_test_pack/CUDA/OSMAPOSL_test_sim_Parallelproj_CUDARDP.par
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
OSMAPOSLParameters :=
; test file for OSMAPOSL with a quadratic prior (and ray tracing projection matrix)
objective function type:= PoissonLogLikelihoodWithLinearModelForMeanAndProjData
PoissonLogLikelihoodWithLinearModelForMeanAndProjData Parameters:=

input file := my_prompts${suffix}.hs
zero end planes of segment 0:= 0 ; segment 0 is has direct and indirect planes
; if disabled, defaults to maximum segment number in the file
;maximum absolute segment number to process := 3

projector pair type := parallelproj
Projector Pair Using Parallelproj Parameters:=
End Projector Pair Using Parallelproj Parameters:=

Bin Normalisation type := from projdata
Bin Normalisation From ProjData :=
normalisation projdata filename:= my_acfs${suffix}.hs
End Bin Normalisation From ProjData:=

additive sinogram := my_additive_sinogram${suffix}.hs

xy output image size (in pixels) := 91
zoom := .5

use subset sensitivities:=1
subset sensitivity filenames:= my_test_sim${suffix}_sens_PP_s${num_subsets}_%d.hv
recompute_sensitivity:=1

prior type := Relative Difference Prior
Relative Difference Prior Parameters:=
penalisation factor := 0.5
; next defaults to 0, set to 1 for 2D inverse Euclidean weights, 0 for 3D
only 2D:= 0
END Relative Difference Prior Parameters:=

end PoissonLogLikelihoodWithLinearModelForMeanAndProjData Parameters:=

output filename prefix := my_test_sim${suffix}_image_OSMAPOSL_PP_CUDARDP
number of subsets:= ${num_subsets}
start at subset:= 0
number of subiterations:= 28
save estimates at subiteration intervals:= ${num_subsets}

;report objective function values interval := 1

END :=
32 changes: 22 additions & 10 deletions recon_test_pack/run_test_simulate_and_recon.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# Copyright (C) 2011 - 2011-01-14, Hammersmith Imanet Ltd
# Copyright (C) 2011-07-01 - 2011, Kris Thielemans
# Copyright (C) 2014, 2022 University College London
# Copyright (C) 2014, 2022, 2024 University College London
# This file is part of STIR.
#
# SPDX-License-Identifier: Apache-2.0
Expand All @@ -13,13 +13,7 @@
# Author Kris Thielemans
#

# Scripts should exit with error code when a test fails:
if [ -n "$TRAVIS" -o -n "$GITHUB_WORKSPACE" ]; then
# The code runs inside Travis or GHA
set -e
fi

echo This script should work with STIR version 6.0. If you have
echo This script should work with STIR version 6.2. If you have
echo a later version, you might have to update your test pack.
echo Please check the web site.
echo
Expand Down Expand Up @@ -62,6 +56,8 @@ if [ $# -eq 1 ]; then
fi

echo "Using `command -v OSMAPOSL`"
echo "Using `command -v OSSPS`"
echo "Using `command -v FBP2D`"

# first need to set this to the C locale, as this is what the STIR utilities use
# otherwise, awk might interpret floating point numbers incorrectly
Expand Down Expand Up @@ -102,7 +98,23 @@ input_ROI_mean=`awk 'NR>2 {print $2}' ${input_image}.roistats`
# and reuses its subset sensitivities
for recon in FBP2D FBP3DRP OSMAPOSL OSSPS; do
echo "========== Testing `command -v ${recon}`"
for parfile in ${recon}_test_sim*.par; do
# Check if we have CUDA code and parallelproj.
# If so, check for test files in CUDA/*
if stir_list_registries |grep -i cuda > /dev/null
then
if stir_list_registries |grep -i parallelproj > /dev/null
then
extra_par_files=`ls CUDA/${recon}_test_sim*.par 2> /dev/null`
if [ -n "$TRAVIS" -o -n "$GITHUB_WORKSPACE" ]; then
# The code runs inside Travis or GHA
if [ -n "$extra_par_files" ]; then
echo "Not running ${extra_par_files} due to no CUDA run-time"
extra_par_files=""
fi
fi
fi
fi
for parfile in ${recon}_test_sim*.par ${extra_par_files}; do
for dataSuffix in "" "$TOF_suffix"; do
echo "===== data suffix: \"$dataSuffix\""
# test first if analytic reconstruction and if so, run pre-correction
Expand Down Expand Up @@ -136,7 +148,7 @@ for recon in FBP2D FBP3DRP OSMAPOSL OSSPS; do

# run actual reconstruction
echo "Running ${recon} ${parfile}"
logfile="my_${parfile}${suffix}.log"
logfile="my_`basename ${parfile}`${suffix}.log"
${MPIRUN} ${recon} ${parfile} > "$logfile" 2>&1
if [ $? -ne 0 ]; then
echo "Error running reconstruction. CHECK RECONSTRUCTION LOG \"$logfile\""
Expand Down
2 changes: 1 addition & 1 deletion src/buildblock/SeparableGaussianArrayFilter.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ SeparableGaussianArrayFilter<num_dimensions, elemT>::construct_filter(bool norma
{
std::stringstream ss;
ss << "Gaussian filter dim[" << i << "] =" << filter_coefficients;
info(ss.str(), 2);
info(ss.str(), 3);
}

this->all_1d_array_filters[i - 1].reset(new ArrayFilter1DUsingConvolution<float>(filter_coefficients));
Expand Down
6 changes: 6 additions & 0 deletions src/cmake/STIRConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,12 @@ if(@STIR_WITH_NiftyPET_PROJECTOR@)
set(STIR_WITH_NiftyPET_PROJECTOR TRUE)
endif()

if(@STIR_WITH_CUDA@)
find_package(CUDAToolkit REQUIRED)
enable_language(CUDA)
set(STIR_WITH_CUDA TRUE)
endif()

if(@STIR_WITH_Parallelproj_PROJECTOR@)
find_package(parallelproj REQUIRED CONFIG)
set(STIR_WITH_Parallelproj_PROJECTOR TRUE)
Expand Down
2 changes: 2 additions & 0 deletions src/cmake/STIRConfig.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ namespace stir {

#cmakedefine STIR_WITH_NiftyPET_PROJECTOR

#cmakedefine STIR_WITH_CUDA

#cmakedefine STIR_WITH_Parallelproj_PROJECTOR
#cmakedefine parallelproj_built_with_CUDA

Expand Down
5 changes: 4 additions & 1 deletion src/include/stir/NumericVectorWithOffset.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ START_NAMESPACE_STIR
template <class T, class elemT>
class NumericVectorWithOffset : public VectorWithOffset<T>
{
private:
#ifdef SWIG
public: // needs to be public for SWIG to be able to parse the "using" statement below
#endif

typedef VectorWithOffset<T> base_type;

public:
Expand Down
12 changes: 10 additions & 2 deletions src/include/stir/RegisteredParsingObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ template <typename Derived, typename Base, typename Parent = Base>
class RegisteredParsingObject : public Parent
{
public:
// import constructors from Parent
// Note: disabled for older SWIG as that generates an error before 4.2, and a warning for 4.2
#if !defined(SWIG) || (SWIG_VERSION >= 0x040300)
using Parent::Parent;
#endif

//! Construct a new object (of type Derived) by parsing the istream
/*! When the istream * is 0, questions are asked interactively.

Expand All @@ -93,6 +99,7 @@ class RegisteredParsingObject : public Parent
inline std::string parameter_info() override;

public:
#ifndef SWIG
//! A helper class to allow automatic registration.
struct RegisterIt
{
Expand All @@ -111,18 +118,19 @@ class RegisteredParsingObject : public Parent
*/
~RegisterIt()
{
#if 0
# if 0
// does not work yet, as registry might be destructed before this
// RegisterIt object. A solution to this problem is coming up.
cerr << "In RegisterIt destructor for " << Derived::registered_name<<endl;
cerr <<"Current keys: ";
Parent::registry().list_keys(cerr);
Parent::registry().remove_from_registry(Derived::registered_name);
#endif
# endif
}
};
// RegisterIt needs to be a friend to have access to registry()
friend struct RegisterIt;
#endif
};

END_NAMESPACE_STIR
Expand Down
69 changes: 69 additions & 0 deletions src/include/stir/cuda_utilities.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
Copyright (C) 2024, University College London
This file is part of STIR.

SPDX-License-Identifier: Apache-2.0

See STIR/LICENSE.txt for details
*/

#ifndef __stir_cuda_utilities_H__
#define __stir_cuda_utilities_H__

/*!
\file
\ingroup CUDA
\brief some utilities for STIR and CUDA

\author Kris Thielemans
*/
#include "stir/Array.h"
#include "stir/info.h"
#include <vector>

START_NAMESPACE_STIR

template <int num_dimensions, typename elemT>
inline void
array_to_device(elemT* dev_data, const Array<num_dimensions, elemT>& stir_array)
{
if (stir_array.is_contiguous())
{
info("array_to_device contiguous", 100);
cudaMemcpy(dev_data, stir_array.get_const_full_data_ptr(), stir_array.size_all() * sizeof(elemT), cudaMemcpyHostToDevice);
stir_array.release_const_full_data_ptr();
}
else
{
info("array_to_device non-contiguous", 100);
// Allocate host memory to get contiguous vector, copy array to it and copy from device to host
std::vector<elemT> tmp_data(stir_array.size_all());
std::copy(stir_array.begin_all(), stir_array.end_all(), tmp_data.begin());
cudaMemcpy(dev_data, tmp_data.data(), stir_array.size_all() * sizeof(elemT), cudaMemcpyHostToDevice);
}
}

template <int num_dimensions, typename elemT>
inline void
array_to_host(Array<num_dimensions, elemT>& stir_array, const elemT* dev_data)
{
if (stir_array.is_contiguous())
{
info("array_to_host contiguous", 100);
cudaMemcpy(stir_array.get_full_data_ptr(), dev_data, stir_array.size_all() * sizeof(elemT), cudaMemcpyDeviceToHost);
stir_array.release_full_data_ptr();
}
else
{
info("array_to_host non-contiguous", 100);
// Allocate host memory for the result and copy from device to host
std::vector<elemT> tmp_data(stir_array.size_all());
cudaMemcpy(tmp_data.data(), dev_data, stir_array.size_all() * sizeof(elemT), cudaMemcpyDeviceToHost);
// Copy the data to the stir_array
std::copy(tmp_data.begin(), tmp_data.end(), stir_array.begin_all());
}
}

END_NAMESPACE_STIR

#endif
5 changes: 5 additions & 0 deletions src/include/stir/doxygengroups.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ be selected at run-time.
\ingroup buildblock
*/

/*!
\defgroup CUDA Items relating to CUDA and STIR buildblock objects.
\ingroup buildblock
*/

/*!
\defgroup data_buildblock Acquisition data building blocks
\ingroup STIR_library
Expand Down
Loading
Loading