-
Notifications
You must be signed in to change notification settings - Fork 83
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add optional DUCC FFT support #463
Changes from all commits
6b17007
6479f47
bc6f789
0ff245a
2a8b39a
127e015
910d656
83c7ecb
2de0e50
34013ab
4fabaef
ff0944e
2142092
a252fff
32bc8a1
4975637
be575fc
09874c2
b7d10fd
601468a
9436792
9f86e46
c8cc746
aa957c4
fe42592
caae2fc
132a755
5206e14
5a5acb4
632b335
8b4fadf
17e8e04
6f8e4c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,7 @@ option(FINUFFT_USE_CUDA "Whether to build CUDA accelerated FINUFFT library (libc | |
option(FINUFFT_USE_CPU "Whether to build the ordinary FINUFFT library (libfinufft)." ON) | ||
option(FINUFFT_STATIC_LINKING "Whether to link the static FINUFFT library (libfinufft_static)." ON) | ||
option(FINUFFT_BUILD_DEVEL "Whether to build development executables" OFF) | ||
option(FINUFFT_USE_DUCC0 "Whether to use DUCC0 for CPU fft library" ON) | ||
# sphinx tag (don't remove): @cmake_opts_end | ||
|
||
if (FINUFFT_USE_CPU) | ||
|
@@ -53,9 +54,21 @@ if (FINUFFT_USE_CPU) | |
set(FFTW_VERSION 3.3.10) | ||
set(XTL_VERSION 0.7.7) | ||
set(XSIMD_VERSION 13.0.0) | ||
set(DUCC0_VERSION ducc0_0_34_0) | ||
set(FINUFFT_FFTW_LIBRARIES) | ||
include(cmake/setupCPM.cmake) | ||
include(cmake/setupFFTW.cmake) | ||
include(cmake/setupXSIMD.cmake) | ||
if (FINUFFT_USE_DUCC0) | ||
include(cmake/setupDUCC.cmake) | ||
else () | ||
include(cmake/setupFFTW.cmake) | ||
endif () | ||
endif () | ||
|
||
if (FINUFFT_USE_DUCC0) | ||
set (FINUFFT_FFTLIBS ducc0) | ||
else () | ||
set (FINUFFT_FFTLIBS ${FINUFFT_FFTW_LIBRARIES}) | ||
endif () | ||
|
||
if (FINUFFT_BUILD_MATLAB) | ||
|
@@ -84,7 +97,7 @@ endif () | |
|
||
# This set of sources is compiled twice, once in single precision and once in double precision | ||
# The single precision compilation is done with -DSINGLE | ||
set(FINUFFT_PRECISION_DEPENDENT_SOURCES src/finufft.cpp src/simpleinterfaces.cpp src/spreadinterp.cpp src/utils.cpp fortran/finufftfort.cpp) | ||
set(FINUFFT_PRECISION_DEPENDENT_SOURCES src/finufft.cpp src/fft.cpp src/simpleinterfaces.cpp src/spreadinterp.cpp src/utils.cpp fortran/finufftfort.cpp) | ||
|
||
# Set of compilers which behave like gcc | ||
set(FINUFFT_GNU_LIKE_COMPILERS AppleClang Clang GNU) | ||
|
@@ -104,6 +117,11 @@ endfunction() | |
|
||
# Utility function to link static/dynamic lib | ||
function(finufft_link_test target) | ||
|
||
if (FINUFFT_USE_DUCC0) | ||
target_compile_definitions(${target} PRIVATE FINUFFT_USE_DUCC0) | ||
endif () | ||
|
||
if (FINUFFT_STATIC_LINKING) | ||
target_link_libraries(${target} PRIVATE finufft_static) | ||
if (FINUFFT_USE_OPENMP) | ||
|
@@ -151,37 +169,52 @@ function(set_finufft_options target) | |
endif () | ||
endif () | ||
|
||
# FFTW CMAKE file includes the APIs only as an install target, so we need to manually | ||
# include them since we need them for build not for install | ||
# trying to include them directly into the fftw and fftwf targets causes issues with | ||
# the latest version of cmake, so we do it here instead. | ||
if ((NOT FFTW_FOUND) OR (FINUFFT_FFTW_LIBRARIES STREQUAL DOWNLOAD)) | ||
list(GET FINUFFT_FFTW_LIBRARIES 0 element) | ||
get_property(FFTW_SOURCE_DIR TARGET ${element} PROPERTY SOURCE_DIR) | ||
set(FFTW_INCLUDE_DIR ${FFTW_SOURCE_DIR}/api) | ||
target_include_directories(${target} PUBLIC ${FFTW_INCLUDE_DIR}) | ||
if (FINUFFT_USE_DUCC0) | ||
target_compile_definitions(${target} PRIVATE FINUFFT_USE_DUCC0) | ||
mreineck marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else () | ||
# FFTW CMAKE file includes the APIs only as an install target, so we need to manually | ||
# include them since we need them for build not for install | ||
# trying to include them directly into the fftw and fftwf targets causes issues with | ||
# the latest version of cmake, so we do it here instead. | ||
if ((NOT FFTW_FOUND) OR (FINUFFT_FFTW_LIBRARIES STREQUAL DOWNLOAD)) | ||
list(GET FINUFFT_FFTW_LIBRARIES 0 element) | ||
get_property(FFTW_SOURCE_DIR TARGET ${element} PROPERTY SOURCE_DIR) | ||
set(FFTW_INCLUDE_DIR ${FFTW_SOURCE_DIR}/api) | ||
target_include_directories(${target} PUBLIC ${FFTW_INCLUDE_DIR}) | ||
endif () | ||
endif () | ||
target_link_libraries(${target} PRIVATE xsimd) | ||
target_link_libraries(${target} PUBLIC ${FINUFFT_FFTW_LIBRARIES}) | ||
# target_link_libraries(${target} PRIVATE xsimd) | ||
# target_link_libraries(${target} PUBLIC ${FINUFFT_FFTW_LIBRARIES}) | ||
endfunction() | ||
|
||
if (FINUFFT_USE_CPU) | ||
# Main finufft libraries | ||
if (FINUFFT_USE_DUCC0) | ||
# okay for now, to be cleaned up | ||
set_finufft_options(ducc0) | ||
endif () | ||
|
||
add_library(finufft_f32 OBJECT ${FINUFFT_PRECISION_DEPENDENT_SOURCES}) | ||
target_compile_definitions(finufft_f32 PRIVATE SINGLE) | ||
set_finufft_options(finufft_f32) | ||
target_link_libraries(finufft_f32 PUBLIC ${FINUFFT_FFTLIBS}) | ||
target_link_libraries(finufft_f32 PRIVATE xsimd) | ||
lu1and10 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
add_library(finufft_f64 OBJECT ${FINUFFT_PRECISION_DEPENDENT_SOURCES}) | ||
target_compile_definitions(finufft_f64 PRIVATE) | ||
set_finufft_options(finufft_f64) | ||
target_link_libraries(finufft_f64 PUBLIC ${FINUFFT_FFTLIBS}) | ||
target_link_libraries(finufft_f64 PRIVATE xsimd) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @DiamonDinoia Marco, the reason windows can not find xsimd seems to be here, this line is moved out of set_finufft_options function. the below if(WIN32) does not add link xsimd. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great thanks for fixing it! |
||
if (WIN32) | ||
add_library(finufft_f32_dll OBJECT ${FINUFFT_PRECISION_DEPENDENT_SOURCES}) | ||
target_compile_definitions(finufft_f32_dll PRIVATE SINGLE dll_EXPORTS FINUFFT_DLL) | ||
set_finufft_options(finufft_f32_dll) | ||
target_link_libraries(finufft_f32_dll PUBLIC ${FINUFFT_FFTLIBS}) | ||
|
||
add_library(finufft_f64_dll OBJECT ${FINUFFT_PRECISION_DEPENDENT_SOURCES}) | ||
target_compile_definitions(finufft_f64_dll PRIVATE dll_EXPORTS FINUFFT_DLL) | ||
set_finufft_options(finufft_f64_dll) | ||
target_link_libraries(finufft_f64_dll PUBLIC ${FINUFFT_FFTLIBS}) | ||
endif () | ||
|
||
add_library(finufft SHARED src/utils_precindep.cpp contrib/legendre_rule_fast.cpp) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
CPMAddPackage( | ||
NAME ducc0 | ||
GIT_REPOSITORY https://gitlab.mpcdf.mpg.de/mtr/ducc.git | ||
GIT_TAG ${DUCC0_VERSION} | ||
DOWNLOAD_ONLY YES | ||
) | ||
|
||
|
||
if(ducc0_ADDED) | ||
add_library(ducc0 STATIC | ||
${ducc0_SOURCE_DIR}/src/ducc0/infra/string_utils.cc | ||
${ducc0_SOURCE_DIR}/src/ducc0/infra/threading.cc | ||
${ducc0_SOURCE_DIR}/src/ducc0/infra/mav.cc | ||
${ducc0_SOURCE_DIR}/src/ducc0/math/gridding_kernel.cc | ||
${ducc0_SOURCE_DIR}/src/ducc0/math/gl_integrator.cc | ||
) | ||
target_include_directories(ducc0 PUBLIC ${ducc0_SOURCE_DIR}/src/) | ||
target_compile_options(ducc0 PRIVATE -ffast-math) | ||
target_compile_features(ducc0 PRIVATE cxx_std_17) # private because we do not want to propagate this requirement | ||
|
||
if (NOT OpenMP_CXX_FOUND) | ||
find_package(Threads REQUIRED) | ||
target_link_libraries(ducc0 PRIVATE Threads::Threads) | ||
endif () | ||
endif () |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#ifndef FINUFFT_INCLUDE_FINUFFT_FFT_H | ||
#define FINUFFT_INCLUDE_FINUFFT_FFT_H | ||
|
||
#ifdef FINUFFT_USE_DUCC0 | ||
#include "ducc0/fft/fftnd_impl.h" | ||
#define FFTW_FORGET_WISDOM() // temporary hack since some tests call this unconditionally | ||
#define FFTW_CLEANUP() // temporary hack since some tests call this unconditionally | ||
#define FFTW_CLEANUP_THREADS() // temporary hack since some tests call this | ||
// unconditionally | ||
#else | ||
#include "fftw_defs.h" | ||
#endif | ||
#include <finufft/defs.h> | ||
|
||
int *gridsize_for_fft(FINUFFT_PLAN p); | ||
void do_fft(FINUFFT_PLAN p); | ||
|
||
#endif // FINUFFT_INCLUDE_FINUFFT_FFT_H |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
#include <algorithm> | ||
#include <finufft/fft.h> | ||
|
||
using namespace std; | ||
|
||
int *gridsize_for_fft(FINUFFT_PLAN p) { | ||
// local helper func returns a new int array of length dim, extracted from | ||
// the finufft plan, that fftw_plan_many_dft needs as its 2nd argument. | ||
int *nf; | ||
if (p->dim == 1) { | ||
nf = new int[1]; | ||
nf[0] = (int)p->nf1; | ||
} else if (p->dim == 2) { | ||
nf = new int[2]; | ||
nf[0] = (int)p->nf2; | ||
nf[1] = (int)p->nf1; | ||
} // fftw enforced row major ordering, ie dims are backwards ordered | ||
else { | ||
nf = new int[3]; | ||
nf[0] = (int)p->nf3; | ||
nf[1] = (int)p->nf2; | ||
nf[2] = (int)p->nf1; | ||
} | ||
return nf; | ||
} | ||
|
||
void do_fft(FINUFFT_PLAN p) { | ||
#ifdef FINUFFT_USE_DUCC0 | ||
ahbarnett marked this conversation as resolved.
Show resolved
Hide resolved
|
||
size_t nthreads = min<size_t>(MY_OMP_GET_MAX_THREADS(), p->opts.nthreads); | ||
int *ns = gridsize_for_fft(p); | ||
vector<size_t> arrdims, axes; | ||
arrdims.push_back(size_t(p->batchSize)); | ||
arrdims.push_back(size_t(ns[0])); | ||
axes.push_back(1); | ||
if (p->dim >= 2) { | ||
arrdims.push_back(size_t(ns[1])); | ||
axes.push_back(2); | ||
} | ||
if (p->dim >= 3) { | ||
arrdims.push_back(size_t(ns[2])); | ||
axes.push_back(3); | ||
} | ||
ducc0::vfmav<CPX> data(p->fwBatch, arrdims); | ||
#ifdef FINUFFT_NO_DUCC0_TWEAKS | ||
ducc0::c2c(data, data, axes, p->fftSign < 0, FLT(1), nthreads); | ||
#else | ||
/* For type 1 NUFFTs, only the low-frequency parts of the output fine grid are | ||
going to be used, and for type 2 NUFFTs, the high frequency parts of the | ||
input fine grid are zero by definition. This can be used to reduce the | ||
total FFT work for 2D and 3D NUFFTs. One of the FFT axes always has to be | ||
transformed fully (that's why there is no savings for 1D NUFFTs), for the | ||
second axis we need to do (roughly) a fraction of 1/oversampling_factor | ||
of all 1D FFTs, and for the last remaining axis the factor is | ||
1/oversampling_factor^2. */ | ||
if (p->dim == 1) // 1D: no chance for FFT shortcuts | ||
ducc0::c2c(data, data, axes, p->fftSign < 0, FLT(1), nthreads); | ||
else if (p->dim == 2) { // 2D: do partial FFTs | ||
if (p->ms < 2) // something is weird, do standard FFT | ||
ducc0::c2c(data, data, axes, p->fftSign < 0, FLT(1), nthreads); | ||
else { | ||
size_t y_lo = size_t((p->ms + 1) / 2); | ||
size_t y_hi = size_t(ns[1] - p->ms / 2); | ||
auto sub1 = ducc0::subarray(data, {{}, {}, {0, y_lo}}); | ||
auto sub2 = ducc0::subarray(data, {{}, {}, {y_hi, ducc0::MAXIDX}}); | ||
if (p->type == 1) // spreading, not all parts of the output array are needed | ||
// do axis 2 in full | ||
ducc0::c2c(data, data, {2}, p->fftSign < 0, FLT(1), nthreads); | ||
// do only parts of axis 1 | ||
ducc0::c2c(sub1, sub1, {1}, p->fftSign < 0, FLT(1), nthreads); | ||
ducc0::c2c(sub2, sub2, {1}, p->fftSign < 0, FLT(1), nthreads); | ||
if (p->type == 2) // interpolation, parts of the input array are zero | ||
// do axis 2 in full | ||
ducc0::c2c(data, data, {2}, p->fftSign < 0, FLT(1), nthreads); | ||
} | ||
} else { // 3D | ||
if ((p->ms < 2) || (p->mt < 2)) // something is weird, do standard FFT | ||
ducc0::c2c(data, data, axes, p->fftSign < 0, FLT(1), nthreads); | ||
else { | ||
size_t z_lo = size_t((p->ms + 1) / 2); | ||
size_t z_hi = size_t(ns[2] - p->ms / 2); | ||
size_t y_lo = size_t((p->mt + 1) / 2); | ||
size_t y_hi = size_t(ns[1] - p->mt / 2); | ||
auto sub1 = ducc0::subarray(data, {{}, {}, {}, {0, z_lo}}); | ||
auto sub2 = ducc0::subarray(data, {{}, {}, {}, {z_hi, ducc0::MAXIDX}}); | ||
auto sub3 = ducc0::subarray(sub1, {{}, {}, {0, y_lo}, {}}); | ||
auto sub4 = ducc0::subarray(sub1, {{}, {}, {y_hi, ducc0::MAXIDX}, {}}); | ||
auto sub5 = ducc0::subarray(sub2, {{}, {}, {0, y_lo}, {}}); | ||
auto sub6 = ducc0::subarray(sub2, {{}, {}, {y_hi, ducc0::MAXIDX}, {}}); | ||
if (p->type == 1) { // spreading, not all parts of the output array are needed | ||
// do axis 3 in full | ||
ducc0::c2c(data, data, {3}, p->fftSign < 0, FLT(1), nthreads); | ||
// do only parts of axis 2 | ||
ducc0::c2c(sub1, sub1, {2}, p->fftSign < 0, FLT(1), nthreads); | ||
ducc0::c2c(sub2, sub2, {2}, p->fftSign < 0, FLT(1), nthreads); | ||
} | ||
// do even smaller parts of axis 1 | ||
ducc0::c2c(sub3, sub3, {1}, p->fftSign < 0, FLT(1), nthreads); | ||
ducc0::c2c(sub4, sub4, {1}, p->fftSign < 0, FLT(1), nthreads); | ||
ducc0::c2c(sub5, sub5, {1}, p->fftSign < 0, FLT(1), nthreads); | ||
ducc0::c2c(sub6, sub6, {1}, p->fftSign < 0, FLT(1), nthreads); | ||
if (p->type == 2) { // interpolation, parts of the input array are zero | ||
// do only parts of axis 2 | ||
ducc0::c2c(sub1, sub1, {2}, p->fftSign < 0, FLT(1), nthreads); | ||
ducc0::c2c(sub2, sub2, {2}, p->fftSign < 0, FLT(1), nthreads); | ||
// do axis 3 in full | ||
ducc0::c2c(data, data, {3}, p->fftSign < 0, FLT(1), nthreads); | ||
} | ||
} | ||
} | ||
#endif | ||
delete[] ns; | ||
#else | ||
FFTW_EX(p->fftwPlan); // if thisBatchSize<batchSize it wastes some flops | ||
#endif | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, this has to be OFF by default, since users will need to be eased into this over time, if it turns out to be useful for them..