Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new P3M tuning options #5017

Merged
merged 4 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/doxygen/Doxyfile.in
Original file line number Diff line number Diff line change
Expand Up @@ -1511,7 +1511,7 @@ MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/
# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
# This tag requires that the tag USE_MATHJAX is set to YES.

MATHJAX_EXTENSIONS =
MATHJAX_EXTENSIONS = TeX/AMSmath

# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
# of code that will be used on startup of the MathJax code. See the MathJax site
Expand Down
8 changes: 4 additions & 4 deletions src/core/electrostatics/elc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,10 +368,6 @@ ElectrostaticLayerCorrection::z_energy(ParticleRange const &particles) const {
auto const &box_geo = *get_system().box_geo;
auto const xy_area_inv = box_geo.length_inv()[0] * box_geo.length_inv()[1];
auto const pref = prefactor * 2. * std::numbers::pi * xy_area_inv;
auto const delta = elc.delta_mid_top * elc.delta_mid_bot;
auto const fac_delta_mid_bot = elc.delta_mid_bot / (1. - delta);
auto const fac_delta_mid_top = elc.delta_mid_top / (1. - delta);
auto const fac_delta = delta / (1. - delta);

/* for non-neutral systems, this shift gives the background contribution
* (rsp. for this shift, the DM of the background is zero) */
Expand All @@ -397,6 +393,10 @@ ElectrostaticLayerCorrection::z_energy(ParticleRange const &particles) const {
}
} else {
// metallic boundaries
auto const delta = elc.delta_mid_top * elc.delta_mid_bot;
auto const fac_delta_mid_bot = elc.delta_mid_bot / (1. - delta);
auto const fac_delta_mid_top = elc.delta_mid_top / (1. - delta);
auto const fac_delta = delta / (1. - delta);
clear_vec(gblcblk, size);
auto const h = elc.box_h;
ImageSum const image_sum{delta, shift, lz};
Expand Down
56 changes: 51 additions & 5 deletions src/core/electrostatics/p3m.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
#include <utils/integral_parameter.hpp>
#include <utils/math/int_pow.hpp>
#include <utils/math/sqr.hpp>
#include <utils/serialization/array.hpp>

#include <boost/mpi/collectives/all_reduce.hpp>
#include <boost/mpi/collectives/broadcast.hpp>
Expand Down Expand Up @@ -126,7 +127,7 @@ void CoulombP3MImpl<FloatType, Architecture>::count_charged_particles() {
template <typename FloatType, Arch Architecture>
void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
p3m.g_force = grid_influence_function<FloatType, 1>(
p3m.g_force = grid_influence_function<FloatType, 1, P3M_BRILLOUIN>(
p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ,
get_system().box_geo->length_inv());
}
Expand All @@ -137,7 +138,7 @@ void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
template <typename FloatType, Arch Architecture>
void CoulombP3MImpl<FloatType, Architecture>::calc_influence_function_energy() {
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
p3m.g_energy = grid_influence_function<FloatType, 0>(
p3m.g_energy = grid_influence_function<FloatType, 0, P3M_BRILLOUIN>(
p3m.params, p3m.mesh.start, p3m.mesh.stop, KX, KY, KZ,
get_system().box_geo->length_inv());
}
Expand Down Expand Up @@ -597,14 +598,17 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
double m_mesh_density_min = -1., m_mesh_density_max = -1.;
// indicates if mesh should be tuned
bool m_tune_mesh = false;
std::pair<std::optional<int>, std::optional<int>> m_tune_limits;

protected:
P3MParameters &get_params() override { return p3m.params; }

public:
CoulombTuningAlgorithm(System::System &system, auto &input_p3m,
double prefactor, int timings)
: TuningAlgorithm(system, prefactor, timings), p3m{input_p3m} {}
double prefactor, int timings,
decltype(m_tune_limits) tune_limits)
: TuningAlgorithm(system, prefactor, timings), p3m{input_p3m},
m_tune_limits{std::move(tune_limits)} {}

void on_solver_change() const override { m_system.on_coulomb_change(); }

Expand Down Expand Up @@ -632,6 +636,38 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
return {};
}

std::optional<std::string> fft_decomposition_veto(
Utils::Vector3i const &mesh_size_r_space) const override {
#ifdef CUDA
if constexpr (Architecture == Arch::GPU) {
return std::nullopt;
}
#endif
using Array3i = Utils::Array<int, 3u>;
auto const [KX, KY, KZ] = p3m.fft->get_permutations();
auto valid_decomposition = false;
Array3i mesh_size_k_space = {};
boost::mpi::reduce(
::comm_cart, Array3i(p3m.mesh.stop), mesh_size_k_space,
[](Array3i const &lhs, Array3i const &rhs) {
return Array3i{{std::max(lhs[0u], rhs[0u]),
std::max(lhs[1u], rhs[1u]),
std::max(lhs[2u], rhs[2u])}};
},
0);
if (::this_node == 0) {
valid_decomposition = (mesh_size_r_space[0u] == mesh_size_k_space[KX] and
mesh_size_r_space[1u] == mesh_size_k_space[KY] and
mesh_size_r_space[2u] == mesh_size_k_space[KZ]);
}
boost::mpi::broadcast(::comm_cart, valid_decomposition, 0);
std::optional<std::string> retval{"conflict with FFT domain decomposition"};
if (valid_decomposition) {
retval = std::nullopt;
}
return retval;
}

std::tuple<double, double, double, double>
calculate_accuracy(Utils::Vector3i const &mesh, int cao,
double r_cut_iL) const override {
Expand Down Expand Up @@ -689,6 +725,16 @@ class CoulombTuningAlgorithm : public TuningAlgorithm {
max_npart_per_dim, std::cbrt(static_cast<double>(p3m.sum_qpart)));
m_mesh_density_min = min_npart_per_dim / normalized_box_dim;
m_mesh_density_max = max_npart_per_dim / normalized_box_dim;
if (m_tune_limits.first or m_tune_limits.second) {
auto const &box_l = box_geo.length();
auto const dim = std::max({box_l[0], box_l[1], box_l[2]});
if (m_tune_limits.first) {
m_mesh_density_min = static_cast<double>(*m_tune_limits.first) / dim;
}
if (m_tune_limits.second) {
m_mesh_density_max = static_cast<double>(*m_tune_limits.second) / dim;
}
}
m_tune_mesh = true;
} else {
m_mesh_density_min = m_mesh_density_max = mesh_density;
Expand Down Expand Up @@ -772,7 +818,7 @@ void CoulombP3MImpl<FloatType, Architecture>::tune() {
}
try {
CoulombTuningAlgorithm<FloatType, Architecture> parameters(
system, p3m, prefactor, tune_timings);
system, p3m, prefactor, tune_timings, tune_limits);
parameters.setup_logger(tune_verbose);
// parameter ranges
parameters.determine_mesh_limits();
Expand Down
6 changes: 4 additions & 2 deletions src/core/electrostatics/p3m.impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <utils/Vector.hpp>

#include <memory>
#include <optional>
#include <stdexcept>
#include <type_traits>
#include <utility>
Expand Down Expand Up @@ -69,6 +70,7 @@ struct CoulombP3MImpl : public CoulombP3M {
/** @brief Coulomb P3M meshes and FFT algorithm. */
std::unique_ptr<p3m_data_struct_coulomb<FloatType>> p3m_impl;
int tune_timings;
std::pair<std::optional<int>, std::optional<int>> tune_limits;
bool tune_verbose;
bool check_complex_residuals;
bool m_is_tuned;
Expand All @@ -77,10 +79,10 @@ struct CoulombP3MImpl : public CoulombP3M {
CoulombP3MImpl(
std::unique_ptr<p3m_data_struct_coulomb<FloatType>> &&p3m_handle,
double prefactor, int tune_timings, bool tune_verbose,
bool check_complex_residuals)
decltype(tune_limits) tune_limits, bool check_complex_residuals)
: CoulombP3M(p3m_handle->params), p3m{*p3m_handle},
p3m_impl{std::move(p3m_handle)}, tune_timings{tune_timings},
tune_verbose{tune_verbose},
tune_limits{std::move(tune_limits)}, tune_verbose{tune_verbose},
check_complex_residuals{check_complex_residuals} {

if (tune_timings <= 0) {
Expand Down
18 changes: 15 additions & 3 deletions src/core/magnetostatics/dlc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
#include <cmath>
#include <cstdio>
#include <functional>
#include <limits>
#include <numbers>
#include <numeric>
#include <stdexcept>
Expand Down Expand Up @@ -430,15 +431,26 @@ double DipolarLayerCorrection::tune_far_cut() const {
}

auto constexpr limitkc = 200;
auto constexpr exp_max = +709.8; // for IEEE-compatible double
auto constexpr exp_min = -708.4; // for IEEE-compatible double
auto const log_max = std::log(std::numeric_limits<double>::max());
auto const piarea = std::numbers::pi / (lx * ly);
auto const nmp = static_cast<double>(count_magnetic_particles(system));
auto const h = dlc.box_h;
auto far_cut = -1.;
for (int kc = 1; kc < limitkc; kc++) {
auto const gc = kc * 2. * std::numbers::pi / lx;
auto const fa0 = sqrt(9. * exp(+2. * gc * h) * g1_DLC_dip(gc, lz - h) +
9. * exp(-2. * gc * h) * g1_DLC_dip(gc, lz + h) +
22. * g1_DLC_dip(gc, lz));
auto const pref1 = g1_DLC_dip(gc, lz - h);
auto const pref2 = g1_DLC_dip(gc, lz + h);
auto const pref0 = g1_DLC_dip(gc, lz);
auto const exp_term = 2. * gc * h;
auto const log_term1 = exp_term + std::log(9. * pref1);
if (exp_term >= exp_max or log_term1 >= log_max or -exp_term <= exp_min) {
// no solution can be found at larger k values due to overflow/underflow
break;
}
auto const fa0 = std::sqrt(9. * std::exp(+exp_term) * pref1 +
9. * std::exp(-exp_term) * pref2 + 22. * pref0);
Comment on lines +443 to +453
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably a task for a coding day, but I think we should investigate where the need for such large values comes from. We either have the same problem in ELC or this situation can be prevented by a proper tuning. Either way I think it's worth taking a look at some point.

auto const fa1 = sqrt(0.125 * piarea) * fa0;
auto const fa2 = g2_DLC_dip(gc, lz);
auto const de = nmp * mu_max_sq / (4. * (exp(gc * lz) - 1.)) * (fa1 + fa2);
Expand Down
25 changes: 18 additions & 7 deletions src/core/magnetostatics/dp3m.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
#include <sstream>
#include <stdexcept>
#include <tuple>
#include <utility>
#include <vector>

#ifdef FFTW3_H
Expand Down Expand Up @@ -115,8 +116,9 @@ double
DipolarP3MImpl<FloatType, Architecture>::calc_average_self_energy_k_space()
const {
auto const &box_geo = *get_system().box_geo;
auto const node_phi = grid_influence_function_self_energy(
dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);
auto const node_phi =
grid_influence_function_self_energy<FloatType, P3M_BRILLOUIN>(
dp3m.params, dp3m.mesh.start, dp3m.mesh.stop, dp3m.g_energy);

double phi = 0.;
boost::mpi::reduce(comm_cart, node_phi, phi, std::plus<>(), 0);
Expand Down Expand Up @@ -518,14 +520,14 @@ double DipolarP3MImpl<FloatType, Architecture>::calc_surface_term(

template <typename FloatType, Arch Architecture>
void DipolarP3MImpl<FloatType, Architecture>::calc_influence_function_force() {
dp3m.g_force = grid_influence_function<FloatType, 3>(
dp3m.g_force = grid_influence_function<FloatType, 3, P3M_BRILLOUIN>(
dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
get_system().box_geo->length_inv());
}

template <typename FloatType, Arch Architecture>
void DipolarP3MImpl<FloatType, Architecture>::calc_influence_function_energy() {
dp3m.g_energy = grid_influence_function<FloatType, 2>(
dp3m.g_energy = grid_influence_function<FloatType, 2, P3M_BRILLOUIN>(
dp3m.params, dp3m.mesh.start, dp3m.mesh.stop,
get_system().box_geo->length_inv());
}
Expand All @@ -534,11 +536,14 @@ template <typename FloatType, Arch Architecture>
class DipolarTuningAlgorithm : public TuningAlgorithm {
p3m_data_struct_dipoles<FloatType> &dp3m;
int m_mesh_max = -1, m_mesh_min = -1;
std::pair<std::optional<int>, std::optional<int>> m_tune_limits;

public:
DipolarTuningAlgorithm(System::System &system, decltype(dp3m) &input_dp3m,
double prefactor, int timings)
: TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m} {}
double prefactor, int timings,
decltype(m_tune_limits) tune_limits)
: TuningAlgorithm(system, prefactor, timings), dp3m{input_dp3m},
m_tune_limits{std::move(tune_limits)} {}

P3MParameters &get_params() override { return dp3m.params; }

Expand Down Expand Up @@ -603,6 +608,12 @@ class DipolarTuningAlgorithm : public TuningAlgorithm {
m_mesh_min = static_cast<int>(std::round(std::pow(2., std::floor(expo))));
/* avoid using more than 1 GB of FFT arrays */
m_mesh_max = 128;
if (m_tune_limits.first) {
m_mesh_min = *m_tune_limits.first;
}
if (m_tune_limits.second) {
m_mesh_max = *m_tune_limits.second;
}
} else {
m_mesh_min = m_mesh_max = dp3m.params.mesh[0];
m_logger->report_fixed_mesh(dp3m.params.mesh);
Expand Down Expand Up @@ -662,7 +673,7 @@ void DipolarP3MImpl<FloatType, Architecture>::tune() {
}
try {
DipolarTuningAlgorithm<FloatType, Architecture> parameters(
system, dp3m, prefactor, tune_timings);
system, dp3m, prefactor, tune_timings, tune_limits);
parameters.setup_logger(tune_verbose);
// parameter ranges
parameters.determine_mesh_limits();
Expand Down
7 changes: 5 additions & 2 deletions src/core/magnetostatics/dp3m.impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <utils/Vector.hpp>

#include <memory>
#include <optional>
#include <stdexcept>
#include <type_traits>
#include <utility>
Expand Down Expand Up @@ -72,16 +73,18 @@ struct DipolarP3MImpl : public DipolarP3M {
/** @brief Coulomb P3M meshes and FFT algorithm. */
std::unique_ptr<p3m_data_struct_dipoles<FloatType>> dp3m_impl;
int tune_timings;
std::pair<std::optional<int>, std::optional<int>> tune_limits;
bool tune_verbose;
bool m_is_tuned;

public:
DipolarP3MImpl(
std::unique_ptr<p3m_data_struct_dipoles<FloatType>> &&dp3m_handle,
double prefactor, int tune_timings, bool tune_verbose)
double prefactor, int tune_timings, bool tune_verbose,
decltype(tune_limits) tune_limits)
: DipolarP3M(dp3m_handle->params), dp3m{*dp3m_handle},
dp3m_impl{std::move(dp3m_handle)}, tune_timings{tune_timings},
tune_verbose{tune_verbose} {
tune_limits{std::move(tune_limits)}, tune_verbose{tune_verbose} {

if (tune_timings <= 0) {
throw std::domain_error("Parameter 'timings' must be > 0");
Expand Down
20 changes: 15 additions & 5 deletions src/core/p3m/TuningAlgorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ static auto constexpr P3M_TUNE_CAO_TOO_LARGE = 1.;
static auto constexpr P3M_TUNE_ELC_GAP_SIZE = 2.;
/** could not achieve target accuracy */
static auto constexpr P3M_TUNE_ACCURACY_TOO_LARGE = 3.;
/** conflict with FFT domain decomposition */
static auto constexpr P3M_TUNE_FFT_MESH_SIZE = 4.;
/**@}*/

/** @brief Precision threshold for a non-zero real-space cutoff. */
Expand Down Expand Up @@ -110,7 +112,7 @@ void TuningAlgorithm::commit(Utils::Vector3i const &mesh, int cao,
* @param[in,out] tuned_accuracy @copybrief P3MParameters::accuracy
*
* @returns The integration time in case of success, otherwise
* -@ref P3M_TUNE_ACCURACY_TOO_LARGE,
* -@ref P3M_TUNE_ACCURACY_TOO_LARGE, -@ref P3M_TUNE_FFT_MESH_SIZE,
* -@ref P3M_TUNE_CAO_TOO_LARGE, or -@ref P3M_TUNE_ELC_GAP_SIZE
*/
double TuningAlgorithm::get_mc_time(Utils::Vector3i const &mesh, int cao,
Expand Down Expand Up @@ -171,17 +173,25 @@ double TuningAlgorithm::get_mc_time(Utils::Vector3i const &mesh, int cao,
* we know that the desired minimal accuracy is obtained */
tuned_r_cut_iL = r_cut_iL = r_cut_iL_max;

auto const report_veto = [&](auto const &veto) {
if (veto) {
m_logger->log_skip(*veto, mesh[0], cao, r_cut_iL, tuned_alpha_L,
tuned_accuracy, rs_err, ks_err);
}
return static_cast<bool>(veto);
};

/* if we are running P3M+ELC, check that r_cut is compatible */
auto const r_cut = r_cut_iL * box_geo.length()[0];
auto const veto = layer_correction_veto_r_cut(r_cut);
if (veto) {
m_logger->log_skip(*veto, mesh[0], cao, r_cut_iL, tuned_alpha_L,
tuned_accuracy, rs_err, ks_err);
if (report_veto(layer_correction_veto_r_cut(r_cut))) {
return -P3M_TUNE_ELC_GAP_SIZE;
}

commit(mesh, cao, r_cut_iL, tuned_alpha_L);
on_solver_change();
if (report_veto(fft_decomposition_veto(mesh))) {
return -P3M_TUNE_FFT_MESH_SIZE;
}
auto const int_time = benchmark_integration_step(m_system, m_timings);

std::tie(tuned_accuracy, rs_err, ks_err, tuned_alpha_L) =
Expand Down
6 changes: 6 additions & 0 deletions src/core/p3m/TuningAlgorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,12 @@ class TuningAlgorithm {
virtual std::optional<std::string>
layer_correction_veto_r_cut(double r_cut) const = 0;

/** @brief Veto FFT decomposition in non-cubic boxes. */
virtual std::optional<std::string>
fft_decomposition_veto(Utils::Vector3i const &) const {
return std::nullopt;
}

/** @brief Write tuned parameters to the P3M parameter struct. */
void commit(Utils::Vector3i const &mesh, int cao, double r_cut_iL,
double alpha_L);
Expand Down
Loading
Loading