gpu nlvlm + cleanup + pretty output
samayala22 committed Aug 28, 2024
1 parent c8be417 commit e6de862
Showing 10 changed files with 128 additions and 88 deletions.
14 changes: 6 additions & 8 deletions headeronly/tinycpuid.hpp
@@ -6,11 +6,9 @@
#include <cstdlib>
#include <cstring>
#include <stdint.h>
#include <string_view>
#include <string>
#include <map>

#include "tinycmap.hpp"

namespace tiny {

#if defined(_MSC_VER)
@@ -113,7 +111,7 @@ static bool set_cpu_affinity(int logical_cpu) {
#error "Unsupported platform"
#endif

#define EXTRACTS_BITS(reg, highbit, lowbit) ((reg >> lowbit) & ((1ULL << (highbit - lowbit + 1)) - 1))
#define EXTRACTS_BITS(reg, highbit, lowbit) (((reg) >> (lowbit)) & ((1ULL << ((highbit) - (lowbit) + 1)) - 1))

bool HWMTSupported() {
unsigned int e_x[4];
@@ -145,7 +143,7 @@ class CPUID {
CPUID();
~CPUID() = default;
void print_info() noexcept;
constexpr bool has(const std::string_view feature) noexcept;
constexpr bool has(const std::string& feature) noexcept;
private:
unsigned int instr_set = 0u; // bitfield of supported instruction sets

@@ -168,7 +166,7 @@ struct Bytes {
};
};

static constexpr auto InstrSets = tiny::make_map<std::string_view, unsigned int>({
static auto InstrSets = std::map<std::string, unsigned int>({
{"MMX", 1u << 0},
{"SSE", 1u << 1},
{"SSE2", 1u << 2},
@@ -343,7 +341,7 @@ void CPUID::get_instr_set() noexcept {
if (eax & (1 << 5)) instr_set |= InstrSets["AVX512_BF16"];
}

constexpr bool CPUID::has(std::string_view feature) noexcept {
constexpr bool CPUID::has(const std::string& feature) noexcept {
return (instr_set & InstrSets[feature]);
}

@@ -396,7 +394,7 @@ void CPUID::print_info() noexcept {
std::cout << "Logical cores: " << threads << "\n";

std::cout << "Instruction sets: ";
for (auto& [key, value] : InstrSets.data) {
for (auto& [key, value] : InstrSets) {
if (instr_set & value) {
std::cout << key << " ";
}
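The macro fix above adds parentheses around each argument. A minimal standalone sketch (not part of the commit, hypothetical values) of why that matters when an argument is itself an expression:

#include <cassert>
#include <cstdint>

// Old and new macro bodies copied from the diff above.
#define EXTRACTS_BITS_OLD(reg, highbit, lowbit) ((reg >> lowbit) & ((1ULL << (highbit - lowbit + 1)) - 1))
#define EXTRACTS_BITS_NEW(reg, highbit, lowbit) (((reg) >> (lowbit)) & ((1ULL << ((highbit) - (lowbit) + 1)) - 1))

int main() {
    const uint64_t a = 0xF0, b = 0x0F;
    // Intent: extract bits 7..4 of (a | b) == 0xFF, i.e. 0xF.
    // The old macro expands its first operand as "a | (b >> 4)" because ">>"
    // binds tighter than "|", so only b gets shifted and the result is 0.
    assert(EXTRACTS_BITS_OLD(a | b, 7, 4) == 0x0); // precedence bug
    assert(EXTRACTS_BITS_NEW(a | b, 7, 4) == 0xF); // intended result
    return 0;
}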
69 changes: 69 additions & 0 deletions headeronly/tinycpuid2.hpp
@@ -0,0 +1,69 @@
#pragma once

#include <string>

#if defined(_MSC_VER)
#include <intrin.h>
static inline void cpuid(unsigned int info[4], unsigned int eax, unsigned int ecx) {
__cpuidex(reinterpret_cast<int*>(info), static_cast<int>(eax), static_cast<int>(ecx));
}
static inline void cpuid(unsigned int info[4], unsigned int eax) {
__cpuid(reinterpret_cast<int*>(info), static_cast<int>(eax));
}
#else // GCC, Clang, ICC
#include <cpuid.h>
static inline void cpuid(unsigned int info[4], unsigned int eax, unsigned int ecx) {
__cpuid_count(eax, ecx, info[0], info[1], info[2], info[3]);
}
static inline void cpuid(unsigned int info[4], unsigned int eax) {
__cpuid(eax, info[0], info[1], info[2], info[3]);
}
#endif // _MSC_VER

namespace tiny {

class CPUID2 {
public:
std::string vendor;
std::string full_name;

CPUID2();
~CPUID2() = default;
private:
void get_vendor() noexcept;
void get_full_name() noexcept;
};

CPUID2::CPUID2() {
get_vendor();
get_full_name();
}

inline void CPUID2::get_vendor() noexcept {
unsigned int cpu_info[4] = {0, 0, 0, 0}; // registers
// Call to cpuid with eax = 0
cpuid(cpu_info, 0);

// The vendor string is composed as EBX + EDX + ECX.
// Each reg is 4 bytes so the string is 3 x 4 + 1 (null terminator) = 13 bytes
vendor.clear();
vendor.reserve(12);
vendor.append(reinterpret_cast<const char*>(&cpu_info[1]), 4);
vendor.append(reinterpret_cast<const char*>(&cpu_info[3]), 4);
vendor.append(reinterpret_cast<const char*>(&cpu_info[2]), 4);
}

inline void CPUID2::get_full_name() noexcept {
unsigned int cpu_info[4] = {0, 0, 0, 0}; // registers
full_name.clear();
full_name.reserve(48); // 3 * 16 bytes (cpu_info)

// Use Extended Function CPUID
unsigned int eax_leafs[3] = {0x8000'0002, 0x8000'0003, 0x8000'0004};
for (unsigned int i = 0; i < 3; i++) {
cpuid(cpu_info, eax_leafs[i]);
full_name.append(reinterpret_cast<const char*>(&cpu_info[0]), 16);
}
}

} // tiny namespace
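A minimal usage sketch for the new header (not part of the commit; assumes tinycpuid2.hpp is on the include path and an x86 target):

#include <cstdio>
#include "tinycpuid2.hpp"

int main() {
    tiny::CPUID2 cpuid; // runs CPUID leaves 0x0 and 0x8000'0002..0x8000'0004
    std::printf("Vendor: %s\n", cpuid.vendor.c_str());    // 12-byte EBX+EDX+ECX string
    std::printf("Name:   %s\n", cpuid.full_name.c_str()); // 48-byte brand string
    return 0;
}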
48 changes: 21 additions & 27 deletions tests/nlvlm_curves.cpp
@@ -47,26 +47,9 @@ void linspace(T start, T end, u64 n, std::vector<T>& out) {
}
}

template<typename T>
void write_vector_pair(const std::string& filename, const std::vector<T>& vec1, const std::vector<T>& vec2) {
assert(vec1.size() == vec2.size());
std::ofstream outFile(filename + ".dat");

// Check if file is open
if (!outFile.is_open()) throw std::runtime_error("Failed to open file: " + filename);

const u64 n = vec1.size();
outFile << n << '\n';
for (u64 i = 0; i < n; i++) {
outFile << vec1[i] << ' ' << vec2[i] << '\n';
}
outFile.close();
}

int main(int /*argc*/, char** /*argv*/) {
const std::vector<std::string> meshes = {"../../../../mesh/infinite_rectangular_5x200.x"};
// const std::vector<std::string> backends = get_available_backends();
const std::vector<std::string> backends = {"cpu"};
const std::vector<std::string> backends = get_available_backends();
std::vector<std::pair<std::string, std::unique_ptr<LiftCurveFunctor>>> lift_curves;
lift_curves.emplace_back(std::make_pair("spallart1", std::make_unique<SpallartLiftCurve>(1.2f, 0.28f, 0.02f, 2.f*PI_f, 2.f*PI_f)));
lift_curves.emplace_back(std::make_pair("spallart2", std::make_unique<SpallartLiftCurve>(0.72f, 0.28f, 0.04f, 2.f*PI_f, 1.5f*PI_f)));
@@ -75,7 +58,6 @@ int main(int /*argc*/, char** /*argv*/) {
std::vector<f32> test_alphas = {0, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20};

std::transform(test_alphas.begin(), test_alphas.end(), test_alphas.begin(), to_radians);
std::vector<f32> test_cl(test_alphas.size());

std::vector<f32> db_alphas;
linspace(to_radians(0.f), to_radians(20.f), 100, db_alphas);
@@ -87,8 +69,9 @@
NLVLM simulation{backend_name, {mesh_name}};

for (const auto& lift_curve : lift_curves) {
std::printf("LIFT CURVE: %s\n", lift_curve.first.c_str());

std::transform(db_alphas.begin(), db_alphas.end(), db_cl.begin(), [&lift_curve](float alpha){ return (*lift_curve.second)(alpha); });
write_vector_pair(lift_curve.first + "_analytical_cl", db_alphas, db_cl);

Database db;
db.profiles.emplace_back(
@@ -103,18 +86,29 @@
);
db.profiles_pos.emplace_back(0.0f);

std::printf("\n| Alpha | CL | CD | CMx | CMy | CMz | CL Error | CD Error |\n");
std::printf("|------------|------------|------------|------------|------------|------------|-------------|-------------|\n");
for (u64 i = 0; i < test_alphas.size(); i++) {
const FlowData flow{test_alphas[i], 0.0f, 1.0f, 1.0f};
auto coeffs = simulation.run(flow, db);
test_cl[i] = coeffs.cl;
std::printf(">>> Alpha: %.1f | CL = %.6f CD = %.6f CMx = %.6f CMy = %.6f CMz = %.6f\n", to_degrees(test_alphas[i]), coeffs.cl, coeffs.cd, coeffs.cm.x, coeffs.cm.y, coeffs.cm.z);

const f32 analytical_cl = (*lift_curve.second)(flow.alpha);
const f32 abs_error = std::abs(coeffs.cl - analytical_cl);
const f32 rel_error = abs_error / (analytical_cl + std::numeric_limits<f32>::epsilon());
std::printf(">>> Analytical: %.6f | Abs Error: %.3E | Relative Error: %.5f%% \n", analytical_cl, abs_error, rel_error*100.f);
if (rel_error > 0.01f) return 1; // Failure
const f32 cl_aerr = std::abs(coeffs.cl - analytical_cl);
const f32 cl_rerr = cl_aerr / (analytical_cl + std::numeric_limits<f32>::epsilon());
std::printf("| %10.1f | %10.6f | %10.7f | %10.6f | %10.6f | %10.6f | %10.3f%% | %10.3f%% |\n",
to_degrees(flow.alpha),
coeffs.cl,
coeffs.cd,
coeffs.cm.x,
coeffs.cm.y,
coeffs.cm.z,
cl_rerr * 100.0f,
0.0f
);

if (cl_rerr > 0.01f) return 1; // Failure
}
write_vector_pair(lift_curve.first + "_nonlinear_cl", test_alphas, test_cl);
std::printf("\n");
}
}

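The pass/fail criterion in the loop above compares the simulated CL against the analytical lift curve using an epsilon-guarded relative error. A small sketch of that check in isolation (helper name is hypothetical):

#include <cmath>
#include <limits>

// Returns true when cl is within tol (default 1%) of the analytical reference.
// The epsilon in the denominator avoids a division by zero if the reference CL
// happens to be zero.
static bool cl_within_tolerance(float cl, float cl_ref, float tol = 0.01f) {
    const float rel_err = std::abs(cl - cl_ref) / (cl_ref + std::numeric_limits<float>::epsilon());
    return rel_err <= tol;
}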
3 changes: 0 additions & 3 deletions tests/vlm_elliptic_coeffs.cpp
@@ -47,9 +47,6 @@ int main(int /*argc*/, char ** /*argv*/) {

auto solvers = tiny::make_combination(meshes, backends);
for (const auto& [mesh_name, backend_name] : solvers) {
std::printf("\nBACKEND: %s\n", backend_name.get().c_str());
std::printf("MESH: %s\n", mesh_name.get().c_str());

VLM simulation{backend_name, {mesh_name}};

std::printf("\n| Alpha | CL | CD | CMx | CMy | CMz | CL Error | CD Error |\n");
5 changes: 0 additions & 5 deletions tests/vlm_square_coeffs.cpp
@@ -36,11 +36,6 @@ int main(int /*argc*/, char ** /*argv*/) {

auto solvers = tiny::make_combination(meshes, backends);
for (const auto& [meshes_names, backend_name] : solvers) {
std::printf("\nBACKEND: %s\n", backend_name.get().c_str());
for (const auto& mesh_name : meshes_names.get()) {
std::printf("MESH: %s\n", mesh_name.c_str());
}

VLM simulation{backend_name, meshes_names};

std::printf("\n| Alpha | CL | CD | CMx | CMy | CMz | CL Error | CD Error |\n");
13 changes: 11 additions & 2 deletions vlm/backends/cpu/src/vlm_backend_cpu.cpp
@@ -3,6 +3,7 @@

#include "linalg.h"
#include "tinytimer.hpp"
#include "tinycpuid2.hpp"
#include "vlm_mesh.hpp"
#include "vlm_data.hpp"
#include "vlm_types.hpp"
@@ -12,8 +13,8 @@
#include <algorithm> // std::fill
#include <iostream> // std::cout
#include <cstdio> // std::printf
#include <thread> // std::hardware_concurrency()

#include <stdint.h>
#include <taskflow/algorithm/for_each.hpp>

#include <lapacke.h>
@@ -33,7 +34,15 @@ class MemoryCPU final : public Memory {
void fill_f32(MemoryLocation location, float* ptr, float value, std::size_t size) const override {std::fill(ptr, ptr + size, value);}
};

BackendCPU::BackendCPU() : Backend(std::make_unique<MemoryCPU>()) {}
void print_cpu_info() {
tiny::CPUID2 cpuid;
std::printf("DEVICE: %s (%d threads)\n", cpuid.full_name.c_str(), std::thread::hardware_concurrency());
}

BackendCPU::BackendCPU() : Backend(std::make_unique<MemoryCPU>()) {
print_cpu_info();
}

BackendCPU::~BackendCPU() {}

/// @brief Compute the gamma_delta vector
28 changes: 6 additions & 22 deletions vlm/backends/cuda/src/vlm_backend_cuda.cu
@@ -50,27 +50,11 @@ using namespace vlm;
} \
} while (0)

void printCudaInfo() {
int deviceCount = 0;
cudaError_t err = cudaGetDeviceCount(&deviceCount);

std::printf("----- CUDA Device information -----\n");
std::printf("Found %d device(s)\n", deviceCount);
// Get CUDA Runtime version
int cudaRuntimeVersion = 0;
cudaRuntimeGetVersion(&cudaRuntimeVersion);
std::printf("CUDA Runtime: %d.%d\n", cudaRuntimeVersion / 1000, (cudaRuntimeVersion % 100) / 10);

for (int i=0; i<deviceCount; i++) {
cudaDeviceProp deviceProps;
cudaGetDeviceProperties(&deviceProps, i);
std::printf("Device %d: %s\n", i, deviceProps.name);
std::printf(" SMs: %d\n", deviceProps.multiProcessorCount);
std::printf(" Global mem: %.0f MB\n",
static_cast<float>(deviceProps.totalGlobalMem) / (1024 * 1024));
std::printf(" CUDA Cap: %d.%d\n", deviceProps.major, deviceProps.minor);
}
std::printf("-----------------------------------\n");

void print_cuda_info() {
cudaDeviceProp device_props;
cudaGetDeviceProperties(&device_props, 0);
std::printf("DEVICE: %s (%d SMs, %llu MB, CUDA %d.%d)\n", device_props.name, device_props.multiProcessorCount, device_props.totalGlobalMem / (1024ull * 1024ull), device_props.major, device_props.minor);
}

// Singleton class to manage CUDA context (handle)
@@ -160,7 +144,7 @@ class MemoryCUDA final : public Memory {
};

BackendCUDA::BackendCUDA() : Backend(std::make_unique<MemoryCUDA>()) {
printCudaInfo();
print_cuda_info();
CtxManager::getInstance().create();
}

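The new print_cuda_info assumes device 0 exists. An optional hardened variant (not in the commit) that checks the query results before printing:

#include <cstdio>
#include <cuda_runtime.h>

void print_cuda_info_checked() {
    int device_count = 0;
    // Bail out cleanly instead of printing an uninitialized struct.
    if (cudaGetDeviceCount(&device_count) != cudaSuccess || device_count == 0) {
        std::printf("DEVICE: no CUDA-capable device found\n");
        return;
    }
    cudaDeviceProp device_props;
    if (cudaGetDeviceProperties(&device_props, 0) != cudaSuccess) {
        std::printf("DEVICE: failed to query device 0\n");
        return;
    }
    std::printf("DEVICE: %s (%d SMs, %llu MB, CUDA %d.%d)\n",
                device_props.name,
                device_props.multiProcessorCount,
                static_cast<unsigned long long>(device_props.totalGlobalMem / (1024ull * 1024ull)),
                device_props.major, device_props.minor);
}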
28 changes: 14 additions & 14 deletions vlm/src/vlm.cpp
@@ -149,20 +149,20 @@ AeroCoefficients VLM::run(const FlowData& flow) {
backend->gamma_shed(gamma_wing.d_view(), gamma_wing_prev.d_view(), gamma_wake.d_view(), 0);
backend->gamma_delta(gamma_wing_delta.d_view(), gamma_wing.d_view());

mesh.verts_wing_init.to_host();
mesh.verts_wing.to_host();
mesh.verts_wake.to_host();
mesh.normals.to_host();
mesh.colloc.to_host();
mesh.area.to_host();
lhs.to_host();
rhs.to_host();
gamma_wing.to_host();
gamma_wake.to_host();
gamma_wing_prev.to_host();
gamma_wing_delta.to_host();
local_velocities.to_host();
transforms.to_host();
// mesh.verts_wing_init.to_host();
// mesh.verts_wing.to_host();
// mesh.verts_wake.to_host();
// mesh.normals.to_host();
// mesh.colloc.to_host();
// mesh.area.to_host();
// lhs.to_host();
// rhs.to_host();
// gamma_wing.to_host();
// gamma_wake.to_host();
// gamma_wing_prev.to_host();
// gamma_wing_delta.to_host();
// local_velocities.to_host();
// transforms.to_host();

return AeroCoefficients{
backend->coeff_steady_cl_multi(mesh.verts_wing.d_view(), gamma_wing_delta.d_view(), flow, mesh.area.d_view()),
4 changes: 0 additions & 4 deletions vlm/src/vlm_backend.cpp
@@ -1,6 +1,5 @@
#include "vlm_backend.hpp"
#include "vlm_mesh.hpp"
#include "tinycpuid.hpp"

#include <string>

@@ -14,9 +13,6 @@
using namespace vlm;

std::unique_ptr<Backend> vlm::create_backend(const std::string& backend_name) {
//tiny::CPUID cpuid;
//cpuid.print_info();

#ifdef VLM_CPU
if (backend_name == "cpu") {
return std::make_unique<BackendCPU>();
4 changes: 1 addition & 3 deletions vlm/src/vlm_mesh.cpp
@@ -113,9 +113,7 @@ SurfDims MeshIO::get_dims(const std::string& filename) const {
throw std::runtime_error("Failed to open mesh file");
}
auto dims = _file->get_dims(file);
std::cout << "Number of panels: " << dims.first * dims.second << "\n";
std::cout << "nc: " << dims.first << "\n";
std::cout << "ns: " << dims.second << "\n";
std::printf("MESH: %s (%llu x %llu)\n", filename.c_str(), dims.first, dims.second);
return dims;
}

Expand Down

0 comments on commit e6de862

Please sign in to comment.