Clean up FFT gravity

This commit is contained in:
Tamás Bálint Misius
2023-01-27 20:04:10 +01:00
parent 85d492bad6
commit aa78a1ee6b
5 changed files with 102 additions and 133 deletions

View File

@@ -3806,10 +3806,10 @@ void Simulation::BeforeSim()
grav->gravity_update_async(); grav->gravity_update_async();
//Get updated buffer pointers for gravity //Get updated buffer pointers for gravity
gravx = grav->gravx; gravx = &grav->gravx[0];
gravy = grav->gravy; gravy = &grav->gravy[0];
gravp = grav->gravp; gravp = &grav->gravp[0];
gravmap = grav->gravmap; gravmap = &grav->gravmap[0];
} }
if(emp_decor>0) if(emp_decor>0)
emp_decor -= emp_decor/25+2; emp_decor -= emp_decor/25+2;
@@ -4045,10 +4045,10 @@ Simulation::Simulation():
//Give air sim references to our data //Give air sim references to our data
grav->bmap = bmap; grav->bmap = bmap;
//Gravity sim gives us maps to use //Gravity sim gives us maps to use
gravx = grav->gravx; gravx = &grav->gravx[0];
gravy = grav->gravy; gravy = &grav->gravy[0];
gravp = grav->gravp; gravp = &grav->gravp[0];
gravmap = grav->gravmap; gravmap = &grav->gravmap[0];
//Create and attach air simulation //Create and attach air simulation
air = new Air(*this); air = new Air(*this);

View File

@@ -9,44 +9,30 @@
Gravity::Gravity(CtorTag) Gravity::Gravity(CtorTag)
{ {
// Allocate full size Gravmaps th_ogravmap.resize(NCELL);
unsigned int size = NCELL; th_gravmap.resize(NCELL);
th_ogravmap = new float[size]; th_gravy.resize(NCELL);
th_gravmap = new float[size]; th_gravx.resize(NCELL);
th_gravy = new float[size]; th_gravp.resize(NCELL);
th_gravx = new float[size]; gravmap.resize(NCELL);
th_gravp = new float[size]; gravy.resize(NCELL);
gravmap = new float[size]; gravx.resize(NCELL);
gravy = new float[size]; gravp.resize(NCELL);
gravx = new float[size]; gravmask.resize(NCELL);
gravp = new float[size];
gravmask = new unsigned[size];
} }
Gravity::~Gravity() Gravity::~Gravity()
{ {
stop_grav_async(); stop_grav_async();
delete[] th_ogravmap;
delete[] th_gravmap;
delete[] th_gravy;
delete[] th_gravx;
delete[] th_gravp;
delete[] gravmap;
delete[] gravy;
delete[] gravx;
delete[] gravp;
delete[] gravmask;
} }
void Gravity::Clear() void Gravity::Clear()
{ {
int size = NCELL; std::fill(&gravy[0], &gravy[0] + NCELL, 0.0f);
std::fill(gravy, gravy + size, 0.0f); std::fill(&gravx[0], &gravx[0] + NCELL, 0.0f);
std::fill(gravx, gravx + size, 0.0f); std::fill(&gravp[0], &gravp[0] + NCELL, 0.0f);
std::fill(gravp, gravp + size, 0.0f); std::fill(&gravmap[0], &gravmap[0] + NCELL, 0.0f);
std::fill(gravmap, gravmap + size, 0.0f); std::fill(&gravmask[0], &gravmask[0] + NCELL, UINT32_C(0xFFFFFFFF));
std::fill(gravmask, gravmask + size, 0xFFFFFFFF);
ignoreNextResult = true; ignoreNextResult = true;
} }
@@ -86,8 +72,8 @@ void Gravity::gravity_update_async()
gravcv.notify_one(); gravcv.notify_one();
} }
unsigned int size = NCELL; unsigned int size = NCELL;
membwand(gravy, gravmask, size * sizeof(float), size * sizeof(unsigned)); membwand(&gravy[0], &gravmask[0], size * sizeof(float), size * sizeof(uint32_t));
membwand(gravx, gravmask, size * sizeof(float), size * sizeof(unsigned)); membwand(&gravx[0], &gravmask[0], size * sizeof(float), size * sizeof(uint32_t));
std::fill(&gravmap[0], &gravmap[0] + size, 0.0f); std::fill(&gravmap[0], &gravmap[0] + size, 0.0f);
} }
@@ -244,8 +230,6 @@ void Gravity::gravity_mask()
unsigned maskvalue; unsigned maskvalue;
mask_el *t_mask_el = nullptr; mask_el *t_mask_el = nullptr;
mask_el *c_mask_el = nullptr; mask_el *c_mask_el = nullptr;
if (!gravmask)
return;
memset(checkmap, 0, sizeof(checkmap)); memset(checkmap, 0, sizeof(checkmap));
for (int x = 0; x < XCELLS; x++) for (int x = 0; x < XCELLS; x++)
{ {

View File

@@ -2,19 +2,37 @@
#include "Misc.h" #include "Misc.h"
#include <cstring> #include <cstring>
#include <cmath> #include <cmath>
#include <complex>
#include <fftw3.h> #include <fftw3.h>
constexpr auto xblock2 = XCELLS * 2;
constexpr auto yblock2 = YCELLS * 2;
constexpr auto fft_tsize = (xblock2 / 2 + 1) * yblock2;
//NCELL*4 is size of data array, scaling needed because FFTW calculates an unnormalized DFT
constexpr auto scaleFactor = -float(M_GRAV) / (NCELL * 4);
static_assert(sizeof(std::complex<float>) == sizeof(fftwf_complex));
struct FftwArrayDeleter { void operator ()(float ptr[]) const { fftwf_free(ptr); } };
struct FftwComplexArrayDeleter { void operator ()(std::complex<float> ptr[]) const { fftwf_free(ptr); } };
struct FftwPlanDeleter { void operator ()(fftwf_plan ptr ) const { fftwf_destroy_plan(ptr); } };
using FftwArrayPtr = std::unique_ptr<float [], FftwArrayDeleter >;
using FftwComplexArrayPtr = std::unique_ptr<std::complex<float> [], FftwComplexArrayDeleter>;
using FftwPlanPtr = std::unique_ptr<std::remove_pointer<fftwf_plan>::type, FftwPlanDeleter >;
FftwArrayPtr FftwArray(size_t size)
{
return FftwArrayPtr(reinterpret_cast<float *>(fftwf_malloc(size * sizeof(float))));
}
FftwComplexArrayPtr FftwComplexArray(size_t size)
{
return FftwComplexArrayPtr(reinterpret_cast<std::complex<float> *>(fftwf_malloc(size * sizeof(std::complex<float>))));
}
struct GravityImpl : public Gravity struct GravityImpl : public Gravity
{ {
bool grav_fft_status = false; bool grav_fft_status = false;
float *th_ptgravx = nullptr; FftwArrayPtr th_gravmapbig , th_gravxbig , th_gravybig ;
float *th_ptgravy = nullptr; FftwComplexArrayPtr th_ptgravxt, th_ptgravyt, th_gravmapbigt, th_gravxbigt, th_gravybigt;
float *th_gravmapbig = nullptr; FftwPlanPtr plan_gravmap, plan_gravx_inverse, plan_gravy_inverse;
float *th_gravxbig = nullptr;
float *th_gravybig = nullptr;
fftwf_complex *th_ptgravxt, *th_ptgravyt, *th_gravmapbigt, *th_gravxbigt, *th_gravybigt;
fftwf_plan plan_gravmap, plan_gravx_inverse, plan_gravy_inverse;
void grav_fft_init(); void grav_fft_init();
void grav_fft_cleanup(); void grav_fft_cleanup();
@@ -34,34 +52,28 @@ GravityImpl::~GravityImpl()
void GravityImpl::grav_fft_init() void GravityImpl::grav_fft_init()
{ {
int xblock2 = XCELLS*2;
int yblock2 = YCELLS*2;
int fft_tsize = (xblock2/2+1)*yblock2;
float distance, scaleFactor;
fftwf_plan plan_ptgravx, plan_ptgravy;
if (grav_fft_status) return; if (grav_fft_status) return;
FftwPlanPtr plan_ptgravx, plan_ptgravy;
//use fftw malloc function to ensure arrays are aligned, to get better performance //use fftw malloc function to ensure arrays are aligned, to get better performance
th_ptgravx = reinterpret_cast<float*>(fftwf_malloc(xblock2 * yblock2 * sizeof(float))); FftwArrayPtr th_ptgravx = FftwArray(xblock2 * yblock2);
th_ptgravy = reinterpret_cast<float*>(fftwf_malloc(xblock2 * yblock2 * sizeof(float))); FftwArrayPtr th_ptgravy = FftwArray(xblock2 * yblock2);
th_ptgravxt = reinterpret_cast<fftwf_complex*>(fftwf_malloc(fft_tsize * sizeof(fftwf_complex))); th_ptgravxt = FftwComplexArray(fft_tsize);
th_ptgravyt = reinterpret_cast<fftwf_complex*>(fftwf_malloc(fft_tsize * sizeof(fftwf_complex))); th_ptgravyt = FftwComplexArray(fft_tsize);
th_gravmapbig = reinterpret_cast<float*>(fftwf_malloc(xblock2 * yblock2 * sizeof(float))); th_gravmapbig = FftwArray(xblock2 * yblock2);
th_gravmapbigt = reinterpret_cast<fftwf_complex*>(fftwf_malloc(fft_tsize * sizeof(fftwf_complex))); th_gravmapbigt = FftwComplexArray(fft_tsize);
th_gravxbig = reinterpret_cast<float*>(fftwf_malloc(xblock2 * yblock2 * sizeof(float))); th_gravxbig = FftwArray(xblock2 * yblock2);
th_gravybig = reinterpret_cast<float*>(fftwf_malloc(xblock2 * yblock2 * sizeof(float))); th_gravybig = FftwArray(xblock2 * yblock2);
th_gravxbigt = reinterpret_cast<fftwf_complex*>(fftwf_malloc(fft_tsize * sizeof(fftwf_complex))); th_gravxbigt = FftwComplexArray(fft_tsize);
th_gravybigt = reinterpret_cast<fftwf_complex*>(fftwf_malloc(fft_tsize * sizeof(fftwf_complex))); th_gravybigt = FftwComplexArray(fft_tsize);
//select best algorithm, could use FFTW_PATIENT or FFTW_EXHAUSTIVE but that increases the time taken to plan, and I don't see much increase in execution speed //select best algorithm, could use FFTW_PATIENT or FFTW_EXHAUSTIVE but that increases the time taken to plan, and I don't see much increase in execution speed
plan_ptgravx = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravx, th_ptgravxt, FFTW_MEASURE); plan_ptgravx = FftwPlanPtr(fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravx.get(), reinterpret_cast<fftwf_complex *>(th_ptgravxt.get()), FFTW_MEASURE));
plan_ptgravy = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravy, th_ptgravyt, FFTW_MEASURE); plan_ptgravy = FftwPlanPtr(fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_ptgravy.get(), reinterpret_cast<fftwf_complex *>(th_ptgravyt.get()), FFTW_MEASURE));
plan_gravmap = fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_gravmapbig, th_gravmapbigt, FFTW_MEASURE); plan_gravmap = FftwPlanPtr(fftwf_plan_dft_r2c_2d(yblock2, xblock2, th_gravmapbig.get(), reinterpret_cast<fftwf_complex *>(th_gravmapbigt.get()), FFTW_MEASURE));
plan_gravx_inverse = fftwf_plan_dft_c2r_2d(yblock2, xblock2, th_gravxbigt, th_gravxbig, FFTW_MEASURE); plan_gravx_inverse = FftwPlanPtr(fftwf_plan_dft_c2r_2d(yblock2, xblock2, reinterpret_cast<fftwf_complex *>(th_gravxbigt.get()), th_gravxbig.get(), FFTW_MEASURE));
plan_gravy_inverse = fftwf_plan_dft_c2r_2d(yblock2, xblock2, th_gravybigt, th_gravybig, FFTW_MEASURE); plan_gravy_inverse = FftwPlanPtr(fftwf_plan_dft_c2r_2d(yblock2, xblock2, reinterpret_cast<fftwf_complex *>(th_gravybigt.get()), th_gravybig.get(), FFTW_MEASURE));
//NCELL*4 is size of data array, scaling needed because FFTW calculates an unnormalized DFT
scaleFactor = -float(M_GRAV)/(NCELL*4);
//calculate velocity map caused by a point mass //calculate velocity map caused by a point mass
for (int y = 0; y < yblock2; y++) for (int y = 0; y < yblock2; y++)
{ {
@@ -69,7 +81,7 @@ void GravityImpl::grav_fft_init()
{ {
if (x == XCELLS && y == YCELLS) if (x == XCELLS && y == YCELLS)
continue; continue;
distance = hypotf(float(x-XCELLS), float(y-YCELLS)); auto distance = hypotf(float(x-XCELLS), float(y-YCELLS));
th_ptgravx[y * xblock2 + x] = scaleFactor * (x - XCELLS) / powf(distance, 3); th_ptgravx[y * xblock2 + x] = scaleFactor * (x - XCELLS) / powf(distance, 3);
th_ptgravy[y * xblock2 + x] = scaleFactor * (y - YCELLS) / powf(distance, 3); th_ptgravy[y * xblock2 + x] = scaleFactor * (y - YCELLS) / powf(distance, 3);
} }
@@ -78,15 +90,11 @@ void GravityImpl::grav_fft_init()
th_ptgravy[yblock2 * xblock2 / 2 + xblock2 / 2] = 0.0f; th_ptgravy[yblock2 * xblock2 / 2 + xblock2 / 2] = 0.0f;
//transform point mass velocity maps //transform point mass velocity maps
fftwf_execute(plan_ptgravx); fftwf_execute(plan_ptgravx.get());
fftwf_execute(plan_ptgravy); fftwf_execute(plan_ptgravy.get());
fftwf_destroy_plan(plan_ptgravx);
fftwf_destroy_plan(plan_ptgravy);
fftwf_free(th_ptgravx);
fftwf_free(th_ptgravy);
//clear padded gravmap //clear padded gravmap
memset(th_gravmapbig, 0, xblock2 * yblock2 * sizeof(float)); memset(th_gravmapbig.get(), 0, xblock2 * yblock2 * sizeof(float));
grav_fft_status = true; grav_fft_status = true;
} }
@@ -94,17 +102,6 @@ void GravityImpl::grav_fft_init()
void GravityImpl::grav_fft_cleanup() void GravityImpl::grav_fft_cleanup()
{ {
if (!grav_fft_status) return; if (!grav_fft_status) return;
fftwf_free(th_ptgravxt);
fftwf_free(th_ptgravyt);
fftwf_free(th_gravmapbig);
fftwf_free(th_gravmapbigt);
fftwf_free(th_gravxbig);
fftwf_free(th_gravybig);
fftwf_free(th_gravxbigt);
fftwf_free(th_gravybigt);
fftwf_destroy_plan(plan_gravmap);
fftwf_destroy_plan(plan_gravx_inverse);
fftwf_destroy_plan(plan_gravy_inverse);
grav_fft_status = false; grav_fft_status = false;
} }
@@ -121,26 +118,23 @@ void Gravity::update_grav()
if (!fftGravity->grav_fft_status) if (!fftGravity->grav_fft_status)
fftGravity->grav_fft_init(); fftGravity->grav_fft_init();
auto *th_gravmapbig = fftGravity->th_gravmapbig; auto *th_gravmapbig = fftGravity->th_gravmapbig.get();
auto *th_gravxbig = fftGravity->th_gravxbig; auto *th_gravxbig = fftGravity->th_gravxbig.get();
auto *th_gravybig = fftGravity->th_gravybig; auto *th_gravybig = fftGravity->th_gravybig.get();
auto *th_ptgravxt = fftGravity->th_ptgravxt; auto *th_ptgravxt = fftGravity->th_ptgravxt.get();
auto *th_ptgravyt = fftGravity->th_ptgravyt; auto *th_ptgravyt = fftGravity->th_ptgravyt.get();
auto *th_gravmapbigt = fftGravity->th_gravmapbigt; auto *th_gravmapbigt = fftGravity->th_gravmapbigt.get();
auto *th_gravxbigt = fftGravity->th_gravxbigt; auto *th_gravxbigt = fftGravity->th_gravxbigt.get();
auto *th_gravybigt = fftGravity->th_gravybigt; auto *th_gravybigt = fftGravity->th_gravybigt.get();
auto &plan_gravmap = fftGravity->plan_gravmap; auto &plan_gravmap = fftGravity->plan_gravmap;
auto &plan_gravx_inverse = fftGravity->plan_gravx_inverse; auto &plan_gravx_inverse = fftGravity->plan_gravx_inverse;
auto &plan_gravy_inverse = fftGravity->plan_gravy_inverse; auto &plan_gravy_inverse = fftGravity->plan_gravy_inverse;
int xblock2 = XCELLS*2, yblock2 = YCELLS*2; if (memcmp(&th_ogravmap[0], &th_gravmap[0], sizeof(float) * NCELL) != 0)
int fft_tsize = (xblock2/2+1)*yblock2;
float mr, mc, pr, pc, gr, gc;
if (memcmp(th_ogravmap, th_gravmap, sizeof(float)*NCELL) != 0)
{ {
th_gravchanged = 1; th_gravchanged = 1;
membwand(th_gravmap, gravmask, NCELL*sizeof(float), NCELL*sizeof(unsigned)); membwand(&th_gravmap[0], &gravmask[0], NCELL * sizeof(float), NCELL * sizeof(uint32_t));
//copy gravmap into padded gravmap array //copy gravmap into padded gravmap array
for (int y = 0; y < YCELLS; y++) for (int y = 0; y < YCELLS; y++)
{ {
@@ -150,28 +144,16 @@ void Gravity::update_grav()
} }
} }
//transform gravmap //transform gravmap
fftwf_execute(plan_gravmap); fftwf_execute(plan_gravmap.get());
//do convolution (multiply the complex numbers) //do convolution (multiply the complex numbers)
for (int i = 0; i < fft_tsize; i++) for (int i = 0; i < fft_tsize; i++)
{ {
mr = th_gravmapbigt[i][0]; th_gravxbigt[i] = th_gravmapbigt[i] * th_ptgravxt[i];
mc = th_gravmapbigt[i][1]; th_gravybigt[i] = th_gravmapbigt[i] * th_ptgravyt[i];
pr = th_ptgravxt[i][0];
pc = th_ptgravxt[i][1];
gr = mr*pr-mc*pc;
gc = mr*pc+mc*pr;
th_gravxbigt[i][0] = gr;
th_gravxbigt[i][1] = gc;
pr = th_ptgravyt[i][0];
pc = th_ptgravyt[i][1];
gr = mr*pr-mc*pc;
gc = mr*pc+mc*pr;
th_gravybigt[i][0] = gr;
th_gravybigt[i][1] = gc;
} }
//inverse transform, and copy from padded arrays into normal velocity maps //inverse transform, and copy from padded arrays into normal velocity maps
fftwf_execute(plan_gravx_inverse); fftwf_execute(plan_gravx_inverse.get());
fftwf_execute(plan_gravy_inverse); fftwf_execute(plan_gravy_inverse.get());
for (int y = 0; y < YCELLS; y++) for (int y = 0; y < YCELLS; y++)
{ {
for (int x = 0; x < XCELLS; x++) for (int x = 0; x < XCELLS; x++)

View File

@@ -6,6 +6,8 @@
#include <mutex> #include <mutex>
#include <condition_variable> #include <condition_variable>
#include <memory> #include <memory>
#include <vector>
#include <cstdint>
class Simulation; class Simulation;
@@ -15,11 +17,11 @@ protected:
bool enabled = false; bool enabled = false;
// Maps to be processed by the gravity thread // Maps to be processed by the gravity thread
float *th_ogravmap = nullptr; std::vector<float> th_ogravmap;
float *th_gravmap = nullptr; std::vector<float> th_gravmap;
float *th_gravx = nullptr; std::vector<float> th_gravx;
float *th_gravy = nullptr; std::vector<float> th_gravy;
float *th_gravp = nullptr; std::vector<float> th_gravp;
int th_gravchanged = 0; int th_gravchanged = 0;
@@ -53,11 +55,12 @@ public:
~Gravity(); ~Gravity();
//Maps to be used by the main thread //Maps to be used by the main thread
float *gravmap = nullptr; std::vector<float> gravmap;
float *gravp = nullptr; std::vector<float> gravp;
float *gravy = nullptr; std::vector<float> gravy;
float *gravx = nullptr; std::vector<float> gravx;
unsigned *gravmask = nullptr; std::vector<uint32_t> gravmask;
static_assert(sizeof(float) == sizeof(uint32_t));
unsigned char (*bmap)[XCELLS]; unsigned char (*bmap)[XCELLS];

View File

@@ -6,9 +6,9 @@
void Gravity::get_result() void Gravity::get_result()
{ {
memcpy(gravy, th_gravy, NCELL*sizeof(float)); memcpy(&gravy[0], &th_gravy[0], NCELL * sizeof(float));
memcpy(gravx, th_gravx, NCELL*sizeof(float)); memcpy(&gravx[0], &th_gravx[0], NCELL * sizeof(float));
memcpy(gravp, th_gravp, NCELL*sizeof(float)); memcpy(&gravp[0], &th_gravp[0], NCELL * sizeof(float));
} }
void Gravity::update_grav(void) void Gravity::update_grav(void)