You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
414 lines
14 KiB
414 lines
14 KiB
|
|
|
|
#include "oc_fftcc.h"
|
|
|
|
namespace opencorr
|
|
{
|
|
FFTW* FFTW::allocate(int subset_radius_x, int subset_radius_y)
|
|
{
|
|
int width = 2 * subset_radius_x;
|
|
int height = 2 * subset_radius_y;
|
|
int buffer_length = width * (subset_radius_y + 1);
|
|
unsigned int subset_size = width * height;
|
|
|
|
FFTW* FFTW_instance = new FFTW;
|
|
|
|
FFTW_instance->ref_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
FFTW_instance->tar_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
FFTW_instance->zncc_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
|
|
FFTW_instance->ref_subset = new float[subset_size];
|
|
FFTW_instance->tar_subset = new float[subset_size];
|
|
FFTW_instance->zncc = new float[subset_size];
|
|
|
|
#pragma omp critical
|
|
{
|
|
FFTW_instance->ref_plan = fftwf_plan_dft_r2c_2d(width, height, FFTW_instance->ref_subset, FFTW_instance->ref_freq, FFTW_ESTIMATE);
|
|
FFTW_instance->tar_plan = fftwf_plan_dft_r2c_2d(width, height, FFTW_instance->tar_subset, FFTW_instance->tar_freq, FFTW_ESTIMATE);
|
|
FFTW_instance->zncc_plan = fftwf_plan_dft_c2r_2d(width, height, FFTW_instance->zncc_freq, FFTW_instance->zncc, FFTW_ESTIMATE);
|
|
}
|
|
|
|
return FFTW_instance;
|
|
}
|
|
|
|
FFTW* FFTW::allocate(int subset_radius_x, int subset_radius_y, int subset_radius_z)
|
|
{
|
|
int dim_x = 2 * subset_radius_x;
|
|
int dim_y = 2 * subset_radius_y;
|
|
int dim_z = 2 * subset_radius_z;
|
|
int buffer_length = dim_x * dim_y * (subset_radius_z + 1);
|
|
unsigned int subset_size = dim_x * dim_y * dim_z;
|
|
|
|
FFTW* FFTW_instance = new FFTW;
|
|
|
|
FFTW_instance->ref_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
FFTW_instance->tar_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
FFTW_instance->zncc_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
|
|
FFTW_instance->ref_subset = new float[subset_size];
|
|
FFTW_instance->tar_subset = new float[subset_size];
|
|
FFTW_instance->zncc = new float[subset_size];
|
|
|
|
#pragma omp critical
|
|
{
|
|
FFTW_instance->ref_plan = fftwf_plan_dft_r2c_3d(dim_x, dim_y, dim_z, FFTW_instance->ref_subset, FFTW_instance->ref_freq, FFTW_ESTIMATE);
|
|
FFTW_instance->tar_plan = fftwf_plan_dft_r2c_3d(dim_x, dim_y, dim_z, FFTW_instance->tar_subset, FFTW_instance->tar_freq, FFTW_ESTIMATE);
|
|
FFTW_instance->zncc_plan = fftwf_plan_dft_c2r_3d(dim_x, dim_y, dim_z, FFTW_instance->zncc_freq, FFTW_instance->zncc, FFTW_ESTIMATE);
|
|
}
|
|
|
|
return FFTW_instance;
|
|
}
|
|
|
|
void FFTW::release(FFTW* instance)
|
|
{
|
|
delete[] instance->ref_subset;
|
|
delete[] instance->tar_subset;
|
|
delete[] instance->zncc;
|
|
fftw_free(instance->ref_freq);
|
|
fftw_free(instance->tar_freq);
|
|
fftw_free(instance->zncc_freq);
|
|
fftwf_destroy_plan(instance->ref_plan);
|
|
fftwf_destroy_plan(instance->tar_plan);
|
|
fftwf_destroy_plan(instance->zncc_plan);
|
|
}
|
|
|
|
void FFTW::reallocate(FFTW* instance, int subset_radius_x, int subset_radius_y)
|
|
{
|
|
release(instance);
|
|
|
|
int width = 2 * subset_radius_x;
|
|
int height = 2 * subset_radius_y;
|
|
int buffer_length = width * (subset_radius_y + 1);
|
|
unsigned int subset_size = width * height;
|
|
|
|
instance->ref_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
instance->tar_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
instance->zncc_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
|
|
instance->ref_subset = new float[subset_size];
|
|
instance->tar_subset = new float[subset_size];
|
|
instance->zncc = new float[subset_size];
|
|
|
|
#pragma omp critical
|
|
{
|
|
instance->ref_plan = fftwf_plan_dft_r2c_2d(width, height, instance->ref_subset, instance->ref_freq, FFTW_ESTIMATE);
|
|
instance->tar_plan = fftwf_plan_dft_r2c_2d(width, height, instance->tar_subset, instance->tar_freq, FFTW_ESTIMATE);
|
|
instance->zncc_plan = fftwf_plan_dft_c2r_2d(width, height, instance->zncc_freq, instance->zncc, FFTW_ESTIMATE);
|
|
}
|
|
}
|
|
|
|
void FFTW::reallocate(FFTW* instance, int subset_radius_x, int subset_radius_y, int subset_radius_z)
|
|
{
|
|
release(instance);
|
|
|
|
int dim_x = 2 * subset_radius_x;
|
|
int dim_y = 2 * subset_radius_y;
|
|
int dim_z = 2 * subset_radius_z;
|
|
int buffer_length = dim_x * dim_y * (subset_radius_z + 1);
|
|
unsigned int subset_size = dim_x * dim_y * dim_z;
|
|
|
|
instance->ref_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
instance->tar_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
instance->zncc_freq = (fftwf_complex*)fftw_malloc(sizeof(fftwf_complex) * buffer_length);
|
|
|
|
instance->ref_subset = new float[subset_size];
|
|
instance->tar_subset = new float[subset_size];
|
|
instance->zncc = new float[subset_size];
|
|
|
|
#pragma omp critical
|
|
{
|
|
instance->ref_plan = fftwf_plan_dft_r2c_3d(dim_x, dim_y, dim_z, instance->ref_subset, instance->ref_freq, FFTW_ESTIMATE);
|
|
instance->tar_plan = fftwf_plan_dft_r2c_3d(dim_x, dim_y, dim_z, instance->tar_subset, instance->tar_freq, FFTW_ESTIMATE);
|
|
instance->zncc_plan = fftwf_plan_dft_c2r_3d(dim_x, dim_y, dim_z, instance->zncc_freq, instance->zncc, FFTW_ESTIMATE);
|
|
}
|
|
}
|
|
|
|
//FFT accelerated cross correlation 2D
|
|
FFTCC2D::FFTCC2D(int subset_radius_x, int subset_radius_y, int thread_number)
|
|
{
|
|
this->subset_radius_x = subset_radius_x;
|
|
this->subset_radius_y = subset_radius_y;
|
|
this->thread_number = thread_number;
|
|
|
|
for (int i = 0; i < thread_number; i++)
|
|
{
|
|
FFTW* instance = FFTW::allocate(subset_radius_x, subset_radius_y);
|
|
instance_pool.push_back(instance);
|
|
}
|
|
}
|
|
|
|
FFTCC2D::~FFTCC2D()
|
|
{
|
|
for (auto& instance : instance_pool)
|
|
{
|
|
FFTW::release(instance);
|
|
delete instance;
|
|
}
|
|
instance_pool.clear();
|
|
}
|
|
|
|
FFTW* FFTCC2D::getInstance(int tid)
|
|
{
|
|
if (tid >= (int)instance_pool.size())
|
|
{
|
|
std::cerr << "CPU thread ID over limit" << std::endl;
|
|
}
|
|
|
|
return instance_pool[tid];
|
|
}
|
|
|
|
void FFTCC2D::compute(POI2D* poi)
|
|
{
|
|
//set instance w.r.t. thread id
|
|
FFTW* current_instance = getInstance(omp_get_thread_num());
|
|
|
|
int subset_width = subset_radius_x * 2;
|
|
int subset_height = subset_radius_y * 2;
|
|
int subset_size = subset_width * subset_height;
|
|
|
|
//set initial guess of displacement
|
|
Point2D initial_displacement(poi->deformation.u, poi->deformation.v);
|
|
|
|
//initialize mean and norm in subsets
|
|
float ref_mean = 0;
|
|
float tar_mean = 0;
|
|
float ref_norm = 0;
|
|
float tar_norm = 0;
|
|
|
|
for (int r = 0; r < subset_height; r++)
|
|
{
|
|
for (int c = 0; c < subset_width; c++)
|
|
{
|
|
//fill reference subset
|
|
Point2D ref_point(poi->x + c - subset_radius_x, poi->y + r - subset_radius_y);
|
|
float value = ref_img->eg_mat((int)ref_point.y, (int)ref_point.x);
|
|
current_instance->ref_subset[r * subset_width + c] = value;
|
|
ref_mean += value;
|
|
|
|
//fill the target subset with initial guess of displacement
|
|
Point2D tar_point = ref_point + initial_displacement;
|
|
value = tar_img->eg_mat((int)tar_point.y, (int)tar_point.x);
|
|
current_instance->tar_subset[r * subset_width + c] = value;
|
|
tar_mean += value;
|
|
}
|
|
}
|
|
ref_mean /= subset_size;
|
|
tar_mean /= subset_size;
|
|
|
|
//zero-mean operation of gray-scale values in the two subsets
|
|
for (int i = 0; i < subset_size; i++)
|
|
{
|
|
current_instance->ref_subset[i] -= ref_mean;
|
|
current_instance->tar_subset[i] -= tar_mean;
|
|
ref_norm += current_instance->ref_subset[i] * current_instance->ref_subset[i];
|
|
tar_norm += current_instance->tar_subset[i] * current_instance->tar_subset[i];
|
|
}
|
|
|
|
fftwf_execute(current_instance->ref_plan);
|
|
fftwf_execute(current_instance->tar_plan);
|
|
|
|
int buffer_length = subset_width * (subset_radius_y + 1);
|
|
for (int n = 0; n < buffer_length; n++)
|
|
{
|
|
current_instance->zncc_freq[n][0] = (current_instance->ref_freq[n][0] * current_instance->tar_freq[n][0])
|
|
+ (current_instance->ref_freq[n][1] * current_instance->tar_freq[n][1]);
|
|
current_instance->zncc_freq[n][1] = (current_instance->ref_freq[n][0] * current_instance->tar_freq[n][1])
|
|
- (current_instance->ref_freq[n][1] * current_instance->tar_freq[n][0]);
|
|
}
|
|
|
|
fftwf_execute(current_instance->zncc_plan);
|
|
|
|
//search for max ZCC
|
|
float max_zncc = -2;
|
|
int max_zncc_index = 0;
|
|
for (int i = 0; i < subset_size; i++)
|
|
{
|
|
if (current_instance->zncc[i] > max_zncc)
|
|
{
|
|
max_zncc = current_instance->zncc[i];
|
|
max_zncc_index = i;
|
|
}
|
|
}
|
|
int local_displacement_u = max_zncc_index % subset_width;
|
|
int local_displacement_v = max_zncc_index / subset_width;
|
|
|
|
if (local_displacement_u > subset_radius_x)
|
|
{
|
|
local_displacement_u -= subset_width;
|
|
}
|
|
if (local_displacement_v > subset_radius_y)
|
|
{
|
|
local_displacement_v -= subset_height;
|
|
}
|
|
|
|
//store the final results
|
|
poi->deformation.u = (float)local_displacement_u + initial_displacement.x;
|
|
poi->deformation.v = (float)local_displacement_v + initial_displacement.y;
|
|
|
|
poi->result.u0 = initial_displacement.x;
|
|
poi->result.v0 = initial_displacement.y;
|
|
poi->result.zncc = max_zncc / (sqrt(ref_norm * tar_norm) * subset_size); //convert ZCC to ZNCC
|
|
}
|
|
|
|
void FFTCC2D::compute(std::vector<POI2D>& poi_queue)
|
|
{
|
|
int queue_length = (int)poi_queue.size();
|
|
#pragma omp parallel for
|
|
for (int i = 0; i < queue_length; i++)
|
|
{
|
|
compute(&poi_queue[i]);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
//FFT accelerated cross correlation 3D
|
|
FFTCC3D::FFTCC3D(int subset_radius_x, int subset_radius_y, int subset_radius_z, int thread_number)
|
|
{
|
|
this->subset_radius_x = subset_radius_x;
|
|
this->subset_radius_y = subset_radius_y;
|
|
this->subset_radius_z = subset_radius_z;
|
|
this->thread_number = thread_number;
|
|
|
|
for (int i = 0; i < thread_number; i++)
|
|
{
|
|
FFTW* instance = FFTW::allocate(subset_radius_x, subset_radius_y, subset_radius_z);
|
|
instance_pool.push_back(instance);
|
|
}
|
|
}
|
|
|
|
FFTCC3D::~FFTCC3D()
|
|
{
|
|
for (auto& instance : instance_pool)
|
|
{
|
|
FFTW::release(instance);
|
|
delete instance;
|
|
}
|
|
instance_pool.clear();
|
|
}
|
|
|
|
FFTW* FFTCC3D::getInstance(int tid)
|
|
{
|
|
if (tid >= (int)instance_pool.size())
|
|
{
|
|
std::cerr << "CPU thread ID over limit" << std::endl;
|
|
}
|
|
|
|
return instance_pool[tid];
|
|
}
|
|
|
|
void FFTCC3D::compute(POI3D* poi)
|
|
{
|
|
//set instance w.r.t. thread id
|
|
FFTW* current_instance = getInstance(omp_get_thread_num());
|
|
|
|
int subset_dim_x = subset_radius_x * 2;
|
|
int subset_dim_y = subset_radius_y * 2;
|
|
int subset_dim_z = subset_radius_z * 2;
|
|
int subset_size = subset_dim_x * subset_dim_y * subset_dim_z;
|
|
|
|
//set initial guess of displacement
|
|
Point3D initial_displacement(poi->deformation.u, poi->deformation.v, poi->deformation.w);
|
|
|
|
//initialize mean and norm in subsets
|
|
float ref_mean = 0;
|
|
float tar_mean = 0;
|
|
float ref_norm = 0;
|
|
float tar_norm = 0;
|
|
|
|
for (int i = 0; i < subset_dim_z; i++)
|
|
{
|
|
for (int j = 0; j < subset_dim_y; j++)
|
|
{
|
|
for (int k = 0; k < subset_dim_x; k++)
|
|
{
|
|
//fill the reference subset
|
|
Point3D ref_point(poi->x + k - subset_radius_x, poi->y + j - subset_radius_y, poi->z + i - subset_radius_z);
|
|
float value = ref_img->vol_mat[(int)ref_point.z][(int)ref_point.y][(int)ref_point.x];
|
|
current_instance->ref_subset[(i * subset_dim_y + j) * subset_dim_x + k] = value;
|
|
ref_mean += value;
|
|
|
|
//fill the target subset with initial guess of displacement
|
|
Point3D tar_point = ref_point + initial_displacement;
|
|
value = tar_img->vol_mat[(int)tar_point.z][(int)tar_point.y][(int)tar_point.x];
|
|
current_instance->tar_subset[(i * subset_dim_y + j) * subset_dim_x + k] = value;
|
|
tar_mean += value;
|
|
}
|
|
}
|
|
}
|
|
ref_mean /= subset_size;
|
|
tar_mean /= subset_size;
|
|
|
|
//zero-mean operation of gray-scale values in the two subsets
|
|
for (int i = 0; i < subset_size; i++)
|
|
{
|
|
current_instance->ref_subset[i] -= ref_mean;
|
|
current_instance->tar_subset[i] -= tar_mean;
|
|
ref_norm += current_instance->ref_subset[i] * current_instance->ref_subset[i];
|
|
tar_norm += current_instance->tar_subset[i] * current_instance->tar_subset[i];
|
|
}
|
|
|
|
fftwf_execute(current_instance->ref_plan);
|
|
fftwf_execute(current_instance->tar_plan);
|
|
|
|
unsigned int buffer_length = subset_dim_x * subset_dim_y * (subset_radius_z + 1);
|
|
for (unsigned int n = 0; n < buffer_length; n++)
|
|
{
|
|
current_instance->zncc_freq[n][0] = (current_instance->ref_freq[n][0] * current_instance->tar_freq[n][0])
|
|
+ (current_instance->ref_freq[n][1] * current_instance->tar_freq[n][1]);
|
|
current_instance->zncc_freq[n][1] = (current_instance->ref_freq[n][0] * current_instance->tar_freq[n][1])
|
|
- (current_instance->ref_freq[n][1] * current_instance->tar_freq[n][0]);
|
|
}
|
|
|
|
fftwf_execute(current_instance->zncc_plan);
|
|
|
|
//search for max ZCC
|
|
float max_zncc = -2;
|
|
int max_zncc_index = 0;
|
|
for (int i = 0; i < subset_size; i++)
|
|
{
|
|
if (current_instance->zncc[i] > max_zncc)
|
|
{
|
|
max_zncc = current_instance->zncc[i];
|
|
max_zncc_index = i;
|
|
}
|
|
}
|
|
int local_displacement_u = max_zncc_index % subset_dim_x;
|
|
int local_displacement_v = (max_zncc_index / subset_dim_x) % subset_dim_y;
|
|
int local_displacement_w = max_zncc_index / (subset_dim_x * subset_dim_y);
|
|
|
|
if (local_displacement_u > subset_radius_x)
|
|
{
|
|
local_displacement_u -= subset_dim_x;
|
|
}
|
|
if (local_displacement_v > subset_radius_y)
|
|
{
|
|
local_displacement_v -= subset_dim_y;
|
|
}
|
|
if (local_displacement_w > subset_radius_z)
|
|
{
|
|
local_displacement_w -= subset_dim_z;
|
|
}
|
|
|
|
//store the final results
|
|
poi->deformation.u = (float)local_displacement_u + initial_displacement.x;
|
|
poi->deformation.v = (float)local_displacement_v + initial_displacement.y;
|
|
poi->deformation.w = (float)local_displacement_w + initial_displacement.z;
|
|
|
|
poi->result.u0 = initial_displacement.x;
|
|
poi->result.v0 = initial_displacement.y;
|
|
poi->result.w0 = initial_displacement.z;
|
|
poi->result.zncc = max_zncc / (sqrt(ref_norm * tar_norm) * subset_size); //convert ZCC to ZNCC
|
|
}
|
|
|
|
void FFTCC3D::compute(std::vector<POI3D>& poi_queue)
|
|
{
|
|
int queue_length = (int)poi_queue.size();
|
|
#pragma omp parallel for
|
|
for (int i = 0; i < queue_length; ++i)
|
|
{
|
|
compute(&poi_queue[i]);
|
|
}
|
|
}
|
|
|
|
}//namespace opencorr
|