34 #if defined(NFFT_SINGLE) 
   35 #define X(name) CONCAT(nfctf_, name) 
   36 #elif defined(NFFT_LDOUBLE) 
   37 #define X(name) CONCAT(nfctl_, name) 
   39 #define X(name) CONCAT(nfct_, name) 
   42 static inline int fftw_2N(
int n)
 
   47 static inline int fftw_2N_rev(
int n)
 
   49   return (LRINT(K(0.5) * n) + 1);
 
   52 static inline int prod_int(
int *vec, 
int d)
 
   56   for (t = 0; t < d; t++)
 
   63 #define NFCT_DEFAULT_FLAGS PRE_PHI_HUT | PRE_PSI | MALLOC_X | MALLOC_F_HAT | \ 
   64   MALLOC_F | FFTW_INIT | FFT_OUT_OF_PLACE 
   66 #define FFTW_DEFAULT_FLAGS FFTW_ESTIMATE | FFTW_DESTROY_INPUT 
   68 #define NFCT_SUMMANDS (2 * ths->m + 2) 
   69 #define NODE(p,r) (ths->x[(p) * ths->d + (r)]) 
   71 #define MACRO_ndct_init_result_trafo \ 
   72   memset(f, 0, ths->M_total * sizeof(R)); 
   73 #define MACRO_ndct_init_result_adjoint \ 
   74   memset(f_hat, 0, ths->N_total * sizeof(R)); 
   76 #define MACRO_nfct_D_init_result_A \ 
   77   memset(g_hat, 0, prod_int(ths->n, ths->d) * sizeof(R)); 
   78 #define MACRO_nfct_D_init_result_T \ 
   79   memset(f_hat, 0, ths->N_total * sizeof(R)); 
   81 #define MACRO_nfct_B_init_result_A \ 
   82   memset(f, 0, ths->M_total * sizeof(R)); 
   83 #define MACRO_nfct_B_init_result_T \ 
   84   memset(g, 0, prod_int(ths->n, ths->d) * sizeof(R)); 
   86 #define NFCT_PRE_WINFUN(d) ths->N[d] = 2 * ths->N[d]; \ 
   87   ths->n[d] = fftw_2N(ths->n[d]); 
   89 #define NFCT_POST_WINFUN(d) ths->N[d] = LRINT(K(0.5) * ths->N[d]); \ 
   90   ths->n[d] = fftw_2N_rev(ths->n[d]); 
   92 #define NFCT_WINDOW_HELP_INIT WINDOW_HELP_INIT 
   94 R X(phi_hut)(X(plan) *ths, 
int k, 
int d)
 
   97   R phi_hut_tmp = PHI_HUT(k, d);
 
  103 R X(phi)(X(plan) *ths, R x, 
int d)
 
  106   R phi_tmp = PHI(x, d);
 
  112 #define MACRO_with_cos_vec cos_vec[t][ka[t]] 
  113 #define MACRO_without_cos_vec COS(K(2.0) * KPI * ka[t] * NODE(j,t)) 
  115 #define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]]; 
  116 #define MACRO_compute_PHI_HUT_INV (K(1.0) / (X(phi_hut)(ths, kg[t], t))) 
  118 #define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * NFCT_SUMMANDS + lc[t]] 
  119 #define MACRO_compute_PSI X(phi)(ths, NODE(j,t) - ((R)(lc[t] + lb[t])) / (K(2.0)*((R)(ths->n[t])-K(1.0))), t) 
  136 #define MACRO_ndct_malloc__cos_vec \ 
  138   cos_vec = (R**)Y(malloc)(ths->d * sizeof(R*)); \ 
  139   for (t = 0; t < ths->d; t++) \ 
  140     cos_vec[t] = (R*)Y(malloc)(ths->N[t] * sizeof(R)); 
  142 #define MACRO_ndct_free__cos_vec \ 
  145   for (t = 0; t < ths->d; t++) \ 
  146     Y(free)(cos_vec[t]); \ 
  150 #define MACRO_ndct_init__cos_vec \ 
  152   for(t = 0; t < ths->d; t++) \ 
  154     cos_vec[t][0] = K(1.0); \ 
  155     cos_vec[t][1] = COS(K(2.0) * KPI * NODE(j,t)); \ 
  156     for (k = 2; k < ths->N[t]; k++) \ 
  157       cos_vec[t][k] = K(2.0) * cos_vec[t][1] * cos_vec[t][k-1] - \ 
  162 #define MACRO_ndct_init__k__cos_k(which_one) \ 
  165   for (t = 0; t < ths->d; t++) \ 
  168   for (t = 0; t < ths->d; t++) \ 
  170     cos_k[t+1] = cos_k[t] * MACRO_ ##which_one; \ 
  174 #define MACRO_ndct_count__k__cos_k(which_one) \ 
  178   while ((ka[i] == ths->N[i]) && (i > 0)) \ 
  184   for (t = i; t < ths->d; t++) \ 
  185     cos_k[t+1] = cos_k[t] * MACRO_ ##which_one; \ 
  188 #define MACRO_ndct_compute__trafo \ 
  190   f[j] += f_hat[k] * cos_k[ths->d]; \ 
  193 #define MACRO_ndct_compute__adjoint \ 
  195   f_hat[k] += f[j] * cos_k[ths->d]; \ 
  199 #define MACRO_ndct(which_one) \ 
  200   void X(which_one ## _direct) (X(plan) *ths) \ 
  206     R *f_hat = ths->f_hat; \ 
  208     MACRO_ndct_init_result_ ## which_one; \ 
  210       for (j = 0; j < ths->M_total; j++) \ 
  212         for (k = 0; k < ths->N[0]; k++) \ 
  214           cos_k[ths->d] = COS(K(2.0) * KPI * k * NODE(j,0)); \ 
  215           MACRO_ndct_compute__ ## which_one; \ 
  221       MACRO_ndct_malloc__cos_vec; \ 
  223       for (j = 0; j < ths->M_total; j++) \ 
  225         MACRO_ndct_init__cos_vec; \ 
  226         MACRO_ndct_init__k__cos_k(with_cos_vec); \ 
  228         for (k = 0; k < ths->N_total; k++) \ 
  230           MACRO_ndct_compute__ ## which_one; \ 
  231           MACRO_ndct_count__k__cos_k(with_cos_vec); \ 
  234       MACRO_ndct_free__cos_vec; \ 
  255 #define MACRO_nfct__lower_boundary(j,act_dim) \ 
  258     LRINT(NODE((j),(act_dim)) * fftw_2N(ths->n[(act_dim)])) - ths->m; \ 
  261 #define MACRO_nfct_D_compute_A \ 
  263   g_hat[kg_plain[ths->d]] = f_hat[k_L] * c_phi_inv_k[ths->d]; \ 
  266 #define MACRO_nfct_D_compute_T \ 
  268   f_hat[k_L] = g_hat[kg_plain[ths->d]] * c_phi_inv_k[ths->d]; \ 
  271 #define MACRO_init__kg \ 
  273   for (t = 0; t < ths->d; t++) \ 
  278 #define MACRO_count__kg \ 
  283   while ((kg[i] == ths->N[i]) && (i > 0)) \ 
  291 #define MACRO_update__phi_inv_k__kg_plain(what_kind, which_phi) \ 
  293   for (t = i; t < ths->d; t++) \ 
  295     MACRO__c_phi_inv_k__ ## what_kind(which_phi); \ 
  296     kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \ 
  300 #define MACRO__c_phi_inv_k__A(which_phi) \ 
  304     c_phi_inv_k[t+1] = c_phi_inv_k[t] * MACRO_ ## which_phi; \ 
  308     c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \ 
  312 #define MACRO__c_phi_inv_k__T(which_phi) \ 
  314   c_phi_inv_k[t+1] = c_phi_inv_k[t] * MACRO_ ## which_phi; \ 
  317 #define MACRO_nfct_D(which_one) \ 
  318 static inline void D_ ## which_one (X(plan) *ths) \ 
  323   R c_phi_inv_k[ths->d+1];  \ 
  324   int kg_plain[ths->d+1];  \ 
  327   g_hat = ths->g_hat; \ 
  328   f_hat = ths->f_hat; \ 
  330   MACRO_nfct_D_init_result_ ## which_one \ 
  332   c_phi_inv_k[0] = K(1.0); \ 
  337   if (ths->nfct_flags & PRE_PHI_HUT) \ 
  338     for (k_L = 0; k_L < ths->N_total; k_L++) \ 
  340       MACRO_update__phi_inv_k__kg_plain(which_one, with_PRE_PHI_HUT); \ 
  341       MACRO_nfct_D_compute_ ## which_one; \ 
  345     for (k_L = 0; k_L < ths->N_total; k_L++) \ 
  347       MACRO_update__phi_inv_k__kg_plain(which_one,compute_PHI_HUT_INV); \ 
  348       MACRO_nfct_D_compute_ ## which_one; \ 
  359 #define MACRO_nfct_B_PRE_FULL_PSI_compute_A \ 
  361   (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \ 
  364 #define MACRO_nfct_B_PRE_FULL_PSI_compute_T \ 
  366   g[ths->psi_index_g[ix]] += ths->psi[ix] * (*fj); \ 
  369 #define MACRO_nfct_B_compute_A \ 
  371   (*fj) += phi_tilde[ths->d] * g[lg_plain[ths->d]]; \ 
  374 #define MACRO_nfct_B_compute_T \ 
  376   g[lg_plain[ths->d]] += phi_tilde[ths->d] * (*fj); \ 
  379 #define MACRO_compute_lg_offset__count_lg(i0) \ 
  384       (lb[(i0)] % fftw_2N(ths->n[(i0)])) + fftw_2N(ths->n[(i0)]); \ 
  386     lg_offset[(i0)] = lb[(i0)] % (fftw_2N(ths->n[(i0)])); \ 
  387     if (lg_offset[(i0)] >= ths->n[(i0)]) \ 
  388       lg_offset[(i0)] = -(fftw_2N(ths->n[(i0)]) - lg_offset[(i0)]); \ 
  391 #define MACRO_set__lg__to__lg_offset \ 
  393   if (lg_offset[i] <= 0) \ 
  395     lg[i] = -lg_offset[i]; \ 
  400     lg[i] = +lg_offset[i]; \ 
  405 #define MACRO_count__lg(dim) \ 
  408   if ((lg[(dim)] == 0) || (lg[(dim)] == ths->n[(dim)]-1)) \ 
  409     count_lg[(dim)] *= -1; \ 
  411   lg[(dim)] += count_lg[(dim)]; \ 
  414 #define MACRO_init_lb_lg_lc \ 
  416   for (i = 0; i < ths->d; i++) \ 
  418     MACRO_nfct__lower_boundary(j, i); \ 
  419     MACRO_compute_lg_offset__count_lg(i); \ 
  420     MACRO_set__lg__to__lg_offset; \ 
  427 #define MACRO_count__lg_lc \ 
  429   MACRO_count__lg(ths->d-1); \ 
  432   while ((lc[i] == NFCT_SUMMANDS) && (i > 0)) \ 
  437     MACRO_count__lg(i - 1); \ 
  439     MACRO_set__lg__to__lg_offset; \ 
  444 #define  MACRO_update_phi_tilde_lg_plain(which_one, which_psi) \ 
  446   for (t = i; t < ths->d; t++) \ 
  448     MACRO__phi_tilde__ ## which_one(which_psi); \ 
  449     lg_plain[t+1]  = lg_plain[t]  * ths->n[t] + lg[t]; \ 
  453 #define MACRO__phi_tilde__A(which_psi) \ 
  455   phi_tilde[t+1] = phi_tilde[t] * MACRO_ ## which_psi; \ 
  458 #define MACRO__phi_tilde__T(which_psi) \ 
  460   if(lg[t] == 0 || lg[t] == ths->n[t] - 1) \ 
  462     phi_tilde[t+1] = phi_tilde[t] * MACRO_ ## which_psi; \ 
  466     phi_tilde[t+1] = K(0.5) * phi_tilde[t] * MACRO_ ## which_psi; \ 
  470 #define MACRO_nfct_B(which_one) \ 
  471 static inline void B_ ## which_one (nfct_plan *ths) \ 
  475   int lprod, l_L, ix;  \ 
  478   int lg_offset[ths->d];  \ 
  479   int count_lg[ths->d];  \ 
  480   int lg_plain[ths->d+1];  \ 
  482   R phi_tilde[ths->d+1];  \ 
  485   f = ths->f; g = ths->g; \ 
  487   MACRO_nfct_B_init_result_ ## which_one \ 
  490   if ((ths->nfct_flags & PRE_PSI)&&(ths->nfct_flags & PRE_FULL_PSI)) \ 
  492     for (ix = 0, j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \ 
  493       for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \ 
  495         MACRO_nfct_B_PRE_FULL_PSI_compute_ ## which_one; \ 
  500     phi_tilde[0] = K(1.0); \ 
  503     for (t = 0, lprod = 1; t < ths->d; t++) \ 
  504       lprod *= NFCT_SUMMANDS; \ 
  507     if (ths->nfct_flags & PRE_PSI) \ 
  508       for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \ 
  510           MACRO_init_lb_lg_lc; \ 
  511           for (l_L = 0; l_L < lprod; l_L++) \ 
  513             MACRO_update_phi_tilde_lg_plain(which_one, with_PRE_PSI); \ 
  514             MACRO_nfct_B_compute_ ## which_one; \ 
  515             MACRO_count__lg_lc; \ 
  521       for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \ 
  523         MACRO_init_lb_lg_lc; \ 
  524         for (l_L = 0; l_L < lprod; l_L++) \ 
  526           MACRO_update_phi_tilde_lg_plain(which_one,compute_PSI); \ 
  527           MACRO_nfct_B_compute_ ## which_one; \ 
  528           MACRO_count__lg_lc; \ 
  538 #define MACRO_nfct_full_psi(which_one) \ 
  539 static inline void full_psi__ ## which_one(nfct_plan *ths) \ 
  545   int lg_plain[ths->d+1];  \ 
  546   int count_lg[ths->d]; \ 
  547   int lg_offset[ths->d]; \ 
  552   R phi_tilde[ths->d+1]; \ 
  553   R eps = ths->nfct_full_psi_eps; \ 
  555   int *index_g, *index_f; \ 
  557   int ix, ix_old, size_psi; \ 
  559   phi_tilde[0] = K(1.0); \ 
  562   if (ths->nfct_flags & PRE_PSI) \ 
  564     size_psi = ths->M_total; \ 
  565     index_f = (int*)Y(malloc)(ths->M_total  * sizeof(int)); \ 
  566     index_g = (int*)Y(malloc)(size_psi * sizeof(int)); \ 
  567     new_psi = (R*)Y(malloc)(size_psi * sizeof(R)); \ 
  569     for (t = 0,lprod = 1; t < ths->d; t++) \ 
  571       lprod *= NFCT_SUMMANDS; \ 
  572       eps *= nfct_phi(ths, K(0.0), t); \ 
  575     for (ix = 0, ix_old = 0, j = 0; j < ths->M_total; j++) \ 
  577       MACRO_init_lb_lg_lc; \ 
  579       for (l_L = 0; l_L < lprod; l_L++) \ 
  581         MACRO_update_phi_tilde_lg_plain(which_one, with_PRE_PSI); \ 
  583         if (phi_tilde[ths->d] > eps) \ 
  585           index_g[ix] = lg_plain[ths->d]; \ 
  586           new_psi[ix] = phi_tilde[ths->d]; \ 
  589           if (ix == size_psi) \ 
  591             size_psi += ths->M_total; \ 
  592             index_g = (int*)realloc(index_g, size_psi * sizeof(int)); \ 
  593             new_psi = (R*)realloc(new_psi, size_psi * sizeof(R)); \ 
  597         MACRO_count__lg_lc; \ 
  601       index_f[j] = ix - ix_old; \ 
  608     ths->size_psi = size_psi; \ 
  610     index_g = (int*)realloc(index_g, size_psi * sizeof(int)); \ 
  611     new_psi = (R*)realloc(new_psi, size_psi * sizeof(R)); \ 
  613     ths->psi = new_psi; \ 
  614     ths->psi_index_g = index_g; \ 
  615     ths->psi_index_f = index_f; \ 
  620 MACRO_nfct_full_psi(A)
 
  621 MACRO_nfct_full_psi(T)
 
  625 void X(trafo)(X(plan) *ths)
 
  628   ths->g_hat = ths->g1;
 
  641   Z(execute)(ths->my_fftw_r2r_plan);
 
  644   if (ths->nfct_flags & PRE_FULL_PSI)
 
  653   if (ths->nfct_flags & PRE_FULL_PSI)
 
  655     Y(free)(ths->psi_index_g);
 
  656     Y(free)(ths->psi_index_f);
 
  660 void X(adjoint)(X(plan) *ths)
 
  663   ths->g_hat = ths->g2;
 
  666   if (ths->nfct_flags & PRE_FULL_PSI)
 
  675   if (ths->nfct_flags & PRE_FULL_PSI)
 
  677     Y(free)(ths->psi_index_g);
 
  678     Y(free)(ths->psi_index_f);
 
  685   Z(execute)(ths->my_fftw_r2r_plan);
 
  696 static inline 
void precompute_phi_hut(X(plan) *ths)
 
  701   ths->c_phi_inv = (R**)Y(malloc)(ths->d * 
sizeof(R*));
 
  703   for (t = 0; t < ths->d; t++)
 
  705     ths->c_phi_inv[t] = (R*)Y(malloc)(ths->N[t] * 
sizeof(R));
 
  707     for (kg[t] = 0; kg[t] < ths->N[t]; kg[t]++)
 
  709       ths->c_phi_inv[t][kg[t]] = MACRO_compute_PHI_HUT_INV;
 
  714 void X(precompute_psi)(X(plan) *ths)
 
  721   for (t = 0; t < ths->d; t++)
 
  723     for (j = 0; j < ths->M_total; j++)
 
  725       MACRO_nfct__lower_boundary(j, t);
 
  726       for(lc[t] = 0; lc[t] < NFCT_SUMMANDS; lc[t]++)
 
  727         ths->psi[(j * ths->d + t) * NFCT_SUMMANDS + lc[t]] = MACRO_compute_PSI;
 
  732 static inline void init_help(X(plan) *ths)
 
  736   ths->N_total = prod_int(ths->N, ths->d);
 
  737   ths->sigma = (R*)Y(malloc)(ths->d * 
sizeof(R));
 
  739   for (t = 0; t < ths->d; t++)
 
  740     ths->sigma[t] = ((R)(ths->n[t] - 1)) / ths->N[t];
 
  743   ths->r2r_kind = (Z(r2r_kind)*)Y(malloc)(ths->d * 
sizeof (Z(r2r_kind)));
 
  744   for (t = 0; t < ths->d; t++)
 
  745     ths->r2r_kind[t] = FFTW_REDFT00;
 
  747   NFCT_WINDOW_HELP_INIT;
 
  749   if (ths->nfct_flags & MALLOC_X)
 
  750     ths->x = (R*)Y(malloc)(ths->d * ths->M_total * 
sizeof(R));
 
  752   if (ths->nfct_flags & MALLOC_F_HAT)
 
  753     ths->f_hat = (R*)Y(malloc)(ths->N_total * 
sizeof(R));
 
  755   if (ths->nfct_flags & MALLOC_F)
 
  756     ths->f = (R*)Y(malloc)(ths->M_total * 
sizeof(R));
 
  758   if (ths->nfct_flags & PRE_PHI_HUT)
 
  759     precompute_phi_hut(ths);
 
  762   if (ths->nfct_flags & PRE_PSI)
 
  765       (R*)Y(malloc)(ths->M_total * ths->d * NFCT_SUMMANDS * 
sizeof(R));
 
  768     ths->nfct_full_psi_eps = POW(K(10.0), K(-10.0));
 
  771   if (ths->nfct_flags & FFTW_INIT)
 
  774       (R*)Y(malloc)(prod_int(ths->n, ths->d) * 
sizeof(R));
 
  776     if (ths->nfct_flags & FFT_OUT_OF_PLACE)
 
  778         (R*) Y(malloc)(prod_int(ths->n, ths->d) * 
sizeof(R));
 
  782     ths->my_fftw_r2r_plan =
 
  783       Z(plan_r2r)(ths->d, ths->n, ths->g1, ths->g2, ths->r2r_kind,
 
  787   ths->mv_trafo = (void (*) (
void* ))X(trafo);
 
  788   ths->mv_adjoint = (void (*) (
void* ))X(adjoint);
 
  791 void X(init)(X(plan) *ths, 
int d, 
int *N, 
int M_total)
 
  796   ths->M_total = M_total;
 
  797   ths->N = (
int*) Y(malloc)(ths->d * 
sizeof(int));
 
  799   for (t = 0;t < d; t++)
 
  802   ths->n = (
int*) Y(malloc)(ths->d * 
sizeof(int));
 
  804   for (t = 0; t < d; t++)
 
  805     ths->n[t] = fftw_2N(Y(next_power_of_2)(ths->N[t]));
 
  811   ths->nfct_flags = NFCT_DEFAULT_FLAGS;
 
  812   ths->fftw_flags = FFTW_DEFAULT_FLAGS;
 
  830 void X(init_guru)(X(plan) *ths, 
int d, 
int *N, 
int M_total, 
int *n, 
int m,
 
  831   unsigned nfct_flags, 
unsigned fftw_flags)
 
  836   ths->M_total = M_total;
 
  838   ths->N = (
int*)Y(malloc)(ths->d * 
sizeof(int));
 
  840   for (t = 0; t < d; t++)
 
  843   ths->n = (
int*)Y(malloc)(ths->d * 
sizeof(int));
 
  845   for (t = 0; t < d; t++)
 
  850   ths->nfct_flags = nfct_flags;
 
  851   ths->fftw_flags = fftw_flags;
 
  856 void X(init_1d)(X(plan) *ths, 
int N0, 
int M_total)
 
  861   X(init)(ths, 1, N, M_total);
 
  864 void X(init_2d)(X(plan) *ths, 
int N0, 
int N1, 
int M_total)
 
  870   X(init)(ths, 2, N, M_total);
 
  873 void X(init_3d)(X(plan) *ths, 
int N0, 
int N1, 
int N2, 
int M_total)
 
  880   X(init)(ths, 3, N, M_total);
 
  883 void X(finalize)(X(plan) *ths)
 
  887   if (ths->nfct_flags & FFTW_INIT)
 
  889     Z(destroy_plan)(ths->my_fftw_r2r_plan);
 
  891     if (ths->nfct_flags & FFT_OUT_OF_PLACE)
 
  898   if (ths->nfct_flags & PRE_PSI)
 
  903   if (ths->nfct_flags & PRE_PHI_HUT)
 
  905     for (t = 0; t < ths->d; t++)
 
  906       Y(free)(ths->c_phi_inv[t]);
 
  907     Y(free)(ths->c_phi_inv);
 
  910   if (ths->nfct_flags & MALLOC_F)
 
  913   if(ths->nfct_flags & MALLOC_F_HAT)
 
  916   if (ths->nfct_flags & MALLOC_X)
 
  919   WINDOW_HELP_FINALIZE;
 
  925   Y(free)(ths->r2r_kind);