Actual source code: fftw.c

/*
    Provides an interface to the FFTW package.
    Testing examples can be found in ~src/mat/tests
*/

#include <../src/mat/impls/fft/fft.h>
EXTERN_C_BEGIN
#include <fftw3-mpi.h>
EXTERN_C_END

typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw;
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim   *iodims;
#endif
  PetscInt     partial_dim;
  fftw_plan    p_forward,p_backward;
  unsigned     p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of the arrays because an fftw plan should be
                                                             executed on the arrays with which the plan was created */
} Mat_FFTW;
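
/*
   A minimal usage sketch of this type (kept in an #if 0 block, so it is not compiled). It
   assumes a complex-scalar PETSc build; the 8x8 dimensions are hypothetical and error
   checking is omitted. FFTW transforms are unnormalized, hence the final VecScale().
*/
#if 0
#include <petscmat.h>
int main(int argc,char **argv)
{
  Mat      A;
  Vec      x,y,z;
  PetscInt dim[2] = {8,8},N = 64;                  /* hypothetical 8x8 transform, N = 8*8 */

  PetscInitialize(&argc,&argv,NULL,NULL);
  MatCreateFFT(PETSC_COMM_WORLD,2,dim,MATFFTW,&A); /* creates a MATFFTW matrix */
  MatCreateVecsFFTW(A,&x,&y,&z);                   /* vectors with FFTW-compatible layout */
  VecSet(x,1.0);
  MatMult(A,x,y);                                  /* forward DFT */
  MatMultTranspose(A,y,z);                         /* backward DFT; z = N*x */
  VecScale(z,1.0/N);                               /* undo FFTW's scaling */
  VecDestroy(&x); VecDestroy(&y); VecDestroy(&z); MatDestroy(&A);
  PetscFinalize();
  return 0;
}
#endif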

extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);

/* MatMult_SeqFFTW performs a serial forward DFT
   Input parameters:
     A - the matrix
     x - the vector on which the forward DFT (FDFT) will be performed

   Output parameter:
     y - vector that stores the result of the FDFT
*/
PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64      *iodims;
#else
  fftw_iodim        *iodims;
#endif
  PetscInt          i;
#endif
  PetscInt          ndim = fft->ndim,*dim = fft->dim;

  VecGetArrayRead(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      if (ndim) {
        iodims[ndim-1].n  = (ptrdiff_t)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n  = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      if (ndim) {
        iodims[ndim-1].n  = (int)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n  = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#endif
#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    }
    fftw->finarray  = (PetscScalar*)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten during planning,
       so planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArrayRead(x,&x_array);
  return(0);
}
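
/*
   Note on plan reuse: fftw_execute() is only valid for the arrays the plan was created with,
   which is why finarray/foutarray are recorded above. For different arrays the new-array
   interface (fftw_execute_dft / fftw_execute_dft_r2c) is used; per the FFTW manual the new
   arrays must have the same size, alignment, and in-place properties as the originals.
*/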

/* MatMultTranspose_SeqFFTW performs a serial backward DFT
   Input parameters:
     A - the matrix
     x - the vector on which the backward DFT (BDFT) will be performed

   Output parameter:
     y - vector that stores the result of the BDFT
*/
PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64      *iodims = fftw->iodims;
#else
  fftw_iodim        *iodims = fftw->iodims;
#endif
#endif

  VecGetArrayRead(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#endif
#else
      fftw->p_backward = fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    }
    fftw->binarray  = (PetscScalar*)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArrayRead(x,&x_array);
  return(0);
}
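
/*
   FFTW transforms are unnormalized: applying MatMultTranspose after MatMult scales the data
   by N, the product of the dimensions. A caller that wants a round trip typically rescales,
   e.g. (sketch):

     MatMult(A,x,y);            forward DFT
     MatMultTranspose(A,y,z);   backward DFT, z = N*x
     VecScale(z,1.0/N);         recover x
*/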

/* MatMult_MPIFFTW performs a parallel forward DFT
   Input parameters:
     A - the matrix
     x - the vector on which the forward DFT (FDFT) will be performed

   Output parameter:
     y - vector that stores the result of the FDFT
*/
PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
  MPI_Comm          comm;

  PetscObjectGetComm((PetscObject)A,&comm);
  VecGetArrayRead(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->finarray  = (PetscScalar*)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten during planning,
       so planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArrayRead(x,&x_array);
  return(0);
}
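
/*
   Two details of the parallel paths (here and in MatMultTranspose_MPIFFTW below): the
   real-to-complex plans are created with FFTW_ESTIMATE regardless of p_flag, so planning
   never overwrites user data; and the new-array reuse branch calls fftw_execute_dft, which
   matches complex-to-complex plans only (the sequential versions dispatch to
   fftw_execute_dft_r2c / fftw_execute_dft_c2r for real builds).
*/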

/* MatMultTranspose_MPIFFTW performs a parallel backward DFT
   Input parameters:
     A - the matrix
     x - the vector on which the backward DFT (BDFT) will be performed

   Output parameter:
     y - vector that stores the result of the BDFT
*/
PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
  MPI_Comm          comm;

  PetscObjectGetComm((PetscObject)A,&comm);
  VecGetArrayRead(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->binarray  = (PetscScalar*)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArrayRead(x,&x_array);
  return(0);
}

PetscErrorCode MatDestroy_FFTW(Mat A)
{
  Mat_FFT  *fft  = (Mat_FFT*)A->data;
  Mat_FFTW *fftw = (Mat_FFTW*)fft->data;

  fftw_destroy_plan(fftw->p_forward);
  fftw_destroy_plan(fftw->p_backward);
  if (fftw->iodims) {
    free(fftw->iodims);
  }
  PetscFree(fftw->dim_fftw);
  PetscFree(fft->data);
  fftw_mpi_cleanup();
  return(0);
}

#include <../src/vec/vec/impls/mpi/pvecimpl.h>
PetscErrorCode VecDestroy_MPIFFTW(Vec v)
{
  PetscScalar *array;

  VecGetArray(v,&array);
  fftw_free((fftw_complex*)array); /* the data array came from fftw_malloc(), so it must be released with fftw_free() */
  VecRestoreArray(v,&array);
  VecDestroy_MPI(v);
  return(0);
}

static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new)
{
  Mat A;

  PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);
  MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);
  return(0);
}

static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new)
{
  Mat A;

  PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);
  MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);
  return(0);
}

static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout,Vec *bout_new)
{
  Mat A;

  PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);
  MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);
  return(0);
}
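
/*
   These duplicate routines route VecDuplicate() back through the owning matrix: each FFTW
   vector has the matrix composed on it under the key "FFTmatrix" (see MatCreateVecsFFTW_FFTW
   below), so a duplicate is created with the same fftw_malloc-based storage and parallel
   layout as the original rather than as an ordinary MPI vector.
*/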

/*@
   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
     parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  x - (optional) input vector of forward FFTW
.  y - (optional) output vector of forward FFTW
-  z - (optional) output vector of backward FFTW

   Level: advanced

   Note: The parallel layout of the output of the forward FFTW is always the same as the input
   of the backward FFTW. However, the parallel layout of the input vector of the forward FFTW
   might not be the same as the output of the backward FFTW.
   Also note that one needs to provide enough space when doing a parallel real transform:
   extra zeros must be padded at the end of the last dimension. For this reason one needs to
   invoke the fftw_mpi_local_size_transposed routine, remembering to change the last dimension
   from n to n/2+1 when doing so. The number of zeros to be padded depends on whether the last
   dimension is even or odd: an even last dimension needs two padding zeros, an odd one needs
   only one (e.g. a last dimension of 4 is stored in 2*(4/2+1) = 6 reals, i.e. two zeros of
   padding, while a last dimension of 5 is stored in 2*(5/2+1) = 6 reals, i.e. one zero).
   Lastly, one needs some scratch space at the end of the data set on each process; the
   fftw_mpi_local_size routines figure out how much space is needed (returned as alloc_local),
   i.e. they compute the data+scratch space for each process and return that.

.seealso: MatCreateFFT()
@*/
PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
  return(0);
}
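
/*
   A sketch of the padded sizes used below for real builds (a hypothetical helper, not part
   of the PETSc API): for dimensions dim[0..ndim-1] the padded global length is the product
   of all dimensions with the last one replaced by 2*(dim[ndim-1]/2+1).
*/
#if 0
static PetscInt PaddedRealSize(PetscInt ndim,const PetscInt dim[])
{
  PetscInt i,len = 1;
  for (i=0; i<ndim-1; i++) len *= dim[i];
  return len*2*(dim[ndim-1]/2+1); /* e.g. {4,5} -> 4*2*(5/2+1) = 24 */
}
#endif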

PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscMPIInt size,rank;
  MPI_Comm    comm;
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscInt    N     = fft->N;
  PetscInt    ndim  = fft->ndim,*dim = fft->dim,n = fft->n;

  PetscObjectGetComm((PetscObject)A,&comm);
  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin)  {VecCreateSeq(PETSC_COMM_SELF,N,fin);}
    if (fout) {VecCreateSeq(PETSC_COMM_SELF,N,fout);}
    if (bout) {VecCreateSeq(PETSC_COMM_SELF,N,bout);}
#else
    if (fin)  {VecCreateSeq(PETSC_COMM_SELF,n,fin);}
    if (fout) {VecCreateSeq(PETSC_COMM_SELF,n,fout);}
    if (bout) {VecCreateSeq(PETSC_COMM_SELF,n,bout);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double       *data_finr,*data_boutr;
    PetscInt     n1,N1;
    ptrdiff_t    temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);
        PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);
        PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);
        PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
        PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);
        PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
        PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
        PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
        PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
        PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
        PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);
        PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
        PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
        PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
        PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
        PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt)temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);
        PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);
        PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);
        PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
        PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
        PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
        PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
    /* FFTW vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
       v->array_allocated=NULL. A regular replacearray call would not free that memory and would
       only cause a memory leak. We null out these pointers here to avoid accidental use.
    */
    if (fin)  (*fin)->ops->replacearray  = NULL;
    if (fout) (*fout)->ops->replacearray = NULL;
    if (bout) (*bout)->ops->replacearray = NULL;
  }
  return(0);
}

/*@
   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into the FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

   Options Database Keys:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For a real parallel FFT, FFTW requires insertion of extra space at the end of the last
   dimension. This is required even when one is not doing an in-place transform. The last
   dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra zeros. This
   routine does that job via a scatter operation.

.seealso: VecScatterFFTWToPetsc()
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
  return(0);
}
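
/*
   A sketch of the real-transform round trip these scatters enable (kept in an #if 0 block;
   error checking and the actual problem setup are omitted, and the 8x8 dimensions are
   hypothetical). It mirrors the pattern used by the tests in src/mat/tests.
*/
#if 0
  Mat      A;
  Vec      x,y,z;        /* FFTW-layout work vectors */
  Vec      input,output; /* natural-layout vectors of length N = dim[0]*dim[1] */
  PetscInt dim[2] = {8,8},N = 64;

  MatCreateFFT(PETSC_COMM_WORLD,2,dim,MATFFTW,&A);
  MatCreateVecsFFTW(A,&x,&y,&z);
  VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,N,&input);
  VecDuplicate(input,&output);
  /* ... fill input ... */
  VecScatterPetscToFFTW(A,input,x);  /* insert the r2c padding */
  MatMult(A,x,y);                    /* forward transform */
  MatMultTranspose(A,y,z);           /* backward transform; scales the data by N */
  VecScatterFFTWToPetsc(A,z,output); /* strip the padding */
  VecScale(output,1.0/N);            /* output now matches input */
#endif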

PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
{
  MPI_Comm    comm;
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscInt    N     = fft->N;
  PetscInt    ndim  = fft->ndim,*dim = fft->dim;
  PetscInt    low;
  PetscMPIInt rank,size;
  PetscInt    vsize,vsize1;
  ptrdiff_t   local_n0,local_0_start;
  ptrdiff_t   local_n1,local_1_start;
  VecScatter  vecscat;
  IS          list1,list2;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt    i,j,k,partial_dim;
  PetscInt    *indx1,*indx2,tempindx,tempindx1;
  PetscInt    NM;
  ptrdiff_t   temp;
#endif

  PetscObjectGetComm((PetscObject)A,&comm);
  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  VecGetOwnershipRange(y,&low,NULL);

  if (size==1) {
    VecGetSize(x,&vsize);
    VecGetSize(y,&vsize1);
    ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
    VecScatterCreate(x,list1,y,list1,&vecscat);
    VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterDestroy(&vecscat);
    ISDestroy(&list1);
  } else {
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ISCreateStride(comm,local_n0,local_0_start,1,&list1);
      ISCreateStride(comm,local_n0,low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
      ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
      PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);

      if (dim[1]%2==0) {
        NM = dim[1]+2;
      } else {
        NM = dim[1]+1;
      }
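
      /* Each row of dim[1] real numbers is stored by FFTW in a padded row of
         NM = 2*(dim[1]/2+1) slots (dim[1]+2 when even, dim[1]+1 when odd); indx1 walks the
         natural PETSc layout while indx2 walks the padded FFTW layout. */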
      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          tempindx  = i*dim[1] + j;
          tempindx1 = i*NM + j;

          indx1[tempindx] = local_0_start*dim[1]+tempindx;
          indx2[tempindx] = low+tempindx1;
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
      ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
      PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);

      if (dim[2]%2==0) NM = dim[2]+2;
      else             NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
            tempindx1 = i*dim[1]*NM + j*NM + k;

            indx1[tempindx] = local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx] = low+tempindx1;
          }
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
      ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
      PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);

      if (dim[ndim-1]%2==0) NM = 2;
      else                  NM = 1;
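
      /* Walk the flattened unpadded index space; after each run of dim[ndim-1] entries
         (one line along the last dimension) skip the NM extra padding zeros that the FFTW
         layout inserts at the end of that line. */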
      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j += NM;
        j++;
      }
      ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    }
  }
  return(0);
}

/*@
   VecScatterFFTWToPetsc - Converts FFTW output back to a PETSc vector.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - FFTW vector

   Output Parameters:
.  y - PETSc vector

   Level: intermediate

   Note: When doing a real transform, the FFTW output of the backward DFT contains extra zeros
   at the end of the last dimension. VecScatterFFTWToPetsc removes those extra zeros.

.seealso: VecScatterPetscToFFTW()
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
  return(0);
}

PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
{
  MPI_Comm    comm;
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscInt    N     = fft->N;
  PetscInt    ndim  = fft->ndim,*dim = fft->dim;
  PetscInt    low;
  PetscMPIInt rank,size;
  ptrdiff_t   local_n0,local_0_start;
  ptrdiff_t   local_n1,local_1_start;
  VecScatter  vecscat;
  IS          list1,list2;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt    i,j,k,partial_dim;
  PetscInt    *indx1,*indx2,tempindx,tempindx1;
  PetscInt    NM;
  ptrdiff_t   temp;
#endif

  PetscObjectGetComm((PetscObject)A,&comm);
  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  VecGetOwnershipRange(x,&low,NULL);

  if (size==1) {
    ISCreateStride(comm,N,0,1,&list1);
    VecScatterCreate(x,list1,y,list1,&vecscat);
    VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterDestroy(&vecscat);
    ISDestroy(&list1);
  } else {
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ISCreateStride(comm,local_n1,local_1_start,1,&list1);
      ISCreateStride(comm,local_n1,low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
      ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
      PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);

      if (dim[1]%2==0) NM = dim[1]+2;
      else             NM = dim[1]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          tempindx  = i*dim[1] + j;
          tempindx1 = i*NM + j;

          indx1[tempindx] = local_0_start*dim[1]+tempindx;
          indx2[tempindx] = low+tempindx1;
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
      ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
      PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);

      if (dim[2]%2==0) NM = dim[2]+2;
      else             NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
            tempindx1 = i*dim[1]*NM + j*NM + k;

            indx1[tempindx] = local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx] = low+tempindx1;
          }
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
      ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
      PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);

      if (dim[ndim-1]%2==0) NM = 2;
      else                  NM = 1;

      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j += NM;
        j++;
      }
      ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    }
  }
  return(0);
}

/*
   MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW

   Options Database Keys:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate
*/
PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
{
  MPI_Comm    comm;
  Mat_FFT     *fft = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw;
  PetscInt    n = fft->n,N = fft->N,ndim = fft->ndim,*dim = fft->dim;
  const char  *plans[] = {"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
  unsigned    iplans[] = {FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
  PetscBool   flg;
  PetscInt    p_flag,partial_dim = 1,ctr;
  PetscMPIInt size,rank;
  ptrdiff_t   *pdim;
  ptrdiff_t   local_n1,local_1_start;
#if !defined(PETSC_USE_COMPLEX)
  ptrdiff_t   temp;
  PetscInt    N1,tot_dim;
#else
  PetscInt    n1;
#endif

  PetscObjectGetComm((PetscObject)A,&comm);
  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);

  fftw_mpi_init();
  pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  tot_dim = 2*dim[0];
#endif
  for (ctr=1; ctr<ndim; ctr++) {
    partial_dim *= dim[ctr];
    pdim[ctr]    = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
    else             tot_dim *= dim[ctr];
#endif
  }
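
  /* For real builds tot_dim is the padded total length 2*dim[0]*...*(dim[ndim-1]/2+1);
     e.g. dim = {4,5} gives tot_dim = 2*4*(5/2+1) = 24. partial_dim is the product of all
     dimensions except the first (here 5). */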

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    MatSetSizes(A,N,N,N,N);
    n = N;
#else
    MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);
    n = tot_dim;
#endif
  } else {
    ptrdiff_t local_n0,local_0_start;
    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
#else
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n  = (PetscInt)local_n0;
      n1 = (PetscInt)local_n1;
      MatSetSizes(A,n1,n,N,N);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      /*
       PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
       PetscSynchronizedFlush(comm,PETSC_STDOUT);
       */
      n = (PetscInt)local_n0*dim[1];
      MatSetSizes(A,n,n,N,N);
#else
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n = 2*(PetscInt)local_n0*(dim[1]/2+1);
      MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      n = (PetscInt)local_n0*dim[1]*dim[2];
      MatSetSizes(A,n,n,N,N);
#else
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
      MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);

      n = (PetscInt)local_n0*partial_dim;
      MatSetSizes(A,n,n,N,N);
#else
      temp = pdim[ndim-1];

      pdim[ndim-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
      N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt)temp);

      pdim[ndim-1] = temp;

      MatSetSizes(A,n,n,N1,N1);
#endif
      break;
    }
  }
  free(pdim);
  PetscObjectChangeTypeName((PetscObject)A,MATFFTW);
  PetscNewLog(A,&fftw);
  fft->data = (void*)fftw;

  fft->n            = n;
  fftw->ndim_fftw   = (ptrdiff_t)ndim; /* this is the dimension of the transform */
  fftw->partial_dim = partial_dim;

  PetscMalloc1(ndim,&(fftw->dim_fftw));
  if (size == 1) {
#if defined(PETSC_USE_64BIT_INDICES)
    fftw->iodims = (fftw_iodim64*)malloc(sizeof(fftw_iodim64)*ndim);
#else
    fftw->iodims = (fftw_iodim*)malloc(sizeof(fftw_iodim)*ndim);
#endif
  }

  for (ctr=0; ctr<ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

  fftw->p_forward  = NULL;
  fftw->p_backward = NULL;
  fftw->p_flag     = FFTW_ESTIMATE;

  if (size == 1) {
    A->ops->mult          = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
  } else {
    A->ops->mult          = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;

  PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);
  PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);
  PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);

  /* get runtime options */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
  PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);
  if (flg) {
    fftw->p_flag = iplans[p_flag];
  }
  PetscOptionsEnd();
  return(0);
}
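
/*
   Runtime usage sketch: the planner flag is selected with, e.g.,

     ./myapp -mat_fftw_plannerflags FFTW_MEASURE

   where "myapp" is a stand-in for any program that creates a MATFFTW matrix. Note that the
   non-FFTW_ESTIMATE planners overwrite the arrays they plan with (see the warnings above),
   so they are only safe when planning happens before the input vector is filled.
*/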