Actual source code: fftw.c
petsc-3.14.6 2021-03-30
2: /*
3: Provides an interface to the FFTW package.
4: Testing examples can be found in ~src/mat/tests
5: */
7: #include <../src/mat/impls/fft/fft.h>
8: EXTERN_C_BEGIN
9: #include <fftw3-mpi.h>
10: EXTERN_C_END
12: typedef struct {
13: ptrdiff_t ndim_fftw,*dim_fftw;
14: #if defined(PETSC_USE_64BIT_INDICES)
15: fftw_iodim64 *iodims;
16: #else
17: fftw_iodim *iodims;
18: #endif
19: PetscInt partial_dim;
20: fftw_plan p_forward,p_backward;
21: unsigned p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22: PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of the arrays because an fftw plan should be
23: executed with the arrays for which the plan was created */
24: } Mat_FFTW;
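/* A minimal sketch (illustrative only, not part of the original source) of the reuse rule that
   the finarray/foutarray/binarray/boutarray pointers support: an fftw plan may be re-executed
   directly only with the arrays it was created on; for any other arrays FFTW's new-array
   execute interface must be used instead, e.g. for the forward plan

     if (in == fftw->finarray && out == fftw->foutarray) fftw_execute(fftw->p_forward);
     else fftw_execute_dft(fftw->p_forward,(fftw_complex*)in,(fftw_complex*)out);
*/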
26: extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28: extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30: extern PetscErrorCode MatDestroy_FFTW(Mat);
31: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);
34: /* MatMult_SeqFFTW performs serial forward DFT
35: Input parameter:
36: A - the matrix
37: x - the vector on which FDFT will be performed
39: Output parameter:
40: y - vector that stores result of FDFT
41: */
42: PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
43: {
45: Mat_FFT *fft = (Mat_FFT*)A->data;
46: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
47: const PetscScalar *x_array;
48: PetscScalar *y_array;
49: #if defined(PETSC_USE_COMPLEX)
50: #if defined(PETSC_USE_64BIT_INDICES)
51: fftw_iodim64 *iodims;
52: #else
53: fftw_iodim *iodims;
54: #endif
55: PetscInt i;
56: #endif
57: PetscInt ndim = fft->ndim,*dim = fft->dim;
60: VecGetArrayRead(x,&x_array);
61: VecGetArray(y,&y_array);
62: if (!fftw->p_forward) { /* create a plan, then execute it */
63: switch (ndim) {
64: case 1:
65: #if defined(PETSC_USE_COMPLEX)
66: fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
67: #else
68: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
69: #endif
70: break;
71: case 2:
72: #if defined(PETSC_USE_COMPLEX)
73: fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
74: #else
75: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
76: #endif
77: break;
78: case 3:
79: #if defined(PETSC_USE_COMPLEX)
80: fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
81: #else
82: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
83: #endif
84: break;
85: default:
86: #if defined(PETSC_USE_COMPLEX)
87: iodims = fftw->iodims;
88: #if defined(PETSC_USE_64BIT_INDICES)
89: if (ndim) {
90: iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
91: iodims[ndim-1].is = iodims[ndim-1].os = 1;
92: for (i=ndim-2; i>=0; --i) {
93: iodims[i].n = (ptrdiff_t)dim[i];
94: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
95: }
96: }
97: fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
98: #else
99: if (ndim) {
100: iodims[ndim-1].n = (int)dim[ndim-1];
101: iodims[ndim-1].is = iodims[ndim-1].os = 1;
102: for (i=ndim-2; i>=0; --i) {
103: iodims[i].n = (int)dim[i];
104: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
105: }
106: }
107: fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
108: #endif
110: #else
111: fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
112: #endif
113: break;
114: }
115: fftw->finarray = (PetscScalar *) x_array;
116: fftw->foutarray = y_array;
117: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
118: Planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
119: fftw_execute(fftw->p_forward);
120: } else { /* use existing plan */
121: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
122: #if defined(PETSC_USE_COMPLEX)
123: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
124: #else
125: fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
126: #endif
127: } else {
128: fftw_execute(fftw->p_forward);
129: }
130: }
131: VecRestoreArray(y,&y_array);
132: VecRestoreArrayRead(x,&x_array);
133: return(0);
134: }
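/* A minimal usage sketch of the sequential path (illustrative only; assumes a complex build,
   hypothetical sizes, and omits error checking). The first MatMult() builds the forward plan
   with the arrays of x and y; later calls on the same vectors reuse it.

     Mat      A;
     Vec      x,y,z;
     PetscInt dim[2] = {8,8};

     MatCreateFFT(PETSC_COMM_SELF,2,dim,MATFFTW,&A);
     MatCreateVecsFFTW(A,&x,&y,&z);
     VecSetRandom(x,NULL);
     MatMult(A,x,y);                        forward DFT (MatMult_SeqFFTW)
*/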
136: /* MatMultTranspose_SeqFFTW performs serial backward DFT
137: Input parameter:
138: A - the matrix
139: x - the vector on which BDFT will be performed
141: Output parameter:
142: y - vector that stores result of BDFT
143: */
145: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
146: {
148: Mat_FFT *fft = (Mat_FFT*)A->data;
149: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
150: const PetscScalar *x_array;
151: PetscScalar *y_array;
152: PetscInt ndim=fft->ndim,*dim=fft->dim;
153: #if defined(PETSC_USE_COMPLEX)
154: #if defined(PETSC_USE_64BIT_INDICES)
155: fftw_iodim64 *iodims=fftw->iodims;
156: #else
157: fftw_iodim *iodims=fftw->iodims;
158: #endif
159: #endif
162: VecGetArrayRead(x,&x_array);
163: VecGetArray(y,&y_array);
164: if (!fftw->p_backward) { /* create a plan, then execute it */
165: switch (ndim) {
166: case 1:
167: #if defined(PETSC_USE_COMPLEX)
168: fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
169: #else
170: fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
171: #endif
172: break;
173: case 2:
174: #if defined(PETSC_USE_COMPLEX)
175: fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
176: #else
177: fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
178: #endif
179: break;
180: case 3:
181: #if defined(PETSC_USE_COMPLEX)
182: fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
183: #else
184: fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
185: #endif
186: break;
187: default:
188: #if defined(PETSC_USE_COMPLEX)
189: #if defined(PETSC_USE_64BIT_INDICES)
190: fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
191: #else
192: fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
193: #endif
194: #else
195: fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
196: #endif
197: break;
198: }
199: fftw->binarray = (PetscScalar *) x_array;
200: fftw->boutarray = y_array;
201: fftw_execute(fftw->p_backward);
202: } else { /* use existing plan */
203: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
204: #if defined(PETSC_USE_COMPLEX)
205: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
206: #else
207: fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
208: #endif
209: } else {
210: fftw_execute(fftw->p_backward);
211: }
212: }
213: VecRestoreArray(y,&y_array);
214: VecRestoreArrayRead(x,&x_array);
215: return(0);
216: }
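/* Note (illustrative, not part of the original source): as in FFTW itself these transforms are
   unnormalized, so a forward transform followed by a backward transform scales the data by the
   total number of points N = dim[0]*...*dim[ndim-1]; a minimal round trip therefore looks like

     MatMult(A,x,y);
     MatMultTranspose(A,y,z);
     VecScale(z,1.0/N);                     z now approximates x
*/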
218: /* MatMult_MPIFFTW performs forward DFT in parallel
219: Input parameter:
220: A - the matrix
221: x - the vector on which FDFT will be performed
223: Output parameter:
224: y - vector that stores result of FDFT
225: */
226: PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
227: {
229: Mat_FFT *fft = (Mat_FFT*)A->data;
230: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
231: const PetscScalar *x_array;
232: PetscScalar *y_array;
233: PetscInt ndim=fft->ndim,*dim=fft->dim;
234: MPI_Comm comm;
237: PetscObjectGetComm((PetscObject)A,&comm);
238: VecGetArrayRead(x,&x_array);
239: VecGetArray(y,&y_array);
240: if (!fftw->p_forward) { /* create a plan, then execute it */
241: switch (ndim) {
242: case 1:
243: #if defined(PETSC_USE_COMPLEX)
244: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
245: #else
246: SETERRQ(comm,PETSC_ERR_SUP,"no support for real numbers yet");
247: #endif
248: break;
249: case 2:
250: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
251: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
252: #else
253: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
254: #endif
255: break;
256: case 3:
257: #if defined(PETSC_USE_COMPLEX)
258: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
259: #else
260: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
261: #endif
262: break;
263: default:
264: #if defined(PETSC_USE_COMPLEX)
265: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
266: #else
267: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
268: #endif
269: break;
270: }
271: fftw->finarray = (PetscScalar *) x_array;
272: fftw->foutarray = y_array;
273: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
274: Planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
275: fftw_execute(fftw->p_forward);
276: } else { /* use existing plan */
277: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
278: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
279: } else {
280: fftw_execute(fftw->p_forward);
281: }
282: }
283: VecRestoreArray(y,&y_array);
284: VecRestoreArrayRead(x,&x_array);
285: return(0);
286: }
288: /* MatMultTranspose_MPIFFTW performs parallel backward DFT
289: Input parameter:
290: A - the matrix
291: x - the vector on which BDFT will be performed
293: Output parameter:
294: y - vector that stores result of BDFT
295: */
296: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
297: {
299: Mat_FFT *fft = (Mat_FFT*)A->data;
300: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
301: const PetscScalar *x_array;
302: PetscScalar *y_array;
303: PetscInt ndim=fft->ndim,*dim=fft->dim;
304: MPI_Comm comm;
307: PetscObjectGetComm((PetscObject)A,&comm);
308: VecGetArrayRead(x,&x_array);
309: VecGetArray(y,&y_array);
310: if (!fftw->p_backward) { /* create a plan, then execute it */
311: switch (ndim) {
312: case 1:
313: #if defined(PETSC_USE_COMPLEX)
314: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
315: #else
316: SETERRQ(comm,PETSC_ERR_SUP,"no support for real numbers yet");
317: #endif
318: break;
319: case 2:
320: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
321: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
322: #else
323: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
324: #endif
325: break;
326: case 3:
327: #if defined(PETSC_USE_COMPLEX)
328: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
329: #else
330: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
331: #endif
332: break;
333: default:
334: #if defined(PETSC_USE_COMPLEX)
335: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
336: #else
337: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
338: #endif
339: break;
340: }
341: fftw->binarray = (PetscScalar *) x_array;
342: fftw->boutarray = y_array;
343: fftw_execute(fftw->p_backward);
344: } else { /* use existing plan */
345: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
346: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
347: } else {
348: fftw_execute(fftw->p_backward);
349: }
350: }
351: VecRestoreArray(y,&y_array);
352: VecRestoreArrayRead(x,&x_array);
353: return(0);
354: }
356: PetscErrorCode MatDestroy_FFTW(Mat A)
357: {
358: Mat_FFT *fft = (Mat_FFT*)A->data;
359: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
363: fftw_destroy_plan(fftw->p_forward);
364: fftw_destroy_plan(fftw->p_backward);
365: if (fftw->iodims) {
366: free(fftw->iodims);
367: }
368: PetscFree(fftw->dim_fftw);
369: PetscFree(fft->data);
370: fftw_mpi_cleanup();
371: return(0);
372: }
374: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
375: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
376: {
378: PetscScalar *array;
381: VecGetArray(v,&array);
382: fftw_free((fftw_complex*)array);
383: VecRestoreArray(v,&array);
384: VecDestroy_MPI(v);
385: return(0);
386: }
389: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new)
390: {
392: Mat A;
395: PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);
396: MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);
397: return(0);
398: }
400: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new)
401: {
403: Mat A;
406: PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);
407: MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);
408: return(0);
409: }
411: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
412: {
414: Mat A;
417: PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);
418: MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);
419: return(0);
420: }
423: /*@
424: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
425: parallel layout determined by FFTW
427: Collective on Mat
429: Input Parameter:
430: . A - the matrix
432: Output Parameters:
433: + x - (optional) input vector of forward FFTW
434: . y - (optional) output vector of forward FFTW
435: - z - (optional) output vector of backward FFTW
437: Level: advanced
439: Note: The parallel layout of the forward FFTW output is always the same as that of the
440: backward FFTW input, but the parallel layout of the forward FFTW input vector may differ
441: from that of the backward FFTW output.
442: Also note that enough space must be provided when doing a parallel real transform:
443: extra zeros must be padded at the end of the last dimension. For this reason one needs to
444: invoke the fftw_mpi_local_size_transposed routine, remembering to change the
445: last dimension from n to n/2+1 when invoking it. The number of zeros to be padded
446: depends on whether the last dimension is even or odd: if it is even, two zeros are
447: needed; if it is odd, only one.
448: Lastly, one needs some scratch space at the end of the data set on each process. The returned
449: alloc_local figures out how much space is needed, i.e. the data plus scratch space for
450: each processor.
452: .seealso: MatCreateFFT()
453: @*/
454: PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
455: {
459: PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
460: return(0);
461: }
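/* A worked example of the padding described in the Note above (illustrative only): for a real
   transform whose last dimension is n, each "row" along the last dimension occupies
   2*(n/2+1) doubles in the FFTW layout, e.g.

     n = 6 (even):  2*(6/2+1) = 8    two padding zeros per row
     n = 5 (odd):   2*(5/2+1) = 6    one padding zero per row
*/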
463: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
464: {
466: PetscMPIInt size,rank;
467: MPI_Comm comm;
468: Mat_FFT *fft = (Mat_FFT*)A->data;
469: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
470: PetscInt N = fft->N;
471: PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n;
476: PetscObjectGetComm((PetscObject)A,&comm);
478: MPI_Comm_size(comm, &size);
479: MPI_Comm_rank(comm, &rank);
480: if (size == 1) { /* sequential case */
481: #if defined(PETSC_USE_COMPLEX)
482: if (fin) {VecCreateSeq(PETSC_COMM_SELF,N,fin);}
483: if (fout) {VecCreateSeq(PETSC_COMM_SELF,N,fout);}
484: if (bout) {VecCreateSeq(PETSC_COMM_SELF,N,bout);}
485: #else
486: if (fin) {VecCreateSeq(PETSC_COMM_SELF,n,fin);}
487: if (fout) {VecCreateSeq(PETSC_COMM_SELF,n,fout);}
488: if (bout) {VecCreateSeq(PETSC_COMM_SELF,n,bout);}
489: #endif
490: } else { /* parallel cases */
491: ptrdiff_t alloc_local,local_n0,local_0_start;
492: ptrdiff_t local_n1;
493: fftw_complex *data_fout;
494: ptrdiff_t local_1_start;
495: #if defined(PETSC_USE_COMPLEX)
496: fftw_complex *data_fin,*data_bout;
497: #else
498: double *data_finr,*data_boutr;
499: PetscInt n1,N1;
500: ptrdiff_t temp;
501: #endif
503: switch (ndim) {
504: case 1:
505: #if !defined(PETSC_USE_COMPLEX)
506: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
507: #else
508: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
509: if (fin) {
510: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
511: VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);
512: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
513: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
514: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
515: }
516: if (fout) {
517: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
518: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);
519: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
520: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
521: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
522: }
523: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
524: if (bout) {
525: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
526: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);
527: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
528: (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
529: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
530: }
531: break;
532: #endif
533: case 2:
534: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
535: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
536: N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
537: if (fin) {
538: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
539: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
540: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
541: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
542: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
543: }
544: if (fout) {
545: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
546: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);
547: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
548: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
549: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
550: }
551: if (bout) {
552: data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
553: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
554: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
555: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
556: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
557: }
558: #else
559: /* Get local size */
560: alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
561: if (fin) {
562: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
563: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
564: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
565: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
566: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
567: }
568: if (fout) {
569: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
570: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
571: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
572: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
573: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
574: }
575: if (bout) {
576: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
577: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
578: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
579: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
580: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
581: }
582: #endif
583: break;
584: case 3:
585: #if !defined(PETSC_USE_COMPLEX)
586: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
587: N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
588: if (fin) {
589: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
590: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
591: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
592: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
593: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
594: }
595: if (fout) {
596: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
597: VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);
598: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
599: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
600: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
601: }
602: if (bout) {
603: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
604: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
605: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
606: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
607: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
608: }
609: #else
610: alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
611: if (fin) {
612: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
613: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
614: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
615: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
616: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
617: }
618: if (fout) {
619: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
620: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
621: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
622: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
623: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
624: }
625: if (bout) {
626: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
627: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
628: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
629: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
630: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
631: }
632: #endif
633: break;
634: default:
635: #if !defined(PETSC_USE_COMPLEX)
636: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
638: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
640: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
641: N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
643: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
645: if (fin) {
646: data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
647: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);
648: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
649: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
650: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
651: }
652: if (fout) {
653: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
654: VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);
655: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
656: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
657: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
658: }
659: if (bout) {
660: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
661: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);
662: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
663: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
664: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
665: }
666: #else
667: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
668: if (fin) {
669: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
670: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
671: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
672: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
673: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
674: }
675: if (fout) {
676: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
677: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
678: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
679: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
680: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
681: }
682: if (bout) {
683: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
684: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
685: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
686: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
687: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
688: }
689: #endif
690: break;
691: }
692: /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
693: v->array_allocated=NULL. A regular replacearray call won't free the memory and would only cause
694: memory leaks. We void these pointers here to avoid accidental use.
695: */
696: if (fin) (*fin)->ops->replacearray = NULL;
697: if (fout) (*fout)->ops->replacearray = NULL;
698: if (bout) (*bout)->ops->replacearray = NULL;
699: }
700: return(0);
701: }
703: /*@
704: VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into the FFTW block.
706: Collective on Mat
708: Input Parameters:
709: + A - FFTW matrix
710: - x - the PETSc vector
712: Output Parameters:
713: . y - the FFTW vector
715: Options Database Keys:
716: . -mat_fftw_plannerflags - set FFTW planner flags
718: Level: intermediate
720: Note: For a parallel real FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
721: one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
722: zeros. This routine does that job with a scattering operation.
724: .seealso: VecScatterFFTWToPetsc()
725: @*/
726: PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
727: {
731: PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
732: return(0);
733: }
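/* A worked example of the index mapping built below for the 2D real case (illustrative only):
   with dim[1] even the padded row length is NM = dim[1]+2, so the PETSc entry (i,j), global
   index local_0_start*dim[1] + i*dim[1] + j, is scattered to FFTW index low + i*NM + j.
   For dim[0]=dim[1]=4, on the process owning the first rows (local_0_start = low = 0):

     PETSc 0,1,2,3  ->  FFTW 0,1,2,3     (FFTW slots 4,5 hold the padding)
     PETSc 4,5,6,7  ->  FFTW 6,7,8,9     (FFTW slots 10,11 hold the padding), and so on.
*/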
735: PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
736: {
738: MPI_Comm comm;
739: Mat_FFT *fft = (Mat_FFT*)A->data;
740: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
741: PetscInt N =fft->N;
742: PetscInt ndim =fft->ndim,*dim=fft->dim;
743: PetscInt low;
744: PetscMPIInt rank,size;
745: PetscInt vsize,vsize1;
746: ptrdiff_t local_n0,local_0_start;
747: ptrdiff_t local_n1,local_1_start;
748: VecScatter vecscat;
749: IS list1,list2;
750: #if !defined(PETSC_USE_COMPLEX)
751: PetscInt i,j,k,partial_dim;
752: PetscInt *indx1, *indx2, tempindx, tempindx1;
753: PetscInt NM;
754: ptrdiff_t temp;
755: #endif
758: PetscObjectGetComm((PetscObject)A,&comm);
759: MPI_Comm_size(comm, &size);
760: MPI_Comm_rank(comm, &rank);
761: VecGetOwnershipRange(y,&low,NULL);
763: if (size==1) {
764: VecGetSize(x,&vsize);
765: VecGetSize(y,&vsize1);
766: ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
767: VecScatterCreate(x,list1,y,list1,&vecscat);
768: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
769: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
770: VecScatterDestroy(&vecscat);
771: ISDestroy(&list1);
772: } else {
773: switch (ndim) {
774: case 1:
775: #if defined(PETSC_USE_COMPLEX)
776: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
778: ISCreateStride(comm,local_n0,local_0_start,1,&list1);
779: ISCreateStride(comm,local_n0,low,1,&list2);
780: VecScatterCreate(x,list1,y,list2,&vecscat);
781: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
782: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
783: VecScatterDestroy(&vecscat);
784: ISDestroy(&list1);
785: ISDestroy(&list2);
786: #else
787: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
788: #endif
789: break;
790: case 2:
791: #if defined(PETSC_USE_COMPLEX)
792: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
794: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
795: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
796: VecScatterCreate(x,list1,y,list2,&vecscat);
797: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
798: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
799: VecScatterDestroy(&vecscat);
800: ISDestroy(&list1);
801: ISDestroy(&list2);
802: #else
803: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
805: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
806: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);
808: if (dim[1]%2==0) {
809: NM = dim[1]+2;
810: } else {
811: NM = dim[1]+1;
812: }
813: for (i=0; i<local_n0; i++) {
814: for (j=0; j<dim[1]; j++) {
815: tempindx = i*dim[1] + j;
816: tempindx1 = i*NM + j;
818: indx1[tempindx]=local_0_start*dim[1]+tempindx;
819: indx2[tempindx]=low+tempindx1;
820: }
821: }
823: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
824: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
826: VecScatterCreate(x,list1,y,list2,&vecscat);
827: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
828: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
829: VecScatterDestroy(&vecscat);
830: ISDestroy(&list1);
831: ISDestroy(&list2);
832: PetscFree(indx1);
833: PetscFree(indx2);
834: #endif
835: break;
837: case 3:
838: #if defined(PETSC_USE_COMPLEX)
839: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
841: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
842: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
843: VecScatterCreate(x,list1,y,list2,&vecscat);
844: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
845: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
846: VecScatterDestroy(&vecscat);
847: ISDestroy(&list1);
848: ISDestroy(&list2);
849: #else
850: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
851: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
852: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
854: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
855: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
857: if (dim[2]%2==0) NM = dim[2]+2;
858: else NM = dim[2]+1;
860: for (i=0; i<local_n0; i++) {
861: for (j=0; j<dim[1]; j++) {
862: for (k=0; k<dim[2]; k++) {
863: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
864: tempindx1 = i*dim[1]*NM + j*NM + k;
866: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
867: indx2[tempindx]=low+tempindx1;
868: }
869: }
870: }
872: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
873: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
874: VecScatterCreate(x,list1,y,list2,&vecscat);
875: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
876: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
877: VecScatterDestroy(&vecscat);
878: ISDestroy(&list1);
879: ISDestroy(&list2);
880: PetscFree(indx1);
881: PetscFree(indx2);
882: #endif
883: break;
885: default:
886: #if defined(PETSC_USE_COMPLEX)
887: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
889: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
890: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
891: VecScatterCreate(x,list1,y,list2,&vecscat);
892: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
893: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
894: VecScatterDestroy(&vecscat);
895: ISDestroy(&list1);
896: ISDestroy(&list2);
897: #else
898: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
899: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
900: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
902: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
904: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
906: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
908: partial_dim = fftw->partial_dim;
910: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
911: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);
913: if (dim[ndim-1]%2==0) NM = 2;
914: else NM = 1;
916: j = low;
917: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
918: indx1[i] = local_0_start*partial_dim + i;
919: indx2[i] = j;
920: if (k%dim[ndim-1]==0) j+=NM;
921: j++;
922: }
923: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
924: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
925: VecScatterCreate(x,list1,y,list2,&vecscat);
926: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
927: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
928: VecScatterDestroy(&vecscat);
929: ISDestroy(&list1);
930: ISDestroy(&list2);
931: PetscFree(indx1);
932: PetscFree(indx2);
933: #endif
934: break;
935: }
936: }
937: return(0);
938: }
940: /*@
941: VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
943: Collective on Mat
945: Input Parameters:
946: + A - FFTW matrix
947: - x - FFTW vector
949: Output Parameters:
950: . y - PETSc vector
952: Level: intermediate
954: Note: When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
955: VecScatterFFTWToPetsc removes those extra zeros.
957: .seealso: VecScatterPetscToFFTW()
958: @*/
959: PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
960: {
964: PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
965: return(0);
966: }
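/* A minimal end-to-end sketch of the parallel workflow these scatters support (illustrative
   only; loosely modeled on the tests in src/mat/tests, with hypothetical sizes and error
   checking omitted). The PETSc vector is scattered into the padded FFTW layout, transformed
   forward and backward, scattered back, and rescaled because the transforms are unnormalized.

     Mat      A;
     Vec      input,output,x,y,z;
     PetscInt dim[2] = {16,16},N = 16*16;

     MatCreateFFT(PETSC_COMM_WORLD,2,dim,MATFFTW,&A);
     MatCreateVecsFFTW(A,&x,&y,&z);
     VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,N,&input);
     VecSetRandom(input,NULL);
     VecDuplicate(input,&output);

     VecScatterPetscToFFTW(A,input,x);
     MatMult(A,x,y);
     MatMultTranspose(A,y,z);
     VecScatterFFTWToPetsc(A,z,output);
     VecScale(output,1.0/N);               output now approximates input
*/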
968: PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
969: {
971: MPI_Comm comm;
972: Mat_FFT *fft = (Mat_FFT*)A->data;
973: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
974: PetscInt N = fft->N;
975: PetscInt ndim = fft->ndim,*dim=fft->dim;
976: PetscInt low;
977: PetscMPIInt rank,size;
978: ptrdiff_t local_n0,local_0_start;
979: ptrdiff_t local_n1,local_1_start;
980: VecScatter vecscat;
981: IS list1,list2;
982: #if !defined(PETSC_USE_COMPLEX)
983: PetscInt i,j,k,partial_dim;
984: PetscInt *indx1, *indx2, tempindx, tempindx1;
985: PetscInt NM;
986: ptrdiff_t temp;
987: #endif
990: PetscObjectGetComm((PetscObject)A,&comm);
991: MPI_Comm_size(comm, &size);
992: MPI_Comm_rank(comm, &rank);
993: VecGetOwnershipRange(x,&low,NULL);
995: if (size==1) {
996: ISCreateStride(comm,N,0,1,&list1);
997: VecScatterCreate(x,list1,y,list1,&vecscat);
998: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
999: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1000: VecScatterDestroy(&vecscat);
1001: ISDestroy(&list1);
1003: } else {
1004: switch (ndim) {
1005: case 1:
1006: #if defined(PETSC_USE_COMPLEX)
1007: fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1009: ISCreateStride(comm,local_n1,local_1_start,1,&list1);
1010: ISCreateStride(comm,local_n1,low,1,&list2);
1011: VecScatterCreate(x,list1,y,list2,&vecscat);
1012: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1013: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1014: VecScatterDestroy(&vecscat);
1015: ISDestroy(&list1);
1016: ISDestroy(&list2);
1017: #else
1018: SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1019: #endif
1020: break;
1021: case 2:
1022: #if defined(PETSC_USE_COMPLEX)
1023: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1025: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1026: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
1027: VecScatterCreate(x,list2,y,list1,&vecscat);
1028: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1029: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1030: VecScatterDestroy(&vecscat);
1031: ISDestroy(&list1);
1032: ISDestroy(&list2);
1033: #else
1034: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1036: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
1037: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);
1039: if (dim[1]%2==0) NM = dim[1]+2;
1040: else NM = dim[1]+1;
1042: for (i=0; i<local_n0; i++) {
1043: for (j=0; j<dim[1]; j++) {
1044: tempindx = i*dim[1] + j;
1045: tempindx1 = i*NM + j;
1047: indx1[tempindx]=local_0_start*dim[1]+tempindx;
1048: indx2[tempindx]=low+tempindx1;
1049: }
1050: }
1052: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
1053: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
1055: VecScatterCreate(x,list2,y,list1,&vecscat);
1056: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1057: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1058: VecScatterDestroy(&vecscat);
1059: ISDestroy(&list1);
1060: ISDestroy(&list2);
1061: PetscFree(indx1);
1062: PetscFree(indx2);
1063: #endif
1064: break;
1065: case 3:
1066: #if defined(PETSC_USE_COMPLEX)
1067: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1069: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1070: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1071: VecScatterCreate(x,list1,y,list2,&vecscat);
1072: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1073: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1074: VecScatterDestroy(&vecscat);
1075: ISDestroy(&list1);
1076: ISDestroy(&list2);
1077: #else
1078: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1080: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
1081: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
1083: if (dim[2]%2==0) NM = dim[2]+2;
1084: else NM = dim[2]+1;
1086: for (i=0; i<local_n0; i++) {
1087: for (j=0; j<dim[1]; j++) {
1088: for (k=0; k<dim[2]; k++) {
1089: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1090: tempindx1 = i*dim[1]*NM + j*NM + k;
1092: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1093: indx2[tempindx]=low+tempindx1;
1094: }
1095: }
1096: }
1098: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
1099: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
1101: VecScatterCreate(x,list2,y,list1,&vecscat);
1102: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1103: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1104: VecScatterDestroy(&vecscat);
1105: ISDestroy(&list1);
1106: ISDestroy(&list2);
1107: PetscFree(indx1);
1108: PetscFree(indx2);
1109: #endif
1110: break;
1111: default:
1112: #if defined(PETSC_USE_COMPLEX)
1113: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1115: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1116: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1117: VecScatterCreate(x,list1,y,list2,&vecscat);
1118: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1119: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1120: VecScatterDestroy(&vecscat);
1121: ISDestroy(&list1);
1122: ISDestroy(&list2);
1123: #else
1124: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1126: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1128: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1130: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1132: partial_dim = fftw->partial_dim;
1134: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
1135: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);
1137: if (dim[ndim-1]%2==0) NM = 2;
1138: else NM = 1;
1140: j = low;
1141: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1142: indx1[i] = local_0_start*partial_dim + i;
1143: indx2[i] = j;
1144: if (k%dim[ndim-1]==0) j+=NM;
1145: j++;
1146: }
1147: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
1148: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
1150: VecScatterCreate(x,list2,y,list1,&vecscat);
1151: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1152: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1153: VecScatterDestroy(&vecscat);
1154: ISDestroy(&list1);
1155: ISDestroy(&list2);
1156: PetscFree(indx1);
1157: PetscFree(indx2);
1158: #endif
1159: break;
1160: }
1161: }
1162: return(0);
1163: }
1165: /*
1166: MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1168: Options Database Keys:
1169: + -mat_fftw_plannerflags - set FFTW planner flags
1171: Level: intermediate
1173: */
1174: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1175: {
1177: MPI_Comm comm;
1178: Mat_FFT *fft=(Mat_FFT*)A->data;
1179: Mat_FFTW *fftw;
1180: PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1181: const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1182: unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1183: PetscBool flg;
1184: PetscInt p_flag,partial_dim=1,ctr;
1185: PetscMPIInt size,rank;
1186: ptrdiff_t *pdim;
1187: ptrdiff_t local_n1,local_1_start;
1188: #if !defined(PETSC_USE_COMPLEX)
1189: ptrdiff_t temp;
1190: PetscInt N1,tot_dim;
1191: #else
1192: PetscInt n1;
1193: #endif
1196: PetscObjectGetComm((PetscObject)A,&comm);
1197: MPI_Comm_size(comm, &size);
1198: MPI_Comm_rank(comm, &rank);
1200: fftw_mpi_init();
1201: pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1202: pdim[0] = dim[0];
1203: #if !defined(PETSC_USE_COMPLEX)
1204: tot_dim = 2*dim[0];
1205: #endif
1206: for (ctr=1; ctr<ndim; ctr++) {
1207: partial_dim *= dim[ctr];
1208: pdim[ctr] = dim[ctr];
1209: #if !defined(PETSC_USE_COMPLEX)
1210: if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1211: else tot_dim *= dim[ctr];
1212: #endif
1213: }
1215: if (size == 1) {
1216: #if defined(PETSC_USE_COMPLEX)
1217: MatSetSizes(A,N,N,N,N);
1218: n = N;
1219: #else
1220: MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);
1221: n = tot_dim;
1222: #endif
1224: } else {
1225: ptrdiff_t local_n0,local_0_start;
1226: switch (ndim) {
1227: case 1:
1228: #if !defined(PETSC_USE_COMPLEX)
1229: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1230: #else
1231: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1233: n = (PetscInt)local_n0;
1234: n1 = (PetscInt)local_n1;
1235: MatSetSizes(A,n1,n,N,N);
1236: #endif
1237: break;
1238: case 2:
1239: #if defined(PETSC_USE_COMPLEX)
1240: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1241: /*
1242: PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1243: PetscSynchronizedFlush(comm,PETSC_STDOUT);
1244: */
1245: n = (PetscInt)local_n0*dim[1];
1246: MatSetSizes(A,n,n,N,N);
1247: #else
1248: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1250: n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1251: MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1252: #endif
1253: break;
1254: case 3:
1255: #if defined(PETSC_USE_COMPLEX)
1256: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1258: n = (PetscInt)local_n0*dim[1]*dim[2];
1259: MatSetSizes(A,n,n,N,N);
1260: #else
1261: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1263: n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1264: MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1265: #endif
1266: break;
1267: default:
1268: #if defined(PETSC_USE_COMPLEX)
1269: fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1271: n = (PetscInt)local_n0*partial_dim;
1272: MatSetSizes(A,n,n,N,N);
1273: #else
1274: temp = pdim[ndim-1];
1276: pdim[ndim-1] = temp/2 + 1;
1278: fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1280: n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1281: N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1283: pdim[ndim-1] = temp;
1285: MatSetSizes(A,n,n,N1,N1);
1286: #endif
1287: break;
1288: }
1289: }
1290: free(pdim);
1291: PetscObjectChangeTypeName((PetscObject)A,MATFFTW);
1292: PetscNewLog(A,&fftw);
1293: fft->data = (void*)fftw;
1295: fft->n = n;
1296: fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is the dimension of the FFT */
1297: fftw->partial_dim = partial_dim;
1299: PetscMalloc1(ndim, &(fftw->dim_fftw));
1300: if (size == 1) {
1301: #if defined(PETSC_USE_64BIT_INDICES)
1302: fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1303: #else
1304: fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1305: #endif
1306: }
1308: for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1310: fftw->p_forward = NULL;
1311: fftw->p_backward = NULL;
1312: fftw->p_flag = FFTW_ESTIMATE;
1314: if (size == 1) {
1315: A->ops->mult = MatMult_SeqFFTW;
1316: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1317: } else {
1318: A->ops->mult = MatMult_MPIFFTW;
1319: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1320: }
1321: fft->matdestroy = MatDestroy_FFTW;
1322: A->assembled = PETSC_TRUE;
1323: A->preallocated = PETSC_TRUE;
1325: PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);
1326: PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);
1327: PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);
1329: /* get runtime options */
1330: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
1331: PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);
1332: if (flg) {
1333: fftw->p_flag = iplans[p_flag];
1334: }
1335: PetscOptionsEnd();
1336: return(0);
1337: }
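/* Runtime selection of the planner flag registered above (illustrative only; the application
   name is hypothetical):

     mpiexec -n 2 ./your_app -mat_fftw_plannerflags FFTW_MEASURE

   With any flag other than FFTW_ESTIMATE, FFTW may overwrite the contents of the in/out arrays
   during planning, so plans should be created before the input vector is filled (see the
   warnings in MatMult_SeqFFTW and MatMult_MPIFFTW above). */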