Actual source code: fftw.c
petsc-3.6.4 2016-04-12
2: /*
3: Provides an interface to the FFTW package.
4: Testing examples can be found in ~src/mat/examples/tests
5: */
7: #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
8: EXTERN_C_BEGIN
9: #include <fftw3-mpi.h>
10: EXTERN_C_END
12: typedef struct {
13: ptrdiff_t ndim_fftw,*dim_fftw;
14: #if defined(PETSC_USE_64BIT_INDICES)
15: fftw_iodim64 *iodims;
16: #else
17: fftw_iodim *iodims;
18: #endif
19: PetscInt partial_dim;
20: fftw_plan p_forward,p_backward;
21: unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22: PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
23: executed for the arrays with which the plan was created */
24: } Mat_FFTW;
26: extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28: extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30: extern PetscErrorCode MatDestroy_FFTW(Mat);
31: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
33: /* MatMult_SeqFFTW performs forward DFT in parallel
34: Input parameter:
35: A - the matrix
36: x - the vector on which FDFT will be performed
38: Output parameter:
39: y - vector that stores result of FDFT
40: */
43: PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
44: {
46: Mat_FFT *fft = (Mat_FFT*)A->data;
47: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
48: const PetscScalar *x_array;
49: PetscScalar *y_array;
50: #if defined(PETSC_USE_COMPLEX)
51: #if defined(PETSC_USE_64BIT_INDICES)
52: fftw_iodim64 *iodims;
53: #else
54: fftw_iodim *iodims;
55: #endif
56: PetscInt i;
57: #endif
58: PetscInt ndim = fft->ndim,*dim = fft->dim;
61: VecGetArrayRead(x,&x_array);
62: VecGetArray(y,&y_array);
63: if (!fftw->p_forward) { /* create a plan, then excute it */
64: switch (ndim) {
65: case 1:
66: #if defined(PETSC_USE_COMPLEX)
67: fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
68: #else
69: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
70: #endif
71: break;
72: case 2:
73: #if defined(PETSC_USE_COMPLEX)
74: fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
75: #else
76: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
77: #endif
78: break;
79: case 3:
80: #if defined(PETSC_USE_COMPLEX)
81: fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
82: #else
83: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
84: #endif
85: break;
86: default:
87: #if defined(PETSC_USE_COMPLEX)
88: iodims = fftw->iodims;
89: #if defined(PETSC_USE_64BIT_INDICES)
90: if (ndim) {
91: iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
92: iodims[ndim-1].is = iodims[ndim-1].os = 1;
93: for (i=ndim-2; i>=0; --i) {
94: iodims[i].n = (ptrdiff_t)dim[i];
95: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
96: }
97: }
98: fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
99: #else
100: if (ndim) {
101: iodims[ndim-1].n = (int)dim[ndim-1];
102: iodims[ndim-1].is = iodims[ndim-1].os = 1;
103: for (i=ndim-2; i>=0; --i) {
104: iodims[i].n = (int)dim[i];
105: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
106: }
107: }
108: fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
109: #endif
111: #else
112: fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
113: #endif
114: break;
115: }
116: fftw->finarray = (PetscScalar *) x_array;
117: fftw->foutarray = y_array;
118: /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
119: planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
120: fftw_execute(fftw->p_forward);
121: } else { /* use existing plan */
122: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
123: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
124: } else {
125: fftw_execute(fftw->p_forward);
126: }
127: }
128: VecRestoreArray(y,&y_array);
129: VecRestoreArrayRead(x,&x_array);
130: return(0);
131: }
133: /* MatMultTranspose_SeqFFTW performs serial backward DFT
134: Input parameter:
135: A - the matrix
136: x - the vector on which BDFT will be performed
138: Output parameter:
139: y - vector that stores result of BDFT
140: */
144: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
145: {
147: Mat_FFT *fft = (Mat_FFT*)A->data;
148: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
149: const PetscScalar *x_array;
150: PetscScalar *y_array;
151: PetscInt ndim=fft->ndim,*dim=fft->dim;
152: #if defined(PETSC_USE_COMPLEX)
153: #if defined(PETSC_USE_64BIT_INDICES)
154: fftw_iodim64 *iodims=fftw->iodims;
155: #else
156: fftw_iodim *iodims=fftw->iodims;
157: #endif
158: #endif
159:
161: VecGetArrayRead(x,&x_array);
162: VecGetArray(y,&y_array);
163: if (!fftw->p_backward) { /* create a plan, then excute it */
164: switch (ndim) {
165: case 1:
166: #if defined(PETSC_USE_COMPLEX)
167: fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
168: #else
169: fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
170: #endif
171: break;
172: case 2:
173: #if defined(PETSC_USE_COMPLEX)
174: fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
175: #else
176: fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
177: #endif
178: break;
179: case 3:
180: #if defined(PETSC_USE_COMPLEX)
181: fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
182: #else
183: fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
184: #endif
185: break;
186: default:
187: #if defined(PETSC_USE_COMPLEX)
188: #if defined(PETSC_USE_64BIT_INDICES)
189: fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
190: #else
191: fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
192: #endif
193: #else
194: fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
195: #endif
196: break;
197: }
198: fftw->binarray = (PetscScalar *) x_array;
199: fftw->boutarray = y_array;
200: fftw_execute(fftw->p_backward);
201: } else { /* use existing plan */
202: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
203: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
204: } else {
205: fftw_execute(fftw->p_backward);
206: }
207: }
208: VecRestoreArray(y,&y_array);
209: VecRestoreArrayRead(x,&x_array);
210: return(0);
211: }
213: /* MatMult_MPIFFTW performs forward DFT in parallel
214: Input parameter:
215: A - the matrix
216: x - the vector on which FDFT will be performed
218: Output parameter:
219: y - vector that stores result of FDFT
220: */
223: PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
224: {
226: Mat_FFT *fft = (Mat_FFT*)A->data;
227: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
228: const PetscScalar *x_array;
229: PetscScalar *y_array;
230: PetscInt ndim=fft->ndim,*dim=fft->dim;
231: MPI_Comm comm;
234: PetscObjectGetComm((PetscObject)A,&comm);
235: VecGetArrayRead(x,&x_array);
236: VecGetArray(y,&y_array);
237: if (!fftw->p_forward) { /* create a plan, then excute it */
238: switch (ndim) {
239: case 1:
240: #if defined(PETSC_USE_COMPLEX)
241: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
242: #else
243: SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
244: #endif
245: break;
246: case 2:
247: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
248: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
249: #else
250: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
251: #endif
252: break;
253: case 3:
254: #if defined(PETSC_USE_COMPLEX)
255: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
256: #else
257: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
258: #endif
259: break;
260: default:
261: #if defined(PETSC_USE_COMPLEX)
262: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
263: #else
264: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
265: #endif
266: break;
267: }
268: fftw->finarray = (PetscScalar *) x_array;
269: fftw->foutarray = y_array;
270: /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
271: planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
272: fftw_execute(fftw->p_forward);
273: } else { /* use existing plan */
274: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
275: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
276: } else {
277: fftw_execute(fftw->p_forward);
278: }
279: }
280: VecRestoreArray(y,&y_array);
281: VecRestoreArrayRead(x,&x_array);
282: return(0);
283: }
285: /* MatMultTranspose_MPIFFTW performs parallel backward DFT
286: Input parameter:
287: A - the matrix
288: x - the vector on which BDFT will be performed
290: Output parameter:
291: y - vector that stores result of BDFT
292: */
295: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
296: {
298: Mat_FFT *fft = (Mat_FFT*)A->data;
299: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
300: const PetscScalar *x_array;
301: PetscScalar *y_array;
302: PetscInt ndim=fft->ndim,*dim=fft->dim;
303: MPI_Comm comm;
306: PetscObjectGetComm((PetscObject)A,&comm);
307: VecGetArrayRead(x,&x_array);
308: VecGetArray(y,&y_array);
309: if (!fftw->p_backward) { /* create a plan, then excute it */
310: switch (ndim) {
311: case 1:
312: #if defined(PETSC_USE_COMPLEX)
313: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
314: #else
315: SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
316: #endif
317: break;
318: case 2:
319: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
320: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
321: #else
322: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
323: #endif
324: break;
325: case 3:
326: #if defined(PETSC_USE_COMPLEX)
327: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
328: #else
329: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
330: #endif
331: break;
332: default:
333: #if defined(PETSC_USE_COMPLEX)
334: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
335: #else
336: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
337: #endif
338: break;
339: }
340: fftw->binarray = (PetscScalar *) x_array;
341: fftw->boutarray = y_array;
342: fftw_execute(fftw->p_backward);
343: } else { /* use existing plan */
344: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
345: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
346: } else {
347: fftw_execute(fftw->p_backward);
348: }
349: }
350: VecRestoreArray(y,&y_array);
351: VecRestoreArrayRead(x,&x_array);
352: return(0);
353: }
357: PetscErrorCode MatDestroy_FFTW(Mat A)
358: {
359: Mat_FFT *fft = (Mat_FFT*)A->data;
360: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
364: fftw_destroy_plan(fftw->p_forward);
365: fftw_destroy_plan(fftw->p_backward);
366: if (fftw->iodims) {
367: free(fftw->iodims);
368: }
369: PetscFree(fftw->dim_fftw);
370: PetscFree(fft->data);
371: fftw_mpi_cleanup();
372: return(0);
373: }
375: #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
378: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
379: {
381: PetscScalar *array;
384: VecGetArray(v,&array);
385: fftw_free((fftw_complex*)array);
386: VecRestoreArray(v,&array);
387: VecDestroy_MPI(v);
388: return(0);
389: }
393: /*@
394: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
395: parallel layout determined by FFTW
397: Collective on Mat
399: Input Parameter:
400: . A - the matrix
402: Output Parameter:
403: + x - (optional) input vector of forward FFTW
404: - y - (optional) output vector of forward FFTW
405: - z - (optional) output vector of backward FFTW
407: Level: advanced
409: Note: The parallel layout of output of forward FFTW is always same as the input
410: of the backward FFTW. But parallel layout of the input vector of forward
411: FFTW might not be same as the output of backward FFTW.
412: Also note that we need to provide enough space while doing parallel real transform.
413: We need to pad extra zeros at the end of last dimension. For this reason the one needs to
414: invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
415: last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
416: depends on if the last dimension is even or odd. If the last dimension is even need to pad two
417: zeros if it is odd only one zero is needed.
418: Lastly one needs some scratch space at the end of data set in each process. alloc_local
419: figures out how much space is needed, i.e. it figures out the data+scratch space for
420: each processor and returns that.
422: .seealso: MatCreateFFTW()
423: @*/
424: PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
425: {
429: PetscTryMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
430: return(0);
431: }
435: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
436: {
438: PetscMPIInt size,rank;
439: MPI_Comm comm;
440: Mat_FFT *fft = (Mat_FFT*)A->data;
441: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
442: PetscInt N = fft->N;
443: PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n;
448: PetscObjectGetComm((PetscObject)A,&comm);
450: MPI_Comm_size(comm, &size);
451: MPI_Comm_rank(comm, &rank);
452: if (size == 1) { /* sequential case */
453: #if defined(PETSC_USE_COMPLEX)
454: if (fin) {VecCreateSeq(PETSC_COMM_SELF,N,fin);}
455: if (fout) {VecCreateSeq(PETSC_COMM_SELF,N,fout);}
456: if (bout) {VecCreateSeq(PETSC_COMM_SELF,N,bout);}
457: #else
458: if (fin) {VecCreateSeq(PETSC_COMM_SELF,n,fin);}
459: if (fout) {VecCreateSeq(PETSC_COMM_SELF,n,fout);}
460: if (bout) {VecCreateSeq(PETSC_COMM_SELF,n,bout);}
461: #endif
462: } else { /* parallel cases */
463: ptrdiff_t alloc_local,local_n0,local_0_start;
464: ptrdiff_t local_n1;
465: fftw_complex *data_fout;
466: ptrdiff_t local_1_start;
467: #if defined(PETSC_USE_COMPLEX)
468: fftw_complex *data_fin,*data_bout;
469: #else
470: double *data_finr,*data_boutr;
471: PetscInt n1,N1;
472: ptrdiff_t temp;
473: #endif
475: switch (ndim) {
476: case 1:
477: #if !defined(PETSC_USE_COMPLEX)
478: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
479: #else
480: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
481: if (fin) {
482: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
483: VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);
485: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
486: }
487: if (fout) {
488: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
489: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);
491: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
492: }
493: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
494: if (bout) {
495: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
496: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);
498: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
499: }
500: break;
501: #endif
502: case 2:
503: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
504: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
505: N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
506: if (fin) {
507: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
508: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
510: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
511: }
512: if (bout) {
513: data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
514: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
516: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
517: }
518: if (fout) {
519: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
520: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);
522: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
523: }
524: #else
525: /* Get local size */
526: alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
527: if (fin) {
528: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
529: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
531: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
532: }
533: if (fout) {
534: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
535: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
537: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
538: }
539: if (bout) {
540: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
541: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
543: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
544: }
545: #endif
546: break;
547: case 3:
548: #if !defined(PETSC_USE_COMPLEX)
549: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
550: N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
551: if (fin) {
552: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
553: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
555: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
556: }
557: if (bout) {
558: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
559: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
561: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
562: }
564: if (fout) {
565: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
566: VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);
568: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
569: }
570: #else
571: alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
572: if (fin) {
573: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
574: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
576: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
577: }
578: if (fout) {
579: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
580: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
582: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
583: }
584: if (bout) {
585: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
586: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
588: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
589: }
590: #endif
591: break;
592: default:
593: #if !defined(PETSC_USE_COMPLEX)
594: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
596: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
598: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
599: N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
601: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
603: if (fin) {
604: data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
605: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);
607: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
608: }
609: if (bout) {
610: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
611: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);
613: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
614: }
616: if (fout) {
617: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
618: VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);
620: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
621: }
622: #else
623: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
624: if (fin) {
625: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
626: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
628: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
629: }
630: if (fout) {
631: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
632: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
634: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
635: }
636: if (bout) {
637: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
638: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
640: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
641: }
642: #endif
643: break;
644: }
645: }
646: return(0);
647: }
651: /*@
652: VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
654: Collective on Mat
656: Input Parameters:
657: + A - FFTW matrix
658: - x - the PETSc vector
660: Output Parameters:
661: . y - the FFTW vector
663: Options Database Keys:
664: . -mat_fftw_plannerflags - set FFTW planner flags
666: Level: intermediate
668: Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
669: one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
670: zeros. This routine does that job by scattering operation.
672: .seealso: VecScatterFFTWToPetsc()
673: @*/
674: PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
675: {
679: PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
680: return(0);
681: }
685: PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
686: {
688: MPI_Comm comm;
689: Mat_FFT *fft = (Mat_FFT*)A->data;
690: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
691: PetscInt N =fft->N;
692: PetscInt ndim =fft->ndim,*dim=fft->dim;
693: PetscInt low;
694: PetscMPIInt rank,size;
695: PetscInt vsize,vsize1;
696: ptrdiff_t local_n0,local_0_start;
697: ptrdiff_t local_n1,local_1_start;
698: VecScatter vecscat;
699: IS list1,list2;
700: #if !defined(PETSC_USE_COMPLEX)
701: PetscInt i,j,k,partial_dim;
702: PetscInt *indx1, *indx2, tempindx, tempindx1;
703: PetscInt NM;
704: ptrdiff_t temp;
705: #endif
708: PetscObjectGetComm((PetscObject)A,&comm);
709: MPI_Comm_size(comm, &size);
710: MPI_Comm_rank(comm, &rank);
711: VecGetOwnershipRange(y,&low,NULL);
713: if (size==1) {
714: VecGetSize(x,&vsize);
715: VecGetSize(y,&vsize1);
716: ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
717: VecScatterCreate(x,list1,y,list1,&vecscat);
718: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
719: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
720: VecScatterDestroy(&vecscat);
721: ISDestroy(&list1);
722: } else {
723: switch (ndim) {
724: case 1:
725: #if defined(PETSC_USE_COMPLEX)
726: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
728: ISCreateStride(comm,local_n0,local_0_start,1,&list1);
729: ISCreateStride(comm,local_n0,low,1,&list2);
730: VecScatterCreate(x,list1,y,list2,&vecscat);
731: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
732: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
733: VecScatterDestroy(&vecscat);
734: ISDestroy(&list1);
735: ISDestroy(&list2);
736: #else
737: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
738: #endif
739: break;
740: case 2:
741: #if defined(PETSC_USE_COMPLEX)
742: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
744: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
745: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
746: VecScatterCreate(x,list1,y,list2,&vecscat);
747: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
748: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
749: VecScatterDestroy(&vecscat);
750: ISDestroy(&list1);
751: ISDestroy(&list2);
752: #else
753: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
755: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
756: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);
758: if (dim[1]%2==0) {
759: NM = dim[1]+2;
760: } else {
761: NM = dim[1]+1;
762: }
763: for (i=0; i<local_n0; i++) {
764: for (j=0; j<dim[1]; j++) {
765: tempindx = i*dim[1] + j;
766: tempindx1 = i*NM + j;
768: indx1[tempindx]=local_0_start*dim[1]+tempindx;
769: indx2[tempindx]=low+tempindx1;
770: }
771: }
773: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
774: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
776: VecScatterCreate(x,list1,y,list2,&vecscat);
777: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
778: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
779: VecScatterDestroy(&vecscat);
780: ISDestroy(&list1);
781: ISDestroy(&list2);
782: PetscFree(indx1);
783: PetscFree(indx2);
784: #endif
785: break;
787: case 3:
788: #if defined(PETSC_USE_COMPLEX)
789: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
791: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
792: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
793: VecScatterCreate(x,list1,y,list2,&vecscat);
794: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
795: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
796: VecScatterDestroy(&vecscat);
797: ISDestroy(&list1);
798: ISDestroy(&list2);
799: #else
800: /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
801: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
802: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
804: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
805: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
807: if (dim[2]%2==0) NM = dim[2]+2;
808: else NM = dim[2]+1;
810: for (i=0; i<local_n0; i++) {
811: for (j=0; j<dim[1]; j++) {
812: for (k=0; k<dim[2]; k++) {
813: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
814: tempindx1 = i*dim[1]*NM + j*NM + k;
816: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
817: indx2[tempindx]=low+tempindx1;
818: }
819: }
820: }
822: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
823: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
824: VecScatterCreate(x,list1,y,list2,&vecscat);
825: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
826: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
827: VecScatterDestroy(&vecscat);
828: ISDestroy(&list1);
829: ISDestroy(&list2);
830: PetscFree(indx1);
831: PetscFree(indx2);
832: #endif
833: break;
835: default:
836: #if defined(PETSC_USE_COMPLEX)
837: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
839: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
840: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
841: VecScatterCreate(x,list1,y,list2,&vecscat);
842: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
843: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
844: VecScatterDestroy(&vecscat);
845: ISDestroy(&list1);
846: ISDestroy(&list2);
847: #else
848: /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
849: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
850: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
852: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
854: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
856: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
858: partial_dim = fftw->partial_dim;
860: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
861: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);
863: if (dim[ndim-1]%2==0) NM = 2;
864: else NM = 1;
866: j = low;
867: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
868: indx1[i] = local_0_start*partial_dim + i;
869: indx2[i] = j;
870: if (k%dim[ndim-1]==0) j+=NM;
871: j++;
872: }
873: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
874: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
875: VecScatterCreate(x,list1,y,list2,&vecscat);
876: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
877: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
878: VecScatterDestroy(&vecscat);
879: ISDestroy(&list1);
880: ISDestroy(&list2);
881: PetscFree(indx1);
882: PetscFree(indx2);
883: #endif
884: break;
885: }
886: }
887: return(0);
888: }
892: /*@
893: VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
895: Collective on Mat
897: Input Parameters:
898: + A - FFTW matrix
899: - x - FFTW vector
901: Output Parameters:
902: . y - PETSc vector
904: Level: intermediate
906: Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
907: VecScatterFFTWToPetsc removes those extra zeros.
909: .seealso: VecScatterPetscToFFTW()
910: @*/
911: PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
912: {
916: PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
917: return(0);
918: }
922: PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
923: {
925: MPI_Comm comm;
926: Mat_FFT *fft = (Mat_FFT*)A->data;
927: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
928: PetscInt N = fft->N;
929: PetscInt ndim = fft->ndim,*dim=fft->dim;
930: PetscInt low;
931: PetscMPIInt rank,size;
932: ptrdiff_t local_n0,local_0_start;
933: ptrdiff_t local_n1,local_1_start;
934: VecScatter vecscat;
935: IS list1,list2;
936: #if !defined(PETSC_USE_COMPLEX)
937: PetscInt i,j,k,partial_dim;
938: PetscInt *indx1, *indx2, tempindx, tempindx1;
939: PetscInt NM;
940: ptrdiff_t temp;
941: #endif
944: PetscObjectGetComm((PetscObject)A,&comm);
945: MPI_Comm_size(comm, &size);
946: MPI_Comm_rank(comm, &rank);
947: VecGetOwnershipRange(x,&low,NULL);
949: if (size==1) {
950: ISCreateStride(comm,N,0,1,&list1);
951: VecScatterCreate(x,list1,y,list1,&vecscat);
952: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
953: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
954: VecScatterDestroy(&vecscat);
955: ISDestroy(&list1);
957: } else {
958: switch (ndim) {
959: case 1:
960: #if defined(PETSC_USE_COMPLEX)
961: fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
963: ISCreateStride(comm,local_n1,local_1_start,1,&list1);
964: ISCreateStride(comm,local_n1,low,1,&list2);
965: VecScatterCreate(x,list1,y,list2,&vecscat);
966: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
967: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
968: VecScatterDestroy(&vecscat);
969: ISDestroy(&list1);
970: ISDestroy(&list2);
971: #else
972: SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
973: #endif
974: break;
975: case 2:
976: #if defined(PETSC_USE_COMPLEX)
977: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
979: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
980: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
981: VecScatterCreate(x,list2,y,list1,&vecscat);
982: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
983: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
984: VecScatterDestroy(&vecscat);
985: ISDestroy(&list1);
986: ISDestroy(&list2);
987: #else
988: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
990: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
991: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);
993: if (dim[1]%2==0) NM = dim[1]+2;
994: else NM = dim[1]+1;
996: for (i=0; i<local_n0; i++) {
997: for (j=0; j<dim[1]; j++) {
998: tempindx = i*dim[1] + j;
999: tempindx1 = i*NM + j;
1001: indx1[tempindx]=local_0_start*dim[1]+tempindx;
1002: indx2[tempindx]=low+tempindx1;
1003: }
1004: }
1006: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
1007: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
1009: VecScatterCreate(x,list2,y,list1,&vecscat);
1010: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1011: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1012: VecScatterDestroy(&vecscat);
1013: ISDestroy(&list1);
1014: ISDestroy(&list2);
1015: PetscFree(indx1);
1016: PetscFree(indx2);
1017: #endif
1018: break;
1019: case 3:
1020: #if defined(PETSC_USE_COMPLEX)
1021: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1023: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1024: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1025: VecScatterCreate(x,list1,y,list2,&vecscat);
1026: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1027: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1028: VecScatterDestroy(&vecscat);
1029: ISDestroy(&list1);
1030: ISDestroy(&list2);
1031: #else
1032: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1034: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
1035: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
1037: if (dim[2]%2==0) NM = dim[2]+2;
1038: else NM = dim[2]+1;
1040: for (i=0; i<local_n0; i++) {
1041: for (j=0; j<dim[1]; j++) {
1042: for (k=0; k<dim[2]; k++) {
1043: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1044: tempindx1 = i*dim[1]*NM + j*NM + k;
1046: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1047: indx2[tempindx]=low+tempindx1;
1048: }
1049: }
1050: }
1052: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
1053: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
1055: VecScatterCreate(x,list2,y,list1,&vecscat);
1056: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1057: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1058: VecScatterDestroy(&vecscat);
1059: ISDestroy(&list1);
1060: ISDestroy(&list2);
1061: PetscFree(indx1);
1062: PetscFree(indx2);
1063: #endif
1064: break;
1065: default:
1066: #if defined(PETSC_USE_COMPLEX)
1067: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1069: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1070: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1071: VecScatterCreate(x,list1,y,list2,&vecscat);
1072: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1073: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1074: VecScatterDestroy(&vecscat);
1075: ISDestroy(&list1);
1076: ISDestroy(&list2);
1077: #else
1078: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1080: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1082: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1084: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1086: partial_dim = fftw->partial_dim;
1088: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
1089: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);
1091: if (dim[ndim-1]%2==0) NM = 2;
1092: else NM = 1;
1094: j = low;
1095: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1096: indx1[i] = local_0_start*partial_dim + i;
1097: indx2[i] = j;
1098: if (k%dim[ndim-1]==0) j+=NM;
1099: j++;
1100: }
1101: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
1102: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
1104: VecScatterCreate(x,list2,y,list1,&vecscat);
1105: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1106: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1107: VecScatterDestroy(&vecscat);
1108: ISDestroy(&list1);
1109: ISDestroy(&list2);
1110: PetscFree(indx1);
1111: PetscFree(indx2);
1112: #endif
1113: break;
1114: }
1115: }
1116: return(0);
1117: }
1121: /*
1122: MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1124: Options Database Keys:
1125: + -mat_fftw_plannerflags - set FFTW planner flags
1127: Level: intermediate
1129: */
1130: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1131: {
1133: MPI_Comm comm;
1134: Mat_FFT *fft=(Mat_FFT*)A->data;
1135: Mat_FFTW *fftw;
1136: PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1137: const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1138: unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1139: PetscBool flg;
1140: PetscInt p_flag,partial_dim=1,ctr;
1141: PetscMPIInt size,rank;
1142: ptrdiff_t *pdim;
1143: ptrdiff_t local_n1,local_1_start;
1144: #if !defined(PETSC_USE_COMPLEX)
1145: ptrdiff_t temp;
1146: PetscInt N1,tot_dim;
1147: #else
1148: PetscInt n1;
1149: #endif
1152: PetscObjectGetComm((PetscObject)A,&comm);
1153: MPI_Comm_size(comm, &size);
1154: MPI_Comm_rank(comm, &rank);
1156: fftw_mpi_init();
1157: pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1158: pdim[0] = dim[0];
1159: #if !defined(PETSC_USE_COMPLEX)
1160: tot_dim = 2*dim[0];
1161: #endif
1162: for (ctr=1; ctr<ndim; ctr++) {
1163: partial_dim *= dim[ctr];
1164: pdim[ctr] = dim[ctr];
1165: #if !defined(PETSC_USE_COMPLEX)
1166: if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1167: else tot_dim *= dim[ctr];
1168: #endif
1169: }
1171: if (size == 1) {
1172: #if defined(PETSC_USE_COMPLEX)
1173: MatSetSizes(A,N,N,N,N);
1174: n = N;
1175: #else
1176: MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);
1177: n = tot_dim;
1178: #endif
1180: } else {
1181: ptrdiff_t local_n0,local_0_start;
1182: switch (ndim) {
1183: case 1:
1184: #if !defined(PETSC_USE_COMPLEX)
1185: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1186: #else
1187: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1189: n = (PetscInt)local_n0;
1190: n1 = (PetscInt)local_n1;
1191: MatSetSizes(A,n1,n,N,N);
1192: #endif
1193: break;
1194: case 2:
1195: #if defined(PETSC_USE_COMPLEX)
1196: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1197: /*
1198: PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1199: PetscSynchronizedFlush(comm,PETSC_STDOUT);
1200: */
1201: n = (PetscInt)local_n0*dim[1];
1202: MatSetSizes(A,n,n,N,N);
1203: #else
1204: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1206: n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1207: MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1208: #endif
1209: break;
1210: case 3:
1211: #if defined(PETSC_USE_COMPLEX)
1212: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1214: n = (PetscInt)local_n0*dim[1]*dim[2];
1215: MatSetSizes(A,n,n,N,N);
1216: #else
1217: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1219: n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1220: MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1221: #endif
1222: break;
1223: default:
1224: #if defined(PETSC_USE_COMPLEX)
1225: fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1227: n = (PetscInt)local_n0*partial_dim;
1228: MatSetSizes(A,n,n,N,N);
1229: #else
1230: temp = pdim[ndim-1];
1232: pdim[ndim-1] = temp/2 + 1;
1234: fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1236: n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1237: N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1239: pdim[ndim-1] = temp;
1241: MatSetSizes(A,n,n,N1,N1);
1242: #endif
1243: break;
1244: }
1245: }
1246: PetscObjectChangeTypeName((PetscObject)A,MATFFTW);
1247: PetscNewLog(A,&fftw);
1248: fft->data = (void*)fftw;
1250: fft->n = n;
1251: fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */
1252: fftw->partial_dim = partial_dim;
1254: PetscMalloc1(ndim, &(fftw->dim_fftw));
1255: if (size == 1) {
1256: #if defined(PETSC_USE_64BIT_INDICES)
1257: fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1258: #else
1259: fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1260: #endif
1261: }
1263: for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1265: fftw->p_forward = 0;
1266: fftw->p_backward = 0;
1267: fftw->p_flag = FFTW_ESTIMATE;
1269: if (size == 1) {
1270: A->ops->mult = MatMult_SeqFFTW;
1271: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1272: } else {
1273: A->ops->mult = MatMult_MPIFFTW;
1274: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1275: }
1276: fft->matdestroy = MatDestroy_FFTW;
1277: A->assembled = PETSC_TRUE;
1278: A->preallocated = PETSC_TRUE;
1280: PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);
1281: PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);
1282: PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);
1284: /* get runtime options */
1285: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
1286: PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);
1287: if (flg) {
1288: fftw->p_flag = iplans[p_flag];
1289: }
1290: PetscOptionsEnd();
1291: return(0);
1292: }