Actual source code: fftw.c
petsc-3.7.3 2016-08-01
2: /*
3: Provides an interface to the FFTW package.
4: Testing examples can be found in ~src/mat/examples/tests
5: */
7: #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
8: EXTERN_C_BEGIN
9: #include <fftw3-mpi.h>
10: EXTERN_C_END
12: typedef struct {
13: ptrdiff_t ndim_fftw,*dim_fftw;
14: #if defined(PETSC_USE_64BIT_INDICES)
15: fftw_iodim64 *iodims;
16: #else
17: fftw_iodim *iodims;
18: #endif
19: PetscInt partial_dim;
20: fftw_plan p_forward,p_backward;
21: unsigned p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22: PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of the arrays because the fftw plan should be
23: executed for the arrays with which the plan was created */
24: } Mat_FFTW;
26: extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28: extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30: extern PetscErrorCode MatDestroy_FFTW(Mat);
31: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
33: /* MatMult_SeqFFTW performs serial forward DFT
34: Input parameters:
35: A - the matrix
36: x - the vector on which FDFT will be performed
38: Output parameter:
39: y - vector that stores result of FDFT
40: */
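/* Usage sketch (illustration only, not part of this source): a user reaches this routine
   through the generic MatMult() interface on a MATFFTW matrix created with MatCreateFFT().
   The 2D dimensions below are arbitrary and error checking with CHKERRQ() is omitted.

      Mat      A;
      Vec      x,y,z;
      PetscInt dim[2] = {8,8};

      MatCreateFFT(PETSC_COMM_WORLD,2,dim,MATFFTW,&A);
      MatCreateVecsFFTW(A,&x,&y,&z);
      VecSetRandom(x,NULL);
      MatMult(A,x,y);
*/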
43: PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
44: {
46: Mat_FFT *fft = (Mat_FFT*)A->data;
47: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
48: const PetscScalar *x_array;
49: PetscScalar *y_array;
50: #if defined(PETSC_USE_COMPLEX)
51: #if defined(PETSC_USE_64BIT_INDICES)
52: fftw_iodim64 *iodims;
53: #else
54: fftw_iodim *iodims;
55: #endif
56: PetscInt i;
57: #endif
58: PetscInt ndim = fft->ndim,*dim = fft->dim;
61: VecGetArrayRead(x,&x_array);
62: VecGetArray(y,&y_array);
63: if (!fftw->p_forward) { /* create a plan, then execute it */
64: switch (ndim) {
65: case 1:
66: #if defined(PETSC_USE_COMPLEX)
67: fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
68: #else
69: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
70: #endif
71: break;
72: case 2:
73: #if defined(PETSC_USE_COMPLEX)
74: fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
75: #else
76: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
77: #endif
78: break;
79: case 3:
80: #if defined(PETSC_USE_COMPLEX)
81: fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
82: #else
83: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
84: #endif
85: break;
86: default:
87: #if defined(PETSC_USE_COMPLEX)
88: iodims = fftw->iodims;
89: #if defined(PETSC_USE_64BIT_INDICES)
90: if (ndim) {
91: iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
92: iodims[ndim-1].is = iodims[ndim-1].os = 1;
93: for (i=ndim-2; i>=0; --i) {
94: iodims[i].n = (ptrdiff_t)dim[i];
95: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
96: }
97: }
98: fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
99: #else
100: if (ndim) {
101: iodims[ndim-1].n = (int)dim[ndim-1];
102: iodims[ndim-1].is = iodims[ndim-1].os = 1;
103: for (i=ndim-2; i>=0; --i) {
104: iodims[i].n = (int)dim[i];
105: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
106: }
107: }
108: fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
109: #endif
111: #else
112: fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
113: #endif
114: break;
115: }
116: fftw->finarray = (PetscScalar *) x_array;
117: fftw->foutarray = y_array;
118: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
119: Planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
120: fftw_execute(fftw->p_forward);
121: } else { /* use existing plan */
122: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
123: #if defined(PETSC_USE_COMPLEX)
124: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
125: #else
126: fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
127: #endif
128: } else {
129: fftw_execute(fftw->p_forward);
130: }
131: }
132: VecRestoreArray(y,&y_array);
133: VecRestoreArrayRead(x,&x_array);
134: return(0);
135: }
137: /* MatMultTranspose_SeqFFTW performs serial backward DFT
138: Input parameters:
139: A - the matrix
140: x - the vector on which BDFT will be performed
142: Output parameter:
143: y - vector that stores result of BDFT
144: */
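/* Note (a sketch relying on FFTW's transforms being unnormalized): a forward transform
   followed by this backward transform returns the original data scaled by the total number
   of transform points, so a caller typically rescales the result, e.g.

      MatMult(A,x,y);
      MatMultTranspose(A,y,z);
      VecScale(z,1.0/N);        with N the product of the transform dimensions
*/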
148: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
149: {
151: Mat_FFT *fft = (Mat_FFT*)A->data;
152: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
153: const PetscScalar *x_array;
154: PetscScalar *y_array;
155: PetscInt ndim=fft->ndim,*dim=fft->dim;
156: #if defined(PETSC_USE_COMPLEX)
157: #if defined(PETSC_USE_64BIT_INDICES)
158: fftw_iodim64 *iodims=fftw->iodims;
159: #else
160: fftw_iodim *iodims=fftw->iodims;
161: #endif
162: #endif
163:
165: VecGetArrayRead(x,&x_array);
166: VecGetArray(y,&y_array);
167: if (!fftw->p_backward) { /* create a plan, then execute it */
168: switch (ndim) {
169: case 1:
170: #if defined(PETSC_USE_COMPLEX)
171: fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
172: #else
173: fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
174: #endif
175: break;
176: case 2:
177: #if defined(PETSC_USE_COMPLEX)
178: fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
179: #else
180: fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
181: #endif
182: break;
183: case 3:
184: #if defined(PETSC_USE_COMPLEX)
185: fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
186: #else
187: fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
188: #endif
189: break;
190: default:
191: #if defined(PETSC_USE_COMPLEX)
192: #if defined(PETSC_USE_64BIT_INDICES)
193: fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
194: #else
195: fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
196: #endif
197: #else
198: fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
199: #endif
200: break;
201: }
202: fftw->binarray = (PetscScalar *) x_array;
203: fftw->boutarray = y_array;
204: fftw_execute(fftw->p_backward);
205: } else { /* use existing plan */
206: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
207: #if defined(PETSC_USE_COMPLEX)
208: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
209: #else
210: fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
211: #endif
212: } else {
213: fftw_execute(fftw->p_backward);
214: }
215: }
216: VecRestoreArray(y,&y_array);
217: VecRestoreArrayRead(x,&x_array);
218: return(0);
219: }
221: /* MatMult_MPIFFTW performs forward DFT in parallel
222: Input parameters:
223: A - the matrix
224: x - the vector on which FDFT will be performed
226: Output parameter:
227: y - vector that stores result of FDFT
228: */
231: PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
232: {
234: Mat_FFT *fft = (Mat_FFT*)A->data;
235: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
236: const PetscScalar *x_array;
237: PetscScalar *y_array;
238: PetscInt ndim=fft->ndim,*dim=fft->dim;
239: MPI_Comm comm;
242: PetscObjectGetComm((PetscObject)A,&comm);
243: VecGetArrayRead(x,&x_array);
244: VecGetArray(y,&y_array);
245: if (!fftw->p_forward) { /* create a plan, then execute it */
246: switch (ndim) {
247: case 1:
248: #if defined(PETSC_USE_COMPLEX)
249: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
250: #else
251: SETERRQ(comm,PETSC_ERR_SUP,"no support for real numbers yet");
252: #endif
253: break;
254: case 2:
255: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
256: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
257: #else
258: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
259: #endif
260: break;
261: case 3:
262: #if defined(PETSC_USE_COMPLEX)
263: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
264: #else
265: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
266: #endif
267: break;
268: default:
269: #if defined(PETSC_USE_COMPLEX)
270: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
271: #else
272: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
273: #endif
274: break;
275: }
276: fftw->finarray = (PetscScalar *) x_array;
277: fftw->foutarray = y_array;
278: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
279: Planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
280: fftw_execute(fftw->p_forward);
281: } else { /* use existing plan */
282: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
283: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
284: } else {
285: fftw_execute(fftw->p_forward);
286: }
287: }
288: VecRestoreArray(y,&y_array);
289: VecRestoreArrayRead(x,&x_array);
290: return(0);
291: }
293: /* MatMultTranspose_MPIFFTW performs parallel backward DFT
294: Input parameters:
295: A - the matrix
296: x - the vector on which BDFT will be performed
298: Output parameter:
299: y - vector that stores result of BDFT
300: */
303: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
304: {
306: Mat_FFT *fft = (Mat_FFT*)A->data;
307: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
308: const PetscScalar *x_array;
309: PetscScalar *y_array;
310: PetscInt ndim=fft->ndim,*dim=fft->dim;
311: MPI_Comm comm;
314: PetscObjectGetComm((PetscObject)A,&comm);
315: VecGetArrayRead(x,&x_array);
316: VecGetArray(y,&y_array);
317: if (!fftw->p_backward) { /* create a plan, then execute it */
318: switch (ndim) {
319: case 1:
320: #if defined(PETSC_USE_COMPLEX)
321: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
322: #else
323: SETERRQ(comm,PETSC_ERR_SUP,"no support for real numbers yet");
324: #endif
325: break;
326: case 2:
327: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
328: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
329: #else
330: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
331: #endif
332: break;
333: case 3:
334: #if defined(PETSC_USE_COMPLEX)
335: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
336: #else
337: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
338: #endif
339: break;
340: default:
341: #if defined(PETSC_USE_COMPLEX)
342: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
343: #else
344: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
345: #endif
346: break;
347: }
348: fftw->binarray = (PetscScalar *) x_array;
349: fftw->boutarray = y_array;
350: fftw_execute(fftw->p_backward);
351: } else { /* use existing plan */
352: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
353: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
354: } else {
355: fftw_execute(fftw->p_backward);
356: }
357: }
358: VecRestoreArray(y,&y_array);
359: VecRestoreArrayRead(x,&x_array);
360: return(0);
361: }
365: PetscErrorCode MatDestroy_FFTW(Mat A)
366: {
367: Mat_FFT *fft = (Mat_FFT*)A->data;
368: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
372: fftw_destroy_plan(fftw->p_forward);
373: fftw_destroy_plan(fftw->p_backward);
374: if (fftw->iodims) {
375: free(fftw->iodims);
376: }
377: PetscFree(fftw->dim_fftw);
378: PetscFree(fft->data);
379: fftw_mpi_cleanup();
380: return(0);
381: }
383: #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
386: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
387: {
389: PetscScalar *array;
392: VecGetArray(v,&array);
393: fftw_free((fftw_complex*)array);
394: VecRestoreArray(v,&array);
395: VecDestroy_MPI(v);
396: return(0);
397: }
401: /*@
402: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
403: parallel layout determined by FFTW
405: Collective on Mat
407: Input Parameter:
408: . A - the matrix
410: Output Parameters:
411: + x - (optional) input vector of forward FFTW
412: . y - (optional) output vector of forward FFTW
413: - z - (optional) output vector of backward FFTW
415: Level: advanced
417: Note: The parallel layout of the output of the forward FFTW is always the same as the input
418: of the backward FFTW, but the parallel layout of the input vector of the forward
419: FFTW might not be the same as the output of the backward FFTW.
420: Also note that enough space must be provided when doing a parallel real transform:
421: extra zeros must be padded at the end of the last dimension. For this reason one needs to
422: invoke the routine fftw_mpi_local_size_transposed, remembering to change the
423: last dimension from n to n/2+1 when calling it. The number of zeros to be padded
424: depends on whether the last dimension is even or odd: if the last dimension is even two
425: zeros are needed, if it is odd only one zero is needed.
426: Lastly, some scratch space is needed at the end of the data set on each process. alloc_local
427: figures out how much space is needed, i.e. it figures out the data+scratch space for
428: each process and returns that.
430: .seealso: MatCreateFFTW()
431: @*/
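/* Example (sketch): any of the three vectors may be requested individually; NULL is passed
   for the ones that are not needed, and the vectors are released with VecDestroy().

      Vec fin,fout;
      MatCreateVecsFFTW(A,&fin,&fout,NULL);
      ...
      VecDestroy(&fin);
      VecDestroy(&fout);
*/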
432: PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
433: {
437: PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
438: return(0);
439: }
443: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
444: {
446: PetscMPIInt size,rank;
447: MPI_Comm comm;
448: Mat_FFT *fft = (Mat_FFT*)A->data;
449: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
450: PetscInt N = fft->N;
451: PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n;
456: PetscObjectGetComm((PetscObject)A,&comm);
458: MPI_Comm_size(comm, &size);
459: MPI_Comm_rank(comm, &rank);
460: if (size == 1) { /* sequential case */
461: #if defined(PETSC_USE_COMPLEX)
462: if (fin) {VecCreateSeq(PETSC_COMM_SELF,N,fin);}
463: if (fout) {VecCreateSeq(PETSC_COMM_SELF,N,fout);}
464: if (bout) {VecCreateSeq(PETSC_COMM_SELF,N,bout);}
465: #else
466: if (fin) {VecCreateSeq(PETSC_COMM_SELF,n,fin);}
467: if (fout) {VecCreateSeq(PETSC_COMM_SELF,n,fout);}
468: if (bout) {VecCreateSeq(PETSC_COMM_SELF,n,bout);}
469: #endif
470: } else { /* parallel cases */
471: ptrdiff_t alloc_local,local_n0,local_0_start;
472: ptrdiff_t local_n1;
473: fftw_complex *data_fout;
474: ptrdiff_t local_1_start;
475: #if defined(PETSC_USE_COMPLEX)
476: fftw_complex *data_fin,*data_bout;
477: #else
478: double *data_finr,*data_boutr;
479: PetscInt n1,N1;
480: ptrdiff_t temp;
481: #endif
483: switch (ndim) {
484: case 1:
485: #if !defined(PETSC_USE_COMPLEX)
486: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
487: #else
488: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
489: if (fin) {
490: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
491: VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);
493: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
494: }
495: if (fout) {
496: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
497: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);
499: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
500: }
501: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
502: if (bout) {
503: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
504: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);
506: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
507: }
508: break;
509: #endif
510: case 2:
511: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
512: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
513: N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
514: if (fin) {
515: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
516: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
518: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
519: }
520: if (bout) {
521: data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
522: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
524: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
525: }
526: if (fout) {
527: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
528: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);
530: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
531: }
532: #else
533: /* Get local size */
534: alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
535: if (fin) {
536: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
537: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
539: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
540: }
541: if (fout) {
542: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
543: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
545: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
546: }
547: if (bout) {
548: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
549: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
551: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
552: }
553: #endif
554: break;
555: case 3:
556: #if !defined(PETSC_USE_COMPLEX)
557: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
558: N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
559: if (fin) {
560: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
561: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
563: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
564: }
565: if (bout) {
566: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
567: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
569: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
570: }
572: if (fout) {
573: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
574: VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);
576: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
577: }
578: #else
579: alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
580: if (fin) {
581: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
582: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
584: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
585: }
586: if (fout) {
587: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
588: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
590: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
591: }
592: if (bout) {
593: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
594: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
596: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
597: }
598: #endif
599: break;
600: default:
601: #if !defined(PETSC_USE_COMPLEX)
602: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
604: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
606: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
607: N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
609: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
611: if (fin) {
612: data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
613: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);
615: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
616: }
617: if (bout) {
618: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
619: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);
621: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
622: }
624: if (fout) {
625: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
626: VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);
628: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
629: }
630: #else
631: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
632: if (fin) {
633: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
634: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
636: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
637: }
638: if (fout) {
639: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
640: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
642: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
643: }
644: if (bout) {
645: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
646: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
648: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
649: }
650: #endif
651: break;
652: }
653: }
654: return(0);
655: }
659: /*@
660: VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into the FFTW block.
662: Collective on Mat
664: Input Parameters:
665: + A - FFTW matrix
666: - x - the PETSc vector
668: Output Parameters:
669: . y - the FFTW vector
671: Options Database Keys:
672: . -mat_fftw_plannerflags - set FFTW planner flags
674: Level: intermediate
676: Note: For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
677: one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
678: zeros. This routine does that job with a scatter operation.
680: .seealso: VecScatterFFTWToPetsc()
681: @*/
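/* Typical calling pattern (sketch; xnat, x and y are illustrative names): a vector xnat in
   the natural PETSc layout is copied, with any padding FFTW needs, into the FFTW-layout
   vector x obtained from MatCreateVecsFFTW() before the forward transform is applied:

      VecScatterPetscToFFTW(A,xnat,x);
      MatMult(A,x,y);
*/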
682: PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
683: {
687: PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
688: return(0);
689: }
693: PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
694: {
696: MPI_Comm comm;
697: Mat_FFT *fft = (Mat_FFT*)A->data;
698: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
699: PetscInt N =fft->N;
700: PetscInt ndim =fft->ndim,*dim=fft->dim;
701: PetscInt low;
702: PetscMPIInt rank,size;
703: PetscInt vsize,vsize1;
704: ptrdiff_t local_n0,local_0_start;
705: ptrdiff_t local_n1,local_1_start;
706: VecScatter vecscat;
707: IS list1,list2;
708: #if !defined(PETSC_USE_COMPLEX)
709: PetscInt i,j,k,partial_dim;
710: PetscInt *indx1, *indx2, tempindx, tempindx1;
711: PetscInt NM;
712: ptrdiff_t temp;
713: #endif
716: PetscObjectGetComm((PetscObject)A,&comm);
717: MPI_Comm_size(comm, &size);
718: MPI_Comm_rank(comm, &rank);
719: VecGetOwnershipRange(y,&low,NULL);
721: if (size==1) {
722: VecGetSize(x,&vsize);
723: VecGetSize(y,&vsize1);
724: ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
725: VecScatterCreate(x,list1,y,list1,&vecscat);
726: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
727: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
728: VecScatterDestroy(&vecscat);
729: ISDestroy(&list1);
730: } else {
731: switch (ndim) {
732: case 1:
733: #if defined(PETSC_USE_COMPLEX)
734: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
736: ISCreateStride(comm,local_n0,local_0_start,1,&list1);
737: ISCreateStride(comm,local_n0,low,1,&list2);
738: VecScatterCreate(x,list1,y,list2,&vecscat);
739: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
740: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
741: VecScatterDestroy(&vecscat);
742: ISDestroy(&list1);
743: ISDestroy(&list2);
744: #else
745: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
746: #endif
747: break;
748: case 2:
749: #if defined(PETSC_USE_COMPLEX)
750: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
752: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
753: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
754: VecScatterCreate(x,list1,y,list2,&vecscat);
755: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
756: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
757: VecScatterDestroy(&vecscat);
758: ISDestroy(&list1);
759: ISDestroy(&list2);
760: #else
761: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
763: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
764: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);
766: if (dim[1]%2==0) {
767: NM = dim[1]+2;
768: } else {
769: NM = dim[1]+1;
770: }
771: for (i=0; i<local_n0; i++) {
772: for (j=0; j<dim[1]; j++) {
773: tempindx = i*dim[1] + j;
774: tempindx1 = i*NM + j;
776: indx1[tempindx]=local_0_start*dim[1]+tempindx;
777: indx2[tempindx]=low+tempindx1;
778: }
779: }
781: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
782: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
784: VecScatterCreate(x,list1,y,list2,&vecscat);
785: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
786: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
787: VecScatterDestroy(&vecscat);
788: ISDestroy(&list1);
789: ISDestroy(&list2);
790: PetscFree(indx1);
791: PetscFree(indx2);
792: #endif
793: break;
795: case 3:
796: #if defined(PETSC_USE_COMPLEX)
797: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
799: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
800: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
801: VecScatterCreate(x,list1,y,list2,&vecscat);
802: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
803: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
804: VecScatterDestroy(&vecscat);
805: ISDestroy(&list1);
806: ISDestroy(&list2);
807: #else
808: /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
809: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
810: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
812: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
813: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
815: if (dim[2]%2==0) NM = dim[2]+2;
816: else NM = dim[2]+1;
818: for (i=0; i<local_n0; i++) {
819: for (j=0; j<dim[1]; j++) {
820: for (k=0; k<dim[2]; k++) {
821: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
822: tempindx1 = i*dim[1]*NM + j*NM + k;
824: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
825: indx2[tempindx]=low+tempindx1;
826: }
827: }
828: }
830: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
831: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
832: VecScatterCreate(x,list1,y,list2,&vecscat);
833: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
834: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
835: VecScatterDestroy(&vecscat);
836: ISDestroy(&list1);
837: ISDestroy(&list2);
838: PetscFree(indx1);
839: PetscFree(indx2);
840: #endif
841: break;
843: default:
844: #if defined(PETSC_USE_COMPLEX)
845: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
847: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
848: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
849: VecScatterCreate(x,list1,y,list2,&vecscat);
850: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
851: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
852: VecScatterDestroy(&vecscat);
853: ISDestroy(&list1);
854: ISDestroy(&list2);
855: #else
856: /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
857: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
858: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
860: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
862: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
864: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
866: partial_dim = fftw->partial_dim;
868: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
869: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);
871: if (dim[ndim-1]%2==0) NM = 2;
872: else NM = 1;
874: j = low;
875: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
876: indx1[i] = local_0_start*partial_dim + i;
877: indx2[i] = j;
878: if (k%dim[ndim-1]==0) j+=NM;
879: j++;
880: }
881: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
882: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
883: VecScatterCreate(x,list1,y,list2,&vecscat);
884: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
885: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
886: VecScatterDestroy(&vecscat);
887: ISDestroy(&list1);
888: ISDestroy(&list2);
889: PetscFree(indx1);
890: PetscFree(indx2);
891: #endif
892: break;
893: }
894: }
895: return(0);
896: }
900: /*@
901: VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
903: Collective on Mat
905: Input Parameters:
906: + A - FFTW matrix
907: - x - FFTW vector
909: Output Parameters:
910: . y - PETSc vector
912: Level: intermediate
914: Note: When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
915: VecScatterFFTWToPetsc removes those extra zeros.
917: .seealso: VecScatterPetscToFFTW()
918: @*/
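/* Worked size example (illustration only): for a parallel real 2D transform with dim = {4,5}
   the FFTW vectors have global length 2*4*(5/2+1) = 24 while the natural PETSc vector has
   length 4*5 = 20; since dim[1] is odd one zero is padded per row, and this routine drops
   those 4 padded entries when copying back:

      MatMultTranspose(A,y,z);
      VecScatterFFTWToPetsc(A,z,znat);      znat is an illustrative natural-layout vector
*/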
919: PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
920: {
924: PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
925: return(0);
926: }
930: PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
931: {
933: MPI_Comm comm;
934: Mat_FFT *fft = (Mat_FFT*)A->data;
935: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
936: PetscInt N = fft->N;
937: PetscInt ndim = fft->ndim,*dim=fft->dim;
938: PetscInt low;
939: PetscMPIInt rank,size;
940: ptrdiff_t local_n0,local_0_start;
941: ptrdiff_t local_n1,local_1_start;
942: VecScatter vecscat;
943: IS list1,list2;
944: #if !defined(PETSC_USE_COMPLEX)
945: PetscInt i,j,k,partial_dim;
946: PetscInt *indx1, *indx2, tempindx, tempindx1;
947: PetscInt NM;
948: ptrdiff_t temp;
949: #endif
952: PetscObjectGetComm((PetscObject)A,&comm);
953: MPI_Comm_size(comm, &size);
954: MPI_Comm_rank(comm, &rank);
955: VecGetOwnershipRange(x,&low,NULL);
957: if (size==1) {
958: ISCreateStride(comm,N,0,1,&list1);
959: VecScatterCreate(x,list1,y,list1,&vecscat);
960: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
961: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
962: VecScatterDestroy(&vecscat);
963: ISDestroy(&list1);
965: } else {
966: switch (ndim) {
967: case 1:
968: #if defined(PETSC_USE_COMPLEX)
969: fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
971: ISCreateStride(comm,local_n1,local_1_start,1,&list1);
972: ISCreateStride(comm,local_n1,low,1,&list2);
973: VecScatterCreate(x,list1,y,list2,&vecscat);
974: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
975: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
976: VecScatterDestroy(&vecscat);
977: ISDestroy(&list1);
978: ISDestroy(&list2);
979: #else
980: SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
981: #endif
982: break;
983: case 2:
984: #if defined(PETSC_USE_COMPLEX)
985: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
987: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
988: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
989: VecScatterCreate(x,list2,y,list1,&vecscat);
990: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
991: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
992: VecScatterDestroy(&vecscat);
993: ISDestroy(&list1);
994: ISDestroy(&list2);
995: #else
996: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
998: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
999: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);
1001: if (dim[1]%2==0) NM = dim[1]+2;
1002: else NM = dim[1]+1;
1004: for (i=0; i<local_n0; i++) {
1005: for (j=0; j<dim[1]; j++) {
1006: tempindx = i*dim[1] + j;
1007: tempindx1 = i*NM + j;
1009: indx1[tempindx]=local_0_start*dim[1]+tempindx;
1010: indx2[tempindx]=low+tempindx1;
1011: }
1012: }
1014: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
1015: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
1017: VecScatterCreate(x,list2,y,list1,&vecscat);
1018: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1019: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1020: VecScatterDestroy(&vecscat);
1021: ISDestroy(&list1);
1022: ISDestroy(&list2);
1023: PetscFree(indx1);
1024: PetscFree(indx2);
1025: #endif
1026: break;
1027: case 3:
1028: #if defined(PETSC_USE_COMPLEX)
1029: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1031: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1032: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1033: VecScatterCreate(x,list1,y,list2,&vecscat);
1034: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1035: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1036: VecScatterDestroy(&vecscat);
1037: ISDestroy(&list1);
1038: ISDestroy(&list2);
1039: #else
1040: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1042: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
1043: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
1045: if (dim[2]%2==0) NM = dim[2]+2;
1046: else NM = dim[2]+1;
1048: for (i=0; i<local_n0; i++) {
1049: for (j=0; j<dim[1]; j++) {
1050: for (k=0; k<dim[2]; k++) {
1051: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1052: tempindx1 = i*dim[1]*NM + j*NM + k;
1054: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1055: indx2[tempindx]=low+tempindx1;
1056: }
1057: }
1058: }
1060: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
1061: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
1063: VecScatterCreate(x,list2,y,list1,&vecscat);
1064: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1065: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1066: VecScatterDestroy(&vecscat);
1067: ISDestroy(&list1);
1068: ISDestroy(&list2);
1069: PetscFree(indx1);
1070: PetscFree(indx2);
1071: #endif
1072: break;
1073: default:
1074: #if defined(PETSC_USE_COMPLEX)
1075: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1077: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1078: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1079: VecScatterCreate(x,list1,y,list2,&vecscat);
1080: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1081: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1082: VecScatterDestroy(&vecscat);
1083: ISDestroy(&list1);
1084: ISDestroy(&list2);
1085: #else
1086: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1088: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1090: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1092: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1094: partial_dim = fftw->partial_dim;
1096: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
1097: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);
1099: if (dim[ndim-1]%2==0) NM = 2;
1100: else NM = 1;
1102: j = low;
1103: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1104: indx1[i] = local_0_start*partial_dim + i;
1105: indx2[i] = j;
1106: if (k%dim[ndim-1]==0) j+=NM;
1107: j++;
1108: }
1109: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
1110: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
1112: VecScatterCreate(x,list2,y,list1,&vecscat);
1113: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1114: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1115: VecScatterDestroy(&vecscat);
1116: ISDestroy(&list1);
1117: ISDestroy(&list2);
1118: PetscFree(indx1);
1119: PetscFree(indx2);
1120: #endif
1121: break;
1122: }
1123: }
1124: return(0);
1125: }
1129: /*
1130: MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1132: Options Database Keys:
1133: . -mat_fftw_plannerflags - set FFTW planner flags
1135: Level: intermediate
1137: */
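/* Runtime selection of the planner flag (sketch; the executable name is illustrative):

      ./myprog -mat_fftw_plannerflags FFTW_MEASURE

   The chosen value is read in the options loop at the end of this routine and stored in
   fftw->p_flag before any plan is created.
*/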
1138: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1139: {
1141: MPI_Comm comm;
1142: Mat_FFT *fft=(Mat_FFT*)A->data;
1143: Mat_FFTW *fftw;
1144: PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1145: const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1146: unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1147: PetscBool flg;
1148: PetscInt p_flag,partial_dim=1,ctr;
1149: PetscMPIInt size,rank;
1150: ptrdiff_t *pdim;
1151: ptrdiff_t local_n1,local_1_start;
1152: #if !defined(PETSC_USE_COMPLEX)
1153: ptrdiff_t temp;
1154: PetscInt N1,tot_dim;
1155: #else
1156: PetscInt n1;
1157: #endif
1160: PetscObjectGetComm((PetscObject)A,&comm);
1161: MPI_Comm_size(comm, &size);
1162: MPI_Comm_rank(comm, &rank);
1164: fftw_mpi_init();
1165: pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1166: pdim[0] = dim[0];
1167: #if !defined(PETSC_USE_COMPLEX)
1168: tot_dim = 2*dim[0];
1169: #endif
1170: for (ctr=1; ctr<ndim; ctr++) {
1171: partial_dim *= dim[ctr];
1172: pdim[ctr] = dim[ctr];
1173: #if !defined(PETSC_USE_COMPLEX)
1174: if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1175: else tot_dim *= dim[ctr];
1176: #endif
1177: }
1179: if (size == 1) {
1180: #if defined(PETSC_USE_COMPLEX)
1181: MatSetSizes(A,N,N,N,N);
1182: n = N;
1183: #else
1184: MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);
1185: n = tot_dim;
1186: #endif
1188: } else {
1189: ptrdiff_t local_n0,local_0_start;
1190: switch (ndim) {
1191: case 1:
1192: #if !defined(PETSC_USE_COMPLEX)
1193: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1194: #else
1195: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1197: n = (PetscInt)local_n0;
1198: n1 = (PetscInt)local_n1;
1199: MatSetSizes(A,n1,n,N,N);
1200: #endif
1201: break;
1202: case 2:
1203: #if defined(PETSC_USE_COMPLEX)
1204: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1205: /*
1206: PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1207: PetscSynchronizedFlush(comm,PETSC_STDOUT);
1208: */
1209: n = (PetscInt)local_n0*dim[1];
1210: MatSetSizes(A,n,n,N,N);
1211: #else
1212: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1214: n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1215: MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1216: #endif
1217: break;
1218: case 3:
1219: #if defined(PETSC_USE_COMPLEX)
1220: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1222: n = (PetscInt)local_n0*dim[1]*dim[2];
1223: MatSetSizes(A,n,n,N,N);
1224: #else
1225: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1227: n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1228: MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1229: #endif
1230: break;
1231: default:
1232: #if defined(PETSC_USE_COMPLEX)
1233: fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1235: n = (PetscInt)local_n0*partial_dim;
1236: MatSetSizes(A,n,n,N,N);
1237: #else
1238: temp = pdim[ndim-1];
1240: pdim[ndim-1] = temp/2 + 1;
1242: fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1244: n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1245: N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1247: pdim[ndim-1] = temp;
1249: MatSetSizes(A,n,n,N1,N1);
1250: #endif
1251: break;
1252: }
1253: }
1254: PetscObjectChangeTypeName((PetscObject)A,MATFFTW);
1255: PetscNewLog(A,&fftw);
1256: fft->data = (void*)fftw;
1258: fft->n = n;
1259: fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is the dimension of the fft */
1260: fftw->partial_dim = partial_dim;
1262: PetscMalloc1(ndim, &(fftw->dim_fftw));
1263: if (size == 1) {
1264: #if defined(PETSC_USE_64BIT_INDICES)
1265: fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1266: #else
1267: fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1268: #endif
1269: }
1271: for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1273: fftw->p_forward = 0;
1274: fftw->p_backward = 0;
1275: fftw->p_flag = FFTW_ESTIMATE;
1277: if (size == 1) {
1278: A->ops->mult = MatMult_SeqFFTW;
1279: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1280: } else {
1281: A->ops->mult = MatMult_MPIFFTW;
1282: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1283: }
1284: fft->matdestroy = MatDestroy_FFTW;
1285: A->assembled = PETSC_TRUE;
1286: A->preallocated = PETSC_TRUE;
1288: PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);
1289: PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);
1290: PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);
1292: /* get runtime options */
1293: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
1294: PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);
1295: if (flg) {
1296: fftw->p_flag = iplans[p_flag];
1297: }
1298: PetscOptionsEnd();
1299: return(0);
1300: }