Actual source code: fftw.c
petsc-3.5.4 2015-05-23
2: /*
3: Provides an interface to the FFTW package.
4: Testing examples can be found in ~src/mat/examples/tests
5: */
7: #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
8: EXTERN_C_BEGIN
9: #include <fftw3-mpi.h>
10: EXTERN_C_END
12: typedef struct {
13: ptrdiff_t ndim_fftw,*dim_fftw;
14: #if defined(PETSC_USE_64BIT_INDICES)
15: fftw_iodim64 *iodims;
16: #else
17: fftw_iodim *iodims;
18: #endif
19: PetscInt partial_dim;
20: fftw_plan p_forward,p_backward;
21: unsigned p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22: PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of the arrays because an fftw plan should be
23: executed on the arrays with which the plan was created */
24: } Mat_FFTW;
26: extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28: extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30: extern PetscErrorCode MatDestroy_FFTW(Mat);
31: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
33: /* MatMult_SeqFFTW performs serial forward DFT
34: Input parameter:
35: A - the matrix
36: x - the vector on which FDFT will be performed
38: Output parameter:
39: y - vector that stores result of FDFT
40: */
43: PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
44: {
46: Mat_FFT *fft = (Mat_FFT*)A->data;
47: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
48: PetscScalar *x_array,*y_array;
49: #if defined(PETSC_USE_COMPLEX)
50: #if defined(PETSC_USE_64BIT_INDICES)
51: fftw_iodim64 *iodims;
52: #else
53: fftw_iodim *iodims;
54: #endif
55: PetscInt i;
56: #endif
57: PetscInt ndim = fft->ndim,*dim = fft->dim;
60: VecGetArray(x,&x_array);
61: VecGetArray(y,&y_array);
62: if (!fftw->p_forward) { /* create a plan, then execute it */
63: switch (ndim) {
64: case 1:
65: #if defined(PETSC_USE_COMPLEX)
66: fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
67: #else
68: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
69: #endif
70: break;
71: case 2:
72: #if defined(PETSC_USE_COMPLEX)
73: fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
74: #else
75: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
76: #endif
77: break;
78: case 3:
79: #if defined(PETSC_USE_COMPLEX)
80: fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
81: #else
82: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
83: #endif
84: break;
85: default:
86: #if defined(PETSC_USE_COMPLEX)
87: iodims = fftw->iodims;
88: #if defined(PETSC_USE_64BIT_INDICES)
89: if (ndim) {
90: iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
91: iodims[ndim-1].is = iodims[ndim-1].os = 1;
92: for (i=ndim-2; i>=0; --i) {
93: iodims[i].n = (ptrdiff_t)dim[i];
94: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
95: }
96: }
97: fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
98: #else
99: if (ndim) {
100: iodims[ndim-1].n = (int)dim[ndim-1];
101: iodims[ndim-1].is = iodims[ndim-1].os = 1;
102: for (i=ndim-2; i>=0; --i) {
103: iodims[i].n = (int)dim[i];
104: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
105: }
106: }
107: fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
108: #endif
110: #else
111: fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
112: #endif
113: break;
114: }
115: fftw->finarray = x_array;
116: fftw->foutarray = y_array;
117: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
118: Planning should therefore be done before x is initialized! See the FFTW manual, Sec. 2.1 or Sec. 4 */
119: fftw_execute(fftw->p_forward);
120: } else { /* use existing plan */
121: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
122: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
123: } else {
124: fftw_execute(fftw->p_forward);
125: }
126: }
127: VecRestoreArray(y,&y_array);
128: VecRestoreArray(x,&x_array);
129: return(0);
130: }
132: /* MatMultTranspose_SeqFFTW performs serial backward DFT
133: Input parameter:
134: A - the matrix
135: x - the vector on which BDFT will be performed
137: Output parameter:
138: y - vector that stores result of BDFT
139: */
143: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
144: {
146: Mat_FFT *fft = (Mat_FFT*)A->data;
147: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
148: PetscScalar *x_array,*y_array;
149: PetscInt ndim=fft->ndim,*dim=fft->dim;
150: #if defined(PETSC_USE_COMPLEX)
151: #if defined(PETSC_USE_64BIT_INDICES)
152: fftw_iodim64 *iodims=fftw->iodims;
153: #else
154: fftw_iodim *iodims=fftw->iodims;
155: #endif
156: #endif
157:
159: VecGetArray(x,&x_array);
160: VecGetArray(y,&y_array);
161: if (!fftw->p_backward) { /* create a plan, then execute it */
162: switch (ndim) {
163: case 1:
164: #if defined(PETSC_USE_COMPLEX)
165: fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
166: #else
167: fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
168: #endif
169: break;
170: case 2:
171: #if defined(PETSC_USE_COMPLEX)
172: fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
173: #else
174: fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
175: #endif
176: break;
177: case 3:
178: #if defined(PETSC_USE_COMPLEX)
179: fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
180: #else
181: fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
182: #endif
183: break;
184: default:
185: #if defined(PETSC_USE_COMPLEX)
186: #if defined(PETSC_USE_64BIT_INDICES)
187: fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
188: #else
189: fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
190: #endif
191: #else
192: fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
193: #endif
194: break;
195: }
196: fftw->binarray = x_array;
197: fftw->boutarray = y_array;
198: fftw_execute(fftw->p_backward);
199: } else { /* use existing plan */
200: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
201: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202: } else {
203: fftw_execute(fftw->p_backward);
204: }
205: }
206: VecRestoreArray(y,&y_array);
207: VecRestoreArray(x,&x_array);
208: return(0);
209: }
211: /* MatMult_MPIFFTW performs forward DFT in parallel
212: Input parameter:
213: A - the matrix
214: x - the vector on which FDFT will be performed
216: Output parameter:
217: y - vector that stores result of FDFT
218: */
221: PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
222: {
224: Mat_FFT *fft = (Mat_FFT*)A->data;
225: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
226: PetscScalar *x_array,*y_array;
227: PetscInt ndim=fft->ndim,*dim=fft->dim;
228: MPI_Comm comm;
231: PetscObjectGetComm((PetscObject)A,&comm);
232: VecGetArray(x,&x_array);
233: VecGetArray(y,&y_array);
234: if (!fftw->p_forward) { /* create a plan, then execute it */
235: switch (ndim) {
236: case 1:
237: #if defined(PETSC_USE_COMPLEX)
238: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
239: #else
240: SETERRQ(comm,PETSC_ERR_SUP,"no support for real numbers yet");
241: #endif
242: break;
243: case 2:
244: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
245: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
246: #else
247: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
248: #endif
249: break;
250: case 3:
251: #if defined(PETSC_USE_COMPLEX)
252: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
253: #else
254: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
255: #endif
256: break;
257: default:
258: #if defined(PETSC_USE_COMPLEX)
259: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
260: #else
261: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
262: #endif
263: break;
264: }
265: fftw->finarray = x_array;
266: fftw->foutarray = y_array;
267: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
268: Planning should therefore be done before x is initialized! See the FFTW manual, Sec. 2.1 or Sec. 4 */
269: fftw_execute(fftw->p_forward);
270: } else { /* use existing plan */
271: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
272: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
273: } else {
274: fftw_execute(fftw->p_forward);
275: }
276: }
277: VecRestoreArray(y,&y_array);
278: VecRestoreArray(x,&x_array);
279: return(0);
280: }
282: /* MatMultTranspose_MPIFFTW performs parallel backward DFT
283: Input parameter:
284: A - the matrix
285: x - the vector on which BDFT will be performed
287: Output parameter:
288: y - vector that stores result of BDFT
289: */
292: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
293: {
295: Mat_FFT *fft = (Mat_FFT*)A->data;
296: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
297: PetscScalar *x_array,*y_array;
298: PetscInt ndim=fft->ndim,*dim=fft->dim;
299: MPI_Comm comm;
302: PetscObjectGetComm((PetscObject)A,&comm);
303: VecGetArray(x,&x_array);
304: VecGetArray(y,&y_array);
305: if (!fftw->p_backward) { /* create a plan, then execute it */
306: switch (ndim) {
307: case 1:
308: #if defined(PETSC_USE_COMPLEX)
309: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
310: #else
311: SETERRQ(comm,PETSC_ERR_SUP,"no support for real numbers yet");
312: #endif
313: break;
314: case 2:
315: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
316: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
317: #else
318: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
319: #endif
320: break;
321: case 3:
322: #if defined(PETSC_USE_COMPLEX)
323: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
324: #else
325: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
326: #endif
327: break;
328: default:
329: #if defined(PETSC_USE_COMPLEX)
330: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
331: #else
332: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
333: #endif
334: break;
335: }
336: fftw->binarray = x_array;
337: fftw->boutarray = y_array;
338: fftw_execute(fftw->p_backward);
339: } else { /* use existing plan */
340: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
341: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
342: } else {
343: fftw_execute(fftw->p_backward);
344: }
345: }
346: VecRestoreArray(y,&y_array);
347: VecRestoreArray(x,&x_array);
348: return(0);
349: }
353: PetscErrorCode MatDestroy_FFTW(Mat A)
354: {
355: Mat_FFT *fft = (Mat_FFT*)A->data;
356: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
360: fftw_destroy_plan(fftw->p_forward);
361: fftw_destroy_plan(fftw->p_backward);
362: if (fftw->iodims) {
363: free(fftw->iodims);
364: }
365: PetscFree(fftw->dim_fftw);
366: PetscFree(fft->data);
367: fftw_mpi_cleanup();
368: return(0);
369: }
371: #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
374: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
375: {
377: PetscScalar *array;
380: VecGetArray(v,&array);
381: fftw_free((fftw_complex*)array);
382: VecRestoreArray(v,&array);
383: VecDestroy_MPI(v);
384: return(0);
385: }
389: /*@
390: MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
391: parallel layout determined by FFTW
393: Collective on Mat
395: Input Parameter:
396: . A - the matrix
398: Output Parameters:
399: + x - (optional) input vector of forward FFTW
400: . y - (optional) output vector of forward FFTW
401: - z - (optional) output vector of backward FFTW
403: Level: advanced
405: Note: The parallel layout of the output of the forward FFTW is always the same as that of the input
406: of the backward FFTW, but the parallel layout of the input vector of the forward
407: FFTW need not be the same as that of the output of the backward FFTW.
408: Also note that enough space must be provided when doing a parallel real transform:
409: extra zeros must be padded at the end of the last dimension. For this reason one needs to
410: invoke the fftw_mpi_local_size_transposed routine, remembering to change the
411: last dimension from n to n/2+1 when invoking it. The number of zeros to be padded
412: depends on whether the last dimension is even or odd: if it is even, two zeros must be padded;
413: if it is odd, only one zero is needed.
414: Lastly, some scratch space is needed at the end of the data set on each process. alloc_local
415: figures out how much space is needed, i.e. it computes the data + scratch space for
416: each process and returns that.
418: .seealso: MatCreateFFTW()
419: @*/
420: PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
421: {
425: PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
426: return(0);
427: }
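/*
   A minimal usage sketch (not part of this file; error checking is omitted and n0,n1 are
   placeholder sizes). It shows the intended pairing of MatCreateFFT() with MATFFTW and
   MatGetVecsFFTW():

     Mat      A;
     Vec      x,y,z;
     PetscInt dim[2] = {n0,n1};

     MatCreateFFT(comm,2,dim,MATFFTW,&A);
     MatGetVecsFFTW(A,&x,&y,&z);   // x: forward input, y: forward output, z: backward output
     ...
     VecDestroy(&x); VecDestroy(&y); VecDestroy(&z);
     MatDestroy(&A);
*/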
431: PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
432: {
434: PetscMPIInt size,rank;
435: MPI_Comm comm;
436: Mat_FFT *fft = (Mat_FFT*)A->data;
437: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
438: PetscInt N = fft->N;
439: PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n;
444: PetscObjectGetComm((PetscObject)A,&comm);
446: MPI_Comm_size(comm, &size);
447: MPI_Comm_rank(comm, &rank);
448: if (size == 1) { /* sequential case */
449: #if defined(PETSC_USE_COMPLEX)
450: if (fin) {VecCreateSeq(PETSC_COMM_SELF,N,fin);}
451: if (fout) {VecCreateSeq(PETSC_COMM_SELF,N,fout);}
452: if (bout) {VecCreateSeq(PETSC_COMM_SELF,N,bout);}
453: #else
454: if (fin) {VecCreateSeq(PETSC_COMM_SELF,n,fin);}
455: if (fout) {VecCreateSeq(PETSC_COMM_SELF,n,fout);}
456: if (bout) {VecCreateSeq(PETSC_COMM_SELF,n,bout);}
457: #endif
458: } else { /* parallel cases */
459: ptrdiff_t alloc_local,local_n0,local_0_start;
460: ptrdiff_t local_n1;
461: fftw_complex *data_fout;
462: ptrdiff_t local_1_start;
463: #if defined(PETSC_USE_COMPLEX)
464: fftw_complex *data_fin,*data_bout;
465: #else
466: double *data_finr,*data_boutr;
467: PetscInt n1,N1;
468: ptrdiff_t temp;
469: #endif
471: switch (ndim) {
472: case 1:
473: #if !defined(PETSC_USE_COMPLEX)
474: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
475: #else
476: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
477: if (fin) {
478: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
479: VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);
481: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
482: }
483: if (fout) {
484: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
485: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);
487: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
488: }
489: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
490: if (bout) {
491: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
492: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);
494: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
495: }
496: break;
497: #endif
498: case 2:
499: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
500: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
501: N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
502: if (fin) {
503: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
504: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
506: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
507: }
508: if (bout) {
509: data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
510: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
512: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
513: }
514: if (fout) {
515: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
516: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);
518: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
519: }
520: #else
521: /* Get local size */
522: alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
523: if (fin) {
524: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
525: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
527: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
528: }
529: if (fout) {
530: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
531: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
533: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
534: }
535: if (bout) {
536: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
537: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
539: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
540: }
541: #endif
542: break;
543: case 3:
544: #if !defined(PETSC_USE_COMPLEX)
545: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
546: N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
547: if (fin) {
548: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
549: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
551: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
552: }
553: if (bout) {
554: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
555: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
557: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
558: }
560: if (fout) {
561: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
562: VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);
564: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
565: }
566: #else
567: alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
568: if (fin) {
569: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
570: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
572: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
573: }
574: if (fout) {
575: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
576: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
578: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
579: }
580: if (bout) {
581: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
582: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
584: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
585: }
586: #endif
587: break;
588: default:
589: #if !defined(PETSC_USE_COMPLEX)
590: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
592: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
594: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
595: N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
597: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
599: if (fin) {
600: data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
601: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);
603: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
604: }
605: if (bout) {
606: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
607: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);
609: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
610: }
612: if (fout) {
613: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
614: VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);
616: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
617: }
618: #else
619: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
620: if (fin) {
621: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
622: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
624: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
625: }
626: if (fout) {
627: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
628: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
630: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
631: }
632: if (bout) {
633: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
634: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
636: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
637: }
638: #endif
639: break;
640: }
641: }
642: return(0);
643: }
647: /*@
648: VecScatterPetscToFFTW - Copies a PETSc vector into the vector that goes into the FFTW block.
650: Collective on Mat
652: Input Parameters:
653: + A - FFTW matrix
654: - x - the PETSc vector
656: Output Parameters:
657: . y - the FFTW vector
659: Options Database Keys:
660: . -mat_fftw_plannerflags - set FFTW planner flags
662: Level: intermediate
664: Note: For a parallel real FFT, FFTW requires extra space to be inserted at the end of the last dimension. This is required even when
665: one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
666: zeros. This routine does that job via a scatter operation.
668: .seealso: VecScatterFFTWToPetsc()
669: @*/
670: PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
671: {
675: PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
676: return(0);
677: }
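/*
   A minimal sketch of the intended call sequence (assuming xin is a PETSc vector in the
   matrix's natural layout and x,y were obtained from MatGetVecsFFTW(); error checking omitted):

     VecScatterPetscToFFTW(A,xin,x);   // redistribute (and, for real transforms, pad) xin into FFTW's layout
     MatMult(A,x,y);                   // forward transform
*/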
681: PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
682: {
684: MPI_Comm comm;
685: Mat_FFT *fft = (Mat_FFT*)A->data;
686: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
687: PetscInt N =fft->N;
688: PetscInt ndim =fft->ndim,*dim=fft->dim;
689: PetscInt low;
690: PetscMPIInt rank,size;
691: PetscInt vsize,vsize1;
692: ptrdiff_t local_n0,local_0_start;
693: ptrdiff_t local_n1,local_1_start;
694: VecScatter vecscat;
695: IS list1,list2;
696: #if !defined(PETSC_USE_COMPLEX)
697: PetscInt i,j,k,partial_dim;
698: PetscInt *indx1, *indx2, tempindx, tempindx1;
699: PetscInt NM;
700: ptrdiff_t temp;
701: #endif
704: PetscObjectGetComm((PetscObject)A,&comm);
705: MPI_Comm_size(comm, &size);
706: MPI_Comm_rank(comm, &rank);
707: VecGetOwnershipRange(y,&low,NULL);
709: if (size==1) {
710: VecGetSize(x,&vsize);
711: VecGetSize(y,&vsize1);
712: ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
713: VecScatterCreate(x,list1,y,list1,&vecscat);
714: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
715: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
716: VecScatterDestroy(&vecscat);
717: ISDestroy(&list1);
718: } else {
719: switch (ndim) {
720: case 1:
721: #if defined(PETSC_USE_COMPLEX)
722: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
724: ISCreateStride(comm,local_n0,local_0_start,1,&list1);
725: ISCreateStride(comm,local_n0,low,1,&list2);
726: VecScatterCreate(x,list1,y,list2,&vecscat);
727: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
728: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
729: VecScatterDestroy(&vecscat);
730: ISDestroy(&list1);
731: ISDestroy(&list2);
732: #else
733: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
734: #endif
735: break;
736: case 2:
737: #if defined(PETSC_USE_COMPLEX)
738: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
740: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
741: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
742: VecScatterCreate(x,list1,y,list2,&vecscat);
743: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
744: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
745: VecScatterDestroy(&vecscat);
746: ISDestroy(&list1);
747: ISDestroy(&list2);
748: #else
749: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
751: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);
752: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);
754: if (dim[1]%2==0) {
755: NM = dim[1]+2;
756: } else {
757: NM = dim[1]+1;
758: }
759: for (i=0; i<local_n0; i++) {
760: for (j=0; j<dim[1]; j++) {
761: tempindx = i*dim[1] + j;
762: tempindx1 = i*NM + j;
764: indx1[tempindx]=local_0_start*dim[1]+tempindx;
765: indx2[tempindx]=low+tempindx1;
766: }
767: }
769: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
770: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
772: VecScatterCreate(x,list1,y,list2,&vecscat);
773: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
774: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
775: VecScatterDestroy(&vecscat);
776: ISDestroy(&list1);
777: ISDestroy(&list2);
778: PetscFree(indx1);
779: PetscFree(indx2);
780: #endif
781: break;
783: case 3:
784: #if defined(PETSC_USE_COMPLEX)
785: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
787: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
788: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
789: VecScatterCreate(x,list1,y,list2,&vecscat);
790: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
791: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
792: VecScatterDestroy(&vecscat);
793: ISDestroy(&list1);
794: ISDestroy(&list2);
795: #else
796: /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
797: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
798: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
800: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
801: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
803: if (dim[2]%2==0) NM = dim[2]+2;
804: else NM = dim[2]+1;
806: for (i=0; i<local_n0; i++) {
807: for (j=0; j<dim[1]; j++) {
808: for (k=0; k<dim[2]; k++) {
809: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
810: tempindx1 = i*dim[1]*NM + j*NM + k;
812: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
813: indx2[tempindx]=low+tempindx1;
814: }
815: }
816: }
818: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
819: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
820: VecScatterCreate(x,list1,y,list2,&vecscat);
821: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
822: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
823: VecScatterDestroy(&vecscat);
824: ISDestroy(&list1);
825: ISDestroy(&list2);
826: PetscFree(indx1);
827: PetscFree(indx2);
828: #endif
829: break;
831: default:
832: #if defined(PETSC_USE_COMPLEX)
833: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
835: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
836: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
837: VecScatterCreate(x,list1,y,list2,&vecscat);
838: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
839: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
840: VecScatterDestroy(&vecscat);
841: ISDestroy(&list1);
842: ISDestroy(&list2);
843: #else
844: /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
845: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
846: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
848: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
850: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
852: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
854: partial_dim = fftw->partial_dim;
856: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);
857: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);
859: if (dim[ndim-1]%2==0) NM = 2;
860: else NM = 1;
862: j = low;
863: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
864: indx1[i] = local_0_start*partial_dim + i;
865: indx2[i] = j;
866: if (k%dim[ndim-1]==0) j+=NM;
867: j++;
868: }
869: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
870: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
871: VecScatterCreate(x,list1,y,list2,&vecscat);
872: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
873: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
874: VecScatterDestroy(&vecscat);
875: ISDestroy(&list1);
876: ISDestroy(&list2);
877: PetscFree(indx1);
878: PetscFree(indx2);
879: #endif
880: break;
881: }
882: }
883: return(0);
884: }
888: /*@
889: VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
891: Collective on Mat
893: Input Parameters:
894: + A - FFTW matrix
895: - x - FFTW vector
897: Output Parameters:
898: . y - PETSc vector
900: Level: intermediate
902: Note: When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
903: VecScatterFFTWToPetsc removes those extra zeros.
905: .seealso: VecScatterPetscToFFTW()
906: @*/
907: PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
908: {
912: PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
913: return(0);
914: }
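/*
   A minimal sketch of the reverse direction (assuming yout is a PETSc vector in the natural
   layout and y,z were obtained from MatGetVecsFFTW(); error checking omitted):

     MatMultTranspose(A,y,z);          // backward transform
     VecScatterFFTWToPetsc(A,z,yout);  // strip FFTW's padding and return to the PETSc layout

   Since FFTW transforms are unnormalized, a forward followed by a backward transform scales
   the data by the total number of grid points.
*/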
918: PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
919: {
921: MPI_Comm comm;
922: Mat_FFT *fft = (Mat_FFT*)A->data;
923: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
924: PetscInt N = fft->N;
925: PetscInt ndim = fft->ndim,*dim=fft->dim;
926: PetscInt low;
927: PetscMPIInt rank,size;
928: ptrdiff_t local_n0,local_0_start;
929: ptrdiff_t local_n1,local_1_start;
930: VecScatter vecscat;
931: IS list1,list2;
932: #if !defined(PETSC_USE_COMPLEX)
933: PetscInt i,j,k,partial_dim;
934: PetscInt *indx1, *indx2, tempindx, tempindx1;
935: PetscInt NM;
936: ptrdiff_t temp;
937: #endif
940: PetscObjectGetComm((PetscObject)A,&comm);
941: MPI_Comm_size(comm, &size);
942: MPI_Comm_rank(comm, &rank);
943: VecGetOwnershipRange(x,&low,NULL);
945: if (size==1) {
946: ISCreateStride(comm,N,0,1,&list1);
947: VecScatterCreate(x,list1,y,list1,&vecscat);
948: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
949: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
950: VecScatterDestroy(&vecscat);
951: ISDestroy(&list1);
953: } else {
954: switch (ndim) {
955: case 1:
956: #if defined(PETSC_USE_COMPLEX)
957: fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
959: ISCreateStride(comm,local_n1,local_1_start,1,&list1);
960: ISCreateStride(comm,local_n1,low,1,&list2);
961: VecScatterCreate(x,list1,y,list2,&vecscat);
962: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
963: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
964: VecScatterDestroy(&vecscat);
965: ISDestroy(&list1);
966: ISDestroy(&list2);
967: #else
968: SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
969: #endif
970: break;
971: case 2:
972: #if defined(PETSC_USE_COMPLEX)
973: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
975: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
976: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
977: VecScatterCreate(x,list2,y,list1,&vecscat);
978: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
979: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
980: VecScatterDestroy(&vecscat);
981: ISDestroy(&list1);
982: ISDestroy(&list2);
983: #else
984: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
986: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);
987: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);
989: if (dim[1]%2==0) NM = dim[1]+2;
990: else NM = dim[1]+1;
992: for (i=0; i<local_n0; i++) {
993: for (j=0; j<dim[1]; j++) {
994: tempindx = i*dim[1] + j;
995: tempindx1 = i*NM + j;
997: indx1[tempindx]=local_0_start*dim[1]+tempindx;
998: indx2[tempindx]=low+tempindx1;
999: }
1000: }
1002: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
1003: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
1005: VecScatterCreate(x,list2,y,list1,&vecscat);
1006: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1007: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1008: VecScatterDestroy(&vecscat);
1009: ISDestroy(&list1);
1010: ISDestroy(&list2);
1011: PetscFree(indx1);
1012: PetscFree(indx2);
1013: #endif
1014: break;
1015: case 3:
1016: #if defined(PETSC_USE_COMPLEX)
1017: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1019: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1020: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1021: VecScatterCreate(x,list1,y,list2,&vecscat);
1022: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1023: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1024: VecScatterDestroy(&vecscat);
1025: ISDestroy(&list1);
1026: ISDestroy(&list2);
1027: #else
1028: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1030: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
1031: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
1033: if (dim[2]%2==0) NM = dim[2]+2;
1034: else NM = dim[2]+1;
1036: for (i=0; i<local_n0; i++) {
1037: for (j=0; j<dim[1]; j++) {
1038: for (k=0; k<dim[2]; k++) {
1039: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1040: tempindx1 = i*dim[1]*NM + j*NM + k;
1042: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1043: indx2[tempindx]=low+tempindx1;
1044: }
1045: }
1046: }
1048: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
1049: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
1051: VecScatterCreate(x,list2,y,list1,&vecscat);
1052: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1053: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1054: VecScatterDestroy(&vecscat);
1055: ISDestroy(&list1);
1056: ISDestroy(&list2);
1057: PetscFree(indx1);
1058: PetscFree(indx2);
1059: #endif
1060: break;
1061: default:
1062: #if defined(PETSC_USE_COMPLEX)
1063: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1065: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1066: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1067: VecScatterCreate(x,list1,y,list2,&vecscat);
1068: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1069: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1070: VecScatterDestroy(&vecscat);
1071: ISDestroy(&list1);
1072: ISDestroy(&list2);
1073: #else
1074: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1076: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1078: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1080: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1082: partial_dim = fftw->partial_dim;
1084: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);
1085: PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);
1087: if (dim[ndim-1]%2==0) NM = 2;
1088: else NM = 1;
1090: j = low;
1091: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1092: indx1[i] = local_0_start*partial_dim + i;
1093: indx2[i] = j;
1094: if (k%dim[ndim-1]==0) j+=NM;
1095: j++;
1096: }
1097: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
1098: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
1100: VecScatterCreate(x,list2,y,list1,&vecscat);
1101: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1102: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1103: VecScatterDestroy(&vecscat);
1104: ISDestroy(&list1);
1105: ISDestroy(&list2);
1106: PetscFree(indx1);
1107: PetscFree(indx2);
1108: #endif
1109: break;
1110: }
1111: }
1112: return(0);
1113: }
1117: /*
1118: MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1120: Options Database Keys:
1121: . -mat_fftw_plannerflags - set FFTW planner flags
1123: Level: intermediate
1125: */
1126: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1127: {
1129: MPI_Comm comm;
1130: Mat_FFT *fft=(Mat_FFT*)A->data;
1131: Mat_FFTW *fftw;
1132: PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1133: const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1134: unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1135: PetscBool flg;
1136: PetscInt p_flag,partial_dim=1,ctr;
1137: PetscMPIInt size,rank;
1138: ptrdiff_t *pdim;
1139: ptrdiff_t local_n1,local_1_start;
1140: #if !defined(PETSC_USE_COMPLEX)
1141: ptrdiff_t temp;
1142: PetscInt N1,tot_dim;
1143: #else
1144: PetscInt n1;
1145: #endif
1148: PetscObjectGetComm((PetscObject)A,&comm);
1149: MPI_Comm_size(comm, &size);
1150: MPI_Comm_rank(comm, &rank);
1152: fftw_mpi_init();
1153: pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1154: pdim[0] = dim[0];
1155: #if !defined(PETSC_USE_COMPLEX)
1156: tot_dim = 2*dim[0];
1157: #endif
1158: for (ctr=1; ctr<ndim; ctr++) {
1159: partial_dim *= dim[ctr];
1160: pdim[ctr] = dim[ctr];
1161: #if !defined(PETSC_USE_COMPLEX)
1162: if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1163: else tot_dim *= dim[ctr];
1164: #endif
1165: }
1167: if (size == 1) {
1168: #if defined(PETSC_USE_COMPLEX)
1169: MatSetSizes(A,N,N,N,N);
1170: n = N;
1171: #else
1172: MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);
1173: n = tot_dim;
1174: #endif
1176: } else {
1177: ptrdiff_t local_n0,local_0_start;
1178: switch (ndim) {
1179: case 1:
1180: #if !defined(PETSC_USE_COMPLEX)
1181: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1182: #else
1183: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1185: n = (PetscInt)local_n0;
1186: n1 = (PetscInt)local_n1;
1187: MatSetSizes(A,n1,n,N,N);
1188: #endif
1189: break;
1190: case 2:
1191: #if defined(PETSC_USE_COMPLEX)
1192: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1193: /*
1194: PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1195: PetscSynchronizedFlush(comm,PETSC_STDOUT);
1196: */
1197: n = (PetscInt)local_n0*dim[1];
1198: MatSetSizes(A,n,n,N,N);
1199: #else
1200: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1202: n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1203: MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1204: #endif
1205: break;
1206: case 3:
1207: #if defined(PETSC_USE_COMPLEX)
1208: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1210: n = (PetscInt)local_n0*dim[1]*dim[2];
1211: MatSetSizes(A,n,n,N,N);
1212: #else
1213: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1215: n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1216: MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1217: #endif
1218: break;
1219: default:
1220: #if defined(PETSC_USE_COMPLEX)
1221: fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1223: n = (PetscInt)local_n0*partial_dim;
1224: MatSetSizes(A,n,n,N,N);
1225: #else
1226: temp = pdim[ndim-1];
1228: pdim[ndim-1] = temp/2 + 1;
1230: fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1232: n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1233: N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1235: pdim[ndim-1] = temp;
1237: MatSetSizes(A,n,n,N1,N1);
1238: #endif
1239: break;
1240: }
1241: }
1242: PetscObjectChangeTypeName((PetscObject)A,MATFFTW);
1243: PetscNewLog(A,&fftw);
1244: fft->data = (void*)fftw;
1246: fft->n = n;
1247: fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is the dimension of the fft */
1248: fftw->partial_dim = partial_dim;
1250: PetscMalloc1(ndim, &(fftw->dim_fftw));
1251: if (size == 1) {
1252: #if defined(PETSC_USE_64BIT_INDICES)
1253: fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1254: #else
1255: fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1256: #endif
1257: }
1259: for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1261: fftw->p_forward = 0;
1262: fftw->p_backward = 0;
1263: fftw->p_flag = FFTW_ESTIMATE;
1265: if (size == 1) {
1266: A->ops->mult = MatMult_SeqFFTW;
1267: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1268: } else {
1269: A->ops->mult = MatMult_MPIFFTW;
1270: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1271: }
1272: fft->matdestroy = MatDestroy_FFTW;
1273: A->assembled = PETSC_TRUE;
1274: A->preallocated = PETSC_TRUE;
1276: PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);
1277: PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);
1278: PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);
1280: /* get runtime options */
1281: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
1282: PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);
1283: if (flg) {
1284: fftw->p_flag = iplans[p_flag];
1285: }
1286: PetscOptionsEnd();
1287: return(0);
1288: }
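/*
   The planner flag registered above can be selected at runtime, e.g. (the executable name is a
   placeholder for any program that creates a MATFFTW matrix):

     ./myprog -mat_fftw_plannerflags FFTW_MEASURE
*/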