Actual source code: fftw.c
petsc-3.11.4 2019-09-28
2: /*
3: Provides an interface to the FFTW package.
4: Testing examples can be found in ~src/mat/examples/tests
5: */
7: #include <../src/mat/impls/fft/fft.h>
8: EXTERN_C_BEGIN
9: #include <fftw3-mpi.h>
10: EXTERN_C_END
12: typedef struct {
13: ptrdiff_t ndim_fftw,*dim_fftw;
14: #if defined(PETSC_USE_64BIT_INDICES)
15: fftw_iodim64 *iodims;
16: #else
17: fftw_iodim *iodims;
18: #endif
19: PetscInt partial_dim;
20: fftw_plan p_forward,p_backward;
21: unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22: PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because the fftw plan should be
23: executed with the arrays for which the plan was created */
24: } Mat_FFTW;
26: extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28: extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30: extern PetscErrorCode MatDestroy_FFTW(Mat);
31: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
33: /* MatMult_SeqFFTW performs forward DFT in serial
34: Input parameters:
35: A - the matrix
36: x - the vector on which FDFT will be performed
38: Output parameter:
39: y - vector that stores result of FDFT
40: */
41: PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
42: {
44: Mat_FFT *fft = (Mat_FFT*)A->data;
45: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
46: const PetscScalar *x_array;
47: PetscScalar *y_array;
48: #if defined(PETSC_USE_COMPLEX)
49: #if defined(PETSC_USE_64BIT_INDICES)
50: fftw_iodim64 *iodims;
51: #else
52: fftw_iodim *iodims;
53: #endif
54: PetscInt i;
55: #endif
56: PetscInt ndim = fft->ndim,*dim = fft->dim;
59: VecGetArrayRead(x,&x_array);
60: VecGetArray(y,&y_array);
61: if (!fftw->p_forward) { /* create a plan, then execute it */
62: switch (ndim) {
63: case 1:
64: #if defined(PETSC_USE_COMPLEX)
65: fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
66: #else
67: fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
68: #endif
69: break;
70: case 2:
71: #if defined(PETSC_USE_COMPLEX)
72: fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
73: #else
74: fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
75: #endif
76: break;
77: case 3:
78: #if defined(PETSC_USE_COMPLEX)
79: fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
80: #else
81: fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
82: #endif
83: break;
84: default:
85: #if defined(PETSC_USE_COMPLEX)
86: iodims = fftw->iodims;
87: #if defined(PETSC_USE_64BIT_INDICES)
88: if (ndim) {
89: iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
90: iodims[ndim-1].is = iodims[ndim-1].os = 1;
91: for (i=ndim-2; i>=0; --i) {
92: iodims[i].n = (ptrdiff_t)dim[i];
93: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
94: }
95: }
96: fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
97: #else
98: if (ndim) {
99: iodims[ndim-1].n = (int)dim[ndim-1];
100: iodims[ndim-1].is = iodims[ndim-1].os = 1;
101: for (i=ndim-2; i>=0; --i) {
102: iodims[i].n = (int)dim[i];
103: iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
104: }
105: }
106: fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
107: #endif
109: #else
110: fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
111: #endif
112: break;
113: }
114: fftw->finarray = (PetscScalar *) x_array;
115: fftw->foutarray = y_array;
116: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
117: planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
118: fftw_execute(fftw->p_forward);
119: } else { /* use existing plan */
120: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
121: #if defined(PETSC_USE_COMPLEX)
122: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
123: #else
124: fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
125: #endif
126: } else {
127: fftw_execute(fftw->p_forward);
128: }
129: }
130: VecRestoreArray(y,&y_array);
131: VecRestoreArrayRead(x,&x_array);
132: return(0);
133: }
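/*
   Illustrative usage sketch (not part of the original source): a minimal sequential
   transform through the MATFFTW interface, which dispatches MatMult() to
   MatMult_SeqFFTW() above. It assumes a complex-valued PETSc build; the vector
   names and the 8x8 grid are hypothetical.

     Mat      A;
     Vec      x,y;
     PetscInt dim[2] = {8,8};

     MatCreateFFT(PETSC_COMM_SELF,2,dim,MATFFTW,&A);
     MatCreateVecsFFTW(A,&x,&y,NULL);
     ... fill x with the signal to be transformed ...
     MatMult(A,x,y);                      forward DFT of x, result stored in y
     VecDestroy(&x); VecDestroy(&y); MatDestroy(&A);
*/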
135: /* MatMultTranspose_SeqFFTW performs serial backward DFT
136: Input parameters:
137: A - the matrix
138: x - the vector on which BDFT will be performed
140: Output parameter:
141: y - vector that stores result of BDFT
142: */
144: PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
145: {
147: Mat_FFT *fft = (Mat_FFT*)A->data;
148: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
149: const PetscScalar *x_array;
150: PetscScalar *y_array;
151: PetscInt ndim=fft->ndim,*dim=fft->dim;
152: #if defined(PETSC_USE_COMPLEX)
153: #if defined(PETSC_USE_64BIT_INDICES)
154: fftw_iodim64 *iodims=fftw->iodims;
155: #else
156: fftw_iodim *iodims=fftw->iodims;
157: #endif
158: #endif
159:
161: VecGetArrayRead(x,&x_array);
162: VecGetArray(y,&y_array);
163: if (!fftw->p_backward) { /* create a plan, then execute it */
164: switch (ndim) {
165: case 1:
166: #if defined(PETSC_USE_COMPLEX)
167: fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
168: #else
169: fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
170: #endif
171: break;
172: case 2:
173: #if defined(PETSC_USE_COMPLEX)
174: fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
175: #else
176: fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
177: #endif
178: break;
179: case 3:
180: #if defined(PETSC_USE_COMPLEX)
181: fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
182: #else
183: fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
184: #endif
185: break;
186: default:
187: #if defined(PETSC_USE_COMPLEX)
188: #if defined(PETSC_USE_64BIT_INDICES)
189: fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
190: #else
191: fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
192: #endif
193: #else
194: fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
195: #endif
196: break;
197: }
198: fftw->binarray = (PetscScalar *) x_array;
199: fftw->boutarray = y_array;
200: fftw_execute(fftw->p_backward);
201: } else { /* use existing plan */
202: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
203: #if defined(PETSC_USE_COMPLEX)
204: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
205: #else
206: fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
207: #endif
208: } else {
209: fftw_execute(fftw->p_backward);
210: }
211: }
212: VecRestoreArray(y,&y_array);
213: VecRestoreArrayRead(x,&x_array);
214: return(0);
215: }
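/*
   Illustrative note (not part of the original source): as in FFTW itself, the backward
   transform applied by MatMultTranspose() is unnormalized, so a forward transform followed
   by a backward transform scales the data by the total number of grid points. A minimal
   sketch, assuming x, y and z were obtained from MatCreateVecsFFTW() and N is the product
   of the dimensions:

     MatMult(A,x,y);                      forward DFT
     MatMultTranspose(A,y,z);             unnormalized backward DFT
     VecScale(z,1.0/(PetscReal)N);        z now reproduces x (complex case)
*/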
217: /* MatMult_MPIFFTW performs forward DFT in parallel
218: Input parameters:
219: A - the matrix
220: x - the vector on which FDFT will be performed
222: Output parameter:
223: y - vector that stores result of FDFT
224: */
225: PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
226: {
228: Mat_FFT *fft = (Mat_FFT*)A->data;
229: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
230: const PetscScalar *x_array;
231: PetscScalar *y_array;
232: PetscInt ndim=fft->ndim,*dim=fft->dim;
233: MPI_Comm comm;
236: PetscObjectGetComm((PetscObject)A,&comm);
237: VecGetArrayRead(x,&x_array);
238: VecGetArray(y,&y_array);
239: if (!fftw->p_forward) { /* create a plan, then execute it */
240: switch (ndim) {
241: case 1:
242: #if defined(PETSC_USE_COMPLEX)
243: fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
244: #else
245: SETERRQ(comm,PETSC_ERR_SUP,"no support for real numbers yet");
246: #endif
247: break;
248: case 2:
249: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
250: fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
251: #else
252: fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
253: #endif
254: break;
255: case 3:
256: #if defined(PETSC_USE_COMPLEX)
257: fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
258: #else
259: fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
260: #endif
261: break;
262: default:
263: #if defined(PETSC_USE_COMPLEX)
264: fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
265: #else
266: fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
267: #endif
268: break;
269: }
270: fftw->finarray = (PetscScalar *) x_array;
271: fftw->foutarray = y_array;
272: /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
273: planning should be done before x is initialized! See FFTW manual sec. 2.1 or sec. 4 */
274: fftw_execute(fftw->p_forward);
275: } else { /* use existing plan */
276: if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
277: fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
278: } else {
279: fftw_execute(fftw->p_forward);
280: }
281: }
282: VecRestoreArray(y,&y_array);
283: VecRestoreArrayRead(x,&x_array);
284: return(0);
285: }
287: /* MatMultTranspose_MPIFFTW performs parallel backward DFT
288: Input parameters:
289: A - the matrix
290: x - the vector on which BDFT will be performed
292: Output parameter:
293: y - vector that stores result of BDFT
294: */
295: PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
296: {
298: Mat_FFT *fft = (Mat_FFT*)A->data;
299: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
300: const PetscScalar *x_array;
301: PetscScalar *y_array;
302: PetscInt ndim=fft->ndim,*dim=fft->dim;
303: MPI_Comm comm;
306: PetscObjectGetComm((PetscObject)A,&comm);
307: VecGetArrayRead(x,&x_array);
308: VecGetArray(y,&y_array);
309: if (!fftw->p_backward) { /* create a plan, then execute it */
310: switch (ndim) {
311: case 1:
312: #if defined(PETSC_USE_COMPLEX)
313: fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
314: #else
315: SETERRQ(comm,PETSC_ERR_SUP,"no support for real numbers yet");
316: #endif
317: break;
318: case 2:
319: #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
320: fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
321: #else
322: fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
323: #endif
324: break;
325: case 3:
326: #if defined(PETSC_USE_COMPLEX)
327: fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
328: #else
329: fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
330: #endif
331: break;
332: default:
333: #if defined(PETSC_USE_COMPLEX)
334: fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
335: #else
336: fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
337: #endif
338: break;
339: }
340: fftw->binarray = (PetscScalar *) x_array;
341: fftw->boutarray = y_array;
342: fftw_execute(fftw->p_backward);
343: } else { /* use existing plan */
344: if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
345: fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
346: } else {
347: fftw_execute(fftw->p_backward);
348: }
349: }
350: VecRestoreArray(y,&y_array);
351: VecRestoreArrayRead(x,&x_array);
352: return(0);
353: }
355: PetscErrorCode MatDestroy_FFTW(Mat A)
356: {
357: Mat_FFT *fft = (Mat_FFT*)A->data;
358: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
362: fftw_destroy_plan(fftw->p_forward);
363: fftw_destroy_plan(fftw->p_backward);
364: if (fftw->iodims) {
365: free(fftw->iodims);
366: }
367: PetscFree(fftw->dim_fftw);
368: PetscFree(fft->data);
369: fftw_mpi_cleanup();
370: return(0);
371: }
373: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
374: PetscErrorCode VecDestroy_MPIFFTW(Vec v)
375: {
377: PetscScalar *array;
380: VecGetArray(v,&array);
381: fftw_free((fftw_complex*)array);
382: VecRestoreArray(v,&array);
383: VecDestroy_MPI(v);
384: return(0);
385: }
387: /*@
388: MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
389: parallel layout determined by FFTW
391: Collective on Mat
393: Input Parameter:
394: . A - the matrix
396: Output Parameters:
397: + x - (optional) input vector of forward FFTW
398: . y - (optional) output vector of forward FFTW
399: - z - (optional) output vector of backward FFTW
401: Level: advanced
403: Note: The parallel layout of the output of the forward FFTW is always the same as the input
404: of the backward FFTW, but the parallel layout of the input vector of the forward
405: FFTW might not be the same as the output of the backward FFTW.
406: Also note that enough space must be provided when doing a parallel real transform:
407: extra zeros have to be padded at the end of the last dimension. For this reason one needs to
408: invoke the routine fftw_mpi_local_size_transposed(), remembering to change the
409: last dimension from n to n/2+1 when invoking it. The number of zeros to be padded
410: depends on whether the last dimension is even or odd: if it is even, two zeros are padded;
411: if it is odd, only one zero is needed.
412: Lastly, some scratch space is needed at the end of the data set on each process. alloc_local
413: figures out how much space is needed, i.e. it computes the data+scratch space for
414: each processor and returns that.
416: .seealso: MatCreateFFT()
417: @*/
418: PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
419: {
423: PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
424: return(0);
425: }
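/*
   Illustrative usage sketch (hypothetical names): obtaining work vectors whose parallel
   layout matches the FFTW decomposition computed by the fftw_mpi_local_size_*() calls in
   MatCreateVecsFFTW_FFTW() below. Any of the three output arguments may be NULL if that
   vector is not needed.

     Vec x,y,z;
     MatCreateVecsFFTW(A,&x,&y,&z);       x: forward input, y: forward output, z: backward output
*/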
427: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
428: {
430: PetscMPIInt size,rank;
431: MPI_Comm comm;
432: Mat_FFT *fft = (Mat_FFT*)A->data;
433: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
434: PetscInt N = fft->N;
435: PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n;
440: PetscObjectGetComm((PetscObject)A,&comm);
442: MPI_Comm_size(comm, &size);
443: MPI_Comm_rank(comm, &rank);
444: if (size == 1) { /* sequential case */
445: #if defined(PETSC_USE_COMPLEX)
446: if (fin) {VecCreateSeq(PETSC_COMM_SELF,N,fin);}
447: if (fout) {VecCreateSeq(PETSC_COMM_SELF,N,fout);}
448: if (bout) {VecCreateSeq(PETSC_COMM_SELF,N,bout);}
449: #else
450: if (fin) {VecCreateSeq(PETSC_COMM_SELF,n,fin);}
451: if (fout) {VecCreateSeq(PETSC_COMM_SELF,n,fout);}
452: if (bout) {VecCreateSeq(PETSC_COMM_SELF,n,bout);}
453: #endif
454: } else { /* parallel cases */
455: ptrdiff_t alloc_local,local_n0,local_0_start;
456: ptrdiff_t local_n1;
457: fftw_complex *data_fout;
458: ptrdiff_t local_1_start;
459: #if defined(PETSC_USE_COMPLEX)
460: fftw_complex *data_fin,*data_bout;
461: #else
462: double *data_finr,*data_boutr;
463: PetscInt n1,N1;
464: ptrdiff_t temp;
465: #endif
467: switch (ndim) {
468: case 1:
469: #if !defined(PETSC_USE_COMPLEX)
470: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
471: #else
472: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
473: if (fin) {
474: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
475: VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);
477: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
478: }
479: if (fout) {
480: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
481: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);
483: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
484: }
485: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
486: if (bout) {
487: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
488: VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);
490: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
491: }
492: break;
493: #endif
494: case 2:
495: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
496: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
497: N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
498: if (fin) {
499: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
500: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
502: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
503: }
504: if (bout) {
505: data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
506: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
508: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
509: }
510: if (fout) {
511: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
512: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);
514: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
515: }
516: #else
517: /* Get local size */
518: alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
519: if (fin) {
520: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
521: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
523: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
524: }
525: if (fout) {
526: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
527: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
529: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
530: }
531: if (bout) {
532: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
533: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
535: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
536: }
537: #endif
538: break;
539: case 3:
540: #if !defined(PETSC_USE_COMPLEX)
541: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
542: N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
543: if (fin) {
544: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
545: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
547: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
548: }
549: if (bout) {
550: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
551: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
553: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
554: }
556: if (fout) {
557: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
558: VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);
560: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
561: }
562: #else
563: alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
564: if (fin) {
565: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
566: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
568: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
569: }
570: if (fout) {
571: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
572: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
574: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
575: }
576: if (bout) {
577: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
578: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
580: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
581: }
582: #endif
583: break;
584: default:
585: #if !defined(PETSC_USE_COMPLEX)
586: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
588: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
590: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
591: N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
593: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
595: if (fin) {
596: data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
597: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);
599: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
600: }
601: if (bout) {
602: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
603: VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);
605: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
606: }
608: if (fout) {
609: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
610: VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);
612: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
613: }
614: #else
615: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
616: if (fin) {
617: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
618: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
620: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
621: }
622: if (fout) {
623: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
624: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
626: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
627: }
628: if (bout) {
629: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
630: VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
632: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
633: }
634: #endif
635: break;
636: }
637: }
638: return(0);
639: }
641: /*@
642: VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into the FFTW block.
644: Collective on Mat
646: Input Parameters:
647: + A - FFTW matrix
648: - x - the PETSc vector
650: Output Parameters:
651: . y - the FFTW vector
653: Options Database Keys:
654: . -mat_fftw_plannerflags - set FFTW planner flags
656: Level: intermediate
658: Note: For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even
659: when one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these
660: extra zeros. This routine does that job via a scatter operation.
662: .seealso: VecScatterFFTWToPetsc()
663: @*/
664: PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
665: {
669: PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
670: return(0);
671: }
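/*
   Illustrative usage sketch (hypothetical names): a typical parallel round trip. The PETSc
   vectors u and v have the natural PETSc layout, while x, y and z come from MatCreateVecsFFTW()
   and therefore carry the FFTW layout, including the padding needed by parallel real transforms.
   N denotes the product of the dimensions.

     VecScatterPetscToFFTW(A,u,x);        copy (and pad) u into the FFTW input vector x
     MatMult(A,x,y);                      parallel forward DFT
     MatMultTranspose(A,y,z);             parallel backward DFT (unnormalized)
     VecScatterFFTWToPetsc(A,z,v);        strip the padding while copying back to the PETSc vector v
     VecScale(v,1.0/(PetscReal)N);        undo the scaling by the number of grid points
*/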
673: PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
674: {
676: MPI_Comm comm;
677: Mat_FFT *fft = (Mat_FFT*)A->data;
678: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
679: PetscInt N =fft->N;
680: PetscInt ndim =fft->ndim,*dim=fft->dim;
681: PetscInt low;
682: PetscMPIInt rank,size;
683: PetscInt vsize,vsize1;
684: ptrdiff_t local_n0,local_0_start;
685: ptrdiff_t local_n1,local_1_start;
686: VecScatter vecscat;
687: IS list1,list2;
688: #if !defined(PETSC_USE_COMPLEX)
689: PetscInt i,j,k,partial_dim;
690: PetscInt *indx1, *indx2, tempindx, tempindx1;
691: PetscInt NM;
692: ptrdiff_t temp;
693: #endif
696: PetscObjectGetComm((PetscObject)A,&comm);
697: MPI_Comm_size(comm, &size);
698: MPI_Comm_rank(comm, &rank);
699: VecGetOwnershipRange(y,&low,NULL);
701: if (size==1) {
702: VecGetSize(x,&vsize);
703: VecGetSize(y,&vsize1);
704: ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
705: VecScatterCreate(x,list1,y,list1,&vecscat);
706: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
707: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
708: VecScatterDestroy(&vecscat);
709: ISDestroy(&list1);
710: } else {
711: switch (ndim) {
712: case 1:
713: #if defined(PETSC_USE_COMPLEX)
714: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
716: ISCreateStride(comm,local_n0,local_0_start,1,&list1);
717: ISCreateStride(comm,local_n0,low,1,&list2);
718: VecScatterCreate(x,list1,y,list2,&vecscat);
719: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
720: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
721: VecScatterDestroy(&vecscat);
722: ISDestroy(&list1);
723: ISDestroy(&list2);
724: #else
725: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
726: #endif
727: break;
728: case 2:
729: #if defined(PETSC_USE_COMPLEX)
730: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
732: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
733: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
734: VecScatterCreate(x,list1,y,list2,&vecscat);
735: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
736: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
737: VecScatterDestroy(&vecscat);
738: ISDestroy(&list1);
739: ISDestroy(&list2);
740: #else
741: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
743: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
744: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);
746: if (dim[1]%2==0) {
747: NM = dim[1]+2;
748: } else {
749: NM = dim[1]+1;
750: }
751: for (i=0; i<local_n0; i++) {
752: for (j=0; j<dim[1]; j++) {
753: tempindx = i*dim[1] + j;
754: tempindx1 = i*NM + j;
756: indx1[tempindx]=local_0_start*dim[1]+tempindx;
757: indx2[tempindx]=low+tempindx1;
758: }
759: }
761: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
762: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
764: VecScatterCreate(x,list1,y,list2,&vecscat);
765: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
766: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
767: VecScatterDestroy(&vecscat);
768: ISDestroy(&list1);
769: ISDestroy(&list2);
770: PetscFree(indx1);
771: PetscFree(indx2);
772: #endif
773: break;
775: case 3:
776: #if defined(PETSC_USE_COMPLEX)
777: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
779: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
780: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
781: VecScatterCreate(x,list1,y,list2,&vecscat);
782: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
783: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
784: VecScatterDestroy(&vecscat);
785: ISDestroy(&list1);
786: ISDestroy(&list2);
787: #else
788: /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
789: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
790: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
792: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
793: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
795: if (dim[2]%2==0) NM = dim[2]+2;
796: else NM = dim[2]+1;
798: for (i=0; i<local_n0; i++) {
799: for (j=0; j<dim[1]; j++) {
800: for (k=0; k<dim[2]; k++) {
801: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
802: tempindx1 = i*dim[1]*NM + j*NM + k;
804: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
805: indx2[tempindx]=low+tempindx1;
806: }
807: }
808: }
810: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
811: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
812: VecScatterCreate(x,list1,y,list2,&vecscat);
813: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
814: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
815: VecScatterDestroy(&vecscat);
816: ISDestroy(&list1);
817: ISDestroy(&list2);
818: PetscFree(indx1);
819: PetscFree(indx2);
820: #endif
821: break;
823: default:
824: #if defined(PETSC_USE_COMPLEX)
825: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
827: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
828: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
829: VecScatterCreate(x,list1,y,list2,&vecscat);
830: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
831: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
832: VecScatterDestroy(&vecscat);
833: ISDestroy(&list1);
834: ISDestroy(&list2);
835: #else
836: /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
837: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
838: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
840: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
842: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
844: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
846: partial_dim = fftw->partial_dim;
848: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
849: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);
851: if (dim[ndim-1]%2==0) NM = 2;
852: else NM = 1;
854: j = low;
855: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
856: indx1[i] = local_0_start*partial_dim + i;
857: indx2[i] = j;
858: if (k%dim[ndim-1]==0) j+=NM;
859: j++;
860: }
861: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
862: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
863: VecScatterCreate(x,list1,y,list2,&vecscat);
864: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
865: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
866: VecScatterDestroy(&vecscat);
867: ISDestroy(&list1);
868: ISDestroy(&list2);
869: PetscFree(indx1);
870: PetscFree(indx2);
871: #endif
872: break;
873: }
874: }
875: return(0);
876: }
878: /*@
879: VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
881: Collective on Mat
883: Input Parameters:
884: + A - FFTW matrix
885: - x - FFTW vector
887: Output Parameters:
888: . y - PETSc vector
890: Level: intermediate
892: Note: When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
893: VecScatterFFTWToPetsc removes those extra zeros.
895: .seealso: VecScatterPetscToFFTW()
896: @*/
897: PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
898: {
902: PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
903: return(0);
904: }
906: PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
907: {
909: MPI_Comm comm;
910: Mat_FFT *fft = (Mat_FFT*)A->data;
911: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
912: PetscInt N = fft->N;
913: PetscInt ndim = fft->ndim,*dim=fft->dim;
914: PetscInt low;
915: PetscMPIInt rank,size;
916: ptrdiff_t local_n0,local_0_start;
917: ptrdiff_t local_n1,local_1_start;
918: VecScatter vecscat;
919: IS list1,list2;
920: #if !defined(PETSC_USE_COMPLEX)
921: PetscInt i,j,k,partial_dim;
922: PetscInt *indx1, *indx2, tempindx, tempindx1;
923: PetscInt NM;
924: ptrdiff_t temp;
925: #endif
928: PetscObjectGetComm((PetscObject)A,&comm);
929: MPI_Comm_size(comm, &size);
930: MPI_Comm_rank(comm, &rank);
931: VecGetOwnershipRange(x,&low,NULL);
933: if (size==1) {
934: ISCreateStride(comm,N,0,1,&list1);
935: VecScatterCreate(x,list1,y,list1,&vecscat);
936: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
937: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
938: VecScatterDestroy(&vecscat);
939: ISDestroy(&list1);
941: } else {
942: switch (ndim) {
943: case 1:
944: #if defined(PETSC_USE_COMPLEX)
945: fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
947: ISCreateStride(comm,local_n1,local_1_start,1,&list1);
948: ISCreateStride(comm,local_n1,low,1,&list2);
949: VecScatterCreate(x,list1,y,list2,&vecscat);
950: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
951: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
952: VecScatterDestroy(&vecscat);
953: ISDestroy(&list1);
954: ISDestroy(&list2);
955: #else
956: SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
957: #endif
958: break;
959: case 2:
960: #if defined(PETSC_USE_COMPLEX)
961: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
963: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
964: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
965: VecScatterCreate(x,list2,y,list1,&vecscat);
966: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
967: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
968: VecScatterDestroy(&vecscat);
969: ISDestroy(&list1);
970: ISDestroy(&list2);
971: #else
972: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
974: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
975: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);
977: if (dim[1]%2==0) NM = dim[1]+2;
978: else NM = dim[1]+1;
980: for (i=0; i<local_n0; i++) {
981: for (j=0; j<dim[1]; j++) {
982: tempindx = i*dim[1] + j;
983: tempindx1 = i*NM + j;
985: indx1[tempindx]=local_0_start*dim[1]+tempindx;
986: indx2[tempindx]=low+tempindx1;
987: }
988: }
990: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
991: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
993: VecScatterCreate(x,list2,y,list1,&vecscat);
994: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
995: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
996: VecScatterDestroy(&vecscat);
997: ISDestroy(&list1);
998: ISDestroy(&list2);
999: PetscFree(indx1);
1000: PetscFree(indx2);
1001: #endif
1002: break;
1003: case 3:
1004: #if defined(PETSC_USE_COMPLEX)
1005: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1007: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1008: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1009: VecScatterCreate(x,list1,y,list2,&vecscat);
1010: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1011: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1012: VecScatterDestroy(&vecscat);
1013: ISDestroy(&list1);
1014: ISDestroy(&list2);
1015: #else
1016: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1018: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
1019: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
1021: if (dim[2]%2==0) NM = dim[2]+2;
1022: else NM = dim[2]+1;
1024: for (i=0; i<local_n0; i++) {
1025: for (j=0; j<dim[1]; j++) {
1026: for (k=0; k<dim[2]; k++) {
1027: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1028: tempindx1 = i*dim[1]*NM + j*NM + k;
1030: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1031: indx2[tempindx]=low+tempindx1;
1032: }
1033: }
1034: }
1036: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
1037: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
1039: VecScatterCreate(x,list2,y,list1,&vecscat);
1040: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1041: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1042: VecScatterDestroy(&vecscat);
1043: ISDestroy(&list1);
1044: ISDestroy(&list2);
1045: PetscFree(indx1);
1046: PetscFree(indx2);
1047: #endif
1048: break;
1049: default:
1050: #if defined(PETSC_USE_COMPLEX)
1051: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1053: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1054: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1055: VecScatterCreate(x,list1,y,list2,&vecscat);
1056: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1057: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1058: VecScatterDestroy(&vecscat);
1059: ISDestroy(&list1);
1060: ISDestroy(&list2);
1061: #else
1062: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1064: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1066: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1068: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1070: partial_dim = fftw->partial_dim;
1072: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
1073: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);
1075: if (dim[ndim-1]%2==0) NM = 2;
1076: else NM = 1;
1078: j = low;
1079: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1080: indx1[i] = local_0_start*partial_dim + i;
1081: indx2[i] = j;
1082: if (k%dim[ndim-1]==0) j+=NM;
1083: j++;
1084: }
1085: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
1086: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
1088: VecScatterCreate(x,list2,y,list1,&vecscat);
1089: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1090: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
1091: VecScatterDestroy(&vecscat);
1092: ISDestroy(&list1);
1093: ISDestroy(&list2);
1094: PetscFree(indx1);
1095: PetscFree(indx2);
1096: #endif
1097: break;
1098: }
1099: }
1100: return(0);
1101: }
1103: /*
1104: MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1106: Options Database Keys:
1107: + -mat_fftw_plannerflags - set FFTW planner flags
1109: Level: intermediate
1111: */
1112: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1113: {
1115: MPI_Comm comm;
1116: Mat_FFT *fft=(Mat_FFT*)A->data;
1117: Mat_FFTW *fftw;
1118: PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1119: const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1120: unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1121: PetscBool flg;
1122: PetscInt p_flag,partial_dim=1,ctr;
1123: PetscMPIInt size,rank;
1124: ptrdiff_t *pdim;
1125: ptrdiff_t local_n1,local_1_start;
1126: #if !defined(PETSC_USE_COMPLEX)
1127: ptrdiff_t temp;
1128: PetscInt N1,tot_dim;
1129: #else
1130: PetscInt n1;
1131: #endif
1134: PetscObjectGetComm((PetscObject)A,&comm);
1135: MPI_Comm_size(comm, &size);
1136: MPI_Comm_rank(comm, &rank);
1138: fftw_mpi_init();
1139: pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1140: pdim[0] = dim[0];
1141: #if !defined(PETSC_USE_COMPLEX)
1142: tot_dim = 2*dim[0];
1143: #endif
1144: for (ctr=1; ctr<ndim; ctr++) {
1145: partial_dim *= dim[ctr];
1146: pdim[ctr] = dim[ctr];
1147: #if !defined(PETSC_USE_COMPLEX)
1148: if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1149: else tot_dim *= dim[ctr];
1150: #endif
1151: }
1153: if (size == 1) {
1154: #if defined(PETSC_USE_COMPLEX)
1155: MatSetSizes(A,N,N,N,N);
1156: n = N;
1157: #else
1158: MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);
1159: n = tot_dim;
1160: #endif
1162: } else {
1163: ptrdiff_t local_n0,local_0_start;
1164: switch (ndim) {
1165: case 1:
1166: #if !defined(PETSC_USE_COMPLEX)
1167: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1168: #else
1169: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1171: n = (PetscInt)local_n0;
1172: n1 = (PetscInt)local_n1;
1173: MatSetSizes(A,n1,n,N,N);
1174: #endif
1175: break;
1176: case 2:
1177: #if defined(PETSC_USE_COMPLEX)
1178: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1179: /*
1180: PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1181: PetscSynchronizedFlush(comm,PETSC_STDOUT);
1182: */
1183: n = (PetscInt)local_n0*dim[1];
1184: MatSetSizes(A,n,n,N,N);
1185: #else
1186: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1188: n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1189: MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1190: #endif
1191: break;
1192: case 3:
1193: #if defined(PETSC_USE_COMPLEX)
1194: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1196: n = (PetscInt)local_n0*dim[1]*dim[2];
1197: MatSetSizes(A,n,n,N,N);
1198: #else
1199: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1201: n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1202: MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1203: #endif
1204: break;
1205: default:
1206: #if defined(PETSC_USE_COMPLEX)
1207: fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1209: n = (PetscInt)local_n0*partial_dim;
1210: MatSetSizes(A,n,n,N,N);
1211: #else
1212: temp = pdim[ndim-1];
1214: pdim[ndim-1] = temp/2 + 1;
1216: fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1218: n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1219: N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1221: pdim[ndim-1] = temp;
1223: MatSetSizes(A,n,n,N1,N1);
1224: #endif
1225: break;
1226: }
1227: }
1228: free(pdim);
1229: PetscObjectChangeTypeName((PetscObject)A,MATFFTW);
1230: PetscNewLog(A,&fftw);
1231: fft->data = (void*)fftw;
1233: fft->n = n;
1234: fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */
1235: fftw->partial_dim = partial_dim;
1237: PetscMalloc1(ndim, &(fftw->dim_fftw));
1238: if (size == 1) {
1239: #if defined(PETSC_USE_64BIT_INDICES)
1240: fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1241: #else
1242: fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1243: #endif
1244: }
1246: for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1248: fftw->p_forward = 0;
1249: fftw->p_backward = 0;
1250: fftw->p_flag = FFTW_ESTIMATE;
1252: if (size == 1) {
1253: A->ops->mult = MatMult_SeqFFTW;
1254: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1255: } else {
1256: A->ops->mult = MatMult_MPIFFTW;
1257: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1258: }
1259: fft->matdestroy = MatDestroy_FFTW;
1260: A->assembled = PETSC_TRUE;
1261: A->preallocated = PETSC_TRUE;
1263: PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);
1264: PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);
1265: PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);
1267: /* get runtime options */
1268: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
1269: PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);
1270: if (flg) {
1271: fftw->p_flag = iplans[p_flag];
1272: }
1273: PetscOptionsEnd();
1274: return(0);
1275: }
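/*
   Illustrative note on the runtime option registered above (the application name myapp is
   hypothetical): the planner flag defaults to FFTW_ESTIMATE and can be changed either on the
   command line,

     ./myapp -mat_fftw_plannerflags FFTW_MEASURE

   or programmatically before MatCreateFFT() is called, e.g.

     PetscOptionsSetValue(NULL,"-mat_fftw_plannerflags","FFTW_MEASURE");

   Per the warning in MatMult_SeqFFTW() and MatMult_MPIFFTW(), any flag other than FFTW_ESTIMATE
   overwrites the contents of the input/output arrays while the plan is being created.
*/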