/*
   Provides an interface to the FFTW package.
   Testing examples can be found in ~src/mat/tests
*/
7: #include <../src/mat/impls/fft/fft.h>
8: EXTERN_C_BEGIN
9: #if !PetscDefined(HAVE_MPIUNI)
10: #include <fftw3-mpi.h>
11: #else
12: #include <fftw3.h>
13: #endif
14: EXTERN_C_END
/* Private context for a MATFFTW matrix; hangs off Mat_FFT->data */
typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw;   /* dimensions in FFTW's ptrdiff_t form, used by the MPI/guru interfaces */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;               /* guru-interface dimension descriptors (64-bit index build) */
#else
  fftw_iodim   *iodims;               /* guru-interface dimension descriptors */
#endif
  PetscInt     partial_dim;           /* presumably the product of the non-distributed dimensions; used for parallel scatters — TODO confirm */
  fftw_plan    p_forward,p_backward;  /* FFTW plans, created lazily on first MatMult/MatMultTranspose */
  unsigned     p_flag;                /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because an fftw plan should be
                                                             executed for the arrays with which the plan was created */
} Mat_FFTW;
30: extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
31: extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
32: extern PetscErrorCode MatDestroy_FFTW(Mat);
33: extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);
34: #if !PetscDefined(HAVE_MPIUNI)
35: extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
36: extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
37: extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
38: #endif
/*
    MatMult_SeqFFTW performs the sequential forward DFT

    Input parameters:
      A - the FFT matrix
      x - the vector on which the forward DFT will be performed

    Output parameter:
      y - vector that stores the result of the forward DFT

    Notes:
    The FFTW plan is created on the first call and cached in the Mat_FFTW
    context together with the array pointers it was planned with.  Later
    calls with different arrays go through FFTW's new-array execute
    interface (fftw_execute_dft / fftw_execute_dft_r2c), which requires the
    new arrays to have the same size and alignment as the planned ones.
    NOTE(review): alignment is not checked here — presumably the vectors are
    always allocated compatibly; confirm against MatCreateVecsFFTW().
*/
PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64      *iodims;
#else
  fftw_iodim        *iodims;
#endif
  PetscInt          i;
#endif
  PetscInt          ndim = fft->ndim,*dim = fft->dim;

  VecGetArrayRead(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    default: /* dimensions > 3: use the guru interface with explicit strides */
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      if (ndim) {
        /* row-major layout: last dimension is contiguous; earlier strides are products of the later sizes */
        iodims[ndim-1].n  = (ptrdiff_t)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n  = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      if (ndim) {
        iodims[ndim-1].n  = (int)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n  = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#endif
#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    }
    /* remember which arrays the plan was created for; the const is cast away only to store the pointer */
    fftw->finarray  = (PetscScalar *) x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArrayRead(x,&x_array);
  return 0;
}
/*
    MatMultTranspose_SeqFFTW performs the sequential backward DFT

    Input parameters:
      A - the FFT matrix
      x - the vector on which the backward DFT will be performed

    Output parameter:
      y - vector that stores the result of the backward DFT

    Notes:
    Per FFTW convention the backward transform is unnormalized: applying
    forward then backward scales the data by the total number of points.
    The plan is created lazily and cached, mirroring MatMult_SeqFFTW.
*/
PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64      *iodims = fftw->iodims;  /* strides were filled in by MatMult_SeqFFTW's default case */
#else
  fftw_iodim        *iodims = fftw->iodims;
#endif
#endif

  VecGetArrayRead(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    default: /* dimensions > 3: guru interface, reusing the iodims set up by the forward transform */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#endif
#else
      fftw->p_backward = fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    }
    /* remember which arrays the backward plan was created for */
    fftw->binarray  = (PetscScalar *) x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArrayRead(x,&x_array);
  return 0;
}
221: #if !PetscDefined(HAVE_MPIUNI)
/*
    MatMult_MPIFFTW performs the forward DFT in parallel

    Input parameters:
      A - the FFT matrix
      x - the vector on which the forward DFT will be performed

    Output parameter:
      y - vector that stores the result of the forward DFT

    Notes:
    NOTE(review): the real-transform (r2c) branches pass FFTW_ESTIMATE
    instead of fftw->p_flag — presumably to avoid the destructive planning
    of MEASURE/PATIENT on user data; confirm this is intentional.
    NOTE(review): the plan-reuse branch below calls fftw_execute_dft()
    unconditionally, which looks wrong for r2c plans in real builds
    (compare the #if-guarded sequential version) — verify.
*/
PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
  MPI_Comm          comm;

  PetscObjectGetComm((PetscObject)A,&comm);
  VecGetArrayRead(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default: /* dimensions > 3: generic MPI planner with ptrdiff_t dimensions */
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    /* remember which arrays the plan was created for */
    fftw->finarray  = (PetscScalar *) x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArrayRead(x,&x_array);
  return 0;
}
/*
    MatMultTranspose_MPIFFTW performs the parallel backward DFT

    Input parameters:
      A - the FFT matrix
      x - the vector on which the backward DFT will be performed

    Output parameter:
      y - vector that stores the result of the backward DFT

    Notes:
    Mirrors MatMult_MPIFFTW with FFTW_BACKWARD / c2r plans.  The backward
    transform is unnormalized (FFTW convention).
    NOTE(review): as in the forward routine, the c2r branches hardcode
    FFTW_ESTIMATE and the plan-reuse branch calls fftw_execute_dft()
    even for c2r plans in real builds — verify.
*/
PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
  MPI_Comm          comm;

  PetscObjectGetComm((PetscObject)A,&comm);
  VecGetArrayRead(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default: /* dimensions > 3 */
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    /* remember which arrays the backward plan was created for */
    fftw->binarray  = (PetscScalar *) x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArrayRead(x,&x_array);
  return 0;
}
356: #endif
358: PetscErrorCode MatDestroy_FFTW(Mat A)
359: {
360: Mat_FFT *fft = (Mat_FFT*)A->data;
361: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
363: fftw_destroy_plan(fftw->p_forward);
364: fftw_destroy_plan(fftw->p_backward);
365: if (fftw->iodims) {
366: free(fftw->iodims);
367: }
368: PetscFree(fftw->dim_fftw);
369: PetscFree(fft->data);
370: #if !PetscDefined(HAVE_MPIUNI)
371: fftw_mpi_cleanup();
372: #endif
373: return 0;
374: }
376: #if !PetscDefined(HAVE_MPIUNI)
377: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
/*
    VecDestroy_MPIFFTW - destroy method installed on vectors created by
    MatCreateVecsFFTW_FFTW() in the parallel case

    The data array of such a vector was obtained from fftw_malloc() (PETSc's
    array_allocated pointer is NULL), so it must be released with fftw_free()
    here before the regular MPI vector destruction runs.
*/
PetscErrorCode VecDestroy_MPIFFTW(Vec v)
{
  PetscScalar *array;

  VecGetArray(v,&array);
  fftw_free((fftw_complex*)array);   /* release the fftw_malloc'd storage */
  VecRestoreArray(v,&array);
  VecDestroy_MPI(v);                 /* then do the standard MPI Vec teardown */
  return 0;
}
388: #endif
390: #if !PetscDefined(HAVE_MPIUNI)
391: static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new)
392: {
393: Mat A;
395: PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);
396: MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);
397: return 0;
398: }
400: static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new)
401: {
402: Mat A;
404: PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);
405: MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);
406: return 0;
407: }
409: static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
410: {
411: Mat A;
413: PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);
414: MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);
415: return 0;
416: }
417: #endif
/*@
   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
   parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  x - (optional) input vector of forward FFTW
.  y - (optional) output vector of forward FFTW
-  z - (optional) output vector of backward FFTW

   Level: advanced

   Note: The parallel layout of the output of the forward FFTW is always the same
   as the input of the backward FFTW.  However, the parallel layout of the input
   vector of the forward FFTW might not be the same as the output of the backward FFTW.
   Also note that we need to provide enough space while doing a parallel real transform:
   extra zeros must be padded at the end of the last dimension.  For this reason one
   needs to invoke the routine fftw_mpi_local_size_transposed, changing the last
   dimension from n to n/2+1 when doing so.  The number of zeros to be padded depends
   on whether the last dimension is even or odd: an even last dimension needs two
   padded zeros, an odd one needs only one.
   Lastly, some scratch space is needed at the end of the data set on each process;
   alloc_local figures out how much space is needed, i.e. it computes the data+scratch
   space for each process and returns that.

.seealso: MatCreateFFT()
@*/
/* Public entry point: dispatches to the type-specific implementation
   registered on the matrix under "MatCreateVecsFFTW_C". */
PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
  return 0;
}
456: PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
457: {
458: PetscMPIInt size,rank;
459: MPI_Comm comm;
460: Mat_FFT *fft = (Mat_FFT*)A->data;
464: PetscObjectGetComm((PetscObject)A,&comm);
466: MPI_Comm_size(comm, &size);
467: MPI_Comm_rank(comm, &rank);
468: if (size == 1) { /* sequential case */
469: #if defined(PETSC_USE_COMPLEX)
470: if (fin) VecCreateSeq(PETSC_COMM_SELF,fft->N,fin);
471: if (fout) VecCreateSeq(PETSC_COMM_SELF,fft->N,fout);
472: if (bout) VecCreateSeq(PETSC_COMM_SELF,fft->N,bout);
473: #else
474: if (fin) VecCreateSeq(PETSC_COMM_SELF,fft->n,fin);
475: if (fout) VecCreateSeq(PETSC_COMM_SELF,fft->n,fout);
476: if (bout) VecCreateSeq(PETSC_COMM_SELF,fft->n,bout);
477: #endif
478: #if !PetscDefined(HAVE_MPIUNI)
479: } else { /* parallel cases */
480: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
481: PetscInt ndim = fft->ndim,*dim = fft->dim;
482: ptrdiff_t alloc_local,local_n0,local_0_start;
483: ptrdiff_t local_n1;
484: fftw_complex *data_fout;
485: ptrdiff_t local_1_start;
486: #if defined(PETSC_USE_COMPLEX)
487: fftw_complex *data_fin,*data_bout;
488: #else
489: double *data_finr,*data_boutr;
490: PetscInt n1,N1;
491: ptrdiff_t temp;
492: #endif
494: switch (ndim) {
495: case 1:
496: #if !defined(PETSC_USE_COMPLEX)
497: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
498: #else
499: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
500: if (fin) {
501: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
502: VecCreateMPIWithArray(comm,1,local_n0,fft->N,(const PetscScalar*)data_fin,fin);
503: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
504: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
505: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
506: }
507: if (fout) {
508: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
509: VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_fout,fout);
510: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
511: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
512: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
513: }
514: alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
515: if (bout) {
516: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
517: VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_bout,bout);
518: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
519: (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
520: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
521: }
522: break;
523: #endif
524: case 2:
525: #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
526: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
527: N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
528: if (fin) {
529: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
530: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
531: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
532: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
533: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
534: }
535: if (fout) {
536: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
537: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);
538: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
539: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
540: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
541: }
542: if (bout) {
543: data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
544: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
545: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
546: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
547: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
548: }
549: #else
550: /* Get local size */
551: alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
552: if (fin) {
553: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
554: VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);
555: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
556: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
557: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
558: }
559: if (fout) {
560: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
561: VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);
562: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
563: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
564: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
565: }
566: if (bout) {
567: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
568: VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);
569: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
570: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
571: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
572: }
573: #endif
574: break;
575: case 3:
576: #if !defined(PETSC_USE_COMPLEX)
577: alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
578: N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
579: if (fin) {
580: data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
581: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
582: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
583: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
584: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
585: }
586: if (fout) {
587: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
588: VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);
589: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
590: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
591: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
592: }
593: if (bout) {
594: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
595: VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
596: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
597: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
598: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
599: }
600: #else
601: alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
602: if (fin) {
603: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
604: VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);
605: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
606: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
607: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
608: }
609: if (fout) {
610: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
611: VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);
612: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
613: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
614: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
615: }
616: if (bout) {
617: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
618: VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);
619: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
620: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
621: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
622: }
623: #endif
624: break;
625: default:
626: #if !defined(PETSC_USE_COMPLEX)
627: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
628: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
629: alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
630: N1 = 2*fft->N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
631: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
633: if (fin) {
634: data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
635: VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_finr,fin);
636: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
637: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
638: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
639: }
640: if (fout) {
641: data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
642: VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_fout,fout);
643: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
644: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
645: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
646: }
647: if (bout) {
648: data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
649: VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_boutr,bout);
650: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
651: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
652: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
653: }
654: #else
655: alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
656: if (fin) {
657: data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
658: VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);
659: PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);
660: (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
661: (*fin)->ops->destroy = VecDestroy_MPIFFTW;
662: }
663: if (fout) {
664: data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
665: VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);
666: PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);
667: (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
668: (*fout)->ops->destroy = VecDestroy_MPIFFTW;
669: }
670: if (bout) {
671: data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
672: VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);
673: PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);
674: (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
675: (*bout)->ops->destroy = VecDestroy_MPIFFTW;
676: }
677: #endif
678: break;
679: }
680: /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
681: v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
682: memory leaks. We void these pointers here to avoid acident uses.
683: */
684: if (fin) (*fin)->ops->replacearray = NULL;
685: if (fout) (*fout)->ops->replacearray = NULL;
686: if (bout) (*bout)->ops->replacearray = NULL;
687: #endif
688: }
689: return 0;
690: }
/*@
   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into the FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

   Options Database Keys:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For a real parallel FFT, FFTW requires insertion of extra space at the end of
   the last dimension.  This is required even when one is not doing an in-place transform.
   The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
   zeros.  This routine does that job by a scattering operation.

.seealso: VecScatterFFTWToPetsc()
@*/
/* Public entry point: dispatches to the type-specific implementation
   registered on the matrix under "VecScatterPetscToFFTW_C". */
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
  return 0;
}
721: PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
722: {
723: MPI_Comm comm;
724: Mat_FFT *fft = (Mat_FFT*)A->data;
725: PetscInt low;
726: PetscMPIInt rank,size;
727: PetscInt vsize,vsize1;
728: VecScatter vecscat;
729: IS list1;
731: PetscObjectGetComm((PetscObject)A,&comm);
732: MPI_Comm_size(comm, &size);
733: MPI_Comm_rank(comm, &rank);
734: VecGetOwnershipRange(y,&low,NULL);
736: if (size==1) {
737: VecGetSize(x,&vsize);
738: VecGetSize(y,&vsize1);
739: ISCreateStride(PETSC_COMM_SELF,fft->N,0,1,&list1);
740: VecScatterCreate(x,list1,y,list1,&vecscat);
741: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
742: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
743: VecScatterDestroy(&vecscat);
744: ISDestroy(&list1);
745: #if !PetscDefined(HAVE_MPIUNI)
746: } else {
747: Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
748: PetscInt ndim = fft->ndim,*dim = fft->dim;
749: ptrdiff_t local_n0,local_0_start;
750: ptrdiff_t local_n1,local_1_start;
751: IS list2;
752: #if !defined(PETSC_USE_COMPLEX)
753: PetscInt i,j,k,partial_dim;
754: PetscInt *indx1, *indx2, tempindx, tempindx1;
755: PetscInt NM;
756: ptrdiff_t temp;
757: #endif
759: switch (ndim) {
760: case 1:
761: #if defined(PETSC_USE_COMPLEX)
762: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
764: ISCreateStride(comm,local_n0,local_0_start,1,&list1);
765: ISCreateStride(comm,local_n0,low,1,&list2);
766: VecScatterCreate(x,list1,y,list2,&vecscat);
767: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
768: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
769: VecScatterDestroy(&vecscat);
770: ISDestroy(&list1);
771: ISDestroy(&list2);
772: #else
773: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
774: #endif
775: break;
776: case 2:
777: #if defined(PETSC_USE_COMPLEX)
778: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
780: ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
781: ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
782: VecScatterCreate(x,list1,y,list2,&vecscat);
783: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
784: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
785: VecScatterDestroy(&vecscat);
786: ISDestroy(&list1);
787: ISDestroy(&list2);
788: #else
789: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
791: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
792: PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);
794: if (dim[1]%2==0) {
795: NM = dim[1]+2;
796: } else {
797: NM = dim[1]+1;
798: }
799: for (i=0; i<local_n0; i++) {
800: for (j=0; j<dim[1]; j++) {
801: tempindx = i*dim[1] + j;
802: tempindx1 = i*NM + j;
804: indx1[tempindx]=local_0_start*dim[1]+tempindx;
805: indx2[tempindx]=low+tempindx1;
806: }
807: }
809: ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
810: ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);
812: VecScatterCreate(x,list1,y,list2,&vecscat);
813: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
814: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
815: VecScatterDestroy(&vecscat);
816: ISDestroy(&list1);
817: ISDestroy(&list2);
818: PetscFree(indx1);
819: PetscFree(indx2);
820: #endif
821: break;
823: case 3:
824: #if defined(PETSC_USE_COMPLEX)
825: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
827: ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
828: ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
829: VecScatterCreate(x,list1,y,list2,&vecscat);
830: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
831: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
832: VecScatterDestroy(&vecscat);
833: ISDestroy(&list1);
834: ISDestroy(&list2);
835: #else
836: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
837: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
838: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
840: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
841: PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);
843: if (dim[2]%2==0) NM = dim[2]+2;
844: else NM = dim[2]+1;
846: for (i=0; i<local_n0; i++) {
847: for (j=0; j<dim[1]; j++) {
848: for (k=0; k<dim[2]; k++) {
849: tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
850: tempindx1 = i*dim[1]*NM + j*NM + k;
852: indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
853: indx2[tempindx]=low+tempindx1;
854: }
855: }
856: }
858: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
859: ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
860: VecScatterCreate(x,list1,y,list2,&vecscat);
861: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
862: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
863: VecScatterDestroy(&vecscat);
864: ISDestroy(&list1);
865: ISDestroy(&list2);
866: PetscFree(indx1);
867: PetscFree(indx2);
868: #endif
869: break;
871: default:
872: #if defined(PETSC_USE_COMPLEX)
873: fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
875: ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
876: ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
877: VecScatterCreate(x,list1,y,list2,&vecscat);
878: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
879: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
880: VecScatterDestroy(&vecscat);
881: ISDestroy(&list1);
882: ISDestroy(&list2);
883: #else
884: /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
885: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
886: temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
888: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
890: fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
892: (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
894: partial_dim = fftw->partial_dim;
896: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
897: PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);
899: if (dim[ndim-1]%2==0) NM = 2;
900: else NM = 1;
902: j = low;
903: for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
904: indx1[i] = local_0_start*partial_dim + i;
905: indx2[i] = j;
906: if (k%dim[ndim-1]==0) j+=NM;
907: j++;
908: }
909: ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
910: ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
911: VecScatterCreate(x,list1,y,list2,&vecscat);
912: VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
913: VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
914: VecScatterDestroy(&vecscat);
915: ISDestroy(&list1);
916: ISDestroy(&list2);
917: PetscFree(indx1);
918: PetscFree(indx2);
919: #endif
920: break;
921: }
922: #endif
923: }
924: return 0;
925: }
927: /*@
928: VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
930: Collective on Mat
932: Input Parameters:
933: + A - FFTW matrix
934: - x - FFTW vector
   Output Parameter:
.  y - PETSc vector
939: Level: intermediate
   Note: When performing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.
944: .seealso: VecScatterPetscToFFTW()
945: @*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  /* Dispatch to the implementation composed on A under the key
     "VecScatterFFTWToPetsc_C" (VecScatterFFTWToPetsc_FFTW for MATFFTW);
     PetscUseMethod errors if no such method is composed on A. */
  PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
  return 0;
}
PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
{
  /* Implementation of VecScatterFFTWToPetsc() for MATFFTW: scatters the
     FFTW-layout vector x into the natural PETSc-layout vector y.  For parallel
     real transforms this drops the padding entries FFTW stores at the end of
     the last dimension (padded row length NM = 2*(dim/2+1)). */
  MPI_Comm comm;
  Mat_FFT *fft = (Mat_FFT*)A->data;
  PetscInt low;               /* first global index of x owned by this rank */
  PetscMPIInt rank,size;
  VecScatter vecscat;
  IS list1;

  PetscObjectGetComm((PetscObject)A,&comm);
  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);
  VecGetOwnershipRange(x,&low,NULL);

  if (size==1) {
    /* Sequential case: layouts coincide, copy all N entries one-to-one */
    ISCreateStride(comm,fft->N,0,1,&list1);
    VecScatterCreate(x,list1,y,list1,&vecscat);
    VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterDestroy(&vecscat);
    ISDestroy(&list1);

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW*)fft->data;
    PetscInt ndim = fft->ndim,*dim = fft->dim;
    ptrdiff_t local_n0,local_0_start;   /* this rank's slab of the first dimension */
    ptrdiff_t local_n1,local_1_start;   /* slab in the transposed/backward layout */
    IS list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt i,j,k,partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt NM;                        /* padded last-dimension length: 2*(dim/2+1) */
    ptrdiff_t temp;
#endif
    /* In the index-list cases below, list1/indx1 hold natural (unpadded) global
       indices and list2/indx2 hold the corresponding indices in the padded
       FFTW layout, offset by this rank's start (low) in x. */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      /* 1D complex: ownership of the backward transform is given by local_n1/local_1_start */
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ISCreateStride(comm,local_n1,local_1_start,1,&list1);
      ISCreateStride(comm,local_n1,low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      /* 2D complex: contiguous rows, no padding; strided index sets suffice */
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
      ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      /* 2D real: each row of length dim[1] is stored padded to NM entries in x */
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);
      PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);

      if (dim[1]%2==0) NM = dim[1]+2;
      else NM = dim[1]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          tempindx = i*dim[1] + j;   /* position in the unpadded (PETSc) row */
          tempindx1 = i*NM + j;      /* position in the padded (FFTW) row */

          indx1[tempindx]=local_0_start*dim[1]+tempindx;
          indx2[tempindx]=low+tempindx1;
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
      ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      /* 3D real: only the innermost dimension dim[2] is padded (to NM) */
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
      PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);

      if (dim[2]%2==0) NM = dim[2]+2;
      else NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx = i*dim[1]*dim[2] + j*dim[2] + k;  /* unpadded offset */
            tempindx1 = i*dim[1]*NM + j*NM + k;         /* padded offset */

            indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx]=low+tempindx1;
          }
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
      ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      /* ndim>3 real: temporarily replace the last dimension with the r2c size
         (temp/2+1) to query the transposed local sizes, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);
      PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);

      /* NM = number of padding entries per row of the last dimension */
      if (dim[ndim-1]%2==0) NM = 2;
      else NM = 1;

      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        /* skip the NM padding entries at the end of each last-dimension row */
        if (k%dim[ndim-1]==0) j+=NM;
        j++;
      }
      ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    }
#endif
  }
  return 0;
}
1149: /*
1150: MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
   Options Database Key:
.  -mat_fftw_plannerflags - set the FFTW planner flags (one of FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE)
1155: Level: intermediate
1157: */
1158: PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1159: {
1160: MPI_Comm comm;
1161: Mat_FFT *fft = (Mat_FFT*)A->data;
1162: Mat_FFTW *fftw;
1163: PetscInt ndim = fft->ndim,*dim = fft->dim;
1164: const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1165: unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1166: PetscBool flg;
1167: PetscInt p_flag,partial_dim=1,ctr;
1168: PetscMPIInt size,rank;
1169: ptrdiff_t *pdim;
1171: #if !defined(PETSC_USE_COMPLEX)
1172: PetscInt tot_dim;
1173: #endif
1175: PetscObjectGetComm((PetscObject)A,&comm);
1176: MPI_Comm_size(comm, &size);
1177: MPI_Comm_rank(comm, &rank);
1179: #if !PetscDefined(HAVE_MPIUNI)
1180: fftw_mpi_init();
1181: #endif
1182: pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1183: pdim[0] = dim[0];
1184: #if !defined(PETSC_USE_COMPLEX)
1185: tot_dim = 2*dim[0];
1186: #endif
1187: for (ctr=1; ctr<ndim; ctr++) {
1188: partial_dim *= dim[ctr];
1189: pdim[ctr] = dim[ctr];
1190: #if !defined(PETSC_USE_COMPLEX)
1191: if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1192: else tot_dim *= dim[ctr];
1193: #endif
1194: }
1196: if (size == 1) {
1197: #if defined(PETSC_USE_COMPLEX)
1198: MatSetSizes(A,fft->N,fft->N,fft->N,fft->N);
1199: fft->n = fft->N;
1200: #else
1201: MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);
1202: fft->n = tot_dim;
1203: #endif
1204: #if !PetscDefined(HAVE_MPIUNI)
1205: } else {
1206: ptrdiff_t local_n0,local_0_start,local_n1,local_1_start;
1207: #if !defined(PETSC_USE_COMPLEX)
1208: ptrdiff_t temp;
1209: PetscInt N1;
1210: #endif
1212: switch (ndim) {
1213: case 1:
1214: #if !defined(PETSC_USE_COMPLEX)
1215: SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1216: #else
1217: fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1218: fft->n = (PetscInt)local_n0;
1219: MatSetSizes(A,local_n1,fft->n,fft->N,fft->N);
1220: #endif
1221: break;
1222: case 2:
1223: #if defined(PETSC_USE_COMPLEX)
1224: fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1225: fft->n = (PetscInt)local_n0*dim[1];
1226: MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);
1227: #else
1228: fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1230: fft->n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1231: MatSetSizes(A,fft->n,fft->n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1232: #endif
1233: break;
1234: case 3:
1235: #if defined(PETSC_USE_COMPLEX)
1236: fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1238: fft->n = (PetscInt)local_n0*dim[1]*dim[2];
1239: MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);
1240: #else
1241: fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1243: fft->n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1244: MatSetSizes(A,fft->n,fft->n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1245: #endif
1246: break;
1247: default:
1248: #if defined(PETSC_USE_COMPLEX)
1249: fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1251: fft->n = (PetscInt)local_n0*partial_dim;
1252: MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);
1253: #else
1254: temp = pdim[ndim-1];
1256: pdim[ndim-1] = temp/2 + 1;
1258: fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1260: fft->n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1261: N1 = 2*fft->N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1263: pdim[ndim-1] = temp;
1265: MatSetSizes(A,fft->n,fft->n,N1,N1);
1266: #endif
1267: break;
1268: }
1269: #endif
1270: }
1271: free(pdim);
1272: PetscObjectChangeTypeName((PetscObject)A,MATFFTW);
1273: PetscNewLog(A,&fftw);
1274: fft->data = (void*)fftw;
1276: fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */
1277: fftw->partial_dim = partial_dim;
1279: PetscMalloc1(ndim, &(fftw->dim_fftw));
1280: if (size == 1) {
1281: #if defined(PETSC_USE_64BIT_INDICES)
1282: fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1283: #else
1284: fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1285: #endif
1286: }
1288: for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1290: fftw->p_forward = NULL;
1291: fftw->p_backward = NULL;
1292: fftw->p_flag = FFTW_ESTIMATE;
1294: if (size == 1) {
1295: A->ops->mult = MatMult_SeqFFTW;
1296: A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1297: #if !PetscDefined(HAVE_MPIUNI)
1298: } else {
1299: A->ops->mult = MatMult_MPIFFTW;
1300: A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1301: #endif
1302: }
1303: fft->matdestroy = MatDestroy_FFTW;
1304: A->assembled = PETSC_TRUE;
1305: A->preallocated = PETSC_TRUE;
1307: PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);
1308: PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);
1309: PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);
1311: /* get runtime options */
1312: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
1313: PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);
1314: if (flg) fftw->p_flag = iplans[p_flag];
1315: PetscOptionsEnd();
1316: return 0;
1317: }