Actual source code: fftw.c
petsc-3.4.5 2014-06-29
/*
    Provides an interface to the FFTW package.
    Testing examples can be found in ~src/mat/examples/tests
*/

#include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
EXTERN_C_BEGIN
#include <fftw3-mpi.h>
EXTERN_C_END

typedef struct {
  ptrdiff_t   ndim_fftw,*dim_fftw;
  PetscInt    partial_dim;
  fftw_plan   p_forward,p_backward;
  unsigned    p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of the arrays because an FFTW plan should be
                                                            executed on the arrays with which it was created */
} Mat_FFTW;
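
/*
   Note (added): FFTW's new-array execute functions such as fftw_execute_dft()
   may be applied to arrays other than the ones given at planning time only if
   the new arrays have the same alignment and in-place/out-of-place layout as
   the originals; see the "New-array Execute Functions" section of the FFTW
   manual. The array tracking above exists so the code below can choose between
   fftw_execute() and fftw_execute_dft().
*/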

extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);

/* MatMult_SeqFFTW performs serial forward DFT
   Input parameters:
     A - the matrix
     x - the vector on which FDFT will be performed

   Output parameter:
     y - vector that stores the result of FDFT
*/

PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscScalar *x_array,*y_array;
  PetscInt    ndim=fft->ndim,*dim=fft->dim;

  VecGetArray(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    }
    fftw->finarray  = x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArray(x,&x_array);
  return(0);
}
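
/*
   Usage sketch (added, not part of the original source; error checking and
   PetscInitialize/PetscFinalize omitted): creating a 2D FFTW matrix on one
   process and applying the forward transform through MatMult(), which
   dispatches to MatMult_SeqFFTW() above. Vector names are illustrative.

     Mat      A;
     Vec      x,y,z;
     PetscInt dim[2] = {8,8};

     MatCreateFFT(PETSC_COMM_SELF,2,dim,MATFFTW,&A);
     MatGetVecsFFTW(A,&x,&y,&z);
     .. fill x with data ..
     MatMult(A,x,y);    .. y now holds the unnormalized forward DFT of x ..
*/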

/* MatMultTranspose_SeqFFTW performs serial backward DFT
   Input parameters:
     A - the matrix
     x - the vector on which BDFT will be performed

   Output parameter:
     y - vector that stores the result of BDFT
*/

PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscScalar *x_array,*y_array;
  PetscInt    ndim=fft->ndim,*dim=fft->dim;

  VecGetArray(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    }
    fftw->binarray  = x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArray(x,&x_array);
  return(0);
}
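
/*
   Note (added): like FFTW itself, these transforms are unnormalized, so a
   forward transform followed by a backward transform multiplies the data by
   N, the product of the dimensions. A minimal round trip, continuing the
   sketch above:

     MatMult(A,x,y);                   .. forward DFT ..
     MatMultTranspose(A,y,z);          .. backward DFT ..
     VecScale(z,1.0/(PetscReal)N);     .. undo the factor of N; z matches x ..
*/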

/* MatMult_MPIFFTW performs parallel forward DFT
   Input parameters:
     A - the matrix
     x - the vector on which FDFT will be performed

   Output parameter:
     y - vector that stores the result of FDFT
*/

PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscScalar *x_array,*y_array;
  PetscInt    ndim=fft->ndim,*dim=fft->dim;
  MPI_Comm    comm;

  PetscObjectGetComm((PetscObject)A,&comm);
  VecGetArray(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->finarray  = x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArray(x,&x_array);
  return(0);
}
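
/*
   Usage sketch (added; declarations and error checking omitted): in parallel,
   the work vectors must have the FFTW-imposed layout, so they are obtained
   from MatGetVecsFFTW() and filled through VecScatterPetscToFFTW(), both
   defined below. `xin` is a hypothetical application vector in the natural
   PETSc layout.

     PetscInt dim[3] = {8,8,8};

     MatCreateFFT(PETSC_COMM_WORLD,3,dim,MATFFTW,&A);
     MatGetVecsFFTW(A,&x,&y,&z);
     VecScatterPetscToFFTW(A,xin,x);
     MatMult(A,x,y);    .. parallel forward DFT via MatMult_MPIFFTW ..
*/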

/* MatMultTranspose_MPIFFTW performs parallel backward DFT
   Input parameters:
     A - the matrix
     x - the vector on which BDFT will be performed

   Output parameter:
     y - vector that stores the result of BDFT
*/

PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscScalar *x_array,*y_array;
  PetscInt    ndim=fft->ndim,*dim=fft->dim;
  MPI_Comm    comm;

  PetscObjectGetComm((PetscObject)A,&comm);
  VecGetArray(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->binarray  = x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArray(x,&x_array);
  return(0);
}

PetscErrorCode MatDestroy_FFTW(Mat A)
{
  Mat_FFT  *fft  = (Mat_FFT*)A->data;
  Mat_FFTW *fftw = (Mat_FFTW*)fft->data;

  fftw_destroy_plan(fftw->p_forward);
  fftw_destroy_plan(fftw->p_backward);
  PetscFree(fftw->dim_fftw);
  PetscFree(fft->data);
  fftw_mpi_cleanup();
  return(0);
}

#include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/

PetscErrorCode VecDestroy_MPIFFTW(Vec v)
{
  PetscScalar *array;

  VecGetArray(v,&array);
  fftw_free((fftw_complex*)array);
  VecRestoreArray(v,&array);
  VecDestroy_MPI(v);
  return(0);
}

/*@
   MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
   parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  x - (optional) input vector of forward FFTW
.  y - (optional) output vector of forward FFTW
-  z - (optional) output vector of backward FFTW

   Level: advanced

   Notes: The parallel layout of the output of the forward FFTW is always the same as the
   input of the backward FFTW, but the parallel layout of the input vector of the forward
   FFTW might not be the same as the output of the backward FFTW.
   Also note that enough space must be provided when doing a parallel real transform:
   extra zeros must be padded at the end of the last dimension. For this reason one needs to
   invoke the fftw_mpi_local_size_transposed routine, remembering to change the
   last dimension from n to n/2+1 when invoking it. The number of zeros to be padded
   depends on whether the last dimension is even or odd: if it is even, two zeros must be
   padded; if it is odd, only one.
   Lastly, some scratch space is needed at the end of the data set on each process; alloc_local
   computes the combined data+scratch space needed on each processor and returns that.

.seealso: MatCreateFFT()
@*/
PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
  return(0);
}
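
/*
   Worked example (added) of the padding described in the note above, for a
   real (r2c) 2D transform: with dim[] = {4,5} the last dimension is odd, so
   one zero is padded per row and each row occupies 2*(5/2+1) = 6 doubles,
   giving a global vector size of 4*6 = 24 rather than 4*5 = 20. With
   dim[] = {4,6} the last dimension is even, two zeros are padded per row,
   and rows occupy 2*(6/2+1) = 8 doubles, for a global size of 32.
*/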

PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscMPIInt size,rank;
  MPI_Comm    comm;
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscInt    N     = fft->N;
  PetscInt    ndim  = fft->ndim,*dim=fft->dim,n=fft->n;

  PetscObjectGetComm((PetscObject)A,&comm);
  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin)  {VecCreateSeq(PETSC_COMM_SELF,N,fin);}
    if (fout) {VecCreateSeq(PETSC_COMM_SELF,N,fout);}
    if (bout) {VecCreateSeq(PETSC_COMM_SELF,N,bout);}
#else
    if (fin)  {VecCreateSeq(PETSC_COMM_SELF,n,fin);}
    if (fout) {VecCreateSeq(PETSC_COMM_SELF,n,fout);}
    if (bout) {VecCreateSeq(PETSC_COMM_SELF,n,bout);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double       *data_finr,*data_boutr;
    PetscInt     n1,N1;
    ptrdiff_t    temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  return(0);
}

/*@
   VecScatterPetscToFFTW - Copies a PETSc vector into the vector that goes into the FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameter:
.  y - the FFTW vector

   Options Database Keys:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For a parallel real FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even
   when one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these
   extra zeros. This routine does that job with a scatter operation.

.seealso: VecScatterFFTWToPetsc()
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
  return(0);
}

PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
{
  MPI_Comm   comm;
  Mat_FFT    *fft  = (Mat_FFT*)A->data;
  Mat_FFTW   *fftw = (Mat_FFTW*)fft->data;
  PetscInt   N     = fft->N;
  PetscInt   ndim  = fft->ndim,*dim=fft->dim;
  PetscInt   low;
  PetscInt   rank,size,vsize,vsize1;
  ptrdiff_t  alloc_local,local_n0,local_0_start;
  ptrdiff_t  local_n1,local_1_start;
  VecScatter vecscat;
  IS         list1,list2;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt   i,j,k,partial_dim;
  PetscInt   *indx1,*indx2,tempindx,tempindx1;
  PetscInt   N1,n1,NM;
  ptrdiff_t  temp;
#endif

  PetscObjectGetComm((PetscObject)A,&comm);
  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);
  VecGetOwnershipRange(y,&low,NULL);

  if (size==1) {
    VecGetSize(x,&vsize);
    VecGetSize(y,&vsize1);
    ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
    VecScatterCreate(x,list1,y,list1,&vecscat);
    VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterDestroy(&vecscat);
    ISDestroy(&list1);
  } else {
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ISCreateStride(comm,local_n0,local_0_start,1,&list1);
      ISCreateStride(comm,local_n0,low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
      ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);

      if (dim[1]%2==0) {
        NM = dim[1]+2;
      } else {
        NM = dim[1]+1;
      }
      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          tempindx  = i*dim[1] + j;
          tempindx1 = i*NM + j;

          indx1[tempindx] = local_0_start*dim[1]+tempindx;
          indx2[tempindx] = low+tempindx1;
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
      ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);

      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);

      if (dim[2]%2==0) NM = dim[2]+2;
      else             NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
            tempindx1 = i*dim[1]*NM + j*NM + k;

            indx1[tempindx] = local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx] = low+tempindx1;
          }
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
      ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);

      if (dim[ndim-1]%2==0) NM = 2;
      else                  NM = 1;

      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j+=NM;
        j++;
      }
      ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    }
  }
  return(0);
}
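
/*
   Worked example (added) of the index mapping built above, for the real 2D
   case with dim[] = {4,5}: the last dimension is odd, so NM = dim[1]+1 = 6.
   The entry at local row i, column j is taken from global index
   (local_0_start + i)*5 + j of the PETSc vector and placed at padded position
   low + i*6 + j of the FFTW vector, leaving one zero slot after each row of
   5 entries.
*/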

/*@
   VecScatterFFTWToPetsc - Converts FFTW output back to a PETSc vector.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - FFTW vector

   Output Parameter:
.  y - PETSc vector

   Level: intermediate

   Note: When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
   VecScatterFFTWToPetsc removes those extra zeros.

.seealso: VecScatterPetscToFFTW()
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
  return(0);
}
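
/*
   Usage sketch (added; declarations and error checking omitted): a complete
   parallel real-transform round trip using both scatter routines. `xin` and
   `xout` are hypothetical application vectors in the natural PETSc layout.

     MatCreateFFT(PETSC_COMM_WORLD,ndim,dim,MATFFTW,&A);
     MatGetVecsFFTW(A,&x,&y,&z);
     VecScatterPetscToFFTW(A,xin,x);      .. pad to the FFTW layout ..
     MatMult(A,x,y);                      .. forward r2c transform ..
     MatMultTranspose(A,y,z);             .. backward c2r transform ..
     VecScatterFFTWToPetsc(A,z,xout);     .. strip the padding again ..
     VecScale(xout,1.0/(PetscReal)N);     .. transforms are unnormalized ..
*/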

PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
{
  MPI_Comm   comm;
  Mat_FFT    *fft  = (Mat_FFT*)A->data;
  Mat_FFTW   *fftw = (Mat_FFTW*)fft->data;
  PetscInt   N     = fft->N;
  PetscInt   ndim  = fft->ndim,*dim=fft->dim;
  PetscInt   low;
  PetscInt   rank,size;
  ptrdiff_t  alloc_local,local_n0,local_0_start;
  ptrdiff_t  local_n1,local_1_start;
  VecScatter vecscat;
  IS         list1,list2;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt   i,j,k,partial_dim;
  PetscInt   *indx1,*indx2,tempindx,tempindx1;
  PetscInt   N1,n1,NM;
  ptrdiff_t  temp;
#endif

  PetscObjectGetComm((PetscObject)A,&comm);
  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);
  VecGetOwnershipRange(x,&low,NULL);

  if (size==1) {
    ISCreateStride(comm,N,0,1,&list1);
    VecScatterCreate(x,list1,y,list1,&vecscat);
    VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterDestroy(&vecscat);
    ISDestroy(&list1);
  } else {
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ISCreateStride(comm,local_n1,local_1_start,1,&list1);
      ISCreateStride(comm,local_n1,low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
      ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);

      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);

      if (dim[1]%2==0) NM = dim[1]+2;
      else             NM = dim[1]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          tempindx  = i*dim[1] + j;
          tempindx1 = i*NM + j;

          indx1[tempindx] = local_0_start*dim[1]+tempindx;
          indx2[tempindx] = low+tempindx1;
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
      ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);

      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);

      if (dim[2]%2==0) NM = dim[2]+2;
      else             NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
            tempindx1 = i*dim[1]*NM + j*NM + k;

            indx1[tempindx] = local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx] = low+tempindx1;
          }
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
      ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);

      if (dim[ndim-1]%2==0) NM = 2;
      else                  NM = 1;

      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j+=NM;
        j++;
      }
      ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    }
  }
  return(0);
}
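
/*
   Note (added): the index mapping built above is the same as in
   VecScatterPetscToFFTW_FFTW(), but in the real branches the scatter is
   created with the index sets swapped, VecScatterCreate(x,list2,y,list1,...),
   so the padded slots in the FFTW vector are skipped rather than filled when
   copying back to the natural PETSc layout.
*/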

/*
    MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW

    Options Database Keys:
.   -mat_fftw_plannerflags - set FFTW planner flags

    Level: intermediate
*/
PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
{
  MPI_Comm    comm;
  Mat_FFT     *fft = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw;
  PetscInt    n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
  const char  *p_flags[] = {"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
  PetscBool   flg;
  PetscInt    p_flag,partial_dim=1,ctr;
  PetscMPIInt size,rank;
  ptrdiff_t   *pdim;
  ptrdiff_t   local_n1,local_1_start;
#if !defined(PETSC_USE_COMPLEX)
  ptrdiff_t   temp;
  PetscInt    N1,tot_dim;
#else
  PetscInt    n1;
#endif

  PetscObjectGetComm((PetscObject)A,&comm);
  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);

  fftw_mpi_init();
  pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  tot_dim = 2*dim[0];
#endif
  for (ctr=1; ctr<ndim; ctr++) {
    partial_dim *= dim[ctr];
    pdim[ctr]    = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
    else             tot_dim *= dim[ctr];
#endif
  }

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    MatSetSizes(A,N,N,N,N);
    n = N;
#else
    MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);
    n = tot_dim;
#endif
  } else {
    ptrdiff_t alloc_local,local_n0,local_0_start;
    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
#else
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n  = (PetscInt)local_n0;
      n1 = (PetscInt)local_n1;
      MatSetSizes(A,n1,n,N,N);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      /*
       PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
       PetscSynchronizedFlush(comm);
       */
      n = (PetscInt)local_n0*dim[1];
      MatSetSizes(A,n,n,N,N);
#else
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n = 2*(PetscInt)local_n0*(dim[1]/2+1);
      MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      n = (PetscInt)local_n0*dim[1]*dim[2];
      MatSetSizes(A,n,n,N,N);
#else
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
      MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);

      n = (PetscInt)local_n0*partial_dim;
      MatSetSizes(A,n,n,N,N);
#else
      temp = pdim[ndim-1];

      pdim[ndim-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
      N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);

      pdim[ndim-1] = temp;

      MatSetSizes(A,n,n,N1,N1);
#endif
      break;
    }
  }
  PetscObjectChangeTypeName((PetscObject)A,MATFFTW);
  PetscNewLog(A,Mat_FFTW,&fftw);
  fft->data = (void*)fftw;

  fft->n            = n;
  fftw->ndim_fftw   = (ptrdiff_t)ndim; /* dimension of the transform */
  fftw->partial_dim = partial_dim;

  PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t*)&(fftw->dim_fftw));

  for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];

  fftw->p_forward  = 0;
  fftw->p_backward = 0;
  fftw->p_flag     = FFTW_ESTIMATE;

  if (size == 1) {
    A->ops->mult          = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
  } else {
    A->ops->mult          = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;

  PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);
  PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);
  PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);

  /* get runtime options */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
  PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);
  if (flg) fftw->p_flag = (unsigned)p_flag;
  PetscOptionsEnd();
  return(0);
}
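
/*
   Usage note (added): the planner flag can be selected at runtime, e.g.

     ./myapp -mat_fftw_plannerflags FFTW_MEASURE

   (`myapp` is a hypothetical executable name). With the default FFTW_ESTIMATE
   a plan is created quickly without touching the arrays; the other flags
   (FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE) search for a faster plan but
   overwrite the in/out arrays during planning, which is why the warnings in
   MatMult_SeqFFTW() and MatMult_MPIFFTW() above say to plan before the input
   vector is initialized.
*/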