Actual source code: fftw.c
petsc-3.3-p7 2013-05-11
/*
    Provides an interface to the FFTW package.
    Testing examples can be found in ~src/mat/examples/tests
*/

#include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
EXTERN_C_BEGIN
#include <fftw3-mpi.h>
EXTERN_C_END

typedef struct {
  ptrdiff_t   ndim_fftw,*dim_fftw;
  PetscInt    partial_dim;
  fftw_plan   p_forward,p_backward;
  unsigned    p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of the arrays because an fftw plan should be
                                                            executed on the arrays with which the plan was created */
} Mat_FFTW;
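/* Illustrative sketch (not part of the original source): the caching pattern the fields
   above support, expressed directly in the standard FFTW3 API. A plan is created once for
   a particular in/out pair and reused with fftw_execute(); if the caller later supplies
   different (but equally aligned) arrays, the new-array execute interface
   fftw_execute_dft() is used instead of re-planning. */
static void ExamplePlanReuse_Sketch(int n,fftw_complex *in,fftw_complex *out,fftw_complex *in2,fftw_complex *out2)
{
  fftw_plan p = fftw_plan_dft_1d(n,in,out,FFTW_FORWARD,FFTW_ESTIMATE);
  fftw_execute(p);              /* runs on the arrays the plan was created with */
  fftw_execute_dft(p,in2,out2); /* same plan applied to new, equally aligned arrays */
  fftw_destroy_plan(p);
}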
extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
/* MatMult_SeqFFTW performs serial forward DFT
   Input parameters:
     A - the matrix
     x - the vector on which the forward DFT will be performed

   Output parameter:
     y - vector that stores the result of the forward DFT
*/
PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscScalar *x_array,*y_array;
  PetscInt    ndim=fft->ndim,*dim=fft->dim;

  VecGetArray(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_forward){ /* create a plan, then execute it */
    switch (ndim){
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    }
    fftw->finarray  = x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten during planning,
       so planning should be done before x is initialized! See the FFTW manual, Sec. 2.1 or Sec. 4 */
    fftw_execute(fftw->p_forward);
  } else { /* use the existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* apply the existing plan to new arrays */
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArray(x,&x_array);
  return(0);
}
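/* Usage sketch (illustrative, not from the original file): because planning with
   FFTW_MEASURE/FFTW_PATIENT/FFTW_EXHAUSTIVE overwrites the in/out arrays (see the warning
   above), a first MatMult() can be issued before x holds meaningful data so that the plan
   is created and cached; the real data is loaded afterwards. A is assumed to be a
   sequential MATFFTW matrix created elsewhere. */
static PetscErrorCode ExampleSeqForward_Sketch(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  ierr = MatMult(A,x,y);CHKERRQ(ierr); /* first call: creates and caches fftw->p_forward */
  /* ... fill x with the actual input data here ... */
  ierr = MatMult(A,x,y);CHKERRQ(ierr); /* reuses the cached plan */
  return 0;
}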
/* MatMultTranspose_SeqFFTW performs serial backward DFT
   Input parameters:
     A - the matrix
     x - the vector on which the backward DFT will be performed

   Output parameter:
     y - vector that stores the result of the backward DFT
*/
PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscScalar *x_array,*y_array;
  PetscInt    ndim=fft->ndim,*dim=fft->dim;

  VecGetArray(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_backward){ /* create a plan, then execute it */
    switch (ndim){
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    }
    fftw->binarray  = x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use the existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* apply the existing plan to new arrays */
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArray(x,&x_array);
  return(0);
}
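/* Normalization sketch (illustrative): FFTW transforms are unnormalized, so a forward
   transform followed by a backward transform multiplies the data by N, the product of the
   logical dimensions. A caller typically rescales the result, e.g.: */
static PetscErrorCode ExampleRoundTripScale_Sketch(Mat A,Vec x,Vec y,Vec z,PetscInt N)
{
  PetscErrorCode ierr;
  ierr = MatMult(A,x,y);CHKERRQ(ierr);               /* forward DFT */
  ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr);      /* backward DFT, unnormalized */
  ierr = VecScale(z,1.0/(PetscReal)N);CHKERRQ(ierr); /* now z equals x up to roundoff */
  return 0;
}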
/* MatMult_MPIFFTW performs parallel forward DFT
   Input parameters:
     A - the matrix
     x - the vector on which the forward DFT will be performed

   Output parameter:
     y - vector that stores the result of the forward DFT
*/
PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscScalar *x_array,*y_array;
  PetscInt    ndim=fft->ndim,*dim=fft->dim;
  MPI_Comm    comm=((PetscObject)A)->comm;

  VecGetArray(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_forward){ /* create a plan, then execute it */
    switch (ndim){
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->finarray  = x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten during planning,
       so planning should be done before x is initialized! See the FFTW manual, Sec. 2.1 or Sec. 4 */
    fftw_execute(fftw->p_forward);
  } else { /* use the existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* apply the existing plan to new arrays */
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArray(x,&x_array);
  return(0);
}
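/* Parallel usage sketch (illustrative): in the MPI case the input/output vectors must have
   the parallel layout FFTW imposes, so they are obtained from MatGetVecsFFTW() rather than
   from MatGetVecs(). The prototype is repeated here only so the sketch is self-contained;
   the routine itself is defined later in this file. */
extern PetscErrorCode MatGetVecsFFTW(Mat,Vec*,Vec*,Vec*);
static PetscErrorCode ExampleMPIForward_Sketch(Mat A)
{
  PetscErrorCode ierr;
  Vec            x,y,z;
  ierr = MatGetVecsFFTW(A,&x,&y,&z);CHKERRQ(ierr);
  /* ... fill x (directly, or via VecScatterPetscToFFTW() for real transforms) ... */
  ierr = MatMult(A,x,y);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = VecDestroy(&z);CHKERRQ(ierr);
  return 0;
}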
/* MatMultTranspose_MPIFFTW performs parallel backward DFT
   Input parameters:
     A - the matrix
     x - the vector on which the backward DFT will be performed

   Output parameter:
     y - vector that stores the result of the backward DFT
*/
PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscScalar *x_array,*y_array;
  PetscInt    ndim=fft->ndim,*dim=fft->dim;
  MPI_Comm    comm=((PetscObject)A)->comm;

  VecGetArray(x,&x_array);
  VecGetArray(y,&y_array);
  if (!fftw->p_backward){ /* create a plan, then execute it */
    switch (ndim){
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->binarray  = x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use the existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* apply the existing plan to new arrays */
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  VecRestoreArray(y,&y_array);
  VecRestoreArray(x,&x_array);
  return(0);
}
PetscErrorCode MatDestroy_FFTW(Mat A)
{
  Mat_FFT  *fft  = (Mat_FFT*)A->data;
  Mat_FFTW *fftw = (Mat_FFTW*)fft->data;

  fftw_destroy_plan(fftw->p_forward);
  fftw_destroy_plan(fftw->p_backward);
  PetscFree(fftw->dim_fftw);
  PetscFree(fft->data);
  fftw_mpi_cleanup();
  return(0);
}
#include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/

PetscErrorCode VecDestroy_MPIFFTW(Vec v)
{
  PetscErrorCode ierr;
  PetscScalar    *array;

  VecGetArray(v,&array);
  fftw_free((fftw_complex*)array);
  VecRestoreArray(v,&array);
  VecDestroy_MPI(v);
  return(0);
}
/*@
   MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
   parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  x - (optional) input vector of the forward FFTW transform
.  y - (optional) output vector of the forward FFTW transform
-  z - (optional) output vector of the backward FFTW transform

   Level: advanced

   Notes: The parallel layout of the output of the forward FFTW transform is always the same
   as the input of the backward FFTW transform, but the parallel layout of the input vector
   of the forward transform might not be the same as the output of the backward transform.
   Also note that we need to provide enough space while doing a parallel real transform:
   extra zeros must be padded at the end of the last dimension. For this reason one needs to
   invoke the fftw_mpi_local_size_transposed routine, remembering to change the last
   dimension from n to n/2+1 when doing so. The number of zeros to be padded depends on
   whether the last dimension is even or odd: if it is even, two zeros are needed; if it is
   odd, only one.
   Lastly, one needs some scratch space at the end of the data set on each process.
   alloc_local figures out how much space is needed, i.e. it computes the data+scratch space
   for each process and returns that.

.seealso: MatCreateFFT()
@*/
PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
  return(0);
}
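/* Padding arithmetic sketch (illustrative): for a parallel real transform the last
   dimension n is stored padded to 2*(n/2+1) entries, i.e. two extra zeros when n is even
   and one when n is odd, exactly as the note above describes. */
static PetscInt ExamplePaddedLastDim_Sketch(PetscInt n)
{
  return 2*(n/2+1); /* n even: n+2; n odd: n+1 */
}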
EXTERN_C_BEGIN
PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscMPIInt size,rank;
  MPI_Comm    comm=((PetscObject)A)->comm;
  Mat_FFT     *fft  = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw = (Mat_FFTW*)fft->data;
  PetscInt    N=fft->N;
  PetscInt    ndim=fft->ndim,*dim=fft->dim,n=fft->n;

  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);
  if (size == 1){ /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin)  {VecCreateSeq(PETSC_COMM_SELF,N,fin);}
    if (fout) {VecCreateSeq(PETSC_COMM_SELF,N,fout);}
    if (bout) {VecCreateSeq(PETSC_COMM_SELF,N,bout);}
#else
    if (fin)  {VecCreateSeq(PETSC_COMM_SELF,n,fin);}
    if (fout) {VecCreateSeq(PETSC_COMM_SELF,n,fout);}
    if (bout) {VecCreateSeq(PETSC_COMM_SELF,n,bout);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double       *data_finr,*data_boutr;
    PetscInt     n1,N1;
    ptrdiff_t    temp;
#endif

    switch (ndim){
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transforms");
#else
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get the local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt)temp);
      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  return(0);
}
EXTERN_C_END
/*@
   VecScatterPetscToFFTW - Copies a PETSc vector into the vector that goes into the FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameter:
.  y - the FFTW vector

   Options Database Keys:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For a real parallel FFT, FFTW requires insertion of extra space at the end of the
   last dimension. This is required even when one is not doing an in-place transform. The
   last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
   zeros. This routine does that job via a scatter operation.

.seealso: VecScatterFFTWToPetsc()
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
  return(0);
}
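/* Scatter usage sketch (illustrative): move data from a natural-layout PETSc vector into
   the padded FFTW layout before a real parallel transform. A is assumed to be a MATFFTW
   matrix and xnat a PETSc vector with the natural (unpadded) global size. */
static PetscErrorCode ExampleToFFTW_Sketch(Mat A,Vec xnat)
{
  PetscErrorCode ierr;
  Vec            xfftw,yfftw,zfftw;
  ierr = MatGetVecsFFTW(A,&xfftw,&yfftw,&zfftw);CHKERRQ(ierr);
  ierr = VecScatterPetscToFFTW(A,xnat,xfftw);CHKERRQ(ierr); /* inserts the padding zeros */
  ierr = MatMult(A,xfftw,yfftw);CHKERRQ(ierr);
  ierr = VecDestroy(&xfftw);CHKERRQ(ierr);
  ierr = VecDestroy(&yfftw);CHKERRQ(ierr);
  ierr = VecDestroy(&zfftw);CHKERRQ(ierr);
  return 0;
}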
EXTERN_C_BEGIN
PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
{
  MPI_Comm   comm=((PetscObject)A)->comm;
  Mat_FFT    *fft  = (Mat_FFT*)A->data;
  Mat_FFTW   *fftw = (Mat_FFTW*)fft->data;
  PetscInt   N=fft->N;
  PetscInt   ndim=fft->ndim,*dim=fft->dim;
  PetscInt   low;
  PetscInt   rank,size,vsize,vsize1;
  ptrdiff_t  alloc_local,local_n0,local_0_start;
  ptrdiff_t  local_n1,local_1_start;
  VecScatter vecscat;
  IS         list1,list2;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt   i,j,k,partial_dim;
  PetscInt   *indx1,*indx2,tempindx,tempindx1;
  PetscInt   N1,n1,NM;
  ptrdiff_t  temp;
#endif

  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);
  VecGetOwnershipRange(y,&low,PETSC_NULL);

  if (size==1){
    VecGetSize(x,&vsize);
    VecGetSize(y,&vsize1);
    ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
    VecScatterCreate(x,list1,y,list1,&vecscat);
    VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterDestroy(&vecscat);
    ISDestroy(&list1);
  } else {
    switch (ndim){
    case 1:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      ISCreateStride(comm,local_n0,local_0_start,1,&list1);
      ISCreateStride(comm,local_n0,low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transforms");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
      ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);

      if (dim[1]%2==0){
        NM = dim[1]+2;
      } else {
        NM = dim[1]+1;
      }
      for (i=0;i<local_n0;i++){
        for (j=0;j<dim[1];j++){
          tempindx  = i*dim[1] + j;
          tempindx1 = i*NM + j;
          indx1[tempindx] = local_0_start*dim[1]+tempindx;
          indx2[tempindx] = low+tempindx1;
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
      ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);

      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);

      if (dim[2]%2==0)
        NM = dim[2]+2;
      else
        NM = dim[2]+1;

      for (i=0;i<local_n0;i++){
        for (j=0;j<dim[1];j++){
          for (k=0;k<dim[2];k++){
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
            tempindx1 = i*dim[1]*NM + j*NM + k;
            indx1[tempindx] = local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx] = low+tempindx1;
          }
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
      ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt)temp);
      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);

      if (dim[ndim-1]%2==0)
        NM = 2;
      else
        NM = 1;

      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++){
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) {j+=NM;}
        j++;
      }
      ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    }
  }
  return(0);
}
EXTERN_C_END
/*@
   VecScatterFFTWToPetsc - Converts FFTW output to a PETSc vector.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - FFTW vector

   Output Parameter:
.  y - PETSc vector

   Level: intermediate

   Note: When doing a real transform, the FFTW output of the backward DFT contains extra
   zeros at the end of the last dimension. VecScatterFFTWToPetsc removes those extra zeros.

.seealso: VecScatterPetscToFFTW()
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
  return(0);
}
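/* Round-trip sketch (illustrative): forward plus backward transform with both scatters,
   recovering the original natural-layout vector up to the FFTW normalization factor N
   (see the normalization note earlier in this file). */
static PetscErrorCode ExampleScatterRoundTrip_Sketch(Mat A,Vec xnat,Vec znat,PetscInt N)
{
  PetscErrorCode ierr;
  Vec            x,y,z;
  ierr = MatGetVecsFFTW(A,&x,&y,&z);CHKERRQ(ierr);
  ierr = VecScatterPetscToFFTW(A,xnat,x);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);
  ierr = MatMultTranspose(A,y,z);CHKERRQ(ierr);
  ierr = VecScatterFFTWToPetsc(A,z,znat);CHKERRQ(ierr); /* strips the padding zeros */
  ierr = VecScale(znat,1.0/(PetscReal)N);CHKERRQ(ierr);
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  ierr = VecDestroy(&z);CHKERRQ(ierr);
  return 0;
}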
EXTERN_C_BEGIN
PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
{
  MPI_Comm   comm=((PetscObject)A)->comm;
  Mat_FFT    *fft  = (Mat_FFT*)A->data;
  Mat_FFTW   *fftw = (Mat_FFTW*)fft->data;
  PetscInt   N=fft->N;
  PetscInt   ndim=fft->ndim,*dim=fft->dim;
  PetscInt   low;
  PetscInt   rank,size;
  ptrdiff_t  alloc_local,local_n0,local_0_start;
  ptrdiff_t  local_n1,local_1_start;
  VecScatter vecscat;
  IS         list1,list2;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt   i,j,k,partial_dim;
  PetscInt   *indx1,*indx2,tempindx,tempindx1;
  PetscInt   N1,n1,NM;
  ptrdiff_t  temp;
#endif

  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);
  VecGetOwnershipRange(x,&low,PETSC_NULL);

  if (size==1){
    ISCreateStride(comm,N,0,1,&list1);
    VecScatterCreate(x,list1,y,list1,&vecscat);
    VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterDestroy(&vecscat);
    ISDestroy(&list1);
  } else {
    switch (ndim){
    case 1:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      ISCreateStride(comm,local_n1,local_1_start,1,&list1);
      ISCreateStride(comm,local_n1,low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
      ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);

      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);

      if (dim[1]%2==0)
        NM = dim[1]+2;
      else
        NM = dim[1]+1;

      for (i=0;i<local_n0;i++){
        for (j=0;j<dim[1];j++){
          tempindx  = i*dim[1] + j;
          tempindx1 = i*NM + j;
          indx1[tempindx] = local_0_start*dim[1]+tempindx;
          indx2[tempindx] = low+tempindx1;
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
      ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);

      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);

      if (dim[2]%2==0)
        NM = dim[2]+2;
      else
        NM = dim[2]+1;

      for (i=0;i<local_n0;i++){
        for (j=0;j<dim[1];j++){
          for (k=0;k<dim[2];k++){
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
            tempindx1 = i*dim[1]*NM + j*NM + k;
            indx1[tempindx] = local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx] = low+tempindx1;
          }
        }
      }

      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
      ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
      VecScatterCreate(x,list1,y,list2,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
#else
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt)temp);
      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);
      PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);

      if (dim[ndim-1]%2==0)
        NM = 2;
      else
        NM = 1;

      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++){
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j+=NM;
        j++;
      }
      ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);
      ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);

      VecScatterCreate(x,list2,y,list1,&vecscat);
      VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);
      VecScatterDestroy(&vecscat);
      ISDestroy(&list1);
      ISDestroy(&list2);
      PetscFree(indx1);
      PetscFree(indx2);
#endif
      break;
    }
  }
  return(0);
}
EXTERN_C_END
EXTERN_C_BEGIN
/*
   MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW

   Options Database Keys:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate
*/
PetscErrorCode MatCreate_FFTW(Mat A)
{
  MPI_Comm    comm=((PetscObject)A)->comm;
  Mat_FFT     *fft=(Mat_FFT*)A->data;
  Mat_FFTW    *fftw;
  PetscInt    n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
  const char  *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
  PetscBool   flg;
  PetscInt    p_flag,partial_dim=1,ctr;
  PetscMPIInt size,rank;
  ptrdiff_t   *pdim;
  ptrdiff_t   local_n1,local_1_start;
#if !defined(PETSC_USE_COMPLEX)
  ptrdiff_t   temp;
  PetscInt    N1,tot_dim;
#else
  PetscInt    n1;
#endif

  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);

  fftw_mpi_init();
  pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  tot_dim = 2*dim[0];
#endif
  for (ctr=1;ctr<ndim;ctr++){
    partial_dim *= dim[ctr];
    pdim[ctr]    = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    if (ctr==ndim-1)
      tot_dim *= (dim[ctr]/2+1);
    else
      tot_dim *= dim[ctr];
#endif
  }

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    MatSetSizes(A,N,N,N,N);
    n = N;
#else
    MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);
    n = tot_dim;
#endif
  } else {
    ptrdiff_t alloc_local,local_n0,local_0_start;
    switch (ndim){
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transforms");
#else
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      n  = (PetscInt)local_n0;
      n1 = (PetscInt)local_n1;
      MatSetSizes(A,n1,n,N,N);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      /*
      PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
      PetscSynchronizedFlush(comm);
      */
      n = (PetscInt)local_n0*dim[1];
      MatSetSizes(A,n,n,N,N);
#else
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      n = 2*(PetscInt)local_n0*(dim[1]/2+1);
      MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      n = (PetscInt)local_n0*dim[1]*dim[2];
      MatSetSizes(A,n,n,N,N);
#else
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
      MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
      n = (PetscInt)local_n0*partial_dim;
      MatSetSizes(A,n,n,N,N);
#else
      temp = pdim[ndim-1];
      pdim[ndim-1] = temp/2 + 1;
      alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
      N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt)temp);
      pdim[ndim-1] = temp;
      MatSetSizes(A,n,n,N1,N1);
#endif
      break;
    }
  }
  PetscObjectChangeTypeName((PetscObject)A,MATFFTW);
  PetscNewLog(A,Mat_FFTW,&fftw);
  fft->data = (void*)fftw;

  fft->n            = n;
  fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is the dimension of the FFT */
  fftw->partial_dim = partial_dim;
  PetscMalloc(ndim*sizeof(ptrdiff_t),(ptrdiff_t*)&(fftw->dim_fftw));
  for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

  fftw->p_forward  = 0;
  fftw->p_backward = 0;
  fftw->p_flag     = FFTW_ESTIMATE;

  if (size == 1){
    A->ops->mult          = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
  } else {
    A->ops->mult          = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;
  PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
  PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);
  PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);

  /* get runtime options */
  PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");
  PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);
  if (flg) {fftw->p_flag = (unsigned)p_flag;}
  PetscOptionsEnd();
  return(0);
}
EXTERN_C_END
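/* End-to-end creation sketch (illustrative): a user reaches MatCreate_FFTW() above through
   the generic FFT constructor MatCreateFFT(); the dimensions below are illustrative values.
   The planner flag can also be changed at runtime with -mat_fftw_plannerflags. */
static PetscErrorCode ExampleCreateFFTW_Sketch(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       dim[2] = {64,64}; /* a 64 x 64 two-dimensional transform */
  ierr = MatCreateFFT(comm,2,dim,MATFFTW,A);CHKERRQ(ierr);
  return 0;
}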