Actual source code: cufft.cu
petsc-3.7.3 2016-08-01
2: /*
3: Provides an interface to the CUFFT package.
4: Testing examples can be found in ~src/mat/examples/tests
5: */
7: #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/
8: EXTERN_C_BEGIN
9: #include <cuda.h>
10: #include <cuda_runtime.h>
11: #include <cufft.h>
12: EXTERN_C_END
14: typedef struct {
15: PetscInt ndim;
16: PetscInt *dim;
17: cufftHandle p_forward, p_backward;
18: cufftComplex *devArray;
19: } Mat_CUFFT;
23: PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
24: {
25: Mat_CUFFT *cufft = (Mat_CUFFT*) A->data;
26: cufftComplex *devArray = cufft->devArray;
27: PetscInt ndim = cufft->ndim, *dim = cufft->dim;
28: PetscScalar *x_array, *y_array;
29: cufftResult result;
33: VecGetArray(x, &x_array);
34: VecGetArray(y, &y_array);
35: if (!cufft->p_forward) {
36: cufftResult result;
37: /* create a plan, then execute it */
38: switch (ndim) {
39: case 1:
40: result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
41: break;
42: case 2:
43: result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
44: break;
45: case 3:
46: result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
47: break;
48: default:
49: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
50: }
51: }
52: /* transfer to GPU memory */
53: cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
54: /* execute transform */
55: result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS);
56: /* transfer from GPU memory */
57: cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
58: VecRestoreArray(y, &y_array);
59: VecRestoreArray(x, &x_array);
60: return(0);
61: }
65: PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
66: {
67: Mat_CUFFT *cufft = (Mat_CUFFT*) A->data;
68: cufftComplex *devArray = cufft->devArray;
69: PetscInt ndim = cufft->ndim, *dim = cufft->dim;
70: PetscScalar *x_array, *y_array;
71: cufftResult result;
75: VecGetArray(x, &x_array);
76: VecGetArray(y, &y_array);
77: if (!cufft->p_backward) {
78: /* create a plan, then execute it */
79: switch (ndim) {
80: case 1:
81: result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
82: break;
83: case 2:
84: result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
85: break;
86: case 3:
87: result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
88: break;
89: default:
90: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
91: }
92: }
93: /* transfer to GPU memory */
94: cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
95: /* execute transform */
96: result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS);
97: /* transfer from GPU memory */
98: cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
99: VecRestoreArray(y, &y_array);
100: VecRestoreArray(x, &x_array);
101: return(0);
102: }
106: PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
107: {
108: Mat_CUFFT *cufft = (Mat_CUFFT*) A->data;
109: cufftResult result;
113: PetscFree(cufft->dim);
114: if (cufft->p_forward) {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);}
115: if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);}
116: cudaFree(cufft->devArray);
117: PetscFree(A->data);
118: PetscObjectChangeTypeName((PetscObject)A,0);
119: return(0);
120: }
124: /*@
125: MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT
127: Collective on MPI_Comm
129: Input Parameters:
130: + comm - MPI communicator, set to PETSC_COMM_SELF
131: . ndim - the ndim-dimensional transform
132: - dim - array of size ndim, dim[i] contains the vector length in the i-dimension
134: Output Parameter:
135: . A - the matrix
137: Options Database Keys:
138: . -mat_cufft_plannerflags - set CUFFT planner flags
140: Level: intermediate
141: @*/
142: PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
143: {
144: Mat_CUFFT *cufft;
145: PetscInt m, d;
149: if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim);
150: MatCreate(comm, A);
151: m = 1;
152: for (d = 0; d < ndim; ++d) {
153: if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]);
154: m *= dim[d];
155: }
156: MatSetSizes(*A, m, m, m, m);
157: PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);
159: PetscNewLog(*A,&cufft);
160: (*A)->data = (void*) cufft;
161: PetscMalloc1(ndim+1, &cufft->dim);
162: PetscMemcpy(cufft->dim, dim, ndim*sizeof(PetscInt));
164: cufft->ndim = ndim;
165: cufft->p_forward = 0;
166: cufft->p_backward = 0;
167: cufft->dim[ndim] = m;
169: /* GPU memory allocation */
170: cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);
172: (*A)->ops->mult = MatMult_SeqCUFFT;
173: (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
174: (*A)->assembled = PETSC_TRUE;
175: (*A)->ops->destroy = MatDestroy_SeqCUFFT;
177: /* get runtime options */
178: PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
179: PetscOptionsEnd();
180: return(0);
181: }