Actual source code: cufft.cu
2: /*
3: Provides an interface to the CUFFT package.
4: Testing examples can be found in ~src/mat/tests
5: */
7: #include <petscdevice.h>
8: #include <petsc/private/matimpl.h>
10: typedef struct {
11: PetscInt ndim;
12: PetscInt *dim;
13: cufftHandle p_forward, p_backward;
14: cufftComplex *devArray;
15: } Mat_CUFFT;
17: PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
18: {
19: Mat_CUFFT *cufft = (Mat_CUFFT*) A->data;
20: cufftComplex *devArray = cufft->devArray;
21: PetscInt ndim = cufft->ndim, *dim = cufft->dim;
22: PetscScalar *x_array, *y_array;
23: cufftResult result;
24: cudaError_t cerr;
28: VecGetArray(x, &x_array);
29: VecGetArray(y, &y_array);
30: if (!cufft->p_forward) {
31: cufftResult result;
32: /* create a plan, then execute it */
33: switch (ndim) {
34: case 1:
35: result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result);
36: break;
37: case 2:
38: result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result);
39: break;
40: case 3:
41: result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result);
42: break;
43: default:
44: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
45: }
46: }
47: /* transfer to GPU memory */
48: cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr);
49: /* execute transform */
50: result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRCUFFT(result);
51: /* transfer from GPU memory */
52: cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr);
53: VecRestoreArray(y, &y_array);
54: VecRestoreArray(x, &x_array);
55: return(0);
56: }
58: PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
59: {
60: Mat_CUFFT *cufft = (Mat_CUFFT*) A->data;
61: cufftComplex *devArray = cufft->devArray;
62: PetscInt ndim = cufft->ndim, *dim = cufft->dim;
63: PetscScalar *x_array, *y_array;
64: cufftResult result;
65: cudaError_t cerr;
69: VecGetArray(x, &x_array);
70: VecGetArray(y, &y_array);
71: if (!cufft->p_backward) {
72: /* create a plan, then execute it */
73: switch (ndim) {
74: case 1:
75: result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result);
76: break;
77: case 2:
78: result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result);
79: break;
80: case 3:
81: result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result);
82: break;
83: default:
84: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
85: }
86: }
87: /* transfer to GPU memory */
88: cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr);
89: /* execute transform */
90: result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRCUFFT(result);
91: /* transfer from GPU memory */
92: cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr);
93: VecRestoreArray(y, &y_array);
94: VecRestoreArray(x, &x_array);
95: return(0);
96: }
98: PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
99: {
100: Mat_CUFFT *cufft = (Mat_CUFFT*) A->data;
101: cufftResult result;
102: cudaError_t cerr;
106: PetscFree(cufft->dim);
107: if (cufft->p_forward) {result = cufftDestroy(cufft->p_forward);CHKERRCUFFT(result);}
108: if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRCUFFT(result);}
109: cerr = cudaFree(cufft->devArray);CHKERRCUDA(cerr);
110: PetscFree(A->data);
111: PetscObjectChangeTypeName((PetscObject)A,0);
112: return(0);
113: }
115: /*@
116: MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT
118: Collective
120: Input Parameters:
121: + comm - MPI communicator, set to PETSC_COMM_SELF
122: . ndim - the ndim-dimensional transform
123: - dim - array of size ndim, dim[i] contains the vector length in the i-dimension
125: Output Parameter:
126: . A - the matrix
128: Options Database Keys:
129: . -mat_cufft_plannerflags - set CUFFT planner flags
131: Level: intermediate
132: @*/
133: PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
134: {
135: Mat_CUFFT *cufft;
136: PetscInt m, d;
138: cudaError_t cerr;
141: if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim);
142: MatCreate(comm, A);
143: m = 1;
144: for (d = 0; d < ndim; ++d) {
145: if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]);
146: m *= dim[d];
147: }
148: MatSetSizes(*A, m, m, m, m);
149: PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);
151: PetscNewLog(*A,&cufft);
152: (*A)->data = (void*) cufft;
153: PetscMalloc1(ndim+1, &cufft->dim);
154: PetscArraycpy(cufft->dim, dim, ndim);
156: cufft->ndim = ndim;
157: cufft->p_forward = 0;
158: cufft->p_backward = 0;
159: cufft->dim[ndim] = m;
161: /* GPU memory allocation */
162: cerr = cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);CHKERRCUDA(cerr);
164: (*A)->ops->mult = MatMult_SeqCUFFT;
165: (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
166: (*A)->assembled = PETSC_TRUE;
167: (*A)->ops->destroy = MatDestroy_SeqCUFFT;
169: /* get runtime options */
170: PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
171: PetscOptionsEnd();
172: return(0);
173: }