Actual source code: cufft.cu
1: /*
2: Provides an interface to the CUFFT package.
3: Testing examples can be found in ~src/mat/tests
4: */
6: #include <petscdevice_cuda.h>
7: #include <petsc/private/matimpl.h>
9: typedef struct {
10: PetscInt ndim;
11: PetscInt *dim;
12: cufftHandle p_forward, p_backward;
13: cufftComplex *devArray;
14: } Mat_CUFFT;
16: static PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
17: {
18: Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
19: cufftComplex *devArray = cufft->devArray;
20: PetscInt ndim = cufft->ndim, *dim = cufft->dim;
21: PetscScalar *x_array, *y_array;
23: PetscFunctionBegin;
24: PetscCall(VecGetArray(x, &x_array));
25: PetscCall(VecGetArray(y, &y_array));
26: if (!cufft->p_forward) {
27: /* create a plan, then execute it */
28: switch (ndim) {
29: case 1:
30: PetscCallCUFFT(cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1));
31: break;
32: case 2:
33: PetscCallCUFFT(cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C));
34: break;
35: case 3:
36: PetscCallCUFFT(cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C));
37: break;
38: default:
39: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
40: }
41: }
42: /* transfer to GPU memory */
43: PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
44: /* execute transform */
45: PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD));
46: /* transfer from GPU memory */
47: PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
48: PetscCall(VecRestoreArray(y, &y_array));
49: PetscCall(VecRestoreArray(x, &x_array));
50: PetscFunctionReturn(PETSC_SUCCESS);
51: }
53: static PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
54: {
55: Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
56: cufftComplex *devArray = cufft->devArray;
57: PetscInt ndim = cufft->ndim, *dim = cufft->dim;
58: PetscScalar *x_array, *y_array;
60: PetscFunctionBegin;
61: PetscCall(VecGetArray(x, &x_array));
62: PetscCall(VecGetArray(y, &y_array));
63: if (!cufft->p_backward) {
64: /* create a plan, then execute it */
65: switch (ndim) {
66: case 1:
67: PetscCallCUFFT(cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1));
68: break;
69: case 2:
70: PetscCallCUFFT(cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C));
71: break;
72: case 3:
73: PetscCallCUFFT(cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C));
74: break;
75: default:
76: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %" PetscInt_FMT "-dimensional transform", ndim);
77: }
78: }
79: /* transfer to GPU memory */
80: PetscCallCUDA(cudaMemcpy(devArray, x_array, sizeof(cufftComplex) * dim[ndim], cudaMemcpyHostToDevice));
81: /* execute transform */
82: PetscCallCUFFT(cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE));
83: /* transfer from GPU memory */
84: PetscCallCUDA(cudaMemcpy(y_array, devArray, sizeof(cufftComplex) * dim[ndim], cudaMemcpyDeviceToHost));
85: PetscCall(VecRestoreArray(y, &y_array));
86: PetscCall(VecRestoreArray(x, &x_array));
87: PetscFunctionReturn(PETSC_SUCCESS);
88: }
90: static PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
91: {
92: Mat_CUFFT *cufft = (Mat_CUFFT *)A->data;
94: PetscFunctionBegin;
95: PetscCall(PetscFree(cufft->dim));
96: if (cufft->p_forward) PetscCallCUFFT(cufftDestroy(cufft->p_forward));
97: if (cufft->p_backward) PetscCallCUFFT(cufftDestroy(cufft->p_backward));
98: PetscCallCUDA(cudaFree(cufft->devArray));
99: PetscCall(PetscFree(A->data));
100: PetscCall(PetscObjectChangeTypeName((PetscObject)A, 0));
101: PetscFunctionReturn(PETSC_SUCCESS);
102: }
104: /*@
105: MatCreateSeqCUFFT - Creates a matrix object that provides `MATSEQCUFFT` via the NVIDIA package CuFFT
107: Collective
109: Input Parameters:
110: + comm - MPI communicator, set to `PETSC_COMM_SELF`
111: . ndim - the ndim-dimensional transform
112: - dim - array of size `ndim`, dim[i] contains the vector length in the i-dimension
114: Output Parameter:
115: . A - the matrix
117: Options Database Key:
118: . -mat_cufft_plannerflags - set CuFFT planner flags
120: Level: intermediate
122: .seealso: [](ch_matrices), `Mat`, `MATSEQCUFFT`
123: @*/
124: PetscErrorCode MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
125: {
126: Mat_CUFFT *cufft;
127: PetscInt m = 1;
129: PetscFunctionBegin;
130: PetscCheck(ndim >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %" PetscInt_FMT " must be > 0", ndim);
131: if (ndim) PetscAssertPointer(dim, 3);
132: PetscAssertPointer(A, 4);
133: PetscCall(MatCreate(comm, A));
134: for (PetscInt d = 0; d < ndim; ++d) {
135: PetscCheck(dim[d] >= 0, PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%" PetscInt_FMT "]=%" PetscInt_FMT " must be > 0", d, dim[d]);
136: m *= dim[d];
137: }
138: PetscCall(MatSetSizes(*A, m, m, m, m));
139: PetscCall(PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT));
141: PetscCall(PetscNew(&cufft));
142: (*A)->data = (void *)cufft;
143: PetscCall(PetscMalloc1(ndim + 1, &cufft->dim));
144: PetscCall(PetscArraycpy(cufft->dim, dim, ndim));
146: cufft->ndim = ndim;
147: cufft->p_forward = 0;
148: cufft->p_backward = 0;
149: cufft->dim[ndim] = m;
151: /* GPU memory allocation */
152: PetscCallCUDA(cudaMalloc((void **)&cufft->devArray, sizeof(cufftComplex) * m));
154: (*A)->ops->mult = MatMult_SeqCUFFT;
155: (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
156: (*A)->assembled = PETSC_TRUE;
157: (*A)->ops->destroy = MatDestroy_SeqCUFFT;
159: /* get runtime options ...what options????? */
160: PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
161: PetscOptionsEnd();
162: PetscFunctionReturn(PETSC_SUCCESS);
163: }