Actual source code: cufft.cu

petsc-3.4.5 2014-06-29
  2: /*
  3:     Provides an interface to the CUFFT package.
  4:     Testing examples can be found in ~src/mat/examples/tests
  5: */

  7: #include <petsc-private/matimpl.h>          /*I "petscmat.h" I*/
  8: EXTERN_C_BEGIN
  9: #include <cuda.h>
 10: #include <cuda_runtime.h>
 11: #include <cufft.h>
 12: EXTERN_C_END

 14: typedef struct {
 15:   PetscInt     ndim;
 16:   PetscInt     *dim;
 17:   cufftHandle  p_forward, p_backward;
 18:   cufftComplex *devArray;
 19: } Mat_CUFFT;

 23: PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
 24: {
 25:   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
 26:   cufftComplex   *devArray = cufft->devArray;
 27:   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
 28:   PetscScalar    *x_array, *y_array;
 29:   cufftResult    result;

 33:   VecGetArray(x, &x_array);
 34:   VecGetArray(y, &y_array);
 35:   if (!cufft->p_forward) {
 36:     cufftResult result;
 37:     /* create a plan, then execute it */
 38:     switch (ndim) {
 39:     case 1:
 40:       result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
 41:       break;
 42:     case 2:
 43:       result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
 44:       break;
 45:     case 3:
 46:       result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
 47:       break;
 48:     default:
 49:       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
 50:     }
 51:   }
 52:   /* transfer to GPU memory */
 53:   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
 54:   /* execute transform */
 55:   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRQ(result != CUFFT_SUCCESS);
 56:   /* transfer from GPU memory */
 57:   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
 58:   VecRestoreArray(y, &y_array);
 59:   VecRestoreArray(x, &x_array);
 60:   return(0);
 61: }

 65: PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
 66: {
 67:   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
 68:   cufftComplex   *devArray = cufft->devArray;
 69:   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
 70:   PetscScalar    *x_array, *y_array;
 71:   cufftResult    result;

 75:   VecGetArray(x, &x_array);
 76:   VecGetArray(y, &y_array);
 77:   if (!cufft->p_backward) {
 78:     /* create a plan, then execute it */
 79:     switch (ndim) {
 80:     case 1:
 81:       result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRQ(result != CUFFT_SUCCESS);
 82:       break;
 83:     case 2:
 84:       result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
 85:       break;
 86:     case 3:
 87:       result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRQ(result != CUFFT_SUCCESS);
 88:       break;
 89:     default:
 90:       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
 91:     }
 92:   }
 93:   /* transfer to GPU memory */
 94:   cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);
 95:   /* execute transform */
 96:   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRQ(result != CUFFT_SUCCESS);
 97:   /* transfer from GPU memory */
 98:   cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);
 99:   VecRestoreArray(y, &y_array);
100:   VecRestoreArray(x, &x_array);
101:   return(0);
102: }

106: PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
107: {
108:   Mat_CUFFT      *cufft = (Mat_CUFFT*) A->data;
109:   cufftResult    result;

113:   PetscFree(cufft->dim);
114:   if (cufft->p_forward)  {result = cufftDestroy(cufft->p_forward);CHKERRQ(result != CUFFT_SUCCESS);}
115:   if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRQ(result != CUFFT_SUCCESS);}
116:   cudaFree(cufft->devArray);
117:   PetscFree(A->data);
118:   PetscObjectChangeTypeName((PetscObject)A,0);
119:   return(0);
120: }

124: /*@
125:   MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT

127:   Collective on MPI_Comm

129:   Input Parameters:
130: + comm - MPI communicator, set to PETSC_COMM_SELF
131: . ndim - the ndim-dimensional transform
132: - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension

134:   Output Parameter:
135: . A - the matrix

137:   Options Database Keys:
138: . -mat_cufft_plannerflags - set CUFFT planner flags

140:   Level: intermediate
141: @*/
142: PetscErrorCode  MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
143: {
144:   Mat_CUFFT      *cufft;
145:   PetscInt       m, d;

149:   if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim);
150:   MatCreate(comm, A);
151:   m    = 1;
152:   for (d = 0; d < ndim; ++d) {
153:     if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]);
154:     m *= dim[d];
155:   }
156:   MatSetSizes(*A, m, m, m, m);
157:   PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);

159:   PetscNewLog(*A, Mat_CUFFT, &cufft);
160:   (*A)->data = (void*) cufft;
161:   PetscMalloc((ndim+1)*sizeof(PetscInt), &cufft->dim);
162:   PetscMemcpy(cufft->dim, dim, ndim*sizeof(PetscInt));

164:   cufft->ndim       = ndim;
165:   cufft->p_forward  = 0;
166:   cufft->p_backward = 0;
167:   cufft->dim[ndim]  = m;

169:   /* GPU memory allocation */
170:   cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);

172:   (*A)->ops->mult          = MatMult_SeqCUFFT;
173:   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
174:   (*A)->assembled          = PETSC_TRUE;
175:   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;

177:   /* get runtime options */
178:   PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
179:   PetscOptionsEnd();
180:   return(0);
181: }