Actual source code: cufft.cu


  2: /*
  3:     Provides an interface to the CUFFT package.
  4:     Testing examples can be found in ~src/mat/tests
  5: */

  7: #include <petscdevice.h>
  8: #include <petsc/private/matimpl.h>

 10: typedef struct {
 11:   PetscInt     ndim;
 12:   PetscInt     *dim;
 13:   cufftHandle  p_forward, p_backward;
 14:   cufftComplex *devArray;
 15: } Mat_CUFFT;

 17: PetscErrorCode MatMult_SeqCUFFT(Mat A, Vec x, Vec y)
 18: {
 19:   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
 20:   cufftComplex   *devArray = cufft->devArray;
 21:   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
 22:   PetscScalar    *x_array, *y_array;
 23:   cufftResult    result;
 24:   cudaError_t    cerr;

 28:   VecGetArray(x, &x_array);
 29:   VecGetArray(y, &y_array);
 30:   if (!cufft->p_forward) {
 31:     cufftResult result;
 32:     /* create a plan, then execute it */
 33:     switch (ndim) {
 34:     case 1:
 35:       result = cufftPlan1d(&cufft->p_forward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result);
 36:       break;
 37:     case 2:
 38:       result = cufftPlan2d(&cufft->p_forward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result);
 39:       break;
 40:     case 3:
 41:       result = cufftPlan3d(&cufft->p_forward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result);
 42:       break;
 43:     default:
 44:       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
 45:     }
 46:   }
 47:   /* transfer to GPU memory */
 48:   cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr);
 49:   /* execute transform */
 50:   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_FORWARD);CHKERRCUFFT(result);
 51:   /* transfer from GPU memory */
 52:   cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr);
 53:   VecRestoreArray(y, &y_array);
 54:   VecRestoreArray(x, &x_array);
 55:   return(0);
 56: }

 58: PetscErrorCode MatMultTranspose_SeqCUFFT(Mat A, Vec x, Vec y)
 59: {
 60:   Mat_CUFFT      *cufft    = (Mat_CUFFT*) A->data;
 61:   cufftComplex   *devArray = cufft->devArray;
 62:   PetscInt       ndim      = cufft->ndim, *dim = cufft->dim;
 63:   PetscScalar    *x_array, *y_array;
 64:   cufftResult    result;
 65:   cudaError_t    cerr;

 69:   VecGetArray(x, &x_array);
 70:   VecGetArray(y, &y_array);
 71:   if (!cufft->p_backward) {
 72:     /* create a plan, then execute it */
 73:     switch (ndim) {
 74:     case 1:
 75:       result = cufftPlan1d(&cufft->p_backward, dim[0], CUFFT_C2C, 1);CHKERRCUFFT(result);
 76:       break;
 77:     case 2:
 78:       result = cufftPlan2d(&cufft->p_backward, dim[0], dim[1], CUFFT_C2C);CHKERRCUFFT(result);
 79:       break;
 80:     case 3:
 81:       result = cufftPlan3d(&cufft->p_backward, dim[0], dim[1], dim[2], CUFFT_C2C);CHKERRCUFFT(result);
 82:       break;
 83:     default:
 84:       SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "Cannot create plan for %d-dimensional transform", ndim);
 85:     }
 86:   }
 87:   /* transfer to GPU memory */
 88:   cerr = cudaMemcpy(devArray, x_array, sizeof(cufftComplex)*dim[ndim], cudaMemcpyHostToDevice);CHKERRCUDA(cerr);
 89:   /* execute transform */
 90:   result = cufftExecC2C(cufft->p_forward, devArray, devArray, CUFFT_INVERSE);CHKERRCUFFT(result);
 91:   /* transfer from GPU memory */
 92:   cerr = cudaMemcpy(y_array, devArray, sizeof(cufftComplex)*dim[ndim], cudaMemcpyDeviceToHost);CHKERRCUDA(cerr);
 93:   VecRestoreArray(y, &y_array);
 94:   VecRestoreArray(x, &x_array);
 95:   return(0);
 96: }

 98: PetscErrorCode MatDestroy_SeqCUFFT(Mat A)
 99: {
100:   Mat_CUFFT      *cufft = (Mat_CUFFT*) A->data;
101:   cufftResult    result;
102:   cudaError_t    cerr;

106:   PetscFree(cufft->dim);
107:   if (cufft->p_forward)  {result = cufftDestroy(cufft->p_forward);CHKERRCUFFT(result);}
108:   if (cufft->p_backward) {result = cufftDestroy(cufft->p_backward);CHKERRCUFFT(result);}
109:   cerr = cudaFree(cufft->devArray);CHKERRCUDA(cerr);
110:   PetscFree(A->data);
111:   PetscObjectChangeTypeName((PetscObject)A,0);
112:   return(0);
113: }

115: /*@
116:   MatCreateSeqCUFFT - Creates a matrix object that provides sequential FFT via the external package CUFFT

118:   Collective

120:   Input Parameters:
121: + comm - MPI communicator, set to PETSC_COMM_SELF
122: . ndim - the ndim-dimensional transform
123: - dim  - array of size ndim, dim[i] contains the vector length in the i-dimension

125:   Output Parameter:
126: . A - the matrix

128:   Options Database Keys:
129: . -mat_cufft_plannerflags - set CUFFT planner flags

131:   Level: intermediate
132: @*/
133: PetscErrorCode  MatCreateSeqCUFFT(MPI_Comm comm, PetscInt ndim, const PetscInt dim[], Mat *A)
134: {
135:   Mat_CUFFT      *cufft;
136:   PetscInt       m, d;
138:   cudaError_t    cerr;

141:   if (ndim < 0) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_USER, "ndim %d must be > 0", ndim);
142:   MatCreate(comm, A);
143:   m    = 1;
144:   for (d = 0; d < ndim; ++d) {
145:     if (dim[d] < 0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_USER, "dim[%d]=%d must be > 0", d, dim[d]);
146:     m *= dim[d];
147:   }
148:   MatSetSizes(*A, m, m, m, m);
149:   PetscObjectChangeTypeName((PetscObject)*A, MATSEQCUFFT);

151:   PetscNewLog(*A,&cufft);
152:   (*A)->data = (void*) cufft;
153:   PetscMalloc1(ndim+1, &cufft->dim);
154:   PetscArraycpy(cufft->dim, dim, ndim);

156:   cufft->ndim       = ndim;
157:   cufft->p_forward  = 0;
158:   cufft->p_backward = 0;
159:   cufft->dim[ndim]  = m;

161:   /* GPU memory allocation */
162:   cerr = cudaMalloc((void**) &cufft->devArray, sizeof(cufftComplex)*m);CHKERRCUDA(cerr);

164:   (*A)->ops->mult          = MatMult_SeqCUFFT;
165:   (*A)->ops->multtranspose = MatMultTranspose_SeqCUFFT;
166:   (*A)->assembled          = PETSC_TRUE;
167:   (*A)->ops->destroy       = MatDestroy_SeqCUFFT;

169:   /* get runtime options */
170:   PetscOptionsBegin(comm, ((PetscObject)(*A))->prefix, "CUFFT Options", "Mat");
171:   PetscOptionsEnd();
172:   return(0);
173: }