Actual source code: ex47cu.cu

  1: static char help[] = "Solves -Laplacian u - exp(u) = 0,  0 < x < 1 using GPU\n\n";
  2: /*
  3:    Same as ex47.c except it also uses the GPU to evaluate the function
  4: */

  6: #include <petscdm.h>
  7: #include <petscdmda.h>
  8: #include <petscsnes.h>

 10: #include <thrust/device_ptr.h>
 11: #include <thrust/for_each.h>
 12: #include <thrust/tuple.h>
 13: #include <thrust/iterator/constant_iterator.h>
 14: #include <thrust/iterator/counting_iterator.h>
 15: #include <thrust/iterator/zip_iterator.h>

 17: extern PetscErrorCode ComputeFunction(SNES, Vec, Vec, void *), ComputeJacobian(SNES, Vec, Mat, Mat, void *);
 18: PetscBool             useCUDA = PETSC_FALSE;

 20: int main(int argc, char **argv)
 21: {
 22:   SNES      snes;
 23:   Vec       x, f;
 24:   Mat       J;
 25:   DM        da;
 26:   char     *tmp, typeName[256];
 27:   PetscBool flg;

 30:   PetscInitialize(&argc, &argv, (char *)0, help);
 31:   PetscOptionsGetString(NULL, NULL, "-dm_vec_type", typeName, sizeof(typeName), &flg);
 32:   if (flg) {
 33:     PetscStrstr(typeName, "cuda", &tmp);
 34:     if (tmp) useCUDA = PETSC_TRUE;
 35:   }

 37:   DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 8, 1, 1, NULL, &da);
 38:   DMSetFromOptions(da);
 39:   DMSetUp(da);
 40:   DMCreateGlobalVector(da, &x);
 41:   VecDuplicate(x, &f);
 42:   DMCreateMatrix(da, &J);

 44:   SNESCreate(PETSC_COMM_WORLD, &snes);
 45:   SNESSetFunction(snes, f, ComputeFunction, da);
 46:   SNESSetJacobian(snes, J, J, ComputeJacobian, da);
 47:   SNESSetFromOptions(snes);
 48:   SNESSolve(snes, NULL, x);

 50:   MatDestroy(&J);
 51:   VecDestroy(&x);
 52:   VecDestroy(&f);
 53:   SNESDestroy(&snes);
 54:   DMDestroy(&da);

 56:   PetscFinalize();
 57:   return 0;
 58: }

 60: struct ApplyStencil {
 61:   template <typename Tuple>
 62:   __host__ __device__ void operator()(Tuple t)
 63:   {
 64:     /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
 65:     thrust::get<0>(t) = 1;
 66:     if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t) - 1)) {
 67:       thrust::get<0>(t) = (((PetscScalar)2.0) * thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t)) * exp(thrust::get<1>(t));
 68:     } else if (thrust::get<4>(t) == 0) {
 69:       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
 70:     } else if (thrust::get<4>(t) == thrust::get<5>(t) - 1) {
 71:       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
 72:     }
 73:   }
 74: };

 76: PetscErrorCode ComputeFunction(SNES snes, Vec x, Vec f, void *ctx)
 77: {
 78:   PetscInt           i, Mx, xs, xm, xstartshift, xendshift, fstart, lsize;
 79:   PetscScalar       *xx, *ff, hx;
 80:   DM                 da = (DM)ctx;
 81:   Vec                xlocal;
 82:   PetscMPIInt        rank, size;
 83:   MPI_Comm           comm;
 84:   PetscScalar const *xarray;
 85:   PetscScalar       *farray;

 87:   DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE);
 88:   hx = 1.0 / (PetscReal)(Mx - 1);
 89:   DMGetLocalVector(da, &xlocal);
 90:   DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal);
 91:   DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal);

 93:   if (useCUDA) {
 94:     PetscObjectGetComm((PetscObject)da, &comm);
 95:     MPI_Comm_size(comm, &size);
 96:     MPI_Comm_rank(comm, &rank);
 97:     VecCUDAGetArrayRead(xlocal, &xarray);
 98:     VecCUDAGetArrayWrite(f, &farray);
 99:     if (rank) xstartshift = 1;
100:     else xstartshift = 0;
101:     if (rank != size - 1) xendshift = 1;
102:     else xendshift = 0;
103:     VecGetOwnershipRange(f, &fstart, NULL);
104:     VecGetLocalSize(x, &lsize);
105:     // clang-format off
106:     try {
107:       thrust::for_each(
108:         thrust::make_zip_iterator(
109:           thrust::make_tuple(
110:             thrust::device_ptr<PetscScalar>(farray),
111:             thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
112:             thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
113:             thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
114:             thrust::counting_iterator<int>(fstart),
115:             thrust::constant_iterator<int>(Mx),
116:             thrust::constant_iterator<PetscScalar>(hx))),
117:         thrust::make_zip_iterator(
118:           thrust::make_tuple(
119:             thrust::device_ptr<PetscScalar>(farray + lsize),
120:             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
121:             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
122:             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
123:             thrust::counting_iterator<int>(fstart) + lsize,
124:             thrust::constant_iterator<int>(Mx),
125:             thrust::constant_iterator<PetscScalar>(hx))),
126:         ApplyStencil());
127:     }
128:     // clang-format on
129:     catch (char *all) {
130:       PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n");
131:     }
132:     VecCUDARestoreArrayRead(xlocal, &xarray);
133:     VecCUDARestoreArrayWrite(f, &farray);
134:   } else {
135:     DMDAVecGetArray(da, xlocal, &xx);
136:     DMDAVecGetArray(da, f, &ff);
137:     DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL);

139:     for (i = xs; i < xs + xm; i++) {
140:       if (i == 0 || i == Mx - 1) ff[i] = xx[i] / hx;
141:       else ff[i] = (2.0 * xx[i] - xx[i - 1] - xx[i + 1]) / hx - hx * PetscExpScalar(xx[i]);
142:     }
143:     DMDAVecRestoreArray(da, xlocal, &xx);
144:     DMDAVecRestoreArray(da, f, &ff);
145:   }
146:   DMRestoreLocalVector(da, &xlocal);
147:   return 0;
148: }
149: PetscErrorCode ComputeJacobian(SNES snes, Vec x, Mat J, Mat B, void *ctx)
150: {
151:   DM          da = (DM)ctx;
152:   PetscInt    i, Mx, xm, xs;
153:   PetscScalar hx, *xx;
154:   Vec         xlocal;

156:   DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE);
157:   hx = 1.0 / (PetscReal)(Mx - 1);
158:   DMGetLocalVector(da, &xlocal);
159:   DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal);
160:   DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal);
161:   DMDAVecGetArray(da, xlocal, &xx);
162:   DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL);

164:   for (i = xs; i < xs + xm; i++) {
165:     if (i == 0 || i == Mx - 1) {
166:       MatSetValue(J, i, i, 1.0 / hx, INSERT_VALUES);
167:     } else {
168:       MatSetValue(J, i, i - 1, -1.0 / hx, INSERT_VALUES);
169:       MatSetValue(J, i, i, 2.0 / hx - hx * PetscExpScalar(xx[i]), INSERT_VALUES);
170:       MatSetValue(J, i, i + 1, -1.0 / hx, INSERT_VALUES);
171:     }
172:   }
173:   MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY);
174:   MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY);
175:   DMDAVecRestoreArray(da, xlocal, &xx);
176:   DMRestoreLocalVector(da, &xlocal);
177:   return 0;
178: }

180: /*TEST

182:    build:
183:       requires: cuda

185:    testset:
186:       args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda
187:       output_file: output/ex47cu_1.out
188:       test:
189:         suffix: 1
190:         nsize:  1
191:       test:
192:         suffix: 2
193:         nsize:  2

195: TEST*/