Actual source code: ex47cu.cu
petsc-3.13.6 2020-09-29
1: static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n";
2: /*
3: Same as ex47.c except it also uses the GPU to evaluate the function
4: */
6: #include <petscdm.h>
7: #include <petscdmda.h>
8: #include <petscsnes.h>
10: #include <thrust/device_ptr.h>
11: #include <thrust/for_each.h>
12: #include <thrust/tuple.h>
13: #include <thrust/iterator/constant_iterator.h>
14: #include <thrust/iterator/counting_iterator.h>
15: #include <thrust/iterator/zip_iterator.h>
17: extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat,Mat,void*);
18: PetscBool useCUDA = PETSC_FALSE;
20: int main(int argc,char **argv)
21: {
22: SNES snes;
23: Vec x,f;
24: Mat J;
25: DM da;
27: char *tmp,typeName[256];
28: PetscBool flg;
30: PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
31: PetscOptionsGetString(NULL,NULL,"-dm_vec_type",typeName,256,&flg);
32: if (flg) {
33: PetscStrstr(typeName,"cuda",&tmp);
34: if (tmp) useCUDA = PETSC_TRUE;
35: }
37: DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,8,1,1,NULL,&da);
38: DMSetFromOptions(da);
39: DMSetUp(da);
40: DMCreateGlobalVector(da,&x); VecDuplicate(x,&f);
41: DMCreateMatrix(da,&J);
43: SNESCreate(PETSC_COMM_WORLD,&snes);
44: SNESSetFunction(snes,f,ComputeFunction,da);
45: SNESSetJacobian(snes,J,J,ComputeJacobian,da);
46: SNESSetFromOptions(snes);
47: SNESSolve(snes,NULL,x);
49: MatDestroy(&J);
50: VecDestroy(&x);
51: VecDestroy(&f);
52: SNESDestroy(&snes);
53: DMDestroy(&da);
55: PetscFinalize();
56: return ierr;
57: }
59: struct ApplyStencil
60: {
61: template <typename Tuple>
62: __host__ __device__
63: void operator()(Tuple t)
64: {
65: /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
66: thrust::get<0>(t) = 1;
67: if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) {
68: thrust::get<0>(t) = (2.0*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t));
69: } else if (thrust::get<4>(t) == 0) {
70: thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
71: } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) {
72: thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
73: }
74: }
75: };
77: PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx)
78: {
79: PetscInt i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize;
80: PetscScalar *xx,*ff,hx;
81: DM da = (DM) ctx;
82: Vec xlocal;
83: PetscErrorCode ierr;
84: PetscMPIInt rank,size;
85: MPI_Comm comm;
86: PetscScalar const *xarray;
87: PetscScalar *farray;
89: DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);
90: hx = 1.0/(PetscReal)(Mx-1);
91: DMGetLocalVector(da,&xlocal);
92: DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);
93: DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);
95: if (useCUDA) {
96: VecCUDAGetArrayRead(xlocal,&xarray);
97: VecCUDAGetArrayWrite(f,&farray);
98: PetscObjectGetComm((PetscObject)da,&comm);
99: MPI_Comm_size(comm,&size);
100: MPI_Comm_rank(comm,&rank);
101: if (rank) xstartshift = 1;
102: else xstartshift = 0;
103: if (rank != size-1) xendshift = 1;
104: else xendshift = 0;
105: VecGetOwnershipRange(f,&fstart,NULL);
106: VecGetLocalSize(x,&lsize);
107: try {
108: thrust::for_each(
109: thrust::make_zip_iterator(
110: thrust::make_tuple(
111: thrust::device_ptr<PetscScalar>(farray),
112: thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
113: thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
114: thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
115: thrust::counting_iterator<int>(fstart),
116: thrust::constant_iterator<int>(Mx),
117: thrust::constant_iterator<PetscScalar>(hx))),
118: thrust::make_zip_iterator(
119: thrust::make_tuple(
120: thrust::device_ptr<PetscScalar>(farray + lsize),
121: thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
122: thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
123: thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
124: thrust::counting_iterator<int>(fstart) + lsize,
125: thrust::constant_iterator<int>(Mx),
126: thrust::constant_iterator<PetscScalar>(hx))),
127: ApplyStencil());
128: }
129: catch (char *all) {
130: PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n");
131: }
132: VecCUDARestoreArrayRead(xlocal,&xarray);
133: VecCUDARestoreArrayWrite(f,&farray);
134: } else {
135: DMDAVecGetArray(da,xlocal,&xx);
136: DMDAVecGetArray(da,f,&ff);
137: DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);
139: for (i=xs; i<xs+xm; i++) {
140: if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx;
141: else ff[i] = (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]);
142: }
143: DMDAVecRestoreArray(da,xlocal,&xx);
144: DMDAVecRestoreArray(da,f,&ff);
145: }
146: DMRestoreLocalVector(da,&xlocal);
147: // VecView(x,0);printf("f\n");
148: // VecView(f,0);
149: return 0;
151: }
152: PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat J,Mat B,void *ctx)
153: {
154: DM da = (DM) ctx;
155: PetscInt i,Mx,xm,xs;
156: PetscScalar hx,*xx;
157: Vec xlocal;
160: DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);
161: hx = 1.0/(PetscReal)(Mx-1);
162: DMGetLocalVector(da,&xlocal);DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);
163: DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);
164: DMDAVecGetArray(da,xlocal,&xx);
165: DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);
167: for (i=xs; i<xs+xm; i++) {
168: if (i == 0 || i == Mx-1) {
169: MatSetValue(J,i,i,1.0/hx,INSERT_VALUES);
170: } else {
171: MatSetValue(J,i,i-1,-1.0/hx,INSERT_VALUES);
172: MatSetValue(J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES);
173: MatSetValue(J,i,i+1,-1.0/hx,INSERT_VALUES);
174: }
175: }
176: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
177: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
178: DMDAVecRestoreArray(da,xlocal,&xx);
179: DMRestoreLocalVector(da,&xlocal);
180: return 0;
181: }
185: /*TEST
187: build:
188: requires: cuda
190: testset:
191: args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda
192: output_file: output/ex47cu_1.out
193: test:
194: suffix: 1
195: nsize: 1
196: test:
197: suffix: 2
198: nsize: 2
200: TEST*/