Actual source code: ex47cu.cu
petsc-3.5.4 2015-05-23
1: static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n";
2: /*
3: Same as ex47.c except it also uses the GPU to evaluate the function
4: */
6: #include <petscdm.h>
7: #include <petscdmda.h>
8: #include <petscsnes.h>
9: #include <petsccusp.h>
11: #include <thrust/for_each.h>
12: #include <thrust/tuple.h>
13: #include <thrust/iterator/constant_iterator.h>
14: #include <thrust/iterator/counting_iterator.h>
15: #include <thrust/iterator/zip_iterator.h>
17: extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat,Mat,void*);
18: PetscBool useCUSP = PETSC_FALSE;
20: int main(int argc,char **argv)
21: {
22: SNES snes;
23: Vec x,f;
24: Mat J;
25: DM da;
27: char *tmp,typeName[256];
28: PetscBool flg;
30: PetscInitialize(&argc,&argv,(char*)0,help);
31: PetscOptionsGetString(NULL,"-dm_vec_type",typeName,256,&flg);
32: if (flg) {
33: PetscStrstr(typeName,"cusp",&tmp);
34: if (tmp) useCUSP = PETSC_TRUE;
35: }
37: DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,-8,1,1,NULL,&da);
38: DMCreateGlobalVector(da,&x); VecDuplicate(x,&f);
39: DMSetMatType(da,MATAIJ);
40: DMCreateMatrix(da,&J);
42: SNESCreate(PETSC_COMM_WORLD,&snes);
43: SNESSetFunction(snes,f,ComputeFunction,da);
44: SNESSetJacobian(snes,J,J,ComputeJacobian,da);
45: SNESSetFromOptions(snes);
46: SNESSolve(snes,NULL,x);
48: MatDestroy(&J);
49: VecDestroy(&x);
50: VecDestroy(&f);
51: SNESDestroy(&snes);
52: DMDestroy(&da);
54: PetscFinalize();
55: return 0;
56: }
58: struct ApplyStencil
59: {
60: template <typename Tuple>
61: __host__ __device__
62: void operator()(Tuple t)
63: {
64: /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
65: thrust::get<0>(t) = 1;
66: if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) {
67: thrust::get<0>(t) = (2.0*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t));
68: } else if (thrust::get<4>(t) == 0) {
69: thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
70: } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) {
71: thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
72: }
73: }
74: };
76: PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx)
77: {
78: PetscInt i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize;
79: PetscScalar *xx,*ff,hx;
80: DM da = (DM) ctx;
81: Vec xlocal;
83: PetscMPIInt rank,size;
84: MPI_Comm comm;
85: cusp::array1d<PetscScalar,cusp::device_memory> *xarray,*farray;
87: DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);
88: hx = 1.0/(PetscReal)(Mx-1);
89: DMGetLocalVector(da,&xlocal);
90: DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);
91: DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);
93: if (useCUSP) {
94: VecCUSPGetArrayRead(xlocal,&xarray);
95: VecCUSPGetArrayWrite(f,&farray);
96: PetscObjectGetComm((PetscObject)da,&comm);
97: MPI_Comm_size(comm,&size);
98: MPI_Comm_rank(comm,&rank);
99: if (rank) xstartshift = 1;
100: else xstartshift = 0;
101: if (rank != size-1) xendshift = 1;
102: else xendshift = 0;
103: VecGetOwnershipRange(f,&fstart,NULL);
104: VecGetLocalSize(x,&lsize);
105: try {
106: thrust::for_each(
107: thrust::make_zip_iterator(
108: thrust::make_tuple(
109: farray->begin(),
110: xarray->begin()+xstartshift,
111: xarray->begin()+xstartshift + 1,
112: xarray->begin()+xstartshift - 1,
113: thrust::counting_iterator<int>(fstart),
114: thrust::constant_iterator<int>(Mx),
115: thrust::constant_iterator<PetscScalar>(hx))),
116: thrust::make_zip_iterator(
117: thrust::make_tuple(
118: farray->end(),
119: xarray->end()-xendshift,
120: xarray->end()-xendshift + 1,
121: xarray->end()-xendshift - 1,
122: thrust::counting_iterator<int>(fstart) + lsize,
123: thrust::constant_iterator<int>(Mx),
124: thrust::constant_iterator<PetscScalar>(hx))),
125: ApplyStencil());
126: }
127: catch (char *all) {
128: PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n");
129: }
130: VecCUSPRestoreArrayRead(xlocal,&xarray);
131: VecCUSPRestoreArrayWrite(f,&farray);
132: } else {
133: DMDAVecGetArray(da,xlocal,&xx);
134: DMDAVecGetArray(da,f,&ff);
135: DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);
137: for (i=xs; i<xs+xm; i++) {
138: if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx;
139: else ff[i] = (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]);
140: }
141: DMDAVecRestoreArray(da,xlocal,&xx);
142: DMDAVecRestoreArray(da,f,&ff);
143: }
144: DMRestoreLocalVector(da,&xlocal);
145: // VecView(x,0);printf("f\n");
146: // VecView(f,0);
147: return 0;
149: }
150: PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat J,Mat B,void *ctx)
151: {
152: DM da = (DM) ctx;
153: PetscInt i,Mx,xm,xs;
154: PetscScalar hx,*xx;
155: Vec xlocal;
158: DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);
159: hx = 1.0/(PetscReal)(Mx-1);
160: DMGetLocalVector(da,&xlocal);DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);
161: DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);
162: DMDAVecGetArray(da,xlocal,&xx);
163: DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);
165: for (i=xs; i<xs+xm; i++) {
166: if (i == 0 || i == Mx-1) {
167: MatSetValue(J,i,i,1.0/hx,INSERT_VALUES);
168: } else {
169: MatSetValue(J,i,i-1,-1.0/hx,INSERT_VALUES);
170: MatSetValue(J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES);
171: MatSetValue(J,i,i+1,-1.0/hx,INSERT_VALUES);
172: }
173: }
174: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
175: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
176: DMDAVecRestoreArray(da,xlocal,&xx);
177: DMRestoreLocalVector(da,&xlocal);
178: return 0;
179: }