Actual source code: ex47cu.cu
petsc-3.4.5 2014-06-29
1: static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n";
2: /*
3: Same as ex47.c except it also uses the GPU to evaluate the function
4: */
6: #include <petscdmda.h>
7: #include <petscsnes.h>
8: #include <petsccusp.h>
10: #include <thrust/for_each.h>
11: #include <thrust/tuple.h>
12: #include <thrust/iterator/constant_iterator.h>
13: #include <thrust/iterator/counting_iterator.h>
14: #include <thrust/iterator/zip_iterator.h>
16: extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat*,Mat*,MatStructure*,void*);
17: PetscBool useCUSP = PETSC_FALSE;
19: int main(int argc,char **argv)
20: {
21: SNES snes;
22: Vec x,f;
23: Mat J;
24: DM da;
26: char *tmp,typeName[256];
27: PetscBool flg;
29: PetscInitialize(&argc,&argv,(char*)0,help);
30: PetscOptionsGetString(NULL,"-dm_vec_type",typeName,256,&flg);
31: if (flg) {
32: PetscStrstr(typeName,"cusp",&tmp);
33: if (tmp) useCUSP = PETSC_TRUE;
34: }
36: DMDACreate1d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,-8,1,1,NULL,&da);
37: DMCreateGlobalVector(da,&x); VecDuplicate(x,&f);
38: DMCreateMatrix(da,MATAIJ,&J);
40: SNESCreate(PETSC_COMM_WORLD,&snes);
41: SNESSetFunction(snes,f,ComputeFunction,da);
42: SNESSetJacobian(snes,J,J,ComputeJacobian,da);
43: SNESSetFromOptions(snes);
44: SNESSolve(snes,NULL,x);
46: MatDestroy(&J);
47: VecDestroy(&x);
48: VecDestroy(&f);
49: SNESDestroy(&snes);
50: DMDestroy(&da);
52: PetscFinalize();
53: return 0;
54: }
56: struct ApplyStencil
57: {
58: template <typename Tuple>
59: __host__ __device__
60: void operator()(Tuple t)
61: {
62: /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
63: thrust::get<0>(t) = 1;
64: if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) {
65: thrust::get<0>(t) = (2.0*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t));
66: } else if (thrust::get<4>(t) == 0) {
67: thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
68: } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) {
69: thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
70: }
71: }
72: };
74: PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx)
75: {
76: PetscInt i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize;
77: PetscScalar *xx,*ff,hx;
78: DM da = (DM) ctx;
79: Vec xlocal;
81: PetscMPIInt rank,size;
82: MPI_Comm comm;
83: cusp::array1d<PetscScalar,cusp::device_memory> *xarray,*farray;
85: DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);
86: hx = 1.0/(PetscReal)(Mx-1);
87: DMGetLocalVector(da,&xlocal);
88: DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);
89: DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);
91: if (useCUSP) {
92: VecCUSPGetArrayRead(xlocal,&xarray);
93: VecCUSPGetArrayWrite(f,&farray);
94: PetscObjectGetComm((PetscObject)da,&comm);
95: MPI_Comm_size(comm,&size);
96: MPI_Comm_rank(comm,&rank);
97: if (rank) xstartshift = 1;
98: else xstartshift = 0;
99: if (rank != size-1) xendshift = 1;
100: else xendshift = 0;
101: VecGetOwnershipRange(f,&fstart,NULL);
102: VecGetLocalSize(x,&lsize);
103: try {
104: thrust::for_each(
105: thrust::make_zip_iterator(
106: thrust::make_tuple(
107: farray->begin(),
108: xarray->begin()+xstartshift,
109: xarray->begin()+xstartshift + 1,
110: xarray->begin()+xstartshift - 1,
111: thrust::counting_iterator<int>(fstart),
112: thrust::constant_iterator<int>(Mx),
113: thrust::constant_iterator<PetscScalar>(hx))),
114: thrust::make_zip_iterator(
115: thrust::make_tuple(
116: farray->end(),
117: xarray->end()-xendshift,
118: xarray->end()-xendshift + 1,
119: xarray->end()-xendshift - 1,
120: thrust::counting_iterator<int>(fstart) + lsize,
121: thrust::constant_iterator<int>(Mx),
122: thrust::constant_iterator<PetscScalar>(hx))),
123: ApplyStencil());
124: }
125: catch (char *all) {
126: PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n");
127: }
128: VecCUSPRestoreArrayRead(xlocal,&xarray);
129: VecCUSPRestoreArrayWrite(f,&farray);
130: } else {
131: DMDAVecGetArray(da,xlocal,&xx);
132: DMDAVecGetArray(da,f,&ff);
133: DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);
135: for (i=xs; i<xs+xm; i++) {
136: if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx;
137: else ff[i] = (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]);
138: }
139: DMDAVecRestoreArray(da,xlocal,&xx);
140: DMDAVecRestoreArray(da,f,&ff);
141: }
142: DMRestoreLocalVector(da,&xlocal);
143: // VecView(x,0);printf("f\n");
144: // VecView(f,0);
145: return 0;
147: }
148: PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat *J,Mat *B,MatStructure *flag,void *ctx)
149: {
150: DM da = (DM) ctx;
151: PetscInt i,Mx,xm,xs;
152: PetscScalar hx,*xx;
153: Vec xlocal;
156: DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);
157: hx = 1.0/(PetscReal)(Mx-1);
158: DMGetLocalVector(da,&xlocal);DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);
159: DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);
160: DMDAVecGetArray(da,xlocal,&xx);
161: DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);
163: for (i=xs; i<xs+xm; i++) {
164: if (i == 0 || i == Mx-1) {
165: MatSetValue(*J,i,i,1.0/hx,INSERT_VALUES);
166: } else {
167: MatSetValue(*J,i,i-1,-1.0/hx,INSERT_VALUES);
168: MatSetValue(*J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES);
169: MatSetValue(*J,i,i+1,-1.0/hx,INSERT_VALUES);
170: }
171: }
172: MatAssemblyBegin(*J,MAT_FINAL_ASSEMBLY);
173: MatAssemblyEnd(*J,MAT_FINAL_ASSEMBLY);
174: *flag = SAME_NONZERO_PATTERN;
175: DMDAVecRestoreArray(da,xlocal,&xx);
176: DMRestoreLocalVector(da,&xlocal);
177: return 0;
178: }