Actual source code: ex3.c

petsc-3.4.5 2014-06-29
  1: static char help[] = "Test to demonstrate interface for thread reductions and passing scalar values.\n\n";

  3: /*T
  4:    Concepts: PetscThreadComm^basic example: Threaded reductions and passing scalar values
  5: T*/

  7: /*
  8:   Include "petscthreadcomm.h" so that we can use the PetscThreadComm interface.
  9: */
 10: #include <petscthreadcomm.h>

 12: PetscInt *trstarts;

 14: PetscErrorCode set_kernel(PetscInt myrank,PetscScalar *a,PetscScalar *alphap)
 15: {
 16:   PetscScalar alpha=*alphap;
 17:   PetscInt    i;

 19:   for (i=trstarts[myrank]; i < trstarts[myrank+1]; i++) a[i] = alpha+(PetscScalar)i;

 21:   return 0;
 22: }

 24: PetscErrorCode sum_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
 25: {
 26:   PetscScalar my_sum=0.0;
 27:   PetscInt    i;

 29:   for (i=trstarts[myrank]; i < trstarts[myrank+1]; i++) my_sum += a[i];

 31:   PetscThreadReductionKernelPost(myrank,red,&my_sum);

 33:   return 0;
 34: }

 36: PetscErrorCode max_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
 37: {
 38:   PetscScalar my_max=a[trstarts[myrank]];
 39:   PetscInt    i;

 41:   for (i=trstarts[myrank]+1;i < trstarts[myrank+1];i++) {
 42:     if (PetscRealPart(a[i]) > PetscRealPart(my_max)) my_max = a[i];
 43:   }

 45:   PetscThreadReductionKernelPost(myrank,red,&my_max);

 47:   return 0;
 48: }

 50: PetscErrorCode min_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
 51: {
 52:   PetscScalar my_min=a[trstarts[myrank]];
 53:   PetscInt    i;

 55:   for (i=trstarts[myrank]+1;i < trstarts[myrank+1];i++) {
 56:     if (PetscRealPart(a[i]) < PetscRealPart(my_min)) my_min = a[i];
 57:   }

 59:   PetscThreadReductionKernelPost(myrank,red,&my_min);

 61:   return 0;
 62: }

 64: PetscErrorCode maxloc_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
 65: {
 66:   PetscScalar my_maxloc[2];
 67:   PetscInt    i;

 69:   my_maxloc[0] = a[trstarts[myrank]];
 70:   my_maxloc[1] = trstarts[myrank];
 71:   for (i=trstarts[myrank]+1;i < trstarts[myrank+1];i++) {
 72:     if (PetscRealPart(a[i]) > PetscRealPart(my_maxloc[0])) {
 73:       my_maxloc[0] = a[i];
 74:       my_maxloc[1] = i;
 75:     }
 76:   }

 78:   PetscThreadReductionKernelPost(myrank,red,my_maxloc);

 80:   return 0;
 81: }

 83: PetscErrorCode minloc_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
 84: {
 85:   PetscScalar my_minloc[2];
 86:   PetscInt    i;

 88:   my_minloc[0] = a[trstarts[myrank]];
 89:   my_minloc[1] = trstarts[myrank];
 90:   for (i=trstarts[myrank]+1;i < trstarts[myrank+1];i++) {
 91:     if (PetscRealPart(a[i]) < PetscRealPart(my_minloc[0]))
 92:     {
 93:       my_minloc[0] = a[i];
 94:       my_minloc[1] = i;
 95:     }
 96:   }

 98:   PetscThreadReductionKernelPost(myrank,red,my_minloc);

100:   return 0;
101: }

103: PetscErrorCode mult_reds_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
104: {
105:   minloc_kernel(myrank,a,red);
106:   maxloc_kernel(myrank,a,red);
107:   return 0;
108: }

112: int main(int argc,char **argv)
113: {
114:   PetscErrorCode           ierr;
115:   PetscInt                 N=8;
116:   PetscScalar              *a,sum=0.0,alpha=0.0,*scalar,max=0.0,min=N,maxloc[2],minloc[2];
117:   PetscThreadCommReduction red;

119:   PetscInitialize(&argc,&argv,(char*)0,help);

121:   PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD);

123:   PetscOptionsGetInt(NULL,"-N",&N,NULL);
124:   PetscMalloc(N*sizeof(PetscScalar),&a);

126:   /* Set thread ownership ranges for the array */
127:   PetscThreadCommGetOwnershipRanges(PETSC_COMM_WORLD,N,&trstarts);

129:   /* Set a[i] = 1.0 .. i = 1,N */
130:   /* Get location to store the scalar value alpha from threadcomm */
131:   PetscThreadCommGetScalars(PETSC_COMM_WORLD,&scalar,NULL,NULL);
132:   *scalar = alpha;
133:   PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)set_kernel,2,a,scalar);

135:   PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_SUM,PETSC_SCALAR,1,&red);
136:   PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)sum_kernel,2,a,red);
137:   PetscThreadReductionEnd(red,&sum);

139:   PetscThreadCommBarrier(PETSC_COMM_WORLD);

141:   PetscPrintf(PETSC_COMM_SELF,"Sum(x) = %f\n",sum);

143:   PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MAX,PETSC_SCALAR,1,&red);
144:   PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)max_kernel,2,a,red);
145:   PetscThreadReductionEnd(red,&max);

147:   PetscThreadCommBarrier(PETSC_COMM_WORLD);

149:   PetscPrintf(PETSC_COMM_SELF,"Max = %f\n",PetscRealPart(max));

151:   PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MIN,PETSC_SCALAR,1,&red);
152:   PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)min_kernel,2,a,red);
153:   PetscThreadReductionEnd(red,&min);

155:   PetscThreadCommBarrier(PETSC_COMM_WORLD);

157:   PetscPrintf(PETSC_COMM_SELF,"Min = %f\n",PetscRealPart(min));

159:   /*  PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MAXLOC,PETSC_SCALAR,1,&red);
160:   PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)maxloc_kernel,2,a,red);
161:   PetscThreadReductionEnd(red,maxloc);

163:   PetscThreadCommBarrier(PETSC_COMM_WORLD);

165:   PetscPrintf(PETSC_COMM_SELF,"Max = %f, location = %d\n",PetscRealPart(maxloc[0]),(PetscInt)maxloc[1]);

167:   PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MINLOC,PETSC_SCALAR,1,&red);
168:   PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)minloc_kernel,2,a,red);
169:   PetscThreadReductionEnd(red,minloc);

171:   PetscThreadCommBarrier(PETSC_COMM_WORLD);

173:   PetscPrintf(PETSC_COMM_SELF,"Min = %f, location = %d\n",PetscRealPart(minloc[0]),(PetscInt)minloc[1]);
174: */
175:   PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MINLOC,PETSC_SCALAR,1,&red);
176:   PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MAXLOC,PETSC_SCALAR,1,&red);

178:   PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)mult_reds_kernel,2,a,red);

180:   PetscThreadReductionEnd(red,minloc);
181:   PetscThreadReductionEnd(red,maxloc);

183:   PetscThreadCommBarrier(PETSC_COMM_WORLD);

185:   PetscPrintf(PETSC_COMM_SELF,"Min = %f, location = %d\n",PetscRealPart(minloc[0]),(PetscInt)minloc[1]);
186:   PetscPrintf(PETSC_COMM_SELF,"Max = %f, location = %d\n",PetscRealPart(maxloc[0]),(PetscInt)maxloc[1]);


189:   PetscFree(a);
190:   PetscFree(trstarts);
191:   PetscFinalize();
192:   return 0;
193: }