Actual source code: ex3.c
petsc-3.5.4 2015-05-23
1: static char help[] = "Test to demonstrate interface for thread reductions and passing scalar values.\n\n";
3: /*T
4: Concepts: PetscThreadComm^basic example: Threaded reductions and passing scalar values
5: T*/
7: /*
8: Include "petscthreadcomm.h" so that we can use the PetscThreadComm interface.
9: */
10: #include <petscthreadcomm.h>
12: PetscInt *trstarts;
14: PetscErrorCode set_kernel(PetscInt myrank,PetscScalar *a,PetscScalar *alphap)
15: {
16: PetscScalar alpha=*alphap;
17: PetscInt i;
19: for (i=trstarts[myrank]; i < trstarts[myrank+1]; i++) a[i] = alpha+(PetscScalar)i;
21: return 0;
22: }
24: PetscErrorCode sum_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
25: {
26: PetscScalar my_sum=0.0;
27: PetscInt i;
29: for (i=trstarts[myrank]; i < trstarts[myrank+1]; i++) my_sum += a[i];
31: PetscThreadReductionKernelPost(myrank,red,&my_sum);
33: return 0;
34: }
36: PetscErrorCode max_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
37: {
38: PetscScalar my_max=a[trstarts[myrank]];
39: PetscInt i;
41: for (i=trstarts[myrank]+1;i < trstarts[myrank+1];i++) {
42: if (PetscRealPart(a[i]) > PetscRealPart(my_max)) my_max = a[i];
43: }
45: PetscThreadReductionKernelPost(myrank,red,&my_max);
47: return 0;
48: }
50: PetscErrorCode min_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
51: {
52: PetscScalar my_min=a[trstarts[myrank]];
53: PetscInt i;
55: for (i=trstarts[myrank]+1;i < trstarts[myrank+1];i++) {
56: if (PetscRealPart(a[i]) < PetscRealPart(my_min)) my_min = a[i];
57: }
59: PetscThreadReductionKernelPost(myrank,red,&my_min);
61: return 0;
62: }
64: PetscErrorCode maxloc_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
65: {
66: PetscScalar my_maxloc[2];
67: PetscInt i;
69: my_maxloc[0] = a[trstarts[myrank]];
70: my_maxloc[1] = trstarts[myrank];
71: for (i=trstarts[myrank]+1;i < trstarts[myrank+1];i++) {
72: if (PetscRealPart(a[i]) > PetscRealPart(my_maxloc[0])) {
73: my_maxloc[0] = a[i];
74: my_maxloc[1] = i;
75: }
76: }
78: PetscThreadReductionKernelPost(myrank,red,my_maxloc);
80: return 0;
81: }
83: PetscErrorCode minloc_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
84: {
85: PetscScalar my_minloc[2];
86: PetscInt i;
88: my_minloc[0] = a[trstarts[myrank]];
89: my_minloc[1] = trstarts[myrank];
90: for (i=trstarts[myrank]+1;i < trstarts[myrank+1];i++) {
91: if (PetscRealPart(a[i]) < PetscRealPart(my_minloc[0]))
92: {
93: my_minloc[0] = a[i];
94: my_minloc[1] = i;
95: }
96: }
98: PetscThreadReductionKernelPost(myrank,red,my_minloc);
100: return 0;
101: }
103: PetscErrorCode mult_reds_kernel(PetscInt myrank,PetscScalar *a,PetscThreadCommReduction red)
104: {
105: minloc_kernel(myrank,a,red);
106: maxloc_kernel(myrank,a,red);
107: return 0;
108: }
112: int main(int argc,char **argv)
113: {
114: PetscErrorCode ierr;
115: PetscInt N=8;
116: PetscScalar *a,sum=0.0,alpha=0.0,*scalar,max=0.0,min=N,maxloc[2],minloc[2];
117: PetscThreadCommReduction red;
119: PetscInitialize(&argc,&argv,(char*)0,help);
121: PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD);
123: PetscOptionsGetInt(NULL,"-N",&N,NULL);
124: PetscMalloc1(N,&a);
126: /* Set thread ownership ranges for the array */
127: PetscThreadCommGetOwnershipRanges(PETSC_COMM_WORLD,N,&trstarts);
129: /* Set a[i] = 1.0 .. i = 1,N */
130: /* Get location to store the scalar value alpha from threadcomm */
131: PetscThreadCommGetScalars(PETSC_COMM_WORLD,&scalar,NULL,NULL);
132: *scalar = alpha;
133: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)set_kernel,2,a,scalar);
135: PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_SUM,PETSC_SCALAR,1,&red);
136: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)sum_kernel,2,a,red);
137: PetscThreadReductionEnd(red,&sum);
139: PetscThreadCommBarrier(PETSC_COMM_WORLD);
141: PetscPrintf(PETSC_COMM_SELF,"Sum(x) = %f\n",sum);
143: PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MAX,PETSC_SCALAR,1,&red);
144: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)max_kernel,2,a,red);
145: PetscThreadReductionEnd(red,&max);
147: PetscThreadCommBarrier(PETSC_COMM_WORLD);
149: PetscPrintf(PETSC_COMM_SELF,"Max = %f\n",PetscRealPart(max));
151: PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MIN,PETSC_SCALAR,1,&red);
152: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)min_kernel,2,a,red);
153: PetscThreadReductionEnd(red,&min);
155: PetscThreadCommBarrier(PETSC_COMM_WORLD);
157: PetscPrintf(PETSC_COMM_SELF,"Min = %f\n",PetscRealPart(min));
159: /* PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MAXLOC,PETSC_SCALAR,1,&red);
160: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)maxloc_kernel,2,a,red);
161: PetscThreadReductionEnd(red,maxloc);
163: PetscThreadCommBarrier(PETSC_COMM_WORLD);
165: PetscPrintf(PETSC_COMM_SELF,"Max = %f, location = %d\n",PetscRealPart(maxloc[0]),(PetscInt)maxloc[1]);
167: PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MINLOC,PETSC_SCALAR,1,&red);
168: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)minloc_kernel,2,a,red);
169: PetscThreadReductionEnd(red,minloc);
171: PetscThreadCommBarrier(PETSC_COMM_WORLD);
173: PetscPrintf(PETSC_COMM_SELF,"Min = %f, location = %d\n",PetscRealPart(minloc[0]),(PetscInt)minloc[1]);
174: */
175: PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MINLOC,PETSC_SCALAR,1,&red);
176: PetscThreadReductionBegin(PETSC_COMM_WORLD,THREADCOMM_MAXLOC,PETSC_SCALAR,1,&red);
178: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)mult_reds_kernel,2,a,red);
180: PetscThreadReductionEnd(red,minloc);
181: PetscThreadReductionEnd(red,maxloc);
183: PetscThreadCommBarrier(PETSC_COMM_WORLD);
185: PetscPrintf(PETSC_COMM_SELF,"Min = %f, location = %d\n",PetscRealPart(minloc[0]),(PetscInt)minloc[1]);
186: PetscPrintf(PETSC_COMM_SELF,"Max = %f, location = %d\n",PetscRealPart(maxloc[0]),(PetscInt)maxloc[1]);
189: PetscFree(a);
190: PetscFree(trstarts);
191: PetscFinalize();
192: return 0;
193: }