Actual source code: ex5.c
petsc-3.5.4 2015-05-23
/* Help text for this example; main() hands it to PetscInitialize(). */
2: static char help[] = "Micro-benchmark kernel times.\n\n";
4: /*
5: Include "petscthreadcomm.h" so that we can use the PetscThreadComm interface.
6: */
7: #include <petscthreadcomm.h>
8: #include <petsc-private/threadcommimpl.h>
9: #include <petscvec.h>
10: #include <petsctime.h>
11: #if defined(PETSC_HAVE_OPENMP)
12: # include <omp.h>
13: #endif
15: static PetscErrorCode CounterInit_kernel(PetscInt trank,PetscInt **counters)
16: {
17: counters[trank] = malloc(sizeof(PetscInt)); /* Separate allocation per thread */
18: *counters[trank] = 0; /* Initialize memory to fault it */
19: return 0;
20: }
22: static PetscErrorCode CounterIncrement_kernel(PetscInt trank,PetscInt **counters)
23: {
24: (*counters[trank])++;
25: return 0;
26: }
28: static PetscErrorCode CounterFree_kernel(PetscInt trank,PetscInt **counters)
29: {
30: free(counters[trank]);
31: return 0;
32: }
36: int main(int argc,char **argv)
37: {
39: PetscInt i,j,k,N=100,**counters,tsize;
41: PetscInitialize(&argc,&argv,(char*)0,help);
43: PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD);
44: PetscOptionsGetInt(NULL,"-N",&N,NULL);
46: PetscThreadCommGetNThreads(PETSC_COMM_WORLD,&tsize);
47: PetscMalloc1(tsize,&counters);
48: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterInit_kernel,1,counters);
50: for (i=0; i<10; i++) {
51: PetscReal t0,t1;
52: PetscThreadCommBarrier(PETSC_COMM_WORLD);
53: PetscTime(&t0);
54: for (j=0; j<N; j++) {
55: /* PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,1,counters); */
56: PetscThreadCommRunKernel1(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,counters);
57: }
58: PetscThreadCommBarrier(PETSC_COMM_WORLD);
59: PetscTime(&t1);
60: PetscPrintf(PETSC_COMM_WORLD,"Time per kernel: %g us\n",1e6*(t1-t0)/N);
61: }
63: for (i=0; i<10; i++) {
64: PetscReal t0,t1;
65: PetscThreadCommBarrier(PETSC_COMM_WORLD);
66: PetscTime(&t0);
67: for (j=0; j<N; j++) {
68: #pragma omp parallel num_threads(tsize)
69: {
70: PetscInt trank = omp_get_thread_num();
71: CounterIncrement_kernel(trank,counters);
72: }
73: }
74: PetscThreadCommBarrier(PETSC_COMM_WORLD);
75: PetscTime(&t1);
76: PetscPrintf(PETSC_COMM_WORLD,"OpenMP inline time per kernel: %g us\n",1e6*(t1-t0)/N);
77: }
79: for (i=0; i<10; i++) {
80: PetscReal t0,t1;
81: PetscTime(&t0);
82: for (j=0; j<N; j++) CounterIncrement_kernel(0,counters);
83: PetscTime(&t1);
84: PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per single kernel: %g us\n",1e6*(t1-t0)/N);
85: }
87: for (i=0; i<10; i++) {
88: PetscReal t0,t1;
89: PetscTime(&t0);
90: for (j=0; j<N; j++) {
91: for (k=0; k<tsize; k++) CounterIncrement_kernel(k,counters);
92: }
93: PetscTime(&t1);
94: PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per kernel: %g us\n",1e6*(t1-t0)/N);
95: }
97: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterFree_kernel,1,counters);
98: PetscThreadCommBarrier(PETSC_COMM_WORLD);
99: PetscFree(counters);
100: PetscFinalize();
101: return 0;
102: }