Actual source code: ex5.c
petsc-3.3-p7 2013-05-11
1: static char help[] = "Micro-benchmark kernel times.\n\n";
3: /*
4: Include "petscthreadcomm.h" so that we can use the PetscThreadComm interface.
5: */
6: #include <petscthreadcomm.h>
7: #include <petscvec.h>
8: #if defined(PETSC_HAVE_OPENMP)
9: # include <omp.h>
10: #endif
12: static PetscErrorCode CounterInit_kernel(PetscInt trank,PetscInt **counters)
13: {
14: counters[trank] = malloc(sizeof(PetscInt)); /* Separate allocation per thread */
15: *counters[trank] = 0; /* Initialize memory to fault it */
16: return 0;
17: }
19: static PetscErrorCode CounterIncrement_kernel(PetscInt trank,PetscInt **counters)
20: {
21: (*counters[trank])++;
22: return 0;
23: }
25: static PetscErrorCode CounterFree_kernel(PetscInt trank,PetscInt **counters)
26: {
27: free(counters[trank]);
28: return 0;
29: }
33: int main(int argc,char **argv)
34: {
36: PetscInt i,j,N=100,**counters,tsize;
38: PetscInitialize(&argc,&argv,(char *)0,help);
40: #if defined(PETSC_THREADCOMM_ACTIVE)
41: PetscThreadCommView(PETSC_COMM_WORLD,PETSC_VIEWER_STDOUT_WORLD);
42: #endif
43: PetscOptionsGetInt(PETSC_NULL,"-N",&N,PETSC_NULL);
45: PetscThreadCommGetNThreads(PETSC_COMM_WORLD,&tsize);
46: PetscMalloc(tsize*sizeof *counters,&counters);
47: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterInit_kernel,1,counters);
49: for (i=0; i<10; i++) {
50: PetscReal t0,t1;
51: PetscThreadCommBarrier(PETSC_COMM_WORLD);
52: PetscGetTime(&t0);
53: for (j=0; j<N; j++) {
54: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterIncrement_kernel,1,counters);
55: }
56: PetscThreadCommBarrier(PETSC_COMM_WORLD);
57: PetscGetTime(&t1);
58: PetscPrintf(PETSC_COMM_WORLD,"Time per kernel: %g us\n",1e6*(t1-t0)/N);
59: }
61: for (i=0; i<10; i++) {
62: PetscReal t0,t1;
63: PetscThreadCommBarrier(PETSC_COMM_WORLD);
64: PetscGetTime(&t0);
65: for (j=0; j<N; j++) {
66: #pragma omp parallel num_threads(tsize)
67: {
68: int trank = omp_get_thread_num();
69: (*counters[trank])++;
70: }
71: }
72: PetscThreadCommBarrier(PETSC_COMM_WORLD);
73: PetscGetTime(&t1);
74: PetscPrintf(PETSC_COMM_WORLD,"OpenMP inline time per kernel: %g us\n",1e6*(t1-t0)/N);
75: }
77: for (i=0; i<10; i++) {
78: PetscReal t0,t1;
79: PetscGetTime(&t0);
80: for (j=0; j<N; j++) {
81: (*(volatile PetscInt*)counters[0])++;
82: }
83: PetscGetTime(&t1);
84: PetscPrintf(PETSC_COMM_WORLD,"Serial inline time per kernel: %g us\n",1e6*(t1-t0)/N);
85: }
87: PetscThreadCommRunKernel(PETSC_COMM_WORLD,(PetscThreadKernel)CounterFree_kernel,1,counters);
88: PetscFree(counters);
89: PetscFinalize();
90: return 0;
91: }