Actual source code: petsc-threadcommimpl.h
petsc-3.5.4 2015-05-23
2: #ifndef __THREADCOMMIMPL_H
5: #include <petscthreadcomm.h>
6: #include <petsc-private/petscimpl.h>
8: #if defined(PETSC_HAVE_SYS_SYSINFO_H)
9: #include <sys/sysinfo.h>
10: #endif
11: #if defined(PETSC_HAVE_UNISTD_H)
12: #include <unistd.h>
13: #endif
14: #if defined(PETSC_HAVE_SYS_SYSCTL_H)
15: #include <sys/sysctl.h>
16: #endif
17: #if defined(PETSC_HAVE_WINDOWS_H)
18: #include <windows.h>
19: #endif
21: PETSC_EXTERN PetscMPIInt Petsc_ThreadComm_keyval;
/* Maximum number of arguments that can be handed to a kernel */
#define PETSC_KERNEL_NARGS_MAX 10

/* Reduction status of an individual thread */
#define THREADCOMM_THREAD_WAITING_FOR_NEWRED 0
#define THREADCOMM_THREAD_POSTED_LOCALRED    1

/* Status of the reduction as a whole.
   Negative values are parenthesized so the macro always expands to a single
   operand regardless of the surrounding expression. */
#define THREADCOMM_REDUCTION_NONE     (-1)
#define THREADCOMM_REDUCTION_NEW      0
#define THREADCOMM_REDUCTION_COMPLETE 1

/* Job status for threads */
#define THREAD_JOB_NONE      (-1)
#define THREAD_JOB_POSTED    1
#define THREAD_JOB_RECIEVED  2                   /* historical misspelling, kept so existing users still compile */
#define THREAD_JOB_RECEIVED  THREAD_JOB_RECIEVED /* correctly spelled alias for new code */
#define THREAD_JOB_COMPLETED 0
/*
   PetscReadOnce - Accesses an object through a volatile-qualified lvalue so the
   compiler performs a real load each time the expression is evaluated.

   type - the type of val
   val  - an lvalue of that type

   The qualifier is written after the type (type volatile *) so that it applies
   to the object itself even when type is a pointer type; with the qualifier in
   front, PetscReadOnce(int*,p) would qualify the pointee rather than p.
   val is parenthesized so the address-of operator applies to the whole argument.

   Note: volatile only constrains the compiler; it is not a memory barrier.
*/
#define PetscReadOnce(type,val) (*(type volatile *)&(val))
/*
   Statement-style wrappers around the configure-provided barrier macros.
   Each wrapper forwards to the corresponding PETSC_*_MEMORY_BARRIER() when that
   macro is defined, and otherwise expands to nothing (i.e. no barrier is issued).
*/

/* Full barrier */
#ifdef PETSC_MEMORY_BARRIER
#  define PetscMemoryBarrier() do { PETSC_MEMORY_BARRIER(); } while (0)
#else
#  define PetscMemoryBarrier()
#endif

/* Read (load) barrier */
#ifdef PETSC_READ_MEMORY_BARRIER
#  define PetscReadMemoryBarrier() do { PETSC_READ_MEMORY_BARRIER(); } while (0)
#else
#  define PetscReadMemoryBarrier()
#endif

/* Write (store) barrier */
#ifdef PETSC_WRITE_MEMORY_BARRIER
#  define PetscWriteMemoryBarrier() do { PETSC_WRITE_MEMORY_BARRIER(); } while (0)
#else
#  define PetscWriteMemoryBarrier()
#endif
58: typedef struct _p_PetscThreadCommRedCtx *PetscThreadCommRedCtx;
59: struct _p_PetscThreadCommRedCtx{
60: PetscThreadComm tcomm; /* The associated threadcomm */
61: PetscInt red_status; /* Reduction status */
62: PetscInt *thread_status; /* Reduction status of each thread */
63: void *local_red; /* Array to hold local reduction contribution from each thread */
64: PetscThreadCommReductionOp op; /* The reduction operation */
65: PetscDataType type; /* The reduction data type */
66: };
68: struct _p_PetscThreadCommReduction{
69: PetscInt nreds; /* Number of reductions in operation */
70: PetscThreadCommRedCtx redctx; /* Reduction objects */
71: PetscInt ctr; /* Global Reduction counter */
72: PetscInt *thread_ctr; /* Reduction counter for each thread */
73: };
75: typedef struct _p_PetscThreadCommJobCtx* PetscThreadCommJobCtx;
76: struct _p_PetscThreadCommJobCtx{
77: PetscThreadComm tcomm; /* The thread communicator */
78: PetscInt nargs; /* Number of arguments for the kernel */
79: PetscThreadKernel pfunc; /* Kernel function */
80: void *args[PETSC_KERNEL_NARGS_MAX]; /* Array of void* to hold the arguments */
81: PetscScalar scalars[3]; /* Array to hold three scalar values */
82: PetscInt ints[3]; /* Array to hold three integer values */
83: PetscInt *job_status; /* Thread job status */
84: };
86: /* Structure to manage job queue */
87: typedef struct _p_PetscThreadCommJobQueue* PetscThreadCommJobQueue;
88: struct _p_PetscThreadCommJobQueue{
89: PetscInt ctr; /* job counter */
90: PetscInt kernel_ctr; /* kernel counter .. need this otherwise race conditions are unavoidable */
91: PetscThreadCommJobCtx jobs; /* queue of jobs */
92: };
94: extern PetscThreadCommJobQueue PetscJobQueue;
96: typedef struct _PetscThreadCommOps* PetscThreadCommOps;
97: struct _PetscThreadCommOps {
98: PetscErrorCode (*destroy)(PetscThreadComm);
99: PetscErrorCode (*runkernel)(PetscThreadComm,PetscThreadCommJobCtx);
100: PetscErrorCode (*view)(PetscThreadComm,PetscViewer);
101: PetscErrorCode (*barrier)(PetscThreadComm);
102: PetscErrorCode (*getrank)(PetscInt*);
103: };
105: struct _p_PetscThreadComm{
106: PetscInt refct;
107: PetscInt nworkThreads; /* Number of threads in the pool */
108: PetscInt *affinities; /* Thread affinity */
109: PetscThreadCommOps ops; /* Operations table */
110: void *data; /* implementation specific data */
111: char type[256]; /* Thread model type */
112: PetscInt leader; /* Rank of the leader thread. This thread manages
113: the synchronization for collective operatons like reductions.
114: */
115: PetscThreadCommReduction red; /* Reduction context */
116: PetscInt job_ctr; /* which job is this threadcomm running in the job queue */
117: PetscBool isnothread; /* No threading model used */
118: PetscInt nkernels; /* Maximum kernels launched */
119: };
121: /* Global thread communicator that manages all the threads. Other threadcomms
122: use threads from PETSC_THREAD_COMM_WORLD
123: */
124: extern PetscThreadComm PETSC_THREAD_COMM_WORLD;
126: /* register thread communicator models */
127: PETSC_EXTERN PetscErrorCode PetscThreadCommRegister(const char[],PetscErrorCode(*)(PetscThreadComm));
128: PETSC_EXTERN PetscErrorCode PetscThreadCommRegisterAll(void);
132: PETSC_STATIC_INLINE PetscErrorCode PetscRunKernel(PetscInt trank,PetscInt nargs,PetscThreadCommJobCtx job)
133: {
134: switch(nargs) {
135: case 0:
136: (*job->pfunc)(trank);
137: break;
138: case 1:
139: (*job->pfunc)(trank,job->args[0]);
140: break;
141: case 2:
142: (*job->pfunc)(trank,job->args[0],job->args[1]);
143: break;
144: case 3:
145: (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2]);
146: break;
147: case 4:
148: (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3]);
149: break;
150: case 5:
151: (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4]);
152: break;
153: case 6:
154: (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4],job->args[5]);
155: break;
156: case 7:
157: (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4],job->args[5],job->args[6]);
158: break;
159: case 8:
160: (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4],job->args[5],job->args[6],job->args[7]);
161: break;
162: case 9:
163: (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4],job->args[5],job->args[6],job->args[7],job->args[8]);
164: break;
165: case 10:
166: (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4],job->args[5],job->args[6],job->args[7],job->args[8],job->args[9]);
167: break;
168: }
169: return 0;
170: }
172: PETSC_EXTERN PetscErrorCode PetscThreadCommReductionCreate(PetscThreadComm,PetscThreadCommReduction*);
173: PETSC_EXTERN PetscErrorCode PetscThreadCommReductionDestroy(PetscThreadCommReduction);
175: PETSC_EXTERN PetscLogEvent ThreadComm_RunKernel, ThreadComm_Barrier;
176: #endif