Actual source code: tcpthread.c
petsc-3.3-p7 2013-05-11
/* Define feature test macros to make sure CPU_SET and other functions are available */
#define PETSC_DESIRE_FEATURE_TEST_MACROS

#include <../src/sys/threadcomm/impls/pthread/tcpthreadimpl.h>
#if defined(PETSC_PTHREAD_LOCAL)
PETSC_PTHREAD_LOCAL PetscInt PetscPThreadRank;
#else
pthread_key_t PetscPThreadRankkey;
#endif

static PetscBool PetscPThreadCommInitializeCalled = PETSC_FALSE;

const char *const PetscPThreadCommSynchronizationTypes[] = {"LOCKFREE","PetscPThreadCommSynchronizationType","PTHREADSYNC_",0};
const char *const PetscPThreadCommAffinityPolicyTypes[]  = {"ALL","ONECORE","NONE","PetscPThreadCommAffinityPolicyType","PTHREADAFFPOLICY_",0};
const char *const PetscPThreadCommPoolSparkTypes[]       = {"LEADER","CHAIN","PetscPThreadCommPoolSparkType","PTHREADPOOLSPARK_",0};
static PetscInt ptcommcrtct = 0; /* PThread communicator creation count. Incremented whenever a pthread
                                    communicator is created and decremented when it is destroyed. On the
                                    last pthread communicator destruction, the thread pool is also terminated. */
PetscInt PetscThreadCommGetRank_PThread(void)
{
#if defined(PETSC_PTHREAD_LOCAL)
  return PetscPThreadRank;
#else
  return *((PetscInt*)pthread_getspecific(PetscPThreadRankkey));
#endif
}
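The rank query above picks between two storage schemes: compiler-supported thread-local storage when PETSC_PTHREAD_LOCAL is available, and a pthread_key_t slot otherwise. As a standalone illustration of the same two schemes (not PETSc code; the demo_rank names are made up for the sketch):

#include <pthread.h>
#include <stdio.h>

#if defined(__GNUC__)
__thread int demo_rank;             /* compiler TLS: one copy of the variable per thread */
#else
static pthread_key_t demo_rank_key; /* fallback: per-thread slot looked up through a key */
#endif

static void *worker(void *arg)
{
#if defined(__GNUC__)
  demo_rank = *(int*)arg;
  printf("rank %d\n",demo_rank);
#else
  pthread_setspecific(demo_rank_key,arg);
  printf("rank %d\n",*(int*)pthread_getspecific(demo_rank_key));
#endif
  return NULL;
}

int main(void)
{
  pthread_t t[2];
  int       ranks[2] = {0,1},i;
#if !defined(__GNUC__)
  pthread_key_create(&demo_rank_key,NULL);
#endif
  for (i=0; i<2; i++) pthread_create(&t[i],NULL,worker,&ranks[i]);
  for (i=0; i<2; i++) pthread_join(t[i],NULL);
  return 0;
}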
#if defined(PETSC_HAVE_SCHED_CPU_SET_T)
void PetscPThreadCommDoCoreAffinity(void)
{
  PetscInt                i,icorr=0;
  cpu_set_t               mset;
  PetscInt                ncores,myrank=PetscThreadCommGetRank_PThread();
  PetscThreadComm         tcomm;
  PetscThreadComm_PThread gptcomm;

  PetscCommGetThreadComm(PETSC_COMM_WORLD,&tcomm);
  PetscGetNCores(&ncores);
  gptcomm = (PetscThreadComm_PThread)tcomm->data;
  switch (gptcomm->aff) {
  case PTHREADAFFPOLICY_ONECORE:
    /* Pin the calling thread to the single core given by its affinity entry, wrapped into range */
    icorr = tcomm->affinities[myrank];
    CPU_ZERO(&mset);
    CPU_SET(icorr%ncores,&mset);
    pthread_setaffinity_np(pthread_self(),sizeof(cpu_set_t),&mset);
    break;
  case PTHREADAFFPOLICY_ALL:
    /* Allow the calling thread to run on every core */
    CPU_ZERO(&mset);
    for (i=0; i<ncores; i++) CPU_SET(i,&mset);
    pthread_setaffinity_np(pthread_self(),sizeof(cpu_set_t),&mset);
    break;
  case PTHREADAFFPOLICY_NONE:
    break;
  }
}
#endif
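Affinity masks set with pthread_setaffinity_np can be read back with its glibc counterpart pthread_getaffinity_np. A minimal standalone sketch, assuming a Linux/glibc system and compilation with -pthread (illustrative only, not part of the PETSc source):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

int main(void)
{
  cpu_set_t mset;
  int       i;
  CPU_ZERO(&mset);
  CPU_SET(0,&mset); /* pin the calling thread to core 0 */
  pthread_setaffinity_np(pthread_self(),sizeof(cpu_set_t),&mset);
  /* Read the mask back to verify which cores the thread may still run on */
  CPU_ZERO(&mset);
  pthread_getaffinity_np(pthread_self(),sizeof(cpu_set_t),&mset);
  for (i=0; i<CPU_SETSIZE; i++)
    if (CPU_ISSET(i,&mset)) printf("runnable on core %d\n",i);
  return 0;
}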
#undef __FUNCT__
#define __FUNCT__ "PetscThreadCommDestroy_PThread"
PetscErrorCode PetscThreadCommDestroy_PThread(PetscThreadComm tcomm)
{
  PetscThreadComm_PThread ptcomm=(PetscThreadComm_PThread)tcomm->data;
  PetscErrorCode          ierr;

  PetscFunctionBegin;
  if (!ptcomm) PetscFunctionReturn(0);
  ptcommcrtct--;
  if (!ptcommcrtct) {
    /* Terminate the thread pool */
    ierr = (*ptcomm->finalize)(tcomm);CHKERRQ(ierr);
    ierr = PetscFree(ptcomm->tid);CHKERRQ(ierr);
  }
  ierr = PetscFree(ptcomm->granks);CHKERRQ(ierr);
  if (ptcomm->spark == PTHREADPOOLSPARK_CHAIN) {
    ierr = PetscFree(ptcomm->ngranks);CHKERRQ(ierr);
  }
  ierr = PetscFree(ptcomm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "PetscThreadCommCreate_PThread"
PetscErrorCode PetscThreadCommCreate_PThread(PetscThreadComm tcomm)
{
  PetscThreadComm_PThread ptcomm;
  PetscErrorCode          ierr;
  PetscInt                i;

  PetscFunctionBegin;
  ptcommcrtct++;
  ierr = PetscStrcpy(tcomm->type,PTHREAD);CHKERRQ(ierr);
  ierr = PetscNew(struct _p_PetscThreadComm_PThread,&ptcomm);CHKERRQ(ierr);
  tcomm->data = (void*)ptcomm;
  ptcomm->nthreads         = 0;
  ptcomm->sync             = PTHREADSYNC_LOCKFREE;
  ptcomm->aff              = PTHREADAFFPOLICY_ONECORE;
  ptcomm->spark            = PTHREADPOOLSPARK_LEADER;
  ptcomm->ismainworker     = PETSC_TRUE;
  ptcomm->synchronizeafter = PETSC_TRUE;
  tcomm->ops->destroy   = PetscThreadCommDestroy_PThread;
  tcomm->ops->runkernel = PetscThreadCommRunKernel_PThread_LockFree;
  tcomm->ops->barrier   = PetscThreadCommBarrier_PThread_LockFree;
  tcomm->ops->getrank   = PetscThreadCommGetRank_PThread;

  ierr = PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&ptcomm->granks);CHKERRQ(ierr);

  if (!PetscPThreadCommInitializeCalled) { /* Only done for PETSC_THREAD_COMM_WORLD */
    PetscPThreadCommInitializeCalled = PETSC_TRUE;
    PetscBool flg1,flg2,flg3,flg4;

    ierr = PetscOptionsBegin(PETSC_COMM_WORLD,PETSC_NULL,"PThread communicator options",PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-threadcomm_pthread_main_is_worker","Main thread is also a worker thread",PETSC_NULL,PETSC_TRUE,&ptcomm->ismainworker,&flg1);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_affpolicy","Thread affinity policy"," ",PetscPThreadCommAffinityPolicyTypes,(PetscEnum)ptcomm->aff,(PetscEnum*)&ptcomm->aff,&flg2);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_type","Thread pool type"," ",PetscPThreadCommSynchronizationTypes,(PetscEnum)ptcomm->sync,(PetscEnum*)&ptcomm->sync,&flg3);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_spark","Thread pool spark type"," ",PetscPThreadCommPoolSparkTypes,(PetscEnum)ptcomm->spark,(PetscEnum*)&ptcomm->spark,&flg4);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-threadcomm_pthread_synchronizeafter","Puts a barrier after every kernel call",PETSC_NULL,PETSC_TRUE,&ptcomm->synchronizeafter,&flg1);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);

    if (ptcomm->ismainworker) {
      ptcomm->nthreads         = tcomm->nworkThreads-1;
      ptcomm->thread_num_start = 1;
    } else {
      ptcomm->nthreads         = tcomm->nworkThreads;
      ptcomm->thread_num_start = 0;
    }

    switch (ptcomm->sync) {
    case PTHREADSYNC_LOCKFREE:
      ptcomm->initialize    = PetscPThreadCommInitialize_LockFree;
      ptcomm->finalize      = PetscPThreadCommFinalize_LockFree;
      tcomm->ops->runkernel = PetscThreadCommRunKernel_PThread_LockFree;
      tcomm->ops->barrier   = PetscThreadCommBarrier_PThread_LockFree;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only lock-free synchronization scheme is currently supported");
    }
    /* Set up thread ranks */
    for (i=0; i<tcomm->nworkThreads; i++) ptcomm->granks[i] = i;

    if (ptcomm->ismainworker) {
#if defined(PETSC_PTHREAD_LOCAL)
      PetscPThreadRank = 0; /* Main thread rank */
#else
      pthread_key_create(&PetscPThreadRankkey,NULL);
      pthread_setspecific(PetscPThreadRankkey,&ptcomm->granks[0]);
#endif
    }
    /* Set the leader thread rank */
    if (ptcomm->nthreads) {
      if (ptcomm->ismainworker) tcomm->leader = ptcomm->granks[1];
      else tcomm->leader = ptcomm->granks[0];
    }

    /* For the chain spark type, each thread sparks the next one; the last thread has no successor */
    if (ptcomm->spark == PTHREADPOOLSPARK_CHAIN) {
      ierr = PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&ptcomm->ngranks);CHKERRQ(ierr);
      for (i=ptcomm->thread_num_start; i<tcomm->nworkThreads-1; i++) ptcomm->ngranks[i] = ptcomm->granks[i+1];
      ptcomm->ngranks[tcomm->nworkThreads-1] = -1;
    }

    /* Create array holding pthread ids */
    ierr = PetscMalloc(tcomm->nworkThreads*sizeof(pthread_t),&ptcomm->tid);CHKERRQ(ierr);

    /* Set affinity of the main thread */
#if defined(PETSC_HAVE_SCHED_CPU_SET_T)
    cpu_set_t mset;
    PetscInt  ncores,icorr;

    ierr = PetscGetNCores(&ncores);CHKERRQ(ierr);
    CPU_ZERO(&mset);
    icorr = tcomm->affinities[0]%ncores;
    CPU_SET(icorr,&mset);
    sched_setaffinity(0,sizeof(cpu_set_t),&mset);
#endif

    /* Initialize thread pool */
    ierr = (*ptcomm->initialize)(tcomm);CHKERRQ(ierr);
  } else {
    PetscThreadComm         gtcomm;
    PetscThreadComm_PThread gptcomm;
    PetscInt                *granks,j,*gaffinities;

    ierr = PetscCommGetThreadComm(PETSC_COMM_WORLD,&gtcomm);CHKERRQ(ierr);
    gaffinities = gtcomm->affinities;
    gptcomm     = (PetscThreadComm_PThread)gtcomm->data;
    granks      = gptcomm->granks;
    /* Copy over the data from the global thread communicator structure */
    ptcomm->ismainworker     = gptcomm->ismainworker;
    ptcomm->thread_num_start = gptcomm->thread_num_start;
    ptcomm->sync             = gptcomm->sync;
    ptcomm->aff              = gptcomm->aff;
    tcomm->ops->runkernel    = gtcomm->ops->runkernel;
    tcomm->ops->barrier      = gtcomm->ops->barrier;

    /* Match this communicator's threads to the global ones by affinity */
    for (i=0; i<tcomm->nworkThreads; i++) {
      for (j=0; j<gtcomm->nworkThreads; j++) {
        if (tcomm->affinities[i] == gaffinities[j]) ptcomm->granks[i] = granks[j];
      }
    }
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
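For context, a sketch of a driver that exercises this communicator purely through the options database. The -threadcomm_pthread_* options are the ones registered above; -threadcomm_type and -threadcomm_nthreads come from the wider threadcomm package and should be checked against the installed PETSc version:

/* demo.c: let the options database select and size the pthread threadcomm.
   Run as, e.g.:
     ./demo -threadcomm_type pthread -threadcomm_nthreads 4 \
            -threadcomm_pthread_affpolicy onecore -threadcomm_pthread_main_is_worker true */
#include <petscsys.h>

int main(int argc,char **argv)
{
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);if (ierr) return ierr;
  /* Kernels launched through PetscThreadCommRunKernel() would now execute on the pool */
  ierr = PetscFinalize();
  return ierr;
}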