Actual source code: tcpthread.c

petsc-3.3-p7 2013-05-11
/* Define feature test macros to make sure CPU_SET and other functions are available
 */
#define PETSC_DESIRE_FEATURE_TEST_MACROS

#include <../src/sys/threadcomm/impls/pthread/tcpthreadimpl.h>

#if defined(PETSC_PTHREAD_LOCAL)
PETSC_PTHREAD_LOCAL PetscInt PetscPThreadRank;
#else
pthread_key_t PetscPThreadRankkey;
#endif

static PetscBool PetscPThreadCommInitializeCalled = PETSC_FALSE;

const char *const PetscPThreadCommSynchronizationTypes[] = {"LOCKFREE","PetscPThreadCommSynchronizationType","PTHREADSYNC_",0};
const char *const PetscPThreadCommAffinityPolicyTypes[]  = {"ALL","ONECORE","NONE","PetscPThreadCommAffinityPolicyType","PTHREADAFFPOLICY_",0};
const char *const PetscPThreadCommPoolSparkTypes[]       = {"LEADER","CHAIN","PetscPThreadCommPoolSparkType","PTHREADPOOLSPARK_",0};

static PetscInt ptcommcrtct = 0; /* PThread communicator creation count. Incremented whenever a pthread
                                    communicator is created and decremented when it is destroyed. On the
                                    last pthread communicator destruction, the thread pool is also terminated. */

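/* Returns the rank of the calling thread, read either from a thread-local
   variable (when PETSC_PTHREAD_LOCAL is available) or from a pthread key */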
PetscInt PetscThreadCommGetRank_PThread()
{
#if defined(PETSC_PTHREAD_LOCAL)
  return PetscPThreadRank;
#else
  return *((PetscInt*)pthread_getspecific(PetscPThreadRankkey));
#endif
}


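/* Binds the calling worker thread to core(s) according to the communicator's
   affinity policy. Runs on each worker thread, hence the void return; error
   codes from the PETSc calls below are ignored here. */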
#if defined(PETSC_HAVE_SCHED_CPU_SET_T)
void PetscPThreadCommDoCoreAffinity(void)
{
  PetscInt                 i,icorr=0;
  cpu_set_t                mset;
  PetscInt                 ncores,myrank=PetscThreadCommGetRank_PThread();
  PetscThreadComm          tcomm;
  PetscThreadComm_PThread  gptcomm;

  PetscCommGetThreadComm(PETSC_COMM_WORLD,&tcomm);
  PetscGetNCores(&ncores);
  gptcomm = (PetscThreadComm_PThread)tcomm->data;
  switch(gptcomm->aff) {
  case PTHREADAFFPOLICY_ONECORE:
    icorr = tcomm->affinities[myrank];
    CPU_ZERO(&mset);
    CPU_SET(icorr%ncores,&mset);
    pthread_setaffinity_np(pthread_self(),sizeof(cpu_set_t),&mset);
    break;
  case PTHREADAFFPOLICY_ALL:
    CPU_ZERO(&mset);
    for(i=0;i<ncores;i++) CPU_SET(i,&mset);
    pthread_setaffinity_np(pthread_self(),sizeof(cpu_set_t),&mset);
    break;
  case PTHREADAFFPOLICY_NONE:
    break;
  }
}
#endif

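/* Destroys the pthread communicator; the thread pool itself is terminated
   only when the last pthread communicator is destroyed (ptcommcrtct == 0) */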
PetscErrorCode PetscThreadCommDestroy_PThread(PetscThreadComm tcomm)
{
  PetscThreadComm_PThread ptcomm=(PetscThreadComm_PThread)tcomm->data;
  PetscErrorCode          ierr;

  PetscFunctionBegin;
  if (!ptcomm) PetscFunctionReturn(0);
  ptcommcrtct--;
  if (!ptcommcrtct) {
    /* Terminate the thread pool */
    ierr = (*ptcomm->finalize)(tcomm);CHKERRQ(ierr);
    ierr = PetscFree(ptcomm->tid);CHKERRQ(ierr);
  }
  ierr = PetscFree(ptcomm->granks);CHKERRQ(ierr);
  if (ptcomm->spark == PTHREADPOOLSPARK_CHAIN) {
    ierr = PetscFree(ptcomm->ngranks);CHKERRQ(ierr);
  }
  ierr = PetscFree(ptcomm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
PetscErrorCode PetscThreadCommCreate_PThread(PetscThreadComm tcomm)
{
  PetscThreadComm_PThread ptcomm;
  PetscErrorCode          ierr;
  PetscInt                i;

  PetscFunctionBegin;
  ptcommcrtct++;
  ierr = PetscStrcpy(tcomm->type,PTHREAD);CHKERRQ(ierr);
  ierr = PetscNew(struct _p_PetscThreadComm_PThread,&ptcomm);CHKERRQ(ierr);
  tcomm->data              = (void*)ptcomm;
  ptcomm->nthreads         = 0;
  ptcomm->sync             = PTHREADSYNC_LOCKFREE;
  ptcomm->aff              = PTHREADAFFPOLICY_ONECORE;
  ptcomm->spark            = PTHREADPOOLSPARK_LEADER;
  ptcomm->ismainworker     = PETSC_TRUE;
  ptcomm->synchronizeafter = PETSC_TRUE;
  tcomm->ops->destroy   = PetscThreadCommDestroy_PThread;
  tcomm->ops->runkernel = PetscThreadCommRunKernel_PThread_LockFree;
  tcomm->ops->barrier   = PetscThreadCommBarrier_PThread_LockFree;
  tcomm->ops->getrank   = PetscThreadCommGetRank_PThread;

  ierr = PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&ptcomm->granks);CHKERRQ(ierr);

  if (!PetscPThreadCommInitializeCalled) { /* Only done for PETSC_THREAD_COMM_WORLD */
    PetscBool flg1,flg2,flg3,flg4;

    PetscPThreadCommInitializeCalled = PETSC_TRUE;

    ierr = PetscOptionsBegin(PETSC_COMM_WORLD,PETSC_NULL,"PThread communicator options",PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-threadcomm_pthread_main_is_worker","Main thread is also a worker thread",PETSC_NULL,PETSC_TRUE,&ptcomm->ismainworker,&flg1);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_affpolicy","Thread affinity policy"," ",PetscPThreadCommAffinityPolicyTypes,(PetscEnum)ptcomm->aff,(PetscEnum*)&ptcomm->aff,&flg2);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_type","Thread pool type"," ",PetscPThreadCommSynchronizationTypes,(PetscEnum)ptcomm->sync,(PetscEnum*)&ptcomm->sync,&flg3);CHKERRQ(ierr);
    ierr = PetscOptionsEnum("-threadcomm_pthread_spark","Thread pool spark type"," ",PetscPThreadCommPoolSparkTypes,(PetscEnum)ptcomm->spark,(PetscEnum*)&ptcomm->spark,&flg4);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-threadcomm_pthread_synchronizeafter","Puts a barrier after every kernel call",PETSC_NULL,PETSC_TRUE,&ptcomm->synchronizeafter,&flg1);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
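    /* Illustrative (not from this file): these options are given on the
       command line through the standard PETSc options mechanism, e.g.
         ./app -threadcomm_type pthread -threadcomm_pthread_affpolicy onecore */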

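    /* If the main thread is also a worker (the default), it takes rank 0 and
       only nworkThreads-1 additional pthreads are spawned */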
    if (ptcomm->ismainworker) {
      ptcomm->nthreads         = tcomm->nworkThreads-1;
      ptcomm->thread_num_start = 1;
    } else {
      ptcomm->nthreads         = tcomm->nworkThreads;
      ptcomm->thread_num_start = 0;
    }

    switch (ptcomm->sync) {
    case PTHREADSYNC_LOCKFREE:
      ptcomm->initialize    = PetscPThreadCommInitialize_LockFree;
      ptcomm->finalize      = PetscPThreadCommFinalize_LockFree;
      tcomm->ops->runkernel = PetscThreadCommRunKernel_PThread_LockFree;
      tcomm->ops->barrier   = PetscThreadCommBarrier_PThread_LockFree;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only the lock-free synchronization scheme is currently supported");
    }
    /* Set up thread ranks */
    for (i=0; i<tcomm->nworkThreads; i++) ptcomm->granks[i] = i;

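    /* Publish rank 0 for the main thread so that PetscThreadCommGetRank_PThread()
       also works when called from the main thread */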
    if (ptcomm->ismainworker) {
#if defined(PETSC_PTHREAD_LOCAL)
      PetscPThreadRank = 0; /* Main thread rank */
#else
      pthread_key_create(&PetscPThreadRankkey,NULL);
      pthread_setspecific(PetscPThreadRankkey,&ptcomm->granks[0]);
#endif
    }
    /* Set the leader thread rank */
    if (ptcomm->nthreads) {
      if (ptcomm->ismainworker) tcomm->leader = ptcomm->granks[1];
      else                      tcomm->leader = ptcomm->granks[0];
    }

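    /* CHAIN spark: ngranks[i] holds the rank of the next thread in the chain
       that thread i sparks; -1 terminates the chain */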
    if (ptcomm->spark == PTHREADPOOLSPARK_CHAIN) {
      ierr = PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&ptcomm->ngranks);CHKERRQ(ierr);
      for (i=ptcomm->thread_num_start; i<tcomm->nworkThreads-1; i++) ptcomm->ngranks[i] = ptcomm->granks[i+1];
      ptcomm->ngranks[tcomm->nworkThreads-1] = -1;
    }

    /* Create array holding pthread ids */
    ierr = PetscMalloc(tcomm->nworkThreads*sizeof(pthread_t),&ptcomm->tid);CHKERRQ(ierr);

    /* Set affinity of the main thread */
#if defined(PETSC_HAVE_SCHED_CPU_SET_T)
    {
      cpu_set_t mset;
      PetscInt  ncores,icorr;

      ierr = PetscGetNCores(&ncores);CHKERRQ(ierr);
      CPU_ZERO(&mset);
      icorr = tcomm->affinities[0]%ncores;
      CPU_SET(icorr,&mset);
      sched_setaffinity(0,sizeof(cpu_set_t),&mset);
    }
#endif

    /* Initialize thread pool */
    ierr = (*ptcomm->initialize)(tcomm);CHKERRQ(ierr);

  } else {
    PetscThreadComm          gtcomm;
    PetscThreadComm_PThread  gptcomm;
    PetscInt                 *granks,j,*gaffinities;

    ierr = PetscCommGetThreadComm(PETSC_COMM_WORLD,&gtcomm);CHKERRQ(ierr);
    gaffinities = gtcomm->affinities;
    gptcomm     = (PetscThreadComm_PThread)gtcomm->data;
    granks      = gptcomm->granks;
    /* Copy over the data from the global thread communicator structure */
    ptcomm->ismainworker     = gptcomm->ismainworker;
    ptcomm->thread_num_start = gptcomm->thread_num_start;
    ptcomm->sync             = gptcomm->sync;
    ptcomm->aff              = gptcomm->aff;
    tcomm->ops->runkernel    = gtcomm->ops->runkernel;
    tcomm->ops->barrier      = gtcomm->ops->barrier;

    /* Map each thread in this communicator to the global thread with the same core affinity */
    for (i=0; i<tcomm->nworkThreads; i++) {
      for (j=0; j<gtcomm->nworkThreads; j++) {
        if (tcomm->affinities[i] == gaffinities[j]) ptcomm->granks[i] = granks[j];
      }
    }
  }

  PetscFunctionReturn(0);
}
EXTERN_C_END
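
For reference, here is a minimal standalone sketch (not part of PETSc) of the core-pinning pattern used in PetscPThreadCommDoCoreAffinity() above. It assumes glibc's nonportable pthread_setaffinity_np(); the target core 0 is an arbitrary choice for illustration.

#define _GNU_SOURCE   /* expose CPU_SET and pthread_setaffinity_np in glibc */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

int main(void)
{
  cpu_set_t mset;
  int       err;

  /* Build a CPU set containing only core 0 and pin the calling thread to it */
  CPU_ZERO(&mset);
  CPU_SET(0,&mset);
  err = pthread_setaffinity_np(pthread_self(),sizeof(cpu_set_t),&mset);
  if (err) { fprintf(stderr,"pthread_setaffinity_np failed: %d\n",err); return 1; }
  printf("calling thread pinned to core 0\n");
  return 0;
}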