Actual source code: petsc-threadcommimpl.h

petsc-3.4.5 2014-06-29
  2: #ifndef __THREADCOMMIMPL_H

  5: #include <petscthreadcomm.h>
  6: #include <petsc-private/petscimpl.h>

  8: #if defined(PETSC_HAVE_SYS_SYSINFO_H)
  9: #include <sys/sysinfo.h>
 10: #endif
 11: #if defined(PETSC_HAVE_UNISTD_H)
 12: #include <unistd.h>
 13: #endif
 14: #if defined(PETSC_HAVE_SYS_SYSCTL_H)
 15: #include <sys/sysctl.h>
 16: #endif
 17: #if defined(PETSC_HAVE_WINDOWS_H)
 18: #include <windows.h>
 19: #endif

 21: PETSC_EXTERN PetscMPIInt Petsc_ThreadComm_keyval;
     /* ^ Declaration only; the variable is defined in another file.
        NOTE(review): presumably the MPI attribute keyval under which a
        threadcomm is attached to an MPI communicator - confirm. */

 23: /* Maximum number of void* arguments that can be forwarded to a kernel.
        It sizes the args[] array of a job context and equals the largest
        argument count dispatched by PetscRunKernel(). */
 24: #define PETSC_KERNEL_NARGS_MAX 10

 26: /* Reduction status of an individual thread.
        NOTE(review): presumably the values stored in the thread_status[]
        array of a reduction context - confirm. */
 27: #define THREADCOMM_THREAD_WAITING_FOR_NEWRED 0
 28: #define THREADCOMM_THREAD_POSTED_LOCALRED    1
 29: /* Status of the reduction as a whole.
        NOTE(review): presumably the values stored in red_status of a
        reduction context - confirm. */
 30: #define THREADCOMM_REDUCTION_NONE           -1
 31: #define THREADCOMM_REDUCTION_NEW             0
 32: #define THREADCOMM_REDUCTION_COMPLETE        1

 34: /* Job status codes for threads.
        NOTE(review): presumably the values held in the job_status[] array of
        a job context - confirm.
        The numeric values do not follow declaration order (COMPLETED is 0,
        POSTED is 1, RECIEVED is 2). "RECIEVED" is the actual spelling of the
        identifier and must be used verbatim. */
 35: #define THREAD_JOB_NONE       -1
 36: #define THREAD_JOB_POSTED      1
 37: #define THREAD_JOB_RECIEVED    2
 38: #define THREAD_JOB_COMPLETED   0

 /*
    PetscReadOnce - Reads the object `val` through a volatile-qualified lvalue
    of type `type`, so the access is performed each time the macro is
    evaluated instead of being satisfied from a previously loaded value.

    type - the type of the object being read (a type name, not an expression)
    val  - an lvalue of that type; its address is taken, so it must be
           addressable (not a bit-field or register variable)

    This is neither a memory barrier nor an atomic operation. `val` is
    parenthesized so that any lvalue expression passed in is taken as a
    whole by the address-of operator.
 */
 #define PetscReadOnce(type,val) (*(volatile type *)&(val))

 42: #if defined(PETSC_MEMORY_BARRIER)
     /* PetscMemoryBarrier(), PetscReadMemoryBarrier() and PetscWriteMemoryBarrier()
        each wrap the corresponding PETSC_*_MEMORY_BARRIER() macro in a
        do { } while(0) so the call behaves as a single statement.
        When the underlying macro is not defined, the wrapper expands to
        nothing: no barrier of any kind is issued. */
 43: #define PetscMemoryBarrier() do {PETSC_MEMORY_BARRIER();} while(0)
 44: #else /* PETSC_MEMORY_BARRIER not defined: expands to nothing */
 45: #define PetscMemoryBarrier()
 46: #endif
 47: #if defined(PETSC_READ_MEMORY_BARRIER)
 48: #define PetscReadMemoryBarrier() do {PETSC_READ_MEMORY_BARRIER();} while(0)
 49: #else /* PETSC_READ_MEMORY_BARRIER not defined: expands to nothing */
 50: #define PetscReadMemoryBarrier()
 51: #endif
 52: #if defined(PETSC_WRITE_MEMORY_BARRIER)
 53: #define PetscWriteMemoryBarrier() do {PETSC_WRITE_MEMORY_BARRIER();} while(0)
 54: #else /* PETSC_WRITE_MEMORY_BARRIER not defined: expands to nothing */
 55: #define PetscWriteMemoryBarrier()
 56: #endif

 58: typedef struct _p_PetscThreadCommRedCtx *PetscThreadCommRedCtx;
     /* Context describing one reduction on a thread communicator.
        PetscThreadCommRedCtx is a pointer to this struct. */
 59: struct _p_PetscThreadCommRedCtx{
 60:   PetscThreadComm               tcomm;          /* The associated threadcomm */
 61:   PetscInt                      red_status;     /* Reduction status. NOTE(review): presumably one of THREADCOMM_REDUCTION_* - confirm */
 62:   PetscInt                      *thread_status; /* Reduction status of each thread. NOTE(review): presumably THREADCOMM_THREAD_* values - confirm */
 63:   void                          *local_red;     /* Array to hold the local reduction contribution from each thread; untyped here.
                                                        NOTE(review): presumably interpreted according to `type` - confirm */
 64:   PetscThreadCommReductionOp    op;             /* The reduction operation */
 65:   PetscDataType                 type;           /* The reduction data type */
 66: };

 68: struct _p_PetscThreadCommReduction{
     /* Bookkeeping for the reductions of a thread communicator.
        NOTE(review): the handle type PetscThreadCommReduction used elsewhere in
        this header is presumably a pointer to this struct - confirm. */
 69:   PetscInt              nreds;                              /* Number of reductions in operation */
 70:   PetscThreadCommRedCtx redctx;                             /* Reduction objects. NOTE(review): presumably an array of contexts - confirm */
 71:   PetscInt               ctr;                               /* Global reduction counter */
 72:   PetscInt              *thread_ctr;                        /* Reduction counter for each thread */
 73: };

 75: typedef struct _p_PetscThreadCommJobCtx* PetscThreadCommJobCtx;
     /* Description of one job: a kernel function together with the arguments
        it is to be called with. PetscThreadCommJobCtx is a pointer to this
        struct. PetscRunKernel() reads pfunc and args from it. */
 76: struct  _p_PetscThreadCommJobCtx{
 77:   PetscThreadComm   tcomm;                         /* The thread communicator */
 78:   PetscInt          nargs;                         /* Number of arguments for the kernel */
 79:   PetscThreadKernel pfunc;                         /* Kernel function; called with a rank followed by entries of args */
 80:   void              *args[PETSC_KERNEL_NARGS_MAX]; /* Array of void* to hold the arguments */
 81:   PetscScalar       scalars[3];                    /* Array to hold three scalar values */
 82:   PetscInt          ints[3];                       /* Array to hold three integer values */
 83:   PetscInt          *job_status;                   /* Thread job status. NOTE(review): presumably one THREAD_JOB_* entry per thread - confirm */
 84: };

 86: /* Structure to manage the job queue: two counters plus the job contexts
        themselves. PetscThreadCommJobQueue is a pointer to this struct. */
 87: typedef struct _p_PetscThreadCommJobQueue* PetscThreadCommJobQueue;
 88: struct _p_PetscThreadCommJobQueue{
 89:   PetscInt ctr;                                         /* job counter */
 90:   PetscInt kernel_ctr;                                  /* kernel counter; kept separately from ctr because, without it, race conditions are unavoidable */
 91:   PetscThreadCommJobCtx jobs;                           /* queue of jobs. NOTE(review): presumably an array of job contexts - confirm */
 92: };

 94: extern PetscThreadCommJobQueue PetscJobQueue;
     /* ^ Declaration only; the job queue object is defined in another file. */

 96: typedef struct _PetscThreadCommOps* PetscThreadCommOps;
     /* Table of function pointers held by a thread communicator (its `ops`
        member). PetscThreadCommOps is a pointer to this struct. Every entry
        returns a PetscErrorCode.
        NOTE(review): the entries are presumably filled in by the selected
        threading model - confirm. */
 97: struct _PetscThreadCommOps {
 98:   PetscErrorCode (*destroy)(PetscThreadComm);                         /* takes the threadcomm */
 99:   PetscErrorCode (*runkernel)(PetscThreadComm,PetscThreadCommJobCtx); /* takes the threadcomm and a job context */
100:   PetscErrorCode (*view)(PetscThreadComm,PetscViewer);                /* takes the threadcomm and a viewer */
101:   PetscErrorCode (*barrier)(PetscThreadComm);                         /* takes the threadcomm */
102:   PetscErrorCode (*getrank)(PetscInt*);                               /* result written through the pointer; NOTE(review): presumably the calling thread's rank - confirm */
103: };

105: struct _p_PetscThreadComm{
     /* The thread communicator object: thread pool description, the
        operations table, and the reduction and job bookkeeping. */
106:   PetscInt                refct;        /* NOTE(review): presumably a reference count - confirm */
107:   PetscInt                nworkThreads; /* Number of threads in the pool */
108:   PetscInt                *affinities;  /* Thread affinity */
109:   PetscThreadCommOps      ops;          /* Operations table */
110:   void                    *data;        /* implementation specific data */
111:   char                    type[256];    /* Thread model type, stored as a string of at most 255 characters plus terminator */
112:   PetscInt                leader;       /* Rank of the leader thread. This thread manages
113:                                            the synchronization for collective operations like reductions.
114:                                         */
115:   PetscThreadCommReduction red;         /* Reduction context */
116:   PetscInt                job_ctr;      /* which job is this threadcomm running in the job queue */
117:   PetscBool               isnothread;   /* No threading model used */
118:   PetscInt                nkernels;     /* Maximum kernels launched */
119: };

121: /* Global thread communicator that manages all the threads. Other threadcomms
122:    use threads from PETSC_THREAD_COMM_WORLD.
        This is a declaration only; the object is defined in another file.
123: */
124: extern PetscThreadComm PETSC_THREAD_COMM_WORLD;

126: /* Registration of thread communicator models.
        PetscThreadCommRegister() takes a string and a pointer to a function
        that receives a PetscThreadComm and returns a PetscErrorCode.
        NOTE(review): presumably the model's name and its creation routine - confirm.
        PetscThreadCommRegisterAll() takes no arguments.
        NOTE(review): presumably registers every built-in model - confirm. */
127: PETSC_EXTERN PetscErrorCode PetscThreadCommRegister(const char[],PetscErrorCode(*)(PetscThreadComm));
128: PETSC_EXTERN PetscErrorCode PetscThreadCommRegisterAll(void);

132: PETSC_STATIC_INLINE PetscErrorCode PetscRunKernel(PetscInt trank,PetscInt nargs,PetscThreadCommJobCtx job)
133: {
134:   switch(nargs) {
135:   case 0:
136:     (*job->pfunc)(trank);
137:     break;
138:   case 1:
139:     (*job->pfunc)(trank,job->args[0]);
140:     break;
141:   case 2:
142:     (*job->pfunc)(trank,job->args[0],job->args[1]);
143:     break;
144:   case 3:
145:     (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2]);
146:     break;
147:   case 4:
148:     (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3]);
149:     break;
150:   case 5:
151:     (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4]);
152:     break;
153:   case 6:
154:     (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4],job->args[5]);
155:     break;
156:   case 7:
157:     (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4],job->args[5],job->args[6]);
158:     break;
159:   case 8:
160:     (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4],job->args[5],job->args[6],job->args[7]);
161:     break;
162:   case 9:
163:     (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4],job->args[5],job->args[6],job->args[7],job->args[8]);
164:     break;
165:   case 10:
166:     (*job->pfunc)(trank,job->args[0],job->args[1],job->args[2],job->args[3],job->args[4],job->args[5],job->args[6],job->args[7],job->args[8],job->args[9]);
167:     break;
168:   }
169:   return 0;
170: }

172: PETSC_EXTERN PetscErrorCode PetscThreadCommReductionCreate(PetscThreadComm,PetscThreadCommReduction*); /* takes a threadcomm; the new reduction object is returned through the pointer argument */
173: PETSC_EXTERN PetscErrorCode PetscThreadCommReductionDestroy(PetscThreadCommReduction);                 /* takes the reduction object itself, not its address */

175: PETSC_EXTERN PetscLogEvent ThreadComm_RunKernel, ThreadComm_Barrier; /* log event handles, declared here and defined in another file. NOTE(review): presumably used to log kernel launches and barriers - confirm */
176: #endif