Actual source code: mpinit.c
petsc-3.4.5 2014-06-29

#include <petscsys.h>        /*I  "petscsys.h"  I*/

static MPI_Comm saved_PETSC_COMM_WORLD = 0;
MPI_Comm        PETSC_COMM_LOCAL_WORLD = 0;            /* comm for a single node (local set of processes) */
PetscBool       PetscHMPIWorker        = PETSC_FALSE;  /* PETSC_FALSE on regular (nonworker) processes; set to PETSC_TRUE on worker processes */
void            *PetscHMPICtx          = 0;

extern PetscErrorCode PetscHMPIHandle(MPI_Comm);
#if defined(PETSC_HAVE_MPI_COMM_SPAWN)
/*@C
   PetscHMPISpawn - Initializes additional processes to be used as "worker" processes. This is not generally
     called by users. Use the option -hmpi_spawn_size <n> to indicate that n-1 new MPI
     processes should be spawned for each current process.

   Not Collective (could make collective on MPI_COMM_WORLD, generate one huge comm and then split it up)

   Input Parameter:
.  nodesize - size of each compute node that will share processors

   Options Database:
.  -hmpi_spawn_size nodesize

   Notes: This is only supported on systems with an MPI-2 implementation that includes the MPI_Comm_spawn() routine.

$    Comparison of two approaches for HMPI usage (MPI started with N processes)
$
$    -hmpi_spawn_size <n> requires MPI-2, results in n*N total processes with N directly used by application code
$                                         and n-1 worker processes (used by PETSc) for each application node.
$                         You MUST launch MPI so that only ONE MPI process is created for each hardware node.
$
$    -hmpi_merge_size <n> results in N total processes, N/n used by the application code and the rest worker processes
$                                    (used by PETSc)
$                         You MUST launch MPI so that n MPI processes are created for each hardware node.
$
$    petscmpiexec -n 2 ./ex1 -hmpi_spawn_size 3 gives 2 application nodes (and 4 PETSc worker nodes)
$    petscmpiexec -n 6 ./ex1 -hmpi_merge_size 3 gives the SAME 2 application nodes and 4 PETSc worker nodes
$       This is what one would use if each of the computer's hardware nodes had 3 CPUs.
$
$    These are intended to be used in conjunction with USER HMPI code. The user will have 1 process per
$    computer (hardware) node (where the computer node has p CPUs); the user's code will use threads to fully
$    utilize all the CPUs on the node. The PETSc code will have p processes to fully use the compute node for
$    PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time, so the p CPUs
$    are always working on p tasks, never more than p.
$
$    See PCHMPI for a PETSc preconditioner that can use this functionality
$

   For both PetscHMPISpawn() and PetscHMPIMerge() PETSC_COMM_WORLD consists of one process per "node", while PETSC_COMM_LOCAL_WORLD
   consists of all the processes in a "node".

   In both cases the user's code is running ONLY on PETSC_COMM_WORLD (which is newly generated by running this routine).
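
   Example Usage: a minimal sketch, not one of the PETSc example programs; it assumes the executable was launched
   as shown above with -hmpi_spawn_size 3, and note that in petsc-3.4.5 the body of PetscHMPISpawn() below simply
   raises PETSC_ERR_SUP ("HMPI functionality is currently broken"), so this only illustrates the intended use.

$     #include <petscsys.h>
$
$     int main(int argc,char **argv)
$     {
$       PetscErrorCode ierr;
$       PetscMPIInt    wsize,lsize;
$
$       // -hmpi_spawn_size 3 makes PetscInitialize() spawn 2 worker processes per original process
$       ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
$       ierr = MPI_Comm_size(PETSC_COMM_WORLD,&wsize);CHKERRQ(ierr);        // one process per "node"
$       ierr = MPI_Comm_size(PETSC_COMM_LOCAL_WORLD,&lsize);CHKERRQ(ierr);  // all processes in this "node", here 3
$       ierr = PetscPrintf(PETSC_COMM_WORLD,"application processes %d, node size %d\n",wsize,lsize);CHKERRQ(ierr);
$       ierr = PetscFinalize();
$       return ierr;
$     }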

   Level: developer

   Concepts: HMPI

.seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscHMPIFinalize(), PetscInitialize(), PetscHMPIMerge(), PetscHMPIRun()

@*/
PetscErrorCode PetscHMPISpawn(PetscMPIInt nodesize)
{
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HMPI functionality is currently broken");
#if defined(broken_functionality_commented_out)
  PetscMPIInt size;
  MPI_Comm    parent,children;

  MPI_Comm_get_parent(&parent);
  if (parent == MPI_COMM_NULL) {  /* the original processes started by user */
    char programname[PETSC_MAX_PATH_LEN];
    char **argv;

    PetscGetProgramName(programname,PETSC_MAX_PATH_LEN);
    PetscGetArguments(&argv);
    MPI_Comm_spawn(programname,argv,nodesize-1,MPI_INFO_NULL,0,PETSC_COMM_SELF,&children,MPI_ERRCODES_IGNORE);
    PetscFreeArguments(argv);
    MPI_Intercomm_merge(children,0,&PETSC_COMM_LOCAL_WORLD);

    MPI_Comm_size(PETSC_COMM_WORLD,&size);
    PetscInfo2(0,"PETSc HMPI successfully spawned: number of nodes = %d node size = %d\n",size,nodesize);

    saved_PETSC_COMM_WORLD = PETSC_COMM_WORLD;
  } else { /* worker nodes that get spawned */
    MPI_Intercomm_merge(parent,1,&PETSC_COMM_LOCAL_WORLD);
    PetscHMPIHandle(PETSC_COMM_LOCAL_WORLD);
    PetscHMPIWorker = PETSC_TRUE; /* so that PetscHMPIFinalize() will not attempt a broadcast from this process */
    PetscEnd(); /* cannot continue into user code */
  }
  return(0);
#endif
}
#endif
/*@C
   PetscHMPIMerge - Initializes PETSc and MPI to work with HMPI. This is not usually called
     by the user. Use the option -hmpi_merge_size <n> to indicate the desired node size of the merged communicator.

   Collective on MPI_COMM_WORLD or PETSC_COMM_WORLD if it has been set

   Input Parameters:
+  nodesize - size of each compute node that will share processors
.  func - optional function to call on the master nodes
-  ctx - context passed to function on master nodes

   Options Database:
.  -hmpi_merge_size <n>

   Level: developer

$    Comparison of two approaches for HMPI usage (MPI started with N processes)
$
$    -hmpi_spawn_size <n> requires MPI-2, results in n*N total processes with N directly used by application code
$                                         and n-1 worker processes (used by PETSc) for each application node.
$                         You MUST launch MPI so that only ONE MPI process is created for each hardware node.
$
$    -hmpi_merge_size <n> results in N total processes, N/n used by the application code and the rest worker processes
$                                    (used by PETSc)
$                         You MUST launch MPI so that n MPI processes are created for each hardware node.
$
$    petscmpiexec -n 2 ./ex1 -hmpi_spawn_size 3 gives 2 application nodes (and 4 PETSc worker nodes)
$    petscmpiexec -n 6 ./ex1 -hmpi_merge_size 3 gives the SAME 2 application nodes and 4 PETSc worker nodes
$       This is what one would use if each of the computer's hardware nodes had 3 CPUs.
$
$    These are intended to be used in conjunction with USER HMPI code. The user will have 1 process per
$    computer (hardware) node (where the computer node has p CPUs); the user's code will use threads to fully
$    utilize all the CPUs on the node. The PETSc code will have p processes to fully use the compute node for
$    PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time, so the p CPUs
$    are always working on p tasks, never more than p.
$
$    See PCHMPI for a PETSc preconditioner that can use this functionality
$

   For both PetscHMPISpawn() and PetscHMPIMerge() PETSC_COMM_WORLD consists of one process per "node", while PETSC_COMM_LOCAL_WORLD
   consists of all the processes in a "node".

   In both cases the user's code is running ONLY on PETSC_COMM_WORLD (which is newly generated by running this routine).
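
   Example Usage: a rough sketch of calling this routine directly (normally it is triggered for you by
   -hmpi_merge_size via PetscInitialize()); MyCtx is a hypothetical application type, and in petsc-3.4.5 the
   routine body below simply raises PETSC_ERR_SUP, so this only illustrates the intended use.

$     #include <petscsys.h>
$
$     typedef struct { PetscScalar shift; } MyCtx;   // hypothetical context later seen by PetscHMPIRunCtx() functions
$
$     int main(int argc,char **argv)
$     {
$       PetscErrorCode ierr;
$       MyCtx          ctx;
$
$       ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
$       ctx.shift = 1.0;
$       // with petscmpiexec -n 6 ./ex1 this yields 2 application processes and 4 workers;
$       // the workers stay inside PetscHMPIMerge() servicing requests until the termination
$       // command is broadcast by PetscHMPIFinalize() (called from PetscFinalize())
$       ierr = PetscHMPIMerge(3,NULL,(void*)&ctx);CHKERRQ(ierr);
$       // ... application code on PETSC_COMM_WORLD, e.g. PetscHMPIMalloc()/PetscHMPIRun() calls ...
$       ierr = PetscFinalize();
$       return ierr;
$     }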

   Concepts: HMPI

.seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscHMPIFinalize(), PetscInitialize(), PetscHMPISpawn(), PetscHMPIRun()

@*/
PetscErrorCode PetscHMPIMerge(PetscMPIInt nodesize,PetscErrorCode (*func)(void*),void *ctx)
{
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HMPI functionality is currently broken");
#if defined(broken_functionality_commented_out)
  PetscMPIInt size,rank,*ranks,i;
  MPI_Group   group,newgroup;

  saved_PETSC_COMM_WORLD = PETSC_COMM_WORLD;

  MPI_Comm_size(saved_PETSC_COMM_WORLD,&size);
  if (size % nodesize) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Total number of processes %d is not divisible by number of processes per node %d",size,nodesize);
  MPI_Comm_rank(saved_PETSC_COMM_WORLD,&rank);

  /* create two communicators
      *) one that contains the first process from each node: 0,nodesize,2*nodesize,...
      *) one that contains all processes in a node: (0,1,2,...,nodesize-1), (nodesize,nodesize+1,...,2*nodesize-1), ...
  */
  MPI_Comm_group(saved_PETSC_COMM_WORLD,&group);
  PetscMalloc((size/nodesize)*sizeof(PetscMPIInt),&ranks);
  for (i=0; i<(size/nodesize); i++) ranks[i] = i*nodesize;
  MPI_Group_incl(group,size/nodesize,ranks,&newgroup);
  PetscFree(ranks);
  MPI_Comm_create(saved_PETSC_COMM_WORLD,newgroup,&PETSC_COMM_WORLD);
  if (rank % nodesize) PETSC_COMM_WORLD = 0; /* mark invalid processes for easy debugging */
  MPI_Group_free(&group);
  MPI_Group_free(&newgroup);

  MPI_Comm_split(saved_PETSC_COMM_WORLD,rank/nodesize,rank % nodesize,&PETSC_COMM_LOCAL_WORLD);

  PetscInfo2(0,"PETSc HMPI successfully started: number of nodes = %d node size = %d\n",size/nodesize,nodesize);
  PetscInfo1(0,"PETSc HMPI process %sactive\n",(rank % nodesize) ? "in" : "");

  PetscHMPICtx = ctx;
  /*
     All processes not involved in the user application code wait here
  */
  if (!PETSC_COMM_WORLD) {
    PetscHMPIHandle(PETSC_COMM_LOCAL_WORLD);
    PETSC_COMM_WORLD = saved_PETSC_COMM_WORLD;
    PetscHMPIWorker  = PETSC_TRUE; /* so that PetscHMPIFinalize() will not attempt a broadcast from this process */
    PetscInfo(0,"PETSc HMPI inactive process becoming active");
  } else if (func) {
    (*func)(ctx);
  }
  return(0);
#endif
}
/*@C
   PetscHMPIFinalize - Finalizes PETSc and MPI for use with HMPI. This is called by PetscFinalize() and
     should not be called by the user.

   Collective on the entire system

   Level: developer

.seealso: PetscFinalize(), PetscGetArgs(), PetscHMPIMerge(), PCHMPIRun()

@*/
PetscErrorCode PetscHMPIFinalize(void)
{
  PetscErrorCode ierr    = 0;
  PetscInt       command = 3;

  if (!PetscHMPIWorker && PETSC_COMM_LOCAL_WORLD) {
    MPI_Bcast(&command,1,MPIU_INT,0,PETSC_COMM_LOCAL_WORLD); /* broadcast to my worker group to end program */

    PETSC_COMM_WORLD = saved_PETSC_COMM_WORLD;

    PetscInfo(0,"PETSc HMPI active process ending PetscHMPIMerge()");
  }
  PetscFunctionReturn(ierr);
}

static PetscInt numberobjects = 0;
static void     *objects[100];
/*@C
   PetscHMPIHandle - Receives commands from the master node and processes them

   Collective on MPI_Comm

   Input Parameter:
.  comm - Must be PETSC_COMM_LOCAL_WORLD

   Level: developer

   Notes: This is usually handled automatically; you likely do not need to call this directly.
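
   The loop below implements a simple broadcast protocol on comm: rank 0 (the master) broadcasts an integer
   command followed by the arguments that command needs, and every worker acts on it. A rough master-side
   sketch of the pairing (this is essentially what PetscHMPIMalloc(), PetscHMPIFree(), PetscHMPIRun() and
   PetscHMPIRunCtx() do for commands 0, 1, 2 and 4; command 3 is broadcast by PetscHMPIFinalize()):

$     PetscInt       command,i;     // i indexes the objects[] table kept identically on every process
$     size_t         n;
$     PetscErrorCode (*f)(MPI_Comm,void*);
$
$     command = 0;                                              // allocate (see PetscHMPIMalloc())
$     MPI_Bcast(&command,1,MPIU_INT,0,comm);
$     MPI_Bcast(&n,1,MPIU_SIZE_T,0,comm);                       // requested size follows
$
$     command = 1;                                              // free (see PetscHMPIFree())
$     MPI_Bcast(&command,1,MPIU_INT,0,comm);
$     MPI_Bcast(&i,1,MPIU_INT,0,comm);                          // index of the object follows
$
$     command = 2;                                              // run f(comm,objects[i]) (see PetscHMPIRun();
$     MPI_Bcast(&command,1,MPIU_INT,0,comm);                    //  command 4 adds the PetscHMPICtx argument)
$     MPI_Bcast(&i,1,MPIU_INT,0,comm);
$     MPI_Bcast((PETSC_UINTPTR_T*)&f,1,MPIU_SIZE_T,0,comm);     // function pointer follows
$
$     command = 3;                                              // end the worker loop (sent by PetscHMPIFinalize())
$     MPI_Bcast(&command,1,MPIU_INT,0,comm);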

   Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

.seealso: PetscHMPIMerge(), PCHMPIRun(), PCHMPINew()

@*/
PetscErrorCode PetscHMPIHandle(MPI_Comm comm)
{
  PetscInt  command       = 0; /* dummy value so MPI-Uni doesn't think it is not set */
  PetscBool exitwhileloop = PETSC_FALSE;

  while (!exitwhileloop) {
    MPI_Bcast(&command,1,MPIU_INT,0,comm);
    switch (command) {
    case 0: { /* allocate some memory on this worker process */
      size_t n = 0; /* dummy value so MPI-Uni doesn't think it is not set */
      void   *ptr;
      MPI_Bcast(&n,1,MPIU_SIZE_T,0,comm);
      /* cannot use PetscNew() because it requires a struct argument */
      PetscMalloc(n,&ptr);
      PetscMemzero(ptr,n);

      objects[numberobjects++] = ptr;
      break;
    }
    case 1: { /* free some memory on this worker process */
      PetscInt i;
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      PetscFree(objects[i]);
      break;
    }
    case 2: { /* run a function on this worker process */
      PetscInt i;
      PetscErrorCode (*f)(MPI_Comm,void*);
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      MPI_Bcast((PETSC_UINTPTR_T*)&f,1,MPIU_SIZE_T,0,comm);
      (*f)(comm,objects[i]);
      break;
    }
    case 4: { /* run a function on this worker process with the provided context */
      PetscInt i;
      PetscErrorCode (*f)(MPI_Comm,void*,void*);
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      MPI_Bcast((PETSC_UINTPTR_T*)&f,1,MPIU_SIZE_T,0,comm);
      (*f)(comm,PetscHMPICtx,objects[i]);
      break;
    }
    case 3: {
      exitwhileloop = PETSC_TRUE;
      break;
    }
    default:
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unknown HMPI command %D",command);
    }
  }
  return(0);
}
/*@C
   PetscHMPIMalloc - Creates a "C struct" on all nodes of an HMPI communicator

   Collective on MPI_Comm

   Input Parameters:
+  comm - Must be PETSC_COMM_LOCAL_WORLD
-  n - amount of memory requested (in bytes)

   Output Parameter:
.  ptr - address of the allocated memory on the master process (each worker process allocates and keeps its own copy)
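
   Example Usage: a sketch; MyStruct is a hypothetical application type, and the same pointer is later passed
   to PetscHMPIRun() and PetscHMPIFree().

$     typedef struct { PetscInt n; PetscScalar alpha; } MyStruct;  // hypothetical application type
$     MyStruct       *s;
$     PetscErrorCode ierr;
$
$     // called on the master process of a node; each worker of PETSC_COMM_LOCAL_WORLD performs
$     // the matching allocation inside PetscHMPIHandle()
$     ierr = PetscHMPIMalloc(PETSC_COMM_LOCAL_WORLD,sizeof(MyStruct),(void**)&s);CHKERRQ(ierr);
$     s->n = 10;   // fills only the master's copy; the workers' copies remain zeroed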

   Level: developer

   Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

.seealso: PetscHMPIMerge(), PCHMPIRun(), PCHMPIFree()

@*/
PetscErrorCode PetscHMPIMalloc(MPI_Comm comm,size_t n,void **ptr)
{
  PetscInt command = 0;

  if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

  MPI_Bcast(&command,1,MPIU_INT,0,comm);
  MPI_Bcast(&n,1,MPIU_SIZE_T,0,comm);

  /* cannot use PetscNew() because it requires a struct argument */
  PetscMalloc(n,ptr);
  PetscMemzero(*ptr,n);

  objects[numberobjects++] = *ptr;
  return(0);
}
/*@C
   PetscHMPIFree - Frees a "C struct" on all nodes of an HMPI communicator

   Collective on MPI_Comm

   Input Parameters:
+  comm - Must be PETSC_COMM_LOCAL_WORLD
-  ptr - pointer to the data to be freed; it must have been obtained with PetscHMPIMalloc()
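
   Example Usage: a sketch continuing the PetscHMPIMalloc() example, where s was the hypothetical pointer
   obtained there.

$     // broadcasts the object's index so that every worker frees its own copy as well
$     ierr = PetscHMPIFree(PETSC_COMM_LOCAL_WORLD,(void*)s);CHKERRQ(ierr);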

   Level: developer

   Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

.seealso: PetscHMPIMerge(), PetscHMPIMalloc()

@*/
PetscErrorCode PetscHMPIFree(MPI_Comm comm,void *ptr)
{
  PetscErrorCode ierr    = 0;
  PetscInt       command = 1,i;

  if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

  MPI_Bcast(&command,1,MPIU_INT,0,comm);
  for (i=0; i<numberobjects; i++) {
    if (objects[i] == ptr) {
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      PetscFree(objects[i]);
      return(0);
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscHMPIMalloc()");
  PetscFunctionReturn(ierr);
}
/*@C
   PetscHMPIRun - Runs a function on all the processes of a node

   Collective on MPI_Comm

   Input Parameters:
+  comm - communicator to run the function on; must be PETSC_COMM_LOCAL_WORLD
.  f - function to run
-  ptr - pointer to data to pass to the function; it must have been obtained with PetscHMPIMalloc()
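
   Example Usage: a sketch; MyStruct, MyFunction and s are hypothetical names continuing the PetscHMPIMalloc()
   example. The function is executed on every process of PETSC_COMM_LOCAL_WORLD, each process receiving its
   own copy of the struct.

$     typedef struct { PetscInt n; PetscScalar alpha; } MyStruct;  // hypothetical, allocated with PetscHMPIMalloc()
$
$     PetscErrorCode MyFunction(MPI_Comm comm,void *data)          // runs on the master and on every worker
$     {
$       MyStruct    *s = (MyStruct*)data;
$       PetscMPIInt rank;
$       MPI_Comm_rank(comm,&rank);
$       PetscInfo2(0,"rank %d sees n = %D\n",rank,s->n);
$       return 0;
$     }
$
$     ierr = PetscHMPIRun(PETSC_COMM_LOCAL_WORLD,MyFunction,(void*)s);CHKERRQ(ierr);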

   Level: developer

   Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

.seealso: PetscHMPIMerge(), PetscHMPIMalloc(), PetscHMPIFree(), PetscHMPIRunCtx()

@*/
PetscErrorCode PetscHMPIRun(MPI_Comm comm,PetscErrorCode (*f)(MPI_Comm,void*),void *ptr)
{
  PetscErrorCode ierr    = 0;
  PetscInt       command = 2,i;

  if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

  MPI_Bcast(&command,1,MPIU_INT,0,comm);
  for (i=0; i<numberobjects; i++) {
    if (objects[i] == ptr) {
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      MPI_Bcast((PETSC_UINTPTR_T*)&f,1,MPIU_SIZE_T,0,comm);
      (*f)(comm,ptr);
      return(0);
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscHMPIMalloc()");
  PetscFunctionReturn(ierr);
}
/*@C
   PetscHMPIRunCtx - Runs a function on all the processes of a node

   Collective on MPI_Comm

   Input Parameters:
+  comm - communicator to run the function on; must be PETSC_COMM_LOCAL_WORLD
.  f - function to run
-  ptr - pointer to data to pass to the function; it must have been obtained with PetscHMPIMalloc()

   Notes: This is like PetscHMPIRun() except that it also passes the context provided to PetscHMPIMerge()
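
   Example Usage: a sketch; MyCtxFunction is hypothetical, MyCtx and MyStruct are the hypothetical types from
   the PetscHMPIMerge() and PetscHMPIMalloc() examples, and s was obtained with PetscHMPIMalloc(). The second
   argument received by the function is the ctx pointer that was given to PetscHMPIMerge().

$     PetscErrorCode MyCtxFunction(MPI_Comm comm,void *ctx,void *data)
$     {
$       MyCtx    *c = (MyCtx*)ctx;      // the context that was given to PetscHMPIMerge()
$       MyStruct *s = (MyStruct*)data;  // the object obtained with PetscHMPIMalloc()
$       s->alpha = c->shift;
$       return 0;
$     }
$
$     ierr = PetscHMPIRunCtx(PETSC_COMM_LOCAL_WORLD,MyCtxFunction,(void*)s);CHKERRQ(ierr);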

   Level: developer

   Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

.seealso: PetscHMPIMerge(), PetscHMPIMalloc(), PetscHMPIFree(), PetscHMPIRun()

@*/
PetscErrorCode PetscHMPIRunCtx(MPI_Comm comm,PetscErrorCode (*f)(MPI_Comm,void*,void*),void *ptr)
{
  PetscErrorCode ierr    = 0;
  PetscInt       command = 4,i;

  if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

  MPI_Bcast(&command,1,MPIU_INT,0,comm);
  for (i=0; i<numberobjects; i++) {
    if (objects[i] == ptr) {
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      MPI_Bcast((PETSC_UINTPTR_T*)&f,1,MPIU_SIZE_T,0,comm);
      (*f)(comm,PetscHMPICtx,ptr);
      return(0);
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscHMPIMalloc()");
  PetscFunctionReturn(ierr);
}