Actual source code: mpinit.c
petsc-3.3-p7 2013-05-11
#include <petscsys.h>        /*I  "petscsys.h"   I*/

static MPI_Comm saved_PETSC_COMM_WORLD = 0;
MPI_Comm        PETSC_COMM_LOCAL_WORLD = 0;            /* comm for a single node (local set of processes) */
PetscBool       PetscHMPIWorker        = PETSC_FALSE;  /* this is a regular process, nonworker process */
void*           PetscHMPICtx           = 0;

extern PetscErrorCode PetscHMPIHandle(MPI_Comm);

#if defined(PETSC_HAVE_MPI_COMM_SPAWN)
/*@C
   PetscHMPISpawn - Initialize additional processes to be used as "worker" processes. This is not generally
     called by users. One should use -hmpi_spawn_size <n> to indicate that you wish to have n-1 new MPI
     processes spawned for each current process.

   Not Collective (could make collective on MPI_COMM_WORLD, generate one huge comm and then split it up)

   Input Parameter:
.  nodesize - size of each compute node that will share processors

   Options Database:
.  -hmpi_spawn_size nodesize

   Notes: This is only supported on systems with an MPI 2 implementation that includes the MPI_Comm_spawn() routine.

$    Comparison of two approaches for HMPI usage (MPI started with N processes)
$
$    -hmpi_spawn_size <n> requires MPI 2, results in n*N total processes with N directly used by application code
$                         and n-1 worker processes (used by PETSc) for each application node.
$                         You MUST launch MPI so that only ONE MPI process is created for each hardware node.
$
$    -hmpi_merge_size <n> results in N total processes, N/n used by the application code and the rest worker processes
$                         (used by PETSc)
$                         You MUST launch MPI so that n MPI processes are created for each hardware node.
$
$    petscmpiexec -n 2 ./ex1 -hmpi_spawn_size 3 gives 2 application nodes (and 4 PETSc worker nodes)
$    petscmpiexec -n 6 ./ex1 -hmpi_merge_size 3 gives the SAME 2 application nodes and 4 PETSc worker nodes
$       This is what one would use if each of the computer's hardware nodes had 3 CPUs.
$
$    These are intended to be used in conjunction with USER HMPI code. The user will have 1 process per
$    computer (hardware) node (where the computer node has p cpus), the user's code will use threads to fully
$    utilize all the CPUs on the node. The PETSc code will have p processes to fully use the compute node for
$    PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time so the p CPUs
$    are always working on p tasks, never more than p.
$
$    See PCHMPI for a PETSc preconditioner that can use this functionality
$

   For both PetscHMPISpawn() and PetscHMPIMerge(), PETSC_COMM_WORLD consists of one process per "node" and PETSC_COMM_LOCAL_WORLD
   consists of all the processes in a "node."

   In both cases the user's code is running ONLY on PETSC_COMM_WORLD (that was newly generated by running this command).

   Level: developer

   Concepts: HMPI

.seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscHMPIFinalize(), PetscInitialize(), PetscHMPIMerge(), PetscHMPIRun()

@*/
PetscErrorCode PetscHMPISpawn(PetscMPIInt nodesize)
{
  PetscMPIInt size;
  MPI_Comm    parent,children;

  MPI_Comm_get_parent(&parent);
  if (parent == MPI_COMM_NULL) {  /* the original processes started by user */
    char programname[PETSC_MAX_PATH_LEN];
    char **argv;

    PetscGetProgramName(programname,PETSC_MAX_PATH_LEN);
    PetscGetArguments(&argv);
    MPI_Comm_spawn(programname,argv,nodesize-1,MPI_INFO_NULL,0,PETSC_COMM_SELF,&children,MPI_ERRCODES_IGNORE);
    PetscFreeArguments(argv);
    MPI_Intercomm_merge(children,0,&PETSC_COMM_LOCAL_WORLD);

    MPI_Comm_size(PETSC_COMM_WORLD,&size);
    PetscInfo2(0,"PETSc HMPI successfully spawned: number of nodes = %d node size = %d\n",size,nodesize);
    saved_PETSC_COMM_WORLD = PETSC_COMM_WORLD;
  } else {  /* worker nodes that get spawned */
    MPI_Intercomm_merge(parent,1,&PETSC_COMM_LOCAL_WORLD);
    PetscHMPIHandle(PETSC_COMM_LOCAL_WORLD);
    PetscHMPIWorker = PETSC_TRUE;  /* so that PetscHMPIFinalize() will not attempt a broadcast from this process */
    PetscEnd();                    /* cannot continue into user code */
  }
  return(0);
}
#endif
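
/*
   Illustrative sketch (NOT part of the original source): a minimal main() for the spawn mode,
   assuming the executable is started with the -hmpi_spawn_size 3 option described above so that
   each launched process gains two worker processes. Only PETSc/MPI calls that appear in this
   file's documentation are used; the printed text is made up for illustration.
*/
#if 0
#include <petscsys.h>
int main(int argc,char **argv)
{
  PetscMPIInt gsize,lsize;

  PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);  /* worker processes never return from here */
  MPI_Comm_size(PETSC_COMM_WORLD,&gsize);              /* one application process per hardware node */
  MPI_Comm_size(PETSC_COMM_LOCAL_WORLD,&lsize);        /* all processes sharing this node            */
  PetscPrintf(PETSC_COMM_WORLD,"application processes %d, processes per node %d\n",gsize,lsize);
  PetscFinalize();
  return 0;
}
#endif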
/*@C
   PetscHMPIMerge - Initializes PETSc and MPI to work with HMPI. This is not usually called
     by the user. One should use -hmpi_merge_size <n> to indicate the node size of the merged
     communicator.

   Collective on MPI_COMM_WORLD or PETSC_COMM_WORLD if it has been set

   Input Parameters:
+  nodesize - size of each compute node that will share processors
.  func - optional function to call on the master nodes
-  ctx - context passed to function on master nodes

   Options Database:
.  -hmpi_merge_size <n>

   Level: developer

$    Comparison of two approaches for HMPI usage (MPI started with N processes)
$
$    -hmpi_spawn_size <n> requires MPI 2, results in n*N total processes with N directly used by application code
$                         and n-1 worker processes (used by PETSc) for each application node.
$                         You MUST launch MPI so that only ONE MPI process is created for each hardware node.
$
$    -hmpi_merge_size <n> results in N total processes, N/n used by the application code and the rest worker processes
$                         (used by PETSc)
$                         You MUST launch MPI so that n MPI processes are created for each hardware node.
$
$    petscmpiexec -n 2 ./ex1 -hmpi_spawn_size 3 gives 2 application nodes (and 4 PETSc worker nodes)
$    petscmpiexec -n 6 ./ex1 -hmpi_merge_size 3 gives the SAME 2 application nodes and 4 PETSc worker nodes
$       This is what one would use if each of the computer's hardware nodes had 3 CPUs.
$
$    These are intended to be used in conjunction with USER HMPI code. The user will have 1 process per
$    computer (hardware) node (where the computer node has p cpus), the user's code will use threads to fully
$    utilize all the CPUs on the node. The PETSc code will have p processes to fully use the compute node for
$    PETSc calculations. The user THREADS and PETSc PROCESSES will NEVER run at the same time so the p CPUs
$    are always working on p tasks, never more than p.
$
$    See PCHMPI for a PETSc preconditioner that can use this functionality
$

   For both PetscHMPISpawn() and PetscHMPIMerge(), PETSC_COMM_WORLD consists of one process per "node" and PETSC_COMM_LOCAL_WORLD
   consists of all the processes in a "node."

   In both cases the user's code is running ONLY on PETSC_COMM_WORLD (that was newly generated by running this command).

   Concepts: HMPI

.seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscHMPIFinalize(), PetscInitialize(), PetscHMPISpawn(), PetscHMPIRun()

@*/
PetscErrorCode PetscHMPIMerge(PetscMPIInt nodesize,PetscErrorCode (*func)(void*),void *ctx)
{
  PetscMPIInt size,rank,*ranks,i;
  MPI_Group   group,newgroup;

  saved_PETSC_COMM_WORLD = PETSC_COMM_WORLD;

  MPI_Comm_size(saved_PETSC_COMM_WORLD,&size);
  if (size % nodesize) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Total number of processes %d is not divisible by number of processes per node %d",size,nodesize);
  MPI_Comm_rank(saved_PETSC_COMM_WORLD,&rank);

  /* create two communicators
      *) one that contains the first process from each node: 0,nodesize,2*nodesize,...
      *) one that contains all processes in a node: (0,1,2,...,nodesize-1), (nodesize,nodesize+1,...,2*nodesize-1), ...
  */
  MPI_Comm_group(saved_PETSC_COMM_WORLD,&group);
  PetscMalloc((size/nodesize)*sizeof(PetscMPIInt),&ranks);
  for (i=0; i<(size/nodesize); i++) ranks[i] = i*nodesize;
  MPI_Group_incl(group,size/nodesize,ranks,&newgroup);
  PetscFree(ranks);
  MPI_Comm_create(saved_PETSC_COMM_WORLD,newgroup,&PETSC_COMM_WORLD);
  if (rank % nodesize) PETSC_COMM_WORLD = 0; /* mark invalid processes for easy debugging */
  MPI_Group_free(&group);
  MPI_Group_free(&newgroup);

  MPI_Comm_split(saved_PETSC_COMM_WORLD,rank/nodesize,rank % nodesize,&PETSC_COMM_LOCAL_WORLD);

  PetscInfo2(0,"PETSc HMPI successfully started: number of nodes = %d node size = %d\n",size/nodesize,nodesize);
  PetscInfo1(0,"PETSc HMPI process %sactive\n",(rank % nodesize) ? "in" : "");

  PetscHMPICtx = ctx;
  /*
     All processes not involved in user application code wait here
  */
  if (!PETSC_COMM_WORLD) {
    PetscHMPIHandle(PETSC_COMM_LOCAL_WORLD);
    PETSC_COMM_WORLD = saved_PETSC_COMM_WORLD;
    PetscHMPIWorker  = PETSC_TRUE; /* so that PetscHMPIFinalize() will not attempt a broadcast from this process */
    PetscInfo(0,"PETSc HMPI inactive process becoming active");
  } else {
    if (func) {
      (*func)(ctx);
    }
  }
  return(0);
}
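
/*
   Illustrative sketch (NOT part of the original source): a hypothetical callback and context that
   could be passed as the func/ctx arguments of PetscHMPIMerge(). PetscHMPIMerge() stores ctx in
   PetscHMPICtx and, on the master processes, simply calls (*func)(ctx); MyMasterSetup and
   MySetupCtx below are made-up names used only for illustration.
*/
#if 0
typedef struct {
  PetscInt levels;   /* hypothetical user parameter */
} MySetupCtx;

static PetscErrorCode MyMasterSetup(void *ctx)
{
  MySetupCtx *s = (MySetupCtx*)ctx;

  /* runs only on the master process of each node, i.e. the processes left in PETSC_COMM_WORLD */
  if (s->levels < 1) s->levels = 1;
  return 0;
}

/* e.g.  PetscHMPIMerge(nodesize,MyMasterSetup,&myctx); */
#endif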
/*@C
   PetscHMPIFinalize - Finalizes PETSc and MPI use of HMPI. Called by PetscFinalize(); cannot
     be called by the user.

   Collective on the entire system

   Level: developer

.seealso: PetscFinalize(), PetscGetArgs(), PetscHMPIMerge(), PCHMPIRun()

@*/
PetscErrorCode PetscHMPIFinalize(void)
{
  PetscErrorCode ierr    = 0;
  PetscInt       command = 3;

  if (!PetscHMPIWorker && PETSC_COMM_LOCAL_WORLD) {
    MPI_Bcast(&command,1,MPIU_INT,0,PETSC_COMM_LOCAL_WORLD); /* broadcast to my worker group to end program */
    PETSC_COMM_WORLD = saved_PETSC_COMM_WORLD;
    PetscInfo(0,"PETSc HMPI active process ending PetscHMPIMerge()");
  }
  return(ierr);
}
static PetscInt numberobjects = 0;
static void     *objects[100];
/*@C
   PetscHMPIHandle - Receives commands from the master node and processes them

   Collective on MPI_Comm

   Input Parameter:
.  comm - Must be PETSC_COMM_LOCAL_WORLD

   Level: developer

   Notes: This is usually handled automatically; you likely do not need to use this directly

   Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

.seealso: PetscHMPIMerge(), PCHMPIRun(), PCHMPINew()

@*/
PetscErrorCode PetscHMPIHandle(MPI_Comm comm)
{
  PetscInt  command       = 0;            /* dummy value so MPI-Uni doesn't think it is not set */
  PetscBool exitwhileloop = PETSC_FALSE;

  while (!exitwhileloop) {
    MPI_Bcast(&command,1,MPIU_INT,0,comm);
    switch (command) {
    case 0: { /* allocate some memory on this worker process */
      size_t n = 0;                       /* dummy value so MPI-Uni doesn't think it is not set */
      void   *ptr;
      MPI_Bcast(&n,1,MPIU_SIZE_T,0,comm);
      /* cannot use PetscNew() because it requires a struct argument */
      PetscMalloc(n,&ptr);
      PetscMemzero(ptr,n);
      objects[numberobjects++] = ptr;
      break;
    }
    case 1: { /* free some memory on this worker process */
      PetscInt i;
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      PetscFree(objects[i]);
      break;
    }
    case 2: { /* run a function on this worker process */
      PetscInt       i;
      PetscErrorCode (*f)(MPI_Comm,void*);
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      MPI_Bcast(&f,1,MPIU_SIZE_T,0,comm);
      (*f)(comm,objects[i]);
      break;
    }
    case 4: { /* run a function on this worker process with provided context */
      PetscInt       i;
      PetscErrorCode (*f)(MPI_Comm,void*,void*);
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      MPI_Bcast(&f,1,MPIU_SIZE_T,0,comm);
      (*f)(comm,PetscHMPICtx,objects[i]);
      break;
    }
    case 3: {
      exitwhileloop = PETSC_TRUE;
      break;
    }
    default:
      SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unknown HMPI command %D",command);
    }
  }
  return(0);
}
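
/*
   Quick reference for the command protocol handled above, derived directly from the switch in
   PetscHMPIHandle(). The enum below is only an illustrative sketch; it does not exist in PETSc,
   which broadcasts the raw integer values.
*/
#if 0
enum HMPICommand {
  HMPI_CMD_MALLOC = 0,  /* PetscHMPIMalloc():   broadcast size, allocate and zero on each worker   */
  HMPI_CMD_FREE   = 1,  /* PetscHMPIFree():     broadcast object index, free it on each worker     */
  HMPI_CMD_RUN    = 2,  /* PetscHMPIRun():      broadcast object index and function pointer, call  */
  HMPI_CMD_EXIT   = 3,  /* PetscHMPIFinalize(): leave the handler loop                             */
  HMPI_CMD_RUNCTX = 4   /* PetscHMPIRunCtx():   like HMPI_CMD_RUN but also passes PetscHMPICtx     */
};
#endif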
/*@C
   PetscHMPIMalloc - Creates a "c struct" on all nodes of an HMPI communicator

   Collective on MPI_Comm

   Input Parameters:
+  comm - Must be PETSC_COMM_LOCAL_WORLD
-  n - amount of memory requested

   Output Parameter:
.  ptr - the allocated (and zeroed) memory on the calling (master) process

   Level: developer

   Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

.seealso: PetscHMPIMerge(), PCHMPIRun(), PCHMPIFree()

@*/
PetscErrorCode PetscHMPIMalloc(MPI_Comm comm,size_t n,void **ptr)
{
  PetscInt command = 0;

  if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

  MPI_Bcast(&command,1,MPIU_INT,0,comm);
  MPI_Bcast(&n,1,MPIU_SIZE_T,0,comm);
  /* cannot use PetscNew() because it requires a struct argument */
  PetscMalloc(n,ptr);
  PetscMemzero(*ptr,n);

  objects[numberobjects++] = *ptr;
  return(0);
}
/*@C
   PetscHMPIFree - Frees a "c struct" on all nodes of an HMPI communicator

   Collective on MPI_Comm

   Input Parameters:
+  comm - Must be PETSC_COMM_LOCAL_WORLD
-  ptr - pointer to data to be freed, must have been obtained with PetscHMPIMalloc()

   Level: developer

   Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

.seealso: PetscHMPIMerge(), PetscHMPIMalloc()

@*/
PetscErrorCode PetscHMPIFree(MPI_Comm comm,void *ptr)
{
  PetscInt command = 1,i;

  if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

  MPI_Bcast(&command,1,MPIU_INT,0,comm);
  for (i=0; i<numberobjects; i++) {
    if (objects[i] == ptr) {
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      PetscFree(objects[i]);
      return(0);
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscHMPIMalloc()");
  return(0);
}
/*@C
   PetscHMPIRun - runs a function on all the processes of a node

   Collective on MPI_Comm

   Input Parameters:
+  comm - communicator to run function on, must be PETSC_COMM_LOCAL_WORLD
.  f - function to run
-  ptr - pointer to data to pass to function; must be obtained with PetscHMPIMalloc()

   Level: developer

   Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

.seealso: PetscHMPIMerge(), PetscHMPIMalloc(), PetscHMPIFree(), PetscHMPIRunCtx()

@*/
PetscErrorCode PetscHMPIRun(MPI_Comm comm,PetscErrorCode (*f)(MPI_Comm,void *),void *ptr)
{
  PetscInt command = 2,i;

  if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

  MPI_Bcast(&command,1,MPIU_INT,0,comm);
  for (i=0; i<numberobjects; i++) {
    if (objects[i] == ptr) {
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      MPI_Bcast(&f,1,MPIU_SIZE_T,0,comm);
      (*f)(comm,ptr);
      return(0);
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscHMPIMalloc()");
  return(0);
}
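
/*
   Illustrative sketch (NOT part of the original source): how a master process might combine
   PetscHMPIMalloc(), PetscHMPIRun() and PetscHMPIFree(). MyData, MyNodeKernel and MyDriver are
   made-up names; the kernel runs on every process of PETSC_COMM_LOCAL_WORLD, each process
   operating on its own zeroed copy of the struct.
*/
#if 0
typedef struct {
  PetscInt    n;
  PetscScalar alpha;
} MyData;

static PetscErrorCode MyNodeKernel(MPI_Comm comm,void *data)
{
  MyData      *d = (MyData*)data;
  PetscMPIInt lrank;

  MPI_Comm_rank(comm,&lrank);   /* rank within the node */
  d->alpha = 2.0*lrank;         /* some per-process work */
  return 0;
}

static PetscErrorCode MyDriver(void)   /* called only from the master processes (those in PETSC_COMM_WORLD) */
{
  MyData *d;

  PetscHMPIMalloc(PETSC_COMM_LOCAL_WORLD,sizeof(MyData),(void**)&d);
  PetscHMPIRun(PETSC_COMM_LOCAL_WORLD,MyNodeKernel,d);
  PetscHMPIFree(PETSC_COMM_LOCAL_WORLD,d);
  return 0;
}
#endif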
/*@C
   PetscHMPIRunCtx - runs a function on all the processes of a node

   Collective on MPI_Comm

   Input Parameters:
+  comm - communicator to run function on, must be PETSC_COMM_LOCAL_WORLD
.  f - function to run
-  ptr - pointer to data to pass to function; must be obtained with PetscHMPIMalloc()

   Notes: This is like PetscHMPIRun() except that it also passes the context provided to PetscHMPIMerge()

   Level: developer

   Developer Notes: Since comm must be PETSC_COMM_LOCAL_WORLD, why have this argument?

.seealso: PetscHMPIMerge(), PetscHMPIMalloc(), PetscHMPIFree(), PetscHMPIRun()

@*/
PetscErrorCode PetscHMPIRunCtx(MPI_Comm comm,PetscErrorCode (*f)(MPI_Comm,void*,void *),void *ptr)
{
  PetscInt command = 4,i;

  if (PetscHMPIWorker) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not using HMPI feature of PETSc");

  MPI_Bcast(&command,1,MPIU_INT,0,comm);
  for (i=0; i<numberobjects; i++) {
    if (objects[i] == ptr) {
      MPI_Bcast(&i,1,MPIU_INT,0,comm);
      MPI_Bcast(&f,1,MPIU_SIZE_T,0,comm);
      (*f)(comm,PetscHMPICtx,ptr);
      return(0);
    }
  }
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Pointer does not appear to have been created with PetscHMPIMalloc()");
  return(0);
}
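
/*
   Illustrative sketch (NOT part of the original source): a hypothetical three-argument kernel for
   PetscHMPIRunCtx(). The second argument is the ctx pointer that PetscHMPIMerge() stored in
   PetscHMPICtx; MyGlobalCtx and MyCtxKernel are made-up names.
*/
#if 0
typedef struct { PetscInt verbosity; } MyGlobalCtx;

static PetscErrorCode MyCtxKernel(MPI_Comm comm,void *gctx,void *data)
{
  MyGlobalCtx *g = (MyGlobalCtx*)gctx;   /* the context given to PetscHMPIMerge() */

  if (g->verbosity) PetscInfo(0,"MyCtxKernel running on a node process\n");
  return 0;
}

/* PetscHMPIRunCtx(PETSC_COMM_LOCAL_WORLD,MyCtxKernel,d);   where d came from PetscHMPIMalloc() */
#endif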