Actual source code: comm.c

  2: /***********************************comm.c*************************************
  3: SPARSE GATHER-SCATTER PACKAGE: bss_malloc bss_malloc ivec error comm gs queue

  5: Author: Henry M. Tufo III

  7: e-mail: hmt@cs.brown.edu

  9: snail-mail:
 10: Division of Applied Mathematics
 11: Brown University
 12: Providence, RI 02912

 14: Last Modification: 
 15: 11.21.97
 16: ***********************************comm.c*************************************/

 18: /***********************************comm.c*************************************
 19: File Description:
 20: -----------------

 22: ***********************************comm.c*************************************/
 23:  #include petsc.h

 25:  #include const.h
 26:  #include types.h
 27:  #include error.h
 28:  #include ivec.h
 29: #include "bss_malloc.h"
 30:  #include comm.h


/* global program control variables - explicitly exported */
int my_id            = 0;   /* MPI rank of this process (set in comm_init()) */
int num_nodes        = 1;   /* number of processes in MPI_COMM_WORLD */
int floor_num_nodes  = 0;   /* largest power of two <= num_nodes */
int i_log2_num_nodes = 0;   /* log2(floor_num_nodes) */

/* global program control variables */
static int p_init = 0;             /* nonzero once comm_init() has run */
static int modfl_num_nodes;        /* num_nodes - floor_num_nodes; 0 iff num_nodes is a power of 2 */
static int edge_not_pow_2;         /* partner id for the non-power-of-2 collapse/expand step:
                                      node floor_num_nodes+k pairs with node k+1 (node 0 sits out
                                      and ranks above modfl_num_nodes are unpaired); 0 ==> this
                                      node takes no part in that step (see comm_init()/giop()) */

/* hypercube edge partners: edge_node[k] = my_id ^ 2^k, filled in comm_init() */
static unsigned int edge_node[INT_LEN*32];

 46: /***********************************comm.c*************************************
Function: comm_init()

 49: Input : 
 50: Output: 
 51: Return: 
 52: Description: 
 53: ***********************************comm.c*************************************/
 54: void
 55: comm_init (void)
 56: {
 57: #ifdef DEBUG  
 58:   error_msg_warning("c_init() :: start\n");
 59: #endif

 61:   if (p_init++) return;

 63: #if PETSC_SIZEOF_INT != 4
 64:   error_msg_warning("Int != 4 Bytes!");
 65: #endif


 68:   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
 69:   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

 71:   if (num_nodes> (INT_MAX >> 1))
 72:   {error_msg_fatal("Can't have more then MAX_INT/2 nodes!!!");}

 74:   ivec_zero((int*)edge_node,INT_LEN*32);

 76:   floor_num_nodes = 1;
 77:   i_log2_num_nodes = modfl_num_nodes = 0;
 78:   while (floor_num_nodes <= num_nodes)
 79:     {
 80:       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
 81:       floor_num_nodes <<= 1;
 82:       i_log2_num_nodes++;
 83:     }

 85:   i_log2_num_nodes--;
 86:   floor_num_nodes >>= 1;
 87:   modfl_num_nodes = (num_nodes - floor_num_nodes);

 89:   if ((my_id > 0) && (my_id <= modfl_num_nodes))
 90:     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
 91:   else if (my_id >= floor_num_nodes)
 92:     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
 93:     }
 94:   else
 95:     {edge_not_pow_2 = 0;}

 97: #ifdef DEBUG  
 98:   error_msg_warning("c_init() done :: my_id=%d, num_nodes=%D",my_id,num_nodes);
 99: #endif
100: }



104: /***********************************comm.c*************************************
105: Function: giop()

107: Input : 
108: Output: 
109: Return: 
110: Description: fan-in/out version
111: ***********************************comm.c*************************************/
112: void
113: giop(int *vals, int *work, int n, int *oprs)
114: {
115:   register int mask, edge;
116:   int type, dest;
117:   vfp fp;
118:   MPI_Status  status;


121: #ifdef SAFE
122:   /* ok ... should have some data, work, and operator(s) */
123:   if (!vals||!work||!oprs)
124:     {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

126:   /* non-uniform should have at least two entries */
127:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
128:     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

130:   /* check to make sure comm package has been initialized */
131:   if (!p_init)
132:     {comm_init();}
133: #endif

135:   /* if there's nothing to do return */
136:   if ((num_nodes<2)||(!n))
137:     {
138: #ifdef DEBUG
139:       error_msg_warning("giop() :: n=%D num_nodes=%d",n,num_nodes);
140: #endif
141:       return;
142:     }

144:   /* a negative number if items to send ==> fatal */
145:   if (n<0)
146:     {error_msg_fatal("giop() :: n=%D<0?",n);}

148:   /* advance to list of n operations for custom */
149:   if ((type=oprs[0])==NON_UNIFORM)
150:     {oprs++;}

152:   /* major league hack */
153:   if (!(fp = (vfp) ivec_fct_addr(type))) {
154:     error_msg_warning("giop() :: hope you passed in a rbfp!\n");
155:     fp = (vfp) oprs;
156:   }

158:   /* all msgs will be of the same length */
159:   /* if not a hypercube must colapse partial dim */
160:   if (edge_not_pow_2)
161:     {
162:       if (my_id >= floor_num_nodes)
163:         {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
164:       else
165:         {
166:           MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
167:                    MPI_COMM_WORLD,&status);
168:           (*fp)(vals,work,n,oprs);
169:         }
170:     }

172:   /* implement the mesh fan in/out exchange algorithm */
173:   if (my_id<floor_num_nodes)
174:     {
175:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
176:         {
177:           dest = my_id^mask;
178:           if (my_id > dest)
179:             {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
180:           else
181:             {
182:               MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
183:                        MPI_COMM_WORLD, &status);
184:               (*fp)(vals, work, n, oprs);
185:             }
186:         }

188:       mask=floor_num_nodes>>1;
189:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
190:         {
191:           if (my_id%mask)
192:             {continue;}
193: 
194:           dest = my_id^mask;
195:           if (my_id < dest)
196:             {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
197:           else
198:             {
199:               MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
200:                        MPI_COMM_WORLD, &status);
201:             }
202:         }
203:     }

205:   /* if not a hypercube must expand to partial dim */
206:   if (edge_not_pow_2)
207:     {
208:       if (my_id >= floor_num_nodes)
209:         {
210:           MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
211:                    MPI_COMM_WORLD,&status);
212:         }
213:       else
214:         {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
215:     }
216: }

218: /***********************************comm.c*************************************
219: Function: grop()

221: Input : 
222: Output: 
223: Return: 
224: Description: fan-in/out version
225: ***********************************comm.c*************************************/
226: void
227: grop(REAL *vals, REAL *work, int n, int *oprs)
228: {
229:   register int mask, edge;
230:   int type, dest;
231:   vfp fp;
232:   MPI_Status  status;


235: #ifdef SAFE
236:   /* ok ... should have some data, work, and operator(s) */
237:   if (!vals||!work||!oprs)
238:     {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

240:   /* non-uniform should have at least two entries */
241:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
242:     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

244:   /* check to make sure comm package has been initialized */
245:   if (!p_init)
246:     {comm_init();}
247: #endif

249:   /* if there's nothing to do return */
250:   if ((num_nodes<2)||(!n))
251:     {return;}

253:   /* a negative number of items to send ==> fatal */
254:   if (n<0)
255:     {error_msg_fatal("gdop() :: n=%D<0?",n);}

257:   /* advance to list of n operations for custom */
258:   if ((type=oprs[0])==NON_UNIFORM)
259:     {oprs++;}

261:   if (!(fp = (vfp) rvec_fct_addr(type))) {
262:     error_msg_warning("grop() :: hope you passed in a rbfp!\n");
263:     fp = (vfp) oprs;
264:   }

266:   /* all msgs will be of the same length */
267:   /* if not a hypercube must colapse partial dim */
268:   if (edge_not_pow_2)
269:     {
270:       if (my_id >= floor_num_nodes)
271:         {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG0+my_id,
272:                   MPI_COMM_WORLD);}
273:       else
274:         {
275:           MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
276:                    MPI_COMM_WORLD,&status);
277:           (*fp)(vals,work,n,oprs);
278:         }
279:     }

281:   /* implement the mesh fan in/out exchange algorithm */
282:   if (my_id<floor_num_nodes)
283:     {
284:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
285:         {
286:           dest = my_id^mask;
287:           if (my_id > dest)
288:             {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
289:           else
290:             {
291:               MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
292:                        MPI_COMM_WORLD, &status);
293:               (*fp)(vals, work, n, oprs);
294:             }
295:         }

297:       mask=floor_num_nodes>>1;
298:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
299:         {
300:           if (my_id%mask)
301:             {continue;}
302: 
303:           dest = my_id^mask;
304:           if (my_id < dest)
305:             {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
306:           else
307:             {
308:               MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,
309:                        MPI_COMM_WORLD, &status);
310:             }
311:         }
312:     }

314:   /* if not a hypercube must expand to partial dim */
315:   if (edge_not_pow_2)
316:     {
317:       if (my_id >= floor_num_nodes)
318:         {
319:           MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
320:                    MPI_COMM_WORLD,&status);
321:         }
322:       else
323:         {MPI_Send(vals,n,REAL_TYPE,edge_not_pow_2,MSGTAG5+my_id,
324:                   MPI_COMM_WORLD);}
325:     }
326: }


329: /***********************************comm.c*************************************
Function: grop_hc()

332: Input : 
333: Output: 
334: Return: 
335: Description: fan-in/out version

337: note good only for num_nodes=2^k!!!

339: ***********************************comm.c*************************************/
/* Global REAL operation restricted to the low `dim` hypercube dimensions.
   Requires num_nodes to be a power of 2 (fatal otherwise).  Result is left
   in vals on every participating node; work is caller scratch of length n. */
void
grop_hc(REAL *vals, REAL *work, int n, int *oprs, int dim)
{
  register int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status  status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions then exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* resolve the combining function; otherwise treat oprs as the fct ptr */
  if (!(fp = (vfp) rvec_fct_addr(type))) {
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* fan-in: combine along dimensions 0..dim-1 */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  /* position mask at the top dimension for the fan-out pass.
     NOTE(review): the loop above always exits with edge==dim (dim>=1 here),
     so the else branch is unreachable dead code. */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  /* fan-out: broadcast the result back down dimensions dim-1..0 */
  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals,n,REAL_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}


421: /***********************************comm.c*************************************
Function: gfop()

424: Input : 
425: Output: 
426: Return: 
427: Description: fan-in/out version
428: ***********************************comm.c*************************************/
429: void gfop(void *vals, void *work, int n, vbfp fp, DATA_TYPE dt, int comm_type)
430: {
431:   register int mask, edge;
432:   int dest;
433:   MPI_Status  status;
434:   MPI_Op op;

436: #ifdef SAFE
437:   /* check to make sure comm package has been initialized */
438:   if (!p_init)
439:     {comm_init();}

441:   /* ok ... should have some data, work, and operator(s) */
442:   if (!vals||!work||!fp)
443:     {error_msg_fatal("gop() :: v=%D, w=%D, f=%D",vals,work,fp);}
444: #endif

446:   /* if there's nothing to do return */
447:   if ((num_nodes<2)||(!n))
448:     {return;}

450:   /* a negative number of items to send ==> fatal */
451:   if (n<0)
452:     {error_msg_fatal("gop() :: n=%D<0?",n);}

454:   if (comm_type==MPI)
455:     {
456:       MPI_Op_create(fp,TRUE,&op);
457:       MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
458:       MPI_Op_free(&op);
459:       return;
460:     }

462:   /* if not a hypercube must colapse partial dim */
463:   if (edge_not_pow_2)
464:     {
465:       if (my_id >= floor_num_nodes)
466:         {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
467:                   MPI_COMM_WORLD);}
468:       else
469:         {
470:           MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
471:                    MPI_COMM_WORLD,&status);
472:           (*fp)(vals,work,&n,&dt);
473:         }
474:     }

476:   /* implement the mesh fan in/out exchange algorithm */
477:   if (my_id<floor_num_nodes)
478:     {
479:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
480:         {
481:           dest = my_id^mask;
482:           if (my_id > dest)
483:             {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
484:           else
485:             {
486:               MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
487:                        MPI_COMM_WORLD, &status);
488:               (*fp)(vals, work, &n, &dt);
489:             }
490:         }

492:       mask=floor_num_nodes>>1;
493:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
494:         {
495:           if (my_id%mask)
496:             {continue;}
497: 
498:           dest = my_id^mask;
499:           if (my_id < dest)
500:             {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
501:           else
502:             {
503:               MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
504:                        MPI_COMM_WORLD, &status);
505:             }
506:         }
507:     }

509:   /* if not a hypercube must expand to partial dim */
510:   if (edge_not_pow_2)
511:     {
512:       if (my_id >= floor_num_nodes)
513:         {
514:           MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
515:                    MPI_COMM_WORLD,&status);
516:         }
517:       else
518:         {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
519:                   MPI_COMM_WORLD);}
520:     }
521: }






528: /******************************************************************************
Function: ssgl_radd()

531: Input : 
532: Output: 
533: Return: 
534: Description: 
535:  
536: ii+1 entries in seg :: 0 .. ii

538: ******************************************************************************/
/* Staged segmented global REAL add: sums variable-length trailing segments of
   vals across successively larger sub-cubes (segs[0..level] are segment
   offsets, segs[level] one past the end).  Stage `edge` exchanges
   segs[level]-segs[edge] entries with hypercube neighbour edge_node[edge];
   work is caller scratch large enough for the largest stage.
   NOTE(review): the DEBUG diagnostic says "sgl_add()" - name mismatch. */
void 
ssgl_radd(register REAL *vals, register REAL *work, register int level, 
          register int *segs)
{
  register int edge, type, dest, mask;
  register int stage_n;
  MPI_Status  status;

#ifdef DEBUG
  if (level > i_log2_num_nodes)
    {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif


  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  /* fan-in: at stage `edge`, mask == 2^edge - 1, so only nodes whose low
     `edge` bits are zero (sub-cube roots) still participate */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          /* tag encodes sender id and stage so stages can't be confused */
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
/*            {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
          {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              /* rewrite tag to match what the partner (dest) sent */
              type =  type - my_id + dest;
/*              crecv(type,work,stage_n*REAL_LEN); */
              MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
/*            daxpy(vals+segs[edge], work, stage_n); */
            }
        }
      mask <<= 1;
    }
  /* fan-out: walk the stages back down, pushing results outward */
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
/*            {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
            {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type =  type - my_id + dest;
/*               crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
              MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}



610: /***********************************comm.c*************************************
611: Function: grop_hc_vvl()

613: Input : 
614: Output: 
615: Return: 
616: Description: fan-in/out version

618: note good only for num_nodes=2^k!!!

620: ***********************************comm.c*************************************/
/* Variable-vector-length variant of grop_hc(): segment offsets in segs
   replace the fixed length n.  DISABLED - the unconditional fatal error at
   entry means this routine aborts before doing any communication; the code
   below is retained for reference only.
   NOTE(review): internal diagnostics still say "grop_hc()". */
void
grop_hc_vvl(REAL *vals, REAL *work, int *segs, int *oprs, int dim)
{
  register int mask, edge, n;
  int type, dest;
  vfp fp;
  MPI_Status  status;

  /* hard-disabled: everything past this line is unreachable */
  error_msg_fatal("grop_hc_vvl() :: is not working!\n");

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs||!segs)
    {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
#if defined(not_used)
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}
#endif

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

  /* can't do more dimensions then exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  if (!(fp = (vfp) rvec_fct_addr(type))){
    error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* fan-in: stage `edge` exchanges segs[dim]-segs[edge] entries */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      n = segs[dim]-segs[edge];
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals+segs[edge],n,REAL_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,REAL_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,
                   MPI_COMM_WORLD, &status);
          (*fp)(vals+segs[edge], work, n, oprs);
        }
    }

  /* position mask at the top dimension for the fan-out pass (else branch
     unreachable: the loop above always exits with edge==dim) */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  /* fan-out: broadcast results back down, shrinking segment per stage */
  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      n = (segs[dim]-segs[dim-1-edge]);

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals+segs[dim-1-edge],n,REAL_TYPE,dest,MSGTAG4+my_id,
                  MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals+segs[dim-1-edge],n,REAL_TYPE,MPI_ANY_SOURCE,
                   MSGTAG4+dest,MPI_COMM_WORLD, &status);
        }
    }
}

705: /******************************************************************************
Function: new_ssgl_radd()

708: Input : 
709: Output: 
710: Return: 
711: Description: 
712:  
713: ii+1 entries in seg :: 0 .. ii

715: ******************************************************************************/
/* Staged segmented global REAL add - see ssgl_radd() above; this routine is
   a line-for-line duplicate of it (NOTE(review): candidate for removal or
   for forwarding to ssgl_radd()).  segs[0..level] are segment offsets; stage
   `edge` exchanges segs[level]-segs[edge] entries with edge_node[edge]. */
void new_ssgl_radd(register REAL *vals, register REAL *work, register int level,register int *segs)
{
  register int edge, type, dest, mask;
  register int stage_n;
  MPI_Status  status;


#ifdef DEBUG
  if (level > i_log2_num_nodes)
    {error_msg_fatal("sgl_add() :: level > log_2(P)!!!");}
#endif

#ifdef SAFE
  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* all msgs are *NOT* the same length */
  /* implement the mesh fan in/out exchange algorithm */
  /* fan-in: mask == 2^edge - 1, so only sub-cube roots participate */
  for (mask=0, edge=0; edge<level; edge++, mask++)
    {
      stage_n = (segs[level] - segs[edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[edge];
          /* tag encodes sender id and stage */
          type = MSGTAG3 + my_id + (num_nodes*edge);
          if (my_id>dest)
/*            {csend(type, vals+segs[edge],stage_n*REAL_LEN,dest,0);} */
          {MPI_Send(vals+segs[edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              /* rewrite tag to match what the partner (dest) sent */
              type =  type - my_id + dest;
/*              crecv(type,work,stage_n*REAL_LEN); */
              MPI_Recv(work,stage_n,REAL_TYPE,MPI_ANY_SOURCE,type,
                       MPI_COMM_WORLD,&status);
              rvec_add(vals+segs[edge], work, stage_n);
/*            daxpy(vals+segs[edge], work, stage_n); */
            }
        }
      mask <<= 1;
    }
  /* fan-out: walk the stages back down, pushing results outward */
  mask>>=1;
  for (edge=0; edge<level; edge++)
    {
      stage_n = (segs[level] - segs[level-1-edge]);
      if (stage_n && !(my_id & mask))
        {
          dest = edge_node[level-edge-1];
          type = MSGTAG6 + my_id + (num_nodes*edge);
          if (my_id<dest)
/*            {csend(type,vals+segs[level-1-edge],stage_n*REAL_LEN,dest,0);} */
            {MPI_Send(vals+segs[level-1-edge],stage_n,REAL_TYPE,dest,type,
                      MPI_COMM_WORLD);}
          else
            {
              type =  type - my_id + dest;
/*               crecv(type,vals+segs[level-1-edge],stage_n*REAL_LEN); */
              MPI_Recv(vals+segs[level-1-edge],stage_n,REAL_TYPE,
                       MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
            }
        }
      mask >>= 1;
    }
}



785: /***********************************comm.c*************************************
Function: giop_hc()

788: Input : 
789: Output: 
790: Return: 
791: Description: fan-in/out version

793: note good only for num_nodes=2^k!!!

795: ***********************************comm.c*************************************/
/* Global integer operation restricted to the low `dim` hypercube dimensions
   (integer twin of grop_hc()).  Requires num_nodes to be a power of 2
   (fatal otherwise).  Result is left in vals on every participating node. */
void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
{
  register int mask, edge;
  int type, dest;
  vfp fp;
  MPI_Status  status;

#ifdef SAFE
  /* ok ... should have some data, work, and operator(s) */
  if (!vals||!work||!oprs)
    {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

  /* non-uniform should have at least two entries */
  if ((oprs[0] == NON_UNIFORM)&&(n<2))
    {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

  /* check to make sure comm package has been initialized */
  if (!p_init)
    {comm_init();}
#endif

  /* if there's nothing to do return */
  if ((num_nodes<2)||(!n)||(dim<=0))
    {return;}

  /* the error msg says it all!!! */
  if (modfl_num_nodes)
    {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

  /* a negative number of items to send ==> fatal */
  if (n<0)
    {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

  /* can't do more dimensions then exist */
  dim = PetscMin(dim,i_log2_num_nodes);

  /* advance to list of n operations for custom */
  if ((type=oprs[0])==NON_UNIFORM)
    {oprs++;}

  /* resolve the combining function; otherwise treat oprs as the fct ptr */
  if (!(fp = (vfp) ivec_fct_addr(type))){
    error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
    fp = (vfp) oprs;
  }

  /* fan-in: combine along dimensions 0..dim-1 */
  for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
    {
      dest = my_id^mask;
      if (my_id > dest)
        {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(work,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
                   &status);
          (*fp)(vals, work, n, oprs);
        }
    }

  /* position mask at the top dimension for the fan-out pass.
     NOTE(review): the loop above always exits with edge==dim (dim>=1 here),
     so the else branch is unreachable dead code. */
  if (edge==dim)
    {mask>>=1;}
  else
    {while (++edge<dim) {mask<<=1;}}

  /* fan-out: broadcast the result back down dimensions dim-1..0 */
  for (edge=0; edge<dim; edge++,mask>>=1)
    {
      if (my_id%mask)
        {continue;}

      dest = my_id^mask;
      if (my_id < dest)
        {MPI_Send(vals,n,INT_TYPE,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
      else
        {
          MPI_Recv(vals,n,INT_TYPE,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
                   &status);
        }
    }
}