Loading [MathJax]/extensions/tex2jax.js
Mesh Oriented datABase  (version 5.5.1)
An array-based unstructured mesh library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CrystalRouterExample.cpp
Go to the documentation of this file.
1 /* 2  * This example will show one of the building blocks of parallel infrastructure in MOAB 3  * More exactly, if we have some homogeneous data to communicate from each processor to a list of 4  * other processors, how do we do it? 5  * 6  * introduce the TupleList and crystal router to MOAB users. 7  * 8  * This technology is used in resolving shared vertices / sets between partitions 9  * It is used in the mbcoupler for sending data (target points) to the proper processor, and 10  * communicate back the results. Also, it is used to communicate departure mesh for intersection in 11  * parallel 12  * 13  * It is a way of doing MPI_gatheralltoallv(), when the communication matrix is sparse 14  * 15  * It is assumed that every proc needs to communicate only with a few of the other processors. 16  * If every processor needs to communicate with all other, then we will have to use paired isend 17  * and irecv, the communication matrix is full 18  * 19  * the example needs to be launched in parallel. 20  * Every proc will build a list of tuples, that will be send to a few procs; 21  * In general, we will send to num_comms tasks, and about num_tuples to each task 22  * We vary num_comms and num_tuples for processor 23  * 24  * we will send long ints of the form 25  * 100000 * send + 1000* rank +j, where j is the index of tuple 26  * 27  * after routing, we verify we received 28  * 100000 * rank + 1000 * from 29  * 30  * For some reportrank we also print the tuples. 31  * 32  * after routing, we will see if we received, as expected. Should run on at least 2 processors. 33  * 34  * Note: We do not need a moab instance for this example 35  * 36  */ 37  38 /** @example CrystalRouterExample.cpp \n 39  * \brief generalized gather scatter using tuples \n 40  * <b>To run</b>: mpiexec -np <n> CrystalRouterExample -r [reportrank] -t [num_tuples] -n 41  * [num_comms] \n 42  * 43  */ 44 // 45 #include "moab/MOABConfig.h" 46 #ifdef MOAB_HAVE_MPI 47 #include "moab/ProcConfig.hpp" 48 #endif 49 #include "moab/TupleList.hpp" 50 #include "moab/ProgOptions.hpp" 51 #include "moab/ErrorHandler.hpp" 52 #include <ctime> 53 #include <iostream> 54 #include <sstream> 55  56 const char BRIEF_DESC[] = "Example of gather scatter with tuple lists \n"; 57 std::ostringstream LONG_DESC; 58  59 using namespace moab; 60 using namespace std; 61  62 int main( int argc, char** argv ) 63 { 64 #ifdef MOAB_HAVE_MPI 65  MPI_Init( &argc, &argv ); 66  67  // Initialize error handler, required for this example (not using a moab instance) 68  MBErrorHandler_Init(); 69  70  ProcConfig pc( MPI_COMM_WORLD ); 71  int size = pc.proc_size(); 72  int rank = pc.proc_rank(); 73  74  // Start copy 75  LONG_DESC << "This program does a gather scatter with a list of tuples. \n" 76  " It tries to see how much communication costs in terms of time and memory. \n" 77  << "It starts with creating a list of tuples to be sent from each processor, \n to a " 78  "list of other processors.\n" 79  << "The number of tuples and how many tasks to communicate to are controlled by " 80  "input parameters.\n" 81  << "After communication, we verify locally if we received what we expected. \n"; 82  ProgOptions opts( LONG_DESC.str(), BRIEF_DESC ); 83  84  // How many procs communicate to current proc, on average (we will vary that too) 85  int num_comms = 2; 86  opts.addOpt< int >( "num_comms,n", "each task will send to about num_comms other tasks some tuples (default 2)", 87  &num_comms ); 88  89  int num_tuples = 4; 90  opts.addOpt< int >( "num_tuples,t", "each task will send to some task about num_tuples tuples (default 4)", 91  &num_tuples ); 92  93  int reportrank = size + 1; 94  opts.addOpt< int >( "reporting_rank,r", 95  "this rank will report the tuples sent and the tuples received; it could " 96  "be higher than num_procs, then no reporting", 97  &reportrank ); 98  99  opts.parseCommandLine( argc, argv ); 100  101  if( rank == reportrank || ( reportrank >= size && 0 == rank ) ) 102  { 103  cout << " There are " << size << " tasks in example.\n"; 104  cout << " We will send groups of " << num_tuples << " from each task towards " << num_comms 105  << " other tasks.\n"; 106  } 107  108  // Send some data from proc i to i + n/2, also to i + n/2 + 1 modulo n, where n is num procs 109  110  gs_data::crystal_data* cd = pc.crystal_router(); 111  112  long total_n_tuples = num_comms * num_tuples; 113  114  // Vary the number of tasks to send to, and the number of tuples to send 115  if( rank < size / 2 ) 116  num_comms--; 117  else 118  num_comms++; 119  120  if( rank < size / 3 ) 121  num_tuples *= 2; 122  else if( rank > size - size / 3 ) 123  num_tuples /= 2; 124  125  TupleList tl; 126  // At most num_tuples* num_comms to send 127  // We do a preallocate with this; some tuples on some processors might need more memory, to be 128  // able to grow locally; Some tasks might receive more tuples though, and in the process, some 129  // might grow more than others. By doing these logP sends/receives, we do not grow local memory 130  // too much. 131  tl.initialize( 1, 1, 0, 1, num_tuples * num_comms ); 132  tl.enableWriteAccess(); 133  // Form num_tuples*num_comms tuples, send to various ranks 134  unsigned int n = tl.get_n(); 135  for( int i = 0; i < num_comms; i++ ) 136  { 137  int sendTo = rank + i * size / 2 + 1; // Spread out the send to, for a stress-like test 138  sendTo = sendTo % size; // 139  long intToSend = 1000 * rank + 100000 * sendTo; 140  for( int j = 0; j < num_tuples; j++ ) 141  { 142  n = tl.get_n(); 143  tl.vi_wr[n] = sendTo; 144  tl.vl_wr[n] = intToSend + j; 145  tl.vr_wr[n] = 10000. * rank + j; 146  tl.inc_n(); 147  } 148  } 149  150  if( rank == reportrank ) 151  { 152  cout << "rank " << rank << "\n"; 153  tl.print( " before sending" ); 154  } 155  156  clock_t tt = clock(); 157  // All communication happens here; no mpi calls for the user 158  ErrorCode rval = cd->gs_transfer( 1, tl, 0 );MB_CHK_SET_ERR( rval, "Error in tuple transfer" ); 159  160  double secs = 0; 161  if( rank == reportrank || ( reportrank >= size && 0 == rank ) ) 162  { 163  secs = ( clock() - tt ) / (double)CLOCKS_PER_SEC; 164  } 165  if( rank == reportrank ) 166  { 167  cout << "rank " << rank << "\n"; 168  tl.print( " after transfer" ); 169  } 170  171  // Check that all tuples received have the form 10000*rank + 100*from 172  unsigned int received = tl.get_n(); 173  for( int i = 0; i < (int)received; i++ ) 174  { 175  int from = tl.vi_rd[i]; 176  long valrec = tl.vl_rd[i]; 177  int remainder = valrec - 100000 * rank - 1000 * from; 178  if( remainder < 0 || remainder >= num_tuples * 4 ) 179  cout << " error: tuple " << i << " received at proc rank " << rank << " from proc " << from << " has value " 180  << valrec << " remainder " << remainder << "\n"; 181  } 182  183  if( rank == reportrank || ( reportrank >= size && 0 == rank ) ) 184  { 185  cout << "communication of about " << total_n_tuples << " tuples/per proc took " << secs << " seconds" 186  << "\n"; 187  tt = clock(); 188  } 189  190  // Finalize error handler, required for this example (not using a moab instance) 191  MBErrorHandler_Finalize(); 192  193  MPI_Finalize(); 194 #else 195  std::cout << " Build with MPI for this example to work\n"; 196 #endif 197  198  return 0; 199 }