Mesh Oriented datABase  (version 5.5.0)
An array-based unstructured mesh library
CrystalRouterExample.cpp
Go to the documentation of this file.
1 /*
2  * This example will show one of the building blocks of parallel infrastructure in MOAB
3  * More exactly, if we have some homogeneous data to communicate from each processor to a list of
4  * other processors, how do we do it?
5  *
6  * introduce the TupleList and crystal router to MOAB users.
7  *
8  * This technology is used in resolving shared vertices / sets between partitions
9  * It is used in the mbcoupler for sending data (target points) to the proper processor, and
10  * communicate back the results. Also, it is used to communicate departure mesh for intersection in
11  * parallel
12  *
13  * It is a way of doing a sparse MPI_Alltoallv(), for when the communication matrix is sparse
14  *
15  * It is assumed that every proc needs to communicate only with a few of the other processors.
16  * If every processor needs to communicate with all other, then we will have to use paired isend
17  * and irecv, the communication matrix is full
18  *
19  * the example needs to be launched in parallel.
20  * Every proc will build a list of tuples, that will be send to a few procs;
21  * In general, we will send to num_comms tasks, and about num_tuples to each task
22  * We vary num_comms and num_tuples per processor
23  *
24  * we will send long ints of the form
25  * 100000 * sendTo + 1000 * rank + j, where j is the index of the tuple
26  *
27  * after routing, we verify we received
28  * 100000 * rank + 1000 * from
29  *
30  * For some reportrank we also print the tuples.
31  *
32  * after routing, we will see if we received, as expected. Should run on at least 2 processors.
33  *
34  * Note: We do not need a moab instance for this example
35  *
36  */
37 
38 /** @example CrystalRouterExample.cpp \n
39  * \brief generalized gather scatter using tuples \n
40  * <b>To run</b>: mpiexec -np <n> CrystalRouterExample -r [reportrank] -t [num_tuples] -n
41  * [num_comms] \n
42  *
43  */
44 //
45 #include "moab/MOABConfig.h"
46 #ifdef MOAB_HAVE_MPI
47 #include "moab/ProcConfig.hpp"
48 #endif
49 #include "moab/TupleList.hpp"
50 #include "moab/ProgOptions.hpp"
51 #include "moab/ErrorHandler.hpp"
52 #include <ctime>
53 #include <iostream>
54 #include <sstream>
55 
56 const char BRIEF_DESC[] = "Example of gather scatter with tuple lists \n";
57 std::ostringstream LONG_DESC;
58 
59 using namespace moab;
60 using namespace std;
61 
/**
 * Entry point: build a TupleList of (destination-rank, long value, real value)
 * tuples on every task, route them with the crystal router (gs_transfer), then
 * verify locally that every received tuple has the expected encoded value.
 *
 * @param argc/argv  standard command line; parsed by ProgOptions for
 *                   -n (num_comms), -t (num_tuples), -r (reporting_rank)
 * @return 0 always (errors are reported via MB_CHK_SET_ERR / stdout)
 */
int main( int argc, char** argv )
{
#ifdef MOAB_HAVE_MPI
    MPI_Init( &argc, &argv );

    // Initialize error handler, required for this example (not using a moab instance)
    MBErrorHandler_Init();

    ProcConfig pc( MPI_COMM_WORLD );
    int size = pc.proc_size();
    int rank = pc.proc_rank();

    // Start copy
    LONG_DESC << "This program does a gather scatter with a list of tuples. \n"
                 " It tries to see how much communication costs in terms of time and memory. \n"
              << "It starts with creating a list of tuples to be sent from each processor, \n to a "
                 "list of other processors.\n"
              << "The number of tuples and how many tasks to communicate to are controlled by "
                 "input parameters.\n"
              << "After communication, we verify locally if we received what we expected. \n";
    ProgOptions opts( LONG_DESC.str(), BRIEF_DESC );

    // How many procs communicate to current proc, on average (we will vary that too)
    int num_comms = 2;
    opts.addOpt< int >( "num_comms,n", "each task will send to about num_comms other tasks some tuples (default 2)",
                        &num_comms );

    int num_tuples = 4;
    opts.addOpt< int >( "num_tuples,t", "each task will send to some task about num_tuples tuples (default 4)",
                        &num_tuples );

    int reportrank = size + 1;  // default: above any valid rank, so no task reports
    opts.addOpt< int >( "reporting_rank,r",
                        "this rank will report the tuples sent and the tuples received; it could "
                        "be higher than num_procs, then no reporting",
                        &reportrank );

    opts.parseCommandLine( argc, argv );

    if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
    {
        cout << " There are " << size << " tasks in example.\n";
        cout << " We will send groups of " << num_tuples << " from each task towards " << num_comms
             << " other tasks.\n";
    }

    // Send some data from proc i to i + n/2, also to i + n/2 + 1 modulo n, where n is num procs
    gs_data::crystal_data* cd = pc.crystal_router();

    long total_n_tuples = num_comms * num_tuples;

    // Vary the number of tasks to send to, and the number of tuples to send,
    // so the communication pattern is deliberately non-uniform across ranks
    if( rank < size / 2 )
        num_comms--;
    else
        num_comms++;

    if( rank < size / 3 )
        num_tuples *= 2;
    else if( rank > size - size / 3 )
        num_tuples /= 2;

    TupleList tl;
    // At most num_tuples* num_comms to send
    // We do a preallocate with this; some tuples on some processors might need more memory, to be
    // able to grow locally; Some tasks might receive more tuples though, and in the process, some
    // might grow more than others. By doing these logP sends/receives, we do not grow local memory
    // too much.
    tl.initialize( 1, 1, 0, 1, num_tuples * num_comms );  // 1 int, 1 long, 0 Entity, 1 real per tuple
    tl.enableWriteAccess();
    // Form num_tuples*num_comms tuples, send to various ranks
    unsigned int n = tl.get_n();
    for( int i = 0; i < num_comms; i++ )
    {
        int sendTo = rank + i * size / 2 + 1;  // Spread out the send to, for a stress-like test
        sendTo     = sendTo % size;            //
        long intToSend = 1000 * rank + 100000 * sendTo;  // encodes both sender and destination
        for( int j = 0; j < num_tuples; j++ )
        {
            n           = tl.get_n();
            tl.vi_wr[n] = sendTo;               // int field: destination rank (routing key)
            tl.vl_wr[n] = intToSend + j;        // long field: verifiable payload
            tl.vr_wr[n] = 10000. * rank + j;    // real field: extra payload
            tl.inc_n();
        }
    }

    if( rank == reportrank )
    {
        cout << "rank " << rank << "\n";
        tl.print( " before sending" );
    }

    clock_t tt = clock();
    // All communication happens here; no mpi calls for the user
    ErrorCode rval = cd->gs_transfer( 1, tl, 0 );MB_CHK_SET_ERR( rval, "Error in tuple transfer" );

    double secs = 0;
    if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
    {
        secs = ( clock() - tt ) / (double)CLOCKS_PER_SEC;
    }
    if( rank == reportrank )
    {
        cout << "rank " << rank << "\n";
        tl.print( " after transfer" );
    }

    // Check that all tuples received have the form 100000*rank + 1000*from
    unsigned int received = tl.get_n();
    for( int i = 0; i < (int)received; i++ )
    {
        int from       = tl.vi_rd[i];   // after routing, the int field holds the sender rank
        long valrec    = tl.vl_rd[i];
        int remainder  = valrec - 100000 * rank - 1000 * from;
        // remainder should be the original tuple index j; num_tuples*4 bounds it
        // even for senders whose num_tuples was doubled above
        if( remainder < 0 || remainder >= num_tuples * 4 )
            cout << " error: tuple " << i << " received at proc rank " << rank << " from proc " << from << " has value "
                 << valrec << " remainder " << remainder << "\n";
    }

    if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
    {
        cout << "communication of about " << total_n_tuples << " tuples/per proc took " << secs << " seconds"
             << "\n";
        tt = clock();
    }

    // Finalize error handler, required for this example (not using a moab instance)
    MBErrorHandler_Finalize();

    MPI_Finalize();
#else
    std::cout << " Build with MPI for this example to work\n";
#endif

    return 0;
}