/*
 * This example shows one of the building blocks of parallel infrastructure in MOAB:
 * if we have some homogeneous data to communicate from each processor to a list of
 * other processors, how do we do it?
 *
 * It introduces the TupleList and the crystal router to MOAB users.
 *
 * This technology is used in resolving shared vertices / sets between partitions.
 * It is used in the mbcoupler for sending data (target points) to the proper processor and
 * communicating back the results. It is also used to communicate the departure mesh for
 * intersection in parallel.
 *
 * It is a way of doing an MPI_Alltoallv()-style exchange when the communication matrix is sparse.
 *
 * It is assumed that every proc needs to communicate with only a few of the other processors.
 * If every processor needed to communicate with all others (a full communication matrix), paired
 * isend / irecv calls would be used instead.
 *
 * The example needs to be launched in parallel.
 * Every proc builds a list of tuples that will be sent to a few procs;
 * in general, we send to num_comms tasks, and about num_tuples tuples to each task.
 * We vary num_comms and num_tuples per processor.
 *
 * We send long ints of the form
 *    100000 * send + 1000 * rank + j, where j is the index of the tuple
 *
 * After routing, we verify that we received values of the form
 *    100000 * rank + 1000 * from
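 *    (for example, with j = 2, rank 3 sending to rank 5 puts 100000*5 + 1000*3 + 2 = 503002 in
 *    the list; after routing, rank 5 finds 503002 and can decode from = 3 and j = 2)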
 *
 * For some reportrank we also print the tuples.
 *
 * After routing, we check that we received what we expected. The example should run on at least
 * 2 processors.
 *
 * Note: We do not need a MOAB instance for this example.
 *
 */

/** @example CrystalRouterExample.cpp \n
 * \brief generalized gather scatter using tuples \n
 * <b>To run</b>: mpiexec -np <n> CrystalRouterExample -r [reportrank] -t [num_tuples] -n [num_comms] \n
 *
 */

#include "moab/MOABConfig.h"
#ifdef MOAB_HAVE_MPI
#include "moab/ProcConfig.hpp"
#endif
#include "moab/TupleList.hpp"
#include "moab/ProgOptions.hpp"
#include "moab/ErrorHandler.hpp"
#include <ctime>
#include <iostream>
#include <sstream>

const char BRIEF_DESC[] = "Example of gather scatter with tuple lists \n";
std::ostringstream LONG_DESC;

using namespace moab;
using namespace std;

int main( int argc, char** argv )
{
#ifdef MOAB_HAVE_MPI
    MPI_Init( &argc, &argv );

    // Initialize error handler, required for this example (not using a moab instance)
    MBErrorHandler_Init();

    ProcConfig pc( MPI_COMM_WORLD );
    int size = pc.proc_size();
    int rank = pc.proc_rank();

    LONG_DESC << "This program does a gather scatter with a list of tuples. \n"
                 " It tries to see how much communication costs in terms of time and memory. \n"
              << "It starts with creating a list of tuples to be sent from each processor "
                 "to a list of other processors.\n"
              << "The number of tuples and how many tasks to communicate to are controlled by "
                 "input parameters.\n"
              << "After communication, we verify locally if we received what we expected. \n";
    ProgOptions opts( LONG_DESC.str(), BRIEF_DESC );

    // How many tasks each proc will communicate with, on average (we will vary that too)
    int num_comms = 2;
    opts.addOpt< int >( "num_comms,n", "each task will send tuples to about num_comms other tasks (default 2)",
                        &num_comms );

    int num_tuples = 4;
    opts.addOpt< int >( "num_tuples,t", "each task will send about num_tuples tuples to each of those tasks (default 4)",
                        &num_tuples );

    int reportrank = size + 1;
    opts.addOpt< int >( "reporting_rank,r",
                        "this rank will report the tuples sent and the tuples received; if it is "
                        "larger than the number of tasks, nothing is reported",
                        &reportrank );

    opts.parseCommandLine( argc, argv );

    if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
    {
        cout << " There are " << size << " tasks in this example.\n";
        cout << " We will send groups of about " << num_tuples << " tuples from each task towards " << num_comms
             << " other tasks.\n";
    }
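
    // The crystal router obtained below comes from the ProcConfig (it is tied to its
    // communicator); it routes each tuple to the rank stored in the tuple's integer field,
    // using the staged (roughly log P) exchanges mentioned above rather than direct
    // point-to-point messages.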
    // Send some data from task rank to ranks rank + 1, rank + 1 + size/2, rank + 1 + size, ... (modulo size)

    gs_data::crystal_data* cd = pc.crystal_router();

    long total_n_tuples = num_comms * num_tuples;

    // Vary the number of tasks to send to, and the number of tuples to send
    if( rank < size / 2 )
        num_comms--;
    else
        num_comms++;

    if( rank < size / 3 )
        num_tuples *= 2;
    else if( rank > size - size / 3 )
        num_tuples /= 2;

    TupleList tl;
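    // TupleList::initialize( mi, ml, mul, mr, mx ) reserves space for mx tuples, each carrying
    // mi ints, ml longs, mul Ulongs (handles) and mr reals; here every tuple holds one int (the
    // destination rank), one long (the encoded payload) and one real.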
    // At most num_tuples * num_comms tuples will be sent from this task, so preallocate that many.
    // Some tasks may receive more tuples than they send, so the list may still have to grow
    // locally during routing; because the router works in about log P staged sends/receives,
    // this local growth stays moderate.
    tl.initialize( 1, 1, 0, 1, num_tuples * num_comms );
    tl.enableWriteAccess();

    // Form num_tuples * num_comms tuples, and send them to various ranks
    unsigned int n = tl.get_n();
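    // Tuples are appended one at a time: get_n() gives the index of the next free slot, the
    // vi_wr / vl_wr / vr_wr arrays are filled at that index, and inc_n() bumps the tuple count.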
    for( int i = 0; i < num_comms; i++ )
    {
        int sendTo = rank + i * size / 2 + 1;  // Spread out the destinations, for a stress-like test
        sendTo     = sendTo % size;
        long intToSend = 1000 * rank + 100000 * sendTo;
        for( int j = 0; j < num_tuples; j++ )
        {
            n = tl.get_n();
            tl.vi_wr[n] = sendTo;
            tl.vl_wr[n] = intToSend + j;
            tl.vr_wr[n] = 10000. * rank + j;
            tl.inc_n();
        }
    }
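
    // The list now holds all outgoing tuples; each one is tagged with its destination rank in
    // the integer slot filled above, which is what the router uses to decide where it goes.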

    if( rank == reportrank )
    {
        cout << "rank " << rank << "\n";
        tl.print( " before sending" );
    }

    clock_t tt = clock();
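    // gs_transfer( 1, tl, 0 ) routes each tuple to the rank stored in its integer field (the
    // last argument selects which integer field holds the destination); the leading 1 lets the
    // list grow if more tuples arrive than were preallocated. On return, that same integer field
    // holds the rank the tuple came from, which the verification loop below relies on.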
    // All the communication happens here; the user makes no explicit MPI calls
    ErrorCode rval = cd->gs_transfer( 1, tl, 0 );MB_CHK_SET_ERR( rval, "Error in tuple transfer" );

    double secs = 0;
    if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
    {
        secs = ( clock() - tt ) / (double)CLOCKS_PER_SEC;
    }
    if( rank == reportrank )
    {
        cout << "rank " << rank << "\n";
        tl.print( " after transfer" );
    }

    // Check that all tuples received have the form 100000 * rank + 1000 * from + j
    unsigned int received = tl.get_n();
    for( int i = 0; i < (int)received; i++ )
    {
        int from = tl.vi_rd[i];
        long valrec = tl.vl_rd[i];
        int remainder = valrec - 100000 * rank - 1000 * from;
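        // The sender may have doubled its num_tuples while this rank may have halved its own,
        // so the tuple index j can legitimately be up to about 4 * the local num_tuples.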
        if( remainder < 0 || remainder >= num_tuples * 4 )
            cout << " error: tuple " << i << " received at proc rank " << rank << " from proc " << from
                 << " has value " << valrec << " remainder " << remainder << "\n";
    }

    if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
    {
        cout << "communication of about " << total_n_tuples << " tuples per proc took " << secs << " seconds"
             << "\n";
        tt = clock();
    }

    // Finalize error handler, required for this example (not using a moab instance)
    MBErrorHandler_Finalize();

    MPI_Finalize();
#else
    std::cout << " Build with MPI for this example to work\n";
#endif

    return 0;
}