Go to the documentation of this file.
    1 /*
    2  * This example will show one of the building blocks of parallel infrastructure in MOAB
    3  * More exactly, if we have some homogeneous data to communicate from each processor to a list of
    4  * other processors, how do we do it?
    5  *
    6  * introduce the TupleList and crystal router to MOAB users.
    7  *
    8  * This technology is used in resolving shared vertices / sets between partitions
    9  * It is used in the mbcoupler for sending data (target points) to the proper processor, and
   10  * communicate back the results. Also, it is used to communicate departure mesh for intersection in
   11  * parallel
   12  *
   13  *  It is a way of doing  MPI_gatheralltoallv(), when the communication matrix is sparse
   14  *
   15  *  It is assumed that every proc needs to communicate only with a few of the other processors.
   16  *  If every processor needs to communicate with all other, then we will have to use paired isend
   17  * and irecv, the communication matrix is full
   18  *
   19  *  the example needs to be launched in parallel.
   20  *  Every proc will build a list of tuples, that will be send to a few procs;
   21  *  In general, we will send to num_comms tasks, and about num_tuples to each task
   22  *  We vary num_comms and num_tuples for processor
   23  *
   24  *  we will send long ints of the form
   25  *    100000 * send + 1000* rank +j, where j is the index of tuple
   26  *
   27  *  after routing, we verify we received
   28  *    100000 * rank + 1000 * from
   29  *
   30  *    For some reportrank we also print the tuples.
   31  *
   32  *  after routing, we will see if we received, as expected. Should run on at least 2 processors.
   33  *
   34  * Note: We do not need a moab instance for this example
   35  *
   36  */
   37  
   38 /** @example CrystalRouterExample.cpp \n
   39  * \brief generalized gather scatter using tuples \n
   40  * <b>To run</b>: mpiexec -np <n> CrystalRouterExample -r [reportrank] -t [num_tuples] -n
   41  * [num_comms] \n
   42  *
   43  */
   44 //
   45 #include "moab/MOABConfig.h"
   46 #ifdef MOAB_HAVE_MPI
   47 #include "moab/ProcConfig.hpp"
   48 #endif
   49 #include "moab/TupleList.hpp"
   50 #include "moab/ProgOptions.hpp"
   51 #include "moab/ErrorHandler.hpp"
   52 #include <ctime>
   53 #include <iostream>
   54 #include <sstream>
   55  
   56 const char BRIEF_DESC[] = "Example of gather scatter with tuple lists \n";
   57 std::ostringstream LONG_DESC;
   58  
   59 using namespace moab;
   60 using namespace std;
   61  
   62 int main( int argc, char** argv )
   63 {
   64 #ifdef MOAB_HAVE_MPI
   65     MPI_Init( &argc, &argv );
   66  
   67     // Initialize error handler, required for this example (not using a moab instance)
   68     MBErrorHandler_Init();
   69  
   70     ProcConfig pc( MPI_COMM_WORLD );
   71     int size = pc.proc_size();
   72     int rank = pc.proc_rank();
   73  
   74     // Start copy
   75     LONG_DESC << "This program does a gather scatter with a list of tuples. \n"
   76                  " It tries to see how much communication costs in terms of time and memory. \n"
   77               << "It starts with creating a list of tuples to be sent from each processor, \n to a "
   78                  "list of other processors.\n"
   79               << "The number of tuples and how many tasks to communicate to are controlled by "
   80                  "input parameters.\n"
   81               << "After communication, we verify locally if we received what we expected. \n";
   82     ProgOptions opts( LONG_DESC.str(), BRIEF_DESC );
   83  
   84     // How many procs communicate to current proc, on average (we will vary that too)
   85     int num_comms = 2;
   86     opts.addOpt< int >( "num_comms,n", "each task will send to about num_comms other tasks some tuples (default 2)",
   87                         &num_comms );
   88  
   89     int num_tuples = 4;
   90     opts.addOpt< int >( "num_tuples,t", "each task will send to some task about num_tuples tuples (default 4)",
   91                         &num_tuples );
   92  
   93     int reportrank = size + 1;
   94     opts.addOpt< int >( "reporting_rank,r",
   95                         "this rank will report the tuples sent and the tuples received; it could "
   96                         "be higher than num_procs, then no reporting",
   97                         &reportrank );
   98  
   99     opts.parseCommandLine( argc, argv );
  100  
  101     if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
  102     {
  103         cout << " There are " << size << " tasks in example.\n";
  104         cout << " We will send groups of " << num_tuples << " from each task towards " << num_comms
  105              << " other tasks.\n";
  106     }
  107  
  108     // Send some data from proc i to i + n/2, also to i + n/2 + 1 modulo n, where n is num procs
  109  
  110     gs_data::crystal_data* cd = pc.crystal_router();
  111  
  112     long total_n_tuples = num_comms * num_tuples;
  113  
  114     // Vary the number of tasks to send to, and the number of tuples to send
  115     if( rank < size / 2 )
  116         num_comms--;
  117     else
  118         num_comms++;
  119  
  120     if( rank < size / 3 )
  121         num_tuples *= 2;
  122     else if( rank > size - size / 3 )
  123         num_tuples /= 2;
  124  
  125     TupleList tl;
  126     // At most num_tuples* num_comms to send
  127     // We do a preallocate with this; some tuples on some processors might need more memory, to be
  128     // able to grow locally; Some tasks might receive more tuples though, and in the process, some
  129     // might grow more than others. By doing these logP sends/receives, we do not grow local memory
  130     // too much.
  131     tl.initialize( 1, 1, 0, 1, num_tuples * num_comms );
  132     tl.enableWriteAccess();
  133     // Form num_tuples*num_comms tuples, send to various ranks
  134     unsigned int n = tl.get_n();
  135     for( int i = 0; i < num_comms; i++ )
  136     {
  137         int sendTo     = rank + i * size / 2 + 1;  // Spread out the send to, for a stress-like test
  138         sendTo         = sendTo % size;            //
  139         long intToSend = 1000 * rank + 100000 * sendTo;
  140         for( int j = 0; j < num_tuples; j++ )
  141         {
  142             n           = tl.get_n();
  143             tl.vi_wr[n] = sendTo;
  144             tl.vl_wr[n] = intToSend + j;
  145             tl.vr_wr[n] = 10000. * rank + j;
  146             tl.inc_n();
  147         }
  148     }
  149  
  150     if( rank == reportrank )
  151     {
  152         cout << "rank " << rank << "\n";
  153         tl.print( " before sending" );
  154     }
  155  
  156     clock_t tt = clock();
  157     // All communication happens here; no mpi calls for the user
  158     ErrorCode rval = cd->gs_transfer( 1, tl, 0 );MB_CHK_SET_ERR( rval, "Error in tuple transfer" );
  159  
  160     double secs = 0;
  161     if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
  162     {
  163         secs = ( clock() - tt ) / (double)CLOCKS_PER_SEC;
  164     }
  165     if( rank == reportrank )
  166     {
  167         cout << "rank " << rank << "\n";
  168         tl.print( " after transfer" );
  169     }
  170  
  171     // Check that all tuples received have the form 10000*rank + 100*from
  172     unsigned int received = tl.get_n();
  173     for( int i = 0; i < (int)received; i++ )
  174     {
  175         int from      = tl.vi_rd[i];
  176         long valrec   = tl.vl_rd[i];
  177         int remainder = valrec - 100000 * rank - 1000 * from;
  178         if( remainder < 0 || remainder >= num_tuples * 4 )
  179             cout << " error: tuple " << i << " received at proc rank " << rank << " from proc " << from << " has value "
  180                  << valrec << " remainder " << remainder << "\n";
  181     }
  182  
  183     if( rank == reportrank || ( reportrank >= size && 0 == rank ) )
  184     {
  185         cout << "communication of about " << total_n_tuples << " tuples/per proc took " << secs << " seconds"
  186              << "\n";
  187         tt = clock();
  188     }
  189  
  190     // Finalize error handler, required for this example (not using a moab instance)
  191     MBErrorHandler_Finalize();
  192  
  193     MPI_Finalize();
  194 #else
  195     std::cout << " Build with MPI for this example to work\n";
  196 #endif
  197  
  198     return 0;
  199 }