/*
 * ParCommGraph.hpp
 *
 * will be used to set up communication between 2 distributed meshes, in which one mesh was
 * migrated from the other (one example is an atmosphere mesh migrated to coupler pes)
 *
 * there are 3 communicators in play, one for each mesh, and one for the joint
 * communicator that spans both sets of processes; to send mesh or tag data we need to use the
 * joint communicator, with nonblocking MPI_Isend and blocking MPI_Recv receives
 *
 * various methods should be available to migrate meshes: trivial, using a graph partitioner
 * (Zoltan PHG) and using a geometric partitioner (Zoltan RCB)
 *
 * communicators are represented by their MPI groups, not by the communicators themselves, because
 * the groups are always defined, irrespective of which tasks they are on. Communicators can be
 * MPI_COMM_NULL, while MPI groups are always defined
 *
 * Some of the methods here are executed over the sender communicator, some over the
 * receiver communicator. They can switch places: what was the sender becomes the receiver and
 * vice versa
 *
 * The name "graph" is in the sense of a bipartite graph, in which we can separate sender and
 * receiver tasks
 *
 * The info stored in the ParCommGraph helps in migrating fields (MOAB tags) from component to the
 * coupler and back
 *
 * So initially the ParCommGraph assists in mesh migration (from component to coupler), and then
 * it is used to migrate tag data from component to coupler and back from coupler to component.
 *
 * The same class is used after intersection (which is done on the coupler pes between 2 different
 * component migrated meshes), and it alters the communication pattern between the original
 * component pes and the coupler pes
 *
 * We added a new way to send tags between 2 models; the first application of the new method is to
 * send a tag from the atm dynamics model (spectral elements, with np x np tags defined on each
 * element, according to the GLOBAL_DOFS tag associated to each element) towards the atm physics
 * model, which is just a point cloud of vertices distributed differently to the physics model pes;
 * matching is done using the GLOBAL_ID tag on vertices. Right now, we assume that the models are
 * on different pes, but the joint communicator covers both, and that the ids of the tasks are with
 * respect to the joint communicator
 *
 */
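/*
 * Illustrative sketch only (not part of this header's API): building the two MPI groups from the
 * joint communicator works on every task, even on tasks where a component communicator would be
 * MPI_COMM_NULL; the names joinComm, nAtm and nCpl below are hypothetical, assuming the first
 * nAtm ranks of joinComm run the sender component and the next nCpl ranks run the receiver
 *
 *   MPI_Group joinGroup, atmGroup, cplGroup;
 *   MPI_Comm_group( joinComm, &joinGroup );
 *   std::vector< int > atmRanks( nAtm ), cplRanks( nCpl );
 *   for( int i = 0; i < nAtm; i++ ) atmRanks[i] = i;
 *   for( int i = 0; i < nCpl; i++ ) cplRanks[i] = nAtm + i;
 *   MPI_Group_incl( joinGroup, nAtm, atmRanks.data(), &atmGroup );
 *   MPI_Group_incl( joinGroup, nCpl, cplRanks.data(), &cplGroup );
 */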
#include "moab_mpi.h"
#include "moab/Interface.hpp"
#include "moab/ParallelComm.hpp"
#include <map>

#ifndef SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_
#define SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_

namespace moab
{

class ParCommGraph
{
  public:
    enum TypeGraph
    {
        INITIAL_MIGRATE,
        COVERAGE,
        DOF_BASED
    };
    virtual ~ParCommGraph();

    /**
     * \brief collective constructor, will be called on all sender tasks and receiver tasks
     * \param[in] joincomm joint MPI communicator that covers both sender and receiver MPI groups
     * \param[in] group1   MPI group formed with sender tasks (the sender usually loads the mesh in
     *                     a migrate scenario)
     * \param[in] group2   MPI group formed with receiver tasks (the receiver usually receives the
     *                     mesh in a migrate scenario)
     * \param[in] coid1    sender component unique identifier in a coupled application (in climate
     *                     simulations it could be the Atmosphere Comp id = 5 or the Ocean Comp id = 17)
     * \param[in] coid2    receiver component unique identifier in a coupled application (it is
     *                     usually the coupler, 2, in E3SM)
     *
     * this graph will be formed on sender and receiver tasks, and, in principle, will hold info
     * about how the local entities are distributed on the other side
     *
     * Its major role is to help in migration of data, from component to the coupler and vice versa;
     * each local entity has a corresponding task (or tasks) on the other side, to which the data
     * needs to be sent
     *
     * important data stored in ParCommGraph, as soon as it is created:
     *   - all sender and receiver task ids, with respect to the joint communicator
     *   - local rank in sender and receiver group (-1 if not part of the respective group)
     *   - rank in the joint communicator (from 0)
     */
    ParCommGraph( MPI_Comm joincomm, MPI_Group group1, MPI_Group group2, int coid1, int coid2 );
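    /*
     * Usage sketch (illustrative only; joinComm, atmGroup, cplGroup and the component ids are
     * hypothetical, following the E3SM-style convention mentioned above):
     *
     *   // called collectively on every task of joinComm, sender and receiver alike
     *   ParCommGraph* cgraph = new ParCommGraph( joinComm, atmGroup, cplGroup, 5, 2 );
     *   if( cgraph->is_root_sender() )
     *   {
     *       // e.g. compute and send the trivial-partition graph to the receiver root
     *   }
     */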

    /**
     * \brief copy constructor will copy only the senders, receivers, compid1, etc
     */
    ParCommGraph( const ParCommGraph& );

    /**
      \brief Based on the number of elements on each task in group 1, partition for group 2,
      trivially

      <B>Operations:</B> it is called on every receiver task; decides how all elements are
      distributed

      Note: establish how many elements are sent from each task in group 1 to tasks in group 2.
      This call is usually made on a root / master process, and will construct local maps that
      are member data, which contain the communication graph, in both directions. It also records
      the number of elements migrated/exchanged between each sender/receiver pair.

      \param[in] numElemsPerTaskInGroup1 (std::vector<int> &) number of elements on each sender
      task
    */

    ErrorCode compute_trivial_partition( std::vector< int >& numElemsPerTaskInGroup1 );
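    /*
     * Usage sketch (illustrative; cgraph and numElems are hypothetical names):
     *
     *   std::vector< int > numElems;  // element count for each sender task, gathered beforehand
     *   ErrorCode rval = cgraph->compute_trivial_partition( numElems );
     *   // afterwards, the internal recv_graph / sender_graph maps describe, for every receiver,
     *   // which senders contribute elements and how many
     */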

    /**
      \brief pack information about the receivers' view of the graph, for future sending to the
      receiver root

      <B>Operations:</B> Local, called on the root process of the senders group

      \param[out] packed_recv_array
      packed data will be sent to the root of the receivers, and distributed from there; it
      contains, for each receiver, concatenated:
      receiver 1 task, number of senders for receiver 1, then the sender tasks for receiver 1,
      receiver 2 task, number of senders for receiver 2, the sender tasks for receiver 2, etc.
      Note: only the root of the senders will compute this, and send it over to the receiver root,
      which will distribute it over each receiver. We do not pack the sizes of the data to be sent,
      only the senders for each of the receivers (this could be of size O(n^2), where n is the
      number of tasks, but in general it should be more like O(n): each sender sends to a "finite"
      number of receivers, and each receiver receives from a finite number of senders). We need
      this info to decide how to set up the send/receive waiting game for the non-blocking
      communication.
    */
    ErrorCode pack_receivers_graph( std::vector< int >& packed_recv_array );
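    /*
     * Example of the packed layout (illustrative): with receivers { r1, r2 }, where r1 gets data
     * from senders { s1, s2 } and r2 only from { s3 }, the packed array is
     *
     *   [ r1, 2, s1, s2,   r2, 1, s3 ]
     */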

    // get methods for private data
    bool is_root_sender()
    {
        return rootSender;
    }

    bool is_root_receiver()
    {
        return rootReceiver;
    }

    int sender( int index )
    {
        return senderTasks[index];
    }

    int receiver( int index )
    {
        return receiverTasks[index];
    }

    int get_component_id1()
    {
        return compid1;
    }
    int get_component_id2()
    {
        return compid2;
    }

    int get_context_id()
    {
        return context_id;
    }
    void set_context_id( int other_id )
    {
        context_id = other_id;
    }

    EntityHandle get_cover_set()
    {
        return cover_set;
    }
    void set_cover_set( EntityHandle cover )
    {
        cover_set = cover;
    }

    // return local graph for a specific task
    ErrorCode split_owned_range( int sender_rank, Range& owned );

    ErrorCode split_owned_range( Range& owned );

    ErrorCode send_graph( MPI_Comm jcomm );

    ErrorCode send_graph_partition( ParallelComm* pco, MPI_Comm jcomm );

    ErrorCode send_mesh_parts( MPI_Comm jcomm, ParallelComm* pco, Range& owned );

    // the following are called on the receiver side
    ErrorCode receive_comm_graph( MPI_Comm jcomm, ParallelComm* pco, std::vector< int >& pack_array );

    ErrorCode receive_mesh( MPI_Comm jcomm,
                            ParallelComm* pco,
                            EntityHandle local_set,
                            std::vector< int >& senders_local );

    ErrorCode release_send_buffers();
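    /*
     * Plausible call sequence for mesh migration (a sketch, not the authoritative driver code;
     * cgraph, pco, jcomm, owned, localSet and packArray are hypothetical names):
     *
     *   // sender tasks
     *   rval = cgraph->send_graph( jcomm );               // root sender posts the graph (nonblocking)
     *   rval = cgraph->send_mesh_parts( jcomm, pco, owned );
     *
     *   // receiver tasks
     *   std::vector< int > packArray, senders;
     *   rval = cgraph->receive_comm_graph( jcomm, pco, packArray );
     *   rval = cgraph->receive_mesh( jcomm, pco, localSet, senders );
     *
     *   // back on the sender tasks, after the receives have completed
     *   rval = cgraph->release_send_buffers();
     */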

    ErrorCode send_tag_values( MPI_Comm jcomm, ParallelComm* pco, Range& owned, std::vector< Tag >& tag_handles );

    ErrorCode receive_tag_values( MPI_Comm jcomm, ParallelComm* pco, Range& owned, std::vector< Tag >& tag_handles );
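    /*
     * Tag-migration sketch (illustrative; the tag handle and ranges are hypothetical):
     *
     *   std::vector< Tag > tags;
     *   tags.push_back( temperatureTag );  // hypothetical tag handle, already defined on the mesh
     *   // on the sender (component) tasks:
     *   rval = cgraph->send_tag_values( jcomm, pco, owned, tags );
     *   // on the receiver (coupler) tasks:
     *   rval = cgraph->receive_tag_values( jcomm, pco, owned, tags );
     */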

    // getter methods
    const std::vector< int >& senders()
    {
        return senderTasks;
    }  // reference copy; refers to sender tasks in the joint comm
    const std::vector< int >& receivers()
    {
        return receiverTasks;
    }

    ErrorCode settle_send_graph( TupleList& TLcovIDs );

    // this will set after_cov_rec_sizes
    void SetReceivingAfterCoverage(
        std::map< int, std::set< int > >& idsFromProcs );  // makes sense only on receivers, right now after coverage

    // strideComp is np x np, or 1, in our cases
    // will fill up ordered lists for corresponding IDs on the other component
    // will form back and forth information, from the ordered list of IDs, to valuesComp
    void settle_comm_by_ids( int comp, TupleList& TLBackToComp, std::vector< int >& valuesComp );

    // after reading the map, we need to know what entities we need to send to the receiver
    ErrorCode set_split_ranges( int comp,
                                TupleList& TLBackToComp1,
                                std::vector< int >& valuesComp1,
                                int lenTag,
                                Range& ents_of_interest,
                                int type );

    // new methods to migrate mesh after reading the map
    ErrorCode form_tuples_to_migrate_mesh( Interface* mb, TupleList& TLv, TupleList& TLc, int type, int lenTagType1 );
    ErrorCode form_mesh_from_tuples( Interface* mb,
                                     TupleList& TLv,
                                     TupleList& TLc,
                                     int type,
                                     int lenTagType1,
                                     EntityHandle fset,
                                     Range& primary_ents,
                                     std::vector< int >& values_entities );

    // new partition calculation
    ErrorCode compute_partition( ParallelComm* pco, Range& owned, int met );

    // dump local information about the graph
    ErrorCode dump_comm_information( std::string prefix, int is_send );

  private:
    /**
      \brief find ranks of a group with respect to an encompassing communicator

      <B>Operations:</B> Local, usually called on the root process of the group

      \param[in]  group (MPI_Group)
      \param[in]  join  (MPI_Comm) the encompassing joint communicator
      \param[out] ranks (std::vector<int>) ranks with respect to the joint communicator
    */
    void find_group_ranks( MPI_Group group, MPI_Comm join, std::vector< int >& ranks );
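    /*
     * The translation is essentially what MPI_Group_translate_ranks provides; a minimal
     * equivalent sketch (assuming join is a valid communicator on this task):
     *
     *   MPI_Group joinGroup;
     *   MPI_Comm_group( join, &joinGroup );
     *   int grpSize;
     *   MPI_Group_size( group, &grpSize );
     *   std::vector< int > grpRanks( grpSize ), joinRanks( grpSize );
     *   for( int i = 0; i < grpSize; i++ ) grpRanks[i] = i;
     *   MPI_Group_translate_ranks( group, grpSize, grpRanks.data(), joinGroup, joinRanks.data() );
     *   MPI_Group_free( &joinGroup );
     */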

    MPI_Comm comm;
    std::vector< int > senderTasks;    // the sender tasks, as ranks in the joint comm
    std::vector< int > receiverTasks;  // all the receiver tasks, as ranks in the joint comm
    bool rootSender;
    bool rootReceiver;
    int rankInGroup1, rankInGroup2;  // group 1 is the sender group, group 2 the receiver group
    int rankInJoin, joinSize;
    int compid1, compid2;
    int context_id;          // used to identify the other comp for intersection
    EntityHandle cover_set;  // initialized only if this is the receiver ParCommGraph, in
                             // CoverageGraph

    // communication graph from group1 to group2;
    // graph[task1] = vec1; // vec1 is an STL vector of tasks in group2
    std::map< int, std::vector< int > > recv_graph;  // for each receiver task, the sender tasks it receives from
                                                     // (actual communication graph)
    std::map< int, std::vector< int > >
        recv_sizes;  // for each receiver task, how many elements come from each sender
    std::map< int, std::vector< int > >
        sender_graph;  // for each sender task, the receiver tasks it sends to (actual communication graph)
    std::map< int, std::vector< int > >
        sender_sizes;  // for each sender task, how many elements go to each receiver

    std::vector< ParallelComm::Buffer* > localSendBuffs;  // stores the pointers to the send Buffers;
                                                          // released only after all MPI requests
                                                          // have been waited on
    int* comm_graph;  // stores the communication graph on the sender master, sent by nonblocking
                      // send to the master receiver; the first integer is the size of the graph,
                      // the rest is the packed graph, for the trivial partition

    // these are used to store the ranges to be sent from the current sender to each receiver in
    // the joint comm
    std::map< int, Range > split_ranges;

    std::vector< MPI_Request > sendReqs;  // there will be multiple requests, 2 for the comm graph, 2 for each Buffer
                                          // there are as many buffers as sender_graph[rankInJoin].size()

    // active on both receiver and sender sides
    std::vector< int > corr_tasks;  // subset of senderTasks, in the joint comm, on the sender side;
                                    // subset of receiverTasks on the receiver side
    std::vector< int > corr_sizes;  // how many primary entities correspond to the other side;
                                    // the local range corresponds to remote ranges of size corr_sizes[i]
                                    // on tasks corr_tasks[i]

    // these are used now after coverage, quick fix; they will also be populated by
    // iMOAB_CoverageGraph
    TypeGraph graph_type;  // INITIAL_MIGRATE by default; changed by the settle methods when the
                           // communication pattern changes (COVERAGE or DOF_BASED)
    std::map< int, std::vector< int > > involved_IDs_map;  // replaces the separate send and recv IDs maps
    // used only for the third method: DOF_BASED
    std::map< int, std::vector< int > >
        map_index;  // from index in involved[] to index in values[] of the tag, for each corresponding task
    std::map< int, std::vector< int > > map_ptr;  // lmap[ie], lmap[ie+1]: pointers into map_index[corrTask]
};

}  // namespace moab
#endif /* SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_ */