/*
 * ParCommGraph.hpp
 *
 * will be used to set up communication between 2 distributed meshes, in which one mesh was
 * migrated from the other (one example is an atmosphere mesh migrated to coupler pes)
 *
 * there are 3 communicators in play, one for each mesh, and one for the joint
 * communicator that spans both sets of processes; to send mesh or tag data we need to use the
 * joint communicator, with nonblocking MPI_Isend and blocking MPI_Recv receives
 *
 * various methods should be available to migrate meshes: trivial, using a graph partitioner
 * (Zoltan PHG) and using a geometric partitioner (Zoltan RCB)
 *
 * communicators are represented by their MPI groups, not by the communicators themselves, because
 * the groups are always defined, irrespective of which tasks they are on. Communicators can be
 * MPI_COMM_NULL, while MPI groups are always defined
 *
 * Some of the methods here are executed over the sender communicator, some over the
 * receiver communicator. They can switch places: what was the sender becomes the receiver and
 * vice versa
 *
 * The name "graph" is in the sense of a bipartite graph, in which we can separate sender and
 * receiver tasks
 *
 * The info stored in the ParCommGraph helps in migrating fields (MOAB tags) from component to the
 * coupler and back
 *
 * So initially the ParCommGraph assists in mesh migration (from component to coupler), and then
 * it is used to migrate tag data from component to coupler and back from coupler to component.
 *
 * The same class is used after intersection (which is done on the coupler pes between 2 different
 * component migrated meshes), and it alters the communication pattern between the original
 * component pes and the coupler pes
 *
 * We added a new way to send tags between 2 models; the first application of the new method is to
 * send a tag from the atm dynamics model (spectral elements, with np x np tags defined on each
 * element, according to the GLOBAL_DOFS tag associated to each element) towards the atm physics
 * model, which is just a point cloud of vertices distributed differently to the physics model pes;
 * matching is done using the GLOBAL_ID tag on vertices. Right now, we assume that the models are
 * on different pes, but the joint communicator covers both, and that the ids of the tasks are with
 * respect to the joint communicator
 *
 */
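/*
 * Illustrative sketch only (not part of this header's API): building the two MPI groups from the
 * joint communicator works on every task, even on tasks where a component communicator would be
 * MPI_COMM_NULL; the names joinComm, nAtm and nCpl below are hypothetical, assuming the first
 * nAtm ranks of joinComm run the sender component and the next nCpl ranks run the receiver
 *
 *   MPI_Group joinGroup, atmGroup, cplGroup;
 *   MPI_Comm_group( joinComm, &joinGroup );
 *   std::vector< int > atmRanks( nAtm ), cplRanks( nCpl );
 *   for( int i = 0; i < nAtm; i++ ) atmRanks[i] = i;
 *   for( int i = 0; i < nCpl; i++ ) cplRanks[i] = nAtm + i;
 *   MPI_Group_incl( joinGroup, nAtm, atmRanks.data(), &atmGroup );
 *   MPI_Group_incl( joinGroup, nCpl, cplRanks.data(), &cplGroup );
 */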
#include "moab_mpi.h"
#include "moab/Interface.hpp"
#include "moab/ParallelComm.hpp"
#include <map>

#ifndef SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_
#define SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_

namespace moab
{

class ParCommGraph
{
  public:
    enum TypeGraph
    {
        INITIAL_MIGRATE,
        COVERAGE,
        DOF_BASED
    };
    virtual ~ParCommGraph();

    /**
     * \brief collective constructor, will be called on all sender tasks and receiver tasks
     * \param[in] joincomm joint MPI communicator that covers both sender and receiver MPI groups
     * \param[in] group1   MPI group formed with sender tasks (the sender usually loads the mesh in
     *                     a migrate scenario)
     * \param[in] group2   MPI group formed with receiver tasks (the receiver usually receives the
     *                     mesh in a migrate scenario)
     * \param[in] coid1    sender component unique identifier in a coupled application (in climate
     *                     simulations it could be the Atmosphere Comp id = 5 or the Ocean Comp id = 17)
     * \param[in] coid2    receiver component unique identifier in a coupled application (it is
     *                     usually the coupler, 2, in E3SM)
     *
     * this graph will be formed on sender and receiver tasks, and, in principle, will hold info
     * about how the local entities are distributed on the other side
     *
     * Its major role is to help in migration of data, from component to the coupler and vice versa;
     * each local entity has a corresponding task (or tasks) on the other side, to which the data
     * needs to be sent
     *
     * important data stored in ParCommGraph, as soon as it is created:
     *   - all sender and receiver task ids, with respect to the joint communicator
     *   - local rank in sender and receiver group (-1 if not part of the respective group)
     *   - rank in the joint communicator (from 0)
     */
    ParCommGraph( MPI_Comm joincomm, MPI_Group group1, MPI_Group group2, int coid1, int coid2 );
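    /*
     * Usage sketch (illustrative only; joinComm, atmGroup, cplGroup and the component ids are
     * hypothetical, following the E3SM-style convention mentioned above):
     *
     *   // called collectively on every task of joinComm, sender and receiver alike
     *   ParCommGraph* cgraph = new ParCommGraph( joinComm, atmGroup, cplGroup, 5, 2 );
     *   if( cgraph->is_root_sender() )
     *   {
     *       // e.g. compute and send the trivial-partition graph to the receiver root
     *   }
     */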

    /**
     * \brief copy constructor will copy only the senders, receivers, compid1, etc
     */
    ParCommGraph( const ParCommGraph& );

    /**
      \brief Based on the number of elements on each task in group 1, partition for group 2,
      trivially

      <B>Operations:</B> it is called on every receiver task; decides how all elements are
      distributed

      Note: establish how many elements are sent from each task in group 1 to tasks in group 2.
      This call is usually made on a root / master process, and will construct local maps that
      are member data, which contain the communication graph, in both directions. It also records
      the number of elements migrated/exchanged between each sender/receiver pair.

      \param[in] numElemsPerTaskInGroup1 (std::vector<int> &) number of elements on each sender
      task
    */

    ErrorCode compute_trivial_partition( std::vector< int >& numElemsPerTaskInGroup1 );
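    /*
     * Usage sketch (illustrative; cgraph and numElems are hypothetical names):
     *
     *   std::vector< int > numElems;  // element count for each sender task, gathered beforehand
     *   ErrorCode rval = cgraph->compute_trivial_partition( numElems );
     *   // afterwards, the internal recv_graph / sender_graph maps describe, for every receiver,
     *   // which senders contribute elements and how many
     */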

    /**
      \brief pack information about the receivers' view of the graph, for future sending to the
      receiver root

      <B>Operations:</B> Local, called on the root process of the senders group

      \param[out] packed_recv_array
      packed data will be sent to the root of the receivers, and distributed from there; it
      contains, for each receiver, concatenated:
      receiver 1 task, number of senders for receiver 1, then the sender tasks for receiver 1,
      receiver 2 task, number of senders for receiver 2, the sender tasks for receiver 2, etc.
      Note: only the root of the senders will compute this, and send it over to the receiver root,
      which will distribute it over each receiver. We do not pack the sizes of the data to be sent,
      only the senders for each of the receivers (this could be of size O(n^2), where n is the
      number of tasks, but in general it should be more like O(n): each sender sends to a "finite"
      number of receivers, and each receiver receives from a finite number of senders). We need
      this info to decide how to set up the send/receive waiting game for the non-blocking
      communication.
    */
    ErrorCode pack_receivers_graph( std::vector< int >& packed_recv_array );
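    /*
     * Example of the packed layout (illustrative): with receivers { r1, r2 }, where r1 gets data
     * from senders { s1, s2 } and r2 only from { s3 }, the packed array is
     *
     *   [ r1, 2, s1, s2,   r2, 1, s3 ]
     */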

    // get methods for private data
    bool is_root_sender()
    {
        return rootSender;
    }

    bool is_root_receiver()
    {
        return rootReceiver;
    }

    int sender( int index )
    {
        return senderTasks[index];
    }

    int receiver( int index )
    {
        return receiverTasks[index];
    }

    int get_component_id1()
    {
        return compid1;
    }
    int get_component_id2()
    {
        return compid2;
    }

    int get_context_id()
    {
        return context_id;
    }
    void set_context_id( int other_id )
    {
        context_id = other_id;
    }

    EntityHandle get_cover_set()
    {
        return cover_set;
    }
    void set_cover_set( EntityHandle cover )
    {
        cover_set = cover;
    }

    // return local graph for a specific task
    ErrorCode split_owned_range( int sender_rank, Range& owned );

    ErrorCode split_owned_range( Range& owned );

    ErrorCode send_graph( MPI_Comm jcomm );

    ErrorCode send_graph_partition( ParallelComm* pco, MPI_Comm jcomm );

    ErrorCode send_mesh_parts( MPI_Comm jcomm, ParallelComm* pco, Range& owned );

    // the following are called on the receiver side
    ErrorCode receive_comm_graph( MPI_Comm jcomm, ParallelComm* pco, std::vector< int >& pack_array );

    ErrorCode receive_mesh( MPI_Comm jcomm,
                            ParallelComm* pco,
                            EntityHandle local_set,
                            std::vector< int >& senders_local );

    ErrorCode release_send_buffers();
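    /*
     * Plausible call sequence for mesh migration (a sketch, not the authoritative driver code;
     * cgraph, pco, jcomm, owned, localSet and packArray are hypothetical names):
     *
     *   // sender tasks
     *   rval = cgraph->send_graph( jcomm );               // root sender posts the graph (nonblocking)
     *   rval = cgraph->send_mesh_parts( jcomm, pco, owned );
     *
     *   // receiver tasks
     *   std::vector< int > packArray, senders;
     *   rval = cgraph->receive_comm_graph( jcomm, pco, packArray );
     *   rval = cgraph->receive_mesh( jcomm, pco, localSet, senders );
     *
     *   // back on the sender tasks, after the receives have completed
     *   rval = cgraph->release_send_buffers();
     */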

    ErrorCode send_tag_values( MPI_Comm jcomm, ParallelComm* pco, Range& owned, std::vector< Tag >& tag_handles );

    ErrorCode receive_tag_values( MPI_Comm jcomm, ParallelComm* pco, Range& owned, std::vector< Tag >& tag_handles );
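    /*
     * Tag-migration sketch (illustrative; the tag handle and ranges are hypothetical):
     *
     *   std::vector< Tag > tags;
     *   tags.push_back( temperatureTag );  // hypothetical tag handle, already defined on the mesh
     *   // on the sender (component) tasks:
     *   rval = cgraph->send_tag_values( jcomm, pco, owned, tags );
     *   // on the receiver (coupler) tasks:
     *   rval = cgraph->receive_tag_values( jcomm, pco, owned, tags );
     */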

    // getter methods
    const std::vector< int >& senders()
    {
        return senderTasks;
    }  // reference copy; refers to sender tasks in the joint comm
    const std::vector< int >& receivers()
    {
        return receiverTasks;
    }

    ErrorCode settle_send_graph( TupleList& TLcovIDs );

    // this will set after_cov_rec_sizes
    void SetReceivingAfterCoverage(
        std::map< int, std::set< int > >& idsFromProcs );  // makes sense only on receivers, right now after coverage

    // strideComp is np x np, or 1, in our cases
    // will fill up ordered lists for corresponding IDs on the other component
    // will form back and forth information, from the ordered list of IDs, to valuesComp
    void settle_comm_by_ids( int comp, TupleList& TLBackToComp, std::vector< int >& valuesComp );

    // after reading the map, we need to know what entities we need to send to the receiver
    ErrorCode set_split_ranges( int comp,
                                TupleList& TLBackToComp1,
                                std::vector< int >& valuesComp1,
                                int lenTag,
                                Range& ents_of_interest,
                                int type );

    // new methods to migrate mesh after reading the map
    ErrorCode form_tuples_to_migrate_mesh( Interface* mb, TupleList& TLv, TupleList& TLc, int type, int lenTagType1 );
    ErrorCode form_mesh_from_tuples( Interface* mb,
                                     TupleList& TLv,
                                     TupleList& TLc,
                                     int type,
                                     int lenTagType1,
                                     EntityHandle fset,
                                     Range& primary_ents,
                                     std::vector< int >& values_entities );

    // new partition calculation
    ErrorCode compute_partition( ParallelComm* pco, Range& owned, int met );

    // dump local information about the graph
    ErrorCode dump_comm_information( std::string prefix, int is_send );

  private:
    /**
      \brief find ranks of a group with respect to an encompassing communicator

      <B>Operations:</B> Local, usually called on the root process of the group

      \param[in]  group (MPI_Group)
      \param[in]  join  (MPI_Comm) the encompassing joint communicator
      \param[out] ranks (std::vector<int>) ranks with respect to the joint communicator
    */
    void find_group_ranks( MPI_Group group, MPI_Comm join, std::vector< int >& ranks );
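    /*
     * The translation is essentially what MPI_Group_translate_ranks provides; a minimal
     * equivalent sketch (assuming join is a valid communicator on this task):
     *
     *   MPI_Group joinGroup;
     *   MPI_Comm_group( join, &joinGroup );
     *   int grpSize;
     *   MPI_Group_size( group, &grpSize );
     *   std::vector< int > grpRanks( grpSize ), joinRanks( grpSize );
     *   for( int i = 0; i < grpSize; i++ ) grpRanks[i] = i;
     *   MPI_Group_translate_ranks( group, grpSize, grpRanks.data(), joinGroup, joinRanks.data() );
     *   MPI_Group_free( &joinGroup );
     */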

    MPI_Comm comm;
    std::vector< int > senderTasks;    // the sender tasks, as ranks in the joint comm
    std::vector< int > receiverTasks;  // all the receiver tasks, as ranks in the joint comm
    bool rootSender;
    bool rootReceiver;
    int rankInGroup1, rankInGroup2;  // group 1 is the sender group, group 2 the receiver group
    int rankInJoin, joinSize;
    int compid1, compid2;
    int context_id;          // used to identify the other comp for intersection
    EntityHandle cover_set;  // initialized only if this is the receiver ParCommGraph, in
                             // CoverageGraph

    // communication graph from group1 to group2;
    // graph[task1] = vec1; // vec1 is an STL vector of tasks in group2
    std::map< int, std::vector< int > > recv_graph;  // for each receiver task, the sender tasks it receives from
                                                     // (actual communication graph)
    std::map< int, std::vector< int > >
        recv_sizes;  // for each receiver task, how many elements come from each sender
    std::map< int, std::vector< int > >
        sender_graph;  // for each sender task, the receiver tasks it sends to (actual communication graph)
    std::map< int, std::vector< int > >
        sender_sizes;  // for each sender task, how many elements go to each receiver

    std::vector< ParallelComm::Buffer* > localSendBuffs;  // stores the pointers to the send Buffers;
                                                          // released only after all MPI requests
                                                          // have been waited on
    int* comm_graph;  // stores the communication graph on the sender master, sent by nonblocking
                      // send to the master receiver; the first integer is the size of the graph,
                      // the rest is the packed graph, for the trivial partition

    // these are used to store the ranges to be sent from the current sender to each receiver in
    // the joint comm
    std::map< int, Range > split_ranges;

    std::vector< MPI_Request > sendReqs;  // there will be multiple requests, 2 for the comm graph, 2 for each Buffer
                                          // there are as many buffers as sender_graph[rankInJoin].size()

    // active on both receiver and sender sides
    std::vector< int > corr_tasks;  // subset of senderTasks, in the joint comm, on the sender side;
                                    // subset of receiverTasks on the receiver side
    std::vector< int > corr_sizes;  // how many primary entities correspond to the other side;
                                    // the local range corresponds to remote ranges of size corr_sizes[i]
                                    // on tasks corr_tasks[i]

    // these are used now after coverage, quick fix; they will also be populated by
    // iMOAB_CoverageGraph
    TypeGraph graph_type;  // INITIAL_MIGRATE by default; changed by the settle methods when the
                           // communication pattern changes (COVERAGE or DOF_BASED)
    std::map< int, std::vector< int > > involved_IDs_map;  // replaces the separate send and recv IDs maps
    // used only for the third method: DOF_BASED
    std::map< int, std::vector< int > >
        map_index;  // from index in involved[] to index in values[] of the tag, for each corresponding task
    std::map< int, std::vector< int > > map_ptr;  // lmap[ie], lmap[ie+1]: pointers into map_index[corrTask]
};

}  // namespace moab
#endif /* SRC_PARALLEL_MOAB_PARCOMMGRAPH_HPP_ */