MOAB: Mesh Oriented datABase  (version 5.5.0)
test_mapweights_bcast.cpp
1 /**
2  * @file test_mapweights_bcast.cpp
3  * @author Vijay Mahadevan ([email protected])
4  * @brief Read and distribute mapping weights to multiple processes
5  * @version 0.1
6  * @date 2020-07-13
7  *
8  */
9 
10 #include <iostream>
11 #include <vector>
12 #include "moab/Core.hpp"
13 
14 #ifndef MOAB_HAVE_TEMPESTREMAP
15 #error Need TempestRemap dependency for this test
16 #endif
17 
18 #include "OfflineMap.h"
21 #include "moab/ParallelComm.hpp"
22 
23 #ifndef IS_BUILDING_MB
24 #define IS_BUILDING_MB
25 #endif
26 
27 #include "Internals.hpp"
28 #include "TestRunner.hpp"
29 
30 #include <netcdf.h>
31 #include <netcdf_par.h>
32 
33 using namespace moab;
34 
35 std::string inMapFilename = "";
36 
37 void test_tempest_map_bcast();
38 void read_buffered_map();
39 void read_map_from_disk();
40 
41 int main( int argc, char** argv )
42 {
43  if( argc > 1 )
44  inMapFilename = std::string( argv[1] );
45  else
46  inMapFilename = TestDir + "unittest/outCS5ICOD5_map.nc";
47 
48  REGISTER_TEST( test_tempest_map_bcast );
49  REGISTER_TEST( read_buffered_map );
50  REGISTER_TEST( read_map_from_disk );
51 
52 #ifdef MOAB_HAVE_MPI
53  MPI_Init( &argc, &argv );
54 #endif
55  int result = RUN_TESTS( argc, argv );
56 #ifdef MOAB_HAVE_MPI
57  MPI_Finalize();
58 #endif
59  return result;
60 }
61 
62 #define NCERROREXIT( err, MSG ) \
63  if( err ) \
64  { \
65  std::cout << "Error (" << ( err ) << "): " << ( MSG ); \
66  std::cout << "\nAborting with message: " << nc_strerror( err ) << "... \n"; \
67  return; \
68  }
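The NCERROREXIT macro guards the integer status codes returned by the raw NetCDF-C API; since it expands to a bare return, it is only usable inside functions returning void, such as the test routines below. A minimal usage sketch (the helper name and messages are illustrative, not part of this test):

    // Sketch: guarding NetCDF-C calls with NCERROREXIT (illustrative only).
    void probe_map_file( const char* path )
    {
        int ncid = -1, varid = -1;
        int err  = nc_open( path, NC_NOWRITE, &ncid );  // open the file read-only
        NCERROREXIT( err, "opening map file" );

        err = nc_inq_varid( ncid, "S", &varid );  // locate the sparse weights variable
        NCERROREXIT( err, "locating variable S" );

        nc_close( ncid );
    }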
69 
70 void read_buffered_map()
71 {
72  NcError error( NcError::silent_nonfatal );
73 
74  // Define our total buffer size to use
75  NcFile* ncMap = NULL;
76  NcVar *varRow = NULL, *varCol = NULL, *varS = NULL;
77 
78 #ifdef MOAB_HAVE_MPI
79  int mpierr = 0;
80  const int rootProc = 0;
81 #endif
82  // Main inputs:
83  // 1. Map Filename
84  // 2. Ownership info from the processes to be gathered on root
85  const int nBufferSize = 64 * 1024; // 64 KB
86  const int nNNZBytes = 2 * sizeof( int ) + sizeof( double );
87  const int nMaxEntries = nBufferSize / nNNZBytes;
88  int nA = 0, nB = 0, nVA = 0, nVB = 0, nS = 0;
89  double dTolerance = 1e-08;
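    // Buffer sizing, worked out for a typical build with 4-byte int and 8-byte double:
    // each nonzero carries one row index, one column index and one value, so
    // nNNZBytes = 2*4 + 8 = 16 bytes and nMaxEntries = 64*1024 / 16 = 4096 nonzeros
    // per 64 KB buffer. For the default test map (n_s = 8976 nonzeros) this amounts
    // to ceil(8976/4096) = 3 buffered reads.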
90 
91  int nprocs = 1, rank = 0;
92 #ifdef MOAB_HAVE_MPI
93  MPI_Comm commW = MPI_COMM_WORLD;
94  MPI_Comm_size( commW, &nprocs );
95  MPI_Comm_rank( commW, &rank );
96 #endif
97 
98 #ifdef MOAB_HAVE_MPI
99  if( rank == rootProc )
100 #endif
101  {
102  ncMap = new NcFile( inMapFilename.c_str(), NcFile::ReadOnly );
103  if( !ncMap->is_valid() )
104  {
105  _EXCEPTION1( "Unable to open input map file \"%s\"", inMapFilename.c_str() );
106  }
107 
108  // Source and Target mesh resolutions
109  NcDim* dimNA = ncMap->get_dim( "n_a" );
110  if( dimNA == NULL )
111  {
112  _EXCEPTIONT( "Input map missing dimension \"n_a\"" );
113  }
114  else
115  nA = dimNA->size();
116 
117  NcDim* dimNB = ncMap->get_dim( "n_b" );
118  if( dimNB == NULL )
119  {
120  _EXCEPTIONT( "Input map missing dimension \"n_b\"" );
121  }
122  else
123  nB = dimNB->size();
124 
125  NcDim* dimNVA = ncMap->get_dim( "nv_a" );
126  if( dimNVA == NULL )
127  {
128  _EXCEPTIONT( "Input map missing dimension \"nv_a\"" );
129  }
130  else
131  nVA = dimNVA->size();
132 
133  NcDim* dimNVB = ncMap->get_dim( "nv_b" );
134  if( dimNVB == NULL )
135  {
136  _EXCEPTIONT( "Input map missing dimension \"nv_b\"" );
137  }
138  else
139  nVB = dimNVB->size();
140 
141  // Read SparseMatrix entries
142  NcDim* dimNS = ncMap->get_dim( "n_s" );
143  if( dimNS == NULL )
144  {
145  _EXCEPTION1( "Map file \"%s\" does not contain dimension \"n_s\"", inMapFilename.c_str() );
146  }
147  else
148  nS = dimNS->size(); // store total number of nonzeros
149 
150  varRow = ncMap->get_var( "row" );
151  if( varRow == NULL )
152  {
153  _EXCEPTION1( "Map file \"%s\" does not contain variable \"row\"", inMapFilename.c_str() );
154  }
155 
156  varCol = ncMap->get_var( "col" );
157  if( varCol == NULL )
158  {
159  _EXCEPTION1( "Map file \"%s\" does not contain variable \"col\"", inMapFilename.c_str() );
160  }
161 
162  varS = ncMap->get_var( "S" );
163  if( varS == NULL )
164  {
165  _EXCEPTION1( "Map file \"%s\" does not contain variable \"S\"", inMapFilename.c_str() );
166  }
167  }
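    // The block above queries dimensions and variables through the legacy netcdf-cxx
    // classes (NcFile/NcDim/NcVar). With the NetCDF-C interface already pulled in via
    // <netcdf.h>, the same metadata lookups would look roughly like this sketch
    // (illustrative only, not used by the test):
    //
    //     int ncid, dimid, varid;
    //     size_t nS_len;
    //     nc_open( inMapFilename.c_str(), NC_NOWRITE, &ncid );
    //     nc_inq_dimid( ncid, "n_s", &dimid );
    //     nc_inq_dimlen( ncid, dimid, &nS_len );  // total number of nonzeros
    //     nc_inq_varid( ncid, "S", &varid );      // sparse weight values
    //
    // Only the root process opens the file; the sizes are broadcast to the other
    // ranks immediately below.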
168 
169  // const int nTotalBytes = nS * nNNZBytes;
170  int nEntriesRemaining = nS;
171  int nBufferedReads = static_cast< int >( ceil( ( 1.0 * nS ) / ( 1.0 * nMaxEntries ) ) );
172  int runData[6] = { nA, nB, nVA, nVB, nS, nBufferedReads };
173 
174  mpierr = MPI_Bcast( runData, 6, MPI_INT, rootProc, commW );
175  CHECK_EQUAL( mpierr, 0 );
176 
177 #ifdef MOAB_HAVE_MPI
178  if( rank != rootProc )
179  {
180  nA = runData[0];
181  nB = runData[1];
182  nVA = runData[2];
183  nVB = runData[3];
184  nS = runData[4];
185  nBufferedReads = runData[5];
186  }
187  else
188  printf( "Global parameters: nA = %d, nB = %d, nS = %d\n", nA, nB, nS );
189 #else
190  printf( "Global parameters: nA = %d, nB = %d, nS = %d\n", nA, nB, nS );
191 #endif
192 
193  std::vector< int > rowOwnership( nprocs );
194  int nGRowPerPart = nB / nprocs;
195  int nGRowRemainder = nB % nprocs; // Keep the remainder in root
196  rowOwnership[0] = nGRowPerPart + nGRowRemainder;
197  for( int ip = 1, roffset = rowOwnership[0]; ip < nprocs; ++ip )
198  {
199  roffset += nGRowPerPart;
200  rowOwnership[ip] = roffset;
201  }
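    // rowOwnership[ip] holds the exclusive upper bound of the global (0-based) row
    // indices assigned to rank ip, with the remainder kept on root. For example, with
    // nB = 252 target rows on 4 processes: nGRowPerPart = 63, nGRowRemainder = 0 and
    // rowOwnership = {63, 126, 189, 252}; a row index r belongs to the first rank ip
    // for which rowOwnership[ip] > r.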
202 
203  // Let us declare the map object for every process
204  OfflineMap mapRemap;
205  SparseMatrix< double >& sparseMatrix = mapRemap.GetSparseMatrix();
206 
207 #ifdef MOAB_HAVE_MPI
208  if( rank == rootProc )
209 #endif
210  printf( "Parameters: nA=%d, nB=%d, nVA=%d, nVB=%d, nS=%d, nNNZBytes = %d, nBufferedReads = %d\n", nA, nB, nVA,
211  nVB, nS, nNNZBytes, nBufferedReads );
212 
213  std::map< int, int > rowMap, colMap;
214  int rindexMax = 0, cindexMax = 0;
215  long offset = 0;
216 
217  /* Split the rows and send to processes in chunks
218  Let us start the buffered read */
219  for( int iRead = 0; iRead < nBufferedReads; ++iRead )
220  {
221  // pointers to the data
222  DataArray1D< int > vecRow;
223  DataArray1D< int > vecCol;
224  DataArray1D< double > vecS;
225  std::vector< std::vector< int > > dataPerProcess( nprocs );
226  std::vector< int > nDataPerProcess( nprocs, 0 );
227  for( int ip = 0; ip < nprocs; ++ip )
228  dataPerProcess[ip].reserve( std::max( 100, nMaxEntries / nprocs ) );
229 
230 #ifdef MOAB_HAVE_MPI
231  if( rank == rootProc )
232 #endif
233  {
234  int nLocSize = std::min( nEntriesRemaining, static_cast< int >( ceil( nBufferSize * 1.0 / nNNZBytes ) ) );
235 
236  printf( "Reading file: elements %ld to %ld\n", offset, offset + nLocSize );
237 
238  // Allocate and resize based on local buffer size
239  vecRow.Allocate( nLocSize );
240  vecCol.Allocate( nLocSize );
241  vecS.Allocate( nLocSize );
242 
243  varRow->set_cur( (long)( offset ) );
244  varRow->get( &( vecRow[0] ), nLocSize );
245 
246  varCol->set_cur( (long)( offset ) );
247  varCol->get( &( vecCol[0] ), nLocSize );
248 
249  varS->set_cur( (long)( offset ) );
250  varS->get( &( vecS[0] ), nLocSize );
251 
252  // Decrement vecRow and vecCol (file indices are 1-based) and compute the owning rank for each entry
253  for( size_t i = 0; i < vecRow.GetRows(); i++ )
254  {
255  vecRow[i]--;
256  vecCol[i]--;
257 
258  // TODO: Fix this linear search to compute process ownership
259  int pOwner = -1;
260  if( rowOwnership[0] > vecRow[i] )
261  pOwner = 0;
262  else
263  {
264  for( int ip = 1; ip < nprocs; ++ip )
265  {
266  if( rowOwnership[ip - 1] <= vecRow[i] && rowOwnership[ip] > vecRow[i] )
267  {
268  pOwner = ip;
269  break;
270  }
271  }
272  }
273 
274  assert( pOwner >= 0 && pOwner < nprocs );
275  dataPerProcess[pOwner].push_back( i );
276  }
277 
278  offset += nLocSize;
279  nEntriesRemaining -= nLocSize;
280 
281  for( int ip = 0; ip < nprocs; ++ip )
282  nDataPerProcess[ip] = dataPerProcess[ip].size();
283  }
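    // Since rowOwnership stores strictly increasing upper bounds, the linear
    // ownership search flagged by the TODO above could be replaced by a binary
    // search; a sketch (assuming <algorithm> is included):
    //
    //     int pOwner = static_cast< int >(
    //         std::upper_bound( rowOwnership.begin(), rowOwnership.end(), vecRow[i] ) -
    //         rowOwnership.begin() );
    //
    // std::upper_bound returns the first partition whose bound exceeds the row index,
    // which is exactly the owner selected by the loop above.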
284 
285  // Communicate the number of nonzero entries each process should expect from root
286  int nEntriesComm = 0;
287  mpierr = MPI_Scatter( nDataPerProcess.data(), 1, MPI_INT, &nEntriesComm, 1, MPI_INT, rootProc, commW );
288  CHECK_EQUAL( mpierr, 0 );
289 
290 #ifdef MOAB_HAVE_MPI
291  if( rank == rootProc )
292 #endif
293  {
294  std::vector< MPI_Request > cRequests;
295  std::vector< MPI_Status > cStats( 2 * nprocs - 2 );
296  cRequests.reserve( 2 * nprocs - 2 );
297 
298  // First send out all the relevant data buffers to the other processes
299  std::vector< std::vector< int > > dataRowColsAll( nprocs );
300  std::vector< std::vector< double > > dataEntriesAll( nprocs );
301  for( int ip = 1; ip < nprocs; ++ip )
302  {
303  const int nDPP = nDataPerProcess[ip];
304  if( nDPP > 0 )
305  {
306  std::vector< int >& dataRowCols = dataRowColsAll[ip];
307  dataRowCols.resize( 2 * nDPP );
308  std::vector< double >& dataEntries = dataEntriesAll[ip];
309  dataEntries.resize( nDPP );
310 
311  for( int ij = 0; ij < nDPP; ++ij )
312  {
313  dataRowCols[ij * 2] = vecRow[dataPerProcess[ip][ij]];
314  dataRowCols[ij * 2 + 1] = vecCol[dataPerProcess[ip][ij]];
315  dataEntries[ij] = vecS[dataPerProcess[ip][ij]];
316  }
317 
318  MPI_Request rcsend, dsend;
319  mpierr = MPI_Isend( dataRowCols.data(), 2 * nDPP, MPI_INT, ip, iRead * 1000, commW, &rcsend );
320  CHECK_EQUAL( mpierr, 0 );
321  mpierr = MPI_Isend( dataEntries.data(), nDPP, MPI_DOUBLE, ip, iRead * 1000 + 1, commW, &dsend );
322  CHECK_EQUAL( mpierr, 0 );
323 
324  cRequests.push_back( rcsend );
325  cRequests.push_back( dsend );
326  }
327  }
328 
329  // Next perform all necessary local work while we wait for the buffers to be sent out
330  // Compute an offset for the rows and columns by creating a local to global mapping for rootProc
331  int rindex = 0, cindex = 0;
332  assert( dataPerProcess[0].size() - nEntriesComm == 0 ); // sanity check
333  for( int i = 0; i < nEntriesComm; ++i )
334  {
335  const int& vecRowValue = vecRow[dataPerProcess[0][i]];
336  const int& vecColValue = vecCol[dataPerProcess[0][i]];
337 
338  std::map< int, int >::iterator riter = rowMap.find( vecRowValue );
339  if( riter == rowMap.end() )
340  {
341  rowMap[vecRowValue] = rindexMax;
342  rindex = rindexMax;
343  rindexMax++;
344  }
345  else
346  rindex = riter->second;
347 
348  std::map< int, int >::iterator citer = colMap.find( vecColValue );
349  if( citer == colMap.end() )
350  {
351  colMap[vecColValue] = cindexMax;
352  cindex = cindexMax;
353  cindexMax++;
354  }
355  else
356  cindex = citer->second;
357 
358  sparseMatrix( rindex, cindex ) = vecS[i];
359  }
360 
361  // Wait until all communication is pushed out
362  mpierr = MPI_Waitall( cRequests.size(), cRequests.data(), cStats.data() );
363  CHECK_EQUAL( mpierr, 0 );
364  } // if( rank == rootProc )
365 #ifdef MOAB_HAVE_MPI
366  else
367  {
368  if( nEntriesComm > 0 )
369  {
370  MPI_Request cRequests[2];
371  MPI_Status cStats[2];
372  /* code */
373  // create buffers to receive the data from root process
374  std::vector< int > dataRowCols( 2 * nEntriesComm );
375  vecRow.Allocate( nEntriesComm );
376  vecCol.Allocate( nEntriesComm );
377  vecS.Allocate( nEntriesComm );
378 
379  /* TODO: combine row and column scatters together. Save one communication by better packing */
380  // Scatter the rows, cols and entries to the processes according to the partition
381  mpierr = MPI_Irecv( dataRowCols.data(), 2 * nEntriesComm, MPI_INT, rootProc, iRead * 1000, commW,
382  &cRequests[0] );
383  CHECK_EQUAL( mpierr, 0 );
384 
385  mpierr = MPI_Irecv( vecS, nEntriesComm, MPI_DOUBLE, rootProc, iRead * 1000 + 1, commW, &cRequests[1] );
386  CHECK_EQUAL( mpierr, 0 );
387 
388  // Wait until all communication is pushed out
389  mpierr = MPI_Waitall( 2, cRequests, cStats );
390  CHECK_EQUAL( mpierr, 0 );
391 
392  // Compute an offset for the rows and columns by creating a local to global mapping
393  int rindex = 0, cindex = 0;
394  for( int i = 0; i < nEntriesComm; ++i )
395  {
396  const int& vecRowValue = dataRowCols[i * 2];
397  const int& vecColValue = dataRowCols[i * 2 + 1];
398 
399  std::map< int, int >::iterator riter = rowMap.find( vecRowValue );
400  if( riter == rowMap.end() )
401  {
402  rowMap[vecRowValue] = rindexMax;
403  rindex = rindexMax;
404  rindexMax++;
405  }
406  else
407  rindex = riter->second;
408  vecRow[i] = rindex;
409 
410  std::map< int, int >::iterator citer = colMap.find( vecColValue );
411  if( citer == colMap.end() )
412  {
413  colMap[vecColValue] = cindexMax;
414  cindex = cindexMax;
415  cindexMax++;
416  }
417  else
418  cindex = citer->second;
419  vecCol[i] = cindex;
420 
421  sparseMatrix( rindex, cindex ) = vecS[i];
422  }
423 
424  // clear the local buffer
425  dataRowCols.clear();
426  } // if( nEntriesComm > 0 )
427  } // if( rank != rootProc )
428 
429  MPI_Barrier( commW );
430 #endif
431  }
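    // Each buffered-read iteration follows the same pattern: root reads up to
    // nMaxEntries nonzeros, buckets them by owning rank via rowOwnership, scatters the
    // per-rank counts, and posts nonblocking sends of the packed (row, col) pairs
    // (tag iRead*1000) and of the weight values (tag iRead*1000 + 1); the other ranks
    // post matching Irecv calls, wait, and insert their entries into the local sparse
    // matrix through the local-to-global row/column maps. The MPI_Barrier keeps all
    // ranks in step before the next buffered read.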
432 
433 #ifdef MOAB_HAVE_MPI
434  if( rank == rootProc )
435 #endif
436  {
437  assert( nEntriesRemaining == 0 );
438  ncMap->close();
439  }
440 
441  /**
442  * Perform a series of checks to ensure that the local maps in each process still preserve map properties
443  * Let us run our usual tests to verify that the map data has been distributed correctly
444  * 1) Consistency check -- row sum = 1.0
445  * 2) Disable conservation checks that require global column sums (and hence more communication/reduction)
446  * 3) Perform the monotonicity check and ensure the global failure count matches the serial case
447  */
448  printf( "Rank %d: sparsematrix size = %d x %d\n", rank, sparseMatrix.GetRows(), sparseMatrix.GetColumns() );
449 
450  // consistency: row sums
451  {
452  int isConsistentP = mapRemap.IsConsistent( dTolerance );
453  if( 0 != isConsistentP )
454  {
455  printf( "Rank %d failed consistency checks...\n", rank );
456  }
457  CHECK_EQUAL( 0, isConsistentP );
458  }
459 
460  // conservation: we will disable this conservation check for now.
461  // int isConservativeP = mapRemap.IsConservative( dTolerance );
462  // CHECK_EQUAL( 0, isConservativeP );
463 
464  // monotonicity: local failures
465  {
466  int isMonotoneP = mapRemap.IsMonotone( dTolerance );
467  // Accumulate the sum of failures to ensure that it is exactly the same as in the serial case
468  int isMonotoneG;
469  mpierr = MPI_Allreduce( &isMonotoneP, &isMonotoneG, 1, MPI_INT, MPI_SUM, commW );
470  CHECK_EQUAL( mpierr, 0 );
471 
472  // 4600 monotonicity fails in serial and parallel for the test map file
473  CHECK_EQUAL( 4600, isMonotoneG );
474  }
475 
476  delete ncMap;
477 
478  return;
479 }
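What IsConsistent() verifies above is that every row of the distributed map still sums to 1 within dTolerance, i.e. that a constant field is reproduced exactly (the "row sum = 1.0" property named in the comment block). The following standalone sketch illustrates such a row-sum test over (row, value) triplets; it is not the OfflineMap implementation and assumes <cmath> for std::fabs:

    // Count rows whose weights do not sum to 1 within tol (conceptual sketch).
    static int count_inconsistent_rows( const std::vector< int >& rows,
                                        const std::vector< double >& vals,
                                        int nRows,
                                        double tol )
    {
        std::vector< double > rowSum( nRows, 0.0 );
        std::vector< bool > hasEntry( nRows, false );
        for( size_t k = 0; k < rows.size(); ++k )
        {
            rowSum[rows[k]] += vals[k];
            hasEntry[rows[k]] = true;
        }

        int nFail = 0;
        for( int i = 0; i < nRows; ++i )
            if( hasEntry[i] && std::fabs( rowSum[i] - 1.0 ) > tol ) nFail++;
        return nFail;
    }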
480 
481 void read_map_from_disk()
482 {
483  moab::ErrorCode rval;
484  NcError error( NcError::verbose_nonfatal );
485  MPI_Comm commW = MPI_COMM_WORLD;
486  std::vector< int > tmp_owned_ids;
487  double dTolerance = 1e-08;
488 
489  std::string remap_weights_filename = TestDir + "unittest/outCS5ICOD5_map.nc";
490 
491  moab::Interface* mb = new moab::Core();
492  moab::ParallelComm pcomm( mb, commW );
493 
494  moab::TempestRemapper remapper( mb, &pcomm );
495  remapper.meshValidate = true;
496  remapper.constructEdgeMap = true;
497 
498  // Do not create new filesets; Use the sets from our respective applications
499  remapper.initialize( false );
500 
501  moab::TempestOnlineMap onlinemap( &remapper );
502 
503  rval = onlinemap.ReadParallelMap( remap_weights_filename.c_str(), tmp_owned_ids, true );
504  CHECK_EQUAL( rval, moab::MB_SUCCESS );
505 
506  // consistency: row sums
507  int isConsistentP = onlinemap.IsConsistent( dTolerance );
508  CHECK_EQUAL( 0, isConsistentP );
509 
510  // conservation: we will disable this conservation check for now,
511  // since we do not have the source and target area information
512  // needed to verify conservation accurately
513  // int isConservativeP = onlinemap.IsConservative( dTolerance );
514  // CHECK_EQUAL( 0, isConservativeP );
515 
516  // monotonicity: global failures
517  int isMonotoneP = onlinemap.IsMonotone( dTolerance );
518  // The global count of failures should be exactly the same as in the serial case
519  // 4600 entries fail the monotonicity check in serial; the parallel count must match
520  CHECK_EQUAL( 4600, isMonotoneP );
521 }
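A monotone remap operator has all of its weights in [0, 1]; the monotonicity checks in these tests count the violations, and the expected total of 4600 for this CS5-to-ICOD5 map is asserted in both serial and parallel runs. A conceptual sketch of counting out-of-range weights (not the TempestRemap implementation):

    // Count weights outside [0, 1] up to a tolerance (conceptual sketch).
    static int count_monotonicity_failures( const std::vector< double >& weights, double tol )
    {
        int nFail = 0;
        for( size_t k = 0; k < weights.size(); ++k )
            if( weights[k] < -tol || weights[k] > 1.0 + tol ) nFail++;
        return nFail;
    }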
522 
523 void test_tempest_map_bcast()
524 {
525  /*
526  *
527  * Load mapping file generated with CS5 and ICOD5 meshes with FV-FV
528  * projection for field data. It has following attributes:
529  *
530  * Analyzing map
531  * ..Per-dof consistency (tol 1.00000e-08).. PASS
532  * ..Per-dof conservation (tol 1.00000e-08).. PASS
533  * ..Weights within range [-10,+10].. Done
534  * ..
535  * .. Total nonzero entries: 8976
536  * .. Column index min/max: 1 / 150 (150 source dofs)
537  * .. Row index min/max: 1 / 252 (252 target dofs)
538  * .. Source area / 4pi: 9.999999999991666e-01
539  * .. Source area min/max: 7.758193626656797e-02 / 9.789674024202324e-02
540  * .. Target area / 4pi: 9.999999999999928e-01
541  * .. Target area min/max: 3.418848585325412e-02 / 5.362041648788460e-02
542  * .. Source frac min/max: 9.999999999997851e-01 / 1.000000000003865e+00
543  * .. Target frac min/max: 9.999999999993099e-01 / 1.000000000000784e+00
544  * .. Map weights min/max: -2.062659472710135e-01 / 1.143815352351317e+00
545  * .. Row sums min/max: 9.999999999993099e-01 / 1.000000000000784e+00
546  * .. Consist. err min/max: -6.901146321069973e-13 / 7.838174553853605e-13
547  * .. Col wt.sums min/max: 9.999999999997854e-01 / 1.000000000003865e+00
548  * .. Conserv. err min/max: -2.146061106600428e-13 / 3.865352482534945e-12
549  * ..
550  * ..Histogram of nonzero entries in sparse matrix
551  * ....Column 1: Number of nonzero entries (bin minimum)
552  * ....Column 2: Number of columns with that many nonzero values
553  * ....Column 3: Number of rows with that many nonzero values
554  * ..[[25, 0, 2],[29, 0, 8],[30, 0, 10],[31, 150, 232]]
555  * ..
556  * ..Histogram of weights
557  * ....Column 1: Lower bound on weights
558  * ....Column 2: Upper bound on weights
559  * ....Column 3: # of weights in that bin
560  * ..[[-1, 0, 4577],[0, 1, 4365],[1, 2, 34]]
561  *
562  */
563  NcError error( NcError::silent_nonfatal );
564  MPI_Comm commW = MPI_COMM_WORLD;
565  const int rootProc = 0;
566  const std::string outFilename = inMapFilename;
567  double dTolerance = 1e-08;
568  int mpierr = 0;
569 
570  int nprocs, rank;
571  MPI_Comm_size( MPI_COMM_WORLD, &nprocs );
572  MPI_Comm_rank( MPI_COMM_WORLD, &rank );
573 
574  const double dFillValueOverride = 0.0;
575  OfflineMap mapRemap;
576  if( rank == 0 )
577  {
578  std::cout << "Reading file: " << outFilename << std::endl;
579  mapRemap.Read( outFilename );
580  mapRemap.SetFillValueOverrideDbl( dFillValueOverride );
581  mapRemap.SetFillValueOverride( static_cast< float >( dFillValueOverride ) );
582 
583  int isConsistent = mapRemap.IsConsistent( dTolerance );
584  CHECK_EQUAL( 0, isConsistent );
585  int isConservative = mapRemap.IsConservative( dTolerance );
586  CHECK_EQUAL( 0, isConservative );
587  int isMonotone = mapRemap.IsMonotone( dTolerance );
588  CHECK_EQUAL( 4600, isMonotone );
589 
590  // verify that the source and target mesh areas are equal
591  DataArray1D< double >& srcAreas = mapRemap.GetSourceAreas();
592  DataArray1D< double >& tgtAreas = mapRemap.GetTargetAreas();
593 
594  double totSrcArea = 0.0;
595  for( size_t i = 0; i < srcAreas.GetRows(); ++i )
596  totSrcArea += srcAreas[i];
597 
598  double totTgtArea = 0.0;
599  for( size_t i = 0; i < tgtAreas.GetRows(); ++i )
600  totTgtArea += tgtAreas[i];
601 
602  CHECK_REAL_EQUAL( totSrcArea, totTgtArea, 1e-10 );
603  }
604 
605  MPI_Barrier( commW );
606 
607  SparseMatrix< double >& sparseMatrix = mapRemap.GetSparseMatrix();
608 
609  // Get the row, column and entry data tuples
610  // Retrieve all data from the map operator on the root process
611  DataArray1D< int > dataRowsGlobal, dataColsGlobal;
612  DataArray1D< double > dataEntriesGlobal;
613  if( rank == 0 )
614  {
615  sparseMatrix.GetEntries( dataRowsGlobal, dataColsGlobal, dataEntriesGlobal );
616  std::cout << "Total NNZ in remap matrix = " << dataRowsGlobal.GetRows() << std::endl;
617  }
618 
619  int *rg = nullptr, *cg = nullptr;
620  double* de = nullptr;
621  int nMapGlobalRowCols[3] = { sparseMatrix.GetRows(), sparseMatrix.GetColumns(),
622  static_cast< int >( dataEntriesGlobal.GetRows() ) };
623 
624  /* split the rows and send to processes in chunks */
625  std::vector< int > rowAllocation;
626  std::vector< int > sendCounts;
627  std::vector< int > displs;
628  const int NDATA = 3;
629  if( rank == 0 )
630  {
631  rg = dataRowsGlobal;
632  cg = dataColsGlobal;
633  de = dataEntriesGlobal;
634 
635  // Let us do a trivial partitioning of the row data
636  rowAllocation.resize( nprocs * NDATA );
637  displs.resize( nprocs );
638  sendCounts.resize( nprocs );
639 
640  int nRowPerPart = nMapGlobalRowCols[0] / nprocs;
641  int remainder = nMapGlobalRowCols[0] % nprocs; // Keep the remainder in root
642  // printf( "nRowPerPart = %d, remainder = %d\n", nRowPerPart, remainder );
643  int sum = 0;
644  for( int i = 0; i < nprocs; ++i )
645  {
646  rowAllocation[i * NDATA] = nRowPerPart + ( i == 0 ? remainder : 0 );
647  rowAllocation[i * NDATA + 1] = std::find( rg, rg + dataRowsGlobal.GetRows(), sum ) - rg;
648  sum += rowAllocation[i * NDATA];
649  displs[i] = rowAllocation[i * NDATA + 1];
650  }
651  for( int i = 0; i < nprocs - 1; ++i )
652  {
653  rowAllocation[i * NDATA + 2] = rowAllocation[( i + 1 ) * NDATA + 1] - rowAllocation[i * NDATA + 1];
654  sendCounts[i] = rowAllocation[i * NDATA + 2];
655  }
656  rowAllocation[( nprocs - 1 ) * NDATA + 2] = nMapGlobalRowCols[2] - displs[nprocs - 1];
657  sendCounts[nprocs - 1] = rowAllocation[( nprocs - 1 ) * NDATA + 2];
658 
659  // for( int i = 0; i < nprocs; i++ )
660  // printf( "sendcounts[%d] = %d, %d, %d\tdispls[%d] = %d, %d\n", i, rowAllocation[i * NDATA],
661  // rowAllocation[i * NDATA + 1], rowAllocation[i * NDATA + 2], i, displs[i], sendCounts[i] );
662  }
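    // Layout of the trivial partition computed above: rowAllocation packs NDATA = 3
    // integers per rank -- [0] the number of target rows assigned to that rank (root
    // keeps the remainder), [1] the offset of the rank's first nonzero in the global
    // arrays (located with std::find, which relies on dataRowsGlobal being ordered by
    // row), and [2] the number of nonzeros to ship. Entries [1] and [2] feed the
    // displs/sendCounts arguments of the MPI_Scatterv calls below.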
663 
664  mpierr = MPI_Bcast( nMapGlobalRowCols, 3, MPI_INT, rootProc, commW );
665  CHECK_EQUAL( mpierr, 0 );
666 
667  int allocationSize[NDATA] = { 0, 0, 0 };
668  mpierr = MPI_Scatter( rowAllocation.data(), NDATA, MPI_INT, &allocationSize, NDATA, MPI_INT, rootProc, commW );
669  CHECK_EQUAL( mpierr, 0 );
670 
671  // create buffers to receive the data from root process
672  DataArray1D< int > dataRows, dataCols;
673  DataArray1D< double > dataEntries;
674  dataRows.Allocate( allocationSize[2] );
675  dataCols.Allocate( allocationSize[2] );
676  dataEntries.Allocate( allocationSize[2] );
677 
678  /* TODO: combine row and column scatters together. Save one communication by better packing */
679  // Scatter the rows, cols and entries to the processes according to the partition
680  mpierr = MPI_Scatterv( rg, sendCounts.data(), displs.data(), MPI_INT, (int*)( dataRows ), dataRows.GetByteSize(),
681  MPI_INT, rootProc, commW );
682  CHECK_EQUAL( mpierr, 0 );
683 
684  mpierr = MPI_Scatterv( cg, sendCounts.data(), displs.data(), MPI_INT, (int*)( dataCols ), dataCols.GetByteSize(),
685  MPI_INT, rootProc, commW );
686  CHECK_EQUAL( mpierr, 0 );
687 
688  mpierr = MPI_Scatterv( de, sendCounts.data(), displs.data(), MPI_DOUBLE, (double*)( dataEntries ),
689  dataEntries.GetByteSize(), MPI_DOUBLE, rootProc, commW );
690  CHECK_EQUAL( mpierr, 0 );
691 
692  // Compute an offset for the rows and columns by creating a local to global mapping
693  std::vector< int > rowMap, colMap;
694  for( int i = 0; i < allocationSize[2]; ++i )
695  {
696  int rindex, cindex;
697  std::vector< int >::iterator riter = std::find( rowMap.begin(), rowMap.end(), dataRows[i] );
698  if( riter == rowMap.end() )
699  {
700  rowMap.push_back( dataRows[i] );
701  rindex = rowMap.size() - 1;
702  }
703  else
704  rindex = riter - rowMap.begin();
705  dataRows[i] = rindex;
706 
707  std::vector< int >::iterator citer = std::find( colMap.begin(), colMap.end(), dataCols[i] );
708  if( citer == colMap.end() )
709  {
710  colMap.push_back( dataCols[i] );
711  cindex = colMap.size() - 1;
712  }
713  else
714  cindex = citer - colMap.begin();
715  dataCols[i] = cindex;
716  }
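    // The std::find based compaction above is O(nnz * nLocalDofs); for large maps the
    // same local numbering can be built in roughly linear time with a hash map,
    // mirroring the std::map approach used in read_buffered_map(). A sketch of the row
    // half (assuming <unordered_map> is included):
    //
    //     std::unordered_map< int, int > rowIds;
    //     for( int i = 0; i < allocationSize[2]; ++i )
    //     {
    //         auto ins = rowIds.insert( { dataRows[i], static_cast< int >( rowIds.size() ) } );
    //         dataRows[i] = ins.first->second;  // existing or newly assigned local index
    //     }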
717 
718  // Now set the data received from root onto the sparse matrix
719  sparseMatrix.SetEntries( dataRows, dataCols, dataEntries );
720 
721  /**
722  * Perform a series of checks to ensure that the local maps in each process still preserve map properties
723  * Let us run our usual tests to verify that the map data has been distributed correctly
724  * 1) Consistency check -- row sum = 1.0
725  * 2) Disable conservation checks that require global column sums (and hence more communication/reduction)
726  * 3) Perform the monotonicity check and ensure the global failure count matches the serial case
727  */
728 
729  // consistency: row sums
730  int isConsistentP = mapRemap.IsConsistent( dTolerance );
731  CHECK_EQUAL( 0, isConsistentP );
732 
733  // conservation: we will disable this conservation check for now.
734  // int isConservativeP = mapRemap.IsConservative( dTolerance );
735  // CHECK_EQUAL( 0, isConservativeP );
736 
737  // monotonicity: local failures
738  int isMonotoneP = mapRemap.IsMonotone( dTolerance );
739  // Accumulate the sum of failures to ensure that it is exactly the same as in the serial case
740  int isMonotoneG;
741  mpierr = MPI_Allreduce( &isMonotoneP, &isMonotoneG, 1, MPI_INT, MPI_SUM, commW );
742  CHECK_EQUAL( mpierr, 0 );
743  // 4600 entries fail the monotonicity check in serial; the parallel sum must match
744  CHECK_EQUAL( 4600, isMonotoneG );
745 
746  return;
747 }
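For reference, the sendCounts/displs pair built in this test follows the standard MPI_Scatterv contract: rank i receives sendCounts[i] elements taken from offset displs[i] of the root buffer. A minimal, self-contained illustration of that contract, independent of MOAB and TempestRemap (the function name and the two-entries-per-rank split are made up for the example):

    #include <mpi.h>
    #include <vector>
    #include <cstdio>

    // Hand every rank two consecutive integers from a root-owned buffer.
    void scatterv_demo( MPI_Comm comm )
    {
        int rank, nprocs;
        MPI_Comm_rank( comm, &rank );
        MPI_Comm_size( comm, &nprocs );

        std::vector< int > sendbuf, counts( nprocs, 2 ), displs( nprocs );
        if( rank == 0 )
            for( int i = 0; i < 2 * nprocs; ++i )
                sendbuf.push_back( i );
        for( int i = 0; i < nprocs; ++i )
            displs[i] = 2 * i;

        int recvbuf[2] = { -1, -1 };
        MPI_Scatterv( rank == 0 ? sendbuf.data() : NULL, counts.data(), displs.data(), MPI_INT,
                      recvbuf, 2, MPI_INT, 0, comm );
        std::printf( "rank %d received %d and %d\n", rank, recvbuf[0], recvbuf[1] );
    }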